Countries
Countries data
This dataset contains administrative boundaries of countries.
Source
: Natural Earth (URL: https://www.naturalearthdata.com/)
Processing
: transformations documented in countries_cleaning.ipynb
Clean file:
countries_clean.geojson
%matplotlib inline
import geopandas
Remove small islands
# Location of the Natural Earth 1:10m "admin 0 countries" zip archive.
# NOTE(review): the second "http//www.naturalearthdata.com/" segment looks
# like a typo, but it is kept verbatim because the recorded cell output shows
# exactly this value -- confirm against the Natural Earth download page
# before "fixing" it.
source_url = "".join(
    [
        "https://www.naturalearthdata.com/",
        "http//www.naturalearthdata.com/download/",
        "10m/cultural/ne_10m_admin_0_countries.zip",
    ]
)
# Echo the assembled URL as the cell output.
source_url
'https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip'
# Read the zipped countries layer straight from `source_url` into a
# GeoDataFrame; every later cell derives its data from `ctys`.
ctys = geopandas.read_file(source_url)
# Quick visual check of the raw layer before any filtering.
ctys.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f3811b3fa10>
# Reproject to EPSG:3857 and take each feature's planar area; the result is
# indexed like `ctys`, so it can be used to select rows further down.
# NOTE(review): Web Mercator inflates areas away from the equator, so these
# values are only a rough size ranking -- consider an equal-area CRS if the
# exact cut-off matters.
mercator = ctys.to_crs(epsg=3857)
areas = mercator.area
# Histogram (100 bins) of the areas, used to eyeball a size threshold.
areas.plot(kind="hist", bins=100)
<matplotlib.axes._subplots.AxesSubplot at 0x7f3811976250>
# Size cut-off: one four-thousandth of the largest area in `areas`.
smallest = areas.max() / 4000
# Index labels of every feature whose area falls strictly below the cut-off.
below_cutoff = areas < smallest
small = areas.loc[below_cutoff].index
# Keep the rows of `ctys` whose labels are not in `small`.
kept_labels = ctys.index.difference(small)
large = ctys.loc[kept_labels, :]
# Visual check of what survives the filter.
large.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f38119b8450>
Antarctica
ys = large.centroid.geometry.y
large = large.loc[ys > ys.min(), :]
%time large = large.to_crs(epsg=3857)
CPU times: user 1.67 s, sys: 20.9 ms, total: 1.7 s
Wall time: 1.7 s
# Visual check of the remaining features after the reprojection to EPSG:3857.
large.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f3811b79650>
Keep only relevant columns
# Print the column listing (names, non-null counts, dtypes) so the columns
# worth keeping can be picked out.
large.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 160 entries, 0 to 239
Data columns (total 95 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 featurecla 160 non-null object
1 scalerank 160 non-null int64
2 LABELRANK 160 non-null int64
3 SOVEREIGNT 160 non-null object
4 SOV_A3 160 non-null object
5 ADM0_DIF 160 non-null int64
6 LEVEL 160 non-null int64
7 TYPE 160 non-null object
8 ADMIN 160 non-null object
9 ADM0_A3 160 non-null object
10 GEOU_DIF 160 non-null int64
11 GEOUNIT 160 non-null object
12 GU_A3 160 non-null object
13 SU_DIF 160 non-null int64
14 SUBUNIT 160 non-null object
15 SU_A3 160 non-null object
16 BRK_DIFF 160 non-null int64
17 NAME 160 non-null object
18 NAME_LONG 160 non-null object
19 BRK_A3 160 non-null object
20 BRK_NAME 160 non-null object
21 BRK_GROUP 0 non-null object
22 ABBREV 160 non-null object
23 POSTAL 160 non-null object
24 FORMAL_EN 158 non-null object
25 FORMAL_FR 5 non-null object
26 NAME_CIAWF 159 non-null object
27 NOTE_ADM0 5 non-null object
28 NOTE_BRK 4 non-null object
29 NAME_SORT 160 non-null object
30 NAME_ALT 2 non-null object
31 MAPCOLOR7 160 non-null int64
32 MAPCOLOR8 160 non-null int64
33 MAPCOLOR9 160 non-null int64
34 MAPCOLOR13 160 non-null int64
35 POP_EST 160 non-null int64
36 POP_RANK 160 non-null int64
37 GDP_MD_EST 160 non-null float64
38 POP_YEAR 160 non-null int64
39 LASTCENSUS 160 non-null int64
40 GDP_YEAR 160 non-null int64
41 ECONOMY 160 non-null object
42 INCOME_GRP 160 non-null object
43 WIKIPEDIA 160 non-null int64
44 FIPS_10_ 160 non-null object
45 ISO_A2 160 non-null object
46 ISO_A3 160 non-null object
47 ISO_A3_EH 160 non-null object
48 ISO_N3 160 non-null object
49 UN_A3 160 non-null object
50 WB_A2 160 non-null object
51 WB_A3 160 non-null object
52 WOE_ID 160 non-null int64
53 WOE_ID_EH 160 non-null int64
54 WOE_NOTE 160 non-null object
55 ADM0_A3_IS 160 non-null object
56 ADM0_A3_US 160 non-null object
57 ADM0_A3_UN 160 non-null int64
58 ADM0_A3_WB 160 non-null int64
59 CONTINENT 160 non-null object
60 REGION_UN 160 non-null object
61 SUBREGION 160 non-null object
62 REGION_WB 160 non-null object
63 NAME_LEN 160 non-null int64
64 LONG_LEN 160 non-null int64
65 ABBREV_LEN 160 non-null int64
66 TINY 160 non-null int64
67 HOMEPART 160 non-null int64
68 MIN_ZOOM 160 non-null float64
69 MIN_LABEL 160 non-null float64
70 MAX_LABEL 160 non-null float64
71 NE_ID 160 non-null int64
72 WIKIDATAID 160 non-null object
73 NAME_AR 160 non-null object
74 NAME_BN 160 non-null object
75 NAME_DE 160 non-null object
76 NAME_EN 160 non-null object
77 NAME_ES 160 non-null object
78 NAME_FR 160 non-null object
79 NAME_EL 160 non-null object
80 NAME_HI 160 non-null object
81 NAME_HU 160 non-null object
82 NAME_ID 160 non-null object
83 NAME_IT 160 non-null object
84 NAME_JA 160 non-null object
85 NAME_KO 160 non-null object
86 NAME_NL 160 non-null object
87 NAME_PL 160 non-null object
88 NAME_PT 160 non-null object
89 NAME_RU 160 non-null object
90 NAME_SV 160 non-null object
91 NAME_TR 160 non-null object
92 NAME_VI 160 non-null object
93 NAME_ZH 160 non-null object
94 geometry 160 non-null geometry
dtypes: float64(4), geometry(1), int64(27), object(63)
memory usage: 120.0+ KB
# Columns retained in the cleaned output: the country name field and the
# geometry column.
tokeep = ["ADMIN", "geometry"]
Write out to file
# Write the trimmed layer (only the `tokeep` columns) to disk as a GeoPackage.
# NOTE(review): the introduction of this page names countries_clean.geojson
# as the clean file, but this cell writes countries_clean.gpkg -- confirm
# which format is intended.
cleaned = large[tokeep]
cleaned.to_file("countries_clean.gpkg", driver="GPKG")