Skip to content

Commit d64f320

Browse files
authored
Merge pull request #2037 from Esri/jy-rvw-sedf-advanced
update markdown to create nav, change link to relative, update grammar
2 parents c6dce66 + 8db7636 commit d64f320

File tree

1 file changed

+83
-39
lines changed

1 file changed

+83
-39
lines changed

guide/05-working-with-the-spatially-enabled-dataframe/spatially-enabled-dataframe-advanced-topics.ipynb

Lines changed: 83 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"outputs": [],
4141
"source": [
4242
"from arcgis.gis import GIS\n",
43+
"\n",
4344
"gis = GIS()"
4445
]
4546
},
@@ -264,6 +265,7 @@
264265
],
265266
"source": [
266267
"from arcgis import GIS\n",
268+
"\n",
267269
"item = gis.content.get(\"85d0ca4ea1ca4b9abf0c51b9bd34de2e\")\n",
268270
"flayer = item.layers[0]\n",
269271
"df = flayer.query(where=\"AGE_45_54 < 1500\").sdf\n",
@@ -466,7 +468,7 @@
466468
}
467469
],
468470
"source": [
469-
"df.loc[0] #the first row returned"
471+
"df.loc[0] # the first row returned"
470472
]
471473
},
472474
{
@@ -497,7 +499,7 @@
497499
}
498500
],
499501
"source": [
500-
"df['POP2010'] #the data from the `POP2010` attribute column"
502+
"df[\"POP2010\"] # the data from the `POP2010` attribute column"
501503
]
502504
},
503505
{
@@ -646,8 +648,8 @@
646648
}
647649
],
648650
"source": [
649-
"#rows 0-9 with a subset of columns indexed as a list\n",
650-
"df.loc[0:9][['FID', 'NAME', 'ST', 'POP2010', 'POPULATION']]"
651+
"# rows 0-9 with a subset of columns indexed as a list\n",
652+
"df.loc[0:9][[\"FID\", \"NAME\", \"ST\", \"POP2010\", \"POPULATION\"]]"
651653
]
652654
},
653655
{
@@ -758,7 +760,7 @@
758760
}
759761
],
760762
"source": [
761-
"df[df['NAME'].str.contains('Camp Pendleton South')]"
763+
"df[df[\"NAME\"].str.contains(\"Camp Pendleton South\")]"
762764
]
763765
},
764766
{
@@ -782,7 +784,9 @@
782784
}
783785
],
784786
"source": [
785-
"camp_pendleton_s_geodefn = dict(df.loc[69]).get(\"SHAPE\") #geometry definition from row 2\n",
787+
"camp_pendleton_s_geodefn = dict(df.loc[69]).get(\n",
788+
" \"SHAPE\"\n",
789+
") # geometry definition from the row with index label 69\n",
786790
"print(camp_pendleton_s_geodefn)"
787791
]
788792
},
@@ -824,7 +828,7 @@
824828
"metadata": {},
825829
"source": [
826830
"## Spatial Index\n",
827-
"In addition to row and column indexes to search a DataFrame, we can use a spatial index to quickly access information based on its location and relationship with other features. The index is based on the concept of a **minimum bounding rectangle** - the smallest rectangle that contains an entire geometric shape. Each of these rectangles are then grouped into `leaf` nodes representing a single shape and `node` structures containing groups of shapes according to whatever algorithm the different types of spatial indexing use. Querying these rectangles requires magnitudes fewer compute resources for accessing and processing geometries relative to accessing the entire feature array of coordinate pairs that compose a shape. Access to points, complex lines and irregularly-shaped polygons becomes much quicker and easier through different flavors of spatial indexing.\n",
831+
"In addition to row and column indexes to search a DataFrame, we can use spatial indexes to quickly access information based on its location and relationship with other features. They are based on the concept of a **minimum bounding rectangle** - the smallest rectangle that contains an entire geometric shape. Each of these rectangles is then grouped into `leaf` nodes representing a single shape and `node` structures containing groups of shapes according to whatever algorithm the different types of spatial indexing use. Querying these rectangles requires orders of magnitude fewer compute resources for accessing and processing geometries relative to accessing the entire feature array of coordinate pairs that compose a shape. Access to points, complex lines and irregularly shaped polygons becomes much quicker and easier through different flavors of spatial indexing.\n",
828832
"\n",
829833
"The Spatially Enabled DataFrame uses an implementation of spatial indexing known as [QuadTree indexing](https://en.wikipedia.org/wiki/Quadtree), which searches nodes when determining locations, relationships and attributes of specific features. `QuadTree` indexes are the default spatial index, but the SEDF also supports `r-tree` implementations. In the [**DataFrame Index**](#DataFrame-index) section of this notebook, the USA Major Cities feature layer was queried and the `sdf` property was called on the results to create a DataFrame. The [`sindex`](/python/api-reference/arcgis.features.toc.html#arcgis.features.GeoAccessor.sindex) method on the DataFrame creates a QuadTree index:"
830834
]
@@ -835,7 +839,7 @@
835839
"metadata": {},
836840
"outputs": [],
837841
"source": [
838-
"si = df.spatial.sindex('quadtree',reset=False)"
842+
"si = df.spatial.sindex(\"quadtree\", reset=False)"
839843
]
840844
},
841845
{
@@ -923,7 +927,7 @@
923927
}
924928
],
925929
"source": [
926-
"sp_ref = camp_pendleton_s_geodefn['spatialReference']\n",
930+
"sp_ref = camp_pendleton_s_geodefn[\"spatialReference\"]\n",
927931
"sp_ref"
928932
]
929933
},
@@ -937,16 +941,17 @@
937941
"from arcgis.geometry import Geometry, Polygon\n",
938942
"from arcgis.map.symbols import SimpleFillSymbolEsriSFS\n",
939943
"\n",
940-
"#define a symbol to visualize the spatial index quadrants\n",
944+
"# define a symbol to visualize the spatial index quadrants\n",
941945
"sym = {\n",
942946
" \"type\": \"esriSFS\",\n",
943947
" \"style\": \"esriSFSSolid\",\n",
944-
" \"color\": [0,0,0,0],\n",
948+
" \"color\": [0, 0, 0, 0],\n",
945949
" \"outline\": {\n",
946950
" \"type\": \"esriSLS\",\n",
947951
" \"style\": \"esriSLSSolid\",\n",
948-
" \"color\": [0,0,0,255],\n",
949-
" \"width\": 4}\n",
952+
" \"color\": [0, 0, 0, 255],\n",
953+
" \"width\": 4,\n",
954+
" },\n",
950955
"}\n",
951956
"\n",
952957
"# loop through the children of the root index and draw each extent\n",
@@ -955,10 +960,10 @@
955960
" sym[\"outline\"][\"color\"][i] = 255\n",
956961
" if i > 0:\n",
957962
" sym[\"outline\"][\"color\"][i] = 255\n",
958-
" sym[\"outline\"][\"color\"][i-1] = 0\n",
963+
" sym[\"outline\"][\"color\"][i - 1] = 0\n",
959964
" child = si._index.children[i]\n",
960-
" width_factor = child.width/2\n",
961-
" height_factor = child.width/2\n",
965+
" width_factor = child.width / 2\n",
966+
" height_factor = child.width / 2\n",
962967
" minx = child.center[0] - width_factor\n",
963968
" miny = child.center[1] - height_factor\n",
964969
" maxx = child.center[0] + width_factor\n",
@@ -1312,7 +1317,7 @@
13121317
}
13131318
],
13141319
"source": [
1315-
"query = df['ST'] == 'MI'\n",
1320+
"query = df[\"ST\"] == \"MI\"\n",
13161321
"df[query]"
13171322
]
13181323
},
@@ -1392,8 +1397,8 @@
13921397
}
13931398
],
13941399
"source": [
1395-
"query = (df['POP2010'] > 20000) & (df['ST'] == 'OH')\n",
1396-
"df[query][['NAME','ST','POP2010','HOUSEHOLDS','HSEHLD_1_F', 'HSEHLD_1_M']]"
1400+
"query = (df[\"POP2010\"] > 20000) & (df[\"ST\"] == \"OH\")\n",
1401+
"df[query][[\"NAME\", \"ST\", \"POP2010\", \"HOUSEHOLDS\", \"HSEHLD_1_F\", \"HSEHLD_1_M\"]]"
13971402
]
13981403
},
13991404
{
@@ -1535,11 +1540,25 @@
15351540
"metadata": {},
15361541
"outputs": [],
15371542
"source": [
1538-
"area_of_interest = [-13043219.122301877, 3911134.034258818, -13243219.102301877, 4111134.0542588173]\n",
1539-
"minx, miny, maxx, maxy = area_of_interest[0], area_of_interest[1], area_of_interest[2], area_of_interest[3]\n",
1543+
"area_of_interest = [\n",
1544+
" -13043219.122301877,\n",
1545+
" 3911134.034258818,\n",
1546+
" -13243219.102301877,\n",
1547+
" 4111134.0542588173,\n",
1548+
"]\n",
1549+
"minx, miny, maxx, maxy = (\n",
1550+
" area_of_interest[0],\n",
1551+
" area_of_interest[1],\n",
1552+
" area_of_interest[2],\n",
1553+
" area_of_interest[3],\n",
1554+
")\n",
15401555
"\n",
1541-
"area_of_interest_ring = [[[minx, miny], [minx, maxy], [maxx, maxy], [maxx, miny], [minx, miny]]]\n",
1542-
"area_of_interest_geom = Geometry({'rings': area_of_interest_ring, 'spatialReference': sp_ref})\n",
1556+
"area_of_interest_ring = [\n",
1557+
" [[minx, miny], [minx, maxy], [maxx, maxy], [maxx, miny], [minx, miny]]\n",
1558+
"]\n",
1559+
"area_of_interest_geom = Geometry(\n",
1560+
" {\"rings\": area_of_interest_ring, \"spatialReference\": sp_ref}\n",
1561+
")\n",
15431562
"\n",
15441563
"sym_poly_aoi = SimpleFillSymbolEsriSFS(**{\n",
15451564
" \"type\": \"esriSFS\",\n",
@@ -1874,7 +1893,7 @@
18741893
"pt_sym = SimpleMarkerSymbolEsriSMS(**{\n",
18751894
" \"type\": \"esriSMS\",\n",
18761895
" \"style\": \"esriSMSDiamond\",\n",
1877-
" \"color\": [255,140,0,255], # yellowish\n",
1896+
" \"color\": [255, 140, 0, 255], # dark orange\n",
18781897
" \"size\": 8,\n",
18791898
" \"angle\": 0,\n",
18801899
" \"xoffset\": 0,\n",
@@ -2096,8 +2115,8 @@
20962115
}
20972116
],
20982117
"source": [
2099-
"query = (df['ST'] == 'OH') | (df['ST'] == 'MI')\n",
2100-
"df1 = df[query][['NAME', 'ST', 'POP2010']]\n",
2118+
"query = (df[\"ST\"] == \"OH\") | (df[\"ST\"] == \"MI\")\n",
2119+
"df1 = df[query][[\"NAME\", \"ST\", \"POP2010\"]]\n",
21012120
"df1"
21022121
]
21032122
},
@@ -2304,8 +2323,19 @@
23042323
}
23052324
],
23062325
"source": [
2307-
"query = df['ST'] == 'OH'\n",
2308-
"df2 = df[query][['NAME', 'POPULATION','BLACK', 'HAWN_PI', 'HISPANIC', 'WHITE', 'MULT_RACE', 'OTHER']]\n",
2326+
"query = df[\"ST\"] == \"OH\"\n",
2327+
"df2 = df[query][\n",
2328+
" [\n",
2329+
" \"NAME\",\n",
2330+
" \"POPULATION\",\n",
2331+
" \"BLACK\",\n",
2332+
" \"HAWN_PI\",\n",
2333+
" \"HISPANIC\",\n",
2334+
" \"WHITE\",\n",
2335+
" \"MULT_RACE\",\n",
2336+
" \"OTHER\",\n",
2337+
" ]\n",
2338+
"]\n",
23092339
"df2"
23102340
]
23112341
},
@@ -2540,7 +2570,7 @@
25402570
"source": [
25412571
"import pandas as pd\n",
25422572
"\n",
2543-
"pd.merge(df1, df2, on='NAME', how='inner')"
2573+
"pd.merge(df1, df2, on=\"NAME\", how=\"inner\")"
25442574
]
25452575
},
25462576
{
@@ -2912,7 +2942,7 @@
29122942
}
29132943
],
29142944
"source": [
2915-
"pd.merge(df1, df2, on='NAME', how = 'outer')"
2945+
"pd.merge(df1, df2, on=\"NAME\", how=\"outer\")"
29162946
]
29172947
},
29182948
{
@@ -2969,7 +2999,8 @@
29692999
"outputs": [],
29703000
"source": [
29713001
"import os\n",
2972-
"data_pth = r'/path/to/your/data/census_2010/example'\n",
3002+
"\n",
3003+
"data_pth = r\"/path/to/your/data/census_2010/example\"\n",
29733004
"cities = r\"cities.shp\"\n",
29743005
"states = r\"states.shp\""
29753006
]
@@ -3312,7 +3343,9 @@
33123343
"source": [
33133344
"from arcgis.geometry import SpatialReference\n",
33143345
"\n",
3315-
"sdf_join = pd.DataFrame.spatial.from_featureclass(os.path.join(data_pth, states), sr=SpatialReference(4326).as_arcpy)\n",
3346+
"sdf_join = pd.DataFrame.spatial.from_featureclass(\n",
3347+
" os.path.join(data_pth, states), sr=SpatialReference(4326).as_arcpy\n",
3348+
")\n",
33163349
"sdf_join.head()"
33173350
]
33183351
},
@@ -3365,8 +3398,13 @@
33653398
}
33663399
],
33673400
"source": [
3368-
"for a,b,c,d in zip(sdf_target_cols[::4],sdf_target_cols[1::4],sdf_target_cols[2::4], sdf_target_cols[3::4]):\n",
3369-
" print(\"{:<30}{:<30}{:<30}{:<}\".format(a,b,c,d))"
3401+
"for a, b, c, d in zip(\n",
3402+
" sdf_target_cols[::4],\n",
3403+
" sdf_target_cols[1::4],\n",
3404+
" sdf_target_cols[2::4],\n",
3405+
" sdf_target_cols[3::4],\n",
3406+
"):\n",
3407+
" print(\"{:<30}{:<30}{:<30}{:<}\".format(a, b, c, d))"
33703408
]
33713409
},
33723410
{
@@ -3390,8 +3428,14 @@
33903428
}
33913429
],
33923430
"source": [
3393-
"for a,b,c,d,e in zip(sdf_join_cols[::5],sdf_join_cols[1::5],sdf_join_cols[2::5],sdf_join_cols[3::5],sdf_join_cols[4::5]):\n",
3394-
" print(\"{:<20}{:<20}{:<20}{:<20}{:<}\".format(a,b,c,d,e))"
3431+
"for a, b, c, d, e in zip(\n",
3432+
" sdf_join_cols[::5],\n",
3433+
" sdf_join_cols[1::5],\n",
3434+
" sdf_join_cols[2::5],\n",
3435+
" sdf_join_cols[3::5],\n",
3436+
" sdf_join_cols[4::5],\n",
3437+
"):\n",
3438+
" print(\"{:<20}{:<20}{:<20}{:<20}{:<}\".format(a, b, c, d, e))"
33953439
]
33963440
},
33973441
{
@@ -3421,7 +3465,7 @@
34213465
}
34223466
],
34233467
"source": [
3424-
"sdf_target.loc[0]['SHAPE'].as_arcpy"
3468+
"sdf_target.loc[0][\"SHAPE\"].as_arcpy"
34253469
]
34263470
},
34273471
{
@@ -3637,7 +3681,7 @@
36373681
}
36383682
],
36393683
"source": [
3640-
"q = sdf_target['ST'] == 'WY'\n",
3684+
"q = sdf_target[\"ST\"] == \"WY\"\n",
36413685
"left = sdf_target[q].copy()\n",
36423686
"left.head()"
36433687
]
@@ -3713,7 +3757,7 @@
37133757
}
37143758
],
37153759
"source": [
3716-
"q = sdf_join.STATE_ABBR == 'WY'\n",
3760+
"q = sdf_join.STATE_ABBR == \"WY\"\n",
37173761
"right = sdf_join[q].copy()\n",
37183762
"right.head()"
37193763
]

0 commit comments

Comments
 (0)