Skip to content

Commit 66e53f3

Browse files
committed
Updated languages.pymd with three variants of same analysis.
Added `doctype` option to pweave setup subcommand.
1 parent c14f60d commit 66e53f3

File tree

9 files changed

+158
-36
lines changed

9 files changed

+158
-36
lines changed

core/src/test/scala/Scratch.sc

Whitespace-only changes.

core/src/test/scala/org/locationtech/rasterframes/ReprojectGeometrySpec.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ class ReprojectGeometrySpec extends TestEnvironment {
105105
"""
106106
| SELECT st_reproject(ll, llCRS, 'EPSG:3857') as wm2,
107107
| st_reproject(wm, 'EPSG:3857', llCRS) as ll2,
108-
| st_reproject(st_reproject(ll, llCRS, '+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs'), '+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs', 'EPSG:3857') as wm3
108+
| st_reproject(st_reproject(ll, llCRS, '+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs'),
109+
| '+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs', 'EPSG:3857') as wm3
109110
| FROM geom
110111
""".stripMargin).as[(Geometry, Geometry, Geometry)]
111112

docs/README.md

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ To set up an environment whereby you can easily test/evaluate your code blocks d
6767
sbt:RasterFrames> pyrasterframes/doc
6868
```
6969
There's a command alias for this last step: `pyDocs`.
70-
4. To evaluate a single `.pymd` file, you pass the `-f` option and the filename relative to the `python` direoctry:
70+
4. To evaluate a single `.pymd` file, you pass the `-f` option and the filename relative to the `python` directory:
7171
```
7272
sbt:RasterFrames> pyrasterframes/pySetup pweave -f docs/getting-started.pymd
7373
[info] Synchronizing 44 files to '<src-root>/pyrasterframes/target/python'
@@ -81,11 +81,22 @@ To set up an environment whereby you can easily test/evaluate your code blocks d
8181
Processing chunk 1 named None from line 14
8282
...
8383
Weaved docs/getting-started.pymd to docs/getting-started.md
84-
[success] Total time: 21 s, completed Jul 5, 2019 12:31:09 PM
85-
sbt:RasterFrames>
8684
```
8785
5. The _output_ Markdown files are written to `<src-root>/pyrasterframes/target/python/docs`. _Note_: don't edit any files in the `pyrasterframes/target` directory... they will get overwritten each time `sbt` runs a command.
88-
6. To build all the documentation and convert to a static html site, run:
86+
6. During content development it's sometimes helpful to see the output rendered as basic HTML. To do this, add the `-d html` option to the pweave command:
87+
```
88+
sbt:RasterFrames> pyrasterframes/pySetup pweave -d html -f docs/getting-started.pymd
89+
[info] Synchronizing 54 files to '<src-root>/pyrasterframes/target/python'
90+
[info] Running 'python setup.py pweave -d html -f docs/getting-started.pymd' in '<src-root>/pyrasterframes/target/python'
91+
running pweave
92+
--------------------------------------------------
93+
Running getting-started
94+
--------------------------------------------------
95+
...
96+
Weaved docs/getting-started.pymd to docs/getting-started.html
97+
```
98+
Note: This feature requires `pandoc` to be installed.
99+
7. To build all the documentation and convert to a static html site, run:
89100
```bash
90101
sbt makeSite
91102
```
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import geotrellis.proj4.LatLng
2+
import org.locationtech.rasterframes._
3+
import org.locationtech.rasterframes.datasource.raster._
4+
import org.apache.spark.sql._
5+
import org.apache.spark.sql.functions._
6+
7+
8+
implicit val spark = SparkSession.builder().
9+
master("local[*]").appName("RasterFrames").getOrCreate().withRasterFrames
10+
spark.sparkContext.setLogLevel("ERROR")
11+
12+
import spark.implicits._
13+
14+
val modis = spark.read.format("aws-pds-modis-catalog").load()
15+
16+
val red_nir_monthly_2017 = modis
17+
.select($"granule_id", month($"acquisition_date") as "month", $"B01" as "red", $"B02" as "nir")
18+
.where(year($"acquisition_date") === 2017 && (dayofmonth($"acquisition_date") === 15) && $"granule_id" === "h21v09")
19+
20+
val red_nir_tiles_monthly_2017 = spark.read.raster
21+
.fromCatalog(red_nir_monthly_2017, "red", "nir")
22+
.load()
23+
24+
val result = red_nir_tiles_monthly_2017
25+
.where(st_intersects(
26+
st_reproject(rf_geometry($"red"), rf_crs($"red"), LatLng),
27+
st_makePoint(34.870605, -4.729727)
28+
))
29+
.groupBy("month")
30+
.agg(rf_agg_stats(rf_normalized_difference($"nir", $"red")) as "ndvi_stats")
31+
.orderBy("month")
32+
.select("month", "ndvi_stats.*")
33+
34+
35+
result.show()

project/RFDependenciesPlugin.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ object RFDependenciesPlugin extends AutoPlugin {
5959
),
6060

6161
// NB: Make sure to update the Spark version in pyrasterframes/python/setup.py
62-
rfSparkVersion := "2.3.2",
62+
rfSparkVersion := "2.3.3",
6363
rfGeoTrellisVersion := "2.2.0",
6464
rfGeoMesaVersion := "2.2.1",
6565
dependencyOverrides += "com.azavea.gdal" % "gdal-warp-bindings" % "33.58d4965"

pyrasterframes/src/main/python/docs/languages.pymd

Lines changed: 75 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,54 @@
33
One of the great powers of RasterFrames, afforded by Spark SQL, is the ability to express computation in multiple programming languages. This manual is centered around Python because that's the most common language used in data science and GIS analytics. However, Scala (the implementation language of RasterFrames) and SQL are also fully supported. Examples in Python can be mechanically translated into the other two languages without much difficulty once the naming conventions are understood. In the sections below we will show the same example program (computing average NDVI per month for a single tile in Tanzania).
44

55
```python, imports, echo=False
6-
from pyspark.sql.functions import month, dayofmonth, year
6+
from pyspark.sql.functions import *
77
from pyrasterframes.utils import create_rf_spark_session
8+
89
from pyrasterframes.rasterfunctions import *
910
import pyrasterframes.rf_ipython
1011
import pandas as pd
1112
import os
13+
spark = create_rf_spark_session()
1214
```
1315

1416
## Python
1517

16-
Step 1: Load the catalog
18+
### Step 1: Load the catalog
1719

1820
```python, step_1_python
1921
modis = spark.read.format('aws-pds-modis-catalog').load()
2022
```
21-
Step 2: Down-select data by month
23+
### Step 2: Down-select data by month
2224

2325
```python, step_2_python
2426
red_nir_monthly_2017 = modis \
25-
.select('granule_id', month('acquisition_date').alias('month'), col('B01').alias('red'), col('B02').alias('nir')) \
27+
.select(
28+
col('granule_id'),
29+
month('acquisition_date').alias('month'),
30+
col('B01').alias('red'),
31+
col('B02').alias('nir')
32+
) \
2633
.where((year('acquisition_date') == 2017) & (dayofmonth('acquisition_date') == 15) & (col('granule_id') == 'h21v09'))
2734
```
2835

29-
Step 3: Read tiles
36+
### Step 3: Read tiles
3037

3138
```python, step_3_python
3239
red_nir_tiles_monthly_2017 = spark.read.raster(catalog=red_nir_monthly_2017, catalog_col_names=['red', 'nir'])
3340
```
3441

35-
Step 4: Compute aggregates
42+
### Step 4: Compute aggregates
3643

3744
```python, step_4_python
3845
result = red_nir_tiles_monthly_2017 \
46+
.where(st_intersects(
47+
st_reproject(rf_geometry(col('red')), rf_crs(col('red')).crsProj4, rf_mk_crs('EPSG:4326')),
48+
st_makePoint(lit(34.870605), lit(-4.729727)))
49+
) \
3950
.groupBy('month') \
40-
.agg(first('month'), rf_agg_stats(rf_normalized_difference(col('nir'), col('red')).alias('ndvi_stats'))) \
41-
.where(st_intersects(st_reproject(rf_geometry(col('red')), rf_crs(col('red')), rf_mk_crs('EPSG:4326')), st_makePoint(34.870605, -4.729727))) \
42-
.orderBy(col('month'))
51+
.agg(rf_agg_stats(rf_normalized_difference(col('nir'), col('red'))).alias('ndvi_stats')) \
52+
.orderBy(col('month')) \
53+
.select('month', 'ndvi_stats.*')
4354
result.show()
4455
```
4556

@@ -48,18 +59,17 @@ result.show()
4859
For convenience we're going to evaluate SQL from the Python environment. The SQL fragments should work in the `spark-sql` shell just the same.
4960

5061
```python, sql_setup
51-
spark = create_rf_spark_session()
5262
def sql(stmt):
5363
return spark.sql(stmt)
5464
```
5565

56-
Step 1: Load the catalog
66+
### Step 1: Load the catalog
5767

5868
```python, step_1_sql
5969
sql("CREATE OR REPLACE TEMPORARY VIEW modis USING `aws-pds-modis-catalog`")
6070
```
6171

62-
Step 2: Down-select data by month
72+
### Step 2: Down-select data by month
6373

6474
```python, step_2_sql
6575
sql("""
@@ -71,7 +81,7 @@ WHERE year(acquisition_date) = 2017 AND day(acquisition_date) = 15 AND granule_i
7181
sql('DESCRIBE red_nir_monthly_2017').show()
7282
```
7383

74-
Step 3: Read tiles
84+
### Step 3: Read tiles
7585

7686
```python, step_3_sql
7787
sql("""
@@ -81,7 +91,7 @@ OPTIONS (catalogTable='red_nir_monthly_2017', catalogColumns='red,nir')
8191
""")
8292
```
8393

84-
Step 4: Compute aggregates
94+
### Step 4: Compute aggregates
8595

8696
```python, step_4_sql
8797
sql("""
@@ -97,3 +107,54 @@ SELECT month, ndvi_stats.* FROM (
97107

98108
## Scala
99109

110+
### Step 1: Load the catalog
111+
112+
```scala
113+
import geotrellis.proj4.LatLng
114+
import org.locationtech.rasterframes._
115+
import org.locationtech.rasterframes.datasource.raster._
116+
import org.apache.spark.sql._
117+
import org.apache.spark.sql.functions._
118+
119+
120+
implicit val spark = SparkSession.builder()
121+
.master("local[*]")
122+
.appName("RasterFrames")
123+
.withKryoSerialization
124+
.getOrCreate()
125+
.withRasterFrames
126+
127+
import spark.implicits._
128+
129+
val modis = spark.read.format("aws-pds-modis-catalog").load()
130+
```
131+
132+
### Step 2: Down-select data by month
133+
134+
```scala
135+
val red_nir_monthly_2017 = modis
136+
.select($"granule_id", month($"acquisition_date") as "month", $"B01" as "red", $"B02" as "nir")
137+
.where(year($"acquisition_date") === 2017 && (dayofmonth($"acquisition_date") === 15) && $"granule_id" === "h21v09")
138+
```
139+
140+
### Step 3: Read tiles
141+
142+
```scala
143+
val red_nir_tiles_monthly_2017 = spark.read.raster
144+
.fromCatalog(red_nir_monthly_2017, "red", "nir")
145+
.load()
146+
```
147+
148+
### Step 4: Compute aggregates
149+
150+
```scala
151+
val result = red_nir_tiles_monthly_2017
152+
.where(st_intersects(
153+
st_reproject(rf_geometry($"red"), rf_crs($"red"), LatLng),
154+
st_makePoint(34.870605, -4.729727)
155+
))
156+
.groupBy("month")
157+
.agg(rf_agg_stats(rf_normalized_difference($"nir", $"red")) as "ndvi_stats")
158+
.orderBy("month")
159+
.select("month", "ndvi_stats.*")
160+
```

pyrasterframes/src/main/python/docs/reference.pymd

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,22 @@ Fetches the extent (bounding box or envelope) of a `ProjectedRasterTile` or `Ras
8787

8888
### rf_crs
8989

90-
Struct[String] rf_crs(ProjectedRasterTile proj_raster)
91-
Struct[String] rf_crs(RasterSource proj_raster)
90+
Struct rf_crs(ProjectedRasterTile proj_raster)
91+
Struct rf_crs(RasterSource proj_raster)
9292

93-
Fetch the [proj4](https://proj4.org/) string representation of the coordinate reference system of a `ProjectedRasterTile` or `RasterSource` type tile columns.
93+
Fetch CRS structure representing the coordinate reference system of a `ProjectedRasterTile` or `RasterSource` type tile columns.
94+
95+
### rf_mk_crs
96+
97+
Construct a CRS structure from one of its string representations. Three forms are supported:
98+
99+
Struct rf_mk_crs(String crsText)
100+
101+
* [EPSG code](https://www.epsg-registry.org/): `EPSG:<integer>`
102+
* [Proj4 string](https://proj.org/): `+proj <proj4 parameters>`
103+
* [WKT String](http://www.geoapi.org/3.0/javadoc/org/opengis/referencing/doc-files/WKT.html) with embedded EPSG code: `GEOGCS["<name>", <datum>, <prime meridian>, <angular unit> {,<twin axes>} {,<authority>}]`
104+
105+
Example: `SELECT rf_mk_crs('EPSG:4326')`
94106

95107
### rf_convert_cell_type
96108

@@ -117,7 +129,6 @@ Functions to create a new Tile column, either from scratch or from existing data
117129
Tile rf_make_zeros_tile(Int tile_columns, Int tile_rows, String cell_type_name)
118130
```
119131

120-
121132
Create a `tile` of shape `tile_columns` by `tile_rows` full of zeros, with the specified cell type. See function @ref:[`rf_cell_types`](reference.md#rf-cell-types) for valid values. All arguments are literal values and not column expressions.
122133

123134
### rf_make_ones_tile

pyrasterframes/src/main/python/setup.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class PweaveDocs(distutils.cmd.Command):
5353
user_options = [
5454
# The format is (long option, short option, description).
5555
('files=', 'f', 'Specific files to pweave. Defaults to all in `docs` directory.'),
56+
('doctype=', 'd', 'Output format type. Defaults to `markdown`')
5657
]
5758

5859
def initialize_options(self):
@@ -62,15 +63,15 @@ def initialize_options(self):
6263
lambda x: not path.basename(x)[:1] == '_',
6364
glob(path.join(here, 'docs', '*.pymd'))
6465
)
66+
self.doctype = 'markdown'
6567

6668
def finalize_options(self):
6769
"""Post-process options."""
6870
import re
6971
if isinstance(self.files, str):
7072
self.files = filter(lambda s: len(s) > 0, re.split(',', self.files))
71-
72-
def doctype(self):
73-
return "markdown"
73+
if self.doctype == "html":
74+
self.doctype = "pandoc2html"
7475

7576
def run(self):
7677
"""Run pweave."""
@@ -83,7 +84,7 @@ def run(self):
8384
try:
8485
pweave.weave(
8586
file=str(file),
86-
doctype=self.doctype()
87+
doctype=self.doctype
8788
)
8889
except Exception:
8990
print(_divided('%s Failed:' % file))
@@ -92,8 +93,9 @@ def run(self):
9293

9394

9495
class PweaveNotebooks(PweaveDocs):
95-
def doctype(self):
96-
return "notebook"
96+
def initialize_options(self):
97+
super().initialize_options()
98+
self.doctype = 'notebook'
9799

98100

99101
setup(

rf-notebook/src/main/docker/docker-compose.yml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@ services:
77
# jupyter notebook port
88
- "8888:8888"
99
# spark UI ports
10-
- "4040:4040"
11-
- "4041:4041"
12-
- "4042:4042"
13-
- "4043:4043"
14-
- "4044:4044"
10+
- "44040:4040"
11+
- "44041:4041"
12+
- "44042:4042"
13+
- "44043:4043"
14+
- "44044:4044"
1515
# To save locally at './work' from the container:
1616
volumes:
17-
- ../../../src/main/notebooks:/home/jovyan/work
17+
- ../../../src/main/notebooks:/home/jovyan/work
18+
- ~/.rf_cache:/home/jovyan/.rf_cache

0 commit comments

Comments
 (0)