Skip to content

Commit 6c1cad7

Browse files
authored
Added more unit and integration tests. (#12)
The new tests caught one bug, which I count as a success. In addition, this change makes sure that no test code is bundled with the distribution. Lastly, I've added an easy hook (an optional `dev` install extra) to set up a development environment.
1 parent 589a07e commit 6c1cad7

File tree

6 files changed

+72
-18
lines changed

6 files changed

+72
-18
lines changed

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
global-exclude *test.py

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ Feel free to do any analysis you wish. For example:
5353
.plot()
5454
)
5555
```
56-
![Coal vs Wind in the US since 1940](demo.png)
56+
![Coal vs Wind in the US since 1940](https://raw.githubusercontent.com/alxmrs/dask-ee/main/demo.png)
5757

5858
There are a few other useful things you can do.
5959

@@ -81,6 +81,11 @@ df.head()
8181
Contributions are welcome. A good way to start is to check out open [issues](https://github.com/alxmrs/dask-ee/issues)
8282
or file a new one. We're happy to review pull requests, too.
8383

84+
Before writing code, please install the development dependencies (after cloning the repo):
85+
```shell
86+
pip install -e ".[dev]"
87+
```
88+
8489
## License
8590
```
8691
Copyright 2024 Alexander S Merose

dask_ee/read.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@
2626
}
2727

2828

29-
# TODO(#4): Support 'auto' chunks, where we calculate the maximum allowed page size given the number of
30-
# bytes in each row.
3129
def read_ee(
3230
fc: t.Union[ee.FeatureCollection, str],
3331
chunksize: t.Union[int, t.Literal['auto']] = 5_000,
@@ -41,25 +39,27 @@ def read_ee(
4139
Returns:
4240
A dask DataFrame with paged Google Earth Engine data.
4341
"""
42+
# TODO(#4): Support 'auto' chunks, where we calculate the maximum allowed page size given the number of
43+
# bytes in each row.
44+
if chunksize == 'auto':
45+
raise NotImplementedError('Auto chunksize is not implemented yet!')
4446

4547
if isinstance(fc, str):
4648
fc = ee.FeatureCollection(fc)
4749

48-
if chunksize == 'auto':
49-
raise NotImplementedError('Auto chunksize is not implemented yet!')
50-
5150
# Make all the getInfo() calls at once, up front.
5251
fc_size, all_info = ee.List([fc.size(), fc.limit(0)]).getInfo()
5352

5453
columns = {'geo': 'Json'}
5554
columns.update(all_info['columns'])
56-
del columns['system:index']
55+
if 'system:index' in columns:
56+
del columns['system:index']
5757

5858
divisions = tuple(range(0, fc_size, chunksize))
5959

6060
# TODO(#5): Compare `toList()` to other range operations, like getting all index IDs via `getInfo()`.
6161
pages = [ee.FeatureCollection(fc.toList(chunksize, i)) for i in divisions]
62-
# Get the remainder, if it exists. `io_chunks` are not likely to evenly partition the data.
62+
# Get the remainder, if it exists. `chunksize` is not likely to evenly partition the data.
6363
d, r = divmod(fc_size, chunksize)
6464
if r != 0:
6565
pages.append(ee.FeatureCollection(fc.toList(r, d)))

dask_ee/read_integrationtest.py

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,28 +23,66 @@ def setUpClass(cls):
2323
ee.Initialize()
2424

2525
def test_reads_dask_dataframe(self):
26-
fc = ee.FeatureCollection("WRI/GPPD/power_plants")
27-
ddf = dask_ee.read_ee(fc)
26+
fc = ee.FeatureCollection('WRI/GPPD/power_plants')
27+
df = dask_ee.read_ee(fc)
2828

29-
head = ddf.head()
30-
columns = ddf.columns
29+
head = df.head()
30+
columns = df.columns
3131

32-
self.assertIsNotNone(ddf)
32+
self.assertIsNotNone(df)
3333
self.assertIsNotNone(head)
34-
self.assertIsInstance(ddf, dd.DataFrame)
35-
self.assertEqual(ddf.compute().shape, (28_664, 23))
34+
self.assertIsInstance(df, dd.DataFrame)
35+
self.assertEqual(df.compute().shape, (28_664, 23))
3636

3737
print(columns)
3838
print(head)
3939

40+
def test_works_with_defined_features(self):
41+
# Make a list of Features.
42+
features = [
43+
ee.Feature(
44+
ee.Geometry.Rectangle(30.01, 59.80, 30.59, 60.15),
45+
{'name': 'Voronoi'},
46+
),
47+
ee.Feature(ee.Geometry.Point(-73.96, 40.781), {'name': 'Thiessen'}),
48+
ee.Feature(ee.Geometry.Point(6.4806, 50.8012), {'name': 'Dirichlet'}),
49+
]
50+
51+
fc = ee.FeatureCollection(features)
52+
53+
df = dask_ee.read_ee(fc)
54+
55+
self.assertEqual(list(df.columns), ['geo', 'name'])
56+
57+
def test_works_with_a_single_feature_in_fc(self):
58+
from_geom = ee.FeatureCollection(ee.Geometry.Point(16.37, 48.225))
59+
60+
df = dask_ee.read_ee(from_geom)
61+
62+
self.assertEqual(list(df.columns), ['geo'])
63+
self.assertEqual(df.compute().shape, (1, 1))
64+
65+
def test_can_create_random_points(self):
66+
# Define an arbitrary region in which to compute random points.
67+
region = ee.Geometry.Rectangle(-119.224, 34.669, -99.536, 50.064)
68+
69+
# Create 1000 random points in the region.
70+
random_points = ee.FeatureCollection.randomPoints(region)
71+
72+
# Note: these random points have no system:index!
73+
df = dask_ee.read_ee(random_points)
74+
75+
self.assertEqual(list(df.columns), ['geo'])
76+
self.assertEqual(df.compute().shape, (1000, 1))
77+
4078
def test_prof__read_ee(self):
41-
fc = ee.FeatureCollection("WRI/GPPD/power_plants")
79+
fc = ee.FeatureCollection('WRI/GPPD/power_plants')
4280
with cProfile.Profile() as pr:
4381
_ = dask_ee.read_ee(fc)
4482

4583
# Modified version of `pr.print_stats()`.
46-
pstats.Stats(pr).sort_stats("cumtime").print_stats()
84+
pstats.Stats(pr).sort_stats('cumtime').print_stats()
4785

4886

49-
if __name__ == "__main__":
87+
if __name__ == '__main__':
5088
unittest.main()

dask_ee/read_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ def test_can_import_read_op(self):
99
except ModuleNotFoundError:
1010
self.fail('Cannot import `read_ee` function.')
1111

12+
def test_rejects_auto_chunks(self):
13+
import dask_ee
14+
15+
with self.assertRaises(NotImplementedError):
16+
dask_ee.read_ee('WRI/GPPD/power_plants', 'auto')
17+
1218

1319
if __name__ == '__main__':
1420
unittest.main()

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ tests = [
3838
"pytest",
3939
"pyink",
4040
]
41+
dev = [
42+
"dask-ee[tests]",
43+
"build",
44+
]
4145

4246
[project.urls]
4347
Homepage = "https://github.com/alxmrs/dask-ee"

0 commit comments

Comments
 (0)