Skip to content

Commit 5f97322

Browse files
committed
Implement py raster read list of lists
Signed-off-by: Jason T. Brown <[email protected]>
1 parent 2ebd85a commit 5f97322

File tree

2 files changed

+25
-4
lines changed

2 files changed

+25
-4
lines changed

pyrasterframes/src/main/python/pyrasterframes/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,16 @@ def temp_name():
162162
path = None
163163
catalog = None
164164
options.update(dict(paths='\n'.join([str(i) for i in source]))) # pass in "uri1\nuri2\nuri3\n..."
165+
if all([isinstance(i, list) for i in source]):
166+
# list of lists; we will rely on pandas to
167+
# - coerce all data to str (possibly using objects' __str__ or __repr__)
168+
# - ensure data is not "ragged": all sublists have the same length
169+
path = None
170+
catalog_col_names = ['proj_raster_{}'.format(i) for i in range(len(source[0]))]
171+
catalog = PdDataFrame(source,
172+
columns=catalog_col_names,
173+
dtype=str,
174+
)
165175
elif isinstance(source, str):
166176
if '\n' in source or '\r' in source:
167177
# then the `source` string is a catalog as a CSV (header is required)
@@ -172,12 +182,13 @@ def temp_name():
172182
path = source
173183
catalog = None
174184
else:
175-
# user has passed in some other type, we will interpret as a catalog
185+
# user has passed in some other type, we will try to interpret as a catalog
176186
catalog = source
177187

178188
if catalog is not None:
179189
if catalog_col_names is None:
180190
raise Exception("'catalog_col_names' required when DataFrame 'catalog' specified")
191+
181192
if isinstance(catalog, str):
182193
options.update({
183194
"catalogCSV": catalog,

pyrasterframes/src/main/python/tests/RasterSourceTest.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,19 @@ def l8path(b):
105105
print(path_count.collect())
106106
self.assertTrue(path_count.count() == 3)
107107

108-
@skip('not implemented yet')
109108
def test_list_of_list_of_str(self):
110-
0
109+
lol = [
110+
[self.path(1, 1), self.path(1, 2), ],
111+
[self.path(2, 1), self.path(2, 2), ],
112+
[self.path(3, 1), self.path(3, 2), ]
113+
]
114+
df = self.spark.read.raster(lol)
115+
self.assertTrue(len(df.columns) == 4) # 2 cols of uris plus 2 cols of proj_rasters
116+
self.assertEqual(sorted(df.columns), sorted(['proj_raster_0_path', 'proj_raster_1_path',
117+
'proj_raster_0', 'proj_raster_1']))
118+
uri_df = df.select('proj_raster_0_path', 'proj_raster_1_path').distinct().collect()
119+
uri_list = [list(r.asDict().values()) for r in uri_df]
120+
self.assertEqual(sorted(uri_list), sorted(lol))
111121

112122
def test_schemeless_string(self):
113123
import os.path
@@ -186,4 +196,4 @@ def test_csv_string(self):
186196

187197
df = self.spark.read.raster(s, ['b1', 'b2'])
188198
self.assertEqual(len(df.columns), 3 + 2) # number of columns in original DF plus cardinality of catalog_col_names
189-
self.assertTrue(len(df.take(1)))
199+
self.assertTrue(len(df.take(1))) # non-empty check

0 commit comments

Comments
 (0)