Skip to content

Commit 180132b

Browse files
jdbcode and Xee authors
authored and committed
Account for mask byte in chunk size calculation
PiperOrigin-RevId: 712996660
1 parent 065db79 commit 180132b

File tree

2 files changed

+47
-27
lines changed

2 files changed

+47
-27
lines changed

xee/ext.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,8 @@
6767
'double': np.float64,
6868
}
6969

70-
# While this documentation says that the limit is 10 MB...
71-
# https://developers.google.com/earth-engine/guides/usage#request_payload_size
72-
# actual byte limit seems to depend on other factors. This has been found via
73-
# trial & error.
70+
# Earth Engine image:computePixels request is limited to 48 MB
71+
# https://developers.google.com/earth-engine/reference/rest/v1/projects.image/computePixels
7472
REQUEST_BYTE_LIMIT = 2**20 * 48 # 48 MBs
7573

7674
# Xee uses the ee.ImageCollection.toList function for slicing into an
@@ -80,10 +78,12 @@
8078
_TO_LIST_WARNING_LIMIT = 10000
8179

8280

81+
# Used in ext_test.py.
8382
def _check_request_limit(chunks: Dict[str, int], dtype_size: int, limit: int):
8483
"""Checks that the actual number of bytes exceeds the limit."""
8584
index, width, height = chunks['index'], chunks['width'], chunks['height']
86-
actual_bytes = index * width * height * dtype_size
85+
# Add one for the mask byte (Earth Engine bytes-per-pixel accounting).
86+
actual_bytes = index * width * height * (dtype_size + 1)
8787
if actual_bytes > limit:
8888
raise ValueError(
8989
f'`chunks="auto"` failed! Actual bytes {actual_bytes!r} exceeds limit'
@@ -105,7 +105,7 @@ class EarthEngineStore(common.AbstractDataStore):
105105
# "Safe" default chunks that won't exceed the request limit.
106106
PREFERRED_CHUNKS: Dict[str, int] = {
107107
'index': 48,
108-
'width': 512,
108+
'width': 256,
109109
'height': 256,
110110
}
111111

@@ -352,20 +352,22 @@ def _auto_chunks(
352352
# height and width follow round numbers (powers of two) and allocate the
353353
# remaining bytes available for the index length. To illustrate this logic,
354354
# let's follow through with an example where:
355-
# request_byte_limit = 2 ** 20 * 10 # = 10 MBs
355+
# request_byte_limit = 2 ** 20 * 48 # = 48 MBs
356356
# dtype_bytes = 8
357-
log_total = np.log2(request_byte_limit) # e.g.=23.32...
358-
log_dtype = np.log2(dtype_bytes) # e.g.=3
357+
358+
log_total = np.log2(request_byte_limit) # e.g.=25.58...
359+
# Add one for the mask byte (Earth Engine bytes-per-pixel accounting).
360+
log_dtype = np.log2(dtype_bytes + 1) # e.g.=3.16...
359361
log_limit = 10 * (log_total // 10) # e.g.=20
360-
log_index = log_total - log_limit # e.g.=3.32...
362+
log_index = log_total - log_limit # e.g.=5.58...
361363

362364
# Motivation: How do we divide a number N into the closest sum of two ints?
363-
d = (log_limit - np.ceil(log_dtype)) / 2 # e.g.=17/2=8.5
364-
wd, ht = np.ceil(d), np.floor(d) # e.g. wd=9, ht=8
365+
d = (log_limit - np.ceil(log_dtype)) / 2 # e.g.=16/2=8.0
366+
wd, ht = np.ceil(d), np.floor(d) # e.g. wd=8, ht=8
365367

366368
# Put back to byte space, then round to the nearest integer number of bytes.
367-
index = int(np.rint(2**log_index)) # e.g.=10
368-
width = int(np.rint(2**wd)) # e.g.=512
369+
index = int(np.rint(2**log_index)) # e.g.=48
370+
width = int(np.rint(2**wd)) # e.g.=256
369371
height = int(np.rint(2**ht)) # e.g.=256
370372

371373
return {'index': index, 'width': width, 'height': height}

xee/ext_test.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,32 +13,32 @@ class EEStoreStandardDatatypesTest(parameterized.TestCase):
1313
dict(
1414
testcase_name='int8',
1515
dtype=np.dtype('int8'),
16-
expected_chunks={'index': 48, 'width': 1024, 'height': 1024},
16+
expected_chunks={'index': 48, 'width': 1024, 'height': 512},
1717
),
1818
dict(
1919
testcase_name='int32',
2020
dtype=np.dtype('int32'),
21-
expected_chunks={'index': 48, 'width': 512, 'height': 512},
21+
expected_chunks={'index': 48, 'width': 512, 'height': 256},
2222
),
2323
dict(
2424
testcase_name='int64',
2525
dtype=np.dtype('int64'),
26-
expected_chunks={'index': 48, 'width': 512, 'height': 256},
26+
expected_chunks={'index': 48, 'width': 256, 'height': 256},
2727
),
2828
dict(
2929
testcase_name='float32',
3030
dtype=np.dtype('float32'),
31-
expected_chunks={'index': 48, 'width': 512, 'height': 512},
31+
expected_chunks={'index': 48, 'width': 512, 'height': 256},
3232
),
3333
dict(
3434
testcase_name='float64',
3535
dtype=np.dtype('float64'),
36-
expected_chunks={'index': 48, 'width': 512, 'height': 256},
36+
expected_chunks={'index': 48, 'width': 256, 'height': 256},
3737
),
3838
dict(
3939
testcase_name='complex64',
4040
dtype=np.dtype('complex64'),
41-
expected_chunks={'index': 48, 'width': 512, 'height': 256},
41+
expected_chunks={'index': 48, 'width': 256, 'height': 256},
4242
),
4343
)
4444
def test_auto_chunks__handles_standard_dtypes(self, dtype, expected_chunks):
@@ -49,7 +49,7 @@ def test_auto_chunks__handles_standard_dtypes(self, dtype, expected_chunks):
4949
)
5050

5151

52-
class EEStoreTest(absltest.TestCase):
52+
class EEStoreTest(parameterized.TestCase):
5353

5454
def test_auto_chunks__handles_range_of_dtype_sizes(self):
5555
dt = 0
@@ -59,18 +59,36 @@ def test_auto_chunks__handles_range_of_dtype_sizes(self):
5959
except ValueError:
6060
self.fail(f'Could not handle data type size {dt}.')
6161

62-
def test_auto_chunks__is_optimal_for_powers_of_two(self):
63-
for p in range(10):
64-
dt = 2**p
65-
chunks = xee.EarthEngineStore._auto_chunks(dt)
62+
def test_auto_chunks__matches_observed_values(self):
63+
observed_results = {
64+
1: 50331648,
65+
2: 37748736,
66+
4: 31457280,
67+
8: 28311552,
68+
16: 26738688,
69+
32: 25952256,
70+
64: 25559040,
71+
128: 25362432,
72+
256: 25264128,
73+
512: 25214976,
74+
}
75+
76+
for dtype_bytes, expected_bytes in observed_results.items():
77+
chunks = xee.EarthEngineStore._auto_chunks(dtype_bytes)
78+
actual_bytes = np.prod(list(chunks.values())) * (
79+
dtype_bytes + 1
80+
) # added +1 to account for the mask byte
6681
self.assertEqual(
67-
xee.REQUEST_BYTE_LIMIT, np.prod(list(chunks.values())) * dt
82+
expected_bytes,
83+
actual_bytes,
84+
f'dtype_bytes: {dtype_bytes}, Expected: {expected_bytes}, '
85+
f'Actual: {actual_bytes}, Chunks: {chunks}',
6886
)
6987

7088
def test_exceeding_byte_limit__raises_error(self):
7189
dtype_size = 8
7290
# does not fail
73-
chunks = {'index': 48, 'width': 512, 'height': 256}
91+
chunks = {'index': 48, 'width': 256, 'height': 256}
7492
ext._check_request_limit(chunks, dtype_size, xee.REQUEST_BYTE_LIMIT)
7593

7694
# fails

0 commit comments

Comments
 (0)