[file_packager] split data files when file size exceeds 2Gi ArrayBuffer limit #24802

Open · wants to merge 15 commits into base: main
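In short: when the combined size of the preloaded files would exceed the 2 GiB ArrayBuffer limit, the packager now splits the output into multiple .data chunks, and every chunk after the first gets an _<n> suffix before the extension. Below is a minimal standalone sketch of that splitting policy, assuming greedy in-order packing as in the diff that follows; the helper names split_into_chunks and chunk_target are illustrative, not part of the patch.

import os

PRELOAD_DATA_FILE_LIMIT = 2**31 - 1  # largest data bundle an ArrayBuffer can hold

def split_into_chunks(paths, limit=PRELOAD_DATA_FILE_LIMIT):
  # Pack files greedily, in order, into chunks whose total size stays <= limit.
  chunks = [[]]
  current_size = 0
  for path in paths:
    fsize = os.path.getsize(path)
    if fsize > limit:
      # A single file larger than the limit can never be loaded.
      raise ValueError('cannot package file larger than %d MB' % (limit // (1024 * 1024)))
    if current_size + fsize <= limit:
      chunks[-1].append(path)
      current_size += fsize
    else:
      chunks.append([path])
      current_size = fsize
  return chunks

def chunk_target(data_target, counter):
  # test.data -> test.data (chunk 0), test_1.data, test_2.data, ...
  base, ext = os.path.splitext(data_target)
  return f"{base}{f'_{counter}' if counter else ''}{ext}"

With two 1 GiB inputs this yields two chunks, test.data and test_1.data, which is exactly what the new tests below expect.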
44 changes: 44 additions & 0 deletions test/test_other.py
@@ -7415,6 +7415,50 @@ def test_file_packager_huge(self):
    self.assertContained(MESSAGE, err)
    self.clear()

  def test_file_packager_huge_no_split(self):
    create_file('huge.dat', 'a' * (1024 * 1024 * 1024))
    create_file('huge2.dat', 'b' * ((1024 * 1024 * 1024) - 1))
    err = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'huge.dat', '--preload', 'huge2.dat'], stdout=PIPE, stderr=PIPE).stderr
    self.assertContained('warning: file packager is creating an asset bundle of 2047 MB. this is very large, and browsers might have trouble loading it', err)
    self.assertExists('test.data')
    self.assertEqual(os.path.getsize('test.data'), (1024 * 1024 * 1024 * 2) - 1)
    self.clear()

  def test_file_packager_huge_split(self):
    create_file('huge.dat', 'a' * (1024 * 1024 * 1024))
    create_file('huge2.dat', 'b' * (1024 * 1024 * 1024))
    err = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'huge.dat', '--preload', 'huge2.dat'], stdout=PIPE, stderr=PIPE).stderr
    self.assertContained('warning: file packager is creating an asset bundle of 1024 MB. this is very large, and browsers might have trouble loading it', err)
    self.assertContained('warning: file packager is splitting bundle into 2 chunks', err)
    self.assertExists('test.data')
    self.assertExists('test_1.data')
    self.assertEqual(os.path.getsize('test.data'), 1024 * 1024 * 1024)
    self.assertEqual(os.path.getsize('test_1.data'), 1024 * 1024 * 1024)
    self.clear()

  def test_file_packager_huge_split_metadata(self):
    create_file('huge.dat', 'a' * (1024 * 1024 * 1024))
    create_file('huge2.dat', 'b' * (1024 * 1024 * 1024))
    err = self.run_process([FILE_PACKAGER, 'test.data', '--separate-metadata', '--js-output=immutable.js', '--preload', 'huge.dat', '--preload', 'huge2.dat'], stdout=PIPE, stderr=PIPE).stderr
    self.assertContained('warning: file packager is creating an asset bundle of 1024 MB. this is very large, and browsers might have trouble loading it', err)
    self.assertContained('warning: file packager is splitting bundle into 2 chunks', err)
    self.assertExists('test.data')
    self.assertExists('immutable.js')
    self.assertExists('immutable.js.metadata')
    self.assertExists('test_1.data')
    self.assertExists('immutable_1.js')
    self.assertExists('immutable_1.js.metadata')
    self.assertEqual(os.path.getsize('test.data'), 1024 * 1024 * 1024)
    self.assertEqual(os.path.getsize('test_1.data'), 1024 * 1024 * 1024)
    self.clear()

  def test_file_packager_huge_split_too_large(self):
    create_file('huge.dat', 'a' * (2 * 1024 * 1024 * 1024))
    proc = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'huge.dat'], check=False, stdout=PIPE, stderr=PIPE)
    self.assertEqual(proc.returncode, 1)
    self.assertContained('error: cannot package file larger than 2047 MB', proc.stderr)
    self.clear()

  @parameterized({
    '': (True,),
    'wasm2js': (False,),
79 changes: 52 additions & 27 deletions tools/file_packager.py
@@ -94,6 +94,7 @@
AUDIO_MIMETYPES = {'ogg': 'audio/ogg', 'wav': 'audio/wav', 'mp3': 'audio/mpeg'}

DDS_HEADER_SIZE = 128
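# Browsers cannot allocate an ArrayBuffer of 2 GiB or more, so each preloaded
# data bundle must stay below 2**31 bytes.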
PRELOAD_DATA_FILE_LIMIT = 2**31 - 1

# Set to 1 to randomize file order and add some padding,
# to work around silly av false positives
@@ -565,8 +566,6 @@ def was_seen(name):
    for plugin in plugins:
      plugin(file_)

  metadata = {'files': []}

  if options.obj_output:
    if not options.has_embedded:
      diagnostics.error('--obj-output is only applicable when embedding files')
@@ -575,27 +574,52 @@ def was_seen(name):
  if not options.has_preloaded:
    return 0

  ret = generate_js(data_target, data_files, metadata)

  if options.force or len(data_files):
    if options.jsoutput is None:
      print(ret)
    else:
      # Overwrite the old jsoutput file (if exists) only when its content
      # differs from the current generated one, otherwise leave the file
      # untouched preserving its old timestamp
      if os.path.isfile(options.jsoutput):
        old = utils.read_file(options.jsoutput)
        if old != ret:
          utils.write_file(options.jsoutput, ret)
      else:
        utils.write_file(options.jsoutput, ret)
      if options.separate_metadata:
        utils.write_file(options.jsoutput + '.metadata', json.dumps(metadata, separators=(',', ':')))

  # Greedily pack the preloaded files, in order, into chunks that each stay
  # below the 2 GiB ArrayBuffer limit. Embedded files are not bundled into the
  # data file, so they are never split.
  file_chunks = [data_files]
  if options.has_preloaded and not options.has_embedded:
    file_chunks = [[]]
    current_size = 0
    for file_ in data_files:
      fsize = os.path.getsize(file_.srcpath)
      if current_size + fsize <= PRELOAD_DATA_FILE_LIMIT:
        file_chunks[-1].append(file_)
        current_size += fsize
      elif fsize > PRELOAD_DATA_FILE_LIMIT:
        diagnostics.error('cannot package file larger than %d MB' % (PRELOAD_DATA_FILE_LIMIT // (1024 * 1024)))
        return 1
      else:
        current_size = fsize
        file_chunks.append([file_])

  if len(file_chunks) > 1:
    diagnostics.warn('file packager is splitting bundle into %d chunks' % len(file_chunks))

  targets = []
  for counter, data_files in enumerate(file_chunks):
    metadata = {'files': []}
    # Chunks after the first get an _<n> suffix: test.data, test_1.data, ...
    base, ext = os.path.splitext(data_target)
    targets.append(f"{base}{f'_{counter}' if counter else ''}{ext}")
    ret = generate_js(targets[-1], data_files, metadata)
    if options.force or len(data_files):
      if options.jsoutput is None:
        print(ret)
      else:
        base, ext = os.path.splitext(options.jsoutput)
        targets.append(f"{base}{f'_{counter}' if counter else ''}{ext}")
        # Overwrite the old jsoutput file (if exists) only when its content
        # differs from the current generated one, otherwise leave the file
        # untouched preserving its old timestamp
        if os.path.isfile(targets[-1]):
          old = utils.read_file(targets[-1])
          if old != ret:
            utils.write_file(targets[-1], ret)
        else:
          utils.write_file(targets[-1], ret)
        if options.separate_metadata:
          utils.write_file(targets[-1] + '.metadata', json.dumps(metadata, separators=(',', ':')))

  if options.depfile:
    with open(options.depfile, 'w') as f:
      for target in (data_target, options.jsoutput):
      for target in targets:
        if target:
          f.write(escape_for_makefile(target))
          f.write(' \\\n')
@@ -661,14 +685,15 @@ def generate_js(data_target, data_files, metadata):
    # XHRs which has overhead.
    start = 0
    with open(data_target, 'wb') as data:
      for file_ in data_files:
        file_.data_start = start
        curr = utils.read_binary(file_.srcpath)
        file_.data_end = start + len(curr)
        if AV_WORKAROUND:
          curr += '\x00'
        start += len(curr)
        data.write(curr)
      for file_ in data_files:
        if file_.mode == 'preload':
          file_.data_start = start
          curr = utils.read_binary(file_.srcpath)
          file_.data_end = start + len(curr)
          if AV_WORKAROUND:
            curr += b'\x00'
          start += len(curr)
          data.write(curr)

    if start > 256 * 1024 * 1024:
      diagnostics.warn('file packager is creating an asset bundle of %d MB. '