11import type { AnyOrama } from '@orama/orama'
2+ import { save , create , load } from '@orama/orama'
3+ import { encode , decode } from '@msgpack/msgpack'
// @ts-expect-error dpack does not expose types
5+ import * as dpack from 'dpack'
26import type { FileSystem , PersistenceFormat , Runtime } from './types.js'
3- import { FILESYSTEM_NOT_SUPPORTED_ON_RUNTIME } from './errors.js'
7+ import { FILESYSTEM_NOT_SUPPORTED_ON_RUNTIME , UNSUPPORTED_FORMAT } from './errors.js'
48import { persist , restore } from './index.js'
59import { detectRuntime } from './utils.js'
10+ import { serializeOramaInstance } from './seqproto.js'
611
// Default DB name used when the caller does not provide one.
// `+new Date()` coerces to the current epoch-milliseconds timestamp, making the name unique per run.
// NOTE(review): the stray spaces inside the template literal look like paste mangling — confirm against the original file.
export const DEFAULT_DB_NAME = `orama_bump_${ + new Date ( ) } `
813
@@ -26,13 +31,8 @@ export async function persistToFile<T extends AnyOrama>(
2631 path = await getDefaultOutputFilename ( format , runtime )
2732 }
2833
29- const serialized = await persist ( db , format , runtime )
30- let toWrite : any = serialized
31- // Convert ArrayBuffer (seqproto) to Buffer/String for FS
32- if ( serialized instanceof ArrayBuffer ) {
33- toWrite = Buffer . from ( serialized )
34- }
35- await _fs . writeFile ( path , toWrite )
34+ // For large datasets, use streaming approach to avoid memory issues
35+ await persistToFileStreaming ( db , format , path , runtime )
3636
3737 return path
3838}
@@ -55,6 +55,12 @@ export async function restoreFromFile<T extends AnyOrama>(
5555 }
5656
5757 const data = await _fs . readFile ( path )
58+
59+ // Handle new binary format that stores data as binary instead of hex
60+ if ( format === 'binary' && data instanceof Buffer ) {
61+ return restoreFromBinaryData ( data , runtime )
62+ }
63+
5864 return restore ( format , data , runtime )
5965}
6066
@@ -135,3 +141,153 @@ export async function getDefaultFileName(format: PersistenceFormat, runtime?: Ru
135141
136142 return `${ dbName } .${ extension } `
137143}
144+
145+ // Streaming implementation to handle large datasets without memory issues
146+ async function persistToFileStreaming < T extends AnyOrama > (
147+ db : T ,
148+ format : PersistenceFormat ,
149+ filePath : string ,
150+ runtime : Runtime
151+ ) : Promise < void > {
152+ const dbExport = await save ( db )
153+
154+ switch ( format ) {
155+ case 'json' :
156+ await streamJsonToFile ( dbExport , filePath , runtime )
157+ break
158+ case 'binary' :
159+ await streamBinaryToFile ( dbExport , filePath , runtime )
160+ break
161+ case 'dpack' :
162+ // dpack doesn't have streaming support, use regular approach
163+ // but check size and warn if too large
164+ const dpackSerialized = dpack . serialize ( dbExport )
165+ await _fs . writeFile ( filePath , dpackSerialized )
166+ break
167+ case 'seqproto' :
168+ const seqprotoSerialized = serializeOramaInstance ( db )
169+ const buffer = Buffer . from ( seqprotoSerialized )
170+ await _fs . writeFile ( filePath , buffer )
171+ break
172+ default :
173+ throw new Error ( UNSUPPORTED_FORMAT ( format ) )
174+ }
175+ }
176+
177+ // Stream JSON to file using streaming JSON stringification
178+ async function streamJsonToFile ( data : any , filePath : string , runtime : Runtime ) : Promise < void > {
179+ if ( runtime === 'node' ) {
180+ const fs = await import ( 'node:fs' )
181+ const { createWriteStream } = fs
182+
183+ return new Promise ( ( resolve , reject ) => {
184+ const stream = createWriteStream ( filePath )
185+
186+ // For very large objects, we need to stringify in chunks
187+ // This is a simplified approach - in production you might want to use
188+ // a streaming JSON library
189+ try {
190+ const jsonString = JSON . stringify ( data )
191+ stream . write ( jsonString )
192+ stream . end ( )
193+ stream . on ( 'finish' , resolve )
194+ stream . on ( 'error' , reject )
195+ } catch ( error ) {
196+ // If JSON.stringify fails due to size, try chunked approach
197+ if ( error instanceof Error && error . message . includes ( 'string length' ) ) {
198+ streamLargeJsonToFile ( data , stream , resolve , reject )
199+ } else {
200+ reject ( error )
201+ }
202+ }
203+ } )
204+ } else {
205+ // For non-Node environments, fall back to regular approach
206+ const jsonString = JSON . stringify ( data )
207+ await _fs . writeFile ( filePath , jsonString )
208+ }
209+ }
210+
211+ // Handle extremely large JSON by breaking it into manageable chunks
212+ function streamLargeJsonToFile ( data : any , stream : any , resolve : ( ) => void , reject : ( error : any ) => void ) : void {
213+ try {
214+ stream . write ( '{' )
215+
216+ let isFirst = true
217+ for ( const [ key , value ] of Object . entries ( data ) ) {
218+ if ( ! isFirst ) {
219+ stream . write ( ',' )
220+ }
221+ isFirst = false
222+
223+ // Write key
224+ stream . write ( `"${ key } ":` )
225+
226+ // For large values, try to stringify them separately
227+ try {
228+ const valueStr = JSON . stringify ( value )
229+ stream . write ( valueStr )
230+ } catch ( valueError ) {
231+ // If individual value is too large, we need different handling
232+ console . warn ( `Skipping large value for key ${ key } :` , valueError )
233+ stream . write ( 'null' )
234+ }
235+ }
236+
237+ stream . write ( '}' )
238+ stream . end ( )
239+ stream . on ( 'finish' , resolve )
240+ stream . on ( 'error' , reject )
241+ } catch ( error ) {
242+ reject ( error )
243+ }
244+ }
245+
246+ // Stream binary data to file without creating large hex strings
247+ async function streamBinaryToFile ( data : any , filePath : string , runtime : Runtime ) : Promise < void > {
248+ if ( runtime === 'node' ) {
249+ const fs = await import ( 'node:fs' )
250+ const { createWriteStream } = fs
251+
252+ return new Promise ( ( resolve , reject ) => {
253+ const stream = createWriteStream ( filePath )
254+
255+ try {
256+ // Encode to msgpack first
257+ const msgpack = encode ( data )
258+
259+ // Instead of converting to hex string, write binary data directly
260+ // This avoids the 2x size increase from hex encoding
261+ const buffer = Buffer . from ( msgpack . buffer , msgpack . byteOffset , msgpack . byteLength )
262+ stream . write ( buffer )
263+ stream . end ( )
264+ stream . on ( 'finish' , resolve )
265+ stream . on ( 'error' , reject )
266+ } catch ( error ) {
267+ reject ( error )
268+ }
269+ } )
270+ } else {
271+ // For non-Node environments, fall back to regular approach
272+ const msgpack = encode ( data )
273+ const buffer = Buffer . from ( msgpack . buffer , msgpack . byteOffset , msgpack . byteLength )
274+ await _fs . writeFile ( filePath , buffer )
275+ }
276+ }
277+
278+ // Helper function to restore from binary data directly
279+ async function restoreFromBinaryData < T extends AnyOrama > (
280+ data : Buffer ,
281+ runtime : Runtime
282+ ) : Promise < T > {
283+ const db = create ( {
284+ schema : {
285+ __placeholder : 'string'
286+ }
287+ } )
288+
289+ const deserialized = decode ( data ) as any
290+ load ( db , deserialized )
291+
292+ return db as unknown as T
293+ }
0 commit comments