1- // -----------------------
2- // -- example-cramit.js --
3- // --------------------------------------------------------------------------------
4- // this is an example of how to use the cramit function
5- // first we import the cramit function
6- // then we set up the documents array with a text
7- // then we call the cramit function with the text and an options object
8- // the options object is optional
9- //
10- // the cramit function is faster than the chunkit function, but it is less accurate
11- // useful for quickly chunking text, but not for exact semantic chunking
12- // --------------------------------------------------------------------------------
13-
14- import { cramit } from '../chunkit.js' ; // this is typically just "import { cramit } from 'semantic-chunking';", but this is a local test
15- import fs from 'fs' ;
16-
17- // initialize documents array
18- let documents = [ ] ;
19- let textFiles = [ './example3.txt' ] ;
20-
21- // read each text file and add it to the documents array
22- for ( const textFile of textFiles ) {
23- documents . push ( {
24- document_name : textFile ,
25- document_text : await fs . promises . readFile ( textFile , 'utf8' )
26- } ) ;
27- }
28-
29- // start timing
30- const startTime = performance . now ( ) ;
31-
32- let myTestChunks = await cramit (
33- documents ,
34- {
35- logging : false ,
36- maxTokenSize : 300 ,
37- onnxEmbeddingModel : "nomic-ai/nomic-embed-text-v1.5" ,
38- onnxEmbeddingModelQuantized : true ,
39- localModelPath : "../models" ,
40- modelCacheDir : "../models" ,
41- returnEmbedding : false ,
42- returnTokenLength : true ,
43- }
44- ) ;
45-
46- // end timing
47- const endTime = performance . now ( ) ;
48-
49- // calculate tracked time in seconds
50- let trackedTimeSeconds = ( endTime - startTime ) / 1000 ;
51- trackedTimeSeconds = parseFloat ( trackedTimeSeconds . toFixed ( 2 ) ) ;
52-
53- console . log ( "\n\n\n" ) ;
54- console . log ( "myTestChunks:" ) ;
55- console . log ( myTestChunks ) ;
56- console . log ( "length: " + myTestChunks . length ) ;
1+ // -----------------------
2+ // -- example-cramit.js --
3+ // --------------------------------------------------------------------------------
4+ // this is an example of how to use the cramit function
5+ // first we import the cramit function
6+ // then we set up the documents array with a text
7+ // then we call the cramit function with the text and an options object
8+ // the options object is optional
9+ //
10+ // the cramit function is faster than the chunkit function, but it is less accurate
11+ // useful for quickly chunking text, but not for exact semantic chunking
12+ // --------------------------------------------------------------------------------
13+
14+ import { cramit } from '../chunkit.js' ; // this is typically just "import { cramit } from 'semantic-chunking';", but this is a local test
15+ import fs from 'fs' ;
16+ import { fileURLToPath } from 'url' ;
17+ import { dirname , resolve } from 'path' ;
18+
19+ // Get current file's directory
20+ const __filename = fileURLToPath ( import . meta. url ) ;
21+ const __dirname = dirname ( __filename ) ;
22+
23+ // initialize documents array
24+ let documents = [ ] ;
25+ let textFiles = [ 'example3.txt' ] . map ( file =>
26+ resolve ( __dirname , file )
27+ ) ;
28+
29+ // read each text file and add it to the documents array
30+ for ( const textFile of textFiles ) {
31+ documents . push ( {
32+ document_name : textFile ,
33+ document_text : await fs . promises . readFile ( textFile , 'utf8' )
34+ } ) ;
35+ }
36+
37+ // start timing
38+ const startTime = performance . now ( ) ;
39+
40+ let myTestChunks = await cramit (
41+ documents ,
42+ {
43+ logging : false ,
44+ maxTokenSize : 300 ,
45+ onnxEmbeddingModel : "nomic-ai/nomic-embed-text-v1.5" ,
46+ onnxEmbeddingModelQuantized : true ,
47+ localModelPath : "../models" ,
48+ modelCacheDir : "../models" ,
49+ returnEmbedding : false ,
50+ returnTokenLength : true ,
51+ }
52+ ) ;
53+
54+ // end timing
55+ const endTime = performance . now ( ) ;
56+
57+ // calculate tracked time in seconds
58+ let trackedTimeSeconds = ( endTime - startTime ) / 1000 ;
59+ trackedTimeSeconds = parseFloat ( trackedTimeSeconds . toFixed ( 2 ) ) ;
60+
61+ console . log ( "\n\n\n" ) ;
62+ console . log ( "myTestChunks:" ) ;
63+ console . log ( myTestChunks ) ;
64+ console . log ( "length: " + myTestChunks . length ) ;
57 65 console . log ( "trackedTimeSeconds: " + trackedTimeSeconds ) ;
0 commit comments