Skip to content

Commit ed47ed4

Browse files
Fix formatting in config.ts and core.ts
1 parent 98a645a commit ed47ed4

File tree

3 files changed

+31
-22
lines changed

3 files changed

+31
-22
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,9 +85,9 @@ type Config = {
8585
*/
8686
resourceExclusions?: string[];
8787
/** Optional maximum file size in megabytes to include in the output file */
88-
maxFileSize?: number,
88+
maxFileSize?: number;
8989
/** Optional maximum number tokens to include in the output file */
90-
maxTokens?: number,
90+
maxTokens?: number;
9191
};
9292
```
9393

src/config.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -64,11 +64,11 @@ export const configSchema = z.object({
6464

6565
/** Optional maximum file size in megabytes to include in the output file
6666
* @example 1
67-
*/
67+
*/
6868
maxFileSize: z.number().int().positive().optional(),
69-
/** Optional maximum number tokens to include in the output file
69+
/** Optional maximum number tokens to include in the output file
7070
* @example 5000
71-
*/
71+
*/
7272
maxTokens: z.number().int().positive().optional(),
7373
});
7474

src/core.ts

Lines changed: 26 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,7 @@ import { readFile, writeFile } from "fs/promises";
44
import { glob } from "glob";
55
import { Config, configSchema } from "./config.js";
66
import { Page } from "playwright";
7-
import {
8-
isWithinTokenLimit,
9-
} from 'gpt-tokenizer'
7+
import { isWithinTokenLimit } from "gpt-tokenizer";
108

119
let pageCounter = 0;
1210

@@ -144,35 +142,46 @@ export async function crawl(config: Config) {
144142
}
145143
}
146144

147-
export async function write(config: Config) {
148-
const jsonFiles = await glob("storage/datasets/default/*.json", { absolute: true });
145+
export async function write(config: Config) {
146+
const jsonFiles = await glob("storage/datasets/default/*.json", {
147+
absolute: true,
148+
});
149149

150150
console.log(`Found ${jsonFiles.length} files to combine...`);
151151

152152
let currentResults: Record<string, any>[] = [];
153153
let currentSize: number = 0;
154154
let fileCounter: number = 1;
155-
const maxBytes: number = config.maxFileSize ? config.maxFileSize * 1024 * 1024 : Infinity;
156-
157-
const getStringByteSize = (str: string): number => Buffer.byteLength(str, 'utf-8');
158-
159-
const nextFileName = (): string => `${config.outputFileName.replace(/\.json$/, '')}-${fileCounter}.json`;
160-
155+
const maxBytes: number = config.maxFileSize
156+
? config.maxFileSize * 1024 * 1024
157+
: Infinity;
158+
159+
const getStringByteSize = (str: string): number =>
160+
Buffer.byteLength(str, "utf-8");
161+
162+
const nextFileName = (): string =>
163+
`${config.outputFileName.replace(/\.json$/, "")}-${fileCounter}.json`;
164+
161165
const writeBatchToFile = async (): Promise<void> => {
162166
await writeFile(nextFileName(), JSON.stringify(currentResults, null, 2));
163167
console.log(`Wrote ${currentResults.length} items to ${nextFileName()}`);
164168
currentResults = [];
165169
currentSize = 0;
166170
fileCounter++;
167171
};
168-
172+
169173
let estimatedTokens: number = 0;
170174

171-
const addContentOrSplit = async (data: Record<string, any>): Promise<void> => {
175+
const addContentOrSplit = async (
176+
data: Record<string, any>,
177+
): Promise<void> => {
172178
const contentString: string = JSON.stringify(data);
173-
const tokenCount: number | false = isWithinTokenLimit(contentString, config.maxTokens || Infinity);
179+
const tokenCount: number | false = isWithinTokenLimit(
180+
contentString,
181+
config.maxTokens || Infinity,
182+
);
174183

175-
if (typeof tokenCount === 'number') {
184+
if (typeof tokenCount === "number") {
176185
if (estimatedTokens + tokenCount > config.maxTokens!) {
177186
// Only write the batch if it's not empty (something to write)
178187
if (currentResults.length > 0) {
@@ -195,7 +204,7 @@ export async function write(config: Config) {
195204

196205
// Iterate over each JSON file and process its contents.
197206
for (const file of jsonFiles) {
198-
const fileContent = await readFile(file, 'utf-8');
207+
const fileContent = await readFile(file, "utf-8");
199208
const data: Record<string, any> = JSON.parse(fileContent);
200209
await addContentOrSplit(data);
201210
}
@@ -204,4 +213,4 @@ export async function write(config: Config) {
204213
if (currentResults.length > 0) {
205214
await writeBatchToFile();
206215
}
207-
};
216+
}

0 commit comments

Comments
 (0)