Skip to content
This repository was archived by the owner on Jun 1, 2025. It is now read-only.

Commit 02432ab

Browse files
dsonyy and wujekbogdan authored
feat: strongly typed csv parsing (#52)
Co-authored-by: wujekbogdan <[email protected]>
1 parent edaeee0 commit 02432ab

File tree

13 files changed

+379
-231
lines changed

13 files changed

+379
-231
lines changed

.changeset/lucky-kings-mix.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"map_of_science": minor
3+
---
4+
5+
- Introduce strongly-typed parsing of the `keys.tsv`, `data.tsv`, and `labels.tsv` files.
6+
- Refactor `points.js` to use the newly introduced parser.

src/csv/collector.spec.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import { describe, it, expect } from "vitest";
2+
import { arrayCollector, mapCollector, setCollector } from "./collector.ts";
3+
4+
describe("collectors", () => {
5+
describe("arrayCollector", () => {
6+
it("should collect items in an array", () => {
7+
const collector = arrayCollector();
8+
collector.add("Alice");
9+
collector.add("Bob");
10+
11+
expect(collector.getResults()).toEqual(["Alice", "Bob"]);
12+
});
13+
});
14+
15+
describe("setCollector", () => {
16+
it("should collect unique items in a set", () => {
17+
const collector = setCollector();
18+
collector.add("Alice");
19+
collector.add("Bob");
20+
collector.add("Alice"); // Duplicate
21+
22+
expect(collector.getResults()).toEqual(new Set(["Alice", "Bob"]));
23+
});
24+
});
25+
26+
describe("mapCollector", () => {
27+
it("should collect items in a map using the provided key function", () => {
28+
const collector = mapCollector<string, { id: string; name: string }>(
29+
(item) => item.id,
30+
);
31+
32+
collector.add({ id: "1", name: "Alice" });
33+
collector.add({ id: "2", name: "Bob" });
34+
collector.add({ id: "1", name: "Charlie" }); // Overwrites "Alice"
35+
36+
expect(collector.getResults()).toEqual(
37+
new Map([
38+
["1", { id: "1", name: "Charlie" }], // Last value wins
39+
["2", { id: "2", name: "Bob" }],
40+
]),
41+
);
42+
});
43+
});
44+
});

src/csv/collector.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import { Collector } from "./parse.ts";
2+
3+
export const arrayCollector = <T>(): Collector<T, T[]> => {
4+
const collection: T[] = [];
5+
return {
6+
add: (item) => {
7+
collection.push(item);
8+
return collection;
9+
},
10+
getResults: () => collection,
11+
};
12+
};
13+
14+
export const setCollector = <T>(): Collector<T, Set<T>> => {
15+
const collection = new Set<T>();
16+
return {
17+
add: (item) => {
18+
collection.add(item);
19+
return collection;
20+
},
21+
getResults: () => collection,
22+
};
23+
};
24+
25+
export const mapCollector = <K, V>(
26+
getKey: (item: V) => K,
27+
): Collector<V, Map<K, V>> => {
28+
const collection = new Map<K, V>();
29+
return {
30+
add: (item) => collection.set(getKey(item), item),
31+
getResults: () => collection,
32+
};
33+
};

src/csv/csv.spec.ts

Lines changed: 32 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1,118 +1,60 @@
11
import { describe, it, expect, vi } from "vitest";
2-
import { parse, parseFromUrl } from "./parse";
2+
import { parse, withHttpProvider } from "./parse";
33
import { withRequestInterception } from "../test-utils/request-interception.ts";
44

55
const CSV = "name\tage\nAlice\t30\nBob\t40";
66

77
describe("csv", () => {
88
describe("parse", () => {
9-
it("should parse CSV", async () => {
10-
const fetcher = vi.fn(() => CSV);
11-
const asyncFetcher = vi.fn(() => Promise.resolve(CSV));
12-
const transformer = (row: unknown) => row;
9+
it("should parse CSV with non-async provider", async () => {
10+
const provider = vi.fn(() => CSV);
11+
const onItem = vi.fn();
1312

14-
const resultNonAsync = await parse(fetcher, transformer);
15-
const resultAsync = await parse(asyncFetcher, transformer);
16-
const expected = new Set([
17-
{ name: "Alice", age: "30" },
18-
{ name: "Bob", age: "40" },
19-
]);
13+
await parse(provider, onItem);
2014

21-
expect(fetcher).toHaveBeenCalledOnce();
22-
expect(asyncFetcher).toHaveBeenCalledOnce();
23-
expect(resultNonAsync).toEqual(expected);
24-
expect(resultAsync).toEqual(expected);
15+
expect(onItem).toHaveBeenCalledTimes(2);
16+
expect(onItem).toHaveBeenNthCalledWith(1, { name: "Alice", age: "30" });
17+
expect(onItem).toHaveBeenNthCalledWith(2, { name: "Bob", age: "40" });
2518
});
2619

27-
it("should transform rows", async () => {
28-
const fetcher = () => CSV;
20+
it("should parse CSV with async provider", async () => {
21+
const provider = vi.fn(() => Promise.resolve(CSV));
22+
const onItem = vi.fn();
2923

30-
const result = await parse(fetcher, (row) => ({
31-
name: `transformed ${row.name}`,
32-
age: parseInt(row.age),
33-
}));
34-
const expected = new Set([
35-
{ name: "transformed Alice", age: 30 },
36-
{ name: "transformed Bob", age: 40 },
37-
]);
24+
await parse(provider, onItem);
3825

39-
expect(result).toEqual(expected);
26+
expect(onItem).toHaveBeenCalledTimes(2);
27+
expect(onItem).toHaveBeenNthCalledWith(1, { name: "Alice", age: "30" });
28+
expect(onItem).toHaveBeenNthCalledWith(2, { name: "Bob", age: "40" });
4029
});
4130

42-
it("should fail to parse CSV if transformer fails", async () => {
43-
const transformer = () => {
44-
throw new Error("Failed to transform");
31+
it("should fail to parse CSV if onItem fails", async () => {
32+
const provider = vi.fn(() => Promise.resolve(CSV));
33+
const onItem = () => {
34+
throw new Error("onItem failed");
4535
};
4636

47-
return expect(parse(() => CSV, transformer)).rejects.toThrow(
48-
"Failed to transform",
49-
);
37+
return expect(parse(provider, onItem)).rejects.toThrow("onItem failed");
5038
});
5139
});
5240

53-
describe("parseFromUrl", () => {
41+
describe("withHttpProvider", () => {
5442
it(
55-
"should parse CSV from URL",
43+
"should fetch CSV from URL and process each row",
5644
withRequestInterception(
5745
({ http, HttpResponse }) => [
58-
http.get("https://example.com/csv", () => {
59-
return HttpResponse.text(CSV);
60-
}),
46+
http.get("https://example.com/csv", () => HttpResponse.text(CSV)),
6147
],
6248
async () => {
63-
const result = await parseFromUrl(
64-
"https://example.com/csv",
65-
(row: unknown) => row,
66-
);
67-
68-
expect(result).toEqual(
69-
new Set([
70-
{ name: "Alice", age: "30" },
71-
{ name: "Bob", age: "40" },
72-
]),
73-
);
74-
},
75-
),
76-
);
77-
78-
it(
79-
"should fail if request fails",
80-
withRequestInterception(
81-
({ http, HttpResponse }) => [
82-
http.get("https://example.com/csv", () => {
83-
return HttpResponse.error();
84-
}),
85-
],
86-
async () => {
87-
try {
88-
await parseFromUrl("https://example.com/csv", (row) => row);
89-
} catch (error) {
90-
expect(error).toEqual(new Error("Failed to fetch"));
91-
}
92-
expect.hasAssertions();
93-
},
94-
),
95-
);
96-
97-
it(
98-
"should fail if request doesn't fail, but response is not 200",
99-
withRequestInterception(
100-
({ http, HttpResponse }) => [
101-
http.get("https://example.com/csv", () => {
102-
return new HttpResponse(null, {
103-
status: 404,
104-
});
105-
}),
106-
],
107-
async () => {
108-
try {
109-
await parseFromUrl("https://example.com/csv", (row) => row);
110-
} catch (error) {
111-
expect(error).toEqual(
112-
new Error("Failed to fetch csv from: https://example.com/csv"),
113-
);
114-
}
115-
expect.hasAssertions();
49+
const onItem = vi.fn();
50+
await withHttpProvider("https://example.com/csv", onItem);
51+
52+
expect(onItem).toHaveBeenCalledTimes(2);
53+
expect(onItem).toHaveBeenNthCalledWith(1, {
54+
name: "Alice",
55+
age: "30",
56+
});
57+
expect(onItem).toHaveBeenNthCalledWith(2, { name: "Bob", age: "40" });
11658
},
11759
),
11860
);

src/csv/parse.ts

Lines changed: 43 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,36 @@
11
import { parse as csvParse } from "csv-parse/browser/esm";
2+
import { ZodTypeDef, ZodSchema } from "zod";
23

3-
type Fetcher = () => Promise<string> | string;
4-
type Transformer<T> = (data: Record<string, string>) => T;
5-
6-
export const parse = async <T>(
7-
fetch: Fetcher,
8-
transform: Transformer<T>,
9-
): Promise<Set<T>> => {
10-
const csv = await fetch();
4+
type CsvSource = string | Buffer;
5+
type Provider = () => Promise<CsvSource> | CsvSource;
6+
type OnItem<T, R> = (item: T) => R;
7+
type CSVRecord = Record<string, string>;
8+
export type Collector<T, R> = {
9+
add: (item: T) => R;
10+
getResults: () => R;
11+
};
1112

12-
console.time("parse");
13+
export const parse = async <T extends CSVRecord, R>(
14+
providerCsv: Provider,
15+
onItem: OnItem<T, R>,
16+
): Promise<void> => {
17+
const csv = await providerCsv();
1318

1419
return new Promise((resolve, reject) => {
15-
const result = new Set<T>();
1620
const parser = csvParse({
1721
delimiter: "\t",
1822
columns: true,
1923
bom: true,
2024
});
2125

2226
const onReadable = () => {
23-
const record = parser.read() as Record<string, string> | null;
27+
const record = parser.read() as T | null;
2428

2529
if (record === null) {
2630
return;
2731
}
2832

29-
const transformed = transform(record);
30-
result.add(transformed);
33+
onItem(record);
3134
onReadable();
3235
};
3336

@@ -36,26 +39,40 @@ export const parse = async <T>(
3639
reject(error);
3740
})
3841
.on("readable", onReadable)
39-
.on("end", () => {
40-
console.timeEnd("parse");
41-
resolve(result);
42-
});
42+
.on("end", resolve);
4343

4444
stream.write(csv);
4545
stream.end();
4646
});
4747
};
4848

49-
export const parseFromUrl = <T>(url: string, transform: Transformer<T>) => {
50-
const fetcher = async () => {
51-
const response = await fetch(url);
49+
const httpProvider = async (url: string) => {
50+
const response = await fetch(url);
51+
if (!response.ok) {
52+
throw new Error(`Failed to fetch CSV from: ${url}`);
53+
}
54+
return response.text();
55+
};
5256

53-
if (!response.ok) {
54-
throw new Error(`Failed to fetch csv from: ${url}`);
55-
}
57+
export const withHttpProvider = async <T extends CSVRecord, R>(
58+
url: string,
59+
onItem: OnItem<T, R>,
60+
) => parse(() => httpProvider(url), onItem);
5661

57-
return response.text();
58-
};
62+
export const validateWithSchema =
63+
<T>(schema: ZodSchema<T, ZodTypeDef, unknown>) =>
64+
(data: unknown): T =>
65+
schema.parse(data);
5966

60-
return parse(fetcher, transform);
67+
export const createProcessor = <T, R>(
68+
schema: ZodSchema<T, ZodTypeDef, unknown>,
69+
collector: Collector<T, R>,
70+
) => {
71+
return {
72+
process: (data: unknown) => {
73+
const parsed = validateWithSchema(schema)(data);
74+
collector.add(parsed);
75+
},
76+
getResults: () => collector.getResults(),
77+
};
6178
};

src/js/main.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,5 @@ export const init = () => {
1717
}
1818

1919
enableLoadingScreen();
20-
points.loadConcepts();
21-
points.loadDataPoints();
20+
points.load();
2221
};

0 commit comments

Comments
 (0)