|
1 | 1 | import type { ClickHouseClient } from "@clickhouse/client" |
2 | 2 | import type { DatasetRow } from "@domain/datasets" |
3 | 3 | import { DatasetRowRepository, RowNotFoundError } from "@domain/datasets" |
4 | | -import { ChSqlClient, type ChSqlClientShape, DatasetId, DatasetRowId, toRepositoryError } from "@domain/shared" |
| 4 | +import { ChSqlClient, type ChSqlClientShape, DatasetId, DatasetRowId } from "@domain/shared" |
5 | 5 | import { safeParseJson, safeStringifyJson } from "@repo/utils" |
6 | 6 | import { Effect, Layer } from "effect" |
7 | | -import { insertJsonEachRow, queryClickhouse } from "../sql.ts" |
8 | 7 |
|
9 | 8 | type DatasetRowCH = { |
10 | 9 | row_id: string |
@@ -35,160 +34,6 @@ const buildVersionClause = (version: number | undefined) => |
35 | 34 |
|
// Maximum number of rows sent per ClickHouse insert request; larger row sets
// are split into chunks of this size (see insertBatch below).
const INSERT_BATCH_SIZE = 500
37 | 36 |
|
38 | | -export const createDatasetRowClickHouseRepository = (client: ClickHouseClient) => ({ |
39 | | - insertBatch: (args: { |
40 | | - organizationId: string |
41 | | - datasetId: string |
42 | | - version: number |
43 | | - rows: readonly { |
44 | | - readonly id: DatasetRowId |
45 | | - readonly input: Record<string, unknown> |
46 | | - readonly output?: Record<string, unknown> |
47 | | - readonly metadata?: Record<string, unknown> |
48 | | - }[] |
49 | | - }) => |
50 | | - Effect.gen(function* () { |
51 | | - const values = args.rows.map((row) => ({ |
52 | | - organization_id: args.organizationId, |
53 | | - dataset_id: args.datasetId, |
54 | | - row_id: row.id, |
55 | | - xact_id: args.version, |
56 | | - input: safeStringifyJson(row.input), |
57 | | - output: safeStringifyJson(row.output ?? {}), |
58 | | - metadata: safeStringifyJson(row.metadata ?? {}), |
59 | | - })) |
60 | | - |
61 | | - for (let i = 0; i < values.length; i += INSERT_BATCH_SIZE) { |
62 | | - const batch = values.slice(i, i + INSERT_BATCH_SIZE) |
63 | | - yield* insertJsonEachRow(client, "dataset_rows", batch).pipe( |
64 | | - Effect.mapError((e) => toRepositoryError(e, "insertBatch")), |
65 | | - ) |
66 | | - } |
67 | | - |
68 | | - return args.rows.map((r) => r.id) |
69 | | - }), |
70 | | - |
71 | | - list: (args: { |
72 | | - organizationId: string |
73 | | - datasetId: string |
74 | | - version?: number |
75 | | - search?: string |
76 | | - limit?: number |
77 | | - offset?: number |
78 | | - }) => |
79 | | - Effect.gen(function* () { |
80 | | - const limit = args.limit ?? 50 |
81 | | - const offset = args.offset ?? 0 |
82 | | - const params: Record<string, unknown> = { |
83 | | - organizationId: args.organizationId, |
84 | | - datasetId: args.datasetId, |
85 | | - limit, |
86 | | - offset, |
87 | | - } |
88 | | - |
89 | | - if (args.version !== undefined) params.version = args.version |
90 | | - if (args.search) params.search = args.search |
91 | | - |
92 | | - const versionClause = buildVersionClause(args.version) |
93 | | - const searchClause = buildSearchClause(args.search) |
94 | | - |
95 | | - const dataQuery = ` |
96 | | - SELECT |
97 | | - row_id, |
98 | | - argMax(input, xact_id) AS input, |
99 | | - argMax(output, xact_id) AS output, |
100 | | - argMax(metadata, xact_id) AS metadata, |
101 | | - min(created_at) AS created_at, |
102 | | - max(xact_id) AS latest_xact_id |
103 | | - FROM dataset_rows |
104 | | - WHERE organization_id = {organizationId:String} |
105 | | - AND dataset_id = {datasetId:String} |
106 | | - ${versionClause} |
107 | | - GROUP BY row_id |
108 | | - HAVING argMax(_object_delete, xact_id) = false |
109 | | - ${searchClause} |
110 | | - ORDER BY created_at DESC |
111 | | - LIMIT {limit:UInt32} OFFSET {offset:UInt32} |
112 | | - ` |
113 | | - |
114 | | - const countQuery = ` |
115 | | - SELECT count() AS total FROM ( |
116 | | - SELECT |
117 | | - row_id, |
118 | | - argMax(input, xact_id) AS input, |
119 | | - argMax(output, xact_id) AS output |
120 | | - FROM dataset_rows |
121 | | - WHERE organization_id = {organizationId:String} |
122 | | - AND dataset_id = {datasetId:String} |
123 | | - ${versionClause} |
124 | | - GROUP BY row_id |
125 | | - HAVING argMax(_object_delete, xact_id) = false |
126 | | - ${searchClause} |
127 | | - ) |
128 | | - ` |
129 | | - |
130 | | - const [rows, countResult] = yield* Effect.all([ |
131 | | - queryClickhouse<DatasetRowCH>(client, dataQuery, params).pipe( |
132 | | - Effect.mapError((e) => toRepositoryError(e, "list")), |
133 | | - ), |
134 | | - queryClickhouse<{ total: string }>(client, countQuery, params).pipe( |
135 | | - Effect.mapError((e) => toRepositoryError(e, "list:count")), |
136 | | - ), |
137 | | - ]) |
138 | | - |
139 | | - return { |
140 | | - rows: rows.map((row) => toDomainRow(row, args.datasetId)), |
141 | | - total: Number(countResult[0]?.total ?? 0), |
142 | | - } as const |
143 | | - }), |
144 | | - |
145 | | - findById: (args: { |
146 | | - organizationId: string |
147 | | - datasetId: string |
148 | | - rowId: string |
149 | | - version?: number |
150 | | - }) => |
151 | | - Effect.gen(function* () { |
152 | | - const params: Record<string, unknown> = { |
153 | | - organizationId: args.organizationId, |
154 | | - datasetId: args.datasetId, |
155 | | - rowId: args.rowId, |
156 | | - } |
157 | | - |
158 | | - if (args.version !== undefined) params.version = args.version |
159 | | - |
160 | | - const versionClause = buildVersionClause(args.version) |
161 | | - |
162 | | - const query = ` |
163 | | - SELECT |
164 | | - row_id, |
165 | | - argMax(input, xact_id) AS input, |
166 | | - argMax(output, xact_id) AS output, |
167 | | - argMax(metadata, xact_id) AS metadata, |
168 | | - min(created_at) AS created_at, |
169 | | - max(xact_id) AS latest_xact_id |
170 | | - FROM dataset_rows |
171 | | - WHERE organization_id = {organizationId:String} |
172 | | - AND dataset_id = {datasetId:String} |
173 | | - AND row_id = {rowId:String} |
174 | | - ${versionClause} |
175 | | - GROUP BY row_id |
176 | | - HAVING argMax(_object_delete, xact_id) = false |
177 | | - LIMIT 1 |
178 | | - ` |
179 | | - |
180 | | - const rows = yield* queryClickhouse<DatasetRowCH>(client, query, params).pipe( |
181 | | - Effect.mapError((e) => toRepositoryError(e, "findById")), |
182 | | - ) |
183 | | - |
184 | | - if (rows.length === 0) { |
185 | | - return yield* new RowNotFoundError({ rowId: args.rowId }) |
186 | | - } |
187 | | - |
188 | | - return toDomainRow(rows[0] ?? [], args.datasetId) |
189 | | - }), |
190 | | -}) |
191 | | - |
192 | 37 | export const DatasetRowRepositoryLive = Layer.effect( |
193 | 38 | DatasetRowRepository, |
194 | 39 | Effect.gen(function* () { |
|
0 commit comments