Skip to content

Commit 5c2d793

Browse files
Merge pull request #60 from rdf-connect/chore/update-dependencies
Chore/update dependencies
2 parents 1a8f45b + 435117d commit 5c2d793

File tree

9 files changed

+5346
-11440
lines changed

9 files changed

+5346
-11440
lines changed

bun.lock

Lines changed: 628 additions & 912 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package-lock.json

Lines changed: 4410 additions & 10379 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@rdfc/sparql-ingest-processor-ts",
3-
"version": "2.0.2",
3+
"version": "2.0.3",
44
"description": "SPARQL Update function to be within RDF-Connect pipelines",
55
"author": "Julián Rojas",
66
"contributors": [
@@ -10,7 +10,7 @@
1010
"scripts": {
1111
"build": "tspc && tsc-alias",
1212
"prepublishOnly": "npm run build",
13-
"test": "vitest run --coverage --coverage.include lib",
13+
"test": "vitest run --coverage --coverage.include src",
1414
"prepare": "husky"
1515
},
1616
"files": [
@@ -28,29 +28,29 @@
2828
},
2929
"dependencies": {
3030
"@treecg/types": "^0.4.6",
31-
"n3": "^1.26.0",
31+
"n3": "^2.0.1",
3232
"rdf-data-factory": "^2.0.2",
3333
"rdf-stores": "^2.1.1",
3434
"undici": "^7.20.0",
3535
"winston": "^3.19.0"
3636
},
3737
"devDependencies": {
38-
"@comunica/query-sparql": "^4.5.0",
38+
"@comunica/query-sparql": "^5.1.3",
3939
"@rdfc/js-runner": "^3.0.2",
4040
"@rdfjs/types": "^2.0.1",
4141
"@types/n3": "^1.26.1",
42-
"@types/node": "^24.10.10",
42+
"@types/node": "^25.2.1",
4343
"@typescript-eslint/eslint-plugin": "^8.54.0",
4444
"@typescript-eslint/parser": "^8.54.0",
45-
"@vitest/coverage-v8": "^3.2.4",
46-
"eslint": "^9.34.0",
45+
"@vitest/coverage-v8": "^4.0.18",
46+
"eslint": "^9.39.2",
4747
"eslint-config-prettier": "^10.1.8",
4848
"fastify": "^5.7.4",
4949
"husky": "^9.1.7",
5050
"ts-patch": "^3.3.0",
5151
"tsc-alias": "^1.8.16",
5252
"typescript": "^5.9.3",
53-
"vite-tsconfig-paths": "^5.1.4",
54-
"vitest": "^3.2.4"
53+
"vite-tsconfig-paths": "^6.0.5",
54+
"vitest": "^4.0.18"
5555
}
5656
}

processors.ttl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,16 @@ rdfc:SPARQLIngest rdfc:jsImplementationOf rdfc:Processor;
3434
[ ] a sh:NodeShape;
3535
sh:targetClass rdfc:IngestConfig;
3636
sh:property [
37+
sh:path rdfc:operationMode;
38+
sh:datatype xsd:string;
39+
sh:name "operationMode";
40+
sh:maxCount 1;
41+
], [
42+
sh:path rdfc:memberBatchSize;
43+
sh:datatype xsd:integer;
44+
sh:name "memberBatchSize";
45+
sh:maxCount 1;
46+
], [
3747
sh:path rdfc:memberIsGraph;
3848
sh:datatype xsd:boolean;
3949
sh:name "memberIsGraph";

src/SPARQLIngest.ts

Lines changed: 109 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
1-
import { extendLogger, Processor, Reader, Writer } from "@rdfc/js-runner";
1+
import { Processor, extendLogger } from "@rdfc/js-runner";
22
import { SDS } from "@treecg/types";
33
import { DataFactory } from "rdf-data-factory";
44
import { RdfStore } from "rdf-stores";
5-
import { Parser } from "n3";
5+
import { Parser, Writer as N3Writer } from "n3";
66
import { writeFile } from "fs/promises";
77
import { CREATE, DELETE, UPDATE } from "./SPARQLQueries";
88
import { doSPARQLRequest, getObjects, sanitizeQuads } from "./Utils";
9-
10-
import type { Quad_Subject, Term } from "@rdfjs/types";
119
import { Logger } from "winston";
1210

11+
import type { Quad, Quad_Subject, Term } from "@rdfjs/types";
12+
import type { Reader, Writer } from "@rdfc/js-runner";
13+
1314
const df = new DataFactory();
1415

1516
// TODO: This should be obtained from an SDS metadata stream
@@ -33,7 +34,14 @@ export type PerformanceConfig = {
3334
failureIsFatal?: boolean;
3435
};
3536

37+
export enum OperationMode {
38+
REPLICATION = "Replication",
39+
SYNC = "Sync"
40+
}
41+
3642
export type IngestConfig = {
43+
operationMode?: OperationMode;
44+
memberBatchSize?: number;
3745
memberIsGraph?: boolean;
3846
memberShapes?: string[]; // TODO: This should be obtained from an SDS metadata stream
3947
changeSemantics?: ChangeSemantics;
@@ -45,7 +53,7 @@ export type IngestConfig = {
4553
measurePerformance?: PerformanceConfig;
4654
};
4755

48-
export type TransactionMember = {
56+
type TransactionMember = {
4957
memberId: string,
5058
transactionId: string,
5159
store: RdfStore
@@ -59,17 +67,32 @@ type SPARQLIngestArgs = {
5967

6068
export class SPARQLIngest extends Processor<SPARQLIngestArgs> {
6169
protected transactionMembers: TransactionMember[] = [];
70+
protected memberBatch: Quad[] = [];
6271
protected requestsPerformance: number[] = [];
72+
protected batchCount = 0;
6373

6474
protected createTransactionQueriesLogger: Logger;
6575
protected doSPARQLRequestLogger: Logger;
6676

6777
async init(this: SPARQLIngestArgs & this): Promise<void> {
6878
this.createTransactionQueriesLogger = extendLogger(this.logger, "createTransactionQueries");
6979
this.doSPARQLRequestLogger = extendLogger(this.logger, "doSPARQLRequest");
80+
81+
if (!this.config.operationMode) {
82+
this.config.operationMode = OperationMode.SYNC;
83+
}
84+
85+
if (!this.config.memberBatchSize) {
86+
this.config.memberBatchSize = 100;
87+
}
88+
89+
if (this.config.accessToken === "") {
90+
this.config.accessToken = undefined;
91+
}
7092
}
7193

7294
async transform(this: SPARQLIngestArgs & this): Promise<void> {
95+
7396
for await (const rawQuads of this.memberStream.strings()) {
7497
this.logger.debug(`Raw member data received: \n${rawQuads}`);
7598
const quads = new Parser().parse(rawQuads);
@@ -177,7 +200,11 @@ export class SPARQLIngest extends Processor<SPARQLIngestArgs> {
177200
this.transactionMembers = [];
178201
} else {
179202
// Determine if we have a named graph (either explicitly configured or as the member itself)
180-
const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
203+
const ng = this.getNamedGraphIfAny(
204+
memberIRI,
205+
this.config.memberIsGraph,
206+
this.config.targetNamedGraph
207+
);
181208
// Get the type of change
182209
// TODO: use rdf-lens to support complex paths
183210
const ctv = store.getQuads(
@@ -212,19 +239,37 @@ export class SPARQLIngest extends Processor<SPARQLIngestArgs> {
212239
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
213240
query = UPDATE(store, this.config.forVirtuoso, this.config.targetNamedGraph);
214241
} else {
215-
// Determine if we have a named graph (either explicitly configure or as the member itself)
216-
const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
217-
// No change semantics are provided so we do a DELETE/INSERT query by default
218-
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
219-
query = UPDATE(store, this.config.forVirtuoso, ng);
242+
// Check operation mode
243+
if (this.config.operationMode === OperationMode.REPLICATION) {
244+
this.memberBatch.push(...store.getQuads(null, null, null, null));
245+
this.batchCount++;
246+
if (this.batchCount < this.config.memberBatchSize!) {
247+
continue;
248+
}
249+
} else {
250+
// Determine if we have a named graph (either explicitly configured or as the member itself)
251+
const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
252+
// No change semantics are provided so we do a DELETE/INSERT query by default
253+
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
254+
query = UPDATE(store, this.config.forVirtuoso, ng);
255+
}
220256
}
221257
}
222258
} else {
223259
// Non-SDS data
224260

225-
// TODO: Handle change semantics(?) and transactions for non-SDS data
226-
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for received triples (${store.size})`);
227-
query = UPDATE(store, this.config.forVirtuoso, this.config.targetNamedGraph);
261+
// Check operation mode
262+
if (this.config.operationMode === OperationMode.REPLICATION) {
263+
this.memberBatch.push(...store.getQuads(null, null, null, null));
264+
this.batchCount++;
265+
if (this.batchCount < this.config.memberBatchSize!) {
266+
continue;
267+
}
268+
} else {
269+
// TODO: Handle change semantics(?) and transactions for non-SDS data
270+
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for received triples (${store.size})`);
271+
query = UPDATE(store, this.config.forVirtuoso, this.config.targetNamedGraph);
272+
}
228273
}
229274

230275
// Execute the update query
@@ -255,7 +300,56 @@ export class SPARQLIngest extends Processor<SPARQLIngestArgs> {
255300
await this.sparqlWriter.string(query.join("\n"));
256301
}
257302
} else {
258-
this.logger.warn(`No query generated for member ${memberIRI.value}`);
303+
if (this.config.operationMode === OperationMode.REPLICATION) {
304+
try {
305+
// Execute the ingestion of the collected member batch via the SPARQL Graph Store protocol
306+
const t0 = Date.now();
307+
await doSPARQLRequest(this.memberBatch, this.config, this.doSPARQLRequestLogger);
308+
const reqTime = Date.now() - t0;
309+
if (this.config.measurePerformance) {
310+
this.requestsPerformance.push(reqTime);
311+
}
312+
this.logger.info(`Executed query on remote SPARQL server ${this.config.graphStoreUrl} (took ${reqTime} ms)`);
313+
this.batchCount = 0;
314+
this.memberBatch = [];
315+
} catch (error) {
316+
if (!this.config.measurePerformance || this.config.measurePerformance.failureIsFatal) {
317+
this.logger.error(`Error executing query on remote SPARQL server ${this.config.graphStoreUrl}: ${error}`);
318+
throw error;
319+
} else {
320+
if (this.config.measurePerformance) {
321+
this.requestsPerformance.push(-1); // -1 indicates a failure
322+
}
323+
}
324+
}
325+
} else {
326+
this.logger.warn(`No query generated for member ${memberIRI.value}`);
327+
}
328+
}
329+
}
330+
331+
// Flush remaining member batch if any
332+
if (this.config.operationMode === OperationMode.REPLICATION && this.memberBatch.length > 0) {
333+
try {
334+
// Execute the ingestion of the collected member batch via the SPARQL Graph Store protocol
335+
const t0 = Date.now();
336+
await doSPARQLRequest(this.memberBatch, this.config, this.doSPARQLRequestLogger);
337+
const reqTime = Date.now() - t0;
338+
if (this.config.measurePerformance) {
339+
this.requestsPerformance.push(reqTime);
340+
}
341+
this.logger.info(`Executed query on remote SPARQL server ${this.config.graphStoreUrl} (took ${reqTime} ms)`);
342+
this.batchCount = 0;
343+
this.memberBatch = [];
344+
} catch (error) {
345+
if (!this.config.measurePerformance || this.config.measurePerformance.failureIsFatal) {
346+
this.logger.error(`Error executing query on remote SPARQL server ${this.config.graphStoreUrl}: ${error}`);
347+
throw error;
348+
} else {
349+
if (this.config.measurePerformance) {
350+
this.requestsPerformance.push(-1); // -1 indicates a failure
351+
}
352+
}
259353
}
260354
}
261355

src/Utils.ts

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@ import { XSD } from "@treecg/types";
22
import { DataFactory } from "rdf-data-factory";
33
import { RdfStore } from "rdf-stores";
44
import { Agent } from "undici";
5+
import { Writer as N3Writer } from "n3";
56

6-
import type { Term, Quad_Subject, Quad_Object } from "@rdfjs/types";
7+
import type { Term, Quad_Subject, Quad_Object, Quad } from "@rdfjs/types";
78
import type { IngestConfig } from "./SPARQLIngest";
8-
import {Logger} from "winston";
9+
import { Logger } from "winston";
910

1011
const df = new DataFactory();
1112

@@ -109,23 +110,55 @@ export function sanitizeQuads(store: RdfStore): void {
109110
}
110111
}
111112

112-
export async function doSPARQLRequest(query: string[], config: IngestConfig, logger: Logger): Promise<void> {
113+
export async function doSPARQLRequest(
114+
query: string[] | Quad[],
115+
config: IngestConfig,
116+
logger: Logger
117+
): Promise<void> {
113118
try {
119+
const timeout = config.measurePerformance?.queryTimeout || 1800; // Default to 30 minutes if not specified
120+
// Check if the query is a Quad array (for Replication mode)
121+
if (query.length > 0 && typeof query[0] !== 'string') {
122+
const quads = query as Quad[];
123+
const writer = new N3Writer();
124+
const serialized = writer.quadsToString(quads);
125+
const url = new URL(config.graphStoreUrl!);
126+
if (config.accessToken) {
127+
url.searchParams.append("access-token", config.accessToken);
128+
}
129+
130+
logger.debug(`Executing SPARQL Graph Store request (POST) with ${quads.length} quads.`);
131+
const res = await fetch(url.toString(), {
132+
method: "POST",
133+
headers: {
134+
'Content-Type': 'application/n-quads',
135+
},
136+
body: serialized,
137+
dispatcher: new Agent({
138+
headersTimeout: timeout * 1000,
139+
bodyTimeout: timeout * 1000,
140+
}),
141+
});
142+
143+
if (!res.ok) {
144+
throw new Error(`HTTP request failed with code ${res.status} and message: \n${await res.text()}`);
145+
}
146+
return;
147+
}
148+
114149
let queries: string[] = [];
115-
const jointQuery = query.join("\n");
150+
const jointQuery = (query as string[]).join("\n");
116151

117152
if (config.forVirtuoso && Buffer.byteLength(jointQuery, 'utf8') > 1e6) {
118153
// We need to split the query across multiple requests for Virtuoso,
119154
// when the query is too big (see https://community.openlinksw.com/t/virtuosoexception-sq199/1950).
120-
// We set 1MB as the maximum query size empirally, aiming to maximize the query size without hitting the limit.
121-
queries = query;
155+
// We set 1MB as the maximum query size empirically, aiming to maximize the query size without hitting the limit.
156+
queries = query as string[];
122157
}
123158
else {
124159
queries.push(jointQuery);
125160
}
126161

127-
const timeout = config.measurePerformance?.queryTimeout || 1800; // Default to 30 minutes if not specified
128-
129162
for (const q of queries) {
130163
logger.debug(`Executing SPARQL query: \n${q}`);
131164
const res = await fetch(config.graphStoreUrl!, {

0 commit comments

Comments
 (0)