Skip to content

Commit 8e48c3a

Browse files
committed
Parse triple terms
1 parent f970161 commit 8e48c3a

File tree

2 files changed

+199
-17
lines changed

2 files changed

+199
-17
lines changed

lib/RdfXmlParser.ts

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,8 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
215215
activeTag.language = parentTag.language;
216216
activeTag.direction = parentTag.direction;
217217
activeTag.baseIRI = parentTag.baseIRI;
218+
// Also inherit the parent's triple term collection array (same array reference), so triples from nested tags are collected into it
219+
activeTag.childrenTripleTerms = parentTag.childrenTripleTerms;
218220
} else {
219221
activeTag.baseIRI = this.baseIRI;
220222
}
@@ -348,7 +350,7 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
348350
if (typedNode) {
349351
const type: RDF.NamedNode = this.uriToNamedNode(tag.uri + tag.local);
350352
this.emitTriple(activeTag.subject, this.dataFactory.namedNode(RdfXmlParser.RDF + 'type'),
351-
type, parentTag ? parentTag.reifiedStatementId : null);
353+
type, parentTag ? parentTag.reifiedStatementId : null, activeTag.childrenTripleTerms);
352354
}
353355

354356
if (parentTag) {
@@ -360,41 +362,43 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
360362

361363
// Emit <x> <p> <current-chain> OR <previous-chain> <rdf:rest> <current-chain>
362364
this.emitTriple(parentTag.childrenCollectionSubject,
363-
parentTag.childrenCollectionPredicate, linkTerm, parentTag.reifiedStatementId);
365+
parentTag.childrenCollectionPredicate, linkTerm, parentTag.reifiedStatementId, parentTag.childrenTripleTerms);
364366

365367
// Emit <current-chain> <rdf:first> value
366368
this.emitTriple(linkTerm, this.dataFactory.namedNode(RdfXmlParser.RDF + 'first'),
367-
activeTag.subject, activeTag.reifiedStatementId);
369+
activeTag.subject, activeTag.reifiedStatementId, activeTag.childrenTripleTerms);
368370

369371
// Store <current-chain> in the parent node
370372
parentTag.childrenCollectionSubject = linkTerm;
371373
parentTag.childrenCollectionPredicate = this.dataFactory.namedNode(RdfXmlParser.RDF + 'rest');
372374
} else { // !parentTag.predicateEmitted
373375
// Set-based properties
374-
this.emitTriple(parentTag.subject, parentTag.predicate, activeTag.subject, parentTag.reifiedStatementId);
376+
if (!parentTag.childrenTagsToTripleTerms) {
377+
this.emitTriple(parentTag.subject, parentTag.predicate, activeTag.subject, parentTag.reifiedStatementId, parentTag.childrenTripleTerms);
378+
parentTag.predicateEmitted = true;
379+
}
375380

376381
// Emit pending properties on the parent tag that had no defined subject yet.
377382
for (let i = 0; i < parentTag.predicateSubPredicates.length; i++) {
378383
this.emitTriple(activeTag.subject, parentTag.predicateSubPredicates[i],
379-
parentTag.predicateSubObjects[i], null);
384+
parentTag.predicateSubObjects[i], null, parentTag.childrenTripleTerms);
380385
}
381386

382387
// Cleanup so we don't emit them again when the parent tag is closed
383388
parentTag.predicateSubPredicates = [];
384389
parentTag.predicateSubObjects = [];
385-
parentTag.predicateEmitted = true;
386390
}
387391
}
388392

389393
// Emit all collected triples
390394
for (let i = 0; i < predicates.length; i++) {
391395
const object: RDF.Term = this.createLiteral(objects[i], activeTag);
392-
this.emitTriple(activeTag.subject, predicates[i], object, parentTag.reifiedStatementId);
396+
this.emitTriple(activeTag.subject, predicates[i], object, parentTag.reifiedStatementId, parentTag.childrenTripleTerms);
393397
}
394398
// Emit the rdf:type as named node instead of literal
395399
if (explicitType) {
396400
this.emitTriple(activeTag.subject, this.dataFactory.namedNode(RdfXmlParser.RDF + 'type'),
397-
this.uriToNamedNode(explicitType), null);
401+
this.uriToNamedNode(explicitType), null, activeTag.childrenTripleTerms);
398402
}
399403
}
400404
}
@@ -505,7 +509,7 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
505509

506510
// Turn this property element into a node element
507511
const nestedBNode: RDF.BlankNode = this.dataFactory.blankNode();
508-
this.emitTriple(activeTag.subject, activeTag.predicate, nestedBNode, activeTag.reifiedStatementId);
512+
this.emitTriple(activeTag.subject, activeTag.predicate, nestedBNode, activeTag.reifiedStatementId, activeTag.childrenTripleTerms);
509513
activeTag.subject = nestedBNode;
510514
activeTag.predicate = null;
511515
} else if (propertyAttribute.value === 'Collection') {
@@ -520,6 +524,11 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
520524
// Interpret children as being part of a literal string
521525
activeTag.childrenTagsToString = true;
522526
activeTag.childrenStringTags = [];
527+
} else if (propertyAttribute.value === 'Triple') {
528+
parseType = true;
529+
// Collect children as triple terms
530+
activeTag.childrenTagsToTripleTerms = true;
531+
activeTag.childrenTripleTerms = [];
523532
}
524533
continue;
525534
case 'ID':
@@ -558,11 +567,11 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
558567
const subjectParent: RDF.Term = activeTag.subject;
559568
activeTag.subject = subSubjectValueBlank
560569
? this.dataFactory.blankNode(activeSubSubjectValue) : this.valueToUri(activeSubSubjectValue, activeTag);
561-
this.emitTriple(subjectParent, activeTag.predicate, activeTag.subject, activeTag.reifiedStatementId);
570+
this.emitTriple(subjectParent, activeTag.predicate, activeTag.subject, activeTag.reifiedStatementId, activeTag.childrenTripleTerms);
562571

563572
// Emit our buffered triples
564573
for (let i = 0; i < predicates.length; i++) {
565-
this.emitTriple(activeTag.subject, predicates[i], objects[i], null);
574+
this.emitTriple(activeTag.subject, predicates[i], objects[i], null, activeTag.childrenTripleTerms);
566575
}
567576
activeTag.predicateEmitted = true;
568577
} else if (subSubjectValueBlank) {
@@ -582,10 +591,16 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
582591
* @param {Term} object An object term.
583592
* @param {Term} statementId An optional resource that identifies the triple.
584593
* If truthy, then the given triple will also be emitted reified.
594+
* @param {Quad[]} childrenTripleTerms An optional array; if given, the created quad is pushed into it instead of being emitted on the stream.
585595
*/
586596
protected emitTriple(subject: RDF.Quad_Subject, predicate: RDF.Quad_Predicate, object: RDF.Quad_Object,
587-
statementId?: RDF.NamedNode) {
588-
this.push(this.dataFactory.quad(subject, predicate, object, this.defaultGraph));
597+
statementId?: RDF.NamedNode, childrenTripleTerms?: RDF.Quad[]) {
598+
const quad = this.dataFactory.quad(subject, predicate, object, this.defaultGraph);
599+
if (childrenTripleTerms) {
600+
childrenTripleTerms.push(quad);
601+
} else {
602+
this.push(quad);
603+
}
589604

590605
// Reify triple
591606
if (statementId) {
@@ -640,6 +655,8 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
640655
*/
641656
protected onCloseTag() {
642657
const poppedTag: IActiveTag = this.activeTagStack.pop();
658+
const parentTag: IActiveTag = this.activeTagStack.length
659+
? this.activeTagStack[this.activeTagStack.length - 1] : null;
643660

644661
// If we were converting a tag to a string, and the tag was not self-closing, close it here.
645662
if (poppedTag.childrenStringEmitClosingTag) {
@@ -653,21 +670,32 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
653670
poppedTag.hadChildren = false; // Force a literal triple to be emitted hereafter
654671
}
655672

673+
// Set the triple term value if we were collecting triple terms
674+
if (poppedTag.childrenTagsToTripleTerms && poppedTag.predicate) {
675+
if (poppedTag.childrenTripleTerms.length !== 1) {
676+
throw this.newParseError(`Expected exactly one triple term in rdf:parseType="Triple" but got ${poppedTag.childrenTripleTerms.length}`);
677+
}
678+
for (const tripleTerm of poppedTag.childrenTripleTerms) {
679+
this.emitTriple(poppedTag.subject, poppedTag.predicate, tripleTerm, null, parentTag?.childrenTripleTerms);
680+
}
681+
poppedTag.predicateEmitted = true;
682+
}
683+
656684
if (poppedTag.childrenCollectionSubject) {
657685
// Terminate the rdf:List
658686
this.emitTriple(poppedTag.childrenCollectionSubject, poppedTag.childrenCollectionPredicate,
659-
this.dataFactory.namedNode(RdfXmlParser.RDF + 'nil'), poppedTag.reifiedStatementId);
687+
this.dataFactory.namedNode(RdfXmlParser.RDF + 'nil'), poppedTag.reifiedStatementId, poppedTag.childrenTripleTerms);
660688
} else if (poppedTag.predicate) {
661689
if (!poppedTag.hadChildren && poppedTag.childrenParseType !== ParseType.PROPERTY) {
662690
// Property element contains text
663691
this.emitTriple(poppedTag.subject, poppedTag.predicate, this.createLiteral(poppedTag.text || '', poppedTag),
664-
poppedTag.reifiedStatementId);
692+
poppedTag.reifiedStatementId, poppedTag.childrenTripleTerms);
665693
} else if (!poppedTag.predicateEmitted) {
666694
// Emit remaining properties on an anonymous property element
667695
const subject: RDF.Term = this.dataFactory.blankNode();
668-
this.emitTriple(poppedTag.subject, poppedTag.predicate, subject, poppedTag.reifiedStatementId);
696+
this.emitTriple(poppedTag.subject, poppedTag.predicate, subject, poppedTag.reifiedStatementId, poppedTag.childrenTripleTerms);
669697
for (let i = 0; i < poppedTag.predicateSubPredicates.length; i++) {
670-
this.emitTriple(subject, poppedTag.predicateSubPredicates[i], poppedTag.predicateSubObjects[i], null);
698+
this.emitTriple(subject, poppedTag.predicateSubPredicates[i], poppedTag.predicateSubObjects[i], null, poppedTag.childrenTripleTerms);
671699
}
672700
}
673701
}
@@ -752,6 +780,8 @@ export interface IActiveTag {
752780
// for creating rdf:Lists
753781
childrenCollectionSubject?: RDF.NamedNode | RDF.BlankNode;
754782
childrenCollectionPredicate?: RDF.NamedNode;
783+
childrenTagsToTripleTerms?: boolean;
784+
childrenTripleTerms?: RDF.Quad[];
755785
}
756786

757787
export enum ParseType {

test/RdfXmlParser-test.ts

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,40 @@ abc`)).rejects.toBeTruthy();
845845
</rdf:RDF>`)).rejects.toEqual(
846846
new Error('5:13: unbound namespace prefix: "ex".'));
847847
});
848+
849+
it('on rdf:parseType="Triple" with missing predicate in triple term', async () => {
850+
return expect(parse(parser, `<?xml version="1.0"?>
851+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
852+
xmlns:ex="http://example.org/stuff/1.0/"
853+
xml:base="http://example.org/triples/"
854+
rdf:version="1.2">
855+
<rdf:Description rdf:about="http://example.org/">
856+
<ex:prop rdf:parseType="Triple">
857+
<rdf:Description rdf:about="http://example.org/stuff/1.0/s">
858+
</rdf:Description>
859+
</ex:prop>
860+
</rdf:Description>
861+
</rdf:RDF>`)).rejects.toEqual(
862+
new Error('Expected exactly one triple term in rdf:parseType="Triple" but got 0'));
863+
});
864+
865+
it('on rdf:parseType="Triple" with multiple triple terms', async () => {
866+
return expect(parse(parser, `<?xml version="1.0"?>
867+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
868+
xmlns:ex="http://example.org/stuff/1.0/"
869+
xml:base="http://example.org/triples/"
870+
rdf:version="1.2">
871+
<rdf:Description rdf:about="http://example.org/">
872+
<ex:prop rdf:parseType="Triple">
873+
<rdf:Description rdf:about="http://example.org/stuff/1.0/s">
874+
<ex:p rdf:resource="http://example.org/stuff/1.0/o1" />
875+
<ex:p rdf:resource="http://example.org/stuff/1.0/o2" />
876+
</rdf:Description>
877+
</ex:prop>
878+
</rdf:Description>
879+
</rdf:RDF>`)).rejects.toEqual(
880+
new Error('Expected exactly one triple term in rdf:parseType="Triple" but got 2'));
881+
});
848882
});
849883

850884
describe('should parse', () => {
@@ -2522,6 +2556,124 @@ abc`)).rejects.toBeTruthy();
25222556
</rdf:RDF>`);
25232557
return expect(cb).toHaveBeenCalledWith('1.2');
25242558
});
2559+
2560+
// 2.19
2561+
it('on property elements with rdf:parseType="Triple"', async () => {
2562+
const array = await parse(parser, `<?xml version="1.0"?>
2563+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
2564+
xmlns:ex="http://example.org/stuff/1.0/"
2565+
xml:base="http://example.org/triples/"
2566+
rdf:version="1.2">
2567+
<rdf:Description rdf:about="http://example.org/">
2568+
<ex:prop rdf:parseType="Triple">
2569+
<rdf:Description rdf:about="http://example.org/stuff/1.0/s">
2570+
<ex:p rdf:resource="http://example.org/stuff/1.0/o" />
2571+
</rdf:Description>
2572+
</ex:prop>
2573+
</rdf:Description>
2574+
</rdf:RDF>`);
2575+
return expect(array)
2576+
.toBeRdfIsomorphic([
2577+
quad(
2578+
'http://example.org/',
2579+
'http://example.org/stuff/1.0/prop',
2580+
'<<http://example.org/stuff/1.0/s http://example.org/stuff/1.0/p http://example.org/stuff/1.0/o>>'),
2581+
]);
2582+
});
2583+
2584+
it('on property elements with rdf:parseType="Triple" with blank subject', async () => {
2585+
const array = await parse(parser, `<?xml version="1.0"?>
2586+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
2587+
xmlns:ex="http://example.org/stuff/1.0/"
2588+
xml:base="http://example.org/triples/"
2589+
rdf:version="1.2">
2590+
<rdf:Description rdf:about="http://example.org/">
2591+
<ex:prop rdf:parseType="Triple">
2592+
<rdf:Description>
2593+
<ex:p rdf:resource="http://example.org/stuff/1.0/o" />
2594+
</rdf:Description>
2595+
</ex:prop>
2596+
</rdf:Description>
2597+
</rdf:RDF>`);
2598+
return expect(array)
2599+
.toBeRdfIsomorphic([
2600+
quad(
2601+
'http://example.org/',
2602+
'http://example.org/stuff/1.0/prop',
2603+
'<<_:b0 http://example.org/stuff/1.0/p http://example.org/stuff/1.0/o>>'),
2604+
]);
2605+
});
2606+
2607+
it('on property elements with rdf:parseType="Triple" with rdf:type', async () => {
2608+
const array = await parse(parser, `<?xml version="1.0"?>
2609+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
2610+
xmlns:ex="http://example.org/stuff/1.0/"
2611+
xml:base="http://example.org/triples/"
2612+
rdf:version="1.2">
2613+
<rdf:Description rdf:about="http://example.org/">
2614+
<ex:prop rdf:parseType="Triple">
2615+
<rdf:Description rdf:type="http://example.org/stuff/1.0/t" />
2616+
</ex:prop>
2617+
</rdf:Description>
2618+
</rdf:RDF>`);
2619+
return expect(array)
2620+
.toBeRdfIsomorphic([
2621+
quad(
2622+
'http://example.org/',
2623+
'http://example.org/stuff/1.0/prop',
2624+
'<<_:b0 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://example.org/stuff/1.0/t>>'),
2625+
]);
2626+
});
2627+
2628+
it('on property elements with rdf:parseType="Triple" and rdf:nodeID', async () => {
2629+
const array = await parse(parser, `<?xml version="1.0"?>
2630+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
2631+
xmlns:ex="http://example.org/stuff/1.0/"
2632+
xml:base="http://example.org/triples/"
2633+
rdf:version="1.2">
2634+
<rdf:Description rdf:about="http://example.org/">
2635+
<ex:prop rdf:parseType="Triple">
2636+
<rdf:Description rdf:about="http://example.org/stuff/1.0/s">
2637+
<ex:p rdf:nodeID="b1" />
2638+
</rdf:Description>
2639+
</ex:prop>
2640+
</rdf:Description>
2641+
</rdf:RDF>`);
2642+
return expect(array)
2643+
.toBeRdfIsomorphic([
2644+
quad(
2645+
'http://example.org/',
2646+
'http://example.org/stuff/1.0/prop',
2647+
'<<http://example.org/stuff/1.0/s http://example.org/stuff/1.0/p _:b0>>'),
2648+
]);
2649+
});
2650+
2651+
it('on property elements with nested rdf:parseType="Triple"', async () => {
2652+
const array = await parse(parser, `<?xml version="1.0"?>
2653+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
2654+
xmlns:ex="http://example.org/stuff/1.0/"
2655+
xml:base="http://example.org/triples/"
2656+
rdf:version="1.2">
2657+
<rdf:Description rdf:about="http://example.org/">
2658+
<ex:prop rdf:parseType="Triple">
2659+
<rdf:Description rdf:about="http://example.org/stuff/1.0/s">
2660+
<ex:p rdf:parseType="Triple">
2661+
<rdf:Description rdf:about="http://example.org/stuff/1.0/s2">
2662+
<ex:p2 rdf:resource="http://example.org/stuff/1.0/o2" />
2663+
</rdf:Description>
2664+
</ex:p>
2665+
</rdf:Description>
2666+
</ex:prop>
2667+
</rdf:Description>
2668+
</rdf:RDF>`);
2669+
return expect(array)
2670+
.toBeRdfIsomorphic([
2671+
quad(
2672+
'http://example.org/',
2673+
'http://example.org/stuff/1.0/prop',
2674+
'<<http://example.org/stuff/1.0/s http://example.org/stuff/1.0/p <<http://example.org/stuff/1.0/s2 http://example.org/stuff/1.0/p2 http://example.org/stuff/1.0/o2>>>>'),
2675+
]);
2676+
});
25252677
});
25262678

25272679
describe('streaming-wise', () => {

0 commit comments

Comments
 (0)