Skip to content

Commit 265df9b

Browse files
committed
Parse base directions
1 parent cb92d2c commit 265df9b

File tree

2 files changed

+171
-7
lines changed

2 files changed

+171
-7
lines changed

lib/RdfXmlParser.ts

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
1212

1313
public static readonly RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
1414
public static readonly XML = 'http://www.w3.org/XML/1998/namespace';
15+
public static readonly ITS = 'http://www.w3.org/2005/11/its';
1516
public static readonly FORBIDDEN_NODE_ELEMENTS = [
1617
'RDF',
1718
'ID',
@@ -155,6 +156,15 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
155156
}
156157
}
157158

159+
/**
 * Build the literal term for a value found in the scope of the given tag.
 *
 * A datatype on the tag takes precedence. Otherwise, a language on the tag
 * produces a literal that is given both the language and the tag's direction.
 * When neither is set, the data factory receives no datatype or language.
 * @param value The literal value.
 * @param activeTag The active tag whose datatype, language and direction are read.
 * @return The literal created by the data factory.
 */
public createLiteral(value: string, activeTag: IActiveTag): RDF.Literal {
  const { datatype, language, direction } = activeTag;
  if (datatype) {
    return this.dataFactory.literal(value, datatype);
  }
  if (language) {
    // The direction is only passed along together with a language.
    return this.dataFactory.literal(value, { language, direction });
  }
  return this.dataFactory.literal(value, undefined);
}
167+
158168
protected attachSaxListeners() {
159169
this.saxParser.on('error', (error) => this.emit('error', error));
160170
this.saxParser.on('opentag', this.onTag.bind(this));
@@ -201,8 +211,9 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
201211

202212
const activeTag: IActiveTag = {};
203213
if (parentTag) {
204-
// Inherit language scope and baseIRI from parent
214+
// Inherit language scope, direction scope and baseIRI from parent
205215
activeTag.language = parentTag.language;
216+
activeTag.direction = parentTag.direction;
206217
activeTag.baseIRI = parentTag.baseIRI;
207218
} else {
208219
activeTag.baseIRI = this.baseIRI;
@@ -302,6 +313,9 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
302313
activeTag.baseIRI = resolve(attribute.value, activeTag.baseIRI);
303314
continue;
304315
}
316+
} else if (attribute.uri === RdfXmlParser.ITS && attribute.local === 'dir') {
317+
this.setDirection(activeTag, attribute.value);
318+
continue;
305319
}
306320

307321
// Interpret attributes at this point as properties on this node,
@@ -372,8 +386,7 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
372386

373387
// Emit all collected triples
374388
for (let i = 0; i < predicates.length; i++) {
375-
const object: RDF.Term = this.dataFactory.literal(objects[i],
376-
activeTag.datatype || activeTag.language);
389+
const object: RDF.Term = this.createLiteral(objects[i], activeTag);
377390
this.emitTriple(activeTag.subject, predicates[i], object, parentTag.reifiedStatementId);
378391
}
379392
// Emit the rdf:type as named node instead of literal
@@ -515,6 +528,9 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
515528
activeTag.language = propertyAttribute.value === ''
516529
? null : propertyAttribute.value.toLowerCase();
517530
continue;
531+
} else if (propertyAttribute.uri === RdfXmlParser.ITS && propertyAttribute.local === 'dir') {
532+
this.setDirection(activeTag, propertyAttribute.value);
533+
continue;
518534
}
519535

520536
// Interpret attributes at this point as properties via implicit blank nodes on the property,
@@ -529,8 +545,7 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
529545
activeTag.hadChildren = true;
530546
attributedProperty = true;
531547
predicates.push(this.uriToNamedNode(propertyAttribute.uri + propertyAttribute.local));
532-
objects.push(this.dataFactory.literal(propertyAttribute.value,
533-
activeTag.datatype || activeTag.language));
548+
objects.push(this.createLiteral(propertyAttribute.value, activeTag));
534549
}
535550
}
536551

@@ -641,8 +656,7 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
641656
} else if (poppedTag.predicate) {
642657
if (!poppedTag.hadChildren && poppedTag.childrenParseType !== ParseType.PROPERTY) {
643658
// Property element contains text
644-
this.emitTriple(poppedTag.subject, poppedTag.predicate,
645-
this.dataFactory.literal(poppedTag.text || '', poppedTag.datatype || poppedTag.language),
659+
this.emitTriple(poppedTag.subject, poppedTag.predicate, this.createLiteral(poppedTag.text || '', poppedTag),
646660
poppedTag.reifiedStatementId);
647661
} else if (!poppedTag.predicateEmitted) {
648662
// Emit remaining properties on an anonymous property element
@@ -665,6 +679,17 @@ while ${attribute.value} and ${activeSubjectValue} where found.`);
665679
return '';
666680
});
667681
}
682+
683+
/**
 * Apply a base direction attribute value to the given tag.
 *
 * An absent or empty value removes the direction from the tag.
 * @param activeTag The tag whose direction is updated.
 * @param value The raw attribute value, expected to be 'ltr' or 'rtl'.
 * @throws The error built by newParseError when a non-empty value is neither 'ltr' nor 'rtl'.
 */
private setDirection(activeTag: IActiveTag, value?: string) {
  if (!value) {
    // Nothing (or an empty string) was given: drop the direction from this tag.
    delete activeTag.direction;
    return;
  }
  switch (value) {
    case 'ltr':
    case 'rtl':
      activeTag.direction = value;
      return;
    default:
      throw this.newParseError(`Base directions must either be 'ltr' or 'rtl', while ${value} was found.`);
  }
}
668693
}
669694

670695
export interface IRdfXmlParserArgs {
@@ -710,6 +735,7 @@ export interface IActiveTag {
710735
hadChildren?: boolean;
711736
text?: string;
712737
language?: string;
738+
direction?: 'ltr' | 'rtl';
713739
datatype?: RDF.NamedNode;
714740
nodeId?: RDF.BlankNode;
715741
childrenParseType?: ParseType;

test/RdfXmlParser-test.ts

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1417,6 +1417,24 @@ abc`)).rejects.toBeTruthy();
14171417
]);
14181418
});
14191419

1420+
// 2.8
1421+
it('its:dir on node elements', async () => {
1422+
const array = await parse(parser, `<?xml version="1.0"?>
1423+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
1424+
xmlns:dc="http://purl.org/dc/elements/1.1/"
1425+
xmlns:ex="http://example.org/stuff/1.0/"
1426+
xmlns:its="http://www.w3.org/2005/11/its">
1427+
<rdf:Description rdf:about="http://www.w3.org/TR/rdf-syntax-grammar" xml:lang="en-us" its:dir="ltr">
1428+
<dc:title>RDF 1.1 XML Syntax</dc:title>
1429+
</rdf:Description>
1430+
</rdf:RDF>`);
1431+
return expect(array)
1432+
.toBeRdfIsomorphic([
1433+
quad('http://www.w3.org/TR/rdf-syntax-grammar', 'http://purl.org/dc/elements/1.1/title',
1434+
'"RDF 1.1 XML Syntax"@en-us--ltr'),
1435+
]);
1436+
});
1437+
14201438
// 2.7
14211439
it('xml:lang on nested node elements', async () => {
14221440
const array = await parse(parser, `<?xml version="1.0"?>
@@ -1438,6 +1456,28 @@ abc`)).rejects.toBeTruthy();
14381456
]);
14391457
});
14401458

1459+
// 2.8
1460+
it('its:dir on nested node elements', async () => {
1461+
const array = await parse(parser, `<?xml version="1.0"?>
1462+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
1463+
xmlns:dc="http://purl.org/dc/elements/1.1/"
1464+
xmlns:ex="http://example.org/stuff/1.0/"
1465+
xmlns:its="http://www.w3.org/2005/11/its">
1466+
<rdf:Description rdf:about="http://www.w3.org/TR/rdf-syntax-grammar" xml:lang="en-us" its:dir="ltr">
1467+
<ex:editor>
1468+
<rdf:Description>
1469+
<dc:title>RDF 1.1 XML Syntax</dc:title>
1470+
</rdf:Description>
1471+
</ex:editor>
1472+
</rdf:Description>
1473+
</rdf:RDF>`);
1474+
return expect(array)
1475+
.toBeRdfIsomorphic([
1476+
quad('http://www.w3.org/TR/rdf-syntax-grammar', 'http://example.org/stuff/1.0/editor', '_:b'),
1477+
quad('_:b', 'http://purl.org/dc/elements/1.1/title', '"RDF 1.1 XML Syntax"@en-us--ltr'),
1478+
]);
1479+
});
1480+
14411481
// 2.7
14421482
it('xml:lang resets on node elements', async () => {
14431483
const array = await parse(parser, `<?xml version="1.0"?>
@@ -1459,6 +1499,28 @@ abc`)).rejects.toBeTruthy();
14591499
]);
14601500
});
14611501

1502+
// 2.8
1503+
it('its:dir resets on node elements', async () => {
1504+
const array = await parse(parser, `<?xml version="1.0"?>
1505+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
1506+
xmlns:dc="http://purl.org/dc/elements/1.1/"
1507+
xmlns:ex="http://example.org/stuff/1.0/"
1508+
xmlns:its="http://www.w3.org/2005/11/its">
1509+
<rdf:Description rdf:about="http://www.w3.org/TR/rdf-syntax-grammar" xml:lang="en-us" its:dir="ltr">
1510+
<ex:editor>
1511+
<rdf:Description xml:lang="" its:dir="">
1512+
<dc:title>RDF 1.1 XML Syntax</dc:title>
1513+
</rdf:Description>
1514+
</ex:editor>
1515+
</rdf:Description>
1516+
</rdf:RDF>`);
1517+
return expect(array)
1518+
.toBeRdfIsomorphic([
1519+
quad('http://www.w3.org/TR/rdf-syntax-grammar', 'http://example.org/stuff/1.0/editor', '_:b'),
1520+
quad('_:b', 'http://purl.org/dc/elements/1.1/title', '"RDF 1.1 XML Syntax"'),
1521+
]);
1522+
});
1523+
14621524
// 2.7
14631525
it('xml:lang on property elements', async () => {
14641526
const array = await parse(parser, `<?xml version="1.0"?>
@@ -1476,6 +1538,24 @@ abc`)).rejects.toBeTruthy();
14761538
]);
14771539
});
14781540

1541+
// 2.8
1542+
it('its:dir on property elements', async () => {
1543+
const array = await parse(parser, `<?xml version="1.0"?>
1544+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
1545+
xmlns:dc="http://purl.org/dc/elements/1.1/"
1546+
xmlns:ex="http://example.org/stuff/1.0/"
1547+
xmlns:its="http://www.w3.org/2005/11/its">
1548+
<rdf:Description rdf:about="http://www.w3.org/TR/rdf-syntax-grammar">
1549+
<dc:title xml:lang="en-us" its:dir="rtl">RDF 1.1 XML Syntax</dc:title>
1550+
</rdf:Description>
1551+
</rdf:RDF>`);
1552+
return expect(array)
1553+
.toBeRdfIsomorphic([
1554+
quad('http://www.w3.org/TR/rdf-syntax-grammar', 'http://purl.org/dc/elements/1.1/title',
1555+
'"RDF 1.1 XML Syntax"@en-us--rtl'),
1556+
]);
1557+
});
1558+
14791559
// 2.7
14801560
it('xml:lang resets on property elements', async () => {
14811561
const array = await parse(parser, `<?xml version="1.0"?>
@@ -1497,6 +1577,28 @@ abc`)).rejects.toBeTruthy();
14971577
]);
14981578
});
14991579

1580+
// 2.8
1581+
it('its:dir resets on property elements', async () => {
1582+
const array = await parse(parser, `<?xml version="1.0"?>
1583+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
1584+
xmlns:dc="http://purl.org/dc/elements/1.1/"
1585+
xmlns:ex="http://example.org/stuff/1.0/"
1586+
xmlns:its="http://www.w3.org/2005/11/its">
1587+
<rdf:Description rdf:about="http://www.w3.org/TR/rdf-syntax-grammar" xml:lang="en-us" its:dir="rtl">
1588+
<ex:editor>
1589+
<rdf:Description>
1590+
<dc:title xml:lang="" its:dir="">RDF 1.1 XML Syntax</dc:title>
1591+
</rdf:Description>
1592+
</ex:editor>
1593+
</rdf:Description>
1594+
</rdf:RDF>`);
1595+
return expect(array)
1596+
.toBeRdfIsomorphic([
1597+
quad('http://www.w3.org/TR/rdf-syntax-grammar', 'http://example.org/stuff/1.0/editor', '_:b'),
1598+
quad('_:b', 'http://purl.org/dc/elements/1.1/title', '"RDF 1.1 XML Syntax"'),
1599+
]);
1600+
});
1601+
15001602
// 2.7
15011603
it('mixed xml:lang usage', async () => {
15021604
const array = await parse(parser, `<?xml version="1.0"?>
@@ -1532,6 +1634,42 @@ abc`)).rejects.toBeTruthy();
15321634
]);
15331635
});
15341636

1637+
// 2.8
1638+
it('mixed its:dir usage', async () => {
1639+
const array = await parse(parser, `<?xml version="1.0"?>
1640+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
1641+
xmlns:dc="http://purl.org/dc/elements/1.1/"
1642+
xmlns:ex="http://example.org/stuff/1.0/"
1643+
xmlns:its="http://www.w3.org/2005/11/its">
1644+
<rdf:Description rdf:about="http://www.w3.org/TR/rdf-syntax-grammar">
1645+
<dc:title>RDF 1.1 XML Syntax</dc:title>
1646+
<dc:title xml:lang="en" its:dir="ltr">RDF 1.1 XML Syntax</dc:title>
1647+
<dc:title xml:lang="en-US" its:dir="rtl">RDF 1.1 XML Syntax</dc:title>
1648+
</rdf:Description>
1649+
1650+
<rdf:Description rdf:about="http://example.org/buecher/baum" xml:lang="de" its:dir="ltr">
1651+
<dc:title>Der Baum</dc:title>
1652+
<dc:description>Das Buch ist außergewöhnlich</dc:description>
1653+
<dc:title xml:lang="en" its:dir="rtl">The Tree</dc:title>
1654+
</rdf:Description>
1655+
</rdf:RDF>`);
1656+
return expect(array)
1657+
.toBeRdfIsomorphic([
1658+
quad('http://www.w3.org/TR/rdf-syntax-grammar', 'http://purl.org/dc/elements/1.1/title',
1659+
'"RDF 1.1 XML Syntax"'),
1660+
quad('http://www.w3.org/TR/rdf-syntax-grammar', 'http://purl.org/dc/elements/1.1/title',
1661+
'"RDF 1.1 XML Syntax"@en--ltr'),
1662+
quad('http://www.w3.org/TR/rdf-syntax-grammar', 'http://purl.org/dc/elements/1.1/title',
1663+
'"RDF 1.1 XML Syntax"@en-us--rtl'),
1664+
quad('http://example.org/buecher/baum', 'http://purl.org/dc/elements/1.1/title',
1665+
'"Der Baum"@de--ltr'),
1666+
quad('http://example.org/buecher/baum', 'http://purl.org/dc/elements/1.1/description',
1667+
'"Das Buch ist au\u00DFergew\u00F6hnlich"@de--ltr'),
1668+
quad('http://example.org/buecher/baum', 'http://purl.org/dc/elements/1.1/title',
1669+
'"The Tree"@en--rtl'),
1670+
]);
1671+
});
1672+
15351673
// 2.9
15361674
it('rdf:datatype on property elements', async () => {
15371675
const array = await parse(parser, `<?xml version="1.0"?>

0 commit comments

Comments
 (0)