Skip to content

Commit 113d4a4

Browse files
committed
GH-3685: Update string functions for RDF 1.2 base direction
1 parent f085da4 commit 113d4a4

File tree

10 files changed

+505
-413
lines changed

10 files changed

+505
-413
lines changed

jena-arq/src/main/java/org/apache/jena/riot/writer/JsonLD11Writer.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
package org.apache.jena.riot.writer;
2020

21+
import static org.apache.jena.atlas.lib.Lib.equalsOrNulls;
22+
2123
import java.io.IOException;
2224
import java.io.OutputStream;
2325
import java.io.Writer;
@@ -37,7 +39,6 @@
3739

3840
import jakarta.json.*;
3941
import jakarta.json.stream.JsonGenerator;
40-
import org.apache.jena.atlas.lib.Lib;
4142
import org.apache.jena.atlas.logging.FmtLog;
4243
import org.apache.jena.graph.Node;
4344
import org.apache.jena.riot.Lang;
@@ -96,11 +97,11 @@ static class JenaTitaniumException extends JenaException {
9697
boolean indented = true;
9798

9899
// Choose algorithms
99-
if ( Lib.equals(variant, RDFFormat.PRETTY) ) {
100+
if ( equalsOrNulls(variant, RDFFormat.PRETTY) ) {
100101
writeThis = writePretty(array, dsg);
101-
} else if ( variant == null || Lib.equals(variant, RDFFormat.PLAIN) ) {
102+
} else if ( variant == null || equalsOrNulls(variant, RDFFormat.PLAIN) ) {
102103
writeThis = writePlain(array, dsg);
103-
} else if ( Lib.equals(variant, RDFFormat.FLAT) ) {
104+
} else if ( equalsOrNulls(variant, RDFFormat.FLAT) ) {
104105
writeThis = writePlain(array, dsg);
105106
indented = false;
106107
} else {

jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/NodeValueOps.java

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,17 @@
2727
import javax.xml.datatype.Duration;
2828
import javax.xml.datatype.XMLGregorianCalendar;
2929

30+
import org.apache.jena.atlas.lib.Lib;
31+
import org.apache.jena.datatypes.RDFDatatype;
3032
import org.apache.jena.datatypes.xsd.XSDDatatype;
3133
import org.apache.jena.graph.Node;
3234
import org.apache.jena.graph.NodeFactory;
35+
import org.apache.jena.graph.TextDirection;
3336
import org.apache.jena.sparql.expr.ExprEvalException;
3437
import org.apache.jena.sparql.expr.ExprEvalTypeException;
3538
import org.apache.jena.sparql.expr.NodeValue;
3639
import org.apache.jena.sparql.expr.ValueSpace;
37-
import org.apache.jena.sparql.util.NodeUtils;
40+
import org.apache.jena.vocabulary.RDF;
3841

3942
/**
4043
* Operations relating to {@link NodeValue NodeValues}.
@@ -301,18 +304,36 @@ public static Node checkAndGetStringLiteral(String label, NodeValue nv) {
301304
Node n = nv.asNode();
302305
if ( !n.isLiteral() )
303306
throw new ExprEvalException(label + ": Not a literal: " + nv);
304-
String lang = n.getLiteralLanguage();
305307

306-
if ( NodeUtils.isLangString(n) )
307-
// Language tag. Legal.
308+
if ( nv.isString() )
309+
// Includes derived types of xsd:string.
308310
return n;
309311

310-
// No language tag : either no datatype or a datatype of xsd:string
311-
// Includes the case of rdf:langString and no language ==> Illegal as a
312-
// compatible string.
312+
RDFDatatype dt = n.getLiteralDatatype();
313+
if ( ! RDF.dtLangString.equals(dt) && ! RDF.dtDirLangString.equals(dt) )
314+
throw new ExprEvalException(label + ": Not a string literal: " + nv);
313315

314-
if ( nv.isString() )
316+
// Check for malformed:
317+
// e.g. "abc"^^rdf:langString, and "abc"^^rdf:dirLangString
318+
319+
// Must have a language.
320+
String lang = n.getLiteralLanguage();
321+
if ( lang == null || lang.isEmpty() )
322+
throw new ExprEvalException(label + ": Not a string literal (no langtag): " + nv);
323+
if ( RDF.dtLangString.equals(dt) ) {
324+
// Must not have a text direction
325+
if ( n.getLiteralBaseDirection() != null )
326+
throw new ExprEvalException(label + ": Not a string literal (rdf:langString + text direction): " + nv);
315327
return n;
328+
}
329+
if ( RDF.dtDirLangString.equals(dt) ) {
330+
// Must have a text direction
331+
if ( n.getLiteralBaseDirection() == null )
332+
throw new ExprEvalException(label + ": Not a string literal (no text direction): " + nv);
333+
return n;
334+
}
335+
336+
// Should not get here.
316337
throw new ExprEvalException(label + ": Not a string literal: " + nv);
317338
}
318339

@@ -322,41 +343,50 @@ public static Node checkAndGetStringLiteral(String label, NodeValue nv) {
322343
* is not symmetric.
323344
* <ul>
324345
* <li>"abc"@en is compatible with "abc"
346+
* <li>"abc"@en--ltr is compatible with "abc"
325347
* <li>"abc" is NOT compatible with "abc"@en
348+
* <li>"abc"@en--ltr is NOT compatible with "abc"@en
326349
* </ul>
327350
*/
328351
public static void checkTwoArgumentStringLiterals(String label, NodeValue arg1, NodeValue arg2) {
329-
/* Quote the spec:
330-
* Compatibility of two arguments is defined as:
331-
* The arguments are simple literals or literals typed as xsd:string
332-
* The arguments are plain literals with identical language tags
333-
* The first argument is a plain literal with language tag and the second argument is a simple literal or literal typed as xsd:string
352+
/* Compatibility of two arguments:
353+
* The arguments are both xsd:string
354+
* The arguments are rdf:langString with identical language tags
355+
* The arguments are rdf:dirLangString with identical language tags and text direction
356+
* The first argument is a string literal (rdf:langString, rdf:dirLangString) and the second argument is an xsd:string
357+
*
358+
* which simplifies to
359+
* Both arguments are string literals
360+
* The second argument is an xsd:string.
361+
* The first and second arguments have the same lang and text direction.
334362
*/
335363

364+
// Common case
365+
if ( arg1.isString() && arg2.isString() )
366+
// Includes derived datatypes of xsd:string.
367+
return;
368+
369+
// Robust checking.
336370
Node n1 = checkAndGetStringLiteral(label, arg1);
337371
Node n2 = checkAndGetStringLiteral(label, arg2);
372+
if ( arg2.isString() )
373+
// arg1 is some kind of string literal.
374+
return;
375+
// Same lang, same text direction.
338376
String lang1 = n1.getLiteralLanguage();
339377
String lang2 = n2.getLiteralLanguage();
340378
if ( lang1 == null )
341379
lang1 = "";
342380
if ( lang2 == null )
343381
lang2 = "";
344382

345-
// Case 1
346-
if ( lang1.equals("") ) {
347-
if ( lang2.equals("") )
348-
return;
383+
if ( ! Lib.equalsOrNulls(lang1, lang2) )
384+
// Different languages.
349385
throw new ExprEvalException(label + ": Incompatible: " + arg1 + " and " + arg2);
350-
}
351386

352-
// Case 2
353-
if ( lang1.equalsIgnoreCase(lang2) )
354-
return;
355-
356-
// Case 3
357-
if ( lang2.equals("") )
358-
return;
359-
360-
throw new ExprEvalException(label + ": Incompatible: " + arg1 + " and " + arg2);
387+
TextDirection textDir1 = n1.getLiteralBaseDirection();
388+
TextDirection textDir2 = n2.getLiteralBaseDirection();
389+
if ( ! Lib.equalsOrNulls(textDir1, textDir2) )
390+
throw new ExprEvalException(label + ": Incompatible: " + arg1 + " and " + arg2);
361391
}
362392
}

jena-arq/src/main/java/org/apache/jena/sparql/expr/nodevalue/XSDFuncOp.java

Lines changed: 42 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,13 @@
4242
import javax.xml.datatype.XMLGregorianCalendar;
4343

4444
import org.apache.jena.atlas.lib.IRILib;
45+
import org.apache.jena.atlas.lib.Lib;
4546
import org.apache.jena.atlas.lib.StrUtils;
4647
import org.apache.jena.datatypes.RDFDatatype;
4748
import org.apache.jena.datatypes.xsd.XSDDatatype;
4849
import org.apache.jena.graph.Node;
4950
import org.apache.jena.graph.NodeFactory;
51+
import org.apache.jena.graph.TextDirection;
5052
import org.apache.jena.rdf.model.impl.Util;
5153
import org.apache.jena.sparql.ARQInternalErrorException;
5254
import org.apache.jena.sparql.SystemARQ;
@@ -745,7 +747,7 @@ public static NodeValue strEndsWith(NodeValue string, NodeValue match) {
745747

746748
/** Build a NodeValue with lexical form, and same language, base direction and datatype as the Node argument */
747749
private static NodeValue calcReturn(String result, Node arg) {
748-
Node n2 = NodeFactory.createLiteral(result, arg.getLiteralLanguage(), arg.getLiteralDatatype());
750+
Node n2 = NodeFactory.createLiteral(result, arg.getLiteralLanguage(), arg.getLiteralBaseDirection(), arg.getLiteralDatatype());
749751
return NodeValue.makeNode(n2);
750752
}
751753

@@ -823,57 +825,57 @@ public static NodeValue fnConcat(List<NodeValue> args) {
823825
}
824826

825827
/** SPARQL CONCAT (no implicit casts to strings) */
826-
public static NodeValue strConcat(List<NodeValue> args) {
827-
// Step 1 : Choose type.
828-
// One lang tag -> that lang tag
829-
String lang = null;
830-
boolean mixedLang = false;
831-
boolean xsdString = false;
832-
boolean simpleLiteral = false;
828+
public static NodeValue /*XSDFuncOp*/strConcat(List<NodeValue> args) {
829+
// Is this list of arguments known to result in xsd:string, either by being mixed or by having seen an xsd:string?
830+
boolean outputSimpleString = false;
831+
832+
// A candidate has been set (happens at first argument)
833+
boolean candidateSet = false;
834+
String concatLang = null;
835+
TextDirection concatTextDir = null;
833836

834837
StringBuilder sb = new StringBuilder();
835838

836839
for (NodeValue nv : args) {
840+
// ExprEvalException if it is not a string, langString or dirLangString.
837841
Node n = NodeValueOps.checkAndGetStringLiteral("CONCAT", nv);
838-
String lang1 = n.getLiteralLanguage();
839-
if ( !lang1.equals("") ) {
840-
if ( lang != null && !lang1.equals(lang) )
841-
// throw new
842-
// ExprEvalException("CONCAT: Mixed language tags: "+args);
843-
mixedLang = true;
844-
lang = lang1;
845-
} else if ( n.getLiteralDatatype() != null )
846-
xsdString = true;
847-
else
848-
simpleLiteral = true;
849-
850842
sb.append(n.getLiteralLexicalForm());
843+
if ( outputSimpleString )
844+
continue;
845+
String nLang = n.getLiteralLanguage();
846+
TextDirection nTextDir = n.getLiteralBaseDirection();
847+
848+
// Not mixed,
849+
if ( ! candidateSet ) {
850+
if ( nLang.isEmpty() ) {
851+
// No language => outcome is an xsd:string
852+
outputSimpleString = nLang.isEmpty();
853+
continue;
854+
}
855+
// It is the first argument, then set candidate
856+
concatLang = nLang;
857+
concatTextDir = nTextDir;
858+
candidateSet = true;
859+
continue;
860+
}
861+
// candidateSet is true
862+
// Possible lang/textDir
863+
if ( ! Lib.equalsOrNulls(concatLang, nLang) || ! Lib.equalsOrNulls(concatTextDir, nTextDir) ) {
864+
// Different => outcome is an xsd:string
865+
outputSimpleString = true;
866+
candidateSet = false;
867+
}
851868
}
852869

853-
if ( mixedLang )
854-
return NodeValue.makeString(sb.toString());
870+
String string = sb.toString();
855871

856-
// Must be all one lang.
857-
if ( lang != null ) {
858-
if ( !xsdString && !simpleLiteral )
859-
return NodeValue.makeNode(sb.toString(), lang, (String)null);
860-
else
861-
// Lang and one or more of xsd:string or simpleLiteral.
862-
return NodeValue.makeString(sb.toString());
863-
}
864-
865-
if ( simpleLiteral && xsdString )
866-
return NodeValue.makeString(sb.toString());
867-
// All xsdString
868-
if ( xsdString )
869-
return NodeValue.makeNode(sb.toString(), XSDDatatype.XSDstring);
870-
if ( simpleLiteral )
872+
if ( outputSimpleString )
871873
return NodeValue.makeString(sb.toString());
872874

873-
// No types - i.e. no arguments
874-
return NodeValue.makeString(sb.toString());
875+
// Handles "textDir == null"
876+
Node nOut = NodeFactory.createLiteralDirLang(string, concatLang, concatTextDir);
877+
return NodeValue.makeNode(nOut);
875878
}
876-
877879
/** fn:normalizeSpace */
878880
public static NodeValue strNormalizeSpace(NodeValue v){
879881
String str = v.asString();

jena-arq/src/test/java/org/apache/jena/arq/junit/OmittedTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import org.apache.jena.arq.junit.manifest.ManifestEntry;
2222

2323
/**
24-
* Omitted test -= no marked as "suppressed" but not run.
24+
* Omitted test = not marked as "suppressed" but not run.
2525
*/
2626
public class OmittedTest extends SkipTest {
2727
public final boolean verbose;
@@ -38,7 +38,7 @@ public OmittedTest(ManifestEntry entry, boolean verbose) {
3838

3939
@Override
4040
public void runTest() {
41-
//if ( verbose )
41+
if ( verbose )
4242
System.err.println("Omitted test: " + manifestEntry.getName());
4343
}
4444
}

jena-arq/src/test/java/org/apache/jena/rdfs/TestDatasetGraphRDFS.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.apache.jena.rdfs;
2020

2121
import static org.apache.jena.atlas.iterator.Iter.iter;
22+
import static org.apache.jena.atlas.lib.Lib.equalsOrNulls;
2223
import static org.apache.jena.graph.Node.ANY;
2324
import static org.apache.jena.rdfs.LibTestRDFS.node;
2425
import static org.apache.jena.rdfs.engine.ConstRDFS.rdfType;
@@ -36,7 +37,6 @@
3637
import org.junit.jupiter.api.Test;
3738

3839
import org.apache.jena.atlas.iterator.Iter;
39-
import org.apache.jena.atlas.lib.Lib;
4040
import org.apache.jena.atlas.lib.ListUtils;
4141
import org.apache.jena.atlas.lib.StrUtils;
4242
import org.apache.jena.graph.Graph;
@@ -136,7 +136,7 @@ public static void beforeClass() {
136136
}
137137

138138
private static boolean hasNG(List<Quad> quads, Node graphName) {
139-
return quads.stream().map(Quad::getGraph).anyMatch(gn -> Lib.equals(gn, graphName));
139+
return quads.stream().map(Quad::getGraph).anyMatch(gn -> equalsOrNulls(gn, graphName));
140140
}
141141

142142
private void testContains(Node g, Node s, Node p, Node o, boolean expected) {

jena-arq/src/test/java/org/apache/jena/sparql/expr/TS_Expr.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@
3737
, TestNodeFunctions.class
3838
, TestExpressionsMath.class
3939
, TestFunctions.class
40-
, TestFunctions2.class
40+
, TestStringArgCompatibility.class
41+
, TestSparqlKeywordFunctions.class
4142
, TestFunctionsByURI.class
4243
, TestExprTripleTerms.class
4344
, TestLeviathanFunctions.class

0 commit comments

Comments
 (0)