Skip to content

Commit 4f44a5f

Browse files
rjrudin authored and MarkLogic Builder committed
DHFPROD-6954: TDE context path now avoids false positives
The false positives don't affect query results, but they do impact which documents are reindexed. Extracted fix-tde-context and a few other private functions to start this module on the path towards readability.
1 parent 5fc388f commit 4f44a5f

File tree

11 files changed

+257
-32
lines changed

11 files changed

+257
-32
lines changed

marklogic-data-hub/src/main/resources/ml-modules/root/data-hub/5/impl/hub-entities.xqy

Lines changed: 100 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -497,28 +497,26 @@ declare function hent:dump-tde($entities as json:array)
497497
)
498498
let $entity-model-contexts := map:keys($uber-definitions) ! ("./" || .)
499499
let $entity-name := map:get(map:get($uber-model, "info"), "title")
500-
return hent:fix-tde(es:extraction-template-generate($uber-model), $entity-model-contexts, $uber-definitions, $entity-name)
500+
let $es-template := es:extraction-template-generate($uber-model)
501+
return hent:fix-tde($es-template, $entity-model-contexts, $uber-model, $entity-name)
501502
};
502503

503504
declare variable $default-nullable as element(tde:nullable) := element tde:nullable {fn:true()};
504505
declare variable $default-invalid-values as element(tde:invalid-values) := element tde:invalid-values {"ignore"};
505-
(:
506-
this method doctors the TDE output from ES
507-
:)
508506

509-
declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:string*, $uber-definitions as map:map)
507+
declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:string*, $uber-model as map:map)
510508
{
511-
hent:fix-tde($nodes, $entity-model-contexts, $uber-definitions, ())
509+
hent:fix-tde($nodes, $entity-model-contexts, $uber-model, ())
512510
};
513511

514-
declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:string*, $uber-definitions as map:map, $entity-name as xs:string?)
512+
declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:string*, $uber-model as map:map, $entity-name as xs:string?)
515513
{
516514
for $n in $nodes
517515
return
518516
typeswitch($n)
519517
case document-node() return
520518
document {
521-
hent:fix-tde($n/node(), $entity-model-contexts, $uber-definitions, $entity-name)
519+
hent:fix-tde($n/node(), $entity-model-contexts, $uber-model, $entity-name)
522520
}
523521
case element(tde:nullable) return
524522
$default-nullable
@@ -533,6 +531,7 @@ declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:st
533531
$generated-primary-key-expression
534532
else if (fn:starts-with($n, $col-name)) then
535533
let $parts := fn:tokenize($n, "/")
534+
let $uber-definitions := $uber-model => map:get("definitions")
536535
let $entity-definition := $uber-definitions => map:get(fn:string($parts[2]))
537536
return
538537
if (fn:exists($entity-definition)) then
@@ -543,37 +542,26 @@ declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:st
543542
else
544543
fn:string($n) || "/" || $primary-key
545544
else
546-
hent:fix-tde($n/node(), $entity-model-contexts, $uber-definitions)
545+
hent:fix-tde($n/node(), $entity-model-contexts, $uber-model)
547546
else
548-
hent:fix-tde($n/node(), $entity-model-contexts, $uber-definitions)
547+
hent:fix-tde($n/node(), $entity-model-contexts, $uber-model)
549548
}
549+
550550
case element(tde:context) return
551-
element { fn:node-name($n) } {
552-
$n/namespace::node(),
553-
if ($n = $entity-model-contexts) then
554-
fn:replace(fn:replace(fn:string($n),"^\./", ".//"), "(.)$", "$1[node()]")
555-
else
556-
if(fn:count($n) = 1) then
557-
let $outer-context := fn:replace(fn:string($n),"//\*:instance", "/*:envelope/*:instance")
558-
return if(fn:not(fn:empty($entity-name))) then
559-
fn:concat($outer-context, "[*:", $entity-name, "]")
560-
else
561-
$outer-context
562-
else
563-
$n/node()
564-
}
551+
fix-tde-context($n, $entity-model-contexts, $uber-model, $entity-name)
552+
565553
case element(tde:column) return
566554
element { fn:node-name($n) } {
567555
$n/namespace::node(),
568556
$n/@*,
569-
hent:fix-tde($n/* except $n/(tde:nullable|tde:invalid-values), $entity-model-contexts, $uber-definitions),
557+
hent:fix-tde($n/* except $n/(tde:nullable|tde:invalid-values), $entity-model-contexts, $uber-model),
570558
$default-nullable,
571559
$default-invalid-values
572560
}
573561
case element(tde:subject)|element(tde:predicate)|element(tde:object) return
574562
element { fn:node-name($n) } {
575563
$n/namespace::node(),
576-
hent:fix-tde($n/* except $n/tde:invalid-values, $entity-model-contexts, $uber-definitions),
564+
hent:fix-tde($n/* except $n/tde:invalid-values, $entity-model-contexts, $uber-model),
577565
$default-invalid-values
578566
}
579567
case element(tde:template) return
@@ -586,7 +574,7 @@ declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:st
586574
let $rows := $n/tde:rows/tde:row
587575
return
588576
if ($is-join-template) then (
589-
hent:fix-tde($n/tde:context, $entity-model-contexts, $uber-definitions),
577+
hent:fix-tde($n/tde:context, $entity-model-contexts, $uber-model),
590578
element tde:rows {
591579
element tde:row {
592580
$rows/(tde:schema-name|tde:view-name|tde:view-layout),
@@ -596,9 +584,10 @@ declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:st
596584
return
597585
element tde:column {
598586
$column/@*,
599-
hent:fix-tde($column/(tde:name|tde:scalar-type), $entity-model-contexts, $uber-definitions),
587+
hent:fix-tde($column/(tde:name|tde:scalar-type), $entity-model-contexts, $uber-model),
600588
if (fn:starts-with($column/tde:name, $join-prefix)) then (
601589
let $tde-val := fn:string($column/tde:val)
590+
let $uber-definitions := $uber-model => map:get("definitions")
602591
let $primary-key := $uber-definitions => map:get($tde-val) => map:get("primaryKey")
603592
return
604593
element tde:val {
@@ -608,21 +597,21 @@ declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:st
608597
$tde-val || "/" || $primary-key
609598
}
610599
) else
611-
hent:fix-tde($column/tde:val, $entity-model-contexts, $uber-definitions),
600+
hent:fix-tde($column/tde:val, $entity-model-contexts, $uber-model),
612601
$default-nullable,
613602
$default-invalid-values,
614-
hent:fix-tde($column/(tde:default|tde:reindexing|tde:collation), $entity-model-contexts, $uber-definitions)
603+
hent:fix-tde($column/(tde:default|tde:reindexing|tde:collation), $entity-model-contexts, $uber-model)
615604
}
616605
}
617606
}
618607
}
619608
) else
620-
hent:fix-tde($n/node(), $entity-model-contexts, $uber-definitions, $entity-name)
609+
hent:fix-tde($n/node(), $entity-model-contexts, $uber-model, $entity-name)
621610
}
622611
case element() return
623612
element { fn:node-name($n) } {
624613
$n/namespace::node(),
625-
hent:fix-tde(($n/@*, $n/node()), $entity-model-contexts, $uber-definitions)
614+
hent:fix-tde(($n/@*, $n/node()), $entity-model-contexts, $uber-model)
626615
}
627616
case text() return
628617
fn:replace(
@@ -633,6 +622,85 @@ declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:st
633622
default return $n
634623
};
635624

625+
(:
Rebuilds the tde:context element of an ES-generated TDE so that its path matches fewer unrelated documents.

False positives in the context path won't lead to incorrect results when querying via the TDE, but they will
lead to unnecessary reindexing, per DHFPROD-6954.

Example of an ES-generated path: //*:instance[*:info/*:version = "1.0"]

The resulting path is chosen as follows:
- If the context equals one of $entity-model-contexts, a leading "./" becomes ".//" and "[node()]" is appended.
- Otherwise, if an entity name is given, the wildcard path is replaced. When the model declares a version, an
  explicit envelope/instance path with a version predicate is emitted; when it does not, the wildcards in the
  original path are replaced via replace-context-wildcards. In both cases a predicate requiring the entity
  element is appended, qualified by the entity's namespacePrefix when it has one.
- Otherwise only the wildcards are replaced.

@param $context the ES-generated tde:context element
@param $entity-model-contexts context paths of the entity definitions (each "./" followed by a definition name)
@param $uber-model the model map; its "info" and "definitions" entries are read
@param $entity-name the entity title, if known
@return a new tde:context element carrying the namespace nodes of $context and the rewritten path
:)
declare private function fix-tde-context(
  $context as element(tde:context),
  $entity-model-contexts as xs:string*,
  $uber-model as map:map,
  $entity-name as xs:string?
) as element(tde:context)
{
  element tde:context {
    $context/namespace::node(),

    (: This appears to be for the 'non-root' context elements in a TDE :)
    if ($context = $entity-model-contexts) then
      fn:replace(fn:replace(fn:string($context),"^\./", ".//"), "(.)$", "$1[node()]")

    else if ($entity-name) then
      let $version := get-version-from-uber-model($uber-model)
      let $ns-prefix := get-namespace-prefix($uber-model, $entity-name)
      let $entity-predicate :=
        if ($ns-prefix) then "[" || $ns-prefix || ":" || $entity-name || "]"
        else "[" || $entity-name || "]"
      let $instance-path :=
        if ($version) then
          let $version-test :=
            if ($ns-prefix) then
              "es:info/es:version = '" || $version || "'"
            else
              (: An 'or' clause is used to further avoid false positives :)
              "es:info/es:version = '" || $version || "' or info/version = '" || $version || "'"
          return "/(es:envelope|envelope)/(es:instance|instance)[" || $version-test || "]"
        else
          (: fn:string() always yields exactly one xs:string, even if the context has no or several text nodes :)
          replace-context-wildcards(fn:string($context))
      return $instance-path || $entity-predicate

    else
      (: In the absence of an entity-name, which is very unexpected, at least remove the wildcards :)
      replace-context-wildcards(fn:string($context))
  }
};
674+
675+
(:
Reads the "version" entry from the "info" entry of the given model map.
Returns the empty sequence when the model has no "info" entry.
:)
declare private function get-version-from-uber-model($uber-model as map:map) as xs:string?
{
  let $model-info := map:get($uber-model, "info")
  return
    if (fn:exists($model-info)) then
      map:get($model-info, "version")
    else ()
};
681+
682+
(:
Looks up the "namespacePrefix" entry of the named entity definition within the model's "definitions" entry.
Returns the empty sequence when the model has no definitions, when the entity has no definition, or when the
definition has no namespacePrefix.
:)
declare private function get-namespace-prefix($uber-model as map:map, $entity-name as xs:string) as xs:string?
{
  let $definitions := map:get($uber-model, "definitions")
  let $entity-def :=
    if (fn:exists($definitions)) then map:get($definitions, $entity-name)
    else ()
  return
    if (fn:exists($entity-def)) then
      map:get($entity-def, "namespacePrefix")
    else ()
};
691+
692+
(:
Replaces the wildcard steps of an ES-generated context path with explicit alternatives, which eliminates many
false positives, per DHFPROD-6954:
- "//*:instance" becomes "/(es:envelope|envelope)/(es:instance|instance)"
- "*:info/*:version" becomes "(es:info/es:version|info/version)"

The unprefixed alternatives are intended to match JSON entity instances; the es-prefixed ones match XML instances
that use the es namespace for envelope/instance/info/version.

NOTE(review): an earlier version of this comment said the function should only be used when the entity definition
has no namespace prefix, yet fix-tde-context also calls it for prefixed entities that have no version - confirm
which is intended.
:)
declare private function replace-context-wildcards($path as xs:string) as xs:string
{
  fn:replace(
    fn:replace($path, "//\*:instance", "/(es:envelope|envelope)/(es:instance|instance)"),
    "\*:info/\*:version",
    "(es:info/es:version|info/version)"
  )
};
703+
636704
declare variable $number-types as xs:string+ := ("byte","decimal","double","float","int","integer","long","negativeInteger","nonNegativeInteger","nonPositiveInteger","positiveInteger","short","unsignedLong","unsignedInt","unsignedShort","unsignedByte");
637705
declare variable $string-types as xs:string+ := ("dateTime","date");
638706

marklogic-data-hub/src/test/ml-modules/root/test/data-hub-test-helper.xqy

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,15 @@ declare function get-first-prov-document()
181181
invoke-in-db(function() {fn:collection("http://marklogic.com/provenance-services/record")[1]}, "data-hub-JOBS")
182182
};
183183

184+
(:
Returns the document with the given URI from the "data-hub-final-SCHEMAS" database.

The URI is handed to the evaluated query as an external variable rather than being spliced into the query
text, so URIs containing quotes or other XQuery syntax are treated purely as data.

@param $uri URI of the schema document to fetch
@return the result of fn:doc($uri) evaluated against the final schemas database
:)
declare function get-final-schema($uri as xs:string)
{
  xdmp:eval(
    'declare variable $uri external; fn:doc($uri)',
    (xs:QName("uri"), $uri),
    <options xmlns="xdmp:eval">
      <database>{xdmp:database("data-hub-final-SCHEMAS")}</database>
    </options>
  )
};
192+
184193
declare function invoke-in-db($function, $database as xs:string)
185194
{
186195
xdmp:invoke-function($function,
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
xquery version "1.0-ml";
2+
import module namespace hub-test = "http://marklogic.com/data-hub/test" at "/test/data-hub-test-helper.xqy";
3+
hub-test:reset-hub();
4+
5+
xquery version "1.0-ml";
6+
import module namespace hub-test = "http://marklogic.com/data-hub/test" at "/test/data-hub-test-helper.xqy";
7+
import module namespace test = "http://marklogic.com/test" at "/test/test-helper.xqy";
8+
hub-test:load-entities($test:__CALLER_FILE__);
9+
10+
xquery version "1.0-ml";
11+
import module namespace hub-test = "http://marklogic.com/data-hub/test" at "/test/data-hub-test-helper.xqy";
12+
import module namespace test = "http://marklogic.com/test" at "/test/test-helper.xqy";
13+
hub-test:load-artifacts($test:__CALLER_FILE__);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"envelope": {
3+
"instance": {
4+
"info": {
5+
"version": "1.0"
6+
},
7+
"TdeContextNoNamespaceEntity": {
8+
"myProperty": "JSON"
9+
}
10+
}
11+
}
12+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"envelope": {
3+
"instance": {
4+
"info": {
5+
"version": "1.0.1"
6+
},
7+
"TdeContextNoNamespaceEntity": {
8+
"myProperty": "JSON"
9+
}
10+
}
11+
}
12+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<es:envelope xmlns:es="http://marklogic.com/entity-services">
2+
<es:instance>
3+
<es:info>
4+
<es:version>1.0</es:version>
5+
</es:info>
6+
<oex:TdeContextNamespacedEntity xmlns:oex="http://example.org/">
7+
<oex:myProperty>XML</oex:myProperty>
8+
</oex:TdeContextNamespacedEntity>
9+
</es:instance>
10+
</es:envelope>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<es:envelope xmlns:es="http://marklogic.com/entity-services">
2+
<es:instance>
3+
<oex:NamespacedEntity xmlns:oex="http://example.org/">
4+
<oex:myProperty>XML</oex:myProperty>
5+
</oex:NamespacedEntity>
6+
</es:instance>
7+
</es:envelope>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<es:envelope xmlns:es="http://marklogic.com/entity-services">
2+
<es:instance>
3+
<es:info>
4+
<es:version>1.0</es:version>
5+
</es:info>
6+
<TdeContextNoNamespaceEntity xmlns:ns="example">
7+
<myProperty>XML</myProperty>
8+
</TdeContextNoNamespaceEntity>
9+
</es:instance>
10+
</es:envelope>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"info": {
3+
"title": "TdeContextNamespacedEntity",
4+
"version": "1.0",
5+
"baseUri": "http://example.org/"
6+
},
7+
"definitions": {
8+
"TdeContextNamespacedEntity": {
9+
"namespace": "http://example.org/",
10+
"namespacePrefix": "oex",
11+
"properties": {
12+
"myProperty": {
13+
"datatype": "string"
14+
}
15+
}
16+
}
17+
}
18+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"info": {
3+
"title": "TdeContextNoNamespaceEntity",
4+
"version": "1.0",
5+
"baseUri": "http://example.org/"
6+
},
7+
"definitions": {
8+
"TdeContextNoNamespaceEntity": {
9+
"properties": {
10+
"myProperty": {
11+
"datatype": "string"
12+
}
13+
}
14+
}
15+
}
16+
}

0 commit comments

Comments
 (0)