Skip to content

Commit e375f90

Browse files
ryanjdew authored and MarkLogic Builder committed
DHFPROD-7234: No caching or double counting calculated weights
1 parent fe76bcb commit e375f90

File tree

5 files changed

+58
-18
lines changed

5 files changed

+58
-18
lines changed

marklogic-data-hub/src/main/resources/ml-modules/root/com.marklogic.smart-mastering/matcher-impl/matcher-impl.xqy

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -542,7 +542,7 @@ declare function match-impl:search(
542542
)
543543
else ()
544544
where $contains
545-
return $query-map => map:with("weight", $weight)
545+
return map:new(($query-map, map:entry("weight", $weight)))
546546
let $score :=
547547
fn:sum(
548548
$matching-query-maps ! map:get(., "weight")
@@ -660,12 +660,20 @@ declare function match-impl:instance-query-wrapper(
660660
};
661661

662662
declare function match-impl:score-from-cts-query($result as node(), $query as cts:query) as xs:double {
663-
fn:sum(
663+
(: We don't want to double count for the same query/value pair hit :)
664+
let $queries-and-values-hit := map:map()
665+
return
666+
fn:sum(
664667
cts:walk(
665668
$result,
666669
$query,
667-
document{$cts:queries}
668-
//schema-element(cts:query)[fn:node-name(.) = $QUERIES_WITH_WEIGHT] ! fn:number(fn:head((./@weight, 1)))
670+
let $key := xdmp:hash64(xdmp:describe($cts:queries, (), ())) || ":" || $cts:text
671+
where fn:not(map:contains($queries-and-values-hit, $key))
672+
return (
673+
map:put($queries-and-values-hit, $key, fn:true()),
674+
document{$cts:queries}
675+
//schema-element(cts:query)[fn:node-name(.) = $QUERIES_WITH_WEIGHT] ! fn:number(fn:head((./@weight, 1)))
676+
)
669677
)
670-
)
678+
)
671679
};

marklogic-data-hub/src/test/ml-modules/root/test/suites/data-hub/5/smart-mastering/matching/custom-controlled-weight.xqy

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,13 +16,18 @@ declare option xdmp:mapping "false";
1616
test:assert-equal(2.5, match-impl:score-from-cts-query(document { element test{"specific score"} }, cts:word-query("specific score", (), 2.5)), "The match score should be 0")
1717
),
1818
(: test that score is used in matching :)
19-
let $doc := fn:doc($lib:URI-DOB1)
19+
let $jsonDoc3 := fn:doc($lib:URI6)
2020
let $options := test:get-test-file($lib:MATCH-OPTIONS-CUST-ALG-CUST-WEIGHT || ".json")
21-
let $actual := matcher:find-document-matches-by-options($doc, $options, 1, 10, 0, fn:true(), cts:true-query())
21+
let $actual := matcher:find-document-matches-by-options($jsonDoc3, $options, 1, 10, 0, fn:true(), cts:true-query())
22+
let $jsonDoc1Match := $actual/result[@uri = $lib:URI4]
23+
let $jsonDoc2Match := $actual/result[@uri = $lib:URI5]
2224
return
2325
(
2426
test:assert-exists($actual, "There should be a result returned"),
25-
test:assert-equal("1", fn:string($actual/@total), "The match count should be 1"),
26-
test:assert-equal("/source/5/dob2.json", fn:string($actual/result/@uri), "The matching document should be dob2.json")
27+
test:assert-equal("2", fn:string($actual/@total), "The match count should be 2"),
28+
test:assert-exists($jsonDoc1Match, "There should be a match for " || $lib:URI4),
29+
test:assert-equal("5", fn:string($jsonDoc1Match/@score), "The match score for "|| $lib:URI4 ||" should be 5"),
30+
test:assert-exists($jsonDoc2Match, "There should be a match for " || $lib:URI5),
31+
test:assert-equal("10", fn:string($jsonDoc2Match/@score), "The match score for "|| $lib:URI5 ||" should be 10")
2732
)
2833

marklogic-data-hub/src/test/ml-modules/root/test/suites/data-hub/5/smart-mastering/matching/test-data/custom-matching-with-custom-weight-options.json

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -12,18 +12,18 @@
1212
"propertyDefs": {
1313
"property": [
1414
{
15-
"localname": "DateOfBirth",
16-
"name": "DateOfBirth"
15+
"localname": "id",
16+
"name": "id"
1717
}
1818
]
1919
},
2020
"algorithms": {
2121
"algorithm": [
2222
{
23-
"name": "dob-match",
24-
"function": "dob-match",
23+
"name": "custom-id",
24+
"function": "custom-id",
2525
"namespace": "http://marklogic.com/smart-mastering/algorithms",
26-
"at": "/custom-xqy-matching-algo-dob.xqy"
26+
"at": "/test/suites/data-hub/5/smart-mastering/matching/test-data/custom-matching-with-custom-weight.xqy"
2727
}
2828
]
2929
},
@@ -33,9 +33,8 @@
3333
"scoring": {
3434
"expand": [
3535
{
36-
"propertyName": "DateOfBirth",
37-
"algorithmRef": "dob-match",
38-
"customWeight": "10"
36+
"propertyName": "id",
37+
"algorithmRef": "custom-id"
3938
}
4039
]
4140
},
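
marklogic-data-hub/src/test/ml-modules/root/test/suites/data-hub/5/smart-mastering/matching/test-data/custom-matching-with-custom-weight.xqy

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change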
@@ -0,0 +1,28 @@
1+
xquery version "1.0-ml";
2+
3+
module namespace algorithms = "http://marklogic.com/smart-mastering/algorithms";
4+
5+
import module namespace helper-impl = "http://marklogic.com/smart-mastering/helper-impl"
6+
at "/com.marklogic.smart-mastering/matcher-impl/helper-impl.xqy";
7+
8+
declare namespace matcher = "http://marklogic.com/smart-mastering/matcher";
9+
10+
declare option xdmp:mapping "false";
11+
12+
13+
14+
(:
15+
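Example of a custom matching algorithm that supplies its own weights: it returns a cts:or-query whose word-queries carry different weights (5 for the fixed ID "6986792174", 10 for the expanded values).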
16+
:)
17+
declare function algorithms:custom-id(
18+
$expand-values as xs:string*,
19+
$expand-xml as element(matcher:expand),
20+
$options-xml as element(matcher:options)
21+
) as cts:query*
22+
{
23+
cts:or-query((
24+
cts:word-query("6986792174", (), 5),
25+
(: different scores for the different documents to ensure weight isn't cached :)
26+
cts:word-query($expand-values, (), 10)
27+
))
28+
};

marklogic-data-hub/src/test/ml-modules/root/test/suites/data-hub/5/smart-mastering/matching/test-data/doc1.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
}
3939
},
4040
"IncidentCategoryCodeDate": null,
41-
"id": "6986792174",
41+
"id": ["6986792174", "repeating ID to ensure score is only counted once","6986792174"],
4242
"PersonBirthDate": "19801001",
4343
"PersonSex": "F",
4444
"CaseAmount": 1287.9,

0 commit comments

Comments
 (0)