Skip to content

Commit de0e855

Browse files
committed
CMR-10502 implements JSON-configurable validation for generic documents
1 parent e474246 commit de0e855

File tree

8 files changed

+361
-15
lines changed

8 files changed

+361
-15
lines changed

ingest-app/src/cmr/ingest/api/generic_documents.clj

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@
66
[clojure.string :as string]
77
[cmr.acl.core :as acl]
88
[cmr.common-app.api.launchpad-token-validation :as lt-validation]
9-
[cmr.common.generics :as gconfig]
109
[cmr.common.concepts :as common-concepts]
1110
[cmr.common.config :as cfg]
11+
[cmr.common.generics :as gconfig]
1212
[cmr.common.log :refer [info]]
1313
[cmr.common.services.errors :as errors]
1414
[cmr.common.util :as util :refer [defn-timed]]
15-
[cmr.ingest.api.core :as api-core]
1615
[cmr.ingest.api.collections :as collections]
16+
[cmr.ingest.api.core :as api-core]
1717
[cmr.ingest.api.services :as services]
1818
[cmr.ingest.api.tools :as tools]
1919
[cmr.ingest.api.variables :as variables]
20+
[cmr.ingest.validation.generic-document-validation :as generic-document-validation]
2021
[cmr.schema-validation.json-schema :as js-validater]
2122
[cmr.transmit.metadata-db :as mdb]
2223
[cmr.transmit.metadata-db2 :as mdb2]
@@ -197,12 +198,27 @@
197198
:concept-id concept-id
198199
:revision-id revision-id}))
199200

201+
(defn validate-business-rules
  "Runs the schema-defined business rule validations against a concept.
  Nothing is done when concept is nil or when
  common-concepts/is-draft-concept? is truthy for its :concept-type.
  Any exception raised while validating is rethrown as an :invalid-data
  service error whose message embeds the original exception message."
  [context concept]
  (when (and concept
             (not (common-concepts/is-draft-concept? (:concept-type concept))))
    (try
      (generic-document-validation/validate-concept context concept)
      (catch Exception e
        (let [reason (.getMessage e)]
          (errors/throw-service-error
           :invalid-data
           (format "While validating the business rules for the record, the following error occurred: [%s]"
                   reason)))))))
214+
200215
(defn ingest-document
201216
"Ingest the concept into the database and the indexer through the database."
202217
[context concept headers]
203218
(info (format "Ingesting concept %s from client %s"
204219
(api-core/concept->loggable-string concept)
205220
(:client-id context)))
221+
(validate-business-rules context concept)
206222
(let [save-concept-result (save-document context concept)
207223
concept-to-log (-> concept
208224
(api-core/concept-with-revision-id save-concept-result)

ingest-app/src/cmr/ingest/system.clj

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
[cmr.common-app.services.kms-lookup :as kl]
1616
[cmr.common.api.web-server :as web]
1717
[cmr.common.cache.in-memory-cache :as mem-cache]
18+
[cmr.ingest.validation.generic-document-validation :as generic-validation]
1819
[cmr.common.config :as cfg :refer [defconfig]]
1920
[cmr.common.jobs :as jobs]
2021
[cmr.common.log :as log]
@@ -121,7 +122,8 @@
121122
common-enabled/write-enabled-cache-key (common-enabled/create-write-enabled-cache)
122123
humanizer-alias-cache/humanizer-alias-cache-key (humanizer-alias-cache/create-cache-client)
123124
launchpad-user-cache/launchpad-user-cache-key (launchpad-user-cache/create-launchpad-user-cache)
124-
urs/urs-cache-key (urs/create-urs-cache)}
125+
urs/urs-cache-key (urs/create-urs-cache)
126+
generic-validation/schema-validation-cache-key (generic-validation/create-schema-validation-cache)}
125127
:public-conf (public-conf)
126128
:queue-broker (queue-broker/create-queue-broker (config/queue-config))}]
127129
(transmit-config/system-with-connections
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
(ns cmr.ingest.validation.generic-document-validation
2+
"Provides functions to validate the ingest generic document"
3+
(:require
4+
[cheshire.core :as json]
5+
[clojure.string :as string]
6+
[cmr.common.cache :as cache]
7+
[cmr.common.cache.in-memory-cache :as mem-cache]
8+
[cmr.common.concepts :as concepts]
9+
[cmr.common.generics :as generics]
10+
[cmr.common.log :refer (info error)]
11+
[cmr.common.services.errors :as errors]
12+
[cmr.transmit.metadata-db :as mdb]))
13+
14+
(def schema-validation-cache-key
  "Key under which the schema validation functions cache is registered
  and later looked up from the context."
  :schema-validation-functions)
17+
18+
(def SCHEMA_CACHE_TIME
  "Time-to-live, in milliseconds, for cached schema validation functions."
  (* 1000 60 60)) ;; one hour
21+
22+
(defn create-schema-validation-cache
  "Builds the in-memory cache that holds the schema validation functions.
  The cache starts empty and is created with a :ttl of SCHEMA_CACHE_TIME."
  []
  (let [ttl-options {:ttl SCHEMA_CACHE_TIME}]
    (mem-cache/create-in-memory-cache :ttl {} ttl-options)))
26+
27+
(defn- fetch-existing-concepts
  "Asks metadata-db for the latest concepts of concept-type so they can be
  compared during uniqueness validation.
  Best effort: if the lookup throws, the error is logged and an empty vector
  is returned, so callers then see no existing concepts."
  [context concept-type]
  (let [query {:latest true}]
    (try
      (mdb/find-concepts context query concept-type)
      (catch Exception e
        (error (str "Error fetching concepts for generic validations: " (.getMessage e)))
        []))))
35+
36+
(defn- validate-uniqueness
  "Validates that the combination of values found at `fields` in the concept's
  metadata is not already used by another concept of the same type.

  context - request context, passed through to the metadata-db lookup.
  concept - concept being ingested; :metadata is parsed as JSON.
  fields  - jq-style dotted paths such as \".Identifier\" or \".Root.Sub\".

  Values are compared position by position, so the value of the first field
  is only ever compared with the first field of the other concept. Concepts
  with the same :native-id as the incoming concept (i.e. other revisions of
  it) and concepts flagged :deleted are ignored.

  Returns a vector holding one error message when a duplicate exists, and an
  empty vector otherwise."
  [context concept fields]
  (let [concept-type (:concept-type concept)]
    (if-let [existing-concepts (fetch-existing-concepts context concept-type)]
      (let [;; \".A.B\" -> (\"A\" \"B\"); blank segments (such as the one before the
            ;; leading dot) are dropped.
            field-segments (fn [field]
                             (remove string/blank? (string/split field #"\.")))
            ;; Full keyword path per field, so nested fields are honoured
            ;; instead of only their first segment.
            field-paths (mapv #(mapv keyword (field-segments %)) fields)
            get-field-values (fn [c]
                               (let [metadata (json/parse-string (:metadata c) true)]
                                 ;; An empty path would make get-in return the
                                 ;; whole document, so treat it as no value.
                                 (mapv #(when (seq %) (get-in metadata %))
                                       field-paths)))
            field-values (get-field-values concept)
            duplicate-concepts (filter (fn [existing-concept]
                                         (and (not= (:native-id existing-concept)
                                                    (:native-id concept))
                                              (not= (:deleted existing-concept) true)
                                              ;; Ordered comparison: a set
                                              ;; comparison would treat values
                                              ;; swapped between fields as equal.
                                              (= (get-field-values existing-concept)
                                                 field-values)))
                                       existing-concepts)]
        (if (seq duplicate-concepts)
          (let [duplicate-concept-ids (map :concept-id duplicate-concepts)
                field-names (mapv #(string/join "." (field-segments %)) fields)
                display-values (mapv str field-values)]
            (info "Duplicate concept IDs found: " duplicate-concept-ids)
            [(format "Values %s for fields %s must be unique for concept type %s. Duplicate concept IDs: %s"
                     (string/join ", " display-values)
                     (string/join ", " field-names)
                     (str concept-type)
                     (string/join ", " duplicate-concept-ids))])
          []))
      [])))
73+
74+
(defn- validate-by-type
  "Dispatches a single validation rule on its validation-type.
  Only \"unique\" is handled; any other type yields a one-element vector
  containing an 'Unknown validation type' message. validation-value is
  accepted but not used by the \"unique\" rule.
  Returns a sequence of error messages, empty when the rule passes."
  [context concept validation-type fields validation-value]
  (if (= "unique" validation-type)
    (validate-uniqueness context concept fields)
    ;; Anything that is not a known rule type is reported as an error
    [(str "Unknown validation type: " validation-type)]))
82+
83+
(defn- validate-with-schema
  "Applies every entry under :Validations in schema to the concept and
  concatenates the resulting error messages.
  Returns an empty vector when the schema has no :Validations entry."
  [context concept schema]
  (letfn [(run-rule [rule]
            (validate-by-type context
                              concept
                              (:ValidationType rule)
                              (:Fields rule)
                              (:ValidationValue rule)))]
    (if-let [rules (:Validations schema)]
      (mapcat run-rule rules)
      [])))
96+
97+
(defn- load-schema-validation
  "Builds the validation function for one concept type and schema version.
  The returned function takes [context concept] and validates the concept
  against the parsed schema index. Returns nil, after logging an error, when
  the version is not approved or when loading the schema throws."
  [concept-type version]
  (try
    (if-not (generics/approved-generic? concept-type version)
      (do
        (error "Schema version not approved for" concept-type "version" version)
        nil)
      (let [schema (-> (generics/read-schema-index concept-type version)
                       (json/parse-string true))]
        (fn [context concept]
          (validate-with-schema context concept schema))))
    (catch Exception e
      (error "Error loading schema for" concept-type "version" version ":" (.getMessage e))
      nil)))
112+
113+
(defn- extract-concept-metadata-spec
  "Reads the MetadataSpecification name and version out of the concept's
  JSON :metadata.
  Returns a map with :name and :version (either may be nil when absent from
  the document), or nil when the metadata parses to nothing."
  [concept]
  ;; Keys must be keywordized (the `true` argument); otherwise the parsed map
  ;; has string keys and the keyword lookups below always return nil.
  (when-let [parsed-data (json/parse-string (:metadata concept) true)]
    {:name (get-in parsed-data [:MetadataSpecification :Name])
     :version (get-in parsed-data [:MetadataSpecification :Version])}))
120+
121+
(defn- load-schema-validators
  "Builds a map of [concept-type version] -> validation function covering the
  current version of every generic concept type. Types whose validator could
  not be loaded are left out of the map."
  []
  (info "Loading schema validation functions for all generic concept types")
  (let [validator-entry (fn [concept-type]
                          (let [current-version (generics/current-generic-version concept-type)]
                            (when-let [validator (load-schema-validation concept-type current-version)]
                              [[concept-type current-version] validator])))
        validators (into {}
                         (keep validator-entry)
                         (concepts/get-generic-concept-types-array))]
    (info "Loaded" (count validators) "schema validators")
    validators))
136+
137+
(defn- get-validation-functions
  "Returns the map of schema validation functions. When the context carries
  the schema validation cache the map is read through it (loading on a miss);
  without a cache the validators are loaded directly."
  [context]
  (let [validator-cache (cache/context->cache context schema-validation-cache-key)]
    (if validator-cache
      (cache/get-value validator-cache :validators load-schema-validators)
      (load-schema-validators))))
145+
146+
(defn validate-concept
  "Validates the given concept with the validator registered for its concept
  type and schema version. The version comes from the concept's
  MetadataSpecification when present, otherwise from the current generic
  version for the concept type. When no validator is registered for that
  [concept-type version] pair, nothing is validated.
  Returns nil on success.
  Throws a :bad-request service error carrying all validation messages when
  validation fails."
  [context concept]
  (let [concept-type (:concept-type concept)
        metadata-spec (extract-concept-metadata-spec concept)
        version (or (:version metadata-spec)
                    (generics/current-generic-version concept-type))
        validators (get-validation-functions context)
        validator-fn (get validators [concept-type version])]
    ;; Log identifying fields only, and before validating: the concept map
    ;; includes the full metadata document, which should not go to the log.
    (info (format "Validating %s concept with native-id [%s] against schema version %s"
                  concept-type (:native-id concept) version))
    ;; Only run validation if a validator is defined
    (let [validation-errors (when validator-fn
                              (validator-fn context concept))]
      (when (seq validation-errors)
        (errors/throw-service-errors :bad-request validation-errors)))))

schemas/resources/schemas/citation/v1.0.0/index.json

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616
"refresh_interval": "1s"
1717
}
1818
},
19-
"Indexes":
20-
[
19+
"Indexes": [
2120
{
2221
"Description": "The identifier for the cited resource, e.g., DOI, ISBN, ARK",
2322
"Field": ".Identifier",
@@ -54,5 +53,11 @@
5453
"Name": "Relationship-Type",
5554
"Mapping": "string"
5655
}
56+
],
57+
"Validations": [
58+
{
59+
"Fields": [".Identifier", ".IdentifierType"],
60+
"ValidationType": "unique"
61+
}
5762
]
5863
}

schemas/resources/schemas/index/v0.0.1/schema.json

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,38 @@
2424
"uniqueItems": true,
2525
"minItems": 1,
2626
"items": {"$ref": "#/definitions/IndexType"}
27+
},
28+
"Validations": {
29+
"type": "array",
30+
"uniqueItems": true,
31+
"minItems": 1,
32+
"items": {
33+
"type": "object",
34+
"additionalProperties": false,
35+
"properties": {
36+
"Fields": {
37+
"description": "The fields to validate, each defined as a jq style path",
38+
"type": "array",
39+
"items": {
40+
"type": "string",
41+
"examples": [".RootElement.SubElement"],
42+
"minLength": 1
43+
},
44+
"minItems": 1
45+
},
46+
"ValidationType": {
47+
"description": "The type of validation to perform on the field",
48+
"type": "string",
49+
"enum": ["unique" ,"regex", "length", "value"],
50+
"default": "regex"
51+
},
52+
"ValidationValue": {
53+
"$comment": "This is the value to validate against. The type of value depends on the ValidationType.",
54+
"type": ["string", "number"]
55+
}
56+
},
57+
"required": ["Fields", "ValidationType"]
58+
}
2759
}
2860
},
2961
"required": ["MetadataSpecification", "Generic"],

system-int-test/test/cmr/system_int_test/bootstrap/bulk_index/generics_test.clj

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,6 @@
44
[clojure.test :refer :all]
55
[cmr.mock-echo.client.echo-util :as echo-util]
66
[cmr.system-int-test.bootstrap.bulk-index.core :as core]
7-
[cmr.system-int-test.data2.collection :as data-collection]
8-
[cmr.system-int-test.data2.core :as data-core]
9-
[cmr.system-int-test.data2.umm-spec-collection :as data-umm-c]
10-
[cmr.system-int-test.data2.umm-json :as data-umm-json]
117
[cmr.system-int-test.search.misc.generic-association-test :as association-test]
128
[cmr.system-int-test.system :as system]
139
[cmr.system-int-test.utils.association-util :as association-util]
@@ -16,11 +12,7 @@
1612
[cmr.system-int-test.utils.index-util :as index]
1713
[cmr.system-int-test.utils.ingest-util :as ingest]
1814
[cmr.system-int-test.utils.search-util :as search]
19-
[cmr.system-int-test.utils.service-util :as service-util]
20-
[cmr.system-int-test.utils.tool-util :as tool-util]
21-
[cmr.system-int-test.utils.tool-util :as tool]
22-
[cmr.system-int-test.utils.variable-util :as variable-util]
23-
[cmr.umm-spec.versioning :as umm-version]))
15+
[cmr.system-int-test.utils.service-util :as service-util]))
2416

2517
(use-fixtures :each (join-fixtures
2618
[(ingest/reset-fixture {"provguid1" "PROV1"

system-int-test/test/cmr/system_int_test/ingest/collection/collection_ingest_test.clj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@
6262
(deftest basic-collection-ingest-test
6363
(testing "ingest of a new concept"
6464
(let [concept (data-umm-c/collection-concept {})
65-
{:keys [concept-id revision-id]} (ingest/ingest-concept concept {:validate-keywords false})]
65+
{:keys [concept-id revision-id]} (ingest/ingest-concept concept {:validate-keywords false})
66+
_ (ingest/ingest-concept concept {:validate-keywords false})]
6667
(index/wait-until-indexed)
6768
(is (mdb/concept-exists-in-mdb? concept-id revision-id))
6869
(is (= 1 revision-id)))

0 commit comments

Comments
 (0)