Skip to content

Commit e631374

Browse files
Niels Vnvdk
authored and committed
initial support for prefixes
1 parent d2559be commit e631374

File tree

4 files changed

+89
-13
lines changed

4 files changed

+89
-13
lines changed

README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ In the example below the documents index contains a property `topics` that maps
406406
}
407407
```
408408

409+
409410
##### File content property
410411
To make the content of a file searchable, it needs to be indexed as a property in a search index. Basic indexing of PDF, Word etc. files is provided using a local [Apache Tika](https://tika.apache.org/) instance. A default ingest pipeline named `attachment` is created on startup of the mu-search service. Note that this is under development and liable to change.
411412

@@ -643,6 +644,30 @@ The example below contains 2 simple indexes for documents and creative works, an
643644
]
644645
}
645646
```
647+
#### Using Prefixes
648+
To make the configuration more concise and maintainable, you can define prefixes for commonly used URI namespaces. Prefixes are defined at the root level of the configuration using the `prefixes` property:
649+
650+
```json
651+
{
652+
"prefixes": {
653+
"foaf": "http://xmlns.com/foaf/0.1/",
654+
"dct": "http://purl.org/dc/terms/",
655+
"skos": "http://www.w3.org/2004/02/skos/core#"
656+
},
657+
"types": [
658+
{
659+
"type": "document",
660+
"on_path": "documents",
661+
"rdf_type": "foaf:Document",
662+
"properties": {
663+
"title": "dct:title",
664+
"label": "skos:prefLabel",
665+
"creator": "^foaf:made"
666+
}
667+
}
668+
]
669+
}
670+
```
646671

647672
#### Elasticsearch settings
648673
Elasticsearch provides a lot of [index configuration settings](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules.html) for analysis, logging, etc. Mu-search allows this configuration to be provided for the whole domain and/or overridden (currently not merged!) on a per-type basis.

lib/mu_search/config_parser.rb

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ def self.parse(path)
1616
eager_indexing_groups: [],
1717
update_wait_interval_minutes: 1,
1818
number_of_threads: 1,
19-
enable_raw_dsl_endpoint: false
19+
enable_raw_dsl_endpoint: false,
20+
prefixes: {}
2021
}
2122

2223
json_config = JSON.parse(File.read(path))
@@ -48,7 +49,9 @@ def self.parse(path)
4849
config[:eager_indexing_groups] = json_config["eager_indexing_groups"]
4950
end
5051
config[:ignored_allowed_groups] = json_config["ignored_allowed_groups"] || []
51-
config[:type_definitions] = Hash[MuSearch::IndexDefinition.from_json_config(json_config["types"])]
52+
config[:prefixes] = json_config["prefixes"] || {}
53+
config[:type_definitions] = Hash[MuSearch::IndexDefinition.from_json_config(json_config["types"], config[:prefixes])]
54+
5255
config
5356
end
5457

@@ -120,6 +123,9 @@ def self.validate_config(json_config)
120123
if json_config.has_key?("ignored_allowed_groups")
121124
errors = errors.concat(self.validate_ignored_allowed_groups(json_config["ignored_allowed_groups"]))
122125
end
126+
if json_config.has_key?("prefixes")
127+
errors = errors.concat(self.validate_prefixes(json_config["prefixes"]))
128+
end
123129
if errors.length > 0
124130
Mu::log.error("CONFIG_PARSER") { errors.join("\n") }
125131
raise "invalid config"
@@ -242,5 +248,28 @@ def self.validate_eager_indexing_groups(groups)
242248

243249
errors
244250
end
251+
252+
# Validates the optional "prefixes" section of the configuration.
#
# The value is expected to be a JSON object (Hash) that maps prefix names
# to namespace URIs, e.g. { "dct" => "http://purl.org/dc/terms/" }.
#
# @param prefixes [Object] value of the "prefixes" key as parsed from JSON
# @return [Array<String>] validation error messages; empty when valid
def self.validate_prefixes(prefixes)
  unless prefixes.is_a?(Hash)
    return ["prefixes should be an object mapping prefix names to URIs"]
  end

  errors = []
  prefixes.each do |prefix, uri|
    errors << "prefix name should be a string: #{prefix.inspect}" unless prefix.is_a?(String)

    unless uri.is_a?(String)
      errors << "prefix URI should be a string: #{uri.inspect}"
      next
    end

    begin
      # Only parseability is checked; the parsed value itself is not needed.
      # NOTE(review): URI.parse also accepts relative references, so this
      # does not guarantee an absolute namespace IRI.
      URI.parse(uri)
    rescue URI::InvalidURIError => e
      errors << "prefix URI '#{uri}' is not a valid URI: #{e.message}"
    end
  end
  errors
end
245274
end
246275
end

lib/mu_search/index_definition.rb

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
require_relative './prefix_utils'
2+
13
module MuSearch
24
# This class represents index definitions as defined in the configuration file of mu-search
35
# in the config file you will find these definitions under the keyword "types"
@@ -58,23 +60,34 @@ def self.create_composite_sub_definitions(composite_definition, definitions)
5860

5961
# builds tuples mapping the index name to the full definition for all provided types
6062
# expects all types as param
61-
def self.from_json_config(all_definitions)
63+
def self.from_json_config(all_definitions, prefixes = {})
6264
all_definitions.collect do |definition|
6365
name = definition["type"]
6466
composite_types = []
6567
if definition["composite_types"]
6668
composite_types = create_composite_sub_definitions(definition, all_definitions)
6769
composite_types.each do |definition|
68-
ensure_uuid_in_properties definition.properties
70+
build_property_definitions(definition.properties, prefixes)
6971
end
7072
else
7173
# ensure uuid is included because it may be used for folding
72-
ensure_uuid_in_properties definition["properties"]
74+
build_property_definitions(definition["properties"], prefixes)
75+
end
76+
77+
# Expand prefixes in rdf_type if present
78+
rdf_type = definition["rdf_type"]
79+
if rdf_type
80+
if rdf_type.is_a?(Array)
81+
rdf_type = rdf_type.map { |t| PrefixUtils.expand_prefix(t, prefixes) }
82+
else
83+
rdf_type = PrefixUtils.expand_prefix(rdf_type, prefixes)
84+
end
7385
end
86+
7487
index_definition = IndexDefinition.new(
7588
name: name,
7689
on_path: definition["on_path"],
77-
rdf_type: definition["rdf_type"],
90+
rdf_type: rdf_type,
7891
composite_types: composite_types,
7992
properties: definition["properties"],
8093
mappings: definition["mappings"],
@@ -84,16 +97,20 @@ def self.from_json_config(all_definitions)
8497
end
8598
end
8699

87-
def self.ensure_uuid_in_properties properties
88-
properties["uuid"] = ["http://mu.semte.ch/vocabularies/core/uuid"] unless properties.key?("uuid")
100+
# Prepares the given properties hash of a type definition.
#
# First makes sure a "uuid" entry exists (see ensure_uuid_property), then
# builds a PropertyDefinition for every entry, passing the configured
# prefixes along. For entries whose definition type is "nested" the same
# preparation is applied recursively to value["properties"].
#
# NOTE(review): the PropertyDefinition built here is only used for the
# "nested" type check and is not stored; confirm that prefix expansion is
# applied again wherever the properties are actually consumed.
def self.build_property_definitions(properties, prefixes)
101+
ensure_uuid_property(properties)
89102
properties.each do |(key, value)|
90-
property_definition = PropertyDefinition.from_json_config(key, value)
103+
property_definition = PropertyDefinition.from_json_config(key, value, prefixes)
91104
if property_definition.type == "nested"
92-
ensure_uuid_in_properties value["properties"]
105+
build_property_definitions(value["properties"], prefixes)
93106
end
94107
end
95108
end
96109

110+
# Adds the default uuid property path to the given properties hash when it
# does not define a "uuid" key yet. The hash is modified in place; an
# existing "uuid" entry is left untouched.
def self.ensure_uuid_property(properties)
  return if properties.key?("uuid")

  properties["uuid"] = ["http://mu.semte.ch/vocabularies/core/uuid"]
end
113+
97114
def type
98115
@name
99116
end

lib/mu_search/property_definition.rb

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
require_relative './prefix_utils'
2+
13
module MuSearch
24
class PropertyDefinition
35
PROPERTY_TYPES = ["simple", "nested", "attachment", "language-string"]
@@ -16,7 +18,7 @@ def initialize(name: , path:, type: "auto", rdf_type: nil, sub_properties:)
1618
end
1719
end
1820

19-
def self.from_json_config(name, config)
21+
def self.from_json_config(name, config, prefixes = {})
2022
type = "simple"
2123
rdf_type = sub_properties = pipeline = nil
2224
if config.is_a?(Hash)
@@ -26,7 +28,7 @@ def self.from_json_config(name, config)
2628
elsif config.key?("properties")
2729
type = "nested"
2830
sub_properties = config["properties"].map do |subname, subconfig|
29-
from_json_config(subname, subconfig)
31+
from_json_config(subname, subconfig, prefixes)
3032
end
3133
rdf_type = config["rdf_type"]
3234
elsif config.key?("type") && config["type"] == "language-string"
@@ -38,6 +40,10 @@ def self.from_json_config(name, config)
3840
path = [config]
3941
end
4042

43+
path = path.map do |p|
44+
PrefixUtils.expand_prefix(p, prefixes)
45+
end
46+
4147
PropertyDefinition.new(
4248
name: name,
4349
type: type,
@@ -46,6 +52,5 @@ def self.from_json_config(name, config)
4652
sub_properties: sub_properties,
4753
)
4854
end
49-
5055
end
5156
end

0 commit comments

Comments
 (0)