From 86e62bc8a1c98cffc79def0914befaa26e3e0497 Mon Sep 17 00:00:00 2001 From: Nordine Bittich Date: Wed, 12 Mar 2025 17:16:47 +0100 Subject: [PATCH 1/3] support indexing for lambert 72 --- Dockerfile | 3 +++ lib/mu_search/document_builder.rb | 23 ++++++++++++++++++++++- lib/mu_search/property_definition.rb | 10 +++++++--- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index cfbb76d..bb0cfde 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,9 @@ FROM semtech/mu-jruby-template:3.1.0 LABEL maintainer="redpencil " + +RUN apt update && apt install gdal-bin -y + # 200MB ENV MAXIMUM_FILE_SIZE="209715200" # seconds diff --git a/lib/mu_search/document_builder.rb b/lib/mu_search/document_builder.rb index 3c7992f..bc75d98 100644 --- a/lib/mu_search/document_builder.rb +++ b/lib/mu_search/document_builder.rb @@ -2,6 +2,8 @@ require '/usr/src/app/sinatra_template/utils' # provided by template require_relative './property_definition' + + module MuSearch ## # This class is responsible for building JSON documents from an IndexDefinition @@ -12,7 +14,7 @@ def initialize(tika:, sparql_client:, attachment_path_base:, logger:) @attachment_path_base = attachment_path_base @cache_path_base = "/cache/" @logger = logger - end +end ## # Builds a document to index for the given resource URI and index_definition @@ -120,8 +122,12 @@ def construct_document_to_index(uri: nil, definitions: property_definitions) matching_values = matching_triples.map { |triple| triple.o } definition = info[:property_definition] + @logger.info("definition type #{definition.type}, lambert?" ) + if definition.type == "simple" index_value = build_simple_property(matching_values) + elsif definition.type == "lambert-72" + index_value = build_lambert_property(matching_values) elsif definition.type == "language-string" index_value = build_language_property(matching_values) elsif definition.type == "attachment" @@ -138,6 +144,21 @@ def construct_document_to_index(uri: nil, definitions: property_definitions) Hash[key_value_tuples] end + def build_lambert_property(values) + loc_map = Hash.new {|hash, key| hash[key] = 0.0} + values.collect do |value| + ## assuming POINT(160167.27757517056 168249.60765740927) + match = value.to_s.match(/POINT\(([\d.]+)\s([\d.]+)\)/) + x_lambert = match[1].to_f + y_lambert = match[2].to_f + output = `echo "#{x_lambert} #{y_lambert}" | gdaltransform -s_srs EPSG:31370 -t_srs EPSG:4326` + lon, lat, _ = output.split(' ') + loc_map["lon"] = lon + loc_map["lat"] = lat + end + [loc_map] + end + # Get the array of values to index for a given SPARQL result set of simple values. # Values are constructed based on the literal datatype. def build_simple_property(values) diff --git a/lib/mu_search/property_definition.rb b/lib/mu_search/property_definition.rb index 6777b28..0038ba3 100644 --- a/lib/mu_search/property_definition.rb +++ b/lib/mu_search/property_definition.rb @@ -1,6 +1,6 @@ module MuSearch class PropertyDefinition - PROPERTY_TYPES = ["simple", "nested", "attachment", "language-string"] + PROPERTY_TYPES = ["simple", "nested", "attachment", "language-string", "lambert-72"] attr_reader :name, :type, :rdf_type, :path, :pipeline, :sub_properties def initialize(name: , path:, type: "auto", rdf_type: nil, sub_properties:) @@ -29,8 +29,12 @@ def self.from_json_config(name, config) from_json_config(subname, subconfig) end rdf_type = config["rdf_type"] - elsif config.key?("type") && config["type"] == "language-string" - type = "language-string" + elsif config.key?("type") + if config["type"] == "language-string" + type = "language-string" + elsif config["type"] == "lambert-72" + type = "lambert-72" + end end elsif config.is_a?(Array) path = config From 36db86904d34adb69f24ea98ed7227f432280563 Mon Sep 17 00:00:00 2001 From: Nordine Bittich Date: Wed, 12 Mar 2025 17:46:18 +0100 Subject: [PATCH 2/3] fix query filter --- framework/elastic_query_builder.rb | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/framework/elastic_query_builder.rb b/framework/elastic_query_builder.rb index 6b037be..077f51b 100644 --- a/framework/elastic_query_builder.rb +++ b/framework/elastic_query_builder.rb @@ -184,6 +184,26 @@ def construct_es_query_term(filter_key, value) terms: { field => value.split(",") } } end + when "geo" + ensure_single_field_for flag, fields do |field| + query = value.split(",") # just for the poc + lat = query[0].to_f + lon = query[1].to_f + distance = query[2] + { + bool: { + filter: { + geo_distance: { + distance: distance, + field => { + lat: lat, + lon: lon + } + } + } + } + } + end when "fuzzy_phrase" ensure_single_field_for flag, fields do |field| clauses = value.split(" ").map do |word| From e59e8a09ed9cd6d69da695cd9615ab8084fcefbc Mon Sep 17 00:00:00 2001 From: Nordine Bittich Date: Mon, 2 Jun 2025 11:27:11 +0200 Subject: [PATCH 3/3] add gh workflow and some checks --- .github/workflows/latest.yml | 26 ++++++++++++++++++++++++++ lib/mu_search/document_builder.rb | 5 +++-- 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/latest.yml diff --git a/.github/workflows/latest.yml b/.github/workflows/latest.yml new file mode 100644 index 0000000..5075196 --- /dev/null +++ b/.github/workflows/latest.yml @@ -0,0 +1,26 @@ +name: Latest + +on: + push: + branches: + - "master" +env: + CARGO_TERM_COLOR: always +jobs: + docker: + runs-on: ubuntu-latest + steps: + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true + tags: nbittich/mu-search:latest diff --git a/lib/mu_search/document_builder.rb b/lib/mu_search/document_builder.rb index bc75d98..d677f83 100644 --- a/lib/mu_search/document_builder.rb +++ b/lib/mu_search/document_builder.rb @@ -122,8 +122,6 @@ def construct_document_to_index(uri: nil, definitions: property_definitions) matching_values = matching_triples.map { |triple| triple.o } definition = info[:property_definition] - @logger.info("definition type #{definition.type}, lambert?" ) - if definition.type == "simple" index_value = build_simple_property(matching_values) elsif definition.type == "lambert-72" @@ -149,10 +147,13 @@ def build_lambert_property(values) values.collect do |value| ## assuming POINT(160167.27757517056 168249.60765740927) match = value.to_s.match(/POINT\(([\d.]+)\s([\d.]+)\)/) + next unless match && match.length == 2 x_lambert = match[1].to_f y_lambert = match[2].to_f + next unless x_lambert && y_lambert output = `echo "#{x_lambert} #{y_lambert}" | gdaltransform -s_srs EPSG:31370 -t_srs EPSG:4326` lon, lat, _ = output.split(' ') + next unless lon && lat loc_map["lon"] = lon loc_map["lat"] = lat end