Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/termium.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
require "glossarist"

require "lutaml/model"
require "lutaml/model/xml_adapter/nokogiri_adapter"
require "lutaml/model/xml/nokogiri_adapter"

# Select the Nokogiri adapter as the XML adapter used by Lutaml::Model.
Lutaml::Model::Config.configure do |config|
  config.xml_adapter = Lutaml::Model::Xml::NokogiriAdapter
end

module Termium
Expand Down
12 changes: 12 additions & 0 deletions lib/termium/abbreviation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,17 @@ def to_h

set
end

# Builds the Glossarist abbreviation designation for this abbreviation.
#
# The normative status is "deprecated" when +deprecated+ is truthy and
# "preferred" otherwise; gender and part of speech are passed along only
# when they are set.
#
# @return [Glossarist::Designation::Abbreviation]
def to_designation
  designation_attrs = { designation: value }
  designation_attrs[:normative_status] = deprecated ? "deprecated" : "preferred"

  { gender: gender, part_of_speech: part_of_speech }.each do |field, field_value|
    designation_attrs[field] = field_value if field_value
  end

  Glossarist::Designation::Abbreviation.new(designation_attrs)
end
end
end
19 changes: 11 additions & 8 deletions lib/termium/core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,13 @@ def uuid(str = identification_number)
# details="Compartment - ISO/IEC JTC 1 Information Technology Vocabulary" />
def to_concept(options = {})
Glossarist::ManagedConcept.new.tap do |concept|
# The way to set the universal concept's identifier: data.identifier
concept.id = identification_number
# V2: Create new data object to ensure it's serialized (not marked as default)
concept.data = Glossarist::ManagedConceptData.new(
id: identification_number,
sources: concept_sources
)

concept.uuid = uuid
concept.id = uuid

# Assume no related concepts
concept.related = []
Expand All @@ -60,19 +63,19 @@ def to_concept(options = {})
concept.date_accepted = options[:date_accepted]
end

language_module.map do |lang_mod|
language_module.each do |lang_mod|
localized_concept = lang_mod.to_concept(options)

# TODO: This is needed to skip the empty french entries of 10031781 and 10031778
next if localized_concept.nil?

localized_concept.id = identification_number
localized_concept.uuid = uuid("#{identification_number}-#{lang_mod.language}")
localized_concept.data.id = identification_number
localized_concept.id = uuid("#{identification_number}-#{lang_mod.language}")

universal_entry.each do |entry|
localized_concept.notes << Glossarist::DetailedDefinition.new(content: entry.value)
localized_concept.data.notes << Glossarist::DetailedDefinition.new(content: entry.value)
end
localized_concept.sources = concept_sources
localized_concept.data.sources = concept_sources
concept.add_localization(localized_concept)
end
end
Expand Down
14 changes: 14 additions & 0 deletions lib/termium/entry_term.rb
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,19 @@ def to_h

set
end

# Converts this entry term into a Glossarist expression designation.
#
# The designation text and normative status are always supplied; the
# geographical area, plurality, gender and part of speech are added only
# when they hold a truthy value.
#
# @return [Glossarist::Designation::Expression]
def to_designation
  required = { designation: value, normative_status: normative_status }

  optional = {
    geographical_area: geographical_area,
    plurality: plurality,
    gender: gender,
    part_of_speech: part_of_speech,
  }.select { |_field, field_value| field_value }

  Glossarist::Designation::Expression.new(required.merge(optional))
end
end
end
5 changes: 3 additions & 2 deletions lib/termium/extract.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ class Extract < Lutaml::Model::Serializable

# Converts every core entry of this extract and stores each result in a new
# managed concept collection.
#
# @param options [Hash] passed through unchanged to each entry's +to_concept+
# @return [Glossarist::ManagedConceptCollection] the collection the converted
#   concepts were stored into
def to_concept(options = {})
  Glossarist::ManagedConceptCollection.new.tap do |coll|
    core.each do |managed_concept|
      coll.store(managed_concept.to_concept(options))
    end
  end
end
Expand Down
21 changes: 13 additions & 8 deletions lib/termium/language_module.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def abbreviations

# Lists the designations of this language module: entry terms followed by
# abbreviations.
#
# @return [Array] the combined list with +nil+ entries removed
def designations
  # NOTE: entry_term is a collection
  (entry_term + abbreviations).compact
end

def to_h
Expand All @@ -69,17 +69,22 @@ def to_h
end

# Builds the localized concept for this language module.
#
# The concept data carries the mapped language code, the converted
# designations, and the definition, notes and examples each wrapped in a
# Glossarist::DetailedDefinition.
#
# @param options [Hash] register parameters; only +:date_accepted+ is read
# @return [Glossarist::LocalizedConcept, nil] +nil+ when there is no definition
def to_concept(options = {})
  return nil unless definition

  Glossarist::LocalizedConcept.new.tap do |concept|
    concept.data = Glossarist::ConceptData.new(
      language_code: LANGUAGE_CODE_MAPPING[language.downcase],
      terms: designations.map(&:to_designation),
      definition: [Glossarist::DetailedDefinition.new(content: definition)],
      notes: notes.map { |n| Glossarist::DetailedDefinition.new(content: n) },
      examples: examples.map { |e| Glossarist::DetailedDefinition.new(content: e) },
      entry_status: "valid",
      domain: domain
    )

    # Fill in register parameters
    concept.date_accepted = options[:date_accepted] if options[:date_accepted]
  end
end
end
Expand Down
10 changes: 5 additions & 5 deletions lib/termium/source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ def content
end

# Wraps this source's content in a Glossarist concept source.
#
# @return [Glossarist::ConceptSource] a "lineage" source with status
#   "identical" whose origin is a citation referencing +content+
def to_concept_source
  Glossarist::ConceptSource.new(
    type: "lineage",
    status: "identical",
    origin: Glossarist::Citation.new(ref: content)
  )
end
end
end
90 changes: 80 additions & 10 deletions spec/termium_spec.rb
Original file line number Diff line number Diff line change
@@ -1,17 +1,87 @@
# frozen_string_literal: true

require "yaml"

# Converts the Characters.xml fixture to Glossarist files on disk and checks
# the structure of the YAML that was written. The output directory is created
# before and removed after every example.
RSpec.describe Termium do
  let(:termium_extract_file) { fixtures_path("Characters.xml") }
  let(:glossarist_output_dir) { fixtures_path("Characters-Glossarist") }

  # Parses one generated YAML file, allowing Date and Time values.
  def load_yaml(path)
    YAML.safe_load(File.read(path), permitted_classes: [Date, Time])
  end

  # Lists the generated YAML files in the given output subfolder.
  def generated_files(subfolder)
    Dir.glob(File.join(glossarist_output_dir, subfolder, "*.yaml"))
  end

  before do
    FileUtils.mkdir_p(glossarist_output_dir)
  end

  after do
    FileUtils.rm_rf(glossarist_output_dir)
  end

  describe "V2 format conversion" do
    let(:termium_extract) { Termium::Extract.from_xml(File.read(termium_extract_file)) }
    let(:glossarist_col) { termium_extract.to_concept }

    before do
      glossarist_col.save_to_files(glossarist_output_dir)
    end

    it "creates concept and localized_concept directories" do
      expect(Dir.exist?(File.join(glossarist_output_dir, "concept"))).to be true
      expect(Dir.exist?(File.join(glossarist_output_dir, "localized_concept"))).to be true
    end

    it "creates concept files with V2 structure" do
      concept_files = generated_files("concept")
      expect(concept_files).not_to be_empty

      concept_files.each do |file|
        concept = load_yaml(file)

        # V2: concept must have id (UUID) at root level
        expect(concept).to have_key("id")
        expect(concept["id"]).to match(/\A[0-9a-f-]{36}\z/)

        # V2: concept must have data with identifier and localized_concepts
        expect(concept).to have_key("data")
        expect(concept["data"]).to have_key("identifier")
        expect(concept["data"]).to have_key("localized_concepts")
        expect(concept["data"]["localized_concepts"]).to be_a(Hash)
      end
    end

    it "creates localized_concept files with V2 structure" do
      localized_files = generated_files("localized_concept")
      expect(localized_files).not_to be_empty

      localized_files.each do |file|
        localized = load_yaml(file)

        # V2: localized concept must have id (UUID) at root level
        expect(localized).to have_key("id")
        expect(localized["id"]).to match(/\A[0-9a-f-]{36}\z/)

        # V2: localized concept must have data with language_code and terms
        expect(localized).to have_key("data")
        expect(localized["data"]).to have_key("language_code")
        expect(localized["data"]["language_code"]).to match(/\A[a-z]{3}\z/)
        expect(localized["data"]).to have_key("terms")
        expect(localized["data"]["terms"]).to be_an(Array)
      end
    end

    it "links concepts to localized concepts via UUID" do
      localized_uuids = generated_files("localized_concept").map { |file| load_yaml(file)["id"] }

      generated_files("concept").each do |file|
        referenced_uuids = load_yaml(file)["data"]["localized_concepts"].values

        referenced_uuids.each do |uuid|
          expect(localized_uuids).to include(uuid)
        end
      end
    end
  end
end
Loading