Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/settings.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"permissions": {
"allow": [
"Bash(*)",
"Bash",
"Edit",
"MultiEdit",
"NotebookEdit",
Expand Down
45 changes: 45 additions & 0 deletions project.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,48 @@ docs/specification/compliance.md: tests/test_compliance/test_compliance_suite.py
doctest:
$(RUN) python -m doctest --option ELLIPSIS --option NORMALIZE_WHITESPACE src/linkml_map/*.py src/linkml_map/*/*.py

# Transform a LinkML-Map TransformationSpecification to FAIR Mappings Schema
# This demonstrates mapping metadata elements from linkml-map to fair-mappings-schema
# Root directory of the FAIR mappings example; the paths below are derived from it.
FAIR_EXAMPLE_DIR = tests/input/examples/fair_mappings_metadata
# Directory that receives the generated FAIR Mappings YAML files.
FAIR_OUTPUT_DIR = $(FAIR_EXAMPLE_DIR)/output

# Transformation specification handed to `linkml-map map-data` via `-T`.
FAIR_LINKMLMAP_SPEC = $(FAIR_EXAMPLE_DIR)/transform/linkmlmap-to-fair.transformation.yaml
# Sample document given to `map-data` as its positional input argument.
LINKML_SAMPLE_INPUT = $(FAIR_EXAMPLE_DIR)/data/sample-linkmlmap-spec.yaml
# Schema describing the sample input, handed to `map-data` via `-s`.
LINKMLMAP_SCHEMA = src/linkml_map/datamodel/transformer_model.yaml

# Render the sample TransformationSpecification as FAIR Mappings Schema YAML.
#
# Prerequisites are every file the recipe reads: the sample input, the
# transformation spec (-T) and the source schema (-s). Listing the schema means
# a schema edit triggers a rebuild and a missing schema is reported by Make
# instead of surfacing as a linkml-map error.
#
# NOTE(review): --unrestricted-eval presumably lifts restrictions on expression
# evaluation in the spec; only run this against trusted specs -- confirm.
$(FAIR_OUTPUT_DIR)/linkmlmap-fair-mappings.yaml: $(LINKML_SAMPLE_INPUT) $(FAIR_LINKMLMAP_SPEC) $(LINKMLMAP_SCHEMA)
	mkdir -p $(@D)
	$(RUN) linkml-map map-data \
		-T $(FAIR_LINKMLMAP_SPEC) \
		-s $(LINKMLMAP_SCHEMA) \
		--source-type TransformationSpecification \
		--unrestricted-eval \
		$(LINKML_SAMPLE_INPUT) \
		-o $@

# Convenience target: build the LinkML-Map -> FAIR Mappings example output and
# report where it was written. Declared phony because it names a command, not a
# file; without this a stray file called `transform-to-fair` would make the
# target look up to date and suppress the messages.
.PHONY: transform-to-fair
transform-to-fair: $(FAIR_OUTPUT_DIR)/linkmlmap-fair-mappings.yaml
	@echo "Transformed $(LINKML_SAMPLE_INPUT) to FAIR Mappings Schema format"
	@echo "Output: $<"

# Transform an SSSOM Mapping Set to FAIR Mappings Schema
# This demonstrates mapping SSSOM metadata to fair-mappings-schema
# Transformation specification handed to `linkml-map map-data` via `-T`.
SSSOM_TRANSFORM_SPEC = $(FAIR_EXAMPLE_DIR)/transform/sssom-to-fair.transformation.yaml
# Sample SSSOM mapping set given to `map-data` as its positional input argument.
SSSOM_SAMPLE_INPUT = $(FAIR_EXAMPLE_DIR)/data/sample-sssom-mapping-set.yaml
# SSSOM LinkML schema handed to `map-data` via `-s`.
# NOTE(review): the default below is an absolute path inside one developer's
# home directory and will not exist on other machines or in CI. It is kept only
# so existing local workflows keep working; `?=` lets it be overridden from the
# environment as well as the command line, e.g.
#   make sssom-to-fair SSSOM_SCHEMA=/path/to/sssom_schema.yaml
# TODO: replace the default with a repository-relative or package-resolved path.
SSSOM_SCHEMA ?= /Users/matentzn/ws/SSSOM/src/sssom_schema/schema/sssom_schema.yaml

# Render the sample SSSOM mapping set as FAIR Mappings Schema YAML.
#
# Prerequisites are every file the recipe reads: the sample input, the
# transformation spec (-T) and the SSSOM schema (-s). Listing the schema means a
# schema edit triggers a rebuild, and an unavailable schema path is reported by
# Make ("No rule to make target ...") before linkml-map is started.
#
# NOTE(review): --unrestricted-eval presumably lifts restrictions on expression
# evaluation in the spec; only run this against trusted specs -- confirm.
$(FAIR_OUTPUT_DIR)/sssom-fair-mappings.yaml: $(SSSOM_SAMPLE_INPUT) $(SSSOM_TRANSFORM_SPEC) $(SSSOM_SCHEMA)
	mkdir -p $(@D)
	$(RUN) linkml-map map-data \
		-T $(SSSOM_TRANSFORM_SPEC) \
		-s $(SSSOM_SCHEMA) \
		--source-type 'mapping set' \
		--unrestricted-eval \
		$(SSSOM_SAMPLE_INPUT) \
		-o $@

# Convenience target: build the SSSOM -> FAIR Mappings example output and report
# where it was written. Declared phony because it names a command, not a file;
# without this a stray file called `sssom-to-fair` would make the target look up
# to date and suppress the messages.
.PHONY: sssom-to-fair
sssom-to-fair: $(FAIR_OUTPUT_DIR)/sssom-fair-mappings.yaml
	@echo "Transformed $(SSSOM_SAMPLE_INPUT) to FAIR Mappings Schema format"
	@echo "Output: $<"

# Run all FAIR mappings transformations
# Aggregate target with no recipe of its own; declared phony so that a file named
# `all-fair-transforms` cannot mask it.
.PHONY: all-fair-transforms
all-fair-transforms: transform-to-fair sssom-to-fair
128 changes: 119 additions & 9 deletions src/linkml_map/datamodel/transformer_model.py

Large diffs are not rendered by default.

100 changes: 100 additions & 0 deletions src/linkml_map/datamodel/transformer_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ prefixes:
rdfs: http://www.w3.org/2000/01/rdf-schema#
sh: http://www.w3.org/ns/shacl#
STATO: http://purl.obolibrary.org/obo/STATO_
sssom: https://w3id.org/sssom/
default_prefix: linkmlmap

imports:
Expand Down Expand Up @@ -73,8 +74,21 @@ classes:
description: Unique identifier for this transformation specification
slot_uri: schema:identifier
title:
# Corresponds to fair_mappings_schema:MappingSpecification.name
description: human readable title for this transformation specification
slot_uri: dcterms:title
publication_date:
description: date of publication of this transformation specification
range: date
slot_uri: dcterms:issued
license:
description: license under which this transformation specification is published
range: uriorcurie
slot_uri: dcterms:license
version:
  description: version of this transformation specification
  range: string
  # DCMI Metadata Terms defines hasVersion / isVersionOf but no `version`
  # property, so `dcterms:version` does not resolve to a real term.
  # schema.org does define `version`, and the `schema:` prefix is already used
  # by other slots in this file.
  slot_uri: schema:version
prefixes:
description: maps prefixes to URL expansions
range: KeyVal
Expand All @@ -86,15 +100,47 @@ classes:
multivalued: true
inlined: true
source_schema:
# Corresponds to fair_mappings_schema:MappingSpecification.subject_source
description: name of the schema that describes the source (input) objects
target_schema:
# Corresponds to fair_mappings_schema:MappingSpecification.object_source
description: name of the schema that describes the target (output) objects
source_schema_patches:
range: Any
description: >-
Schema patches to apply to the source schema before transformation.
Useful for adding foreign key relationships to auto-generated schemas.
Uses LinkML schema YAML structure (classes, slots, attributes, etc.).
creator:
description: A list of creators of this transformation specification
range: Agent
multivalued: true
inlined: true
inlined_as_list: true
slot_uri: dcterms:creator
author:
description: A list of authors of this transformation specification
range: Agent
multivalued: true
inlined: true
inlined_as_list: true
slot_uri: dcterms:contributor
reviewer:
description: A list of reviewers of this transformation specification
range: Agent
multivalued: true
inlined: true
inlined_as_list: true
mapping_method:
description: The method used to create this mapping, e.g. manual curation, automated
mapping, etc.
range: uriorcurie
documentation:
description: URL or reference to documentation for the mapping specification
range: uri
content_url:
description: Reference to the actual content of the mapping specification
range: uri
class_derivations:
description: >-
Instructions on how to derive a set of classes in the target schema
Expand Down Expand Up @@ -458,6 +504,60 @@ classes:
value:
range: Any

# Aligned with fair_mappings_schema:Agent
# Abstract base class for the agent kinds defined below it in this schema
# (Person, Organization, Software); used as the range of the creator, author
# and reviewer slots of the transformation specification.
Agent:
abstract: true
description: An entity that can create or contribute to a digital object, such as an author or creator.
attributes:
# Identifier slot: agents are keyed by this URI/CURIE.
id:
identifier: true
description: Identifier for the agent
range: uriorcurie
name:
description: Name of the agent
range: string
slot_uri: schema:name
# Type designator: the value names the class of the instance
# (e.g. `type: Person`), which is how an abstract Agent range gets resolved.
type:
description: Type of the agent
range: string
designates_type: true

# Aligned with fair_mappings_schema:Person
# Agent subclass for individual people; adds `orcid` and `affiliation` to the
# attributes declared on Agent.
Person:
is_a: Agent
description: An individual person who contributes to a mapping specification
attributes:
# Declared separately from the `id` attribute on Agent.
orcid:
description: ORCID identifier for the person
range: uriorcurie
affiliation:
description: Institutional affiliation of the person
range: string

# Aligned with fair_mappings_schema:Organization
# Agent subclass for organizations and institutions; adds `ror_id` and `url`
# to the attributes declared on Agent.
Organization:
is_a: Agent
description: An organization or institution that contributes to a mapping specification
attributes:
# Declared separately from the `id` attribute on Agent.
ror_id:
description: ROR (Research Organization Registry) identifier
range: uriorcurie
url:
description: URL or web address of the organization
range: uri

# Aligned with fair_mappings_schema:Software
# Agent subclass for software tools; adds `version` and `repository_url` to the
# attributes declared on Agent.
Software:
is_a: Agent
description: A software tool or system used in creating mappings
attributes:
# Version of the software itself (a plain string).
version:
description: Version of the software
range: string
repository_url:
description: URL to a code repository
range: uri

CopyDirective:
description: >-
Instructs a Schema Mapper in how to map to a target schema. Not used for data transformation.
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Sample TransformationSpecification instance for testing transformation to FAIR Mappings Schema
# This example includes both metadata AND actual transformation content.
# When transformed to FAIR Mappings Schema, only the metadata elements are extracted;
# the class_derivations, enum_derivations etc. are linkml-map specific and not part of FAIR Mappings.
#
# NOTE: class_derivations and enum_derivations are commented out below due to a bug
# in dynamic_object.py that doesn't handle key-based (vs identifier-based) classes.
# The actual transformation would ignore these anyway since FAIR Mappings Schema
# doesn't have equivalents for these linkml-map specific elements.

id: https://example.org/transformations/gene-expression-mapping
title: Gene Expression Data Transformation
description: |-
A transformation specification that maps gene expression data from
a legacy format to a standardized biomedical data model.

This specification includes class derivations for:
- GeneExpressionMixin (from GeneExpressionRecord)
- OrganismTaxon (from SampleMetadata)
- Study (from ExperimentInfo)

And enum derivations for:
- ExpressionLevelCategory (from LegacyExpressionLevel)
publication_date: "2024-01-15"
license: https://creativecommons.org/licenses/by/4.0/
version: "1.0.0"
mapping_method: semapv:ManualMappingCuration
documentation: https://example.org/docs/gene-expression-mapping
content_url: https://example.org/content/gene-expression-mapping.yaml

# Using Person type for creator/author/reviewer
creator:
- type: Person
id: orcid:0000-0002-1234-5678
name: Jane Doe

author:
- type: Person
id: orcid:0000-0003-9876-5432
name: John Smith

reviewer:
- type: Person
id: orcid:0000-0001-1111-2222
name: Alice Johnson

# source_schema and target_schema (simple strings in linkml-map)
source_schema: legacy_gene_expression.yaml
target_schema: biolink_model.yaml

# ============================================================================
# ACTUAL TRANSFORMATION CONTENT (commented out due to dynamic_object bug)
# These elements are linkml-map specific and won't appear in FAIR Mappings output
# ============================================================================
#
# class_derivations:
# GeneExpressionMixin:
# populated_from: GeneExpressionRecord
# slot_derivations:
# id:
# populated_from: record_id
# gene_id:
# populated_from: gene_symbol
#
# OrganismTaxon:
# populated_from: SampleMetadata
# slot_derivations:
# id:
# populated_from: taxon_id
#
# enum_derivations:
# ExpressionLevelCategory:
# populated_from: LegacyExpressionLevel
# permissible_value_derivations:
# HIGH:
# populated_from: H
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Sample SSSOM Mapping Set instance for testing transformation to FAIR Mappings Schema
# This demonstrates how SSSOM metadata maps to FAIR Mappings Schema MappingSpecification
mapping_set_id: http://purl.obolibrary.org/obo/mondo/mappings/mondo_exactmatch_ncit.sssom.tsv
mapping_set_title: Mondo to NCIT Exact Mappings
mapping_set_description: |-
This mapping set contains exact mappings between Mondo Disease Ontology
and NCI Thesaurus (NCIT) terms. These mappings were curated by the
Monarch Initiative team to support disease data integration.
mapping_set_version: "2024-01-15"
license: https://creativecommons.org/publicdomain/zero/1.0/
publication_date: "2024-01-15"

# Creator and author information (SSSOM uses ID + label pattern)
creator_id:
- orcid:0000-0002-7356-1779
- orcid:0000-0002-6601-2165
creator_label:
- Nicolas Matentzoglu
- Chris Mungall

author_id:
- orcid:0000-0002-7356-1779
author_label:
- Nicolas Matentzoglu

reviewer_id:
- orcid:0000-0001-9114-8737
reviewer_label:
- Melissa Haendel

# Source information
subject_source: obo:mondo.owl
subject_source_version: http://purl.obolibrary.org/obo/mondo/releases/2024-01-03/mondo.owl
object_source: obo:ncit.owl
object_source_version: http://purl.obolibrary.org/obo/ncit/releases/2024-01/ncit.owl

# Mapping provenance
mapping_tool: https://github.com/mapping-commons/sssom-py
mapping_tool_version: "0.4.0"
mapping_date: "2024-01-10"
mapping_provider: https://monarchinitiative.org/

# Sample mappings (demonstrating that mappings are preserved but not part of FAIR Mappings output)
mappings:
- subject_id: MONDO:0005015
subject_label: diabetes mellitus
predicate_id: skos:exactMatch
object_id: NCIT:C2985
object_label: Diabetes Mellitus
mapping_justification: semapv:ManualMappingCuration
confidence: 0.95

- subject_id: MONDO:0004992
subject_label: cancer
predicate_id: skos:exactMatch
object_id: NCIT:C9305
object_label: Malignant Neoplasm
mapping_justification: semapv:ManualMappingCuration
confidence: 0.90

- subject_id: MONDO:0005301
subject_label: multiple sclerosis
predicate_id: skos:exactMatch
object_id: NCIT:C3243
object_label: Multiple Sclerosis
mapping_justification: semapv:LexicalMatching
confidence: 0.85
match_string:
- multiple sclerosis
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
id: https://example.org/transformations/gene-expression-mapping
name: Gene Expression Data Transformation
description: 'A transformation specification that maps gene expression data from

a legacy format to a standardized biomedical data model.


This specification includes class derivations for:

- GeneExpressionMixin (from GeneExpressionRecord)

- OrganismTaxon (from SampleMetadata)

- Study (from ExperimentInfo)


And enum derivations for:

- ExpressionLevelCategory (from LegacyExpressionLevel)'
publication_date: '2024-01-15'
license: https://creativecommons.org/licenses/by/4.0/
version: 1.0.0
mapping_method: semapv:ManualMappingCuration
documentation: https://example.org/docs/gene-expression-mapping
content_url: https://example.org/content/gene-expression-mapping.yaml
creator:
- id: orcid:0000-0002-1234-5678
name: Jane Doe
type: Person
author:
- id: orcid:0000-0003-9876-5432
name: John Smith
type: Person
reviewer:
- id: orcid:0000-0001-1111-2222
name: Alice Johnson
type: Person
subject_source:
name: legacy_gene_expression.yaml
object_source:
name: biolink_model.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
id: http://purl.obolibrary.org/obo/mondo/mappings/mondo_exactmatch_ncit.sssom.tsv
name: Mondo to NCIT Exact Mappings
description: 'This mapping set contains exact mappings between Mondo Disease Ontology

and NCI Thesaurus (NCIT) terms. These mappings were curated by the

Monarch Initiative team to support disease data integration.'
version: '2024-01-15'
license: https://creativecommons.org/publicdomain/zero/1.0/
publication_date: '2024-01-15'
type: sssom
mapping_method: https://github.com/mapping-commons/sssom-py
Loading