Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ Reading systems:
| Geneways | [`indra.sources.geneways`](https://indra.readthedocs.io/en/latest/modules/sources/geneways/index.html) | https://www.ncbi.nlm.nih.gov/pubmed/15016385 |
| GNBR | [`indra.sources.gnbr`](https://indra.readthedocs.io/en/latest/modules/sources/gnbr/index.html) | https://zenodo.org/record/3459420 |
| SemRep | [`indra.sources.semrep`](https://indra.readthedocs.io/en/latest/modules/sources/semrep.html) | https://github.com/lhncbc/SemRep |
| INDRA-BERT | [`indra.sources.indra_bert`](https://indra.readthedocs.io/en/latest/modules/sources/indra_bert.html) | https://github.com/gyorilab/indra_bert |


Biological pathway databases:
Expand Down
2 changes: 1 addition & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@
'nltk', 'nltk.stem', 'nltk.stem.snowball', 'kappy', 'openpyxl',
'reportlab', 'reportlab.lib', 'reportlab.lib.enums',
'reportlab.lib.pagesizes', 'reportlab.platypus', 'reportlab.lib.styles',
'reportlab.lib.units'
'reportlab.lib.units', 'indra_bert'
]
for mod_name in MOCK_MODULES:
sys.modules[mod_name] = mock.MagicMock()
Expand Down
1 change: 1 addition & 0 deletions doc/modules/sources/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Reading Systems
eidos/index
gnbr/index
semrep
indra_bert

Molecular Pathway Databases
---------------------------
Expand Down
18 changes: 18 additions & 0 deletions doc/modules/sources/indra_bert/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
INDRA-BERT (:py:mod:`indra.sources.indra_bert`)
===============================================

.. automodule:: indra.sources.indra_bert
:members:

INDRA-BERT API (:py:mod:`indra.sources.indra_bert.api`)
-------------------------------------------------------

.. automodule:: indra.sources.indra_bert.api
:members:

INDRA-BERT Processor (:py:mod:`indra.sources.indra_bert.processor`)
-------------------------------------------------------------------

.. automodule:: indra.sources.indra_bert.processor
:members:

9 changes: 7 additions & 2 deletions indra/resources/default_belief_probs.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@
"creeds": 0.01,
"ubibrowser": 0.01,
"acsn": 0.01,
"semrep": 0.05
"semrep": 0.05,
"wormbase": 0.01,
"indra_bert": 0.05,
"indra_gpt": 0.05
},
"rand": {
"eidos": 0.3,
Expand Down Expand Up @@ -72,6 +75,8 @@
"ubibrowser": 0.1,
"acsn": 0.1,
"semrep": 0.3,
"wormbase": 0.1
"wormbase": 0.1,
"indra_bert": 0.3,
"indra_gpt": 0.3
}
}
10 changes: 10 additions & 0 deletions indra/resources/source_info.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@
"background-color": "#fdb462"
}
},
"indra_bert": {
"name": "INDRA-BERT",
"link": "https://github.com/gyorilab/indra_bert",
"type": "reader",
"domain": "biology",
"default_style": {
"color": "white",
"background-color": "#2d9636"
}
},
"tees": {
"name": "TEES",
"link": "https://github.com/jbjorne/TEES",
Expand Down
1 change: 1 addition & 0 deletions indra/sources/indra_bert/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .api import *
92 changes: 92 additions & 0 deletions indra/sources/indra_bert/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
__all__ = ['process_text']

import os
from tqdm import tqdm
import logging

logger = logging.getLogger(__name__)

try:
from indra_bert import IndraStructuredExtractor
except ImportError as e:
logger.error("""Could not import indra_bert for reading with INDRA BERT.
Please make sure the indra_bert extra dependencies of
INDRA are installed.""")
raise ImportError(e)

from .processor import IndraBertProcessor

#: Default Hugging Face model repositories for the INDRA-BERT pipeline.
DEFAULT_NER_MODEL = "thomaslim6793/indra_bert_ner_agent_detection"
DEFAULT_STMT_MODEL = "thomaslim6793/indra_bert_indra_stmt_classifier"
DEFAULT_ROLE_MODEL = "thomaslim6793/indra_bert_indra_stmt_agents_role_assigner"
DEFAULT_MUTATIONS_MODEL = "thomaslim6793/indra_bert_agent_mutation_detection"


def create_extractor(
    ner_model_path=DEFAULT_NER_MODEL,
    stmt_model_path=DEFAULT_STMT_MODEL,
    role_model_path=DEFAULT_ROLE_MODEL,
    mutations_model_path=DEFAULT_MUTATIONS_MODEL,
    stmt_conf_threshold=0.95
):
    """Return an IndraStructuredExtractor for the given model paths.

    If the given (possibly local) model paths cannot be loaded, fall back
    to the default models hosted on Hugging Face, which are downloaded
    automatically if not already cached.

    Parameters
    ----------
    ner_model_path : str
        Path or Hugging Face repository of the agent NER model.
    stmt_model_path : str
        Path or Hugging Face repository of the statement classifier model.
    role_model_path : str
        Path or Hugging Face repository of the agent role assignment model.
    mutations_model_path : str
        Path or Hugging Face repository of the agent mutation detection model.
    stmt_conf_threshold : float
        Confidence threshold below which extracted statements are discarded.

    Returns
    -------
    indra_bert.IndraStructuredExtractor
        The initialized extractor.
    """
    try:
        ise = IndraStructuredExtractor(
            ner_model_path=ner_model_path,
            stmt_model_path=stmt_model_path,
            role_model_path=role_model_path,
            mutations_model_path=mutations_model_path,
            stmt_conf_threshold=stmt_conf_threshold
        )
    except Exception as e:
        # The given paths could not be loaded; retry with the default
        # models, downloading them from Hugging Face if necessary.
        logger.warning("Could not load INDRA-BERT models (%s); "
                       "downloading default models from Hugging Face", e)
        ise = IndraStructuredExtractor(
            ner_model_path=DEFAULT_NER_MODEL,
            stmt_model_path=DEFAULT_STMT_MODEL,
            role_model_path=DEFAULT_ROLE_MODEL,
            mutations_model_path=DEFAULT_MUTATIONS_MODEL,
            stmt_conf_threshold=stmt_conf_threshold
        )
    logger.info("Loaded ner_model from: %s", ise.ner_model_local_path)
    logger.info("Loaded stmt_model from: %s", ise.stmt_model_local_path)
    logger.info("Loaded role_model from: %s", ise.role_model_local_path)
    logger.info("Loaded mutations_model from: %s",
                ise.mutations_model_local_path)
    return ise

def process_text(text,
                 ner_model_path="thomaslim6793/indra_bert_ner_agent_detection",
                 stmt_model_path="thomaslim6793/indra_bert_indra_stmt_classifier",
                 role_model_path="thomaslim6793/indra_bert_indra_stmt_agents_role_assigner",
                 mutations_model_path="thomaslim6793/indra_bert_agent_mutation_detection",
                 stmt_conf_threshold=0.95,
                 grounder=None):
    """Read a single text with INDRA-BERT and return a processor.

    Parameters
    ----------
    text : str
        The text to extract INDRA Statements from.
    ner_model_path : str
        Path or Hugging Face repository of the agent NER model.
    stmt_model_path : str
        Path or Hugging Face repository of the statement classifier model.
    role_model_path : str
        Path or Hugging Face repository of the agent role assignment model.
    mutations_model_path : str
        Path or Hugging Face repository of the agent mutation detection model.
    stmt_conf_threshold : float
        Confidence threshold below which extracted statements are discarded.
    grounder : Optional[Callable]
        An optional grounding function passed through to the processor.

    Returns
    -------
    tuple
        An IndraBertProcessor with extracted statements in its `statements`
        attribute, and the underlying extractor for potential reuse.
    """
    extractor = create_extractor(
        ner_model_path=ner_model_path,
        stmt_model_path=stmt_model_path,
        role_model_path=role_model_path,
        mutations_model_path=mutations_model_path,
        stmt_conf_threshold=stmt_conf_threshold
    )
    stmts_json = extractor.get_json_indra_stmts(text)
    processor = IndraBertProcessor(stmts_json, grounder=grounder)
    return processor, extractor

def process_texts(texts,
                  ner_model_path="thomaslim6793/indra_bert_ner_agent_detection",
                  stmt_model_path="thomaslim6793/indra_bert_indra_stmt_classifier",
                  role_model_path="thomaslim6793/indra_bert_indra_stmt_agents_role_assigner",
                  mutations_model_path="thomaslim6793/indra_bert_agent_mutation_detection",
                  stmt_conf_threshold=0.95,
                  grounder=None):
    """Read multiple texts with INDRA-BERT and return a list of processors.

    Parameters
    ----------
    texts : Iterable[str]
        An iterable of texts to extract INDRA Statements from. A single
        bare string is rejected since it would be iterated character by
        character.
    ner_model_path : str
        Path or Hugging Face repository of the agent NER model.
    stmt_model_path : str
        Path or Hugging Face repository of the statement classifier model.
    role_model_path : str
        Path or Hugging Face repository of the agent role assignment model.
    mutations_model_path : str
        Path or Hugging Face repository of the agent mutation detection model.
    stmt_conf_threshold : float
        Confidence threshold below which extracted statements are discarded.
    grounder : Optional[Callable]
        An optional grounding function passed through to the processors.

    Returns
    -------
    tuple
        A list of IndraBertProcessor objects, one per input text, and the
        underlying extractor, which is created once and reused for all texts.

    Raises
    ------
    ValueError
        If `texts` is a bare string or not iterable.
    """
    # Accept any iterable of texts (list, tuple, generator), but reject a
    # single string up front so it is not iterated character by character.
    if isinstance(texts, str):
        raise ValueError("Input must be an iterable of texts, "
                         "not a single string.")
    try:
        texts = list(texts)
    except TypeError as err:
        raise ValueError("Input must be an iterable of texts.") from err

    # Load the models once and reuse the extractor across all texts.
    ise = create_extractor(
        ner_model_path=ner_model_path,
        stmt_model_path=stmt_model_path,
        role_model_path=role_model_path,
        mutations_model_path=mutations_model_path,
        stmt_conf_threshold=stmt_conf_threshold
    )

    ips = []
    for text in tqdm(texts, desc="Processing texts"):
        res = ise.get_json_indra_stmts(text)
        ip = IndraBertProcessor(res, grounder=grounder)
        ips.append(ip)
    return ips, ise
68 changes: 68 additions & 0 deletions indra/sources/indra_bert/processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from indra.statements import *
from indra.statements.io import stmt_from_json
from indra.ontology.standardize import standardize_agent_name

import re
import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)

class IndraBertProcessor:
    """Processor extracting INDRA Statements from INDRA-BERT JSON output.

    Parameters
    ----------
    data : list of dict
        A list of INDRA Statement JSONs as produced by INDRA-BERT.
    grounder : Optional[Callable]
        A function taking an entity text and an optional context string and
        returning a dict of groundings to update the agent's db_refs with.
        If not given, a Gilda-based default grounder is used.

    Attributes
    ----------
    statements : list
        The INDRA Statements extracted from the input data; populated on
        construction.
    """
    def __init__(self, data, grounder=None):
        self.data = data
        self.statements = []
        self.source_api = 'indra_bert'
        # Fall back to the Gilda-based default if no grounder was given
        self.grounder = grounder if grounder else default_grounder_wrapper
        self.extract_statements()

    def extract_statement(self, entry):
        """Return a single INDRA Statement created from a Statement JSON.

        Raises
        ------
        Exception
            Re-raised (after logging) if statement deserialization or
            grounding fails; callers are expected to handle/skip the entry.
        """
        try:
            # Use INDRA's built-in statement-from-JSON deserialization
            stmt = stmt_from_json(entry)

            # Ground agents in the context of the first evidence text,
            # if any evidence is available
            if self.grounder:
                text = entry['evidence'][0]['text'] \
                    if entry.get('evidence') else ""
                self._apply_grounding(stmt, text)

            return stmt

        except Exception as e:
            logger.warning("Error creating statement from JSON: %s", e)
            raise

    def _apply_grounding(self, stmt, context_text):
        """Ground and standardize all agents of a statement in place."""
        for agent in stmt.agent_list():
            if agent and agent.name:
                # Look up groundings for the agent's text
                grounding_result = self.grounder(agent.name, context_text)
                if grounding_result:
                    agent.db_refs.update(grounding_result)

                # Standardize the agent name regardless of whether new
                # groundings were found
                standardize_agent_name(agent, standardize_refs=True)

    def extract_statements(self):
        """Populate self.statements from all entries, skipping bad ones."""
        self.statements = []
        for entry in self.data:
            try:
                stmt = self.extract_statement(entry)
            except Exception as e:
                # Best effort: log and move on to the next entry
                logger.warning("Error processing entry: %s", e)
                logger.debug("Entry data: %s", entry)
                continue
            self.statements.append(stmt)


def default_grounder_wrapper(text, context=None):
    """Ground the given entity text with Gilda in local mode.

    Parameters
    ----------
    text : str
        The entity text to ground.
    context : Optional[str]
        Optional sentence context used for disambiguation.

    Returns
    -------
    dict
        A dict of groundings for the given text.
    """
    # Imported lazily to avoid pulling this in when working in an
    # INDRA World context
    from indra.preassembler.grounding_mapper.gilda import get_grounding
    result = get_grounding(text, context=context, mode='local')
    grounding = result[0]
    return grounding
5 changes: 3 additions & 2 deletions indra/util/statement_presentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,9 @@ class to define a `StmtStat`.
'ubibrowser', 'acsn', 'wormbase']
"""Database source names as they appear in the DB"""

reader_sources = ['geneways', 'tees', 'gnbr', 'semrep', 'isi', 'trips',
'rlimsp', 'medscan', 'eidos', 'sparser', 'reach']
reader_sources = ['geneways', 'tees', 'gnbr', 'semrep', 'indra_bert',
'isi', 'trips', 'rlimsp', 'medscan', 'eidos', 'sparser',
'reach']
"""Reader source names as they appear in the DB"""

# These are mappings where the actual INDRA source, as it appears
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def main():
'trips_offline': ['pykqml'],
'reach_offline': ['cython<3', 'pyjnius==1.1.4'],
'eidos_offline': ['cython<3', 'pyjnius==1.1.4'],
'indra_bert': ['indra_bert'],
'hypothesis': ['gilda>1.0.0'],
'geneways': ['stemming', 'nltk<3.6'],
'bel': ['pybel>=0.15.0,<0.16.0'],
Expand Down Expand Up @@ -90,6 +91,7 @@ def main():
'indra.sources.geneways', 'indra.sources.gnbr',
'indra.sources.hprd', 'indra.sources.hypothesis',
'indra.sources.index_cards',
'indra.sources.indra_bert',
'indra.sources.indra_db_rest', 'indra.sources.isi',
'indra.sources.minerva', 'indra.sources.ndex_cx',
'indra.sources.reach', 'indra.sources.omnipath',
Expand Down
Loading