Skip to content

Commit ab6e6f0

Browse files
committed
Apply text classification
1 parent e58e829 commit ab6e6f0

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

consumer/views/submit_job.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
"""
1313
import asyncio
1414
import datetime
15+
import joblib
1516
import logging
1617
import nltk
1718
import re
@@ -26,6 +27,7 @@
2627
from database.models import CONSUMER_STATUS_CHOICES, JOB_STATUS_CHOICES
2728
from database.results import get_pmcid, get_pmcid_in_result, save_article, save_result, save_abstract_sentences, \
2829
save_body_sentences
30+
from training.export_data import clean_text
2931
from xml.etree import ElementTree as ET
3032
from xml.etree.ElementTree import ParseError
3133

@@ -430,6 +432,12 @@ async def seek_references(engine, job_id, consumer_ip, date):
430432
except AttributeError:
431433
logging.debug("Journal not found for pmcid {}".format(element["pmcid"]))
432434

435+
# text classification - is it RNA-related?
436+
cleaned_abstract = await clean_text(abstract)
437+
rna_pipeline = joblib.load("training/svc_pipeline.pkl")
438+
relevance_label = rna_pipeline.predict([cleaned_abstract])[0]
439+
article_response["rna_related"] = bool(int(relevance_label))
440+
433441
# add pmcid
434442
article_response["pmcid"] = element["pmcid"]
435443

0 commit comments

Comments
 (0)