55Uses the sentence-transformers/all-mpnet-base-v2 model as the base encoder for 4-class classification:
66 - 3: EYE_IMAGING - Actual eye imaging datasets (fundus, OCT, OCTA, cornea, etc.)
77 - 2: EYE_SOFTWARE - Code, tools, models for eye imaging (no actual data)
8- - 1: EDGE_CASE - Eye research (papers, reviews, non-imaging data)
8+ - 1: OTHER_EYE_DATA - Eye research (papers, reviews, non-imaging data)
99 - 0: NEGATIVE - Not eye-related at all
1010"""
1111
2222# Model configuration
2323BASE_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
2424HF_MODEL_REPO = "fairdataihub/envision-eye-imaging-classifier"
25- LABELS = ["NEGATIVE" , "EDGE_CASE " , "EYE_SOFTWARE" , "EYE_IMAGING" ]
25+ LABELS = ["NEGATIVE" , "OTHER_EYE_DATA " , "EYE_SOFTWARE" , "EYE_IMAGING" ]
2626
2727# ============================================================
2828# TRAINING DATA - Curated examples for few-shot learning
111111]
112112
113113# EYE_SOFTWARE (label=2): Code, tools, models for eye imaging (NOT actual data)
114- # Added: misplaced software from EYE_IMAGING + EDGE_CASE , spot-check examples
114+ # Added: misplaced software moved from EYE_IMAGING and OTHER_EYE_DATA, plus spot-check examples
115115EYE_SOFTWARE_EXAMPLES = [
116116 "linchundan88/Fundus-image-preprocessing: fundus image preprocessing Python code" ,
117117 "NIH-NEI/oct-image-segmentation-models: v0.8.2 trained model weights" ,
150150 "ResNet-50 classifiers and diffusion models trained on retinal fundus images" ,
151151 "AMikroulis/octopus OCT image processing dataset" ,
152152 "anithaj17/RetinoNet-DR-Classification fundus image dataset" ,
153- # Moved from EDGE_CASE (clearly software/tools)
153+ # Moved from OTHER_EYE_DATA (clearly software/tools)
154154 "Python package for retinal image preprocessing" ,
155155 "Deep learning framework for fundus image segmentation code only" ,
156156 "OCT image reconstruction algorithm implementation" ,
166166 "Flexible corneal neurotechnology reveals in-vivo pathological retinal oscillations recording device" ,
167167]
168168
169- # EDGE_CASE (label=1): Eye/vision research but NOT actual imaging datasets
169+ # OTHER_EYE_DATA (label=1): Eye/vision research but NOT actual imaging datasets
170170# Cleaned: removed misplaced software→EYE_SOFTWARE, non-eye→NEGATIVE; added eye metabolomics
171- EDGE_CASE_EXAMPLES = [
171+ OTHER_EYE_DATA_EXAMPLES = [
172172 "A Review of Deep Learning Methods for Diabetic Retinopathy Detection" ,
173173 "Survey of Machine Learning Techniques for Glaucoma Diagnosis" ,
174174 "Advances in Optical Coherence Tomography Technology Review Article" ,
257257]
258258
259259# NEGATIVE (label=0): Clearly not eye-related
260- # Added: non-eye medical imaging from EDGE_CASE , spot-check confounders
260+ # Added: non-eye medical imaging moved from OTHER_EYE_DATA, plus spot-check confounders
261261NEGATIVE_EXAMPLES = [
262262 "Climate change impact on coral reef ecosystems dataset" ,
263263 "COVID-19 genome sequencing and variant analysis" ,
492492 "Dataset_1 of AF driver detection in pulmonary vein area cardiac arrhythmia" ,
493493 "Data from Dichoptic metacontrast masking functions to infer transmission delay" ,
494494 "IRIS Carbon Mapping Project Curated Dataset carbon emissions" ,
495- # Moved from EDGE_CASE (non-eye medical imaging — clearly NEGATIVE)
495+ # Moved from OTHER_EYE_DATA (non-eye medical imaging — clearly NEGATIVE)
496496 "Brain MRI analysis for Alzheimer's disease detection" ,
497497 "Cardiac CT angiography for coronary artery disease" ,
498498 "Dermatology skin lesion classification dataset" ,
503503 "Ultrasound imaging for liver disease assessment" ,
504504 "PET scan analysis for neurological disorders" ,
505505 "Spine MRI for degenerative disc disease" ,
506- # Moved from EDGE_CASE (non-eye OCT — clearly NEGATIVE)
506+ # Moved from OTHER_EYE_DATA (non-eye OCT — clearly NEGATIVE)
507507 "OCT for industrial material inspection dataset" ,
508508 "Optical coherence tomography in dermatology skin imaging" ,
509509 "OCT imaging of atherosclerotic plaque in arteries" ,
@@ -545,7 +545,7 @@ class EyeImagingClassifier:
545545 Classifies metadata records into 4 classes:
546546 - EYE_IMAGING: Actual eye imaging datasets (fundus, OCT, OCTA, etc.)
547547 - EYE_SOFTWARE: Code, tools, models for eye imaging (no actual data)
548- - EDGE_CASE : Eye research papers, reviews, borderline items
548+ - OTHER_EYE_DATA: Eye research papers, reviews, and other non-imaging eye data (borderline items)
549549 - NEGATIVE: Unrelated domains
550550
551551 Usage:
@@ -679,7 +679,7 @@ def _predict_batch(self, texts):
679679 else :
680680 pred_int = {
681681 "NEGATIVE" : 0 ,
682- "EDGE_CASE " : 1 ,
682+ "OTHER_EYE_DATA " : 1 ,
683683 "EYE_SOFTWARE" : 2 ,
684684 "EYE_IMAGING" : 3 ,
685685 }.get (str (pred ), 0 )
@@ -692,7 +692,7 @@ def _predict_batch(self, texts):
692692 "confidence" : float (max (probs )),
693693 "probabilities" : {
694694 "NEGATIVE" : float (probs [0 ]),
695- "EDGE_CASE " : float (probs [1 ]),
695+ "OTHER_EYE_DATA " : float (probs [1 ]),
696696 "EYE_SOFTWARE" : float (probs [2 ]),
697697 "EYE_IMAGING" : float (probs [3 ]),
698698 },
@@ -733,13 +733,13 @@ def train(cls, output_dir=None, device=None, base_model_name=None,
733733 train_texts = (
734734 EYE_IMAGING_EXAMPLES
735735 + EYE_SOFTWARE_EXAMPLES
736- + EDGE_CASE_EXAMPLES
736+ + OTHER_EYE_DATA_EXAMPLES
737737 + NEGATIVE_EXAMPLES
738738 )
739739 train_labels = (
740740 [3 ] * len (EYE_IMAGING_EXAMPLES )
741741 + [2 ] * len (EYE_SOFTWARE_EXAMPLES )
742- + [1 ] * len (EDGE_CASE_EXAMPLES )
742+ + [1 ] * len (OTHER_EYE_DATA_EXAMPLES )
743743 + [0 ] * len (NEGATIVE_EXAMPLES )
744744 )
745745
0 commit comments