1+ """Langflow component for all-MiniLM-L6-V2 embeddings model."""
2+
3+ from __future__ import annotations
4+
5+ import subprocess
6+ from pathlib import Path
7+ from typing import List , Tuple
8+
9+ from lfx .base .embeddings .model import LCEmbeddingsModel
10+ from lfx .field_typing import Embeddings
11+ from lfx .io import IntInput
12+
13+ try :
14+ from sentence_transformers import SentenceTransformer # type: ignore[import-not-found]
15+ except ImportError :
16+ SentenceTransformer = None # type: ignore
17+
# Identity of the model: the local folder name and the matching Hugging Face repository id.
MODEL_NAME = "all-MiniLM-L6-V2"
HF_MODEL_ID = "sentence-transformers/" + MODEL_NAME

# Filesystem layout, anchored at the third ancestor of this file's resolved path.
ROOT_DIR = Path(__file__).resolve().parents[2]
MODELS_DIR = ROOT_DIR.joinpath("models")
DOWNLOAD_SCRIPT = ROOT_DIR.joinpath("scripts", "download.sh")
23+
24+
class MiniLMEmbeddings(Embeddings):
    """Embeddings implementation backed by a SentenceTransformer model stored on disk.

    This class deliberately has a different name from ``MiniLMEmbeddingsComponent``:
    when both classes shared that name, the component definition replaced this one
    at module level and ``build_embeddings`` ended up instantiating the component
    instead of the embeddings wrapper.
    """

    # Batch size used when the caller does not supply one.
    DEFAULT_BATCH_SIZE = 32

    def __init__(self, model_path: Path, batch_size: int = DEFAULT_BATCH_SIZE) -> None:
        """Load the model found at ``model_path``.

        Args:
            model_path: Directory containing the SentenceTransformer model files.
            batch_size: Number of texts passed to the model per encode batch.

        Raises:
            ImportError: If ``sentence-transformers`` is not installed.
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if SentenceTransformer is None:
            msg = "sentence-transformers must be installed. Install it with: uv pip install sentence-transformers"
            raise ImportError(msg)
        if batch_size < 1:
            msg = f"batch_size must be a positive integer, got {batch_size}"
            raise ValueError(msg)
        self.model = SentenceTransformer(str(model_path))
        self.model_path = model_path
        self.batch_size = batch_size

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding vector per entry of ``texts``.

        An empty input list yields an empty list without invoking the model.
        """
        if not texts:
            return []
        embeddings = self.model.encode(
            texts,
            batch_size=self.batch_size,
            convert_to_numpy=True,
        )
        return [embedding.tolist() for embedding in embeddings]

    def embed_query(self, text: str) -> List[float]:
        """Return the embedding vector for a single query string.

        An empty string yields an empty list without invoking the model.
        """
        if not text:
            return []
        embedding = self.model.encode(text, convert_to_numpy=True)
        return embedding.tolist()


class MiniLMEmbeddingsComponent(LCEmbeddingsModel):
    """DroqFlow component for all-MiniLM-L6-V2 embeddings using local model."""

    display_name = "MiniLM Embeddings"
    description = "Generate embeddings using the locally stored all-MiniLM-L6-V2 SentenceTransformer model."
    documentation: str = "https://www.sbert.net/docs/pretrained_models.html"
    icon = "binary"
    name = "MiniLMEmbeddingsComponent"
    category = "models"

    inputs = [
        IntInput(
            name="chunk_size",
            display_name="Chunk Size",
            info="Number of texts to embed in a single batch.",
            advanced=True,
            value=32,
        ),
    ]

    def build_embeddings(self) -> Embeddings:
        """Build and return the local MiniLM embeddings model.

        Makes sure the model files are available locally (via ``ensure_model``)
        and forwards the ``chunk_size`` input as the encode batch size.
        """
        model_path = ensure_model()
        # A missing or zero chunk size falls back to the wrapper's default.
        batch_size = self.chunk_size or MiniLMEmbeddings.DEFAULT_BATCH_SIZE
        return MiniLMEmbeddings(model_path, batch_size=batch_size)
75+
76+
def _has_model_assets(model_dir: Path) -> bool:
    """Return True when ``model_dir`` is a directory holding at least one entry."""
    return model_dir.is_dir() and any(model_dir.iterdir())


def ensure_model() -> Path:
    """Ensure the MiniLM model assets exist locally, downloading them if necessary.

    The model is expected at ``MODELS_DIR / MODEL_NAME``. When that directory is
    missing or empty, ``DOWNLOAD_SCRIPT`` is executed with the Hugging Face model
    id and the target directory as its two arguments.

    Returns:
        The resolved path of the local model directory.

    Raises:
        FileNotFoundError: If a download is needed but the script does not exist.
        RuntimeError: If the script exits with a non-zero status, or exits
            successfully without leaving any files in the target directory.
    """
    target_dir = MODELS_DIR / MODEL_NAME
    # An empty directory (e.g. left behind by an interrupted download) or a
    # stray file at this path is not a usable model, so only a populated
    # directory short-circuits the download.
    if _has_model_assets(target_dir):
        return target_dir.resolve()

    if not DOWNLOAD_SCRIPT.exists():
        raise FileNotFoundError(f"Download script not found at {DOWNLOAD_SCRIPT}")

    target_dir.parent.mkdir(parents=True, exist_ok=True)
    result = subprocess.run(
        [str(DOWNLOAD_SCRIPT), HF_MODEL_ID, str(target_dir)],
        capture_output=True,
        text=True,
        check=False,  # the return code is inspected below to build a richer message
    )
    if result.returncode != 0:
        # Prefer stderr, then stdout, then the bare exit code as the failure detail.
        detail = result.stderr.strip() or result.stdout.strip() or f"exit code {result.returncode}"
        raise RuntimeError(f"Failed to download {HF_MODEL_ID}: {detail}")

    if not _has_model_assets(target_dir):
        raise RuntimeError(f"Download reported success but assets missing at {target_dir}")

    return target_dir.resolve()
100+
101+
def get_component_runner() -> Tuple[str, str, str, SentenceTransformer]:
    """Return task, model identifier, runner kind, and runner instance for this component.

    Returns:
        A 4-tuple ``("embeddings", HF_MODEL_ID, "sentence_transformer", runner)``
        where ``runner`` is a ``SentenceTransformer`` loaded from the local model
        directory.

    Raises:
        ImportError: If ``sentence-transformers`` is not installed.
    """
    # Verify the dependency before touching the model files, so a missing
    # package is reported immediately rather than after a model download.
    if SentenceTransformer is None:
        msg = "sentence-transformers must be installed. Install it with: uv pip install sentence-transformers"
        raise ImportError(msg)
    model_path = ensure_model()
    runner = SentenceTransformer(str(model_path))
    return ("embeddings", HF_MODEL_ID, "sentence_transformer", runner)
0 commit comments