모듈 경로를 tools 와 llm directory로 구분 #134

ehddnr301 · ehddnr301 · commit d3e432656750 · 2025-09-05T13:52:26.000+09:00
diff --git a/llm_utils/chains.py b/llm_utils/chains.py
@@ -6,7 +6,7 @@
 )
 from pydantic import BaseModel, Field
 
-from .llm_factory import get_llm
+from llm_utils.llm import get_llm
 
 from prompt.template_loader import get_prompt_template
 
diff --git a/llm_utils/graph_utils/base.py b/llm_utils/graph_utils/base.py
@@ -6,7 +6,7 @@
 from langgraph.graph.message import add_messages
 from langchain.chains.sql_database.prompt import SQL_PROMPTS
 from pydantic import BaseModel, Field
-from llm_utils.llm_factory import get_llm
+from llm_utils.llm import get_llm
 
 from llm_utils.chains import (
     query_refiner_chain,
diff --git a/llm_utils/llm/__init__.py b/llm_utils/llm/__init__.py
@@ -0,0 +1,35 @@
+from .factory import (
+    get_llm,
+    get_llm_openai,
+    get_llm_azure,
+    get_llm_bedrock,
+    get_llm_gemini,
+    get_llm_ollama,
+    get_llm_huggingface,
+    get_embeddings,
+    get_embeddings_openai,
+    get_embeddings_azure,
+    get_embeddings_bedrock,
+    get_embeddings_gemini,
+    get_embeddings_ollama,
+    get_embeddings_huggingface,
+)
+
+__all__ = [
+    "get_llm",
+    "get_llm_openai",
+    "get_llm_azure",
+    "get_llm_bedrock",
+    "get_llm_gemini",
+    "get_llm_ollama",
+    "get_llm_huggingface",
+    "get_embeddings",
+    "get_embeddings_openai",
+    "get_embeddings_azure",
+    "get_embeddings_bedrock",
+    "get_embeddings_gemini",
+    "get_embeddings_ollama",
+    "get_embeddings_huggingface",
+]
+
+
diff --git a/llm_utils/llm/factory.py b/llm_utils/llm/factory.py
@@ -1,4 +1,3 @@
-# llm_factory.py
 import os
 from typing import Optional
 
@@ -180,3 +179,5 @@ def get_embeddings_huggingface() -> BaseLanguageModel:
         repo_id=os.getenv("HUGGING_FACE_EMBEDDING_REPO_ID"),
         huggingfacehub_api_token=os.getenv("HUGGING_FACE_EMBEDDING_API_TOKEN"),
     )
+
+
diff --git a/llm_utils/retrieval.py b/llm_utils/retrieval.py
@@ -6,9 +6,7 @@
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
-from .tools import get_info_from_db
-from .llm_factory import get_embeddings
-from .vectordb import get_vector_db
+from llm_utils.vectordb import get_vector_db
 
 
 def load_reranker_model(device: str = "cpu"):
diff --git a/llm_utils/tools/__init__.py b/llm_utils/tools/__init__.py
@@ -0,0 +1,13 @@
+from .datahub import (
+    set_gms_server,
+    get_info_from_db,
+    get_metadata_from_db,
+)
+
+__all__ = [
+    "set_gms_server",
+    "get_info_from_db",
+    "get_metadata_from_db",
+]
+
+
diff --git a/llm_utils/tools/datahub.py b/llm_utils/tools/datahub.py
@@ -18,18 +18,7 @@ def parallel_process(
     desc: Optional[str] = None,
     show_progress: bool = True,
 ) -> List[R]:
-    """병렬 처리를 위한 유틸리티 함수
-
-    Args:
-        items (Iterable[T]): 처리할 아이템들
-        process_fn (Callable[[T], R]): 각 아이템을 처리할 함수
-        max_workers (int, optional): 최대 쓰레드 수. Defaults to 8.
-        desc (Optional[str], optional): 진행 상태 메시지. Defaults to None.
-        show_progress (bool, optional): 진행 상태 표시 여부. Defaults to True.
-
-    Returns:
-        List[R]: 처리 결과 리스트
-    """
+    """병렬 처리를 위한 유틸리티 함수"""
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
         futures = [executor.submit(process_fn, item) for item in items]
         if show_progress:
@@ -67,14 +56,6 @@ def _process_column_info(
 
 
 def _get_table_info(max_workers: int = 8) -> Dict[str, str]:
-    """전체 테이블 이름과 설명을 가져오는 함수
-
-    Args:
-        max_workers (int, optional): 병렬 처리에 사용할 최대 쓰레드 수. Defaults to 8.
-
-    Returns:
-        Dict[str, str]: 테이블 이름과 설명을 담은 딕셔너리
-    """
     fetcher = _get_fetcher()
     urns = fetcher.get_urns()
     table_info = {}
@@ -96,40 +77,19 @@ def _get_table_info(max_workers: int = 8) -> Dict[str, str]:
 def _get_column_info(
     table_name: str, urn_table_mapping: Dict[str, str], max_workers: int = 8
 ) -> List[Dict[str, str]]:
-    """table_name에 해당하는 컬럼 이름과 설명을 가져오는 함수
-
-    Args:
-        table_name (str): 테이블 이름
-        urn_table_mapping (Dict[str, str]): URN-테이블명 매핑 딕셔너리
-        max_workers (int, optional): 병렬 처리에 사용할 최대 쓰레드 수. Defaults to 8.
-
-    Returns:
-        List[Dict[str, str]]: 컬럼 정보 리스트
-    """
-    # 해당 테이블의 URN 직접 찾기
     target_urn = urn_table_mapping.get(table_name)
     if not target_urn:
         return []
 
-    # Fetcher 생성 및 컬럼 정보 가져오기
     fetcher = _get_fetcher()
     column_info = fetcher.get_column_names_and_descriptions(target_urn)
 
     return column_info
 
 
 def get_info_from_db(max_workers: int = 8) -> List[Document]:
-    """전체 테이블 이름과 설명, 컬럼 이름과 설명을 가져오는 함수
-
-    Args:
-        max_workers (int, optional): 병렬 처리에 사용할 최대 쓰레드 수. Defaults to 8.
-
-    Returns:
-        List[Document]: 테이블과 컬럼 정보를 담은 Document 객체 리스트
-    """
     table_info = _get_table_info(max_workers=max_workers)
 
-    # URN-테이블명 매핑을 한 번만 생성
     fetcher = _get_fetcher()
     urns = list(fetcher.get_urns())
     urn_table_mapping = {}
@@ -142,10 +102,8 @@ def process_table_info(item: tuple[str, str]) -> str:
         table_name, table_description = item
         urn = urn_table_mapping.get(table_name, "")
 
-        # fetcher 인스턴스 생성
         local_fetcher = _get_fetcher()
 
-        # 컬럼 정보 가져오기
         column_info = _get_column_info(
             table_name, urn_table_mapping, max_workers=max_workers
         )
@@ -156,13 +114,11 @@ def process_table_info(item: tuple[str, str]) -> str:
             ]
         )
 
-        # 쿼리 및 용어집 정보 가져오기
         queries_result = local_fetcher.get_queries_by_urn(urn) if urn else {}
         glossary_terms_result = (
             local_fetcher.get_glossary_terms_by_urn(urn) if urn else {}
         )
 
-        # GraphQL 응답에서 실제 쿼리 리스트 추출
         queries = []
         if (
             queries_result
@@ -172,7 +128,6 @@ def process_table_info(item: tuple[str, str]) -> str:
         ):
             queries = queries_result["data"]["listQueries"]["queries"]
 
-        # GraphQL 응답에서 실제 glossary terms 추출
         glossary_terms = []
         if (
             glossary_terms_result
@@ -199,10 +154,9 @@ def process_table_info(item: tuple[str, str]) -> str:
                         }
                     )
 
-        # 쿼리 정보를 name, description, statement.value만 추출하여 포맷
         if queries:
             formatted_queries = []
-            for q in queries[:3]:  # 최대 3개 쿼리만
+            for q in queries[:3]:
                 if isinstance(q, dict) and "properties" in q:
                     props = q["properties"]
                     name = props.get("name", "No name")
@@ -241,10 +195,6 @@ def process_table_info(item: tuple[str, str]) -> str:
 
 
 def get_metadata_from_db() -> List[Dict]:
-    """
-    전체 테이블의 메타데이터(테이블 이름, 설명, 컬럼 이름, 설명, 테이블 lineage, 컬럼 별 lineage)를 가져오는 함수
-    """
-
     fetcher = _get_fetcher()
     urns = list(fetcher.get_urns())
 
@@ -256,3 +206,5 @@ def get_metadata_from_db() -> List[Dict]:
         metadata.append(table_metadata)
 
     return metadata
+
+
diff --git a/llm_utils/vectordb/faiss_db.py b/llm_utils/vectordb/faiss_db.py
@@ -7,7 +7,7 @@
 from typing import Optional
 
 from llm_utils.tools import get_info_from_db
-from llm_utils.llm_factory import get_embeddings
+from llm_utils.llm import get_embeddings
 
 
 def get_faiss_vector_db(vectordb_path: Optional[str] = None):
diff --git a/llm_utils/vectordb/pgvector_db.py b/llm_utils/vectordb/pgvector_db.py
@@ -9,7 +9,7 @@
 from langchain_postgres.vectorstores import PGVector
 
 from llm_utils.tools import get_info_from_db
-from llm_utils.llm_factory import get_embeddings
+from llm_utils.llm import get_embeddings
 
 
 def _check_collection_exists(connection_string: str, collection_name: str) -> bool:

Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,7 @@`
`6`	`6`	`)`
`7`	`7`	`from pydantic import BaseModel, Field`
`8`	`8`
`9`		`-from .llm_factory import get_llm`
	`9`	`+from llm_utils.llm import get_llm`
`10`	`10`
`11`	`11`	`from prompt.template_loader import get_prompt_template`
`12`	`12`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-# llm_factory.py`
`2`	`1`	`import os`
`3`	`2`	`from typing import Optional`
`4`	`3`
`@@ -180,3 +179,5 @@ def get_embeddings_huggingface() -> BaseLanguageModel:`
`180`	`179`	`repo_id=os.getenv("HUGGING_FACE_EMBEDDING_REPO_ID"),`
`181`	`180`	`huggingfacehub_api_token=os.getenv("HUGGING_FACE_EMBEDDING_API_TOKEN"),`
`182`	`181`	`)`
	`182`	`+`
	`183`	`+`