질문 적합성 판별 기능 추가 및 관련 모듈 업데이트.

ehddnr301 · ehddnr301 · commit 0075289c6601 · 2025-09-13T16:18:19.000+09:00
- 새로운 질문 게이트 체인과 출력 모델을 구현함.
- UI에서 질문 게이트 결과를 표시하도록 수정함.
- 질문 게이트 결과를 처리하는 노드와 그래프 구성도 포함됨.
diff --git a/interface/lang2sql.py b/interface/lang2sql.py
@@ -30,6 +30,7 @@
     "show_sql": "Show SQL",
     "show_question_reinterpreted_by_ai": "Show User Question Reinterpreted by AI",
     "show_referenced_tables": "Show List of Referenced Tables",
+    "show_question_gate_result": "Show Question Gate Result",
     "show_table": "Show Table",
     "show_chart": "Show Chart",
 }
@@ -103,8 +104,23 @@ def should_show(_key: str) -> bool:
     show_sql_section = has_query and should_show("show_sql")
     show_result_desc = has_query and should_show("show_result_description")
     show_reinterpreted = has_query and should_show("show_question_reinterpreted_by_ai")
+    show_gate_result = should_show("show_question_gate_result")
     show_table_section = has_query and should_show("show_table")
     show_chart_section = has_query and should_show("show_chart")
+    if show_gate_result and ("question_gate_result" in res):
+        st.markdown("---")
+        st.markdown("**Question Gate 결과:**")
+        details = res.get("question_gate_result")
+        if details:
+            passed = details.get("is_sql_like")
+            if passed is not None:
+                st.write(f"적합성 통과 여부: `{passed}`")
+            try:
+                import json as _json
+                st.code(_json.dumps(details, ensure_ascii=False, indent=2), language="json")
+            except Exception:
+                st.write(details)
+
 
     if should_show("show_token_usage"):
         st.markdown("---")
diff --git a/llm_utils/chains.py b/llm_utils/chains.py
@@ -1,10 +1,19 @@
+"""
+LLM 체인 생성 모듈.
+
+이 모듈은 Lang2SQL에서 사용하는 다양한 LangChain 기반 체인을 정의합니다.
+- Query Maker
+- Query Enrichment
+- Profile Extraction
+- Question Gate (SQL 적합성 분류)
+"""
 import os
 from langchain_core.prompts import (
     ChatPromptTemplate,
-    MessagesPlaceholder,
     SystemMessagePromptTemplate,
 )
 from pydantic import BaseModel, Field
+from llm_utils.output_parser.question_suitability import QuestionSuitability
 
 from llm_utils.llm import get_llm
 
@@ -15,6 +24,11 @@
 
 
 class QuestionProfile(BaseModel):
+    """
+    자연어 질문의 특징을 구조화해 표현하는 프로파일 모델.
+
+    이 프로파일은 이후 컨텍스트 보강 및 SQL 생성 시 힌트로 사용됩니다.
+    """
     is_timeseries: bool = Field(description="시계열 분석 필요 여부")
     is_aggregation: bool = Field(description="집계 함수 필요 여부")
     has_filter: bool = Field(description="조건 필터 필요 여부")
@@ -26,6 +40,15 @@ class QuestionProfile(BaseModel):
 
 # QueryMakerChain
 def create_query_maker_chain(llm):
+    """
+    SQL 쿼리 생성을 위한 체인을 생성합니다.
+
+    Args:
+        llm: LangChain 호환 LLM 인스턴스
+
+    Returns:
+        Runnable: 입력 프롬프트를 받아 SQL을 생성하는 체인
+    """
     prompt = get_prompt_template("query_maker_prompt")
     query_maker_prompt = ChatPromptTemplate.from_messages(
         [
@@ -36,6 +59,15 @@ def create_query_maker_chain(llm):
 
 
 def create_query_enrichment_chain(llm):
+    """
+    사용자 질문을 메타데이터로 보강하기 위한 체인을 생성합니다.
+
+    Args:
+        llm: LangChain 호환 LLM 인스턴스
+
+    Returns:
+        Runnable: 보강된 질문 텍스트를 반환하는 체인
+    """
     prompt = get_prompt_template("query_enrichment_prompt")
 
     enrichment_prompt = ChatPromptTemplate.from_messages(
@@ -49,6 +81,15 @@ def create_query_enrichment_chain(llm):
 
 
 def create_profile_extraction_chain(llm):
+    """
+    질문으로부터 `QuestionProfile`을 추출하는 체인을 생성합니다.
+
+    Args:
+        llm: LangChain 호환 LLM 인스턴스
+
+    Returns:
+        Runnable: `QuestionProfile` 구조화 출력을 반환하는 체인
+    """
     prompt = get_prompt_template("profile_extraction_prompt")
 
     profile_prompt = ChatPromptTemplate.from_messages(
@@ -61,9 +102,28 @@ def create_profile_extraction_chain(llm):
     return chain
 
 
+def create_question_gate_chain(llm):
+    """
+    질문 적합성(Question Gate) 체인을 생성합니다.
+
+    ChatPromptTemplate(SystemMessage) + LLM 구조화 출력으로
+    `QuestionSuitability`를 반환합니다.
+
+    Args:
+        llm: LangChain 호환 LLM 인스턴스
+
+    Returns:
+        Runnable: invoke({"question": str}) -> QuestionSuitability
+    """
+
+    prompt = get_prompt_template("question_gate_prompt")
+    gate_prompt = ChatPromptTemplate.from_messages(
+        [SystemMessagePromptTemplate.from_template(prompt)]
+    )
+    return gate_prompt | llm.with_structured_output(QuestionSuitability)
+
+
 query_maker_chain = create_query_maker_chain(llm)
 profile_extraction_chain = create_profile_extraction_chain(llm)
 query_enrichment_chain = create_query_enrichment_chain(llm)
-
-if __name__ == "__main__":
-    pass
+question_gate_chain = create_question_gate_chain(llm)
diff --git a/llm_utils/graph_utils/base.py b/llm_utils/graph_utils/base.py
@@ -1,22 +1,20 @@
-import os
 import json
 
 from typing_extensions import TypedDict, Annotated
-from langgraph.graph import END, StateGraph
 from langgraph.graph.message import add_messages
 
 
 from llm_utils.chains import (
     query_maker_chain,
     profile_extraction_chain,
     query_enrichment_chain,
+    question_gate_chain,
 )
 
-from llm_utils.tools import get_info_from_db
 from llm_utils.retrieval import search_tables
-from llm_utils.graph_utils.profile_utils import profile_to_text
 
 # 노드 식별자 정의
+QUESTION_GATE = "question_gate"
 GET_TABLE_INFO = "get_table_info"
 TOOL = "tool"
 TABLE_FILTER = "table_filter"
@@ -36,6 +34,31 @@ class QueryMakerState(TypedDict):
     retriever_name: str
     top_n: int
     device: str
+    question_gate_result: dict
+
+# 노드 함수: QUESTION_GATE 노드
+def question_gate_node(state: QueryMakerState):
+    """
+    사용자의 질문이 SQL로 답변 가능한지 판별하고, 구조화된 결과를 반환하는 게이트 노드입니다.
+
+    - question_gate_chain 으로 적합성을 판정하여
+      `question_gate_result`를 설정합니다.
+
+    Args:
+        state (QueryMakerState): 그래프 상태
+
+    Returns:
+        QueryMakerState: 게이트 판정 결과가 반영된 상태
+    """
+
+    question_text = state["messages"][0].content
+    suitability = question_gate_chain.invoke({"question": question_text})
+    state["question_gate_result"] = {
+        "reason": getattr(suitability, "reason", ""),
+        "missing_entities": getattr(suitability, "missing_entities", []),
+        "requires_data_science": getattr(suitability, "requires_data_science", False),
+    }
+    return state
 
 
 # 노드 함수: PROFILE_EXTRACTION 노드
diff --git a/llm_utils/graph_utils/basic_graph.py b/llm_utils/graph_utils/basic_graph.py
@@ -3,8 +3,10 @@
 from langgraph.graph import StateGraph, END
 from llm_utils.graph_utils.base import (
     QueryMakerState,
+    QUESTION_GATE,
     GET_TABLE_INFO,
     QUERY_MAKER,
+    question_gate_node,
     get_table_info_node,
     query_maker_node,
 )
@@ -16,12 +18,25 @@
 
 # StateGraph 생성 및 구성
 builder = StateGraph(QueryMakerState)
-builder.set_entry_point(GET_TABLE_INFO)
+builder.set_entry_point(QUESTION_GATE)
 
 # 노드 추가
+builder.add_node(QUESTION_GATE, question_gate_node)
 builder.add_node(GET_TABLE_INFO, get_table_info_node)
 builder.add_node(QUERY_MAKER, query_maker_node)
 
+def _route_after_gate(state: QueryMakerState):
+    return GET_TABLE_INFO
+
+builder.add_conditional_edges(
+    QUESTION_GATE,
+    _route_after_gate,
+    {
+        GET_TABLE_INFO: GET_TABLE_INFO,
+        END: END,
+    },
+)
+
 # 기본 엣지 설정
 builder.add_edge(GET_TABLE_INFO, QUERY_MAKER)
 
diff --git a/llm_utils/graph_utils/enriched_graph.py b/llm_utils/graph_utils/enriched_graph.py
@@ -3,10 +3,12 @@
 from langgraph.graph import StateGraph, END
 from llm_utils.graph_utils.base import (
     QueryMakerState,
+    QUESTION_GATE,
     GET_TABLE_INFO,
     PROFILE_EXTRACTION,
     CONTEXT_ENRICHMENT,
     QUERY_MAKER,
+    question_gate_node,
     get_table_info_node,
     profile_extraction_node,
     context_enrichment_node,
@@ -20,14 +22,27 @@
 
 # StateGraph 생성 및 구성
 builder = StateGraph(QueryMakerState)
-builder.set_entry_point(GET_TABLE_INFO)
+builder.set_entry_point(QUESTION_GATE)
 
 # 노드 추가
+builder.add_node(QUESTION_GATE, question_gate_node)
 builder.add_node(GET_TABLE_INFO, get_table_info_node)
 builder.add_node(PROFILE_EXTRACTION, profile_extraction_node)
 builder.add_node(CONTEXT_ENRICHMENT, context_enrichment_node)
 builder.add_node(QUERY_MAKER, query_maker_node)
 
+def _route_after_gate(state: QueryMakerState):
+    return GET_TABLE_INFO
+
+builder.add_conditional_edges(
+    QUESTION_GATE,
+    _route_after_gate,
+    {
+        GET_TABLE_INFO: GET_TABLE_INFO,
+        END: END,
+    },
+)
+
 # 기본 엣지 설정
 builder.add_edge(GET_TABLE_INFO, PROFILE_EXTRACTION)
 builder.add_edge(PROFILE_EXTRACTION, CONTEXT_ENRICHMENT)
diff --git a/llm_utils/output_parser/__init__.py b/llm_utils/output_parser/__init__.py
@@ -0,0 +1,6 @@
+"""
+출력 파서 모듈 패키지 초기화.
+
+이 패키지는 LLM의 구조화 출력 모델과 파서들을 포함합니다.
+"""
+
diff --git a/llm_utils/output_parser/question_suitability.py b/llm_utils/output_parser/question_suitability.py
@@ -0,0 +1,25 @@
+"""
+QuestionSuitability 출력 모델.
+
+LLM 구조화 출력으로부터 SQL 적합성 판단 결과를 표현하는 Pydantic 모델입니다.
+"""
+
+from pydantic import BaseModel, Field
+
+
+# Structured-output schema for the question gate: an LLM bound to this model
+# fills in the fields below for each user question.
+# NOTE(review): the class docstring and the Field descriptions are kept
+# verbatim (in Korean) — presumably they are surfaced to the LLM through the
+# generated schema, so rewording them could change model output; confirm
+# before editing.
+class QuestionSuitability(BaseModel):
+    """
+    SQL 생성 적합성 결과 모델.
+
+    LLM 구조화 출력으로 직렬화 가능한 필드를 정의합니다.
+    """
+
+    # Short summary of the reasoning / what needs to be supplemented (required).
+    reason: str = Field(description="보완/설명 사유 요약")
+    # Key elements missing from the question (entities, period, ...);
+    # defaults to an empty list.
+    missing_entities: list[str] = Field(
+        default_factory=list, description="질문에서 누락된 핵심 엔터티/기간 등"
+    )
+    # Whether ML/statistical analysis beyond SQL is needed; defaults to False.
+    requires_data_science: bool = Field(
+        default=False, description="SQL을 넘어 ML/통계 분석이 필요한지 여부"
+    )
+
+
diff --git a/prompt/question_gate_prompt.md b/prompt/question_gate_prompt.md
@@ -0,0 +1,19 @@
+당신은 데이터 분석 도우미입니다. 아래 사용자 질문이 SQL로 답변 가능한지 판별하고, 구조화된 결과를 반환하세요.
+
+요건:
+- reason: 한 줄 설명(어떤 보완이 필요한지 요약)
+- missing_entities: 기간, 대상 엔터티, 측정값 등 누락된 핵심 요소 리스트(없으면 빈 리스트)
+- requires_data_science: 통계/ML 분석이 필요한지 여부(Boolean)
+
+언어/출력 형식:
+- 모든 텍스트 값은 한국어로 작성하세요. (reason은 한국어 문장, missing_entities 항목은 한국어 명사구)
+- Boolean 값은 JSON의 true/false로 표기하세요.
+
+주의:
+- 데이터 분석 맥락에서 SQL 집계/필터/조인으로 해결 가능한지 판단합니다.
+- 정책/운영/가이드/설치/권한/오류 해결 등은 SQL 부적합으로 간주합니다.
+
+입력: {question}
+
+출력은 반드시 지정된 스키마의 JSON으로만 반환하세요.
+

-Original file line number
+Diff line change
@@ @@ -0,0 +1,6 @@ @@
 +"""
 +출력 파서 모듈 패키지 초기화.
++
 +이 패키지는 LLM의 구조화 출력 모델과 파서들을 포함합니다.
 +"""
++