Merged
Changes from 9 commits
1 change: 1 addition & 0 deletions .github/CODEOWNERS
@@ -25,6 +25,7 @@
/comps/text2graph/ sharath.raghava@intel.com letong.han@intel.com
/comps/text2image/ xinyu.ye@intel.com liang1.lv@intel.com
/comps/text2kg/ siddhi.velankar@intel.com letong.han@intel.com
/comps/text2query/ yogesh.pandey@intel.com jean1.yu@intel.com sharath.raghava@intel.com yi.a.yao@intel.com
/comps/text2sql/ yogesh.pandey@intel.com qing.yao@intel.com
/comps/third_parties/ liang1.lv@intel.com letong.han@intel.com
/comps/tts/ sihan.chen@intel.com letong.han@intel.com
19 changes: 19 additions & 0 deletions .github/workflows/docker/compose/text2query-compose.yaml
@@ -0,0 +1,19 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
services:
text2query-sql:
build:
dockerfile: comps/text2query/src/Dockerfile
image: ${REGISTRY:-opea}/text2query-sql:${TAG:-latest}

text2query-cypher:
build:
dockerfile: comps/text2query/src/Dockerfile.cypher.intel_hpu
image: ${REGISTRY:-opea}/text2query-cypher:${TAG:-latest}

text2query-graph:
build:
dockerfile: comps/text2query/src/Dockerfile.graph
image: ${REGISTRY:-opea}/text2query-graph:${TAG:-latest}
1 change: 1 addition & 0 deletions comps/cores/mega/constants.py
@@ -41,6 +41,7 @@ class ServiceType(Enum):
LANGUAGE_DETECTION = 24
PROMPT_TEMPLATE = 25
PROMPT_REGISTRY = 26
TEXT2QUERY = 27


class MegaServiceEndpoint(Enum):
10 changes: 10 additions & 0 deletions comps/cores/proto/api_protocol.py
@@ -1057,3 +1057,13 @@ class FineTuningJobCheckpoint(BaseModel):

class RouteEndpointDoc(BaseModel):
url: str = Field(..., description="URL of the chosen inference endpoint")


class Text2QueryRequest(BaseModel):
query: Optional[str] = None
conn_type: Optional[str] = "sql"
conn_url: Optional[str] = None
conn_user: Optional[str] = None
conn_password: Optional[str] = None
conn_dialect: Optional[str] = "postgresql"
options: Dict = {}
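
As a quick illustration of how the new request model behaves, here is a minimal sketch that re-declares `Text2QueryRequest` standalone (the import path into `comps` is not assumed here) and shows the defaults filling in:

```python
from typing import Dict, Optional

from pydantic import BaseModel


class Text2QueryRequest(BaseModel):
    """Mirror of the request model added to comps/cores/proto/api_protocol.py."""

    query: Optional[str] = None
    conn_type: Optional[str] = "sql"
    conn_url: Optional[str] = None
    conn_user: Optional[str] = None
    conn_password: Optional[str] = None
    conn_dialect: Optional[str] = "postgresql"
    options: Dict = {}


# Only the natural-language query and connection details need to be supplied;
# conn_type and conn_dialect fall back to their defaults.
req = Text2QueryRequest(
    query="List the top 5 customers by total invoice amount",
    conn_url="localhost:5442",
    conn_user="postgres",
    conn_password="testpwd",
)
print(req.conn_type, req.conn_dialect)  # sql postgresql
```

The example credentials match the Postgres settings in the compose file of this PR; any real deployment would supply its own.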
6 changes: 6 additions & 0 deletions comps/text2cypher/src/README.md
@@ -1,3 +1,9 @@
# ⚠️ Deprecation Notice: `text2cypher`

**This microservice is no longer actively maintained.**

As of OPEA v1.5, the `text2cypher` microservice is deprecated. Please use the `text2query` microservice instead. `text2cypher` will be removed in OPEA v1.7.

# 🛢 Text-to-Cypher Microservice

This microservice enables a wide range of use cases, making it a versatile tool for businesses, researchers, and individuals alike. Users can generate Cypher queries from natural language questions, enabling them to quickly retrieve relevant data from graph databases. The service executes locally on Intel Gaudi.
6 changes: 6 additions & 0 deletions comps/text2graph/src/README.md
@@ -1,3 +1,9 @@
# ⚠️ Deprecation Notice: `text2graph`

**This microservice is no longer actively maintained.**

As of OPEA v1.5, the `text2graph` microservice is deprecated. Please use the `text2query` microservice instead. `text2graph` will be removed in OPEA v1.7.

# Text to graph triplet extractor

Creating graphs from text requires converting unstructured text into structured data, which is challenging.
18 changes: 18 additions & 0 deletions comps/text2query/README.md
@@ -0,0 +1,18 @@
# 🛢 Text-to-Query Microservice

The text2query microservice is a specialized, independent service that translates natural language queries into structured query languages. It acts as an interpreter: users ask questions in plain language and receive a formal query in return, which can then be executed against a relational or graph database. This service bridges the gap between human communication and machine-readable database commands.

## 🛠️ Features

- **Generate SQL queries from input text**: Transforms user-provided natural language into SQL queries and executes them to retrieve data from SQL databases.
- **Generate Cypher queries from input text**: Transforms user-provided natural language into Cypher queries and executes them to retrieve data from a Neo4j graph database.

## ⚙️ Supported Implementations

The Text2Query Microservice supports multiple implementation options to suit different databases. Each implementation includes its own configuration and setup instructions:

| Implementation | Description | Supported Hardware | Documentation |
| ------------------ | --------------------------------------------------------------- | ------------------ | ------------------------------ |
| **Text-to-SQL** | Transforming user-provided natural language into SQL queries | Xeon, Gaudi | [README](src/README_sql.md) |
| **Text-to-Cypher** | Transforming user-provided natural language into Cypher queries | Gaudi | [README](src/README_cypher.md) |
| **Text-to-Graph** | Transforming user-provided natural language into graph triplets | Xeon | [README](src/README_graph.md) |
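
To make the request shape concrete, here is a sketch of a client payload whose fields match the `Text2QueryRequest` model added in this PR. The endpoint path `/v1/text2query` and the host port are illustrative assumptions, not confirmed by this diff:

```python
import json

# Hypothetical payload for the text2query-sql service; the field names mirror
# Text2QueryRequest, but the endpoint path below is an assumption.
payload = {
    "query": "Which artists have the most albums?",
    "conn_type": "sql",
    "conn_url": "localhost:5442",
    "conn_user": "postgres",
    "conn_password": "testpwd",
    "conn_dialect": "postgresql",
    "options": {},
}
body = json.dumps(payload)

# To actually send it (requires the compose stack to be running):
# import requests
# resp = requests.post("http://localhost:9097/v1/text2query", json=payload)
print(body)
```

Port 9097 and the Postgres credentials come from the compose file in this PR; a deployment would substitute its own connection details.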
94 changes: 94 additions & 0 deletions comps/text2query/deployment/docker_compose/compose.yaml
@@ -0,0 +1,94 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

include:
- ../../../third_parties/tgi/deployment/docker_compose/compose.yaml
- ../../../third_parties/neo4j/deployment/docker_compose/compose.yaml

services:
postgres:
image: postgres:latest
container_name: postgres-container
restart: always
environment:
- POSTGRES_USER=${POSTGRES_USER-postgres}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD-testpwd}
- POSTGRES_DB=${POSTGRES_DB-chinook}
ports:
- '5442:5432'
volumes:
- ../../src/integrations/sql/chinook.sql:/docker-entrypoint-initdb.d/chinook.sql

text2query-sql:
image: opea/text2query-sql:${TAG:-latest}
container_name: text2query-sql-server
ports:
- ${TEXT2SQL_PORT:-9097}:9097
environment:
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT-http://localhost:8008}
TEXT2QUERY_COMPONENT_NAME: "OPEA_TEXT2QUERY_SQL"
depends_on:
- tgi-server
- postgres

text2query-sql-gaudi:
image: opea/text2query-sql:${TAG:-latest}
container_name: text2query-sql-gaudi-server
ports:
- ${TEXT2SQL_PORT:-9097}:9097
environment:
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT-http://localhost:8008}
TEXT2QUERY_COMPONENT_NAME: "OPEA_TEXT2QUERY_SQL"
depends_on:
- tgi-gaudi-server
- postgres

text2query-cypher-gaudi:
image: opea/text2query-cypher:${TAG:-latest}
container_name: text2query-cypher-gaudi-server
ports:
- ${TEXT2CYPHER_PORT:-9097}:9097
depends_on:
neo4j-apoc:
condition: service_healthy
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
INDEX_NAME: ${INDEX_NAME}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG:-False}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
TOKENIZERS_PARALLELISM: False
NEO4J_URI: ${NEO4J_URI}
NEO4J_URL: ${NEO4J_URI}
NEO4J_USERNAME: ${NEO4J_USERNAME}
NEO4J_PASSWORD: ${NEO4J_PASSWORD}
host_ip: ${host_ip}
TEXT2QUERY_COMPONENT_NAME: "OPEA_TEXT2QUERY_CYPHER"
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped

text2query-graph:
image: opea/text2query-graph:${TAG:-latest}
container_name: text2query-graph-server
ports:
- ${TEXT2GRAPH_PORT:-9097}:9097
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- LLM_MODEL_ID=${LLM_MODEL_ID:-"Babelscape/rebel-large"}
- HF_TOKEN=${HF_TOKEN}
- TEXT2QUERY_COMPONENT_NAME=OPEA_TEXT2QUERY_GRAPH
ipc: host
restart: always

networks:
default:
driver: bridge
Empty file.
34 changes: 34 additions & 0 deletions comps/text2query/src/Dockerfile
@@ -0,0 +1,34 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

ENV LANG=C.UTF-8
ARG ARCH=cpu

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
build-essential \
libjemalloc-dev

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

COPY comps /home/user/comps

ARG uvpip='uv pip install --system --no-cache-dir'
RUN pip install --no-cache-dir --upgrade pip setuptools uv && \
if [ ${ARCH} = "cpu" ]; then \
$uvpip torch --index-url https://download.pytorch.org/whl/cpu; \
$uvpip -r /home/user/comps/text2query/src/requirements-cpu.txt; \
else \
$uvpip -r /home/user/comps/text2query/src/requirements-gpu.txt; \
fi

ENV PYTHONPATH=$PYTHONPATH:/home/user

USER user

WORKDIR /home/user/comps/text2query/src/

ENTRYPOINT ["python", "opea_text2query_microservice.py"]
64 changes: 64 additions & 0 deletions comps/text2query/src/Dockerfile.cypher.intel_hpu
@@ -0,0 +1,64 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# HABANA environment
FROM vault.habana.ai/gaudi-docker/1.20.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0 AS hpu

ENV LANG=en_US.UTF-8
ARG REPO=https://github.com/huggingface/optimum-habana.git
ARG REPO_VER=v1.15.0

RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
git-lfs \
libjemalloc-dev

#RUN useradd -m -s /bin/bash user && \
# mkdir -p /home/user && \
# chown -R user /home/user/

RUN git lfs install

COPY comps /root/comps
#RUN chown -R user /home/user/comps/text2cypher

#RUN rm -rf /etc/ssh/ssh_host*

ARG uvpip='uv pip install --system --no-cache-dir'
RUN pip install --no-cache-dir --upgrade pip setuptools uv && \
pip install --no-cache-dir accelerate \
huggingface_hub \
json_repair \
langchain_experimental \
llama-index \
llama-index-embeddings-huggingface \
llama-index-embeddings-langchain \
llama-index-graph-stores-neo4j \
llama-index-llms-huggingface \
llama-index-llms-huggingface-api \
neo4j \
peft \
pydub \
pyprojroot \
sentence-transformers \
unstructured \
urllib3 \
optimum-habana==1.17.0 && \
$uvpip git+https://github.com/HabanaAI/DeepSpeed.git@1.19.0

RUN git clone --depth 1 --branch ${REPO_VER} ${REPO}

WORKDIR /root/comps/text2query/src
RUN $uvpip -r requirements-cpu.txt && \
$uvpip --upgrade --force-reinstall pydantic numpy==1.26.3 transformers==4.49.0

# Set environment variables
ENV PYTHONPATH=/root:/usr/lib/habanalabs/:/root/optimum-habana
ENV HABANA_VISIBLE_DEVICES=all
ENV OMPI_MCA_btl_vader_single_copy_mechanism=none
ENV DEBIAN_FRONTEND="noninteractive" TZ=Etc/UTC

#USER user
WORKDIR /root/comps/text2query/src

ENTRYPOINT ["python", "opea_text2query_microservice.py"]

46 changes: 46 additions & 0 deletions comps/text2query/src/Dockerfile.graph
@@ -0,0 +1,46 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim
ENV LANG=C.UTF-8
ARG ARCH=cpu

RUN apt-get update -y && apt-get install vim -y && apt-get install -y --no-install-recommends --fix-missing \
build-essential

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

COPY comps /home/user/comps

ARG uvpip='uv pip install --system --no-cache-dir'
RUN pip install --no-cache-dir --upgrade pip setuptools uv && \
if [ ${ARCH} = "cpu" ]; then \
$uvpip torch --index-url https://download.pytorch.org/whl/cpu; \
$uvpip -r /home/user/comps/text2query/src/requirements-cpu.txt; \
else \
$uvpip -r /home/user/comps/text2query/src/requirements-gpu.txt; \
fi

ENV LLM_ID=${LLM_ID:-"Babelscape/rebel-large"}
ENV SPAN_LENGTH=${SPAN_LENGTH:-"1024"}
ENV OVERLAP=${OVERLAP:-"100"}
ENV MAX_LENGTH=${MAX_NEW_TOKENS:-"256"}
ENV HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
ENV HF_TOKEN=${HF_TOKEN}
ENV LLM_MODEL_ID=${LLM_ID}
ENV TGI_PORT=8008
ENV PYTHONPATH="/home/user/":$PYTHONPATH

USER user

WORKDIR /home/user/comps/text2query/src/

RUN bash -c 'source /home/user/comps/text2query/src/integrations/graph/setup_service_env.sh'

ENTRYPOINT ["python", "opea_text2query_microservice.py"]