opea-project · joshuayao · Sep 22, 2025 · Sep 19, 2025 · Sep 19, 2025 · Sep 19, 2025
@@ -25,6 +25,7 @@
 /comps/text2graph/ sharath.raghava@intel.com letong.han@intel.com
 /comps/text2image/ xinyu.ye@intel.com liang1.lv@intel.com
 /comps/text2kg/ siddhi.velankar@intel.com letong.han@intel.com
+/comps/text2query/ yogesh.pandey@intel.com jean1.yu@intel.com yi.a.yao@intel.com
 /comps/text2sql/ yogesh.pandey@intel.com qing.yao@intel.com
 /comps/third_parties/ liang1.lv@intel.com letong.han@intel.com
 /comps/tts/ sihan.chen@intel.com letong.han@intel.com

@@ -0,0 +1,14 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# this file should be run in the root of the repo
+services:
+  # SQL flavour of the text2query microservice.
+  text2query-sql:
+    image: ${REGISTRY:-opea}/text2query-sql:${TAG:-latest}
+    build:
+      dockerfile: comps/text2query/src/Dockerfile
+
+  # Cypher flavour, built from the Intel HPU Dockerfile.
+  text2query-cypher:
+    image: ${REGISTRY:-opea}/text2query-cypher:${TAG:-latest}
+    build:
+      dockerfile: comps/text2query/src/Dockerfile.cypher.intel_hpu
@@ -41,6 +41,7 @@ class ServiceType(Enum):
     LANGUAGE_DETECTION = 24
     PROMPT_TEMPLATE = 25
     PROMPT_REGISTRY = 26
+    TEXT2QUERY = 27
 
 
 class MegaServiceEndpoint(Enum):

@@ -1057,3 +1057,13 @@ class FineTuningJobCheckpoint(BaseModel):
 
 class RouteEndpointDoc(BaseModel):
     url: str = Field(..., description="URL of the chosen inference endpoint")
+
+
+class Text2QueryRequest(BaseModel):  # Request payload for text2query; every field is optional or has a default.
+    query: Optional[str] = None  # natural-language question to translate (presumably; see README curl examples)
+    conn_type: Optional[str] = "sql"  # backend selector; defaults to "sql", README examples also send "cypher"
+    conn_url: Optional[str] = None  # database connection URL, e.g. a bolt:// URL in the Cypher README
+    conn_user: Optional[str] = None  # database user name
+    conn_password: Optional[str] = None  # database password, carried as a plain string
+    conn_dialect: Optional[str] = "postgresql"  # defaults to "postgresql"; README Cypher examples send "neo4j"
+    options: Dict = {}  # free-form extras; README examples pass "cypher_insert" and "refresh_db" here
@@ -1,3 +1,9 @@
+# ⚠️ Deprecation Notice: `text2cypher`
+
+**This microservice is no longer actively maintained.**
+
+As of OPEA v1.5, the `text2cypher` microservice is deprecated. Please use the `text2query` microservice instead. `text2cypher` will be removed in OPEA v1.7.
+
 # 🛢 Text-to-Cypher Microservice
 
 The microservice enables a wide range of use cases, making it a versatile tool for businesses, researchers, and individuals alike. Users can generate queries based on natural language questions, enabling them to quickly retrieve relevant data from graph databases. This service executes locally on Intel Gaudi.

@@ -0,0 +1,17 @@
+# 🛢 Text-to-Query Microservice
+
+A text2query microservice is a specialized, independent service designed to translate natural language queries into structured query languages. Its primary function is to act as an interpreter, allowing users to ask questions in plain human language and receive a formal query in return, which can then be executed against a Relational or Graph database. This service bridges the gap between human communication and machine-readable database commands.
+
+## 🛠️ Features
+
+- **Implement SQL Query based on input text**: Transform user-provided natural language into SQL queries, subsequently executing them to retrieve data from SQL databases.
+- **Implement Cypher Query based on input text**: Transform user-provided natural language into Cypher queries, subsequently executing them to retrieve data from a Neo4j graph database.
+
+## ⚙️ Supported Implementations
+
+The Text2Query Microservice supports multiple implementation options to suit different databases. Each implementation includes its own configuration and setup instructions:
+
+| Implementation     | Description                                                     | Supported Hardware | Documentation                  |
+| ------------------ | --------------------------------------------------------------- | ------------------ | ------------------------------ |
+| **Text-to-SQL**    | Transforming user-provided natural language into SQL queries    | Xeon, Gaudi        | [README](src/README_sql.md)    |
+| **Text-to-Cypher** | Transforming user-provided natural language into Cypher queries | Gaudi              | [README](src/README_cypher.md) |
@@ -0,0 +1,94 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+include:
+  - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml
+  - ../../../third_parties/neo4j/deployment/docker_compose/compose.yaml
+
+services:
+  postgres:
+    # PostgreSQL instance; chinook.sql is mounted into the image's
+    # /docker-entrypoint-initdb.d directory.
+    image: postgres:latest
+    container_name: postgres-container
+    restart: always
+    ports:
+      - "5442:5432"
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER-postgres}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD-testpwd}
+      POSTGRES_DB: ${POSTGRES_DB-chinook}
+    volumes:
+      - ../../src/integrations/sql/chinook.sql:/docker-entrypoint-initdb.d/chinook.sql
+
+  text2query-sql:
+    # SQL variant that depends on the non-Gaudi TGI server (tgi-server).
+    image: opea/text2query-sql:${TAG:-latest}
+    container_name: text2query-sql-server
+    depends_on:
+      - tgi-server
+      - postgres
+    environment:
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT-http://localhost:8008}
+      TEXT2QUERY_COMPONENT_NAME: "OPEA_TEXT2QUERY_SQL"
+    ports:
+      - "${TEXT2SQL_PORT:-9097}:9097"
+
+  text2query-sql-gaudi:
+    # Same image and settings as text2query-sql, but depends on the Gaudi
+    # TGI server (tgi-gaudi-server). Both default to host port 9097.
+    image: opea/text2query-sql:${TAG:-latest}
+    container_name: text2query-sql-gaudi-server
+    depends_on:
+      - tgi-gaudi-server
+      - postgres
+    environment:
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT-http://localhost:8008}
+      TEXT2QUERY_COMPONENT_NAME: "OPEA_TEXT2QUERY_SQL"
+    ports:
+      - "${TEXT2SQL_PORT:-9097}:9097"
+
+  text2query-cypher-gaudi:
+    # Cypher variant running on the Habana runtime; waits for neo4j-apoc to
+    # report healthy before starting.
+    image: opea/text2query-cypher:${TAG:-latest}
+    container_name: text2query-cypher-gaudi-server
+    ports:
+      - "${TEXT2CYPHER_PORT:-9097}:9097"
+    depends_on:
+      neo4j-apoc:
+        condition: service_healthy
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      INDEX_NAME: ${INDEX_NAME}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
+      LOGFLAG: ${LOGFLAG:-False}
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      # Quoted so the YAML parser passes the literal string "False" instead of
+      # a boolean.
+      TOKENIZERS_PARALLELISM: "False"
+      # Both variables are populated from NEO4J_URI.
+      NEO4J_URI: ${NEO4J_URI}
+      NEO4J_URL: ${NEO4J_URI}
+      NEO4J_USERNAME: ${NEO4J_USERNAME}
+      NEO4J_PASSWORD: ${NEO4J_PASSWORD}
+      host_ip: ${host_ip}
+      TEXT2QUERY_COMPONENT_NAME: "OPEA_TEXT2QUERY_CYPHER"
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    restart: unless-stopped
+
+  text2query-graph:
+    # Graph variant. Indentation normalised to the 2-space style used by the
+    # other services in this file.
+    image: opea/text2query-graph:${TAG:-latest}
+    container_name: text2query-graph-server
+    ports:
+      - "${TEXT2GRAPH_PORT:-9097}:9097"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      # No quotes inside the default: in a plain scalar they would become part
+      # of the value handed to the container.
+      - LLM_MODEL_ID=${LLM_MODEL_ID:-Babelscape/rebel-large}
+      - HF_TOKEN=${HF_TOKEN}
+      - TEXT2QUERY_COMPONENT_NAME=OPEA_TEXT2QUERY_GRAPH
+    ipc: host
+    restart: always
+
+networks:
+  default:
+    driver: bridge
@@ -0,0 +1,34 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM python:3.11-slim
+
+ENV LANG=C.UTF-8
+# Selects which requirements file is installed below ("cpu" or anything else).
+ARG ARCH=cpu
+
+# Install build dependencies and drop the apt package lists afterwards so they
+# do not stay in the image layer.
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+    build-essential \
+    libjemalloc-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+
+COPY comps /home/user/comps
+
+ARG uvpip='uv pip install --system --no-cache-dir'
+# ARCH is quoted so the test stays well-formed if the build arg is empty.
+RUN pip install --no-cache-dir --upgrade pip setuptools uv && \
+    if [ "${ARCH}" = "cpu" ]; then \
+      $uvpip torch --index-url https://download.pytorch.org/whl/cpu; \
+      $uvpip -r /home/user/comps/text2query/src/requirements-cpu.txt; \
+    else \
+      $uvpip -r /home/user/comps/text2query/src/requirements-gpu.txt; \
+    fi
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user
+
+# Run the service as the unprivileged user created above.
+USER user
+
+WORKDIR /home/user/comps/text2query/src/
+
+ENTRYPOINT ["python", "opea_text2query_microservice.py"]
@@ -0,0 +1,64 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# HABANA environment
+FROM vault.habana.ai/gaudi-docker/1.20.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0 AS hpu
+
+ENV LANG=en_US.UTF-8
+ARG REPO=https://github.com/huggingface/optimum-habana.git
+# NOTE(review): this tag differs from the optimum-habana==1.17.0 pinned in the
+# pip install below - confirm which version is intended.
+ARG REPO_VER=v1.15.0
+
+# Install system packages and drop the apt package lists afterwards so they do
+# not stay in the image layer.
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    git-lfs \
+    libjemalloc-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+#RUN useradd -m -s /bin/bash user && \
+#    mkdir -p /home/user && \
+#    chown -R user /home/user/
+
+RUN git lfs install
+
+COPY comps /root/comps
+#RUN chown -R user /home/user/comps/text2cypher
+
+#RUN rm -rf /etc/ssh/ssh_host*
+
+ARG uvpip='uv pip install --system --no-cache-dir'
+RUN pip install --no-cache-dir --upgrade pip setuptools uv && \
+    pip install --no-cache-dir accelerate \
+    huggingface_hub \
+    json_repair \
+    langchain_experimental \
+    llama-index \
+    llama-index-embeddings-huggingface \
+    llama-index-embeddings-langchain \
+    llama-index-graph-stores-neo4j \
+    llama-index-llms-huggingface \
+    llama-index-llms-huggingface-api \
+    neo4j \
+    peft \
+    pydub \
+    pyprojroot \
+    sentence-transformers \
+    unstructured \
+    urllib3 \
+    optimum-habana==1.17.0 && \
+    $uvpip git+https://github.com/HabanaAI/DeepSpeed.git@1.19.0
+
+# NOTE(review): the clone lands in the base image's working directory, while
+# PYTHONPATH below expects /root/optimum-habana - confirm they match.
+RUN git clone --depth 1 --branch ${REPO_VER} ${REPO}
+
+WORKDIR /root/comps/text2query/src
+RUN $uvpip -r requirements-cpu.txt && \
+    $uvpip --upgrade --force-reinstall pydantic numpy==1.26.3 transformers==4.49.0
+
+# Set environment variables
+ENV PYTHONPATH=/root:/usr/lib/habanalabs/:/root/optimum-habana
+ENV HABANA_VISIBLE_DEVICES=all
+ENV OMPI_MCA_btl_vader_single_copy_mechanism=none
+ENV DEBIAN_FRONTEND="noninteractive"  TZ=Etc/UTC
+
+# The container runs as root; the non-root user setup is commented out.
+#USER user
+WORKDIR /root/comps/text2query/src
+
+ENTRYPOINT ["python", "opea_text2query_microservice.py"]
+
@@ -0,0 +1,105 @@
+# 🛢 Text-to-Cypher Microservice
+
+The microservice enables a wide range of use cases, making it a versatile tool for businesses, researchers, and individuals alike. Users can generate queries based on natural language questions, enabling them to quickly retrieve relevant data from graph databases. This service executes locally on Intel Gaudi.
+
+## Setup Environment Variables.
+
+```bash
+ip_address=$(hostname -I | awk '{print $1}')
+export HF_TOKEN=${HF_TOKEN}
+export NEO4J_USERNAME=neo4j
+export NEO4J_PASSWORD=neo4jtest
+export NEO4J_URI="bolt://${ip_address}:7687"
+export NEO4J_URL=${NEO4J_URI}
+export TEXT2CYPHER_PORT=11801
+export TEXT2QUERY_COMPONENT_NAME="OPEA_TEXT2QUERY_CYPHER"
+```
+
+## 🚀 Start Text2Cypher Microservice with Python (Option 1)
+
+### Install Requirements
+
+```bash
+pip install -r requirements.txt
+```
+
+### Start Text-to-Cypher Microservice with Python Script
+
+Start Text-to-Cypher microservice with below command.
+
+```bash
+python3 opea_text2query_microservice.py
+```
+
+## 🚀 Start Microservice with Docker (Option 2)
+
+### Build Docker Image
+
+```bash
+cd GenAIComps/
+docker build -t opea/text2query-cypher:latest -f comps/text2query/src/Dockerfile.cypher.intel_hpu .
+```
+
+### Run Docker with CLI (Option A)
+
+```bash
+docker run  --name="comps-langchain-text2query-cypher"  -p ${TEXT2CYPHER_PORT}:9097 --ipc=host opea/text2query-cypher:latest
+```
+
+### Run via docker compose (Option B)
+
+#### Start the services.
+
+- Gaudi2 HPU
+
+```bash
+cd comps/text2query/deployment/docker_compose
+docker compose -f compose.yaml up text2query-cypher-gaudi -d
+```
+
+---
+
+## ✅ Invoke the microservice.
+
+The Text-to-Cypher microservice exposes the following API endpoints:
+
+- Execute Cypher Query with Pre-seeded Data and Schema:
+
+  ```bash
+  curl http://${ip_address}:${TEXT2CYPHER_PORT}/v1/text2cypher\
+        -X POST \
+        -d '{"query": "what are the symptoms for Diabetes?","conn_type": "cypher","conn_user": "'${NEO4J_USERNAME}'","conn_password": "neo4jtest","conn_url": "'${NEO4J_URL}'","conn_dialect": "neo4j" }' \
+        -H 'Content-Type: application/json'
+  ```
+
+- Execute Cypher Query with User Data and Schema:
+
+Define customized cypher_insert statements:
+
+```bash
+export cypher_insert='
+ LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/d/e/2PACX-1vQCEUxVlMZwwI2sn2T1aulBrRzJYVpsM9no8AEsYOOklCDTljoUIBHItGnqmAez62wwLpbvKMr7YoHI/pub?gid=0&single=true&output=csv" AS rows
+ MERGE (d:disease {name:rows.Disease})
+ MERGE (dt:diet {name:rows.Diet})
+ MERGE (d)-[:HOME_REMEDY]->(dt)
+
+ MERGE (m:medication {name:rows.Medication})
+ MERGE (d)-[:TREATMENT]->(m)
+
+ MERGE (s:symptoms {name:rows.Symptom})
+ MERGE (d)-[:MANIFESTATION]->(s)
+
+ MERGE (p:precaution {name:rows.Precaution})
+ MERGE (d)-[:PREVENTION]->(p)
+'
+```
+
+Pass the cypher_insert to the text2cypher service. The user can also specify whether to refresh the Neo4j database using the refresh_db option.
+
+```bash
+ curl http://${ip_address}:${TEXT2CYPHER_PORT}/v1/text2cypher \
+        -X POST \
+        -d '{"query": "what are the symptoms for Diabetes?", "conn_type": "cypher", "conn_user": "'${NEO4J_USERNAME}'", "conn_password": "neo4jtest", "conn_url": "'${NEO4J_URL}'", "conn_dialect": "neo4j", "options": {"cypher_insert": "'"${cypher_insert}"'", "refresh_db": "True"}}' \
+        -H 'Content-Type: application/json'
+
+```