Skip to content

Commit c121b96

Browse files
author
longbingljw
authored
support thai_ftparser config (#26)
* fix image name tag * fix: redis mode doesn't need caches * tokenizer init in ob container * fix rag concurrent issue * remove thai_tokenizer config * fix * choose redis, ob cluster name to be difyai-redis * support blocking lock * fix lock expire and clean lock * fix * support parser config * fix when using mysql cache * fix * fix
1 parent 42a93ec commit c121b96

File tree

12 files changed

+625
-196
lines changed

12 files changed

+625
-196
lines changed

api/Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ RUN apt-get update \
2121

2222
# Install Python dependencies
2323
COPY pyproject.toml uv.lock ./
24+
2425
RUN uv sync --index-url https://pypi.tuna.tsinghua.edu.cn/simple
2526
# RUN uv sync
2627

@@ -76,6 +77,9 @@ RUN python -c "import tiktoken; tiktoken.encoding_for_model('gpt2')"
7677
# Copy source code
7778
COPY . /app/api/
7879

80+
# Fix pyobvector bug
81+
RUN python /app/api/fix_pyobvector_bug.py
82+
7983
# Copy entrypoint
8084
COPY docker/entrypoint.sh /entrypoint.sh
8185
RUN chmod +x /entrypoint.sh

api/commands.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -703,8 +703,7 @@ def ensure_caches_table_exists():
703703
@click.option("--directory", prompt=False, help="The target migration script directory.")
704704
def upgrade_db(directory: Optional[str] = None):
705705
click.echo("Preparing database migration...")
706-
707-
# 1. 先确保 caches 表存在(原子操作)
706+
# 1. 在MySQL缓存模式下,确保 caches 表存在
708707
# 这样在MySQL缓存模式下,分布式锁可以正常工作
709708
if "mysql" in dify_config.SQLALCHEMY_DATABASE_URI_SCHEME and dify_config.CACHE_SCHEME == "mysql":
710709
try:
@@ -713,8 +712,9 @@ def upgrade_db(directory: Optional[str] = None):
713712
click.echo(click.style(f"Error: Failed to ensure caches table: {e}", fg="red"))
714713
click.echo(click.style("Migration stopped due to caches table creation failure.", fg="red"))
715714
raise Exception(f"Migration failed: {e}")
715+
716+
# 2. 使用分布式锁
716717

717-
# 2. 然后使用分布式锁(可以安全使用了)
718718
lock = redis_client.lock(name="db_upgrade_lock", timeout=60)
719719
if lock.acquire(blocking=False):
720720
try:

api/configs/middleware/vdb/oceanbase_config.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,15 @@ class OceanBaseVectorConfig(BaseSettings):
3939
"with older versions",
4040
default=False,
4141
)
42+
43+
OCEANBASE_FULLTEXT_PARSER: Optional[str] = Field(
44+
description="Fulltext parser to use for text indexing. Options: 'thai_ftparser' (Thai), 'ik' (Chinese), "
45+
"'auto' (automatic language detection). Default is 'ik'",
46+
default="ik",
47+
)
48+
49+
OCEANBASE_ENABLE_LANGUAGE_DETECTION: bool = Field(
50+
description="Enable automatic language detection for choosing appropriate fulltext parser. "
51+
"When disabled, uses OCEANBASE_FULLTEXT_PARSER setting",
52+
default=True,
53+
)

api/core/plugin/impl/plugin.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def list_plugins(self, tenant_id: str) -> list[PluginEntity]:
3737
f"plugin/{tenant_id}/management/list",
3838
PluginListResponse,
3939
params={"page": 1, "page_size": 256, "response_type": "paged"},
40+
4041
)
4142
return result.list
4243

@@ -46,6 +47,31 @@ def list_plugins_with_total(self, tenant_id: str, page: int, page_size: int) ->
4647
f"plugin/{tenant_id}/management/list",
4748
PluginListResponse,
4849
params={"page": page, "page_size": page_size, "response_type": "paged"},
50+
# compatibility: adapt wrapped {code,message,data}, wrapped with data as list, bare {list,total}, or bare list
51+
transformer=lambda r: (
52+
r
53+
if (
54+
isinstance(r, dict)
55+
and {"code", "message", "data"} <= set(r.keys())
56+
and isinstance(r.get("data"), dict)
57+
and {"list", "total"} <= set(r["data"].keys())
58+
)
59+
else (
60+
{
61+
"code": (r.get("code", 0) if isinstance(r, dict) else 0),
62+
"message": (r.get("message", "") if isinstance(r, dict) else ""),
63+
"data": (
64+
{"list": r["data"], "total": len(r["data"])}
65+
if (isinstance(r, dict) and isinstance(r.get("data"), list))
66+
else (
67+
r
68+
if (isinstance(r, dict) and {"list", "total"} <= set(r.keys()))
69+
else {"list": (r if isinstance(r, list) else []), "total": (len(r) if isinstance(r, list) else 0)}
70+
)
71+
),
72+
}
73+
)
74+
),
4975
)
5076

5177
def upload_pkg(

api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,16 +117,22 @@ def _create_collection(self) -> None:
117117
columns=cols,
118118
vidxs=vidx_params,
119119
)
120-
try:
121-
if self._hybrid_search_enabled:
120+
if self._hybrid_search_enabled:
121+
# Get parser from config or use default ik parser
122+
parser_name = dify_config.OCEANBASE_FULLTEXT_PARSER or "ik"
123+
124+
logger.info(f"Creating fulltext index for collection '{self._collection_name}' using parser '{parser_name}'")
125+
126+
try:
122127
self._client.perform_raw_text_sql(f"""ALTER TABLE {self._collection_name}
123-
ADD FULLTEXT INDEX fulltext_index_for_col_text (text) WITH PARSER ik""")
124-
except Exception as e:
125-
raise Exception(
126-
"Failed to add fulltext index to the target table, your OceanBase version must be 4.3.5.1 or above "
127-
+ "to support fulltext index and vector index in the same table",
128-
e,
129-
)
128+
ADD FULLTEXT INDEX fulltext_index_for_col_text (text) WITH PARSER {parser_name}""")
129+
except Exception as e:
130+
raise Exception(
131+
"Failed to add fulltext index to the target table, your OceanBase version must be 4.3.5.1 or above "
132+
+ "to support fulltext index and vector index in the same table",
133+
e,
134+
)
135+
130136
redis_client.set(collection_exist_cache_key, 1, ex=3600)
131137

132138
def _check_hybrid_search_support(self) -> bool:

0 commit comments

Comments
 (0)