Skip to content

Commit bf23973

Browse files
authored
fix: append index error when setting an existing collection name (#266)
1 parent 4053177 commit bf23973

File tree

4 files changed

+392
-22
lines changed

4 files changed

+392
-22
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ Less Code, Lower Barrier, Faster Deployment
2828

2929
*Latest News* 🔥
3030

31-
- [2026.01.23] 🎉 🎉 UltraRAG 3.0 Released: Say no to "black box" development—make every line of reasoning logic clearly visible 👉|[📖 Blog](https://github.com/OpenBMB/UltraRAG/blob/page/project/blog/en/ultrarag3_0.md)|
31+
- [2026.01.23] 🎉 UltraRAG 3.0 Released: Say no to "black box" development—make every line of reasoning logic clearly visible 👉|[📖 Blog](https://github.com/OpenBMB/UltraRAG/blob/page/project/blog/en/ultrarag3_0.md)|
3232
- [2026.01.20] 🎉 AgentCPM-Report Model Released! DeepResearch is finally localized: 8B on-device writing agent AgentCPM-Report is open-sourced 👉 |[🤗 Model](https://huggingface.co/openbmb/AgentCPM-Report)|
3333

3434
<details>

ui/backend/pipeline_manager.py

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,12 @@ def _normalize_collection_name(raw_name: str) -> str:
134134
return normalized[:MAX_COLLECTION_NAME_LEN]
135135

136136

137+
def _normalize_display_name(name: str) -> str:
138+
normalized = unicodedata.normalize("NFKC", str(name or "")).strip().lower()
139+
normalized = re.sub(r"\s+", " ", normalized)
140+
return normalized
141+
142+
137143
def _make_safe_collection_name(display_name: str) -> tuple[str, str]:
138144
base = _normalize_collection_name(display_name)
139145

@@ -2567,7 +2573,8 @@ def run_kb_pipeline_tool(
25672573

25682574
# Get existing collection names and display names for deduplication
25692575
existing_collections: set[str] = set()
2570-
existing_display_names: set[str] = set()
2576+
existing_display_names: dict[str, str] = {}
2577+
display_name_lookup: dict[str, str] = {}
25712578
client = None
25722579
try:
25732580
client = _get_milvus_client()
@@ -2577,9 +2584,9 @@ def run_kb_pipeline_tool(
25772584
desc = client.describe_collection(_name).get("description", "")
25782585
except Exception:
25792586
desc = ""
2580-
existing_display_names.add(
2581-
_extract_display_name_from_desc(desc, _name)
2582-
)
2587+
display_name = _extract_display_name_from_desc(desc, _name)
2588+
existing_display_names[_name] = display_name
2589+
display_name_lookup[_normalize_display_name(display_name)] = _name
25832590
except Exception as exc:
25842591
raise PipelineManagerError(f"Milvus connection failed: {exc}") from exc
25852592
finally:
@@ -2589,14 +2596,33 @@ def run_kb_pipeline_tool(
25892596
except Exception:
25902597
pass
25912598

2592-
# Convert to pinyin -> ASCII slug, then deduplicate
2593-
slug_base = _transliterate_name(requested_name)
2594-
safe_collection_name = _make_unique_name(slug_base, existing_collections)
2599+
existing_display_name_values = set(existing_display_names.values())
2600+
normalized_request = _normalize_display_name(requested_name)
2601+
matched_collection = None
2602+
if requested_name in existing_collections:
2603+
matched_collection = requested_name
2604+
elif normalized_request and normalized_request in display_name_lookup:
2605+
matched_collection = display_name_lookup[normalized_request]
2606+
2607+
if matched_collection and index_mode == "new":
2608+
raise PipelineManagerError(
2609+
"Collection name already exists. Choose append or overwrite."
2610+
)
25952611

2596-
# Display name uses original input, add (1) increment if duplicate
2597-
display_collection_name = _make_unique_display(
2598-
requested_name, existing_display_names
2599-
)
2612+
if matched_collection and index_mode in {"append", "overwrite"}:
2613+
safe_collection_name = matched_collection
2614+
display_collection_name = existing_display_names.get(
2615+
matched_collection, requested_name
2616+
)
2617+
else:
2618+
# Convert to pinyin -> ASCII slug, then deduplicate
2619+
slug_base = _transliterate_name(requested_name)
2620+
safe_collection_name = _make_unique_name(slug_base, existing_collections)
2621+
2622+
# Display name uses original input, add (1) increment if duplicate
2623+
display_collection_name = _make_unique_display(
2624+
requested_name, existing_display_name_values
2625+
)
26002626

26012627
is_overwrite = index_mode == "overwrite"
26022628

ui/frontend/index.html

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -795,15 +795,17 @@ <h5 class="mb-3 fw-bold">Build Vector Index</h5>
795795
</div>
796796

797797
<div class="mb-3">
798-
<label class="form-label small text-muted text-uppercase fw-bold">Collection Name</label>
798+
<label class="form-label small text-muted text-uppercase fw-bold" id="idx-collection-label">Collection Name</label>
799799
<input type="text" id="idx-collection" class="form-control" placeholder="e.g. wiki_v1">
800+
<select id="idx-collection-select" class="form-select d-none"></select>
800801
</div>
801802

802803
<div class="mb-4">
803804
<label class="form-label small text-muted text-uppercase fw-bold">Mode</label>
804805
<select id="idx-mode" class="form-select">
805-
<option value="append">Append (Add Data)</option>
806-
<option value="overwrite">Overwrite (Drop Collection)</option>
806+
<option value="new">New</option>
807+
<option value="append">Append</option>
808+
<option value="overwrite">Overwrite</option>
807809
</select>
808810
</div>
809811

0 commit comments

Comments
 (0)