
Commit 09da4ba

⭐️ Release/v1.7.7

2 parents: e27d265 + c09476d

File tree: 232 files changed (+13113 / -5862 lines)


.gitignore

Lines changed: 0 additions & 1 deletion
@@ -1,5 +1,4 @@
 .idea
-.gitignore
 /.env
 .vscode
backend/agents/create_agent_info.py

Lines changed: 20 additions & 6 deletions
@@ -9,6 +9,7 @@
 from nexent.core.agents.agent_model import AgentRunInfo, ModelConfig, AgentConfig, ToolConfig
 from nexent.memory.memory_service import search_memory_in_levels
 
+from services.file_management_service import get_llm_model
 from services.vectordatabase_service import (
     ElasticSearchService,
     get_vector_db_core,
@@ -17,13 +18,15 @@
 from services.tenant_config_service import get_selected_knowledge_list
 from services.remote_mcp_service import get_remote_mcp_server_list
 from services.memory_config_service import build_memory_context
+from services.image_service import get_vlm_model
 from database.agent_db import search_agent_info_by_agent_id, query_sub_agents_id_list
 from database.tool_db import search_tools_for_sub_agent
 from database.model_management_db import get_model_records, get_model_by_model_id
+from database.client import minio_client
 from utils.model_name_utils import add_repo_to_name
 from utils.prompt_template_utils import get_agent_prompt_template
 from utils.config_utils import tenant_config_manager, get_model_name_from_config
-from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE
+from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE
 
 logger = logging.getLogger("create_agent_info")
 logger.setLevel(logging.DEBUG)
@@ -236,6 +239,18 @@ async def create_tool_config_list(agent_id, tenant_id, user_id):
                 "vdb_core": get_vector_db_core(),
                 "embedding_model": get_embedding_model(tenant_id=tenant_id),
             }
+        elif tool_config.class_name == "AnalyzeTextFileTool":
+            tool_config.metadata = {
+                "llm_model": get_llm_model(tenant_id=tenant_id),
+                "storage_client": minio_client,
+                "data_process_service_url": DATA_PROCESS_SERVICE
+            }
+        elif tool_config.class_name == "AnalyzeImageTool":
+            tool_config.metadata = {
+                "vlm_model": get_vlm_model(tenant_id=tenant_id),
+                "storage_client": minio_client,
+            }
+
         tool_config_list.append(tool_config)
 
     return tool_config_list
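The new branches wire per-tool dependencies at agent-build time: AnalyzeTextFileTool receives the tenant's LLM, the shared MinIO client, and the data-process service URL, while AnalyzeImageTool receives the tenant's VLM. A minimal standalone sketch of the dispatch pattern; the stub values below are invented for illustration and stand in for get_llm_model, get_vlm_model, minio_client, and DATA_PROCESS_SERVICE:

# Sketch only: stubs replace the real model getters and MinIO client.
class ToolConfig:
    def __init__(self, class_name: str):
        self.class_name = class_name
        self.metadata: dict = {}

def attach_metadata(tool_config: ToolConfig, tenant_id: str) -> ToolConfig:
    # Mirrors the elif chain added to create_tool_config_list.
    if tool_config.class_name == "AnalyzeTextFileTool":
        tool_config.metadata = {
            "llm_model": f"llm-for-{tenant_id}",           # stub for get_llm_model(tenant_id=...)
            "storage_client": "minio-client-stub",         # stub for database.client.minio_client
            "data_process_service_url": "http://dp:8080",  # stub for DATA_PROCESS_SERVICE
        }
    elif tool_config.class_name == "AnalyzeImageTool":
        tool_config.metadata = {
            "vlm_model": f"vlm-for-{tenant_id}",           # stub for get_vlm_model(tenant_id=...)
            "storage_client": "minio-client-stub",
        }
    return tool_config

print(attach_metadata(ToolConfig("AnalyzeImageTool"), "tenant-1").metadata)
# {'vlm_model': 'vlm-for-tenant-1', 'storage_client': 'minio-client-stub'}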
@@ -299,13 +314,12 @@ async def join_minio_file_description_to_query(minio_files, query):
     if minio_files and isinstance(minio_files, list):
         file_descriptions = []
         for file in minio_files:
-            if isinstance(file, dict) and "description" in file and file["description"]:
-                file_descriptions.append(file["description"])
-
+            if isinstance(file, dict) and "url" in file and file["url"] and "name" in file and file["name"]:
+                file_descriptions.append(f"File name: {file['name']}, S3 URL: s3:/{file['url']}")
         if file_descriptions:
-            final_query = "User provided some reference files:\n"
+            final_query = "User uploaded files. The file information is as follows:\n"
             final_query += "\n".join(file_descriptions) + "\n\n"
-            final_query += f"User wants to answer questions based on the above information: {query}"
+            final_query += f"User wants to answer questions based on the information in the above files: {query}"
     return final_query
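File references handed to the agent are now serialized from each file's name and url rather than a free-text description, and the wrapper prompt is reworded to match. A self-contained copy of the updated helper for illustration (the async wrapper is dropped, and the initial final_query = query fallback is assumed from context outside the hunk); the sample file entry is invented:

def join_minio_file_description_to_query(minio_files, query):
    final_query = query  # assumed fallback when no usable file entries exist
    if minio_files and isinstance(minio_files, list):
        file_descriptions = []
        for file in minio_files:
            if isinstance(file, dict) and "url" in file and file["url"] and "name" in file and file["name"]:
                file_descriptions.append(f"File name: {file['name']}, S3 URL: s3:/{file['url']}")
        if file_descriptions:
            final_query = "User uploaded files. The file information is as follows:\n"
            final_query += "\n".join(file_descriptions) + "\n\n"
            final_query += f"User wants to answer questions based on the information in the above files: {query}"
    return final_query

print(join_minio_file_description_to_query(
    [{"name": "report.pdf", "url": "/warehouse/report.pdf"}],  # hypothetical entry
    "Summarize the findings."))

Note the single slash in the s3:/ prefix: a url that already begins with / produces a well-formed s3://... URI, as in "S3 URL: s3://warehouse/report.pdf" for the sample above.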
backend/apps/agent_app.py

Lines changed: 5 additions & 1 deletion
@@ -134,7 +134,11 @@ async def import_agent_api(request: AgentImportRequest, authorization: Optional[
     import an agent
     """
     try:
-        await import_agent_impl(request.agent_info, authorization)
+        await import_agent_impl(
+            request.agent_info,
+            authorization,
+            force_import=request.force_import
+        )
         return {}
     except Exception as e:
         logger.error(f"Agent import error: {str(e)}")
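The route now forwards the new force_import flag from AgentImportRequest (declared with a False default in backend/consts/model.py below). A hedged client sketch; the host, route path, and token are assumptions not confirmed by this diff, and agent_info must be a full ExportAndImportDataFormat payload from a prior export:

import requests  # sketch: URL and route below are assumptions

exported_agent = {}  # fill with an ExportAndImportDataFormat dict from a prior export

resp = requests.post(
    "http://localhost:5010/agent/import",  # hypothetical host and path
    json={
        "agent_info": exported_agent,
        "force_import": True,  # new in v1.7.7; omitting it defaults to False
    },
    headers={"Authorization": "Bearer <token>"},
)
resp.raise_for_status()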

backend/apps/file_management_app.py

Lines changed: 2 additions & 63 deletions
@@ -1,16 +1,13 @@
 import logging
-import os
 from http import HTTPStatus
 from typing import List, Optional
 
-from fastapi import APIRouter, Body, File, Form, Header, HTTPException, Path as PathParam, Query, Request, UploadFile
+from fastapi import APIRouter, Body, File, Form, Header, HTTPException, Path as PathParam, Query, UploadFile
 from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
 
 from consts.model import ProcessParams
 from services.file_management_service import upload_to_minio, upload_files_impl, \
-    get_file_url_impl, get_file_stream_impl, delete_file_impl, list_files_impl, \
-    preprocess_files_generator
-from utils.auth_utils import get_current_user_info
+    get_file_url_impl, get_file_stream_impl, delete_file_impl, list_files_impl
 from utils.file_management_utils import trigger_data_process
 
 logger = logging.getLogger("file_management_app")
@@ -271,61 +268,3 @@ async def get_storage_file_batch_urls(
         "failed_count": sum(1 for r in results if not r.get("success", False)),
         "results": results
     }
-
-
-@file_management_runtime_router.post("/preprocess")
-async def agent_preprocess_api(
-    request: Request, query: str = Form(...),
-    files: List[UploadFile] = File(...),
-    authorization: Optional[str] = Header(None)
-):
-    """
-    Preprocess uploaded files and return streaming response
-    """
-    try:
-        # Pre-read and cache all file contents
-        user_id, tenant_id, language = get_current_user_info(
-            authorization, request)
-        file_cache = []
-        for file in files:
-            try:
-                content = await file.read()
-                file_cache.append({
-                    "filename": file.filename or "",
-                    "content": content,
-                    "ext": os.path.splitext(file.filename or "")[1].lower()
-                })
-            except Exception as e:
-                file_cache.append({
-                    "filename": file.filename or "",
-                    "error": str(e)
-                })
-
-        # Generate unique task ID for this preprocess operation
-        import uuid
-        task_id = str(uuid.uuid4())
-        conversation_id = request.query_params.get("conversation_id")
-        if conversation_id:
-            conversation_id = int(conversation_id)
-        else:
-            conversation_id = -1  # Default for cases without conversation_id
-
-        # Call service layer to generate streaming response
-        return StreamingResponse(
-            preprocess_files_generator(
-                query=query,
-                file_cache=file_cache,
-                tenant_id=tenant_id,
-                language=language,
-                task_id=task_id,
-                conversation_id=conversation_id
-            ),
-            media_type="text/event-stream",
-            headers={
-                "Cache-Control": "no-cache",
-                "Connection": "keep-alive"
-            }
-        )
-    except Exception as e:
-        raise HTTPException(
-            status_code=500, detail=f"File preprocessing error: {str(e)}")

backend/apps/vectordatabase_app.py

Lines changed: 123 additions & 1 deletion
@@ -5,7 +5,7 @@
 from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query
 from fastapi.responses import JSONResponse
 
-from consts.model import IndexingResponse
+from consts.model import ChunkCreateRequest, ChunkUpdateRequest, HybridSearchRequest, IndexingResponse
 from nexent.vector_database.base import VectorDatabaseCore
 from services.vectordatabase_service import (
     ElasticSearchService,
@@ -226,3 +226,125 @@ def get_index_chunks(
             f"Error getting chunks for index '{index_name}': {error_msg}")
         raise HTTPException(
             status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error getting chunks: {error_msg}")
+
+
+@router.post("/{index_name}/chunk")
+def create_chunk(
+    index_name: str = Path(..., description="Name of the index"),
+    payload: ChunkCreateRequest = Body(..., description="Chunk data"),
+    vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
+    authorization: Optional[str] = Header(None),
+):
+    """Create a manual chunk."""
+    try:
+        user_id, _ = get_current_user_id(authorization)
+        result = ElasticSearchService.create_chunk(
+            index_name=index_name,
+            chunk_request=payload,
+            vdb_core=vdb_core,
+            user_id=user_id,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=result)
+    except Exception as exc:
+        logger.error(
+            "Error creating chunk for index %s: %s", index_name, exc, exc_info=True
+        )
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(exc)
+        )
+
+
+@router.put("/{index_name}/chunk/{chunk_id}")
+def update_chunk(
+    index_name: str = Path(..., description="Name of the index"),
+    chunk_id: str = Path(..., description="Chunk identifier"),
+    payload: ChunkUpdateRequest = Body(...,
+                                       description="Chunk update payload"),
+    vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
+    authorization: Optional[str] = Header(None),
+):
+    """Update an existing chunk."""
+    try:
+        user_id, _ = get_current_user_id(authorization)
+        result = ElasticSearchService.update_chunk(
+            index_name=index_name,
+            chunk_id=chunk_id,
+            chunk_request=payload,
+            vdb_core=vdb_core,
+            user_id=user_id,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=result)
+    except ValueError as exc:
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except Exception as exc:
+        logger.error(
+            "Error updating chunk %s for index %s: %s",
+            chunk_id,
+            index_name,
+            exc,
+            exc_info=True,
+        )
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(exc)
+        )
+
+
+@router.delete("/{index_name}/chunk/{chunk_id}")
+def delete_chunk(
+    index_name: str = Path(..., description="Name of the index"),
+    chunk_id: str = Path(..., description="Chunk identifier"),
+    vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
+    authorization: Optional[str] = Header(None),
+):
+    """Delete a chunk."""
+    try:
+        get_current_user_id(authorization)
+        result = ElasticSearchService.delete_chunk(
+            index_name=index_name,
+            chunk_id=chunk_id,
+            vdb_core=vdb_core,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=result)
+    except ValueError as exc:
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except Exception as exc:
+        logger.error(
+            "Error deleting chunk %s for index %s: %s",
+            chunk_id,
+            index_name,
+            exc,
+            exc_info=True,
+        )
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(exc)
+        )
+
+
+@router.post("/search/hybrid")
+async def hybrid_search(
+    payload: HybridSearchRequest,
+    vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
+    authorization: Optional[str] = Header(None),
+):
+    """Run a hybrid (accurate + semantic) search across indices."""
+    try:
+        _, tenant_id = get_current_user_id(authorization)
+        result = ElasticSearchService.search_hybrid(
+            index_names=payload.index_names,
+            query=payload.query,
+            tenant_id=tenant_id,
+            top_k=payload.top_k,
+            weight_accurate=payload.weight_accurate,
+            vdb_core=vdb_core,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=result)
+    except ValueError as exc:
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST,
                            detail=str(exc))
+    except Exception as exc:
+        logger.error(f"Hybrid search failed: {exc}", exc_info=True)
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail=f"Error executing hybrid search: {str(exc)}",
+        )
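A hedged sketch of calling the new hybrid-search route; the router prefix, host, and port are not visible in this diff, so the URL is an assumption. The payload fields follow HybridSearchRequest (defined in backend/consts/model.py below):

import requests  # sketch: the base URL below is an assumption

payload = {
    "query": "quarterly revenue",   # required, min_length=1
    "index_names": ["kb_finance"],  # required, at least one index
    "top_k": 5,                     # optional, 1..100 (default 10)
    "weight_accurate": 0.7,         # optional, 0.0..1.0 (default 0.5)
}
resp = requests.post(
    "http://localhost:5010/indices/search/hybrid",  # hypothetical prefix
    json=payload,
    headers={"Authorization": "Bearer <token>"},
)
print(resp.status_code, resp.json())

Per the field description, weight_accurate is the weight given to the accurate (exact-match) score when blending it with the semantic score, so 0.7 leans toward keyword matches.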

backend/consts/const.py

Lines changed: 1 addition & 1 deletion
@@ -279,7 +279,7 @@ class VectorDatabaseType(str, Enum):
     os.getenv("LLM_SLOW_TOKEN_RATE_THRESHOLD", "10.0"))  # tokens per second
 
 # APP Version
-APP_VERSION = "v1.7.6"
+APP_VERSION = "v1.7.7"
 
 DEFAULT_ZH_TITLE = "新对话"
 DEFAULT_EN_TITLE = "New Conversation"

backend/consts/model.py

Lines changed: 38 additions & 0 deletions
@@ -175,6 +175,43 @@ class IndexingResponse(BaseModel):
     total_submitted: int
 
 
+class ChunkCreateRequest(BaseModel):
+    """Request payload for manual chunk creation."""
+
+    content: str = Field(..., min_length=1, description="Chunk content")
+    title: Optional[str] = Field(None, description="Optional chunk title")
+    filename: Optional[str] = Field(None, description="Associated file name")
+    path_or_url: Optional[str] = Field(None, description="Source path or URL")
+    chunk_id: Optional[str] = Field(
+        None, description="Explicit chunk identifier")
+    metadata: Dict[str, Any] = Field(
+        default_factory=dict, description="Additional chunk metadata")
+
+
+class ChunkUpdateRequest(BaseModel):
+    """Request payload for chunk updates."""
+
+    content: Optional[str] = Field(None, description="Updated chunk content")
+    title: Optional[str] = Field(None, description="Updated chunk title")
+    filename: Optional[str] = Field(None, description="Updated file name")
+    path_or_url: Optional[str] = Field(
+        None, description="Updated source path or URL")
+    metadata: Dict[str, Any] = Field(
+        default_factory=dict, description="Additional metadata updates")
+
+
+class HybridSearchRequest(BaseModel):
+    """Request payload for hybrid knowledge-base searches."""
+    query: str = Field(..., min_length=1,
+                       description="Search query text")
+    index_names: List[str] = Field(..., min_items=1,
+                                   description="List of index names to search")
+    top_k: int = Field(10, ge=1, le=100,
+                       description="Number of results to return")
+    weight_accurate: float = Field(0.5, ge=0.0, le=1.0,
+                                   description="Weight applied to accurate search scores")
+
+
 # Request models
 class ProcessParams(BaseModel):
     chunking_strategy: Optional[str] = "basic"
@@ -304,6 +341,7 @@ class ExportAndImportDataFormat(BaseModel):
 
 class AgentImportRequest(BaseModel):
     agent_info: ExportAndImportDataFormat
+    force_import: bool = False
 
 
 class ConvertStateRequest(BaseModel):
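The new models rely on Pydantic field constraints, so malformed chunk and search requests are rejected before any Elasticsearch call. A standalone sketch of HybridSearchRequest's defaults and bounds (the class is redeclared here so the example runs on its own):

from typing import List
from pydantic import BaseModel, Field, ValidationError

class HybridSearchRequest(BaseModel):
    # Redeclared from backend/consts/model.py for a self-contained demo.
    query: str = Field(..., min_length=1)
    index_names: List[str] = Field(..., min_items=1)
    top_k: int = Field(10, ge=1, le=100)
    weight_accurate: float = Field(0.5, ge=0.0, le=1.0)

ok = HybridSearchRequest(query="revenue", index_names=["kb_finance"])
print(ok.top_k, ok.weight_accurate)  # 10 0.5 (defaults applied)

try:
    HybridSearchRequest(query="revenue", index_names=[], top_k=0)
except ValidationError as e:
    # Two violations: empty index_names and top_k below its lower bound.
    print(len(e.errors()), "validation errors")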
