
Commit 86fb92c

Author: YangSen-qn (committed)

Merge branch 'main' into upload

# Conflicts:
#   src/mcp_server/core/storage/storage.py

2 parents: 970ebba + ed7a411

File tree: 2 files changed (+42 −88 lines changed)

src/mcp_server/core/storage/storage.py

Lines changed: 33 additions & 78 deletions
@@ -12,15 +12,7 @@
 
 
 class StorageService:
-    """
-    S3 Resource provider that handles interactions with AWS S3 buckets.
-    Part of a collection of resource providers (S3, DynamoDB, etc.) for the MCP server.
-    """
-
     def __init__(self, cfg: config.Config = None):
-        """
-        Initialize S3 resource provider
-        """
         # Configure boto3 with retries and timeouts
         self.s3_config = S3Config(
             retries=dict(max_attempts=3, mode="adaptive"),
@@ -36,15 +28,6 @@ def __init__(self, cfg: config.Config = None):
     def get_object_url(
             self, bucket: str, key: str, disable_ssl: bool = False, expires: int = 3600
     ) -> list[dict[str:Any]]:
-        """
-        Get the object
-        :param disable_ssl:
-        :param bucket:
-        :param key:
-        :param expires:
-        :return: dict
-        Returns the object information
-        """
         # Get the download domains
         domains_getter = getattr(self.bucket_manager, "_BucketManager__uc_do_with_retrier")
         domains_list, domain_response = domains_getter('/v3/domains?tbl={0}'.format(bucket))
@@ -126,15 +109,6 @@ async def list_buckets(self, prefix: Optional[str] = None) -> List[dict]:
     async def list_objects(
             self, bucket: str, prefix: str = "", max_keys: int = 20, start_after: str = ""
     ) -> List[dict]:
-        """
-        List objects in a specific bucket using async client with pagination
-        Args:
-            bucket: Name of the S3 bucket
-            prefix: Object prefix for filtering
-            max_keys: Maximum number of keys to return
-            start_after: the index that list from,can be last object key
-        """
-        #
         if self.config.buckets and bucket not in self.config.buckets:
             logger.warning(f"Bucket {bucket} not in configured bucket list")
             return []
@@ -160,50 +134,31 @@ async def list_objects(
         )
         return response.get("Contents", [])
 
-    async def get_object(
-            self, bucket: str, key: str, max_retries: int = 3
-    ) -> Dict[str, Any]:
-        """
-        Get object from S3 using streaming to handle large files and PDFs reliably.
-        The method reads the stream in chunks and concatenates them before returning.
-        """
+    async def get_object(self, bucket: str, key: str) -> Dict[str, Any]:
         if self.config.buckets and bucket not in self.config.buckets:
             logger.warning(f"Bucket {bucket} not in configured bucket list")
             return {}
 
-        attempt = 0
-        last_exception = None
-
-        while attempt < max_retries:
-            try:
-                async with self.s3_session.client(
-                    "s3",
-                    aws_access_key_id=self.config.access_key,
-                    aws_secret_access_key=self.config.secret_key,
-                    endpoint_url=self.config.endpoint_url,
-                    region_name=self.config.region_name,
-                    config=self.s3_config,
-                ) as s3:
-                    # Get the object and its stream
-                    response = await s3.get_object(Bucket=bucket, Key=key)
-                    stream = response["Body"]
-
-                    # Read the entire stream in chunks
-                    chunks = []
-                    async for chunk in stream:
-                        chunks.append(chunk)
-
-                    # Replace the stream with the complete data
-                    response["Body"] = b"".join(chunks)
-                    return response
-
-            except Exception as e:
-                logger.warning(
-                    f"Attempt {attempt} failed, exception: {str(e)}"
-                )
-                raise e
-
-        raise last_exception or Exception("Failed to get object after all retries")
+        async with self.s3_session.client(
+            "s3",
+            aws_access_key_id=self.config.access_key,
+            aws_secret_access_key=self.config.secret_key,
+            endpoint_url=self.config.endpoint_url,
+            region_name=self.config.region_name,
+            config=self.s3_config,
+        ) as s3:
+            # Get the object and its stream
+            response = await s3.get_object(Bucket=bucket, Key=key)
+            stream = response["Body"]
+
+            # Read the entire stream in chunks
+            chunks = []
+            async for chunk in stream:
+                chunks.append(chunk)
+
+            # Replace the stream with the complete data
+            response["Body"] = b"".join(chunks)
+            return response
 
     def upload_text_data(self, bucket: str, key: str, data: str, overwrite: bool = False) -> list[dict[str:Any]]:
         policy = {
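For reference, a minimal sketch of calling the simplified get_object shown above, assuming a StorageService built from the default config and that the layout here (src/mcp_server/core/storage/storage.py) is importable; the bucket and key names are made up. Because the method drains the stream and joins the chunks before returning, the caller receives the whole body as bytes, or an empty dict when the bucket is not in the configured list.

import asyncio

from mcp_server.core.storage.storage import StorageService  # path assumed from this repo layout


async def main():
    service = StorageService()  # assumes credentials come from the default config
    obj = await service.get_object(bucket="example-bucket", key="docs/readme.txt")
    if obj:
        # "Body" has already been read in chunks and joined, so it is plain bytes here
        print(f"downloaded {len(obj['Body'])} bytes")


asyncio.run(main())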
@@ -245,45 +200,45 @@ def fetch_object(self, bucket: str, key: str, url: str):
         return self.get_object_url(bucket, key)
 
     def is_text_file(self, key: str) -> bool:
-        """Determine if a file is text-based by its extension"""
         text_extensions = {
+            ".ini",
+            ".conf",
+            ".py",
+            ".js",
+            ".xml",
+            ".yml",
+            ".properties",
             ".txt",
             ".log",
             ".json",
-            ".xml",
-            ".yml",
             ".yaml",
             ".md",
             ".csv",
-            ".ini",
-            ".conf",
-            ".py",
-            ".js",
             ".html",
             ".css",
             ".sh",
             ".bash",
             ".cfg",
-            ".properties",
         }
         return any(key.lower().endswith(ext) for ext in text_extensions)
 
+
     def is_image_file(self, key: str) -> bool:
         """Determine if a file is text-based by its extension"""
         text_extensions = {
+            ".gif",
             ".png",
-            ".jpeg",
             ".jpg",
-            ".gif",
             ".bmp",
+            ".jpeg",
             ".tiff",
-            ".svg",
             ".webp",
+            ".svg",
         }
         return any(key.lower().endswith(ext) for ext in text_extensions)
 
+
     def is_markdown_file(self, key: str) -> bool:
-        """Determine if a file is text-based by its extension"""
         text_extensions = {
             ".md",
         }
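A quick illustration of the reordered suffix checks above; the key names are made up and the service is assumed to be configured elsewhere. Matching is case-insensitive because each key is lowercased before the endswith test.

from mcp_server.core.storage.storage import StorageService  # path assumed from this repo layout

service = StorageService()  # assumes a config with valid credentials
print(service.is_text_file("logs/app.LOG"))         # True: the key is lowercased before the endswith test
print(service.is_image_file("photos/cat.webp"))     # True: .webp is in the image set
print(service.is_image_file("archive/backup.tar"))  # False: .tar is not listed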

src/mcp_server/core/storage/tools.py

Lines changed: 9 additions & 10 deletions
@@ -10,8 +10,7 @@
 
 logger = logging.getLogger(consts.LOGGER_NAME)
 
-_BUCKET_DESC = """When you use this operation with a directory bucket, you must use virtual-hosted-style requests in the format ${bucket_name}.s3.${region_id}.qiniucs.com. Path-style requests are not supported. Directory bucket names must be unique in the chosen Availability Zone.
-"""
+_BUCKET_DESC = "Qiniu Cloud Storage bucket Name"
 
 class _ToolImpl:
     def __init__(self, storage: StorageService):
@@ -20,7 +19,7 @@ def __init__(self, storage: StorageService):
     @tools.tool_meta(
         types.Tool(
             name="ListBuckets",
-            description="Returns a list of all buckets owned by the authenticated sender of the request. To grant IAM permission to use this operation, you must add the s3:ListAllMyBuckets policy action.",
+            description="Return the Bucket you configured based on the conditions.",
             inputSchema={
                 "type": "object",
                 "properties": {
@@ -40,7 +39,7 @@ async def list_buckets(self, **kwargs) -> list[types.TextContent]:
     @tools.tool_meta(
         types.Tool(
             name="ListObjects",
-            description="Each request will return some or all (up to 100) objects in the bucket. You can use request parameters as selection criteria to return some objects in the bucket. If you want to continue listing, set start_after to the key of the last file in the last listing result so that you can list new content. To get a list of buckets, see ListBuckets.",
+            description="List objects in Qiniu Cloud, list a part each time, you can set start_after to continue listing, when the number of listed objects is less than max_keys, it means that all files are listed. start_after can be the key of the last file in the previous listing.",
             inputSchema={
                 "type": "object",
                 "properties": {
@@ -50,15 +49,15 @@ async def list_buckets(self, **kwargs) -> list[types.TextContent]:
                     },
                     "max_keys": {
                         "type": "integer",
-                        "description": "Sets the maximum number of keys returned in the response. By default, the action returns up to 20 key names. The response might contain fewer keys but will never contain more.",
+                        "description": "Sets the max number of keys returned, default: 20",
                     },
                     "prefix": {
                         "type": "string",
-                        "description": "Limits the response to keys that begin with the specified prefix.",
+                        "description": "Specify the prefix of the operation response key. Only keys that meet this prefix will be listed.",
                     },
                     "start_after": {
                         "type": "string",
-                        "description": "start_after is where you want S3 to start listing from. S3 starts listing after this specified key. start_after can be any key in the bucket.",
+                        "description": "start_after is where you want Qiniu Cloud to start listing from. Qiniu Cloud starts listing after this specified key. start_after can be any key in the bucket.",
                     },
                 },
                 "required": ["bucket"],
@@ -72,7 +71,7 @@ async def list_objects(self, **kwargs) -> list[types.TextContent]:
     @tools.tool_meta(
         types.Tool(
             name="GetObject",
-            description="Retrieves an object from Qiniu bucket. In the GetObject request, specify the full key name for the object. Path-style requests are not supported.",
+            description="Get an object contents from Qiniu Cloud bucket. In the GetObject request, specify the full key name for the object.",
             inputSchema={
                 "type": "object",
                 "properties": {
@@ -82,7 +81,7 @@ async def list_objects(self, **kwargs) -> list[types.TextContent]:
                     },
                     "key": {
                         "type": "string",
-                        "description": "Key of the object to get. Length Constraints: Minimum length of 1.",
+                        "description": "Key of the object to get.",
                     },
                 },
                 "required": ["bucket", "key"],
@@ -198,7 +197,7 @@ def fetch_object(self, **kwargs) -> list[types.TextContent]:
     @tools.tool_meta(
         types.Tool(
             name="GetObjectURL",
-            description="Get the file download URL, and note that the Bucket where the file is located must be bound to a domain name. If using Qiniu's test domain, HTTPS access will not be available, and users need to make adjustments for this themselves.",
+            description="Get the file download URL, and note that the Bucket where the file is located must be bound to a domain name. If using Qiniu Cloud test domain, HTTPS access will not be available, and users need to make adjustments for this themselves.",
             inputSchema={
                 "type": "object",
                 "properties": {
