Lower the inference memory usage ratio from 0.4 to 0.2

CRZbulabula · CRZbulabula · commit 86f1c76e3631 · 2026-01-12T12:07:52.000+08:00
diff --git a/iotdb-core/ainode/iotdb/ainode/core/constant.py b/iotdb-core/ainode/iotdb/ainode/core/constant.py
@@ -56,7 +56,7 @@
     "timer": 856 * 1024**2,  # 856 MiB
 }  # the memory usage of each model in bytes
 
-AINODE_INFERENCE_MEMORY_USAGE_RATIO = 0.4  # the device space allocated for inference
+AINODE_INFERENCE_MEMORY_USAGE_RATIO = 0.2  # the device space allocated for inference
 AINODE_INFERENCE_EXTRA_MEMORY_RATIO = (
     1.2  # the overhead ratio for inference, used to estimate the pool size
 )
diff --git a/iotdb-core/ainode/iotdb/ainode/core/manager/utils.py b/iotdb-core/ainode/iotdb/ainode/core/manager/utils.py
@@ -65,17 +65,17 @@ def measure_model_memory(device: torch.device, model_id: str) -> int:
 
 
 def evaluate_system_resources(device: torch.device) -> dict:
-    if torch.cuda.is_available():
+    if device.type == "cuda":
         free_mem, total_mem = torch.cuda.mem_get_info()
         logger.info(
-            f"[Inference][Device-{device}] CUDA device memory: free={free_mem/1024**2:.2f} MB, total={total_mem/1024**2:.2f} MB"
+            f"[Inference][{device}] CUDA device memory: free={free_mem/1024**2:.2f} MB, total={total_mem/1024**2:.2f} MB"
         )
         return {"device": "cuda", "free_mem": free_mem, "total_mem": total_mem}
     else:
         free_mem = psutil.virtual_memory().available
         total_mem = psutil.virtual_memory().total
         logger.info(
-            f"[Inference][Device-{device}] CPU memory: free={free_mem/1024**2:.2f} MB, total={total_mem/1024**2:.2f} MB"
+            f"[Inference][{device}] CPU memory: free={free_mem/1024**2:.2f} MB, total={total_mem/1024**2:.2f} MB"
         )
         return {"device": "cpu", "free_mem": free_mem, "total_mem": total_mem}
 
diff --git a/iotdb-core/ainode/resources/conf/iotdb-ainode.properties b/iotdb-core/ainode/resources/conf/iotdb-ainode.properties
@@ -58,7 +58,7 @@ ain_cluster_ingress_time_zone=UTC+8
 
 # The device space allocated for inference
 # Datatype: Float
-ain_inference_memory_usage_ratio=0.4
+ain_inference_memory_usage_ratio=0.2
 
 # The overhead ratio for inference, used to estimate the pool size
 # Datatype: Float

Original file line number	Diff line number	Diff line change
`@@ -56,7 +56,7 @@`
`56`	`56`	`"timer": 856 * 1024**2, # 856 MiB`
`57`	`57`	`} # the memory usage of each model in bytes`
`58`	`58`
`59`		`-AINODE_INFERENCE_MEMORY_USAGE_RATIO = 0.4 # the device space allocated for inference`
	`59`	`+AINODE_INFERENCE_MEMORY_USAGE_RATIO = 0.2 # the device space allocated for inference`
`60`	`60`	`AINODE_INFERENCE_EXTRA_MEMORY_RATIO = (`
`61`	`61`	`1.2 # the overhead ratio for inference, used to estimate the pool size`
`62`	`62`	`)`