Skip to content

Commit 86f1c76

Browse files
committed
less inference mem ratio
1 parent f50101e commit 86f1c76

File tree

3 files changed

+5
-5
lines changed

3 files changed

+5
-5
lines changed

iotdb-core/ainode/iotdb/ainode/core/constant.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@
5656
"timer": 856 * 1024**2, # 856 MiB
5757
} # the memory usage of each model in bytes
5858

59-
AINODE_INFERENCE_MEMORY_USAGE_RATIO = 0.4 # the device space allocated for inference
59+
AINODE_INFERENCE_MEMORY_USAGE_RATIO = 0.2 # the device space allocated for inference
6060
AINODE_INFERENCE_EXTRA_MEMORY_RATIO = (
6161
1.2 # the overhead ratio for inference, used to estimate the pool size
6262
)

iotdb-core/ainode/iotdb/ainode/core/manager/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,17 +65,17 @@ def measure_model_memory(device: torch.device, model_id: str) -> int:
6565

6666

6767
def evaluate_system_resources(device: torch.device) -> dict:
68-
if torch.cuda.is_available():
68+
if device.type == "cuda":
6969
free_mem, total_mem = torch.cuda.mem_get_info()
7070
logger.info(
71-
f"[Inference][Device-{device}] CUDA device memory: free={free_mem/1024**2:.2f} MB, total={total_mem/1024**2:.2f} MB"
71+
f"[Inference][{device}] CUDA device memory: free={free_mem/1024**2:.2f} MB, total={total_mem/1024**2:.2f} MB"
7272
)
7373
return {"device": "cuda", "free_mem": free_mem, "total_mem": total_mem}
7474
else:
7575
free_mem = psutil.virtual_memory().available
7676
total_mem = psutil.virtual_memory().total
7777
logger.info(
78-
f"[Inference][Device-{device}] CPU memory: free={free_mem/1024**2:.2f} MB, total={total_mem/1024**2:.2f} MB"
78+
f"[Inference][{device}] CPU memory: free={free_mem/1024**2:.2f} MB, total={total_mem/1024**2:.2f} MB"
7979
)
8080
return {"device": "cpu", "free_mem": free_mem, "total_mem": total_mem}
8181

iotdb-core/ainode/resources/conf/iotdb-ainode.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ ain_cluster_ingress_time_zone=UTC+8
5858

5959
# The device space allocated for inference
6060
# Datatype: Float
61-
ain_inference_memory_usage_ratio=0.4
61+
ain_inference_memory_usage_ratio=0.2
6262

6363
# The overhead ratio for inference, used to estimate the pool size
6464
# Datatype: Float

0 commit comments

Comments
 (0)