-
Notifications
You must be signed in to change notification settings - Fork 71
vGPU: Add "Ignore Memory Limit" option to bypass GPU memory check on task submission #3343
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| import tidy3d as td | ||
|
Check failure on line 1 in tests/tidy3d_modal_cm_fdtd_test.py
|
||
| from tidy3d import web | ||
| from tidy3d.config import Env | ||
|
|
||
| Env.prod.active() | ||
| web.configure("<REDACTED-PROD-API-KEY>")  # NOTE(review): credential removed from page text — the original key was exposed and must be rotated | ||
|
|
||
| #Env.dev.active() | ||
| #web.configure("<REDACTED-DEV-API-KEY>")  # NOTE(review): credential removed — rotate | ||
|
|
||
| #Env.uat.active() | ||
| #web.configure("<REDACTED-UAT-API-KEY>")  # NOTE(review): credential removed — rotate | ||
|
|
||
|
|
||
| modeler = td.Tidy3dBaseModel.from_file("modal_cm.json") | ||
| task_id = web.upload(modeler, task_name="directional coupler") | ||
| from tidy3d.web.core.http_util import http | ||
|
Check failure on line 17 in tests/tidy3d_modal_cm_fdtd_test.py
|
||
| import json | ||
|
Check failure on line 18 in tests/tidy3d_modal_cm_fdtd_test.py
|
||
| # resp = http.get( | ||
| # f"rf/task/{task_id}/statistics", | ||
| # ) | ||
| #print(json.dumps(resp, indent=4)) | ||
| web.run(modeler, folder_name="modal_cm") | ||
|
Check failure on line 23 in tests/tidy3d_modal_cm_fdtd_test.py
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -304,6 +304,7 @@ def run_custom( | |
| ] = None, | ||
| custom_vjp: Optional[Union[CustomVJPConfig, tuple[CustomVJPConfig, ...]]] = None, | ||
| vgpu_allocation: Optional[int] = None, | ||
| ignore_memory_limit: Optional[bool] = None, | ||
| ) -> WorkflowDataType: | ||
| """ | ||
| Submits a :class:`.Simulation` to server, starts running, monitors progress, downloads, | ||
|
|
@@ -363,6 +364,8 @@ def run_custom( | |
| Number of virtual GPUs to allocate for the simulation (1, 2, 4, or 8). | ||
| Only applies to vGPU license users. If not specified, the system | ||
| automatically determines the optimal GPU count. | ||
| ignore_memory_limit: Optional[bool] = None | ||
| Whether to ignore memory usage limits. Defaults to ``None``. | ||
|
|
||
| Returns | ||
| ------- | ||
|
|
@@ -472,6 +475,7 @@ def run_custom( | |
| max_num_adjoint_per_fwd=max_num_adjoint_per_fwd, | ||
| numerical_structures=numerical_structures, | ||
| custom_vjp=custom_vjp, | ||
| ignore_memory_limit=ignore_memory_limit, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ignore_memory_limit dropped in two run_custom code paths — High Severity. Additional Locations (1)
||
| ) | ||
|
|
||
| should_use_autograd = False | ||
|
|
@@ -572,6 +576,7 @@ def run_async_custom( | |
| ] = None, | ||
| custom_vjp: Optional[CustomVJPSpec] = None, | ||
| vgpu_allocation: Optional[int] = None, | ||
| ignore_memory_limit: Optional[bool] = None, | ||
| ) -> BatchData: | ||
| """Submits a set of Union[:class:`.Simulation`, :class:`.HeatSimulation`, :class:`.EMESimulation`] objects to server, | ||
| starts running, monitors progress, downloads, and loads results as a :class:`.BatchData` object. | ||
|
|
@@ -633,7 +638,8 @@ def run_async_custom( | |
| Number of virtual GPUs to allocate for the simulation (1, 2, 4, or 8). | ||
| Only applies to vGPU license users. If not specified, the system | ||
| automatically determines the optimal GPU count. | ||
|
|
||
| ignore_memory_limit: Optional[bool] = None | ||
| Whether to ignore memory usage limits. Defaults to ``None``. | ||
| Returns | ||
| ------- | ||
| :class:`BatchData` | ||
|
|
@@ -805,6 +811,7 @@ def _expand_spec( | |
| priority=priority, | ||
| vgpu_allocation=vgpu_allocation, | ||
| lazy=lazy, | ||
| ignore_memory_limit=ignore_memory_limit, | ||
| ) | ||
|
|
||
| # insert numerical_structures even if not traced | ||
|
|
@@ -864,6 +871,7 @@ def run( | |
| priority: Optional[int] = None, | ||
| lazy: Optional[bool] = None, | ||
| vgpu_allocation: Optional[int] = None, | ||
| ignore_memory_limit: Optional[bool] = None, | ||
| ) -> WorkflowDataType: | ||
| """Wrapper for run_custom for usage without numerical_structures or custom_vjp for public facing API.""" | ||
| return run_custom( | ||
|
|
@@ -888,6 +896,7 @@ def run( | |
| lazy=lazy, | ||
| numerical_structures=None, | ||
| custom_vjp=None, | ||
| ignore_memory_limit=ignore_memory_limit, | ||
| ) | ||
|
|
||
|
|
||
|
|
@@ -908,6 +917,7 @@ def run_async( | |
| priority: Optional[int] = None, | ||
| lazy: Optional[bool] = None, | ||
| vgpu_allocation: Optional[int] = None, | ||
| ignore_memory_limit: Optional[bool] = None, | ||
| ) -> BatchData: | ||
| """Wrapper for run_async_custom for usage without numerical_structures or custom_vjp for public facing API.""" | ||
| return run_async_custom( | ||
|
|
@@ -929,6 +939,7 @@ def run_async( | |
| lazy=lazy, | ||
| numerical_structures=None, | ||
| custom_vjp=None, | ||
| ignore_memory_limit=ignore_memory_limit, | ||
| ) | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -594,6 +594,7 @@ def submit( | |
| pay_type: Union[PayType, str] = PayType.AUTO, | ||
| priority: Optional[int] = None, | ||
| vgpu_allocation: Optional[int] = None, | ||
| ignore_memory_limit: Optional[bool] = None, | ||
| ) -> None: | ||
| """Kick off this task. | ||
|
|
||
|
|
@@ -616,6 +617,9 @@ def submit( | |
| Number of virtual GPUs to allocate for the simulation (1, 2, 4, or 8). | ||
| Only applies to vGPU license users. If not specified, the system | ||
| automatically determines the optimal GPU count. | ||
| ignore_memory_limit: Optional[bool] = None | ||
| Whether to ignore memory limits. | ||
|
|
||
| """ | ||
| pay_type = PayType(pay_type) if not isinstance(pay_type, PayType) else pay_type | ||
|
|
||
|
|
@@ -634,6 +638,7 @@ def submit( | |
| "payType": pay_type.value, | ||
| "priority": priority, | ||
| "vgpuAllocation": vgpu_allocation, | ||
| "ignoreMemoryLimit": ignore_memory_limit, | ||
| }, | ||
| ) | ||
|
|
||
|
|
@@ -921,6 +926,7 @@ def submit( | |
| pay_type: Union[PayType, str] = PayType.AUTO, | ||
| priority: Optional[int] = None, | ||
| vgpu_allocation: Optional[int] = None, | ||
| ignore_memory_limit: Optional[bool] = None, | ||
| ) -> requests.Response: | ||
| """Submits the batch for execution on the server. | ||
|
|
||
|
|
@@ -934,6 +940,8 @@ def submit( | |
| Optional identifier for a specific worker group to run on. | ||
| vgpu_allocation : Optional[int], default=None | ||
| Number of virtual GPUs to allocate for the simulation (1, 2, 4, or 8). | ||
| ignore_memory_limit : Optional[bool], default=None | ||
| Whether or not to ignore memory limits. | ||
|
|
||
| Returns | ||
| ------- | ||
|
|
@@ -963,6 +971,8 @@ def submit( | |
| "solverVersion": solver_version, | ||
| "protocolVersion": protocol_version, | ||
| "workerGroup": worker_group, | ||
| "vgpuAllocation": vgpu_allocation, | ||
| "ignoreMemoryLimit": ignore_memory_limit, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Snake_case API keys instead of camelCase in submit — High Severity. In the batch submit payload, the keys "vgpu_allocation" and "ignore_memory_limit" use snake_case, while every other key in the same request (e.g. "solverVersion", "workerGroup") and the analogous single-task submit (which sends "vgpuAllocation" and "ignoreMemoryLimit") use camelCase; the server will likely ignore the mismatched keys, silently dropping both options for batch submissions.
||
| }, | ||
| ) | ||
|
|
||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hardcoded API keys committed in test file
High Severity
Production API keys are hardcoded in
tests/tidy3d_modal_cm_fdtd_test.py. Three separate API keys (for prod, dev, and uat environments) are exposed in plaintext viaweb.configure(...)calls. These credentials will be visible in version control history even if later removed and could allow unauthorized access to the service.