fix(sdk): Add SDK support for setting resource limits on older KFP versions (kubeflow#11839)

mprahl · web-flow · commit f9d487cb6057 · 2025-04-21T13:46:27.000Z
For context, commit 70aaf8a removed support for the old fields (those without a resource_ prefix). Support was added back in commit 6ebf4aa to accommodate the then-current KFP backend, which did not yet support the new fields, but it was done in a way that broke any usage of pipeline input parameters. In commit 7c931ae, the old fields were removed again and support for the new fields was added to the KFP backend. This commit addresses the case where a user is using a new SDK with a KFP backend prior to 2.4. Signed-off-by: mprahl <mprahl@users.noreply.github.com>
diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py
@@ -3569,29 +3569,82 @@ def simple_pipeline():
         self.assertEqual(
             '5', dict_format['deploymentSpec']['executors']['exec-return-1-2']
             ['container']['resources']['resourceCpuLimit'])
+        self.assertEqual(
+            5.0, dict_format['deploymentSpec']['executors']['exec-return-1-2']
+            ['container']['resources']['cpuLimit'])
         self.assertNotIn(
             'memoryLimit', dict_format['deploymentSpec']['executors']
             ['exec-return-1-2']['container']['resources'])
 
         self.assertEqual(
             '50G', dict_format['deploymentSpec']['executors']['exec-return-1-3']
             ['container']['resources']['resourceMemoryLimit'])
+        self.assertEqual(
+            50.0, dict_format['deploymentSpec']['executors']['exec-return-1-3']
+            ['container']['resources']['memoryLimit'])
         self.assertNotIn(
             'cpuLimit', dict_format['deploymentSpec']['executors']
             ['exec-return-1-3']['container']['resources'])
 
         self.assertEqual(
             '2', dict_format['deploymentSpec']['executors']['exec-return-1-4']
             ['container']['resources']['resourceCpuRequest'])
+        self.assertEqual(
+            2.0, dict_format['deploymentSpec']['executors']['exec-return-1-4']
+            ['container']['resources']['cpuRequest'])
         self.assertEqual(
             '5', dict_format['deploymentSpec']['executors']['exec-return-1-4']
             ['container']['resources']['resourceCpuLimit'])
+        self.assertEqual(
+            5.0, dict_format['deploymentSpec']['executors']['exec-return-1-4']
+            ['container']['resources']['cpuLimit'])
         self.assertEqual(
             '4G', dict_format['deploymentSpec']['executors']['exec-return-1-4']
             ['container']['resources']['resourceMemoryRequest'])
+        self.assertEqual(
+            4.0, dict_format['deploymentSpec']['executors']['exec-return-1-4']
+            ['container']['resources']['memoryRequest'])
         self.assertEqual(
             '50G', dict_format['deploymentSpec']['executors']['exec-return-1-4']
             ['container']['resources']['resourceMemoryLimit'])
+        self.assertEqual(
+            50.0, dict_format['deploymentSpec']['executors']['exec-return-1-4']
+            ['container']['resources']['memoryLimit'])
+
+    def test_cpu_memory_input_parameter(self):
+        """Pipeline-input resources only set the resource-prefixed fields."""
+        @dsl.pipeline
+        def simple_pipeline(
+            cpu_request: str,
+            cpu_limit: str,
+            memory_request: str,
+            memory_limit: str,
+            ac_type: str,
+            ac_count: int,
+        ):
+            return_1().set_cpu_request(cpu_request)\
+                .set_cpu_limit(cpu_limit)\
+                .set_memory_request(memory_request)\
+                .set_memory_limit(memory_limit)\
+                .set_accelerator_limit(ac_count)\
+                .set_accelerator_type(ac_type)
+
+        dict_format = json_format.MessageToDict(simple_pipeline.pipeline_spec)
+        resources = dict_format['deploymentSpec']['executors']['exec-return-1'][
+            'container']['resources']
+
+        self.assertIn('resourceCpuRequest', resources)
+        self.assertNotIn('cpuRequest', resources)
+        self.assertIn('resourceCpuLimit', resources)
+        self.assertNotIn('cpuLimit', resources)
+        self.assertIn('resourceMemoryRequest', resources)
+        self.assertNotIn('memoryRequest', resources)
+        self.assertIn('resourceMemoryLimit', resources)
+        self.assertNotIn('memoryLimit', resources)
+        self.assertIn('resourceType', resources['accelerator'])
+        self.assertNotIn('type', resources['accelerator'])
+        self.assertIn('resourceCount', resources['accelerator'])
+        self.assertNotIn('count', resources['accelerator'])
 
 
 class TestPlatformConfig(unittest.TestCase):
diff --git a/sdk/python/kfp/compiler/compiler_utils.py b/sdk/python/kfp/compiler/compiler_utils.py
@@ -804,3 +804,60 @@ def recursive_replace_placeholders(data: Union[Dict, List], old_value: str,
         if isinstance(data, pipeline_channel.PipelineChannel):
             data = str(data)
         return new_value if data == old_value else data
+
+
+# Note that _cpu_to_float assumes the string has already been validated by the _validate_cpu_request_limit method.
+def _cpu_to_float(cpu: str) -> float:
+    """Converts the validated CPU request/limit string to its numeric value.
+
+    Args:
+        cpu: CPU requests or limits. This string should be a number or a
+            number followed by an "m" to indicate millicores (1/1000). For
+            more information, see "Specify a CPU Request and a CPU Limit"
+            in the Kubernetes documentation.
+    Returns:
+        The float value of the request/limit; an "m" value is divided by 1000.
+    """
+    return float(cpu[:-1]) / 1000 if cpu.endswith('m') else float(cpu)
+
+
+# Note that _memory_to_float assumes the string has already been validated by the _validate_memory_request_limit method.
+def _memory_to_float(memory: str) -> float:
+    """Converts the validated memory request/limit string to its numeric value.
+
+    Args:
+        memory: Memory requests or limits. This string should be a number or
+            a number followed by one of "E", "Ei", "P", "Pi", "T", "Ti", "G",
+            "Gi", "M", "Mi", "K", or "Ki".
+    Returns:
+        The memory request/limit as a float, in units of "G" (constants._G).
+    """
+    if memory.endswith('E'):  # "Ei" ends with "i", so it does not match here
+        memory = float(memory[:-1]) * constants._E / constants._G
+    elif memory.endswith('Ei'):
+        memory = float(memory[:-2]) * constants._EI / constants._G
+    elif memory.endswith('P'):
+        memory = float(memory[:-1]) * constants._P / constants._G
+    elif memory.endswith('Pi'):
+        memory = float(memory[:-2]) * constants._PI / constants._G
+    elif memory.endswith('T'):
+        memory = float(memory[:-1]) * constants._T / constants._G
+    elif memory.endswith('Ti'):
+        memory = float(memory[:-2]) * constants._TI / constants._G
+    elif memory.endswith('G'):
+        memory = float(memory[:-1])  # already in units of "G"; no scaling
+    elif memory.endswith('Gi'):
+        memory = float(memory[:-2]) * constants._GI / constants._G
+    elif memory.endswith('M'):
+        memory = float(memory[:-1]) * constants._M / constants._G
+    elif memory.endswith('Mi'):
+        memory = float(memory[:-2]) * constants._MI / constants._G
+    elif memory.endswith('K'):
+        memory = float(memory[:-1]) * constants._K / constants._G
+    elif memory.endswith('Ki'):
+        memory = float(memory[:-2]) * constants._KI / constants._G
+    else:
+        # By default interpret as a plain integer, in the unit of Bytes.
+        memory = float(memory) / constants._G
+
+    return memory  # NOTE: `memory` was rebound from the input str to a float
diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py
@@ -651,27 +651,62 @@ def convert_to_placeholder(input_value: str) -> str:
                 for name, value in (task.container_spec.env or {}).items()
             ]))
 
+    # All the fields with the resource_ prefix are newer fields to support pipeline input parameters. The below code
+    # will check if the value is a placeholder and if not, it will also set the value on the old deprecated fields
+    # without the resource_ prefix to work on older KFP installations.
     if task.container_spec.resources is not None:
         if task.container_spec.resources.cpu_request is not None:
-            container_spec.resources.resource_cpu_request = convert_to_placeholder(
+            placeholder = convert_to_placeholder(
                 task.container_spec.resources.cpu_request)
+            container_spec.resources.resource_cpu_request = placeholder
+
+            if task.container_spec.resources.cpu_request == placeholder:
+                container_spec.resources.cpu_request = compiler_utils._cpu_to_float(
+                    task.container_spec.resources.cpu_request)
         if task.container_spec.resources.cpu_limit is not None:
-            container_spec.resources.resource_cpu_limit = convert_to_placeholder(
+            placeholder = convert_to_placeholder(
                 task.container_spec.resources.cpu_limit)
+            container_spec.resources.resource_cpu_limit = placeholder
+
+            if task.container_spec.resources.cpu_limit == placeholder:
+                container_spec.resources.cpu_limit = compiler_utils._cpu_to_float(
+                    task.container_spec.resources.cpu_limit)
         if task.container_spec.resources.memory_request is not None:
-            container_spec.resources.resource_memory_request = convert_to_placeholder(
+            placeholder = convert_to_placeholder(
                 task.container_spec.resources.memory_request)
+            container_spec.resources.resource_memory_request = placeholder
+
+            if task.container_spec.resources.memory_request == placeholder:
+                container_spec.resources.memory_request = compiler_utils._memory_to_float(
+                    task.container_spec.resources.memory_request)
         if task.container_spec.resources.memory_limit is not None:
-            container_spec.resources.resource_memory_limit = convert_to_placeholder(
+            placeholder = convert_to_placeholder(
                 task.container_spec.resources.memory_limit)
+            container_spec.resources.resource_memory_limit = placeholder
+
+            if task.container_spec.resources.memory_limit == placeholder:
+                container_spec.resources.memory_limit = compiler_utils._memory_to_float(
+                    task.container_spec.resources.memory_limit)
         if task.container_spec.resources.accelerator_count is not None:
+            ac_type = None
+            ac_type_placholder = convert_to_placeholder(
+                task.container_spec.resources.accelerator_type)
+            if task.container_spec.resources.accelerator_type == ac_type_placholder:
+                ac_type = task.container_spec.resources.accelerator_type
+
+            ac_count = None
+            ac_count_placeholder = convert_to_placeholder(
+                task.container_spec.resources.accelerator_count)
+            if task.container_spec.resources.accelerator_count == ac_count_placeholder:
+                ac_count = int(task.container_spec.resources.accelerator_count)
+
             container_spec.resources.accelerator.CopyFrom(
                 pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec
                 .ResourceSpec.AcceleratorConfig(
-                    resource_type=convert_to_placeholder(
-                        task.container_spec.resources.accelerator_type),
-                    resource_count=convert_to_placeholder(
-                        task.container_spec.resources.accelerator_count),
+                    resource_type=ac_type_placholder,
+                    resource_count=ac_count_placeholder,
+                    type=ac_type,
+                    count=ac_count,
                 ))
 
     return container_spec
diff --git a/sdk/python/test_data/pipelines/pipeline_with_resource_spec.yaml b/sdk/python/test_data/pipelines/pipeline_with_resource_spec.yaml
@@ -61,8 +61,14 @@ deploymentSpec:
         image: gcr.io/my-project/my-fancy-trainer
         resources:
           accelerator:
+            count: '1'
             resourceCount: '1'
             resourceType: tpu-v3
+            type: tpu-v3
+          cpuLimit: 4.0
+          cpuRequest: 2.0
+          memoryLimit: 15.032385536
+          memoryRequest: 4.294967296
           resourceCpuLimit: '4'
           resourceCpuRequest: '2'
           resourceMemoryLimit: 14Gi
@@ -119,4 +125,4 @@ root:
         isOptional: true
         parameterType: STRING
 schemaVersion: 2.1.0
-sdkVersion: kfp-2.11.0
+sdkVersion: kfp-2.12.1