Skip to content

Commit 48ea65b

Browse files
committed
Update on "[ET-VK] Adding get or create int function to read int value."
This diff adds a new function `get_or_create_int` to the `ComputeGraph` class, which allows reading an integer value from a `ValueRef` index. The function returns the extracted integer value if the value at the index is an integer; otherwise it throws an error. Additionally, an overload of the function is added to return a default value if the value at the index is `None`.

Differential Revision: [D78094858](https://our.internmc.facebook.com/intern/diff/D78094858/)

[ghstack-poisoned]
2 parents 86aca86 + e8c538f commit 48ea65b

26 files changed

+553
-145
lines changed

.lintrunner.toml

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ exclude_patterns = [
1010
'exir/serde/**',
1111
]
1212
command = [
13-
'python3',
13+
'python',
1414
'-m',
1515
'lintrunner_adapters',
1616
'run',
@@ -19,7 +19,7 @@ command = [
1919
'@{{PATHSFILE}}'
2020
]
2121
init_command = [
22-
'python3',
22+
'python',
2323
'-m',
2424
'lintrunner_adapters',
2525
'run',
@@ -41,7 +41,7 @@ exclude_patterns = [
4141
'exir/serde/**',
4242
]
4343
command = [
44-
'python3',
44+
'python',
4545
'-m',
4646
'lintrunner_adapters',
4747
'run',
@@ -50,7 +50,7 @@ command = [
5050
'@{{PATHSFILE}}'
5151
]
5252
init_command = [
53-
'python3',
53+
'python',
5454
'-m',
5555
'lintrunner_adapters',
5656
'run',
@@ -84,7 +84,7 @@ exclude_patterns = [
8484
'runtime/core/portable_type/c10/**',
8585
]
8686
command = [
87-
'python3',
87+
'python',
8888
'-m',
8989
'lintrunner_adapters',
9090
'run',
@@ -95,7 +95,7 @@ command = [
9595
'@{{PATHSFILE}}'
9696
]
9797
init_command = [
98-
'python3',
98+
'python',
9999
'-m',
100100
'lintrunner_adapters',
101101
'run',
@@ -117,7 +117,7 @@ exclude_patterns = [
117117
'**/third-party/**',
118118
]
119119
command = [
120-
'python3',
120+
'python',
121121
'-m',
122122
'lintrunner_adapters',
123123
'run',
@@ -127,7 +127,7 @@ command = [
127127
'@{{PATHSFILE}}',
128128
]
129129
init_command = [
130-
'python3',
130+
'python',
131131
'-m',
132132
'lintrunner_adapters',
133133
'run',
@@ -151,7 +151,7 @@ exclude_patterns = [
151151
'**/third-party/**',
152152
]
153153
command = [
154-
'python3',
154+
'python',
155155
'-m',
156156
'lintrunner_adapters',
157157
'run',
@@ -192,7 +192,7 @@ exclude_patterns = [
192192
'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',
193193
]
194194
command = [
195-
'python3',
195+
'python',
196196
'-m',
197197
'lintrunner_adapters',
198198
'run',
@@ -234,7 +234,7 @@ exclude_patterns = [
234234
'util/**',
235235
]
236236
command = [
237-
'python3',
237+
'python',
238238
'-m',
239239
'lintrunner_adapters',
240240
'run',
@@ -287,7 +287,7 @@ exclude_patterns = [
287287
'util/**',
288288
]
289289
command = [
290-
'python3',
290+
'python',
291291
'-m',
292292
'lintrunner_adapters',
293293
'run',
@@ -337,7 +337,7 @@ exclude_patterns = [
337337
'backends/arm/test/**',
338338
]
339339
command = [
340-
'python3',
340+
'python',
341341
'-m',
342342
'lintrunner_adapters',
343343
'run',
@@ -349,7 +349,7 @@ command = [
349349
'@{{PATHSFILE}}'
350350
]
351351
init_command = [
352-
'python3',
352+
'python',
353353
'-m',
354354
'lintrunner_adapters',
355355
'run',
@@ -368,7 +368,7 @@ exclude_patterns = [
368368
'.lintrunner.toml',
369369
]
370370
command = [
371-
'python3',
371+
'python',
372372
'-m',
373373
'lintrunner_adapters',
374374
'run',
@@ -397,7 +397,7 @@ exclude_patterns = [
397397
]
398398

399399
command = [
400-
"python3",
400+
"python",
401401
"-m",
402402
"lintrunner_adapters",
403403
"run",

backends/cadence/aot/pass_utils.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -174,30 +174,53 @@ def nodes_not_adjacent_in_gm(
174174

175175
def get_arg(
176176
node: torch.fx.Node,
177-
arg_index: int,
178177
kwarg_name: str,
179-
*,
180-
default: torch.fx.node.Argument = None,
181178
) -> torch.fx.node.Argument:
182179
"""
183-
Get the arg at arg_index or kwarg with arg_name of the node. If neither is found
184-
return default.
180+
Get the arg with arg_name of the node, returns default value if not set.
185181
"""
186-
if arg_index < len(node.args):
187-
return node.args[arg_index]
188-
elif kwarg_name in node.kwargs:
182+
# Try to get the arg from kwargs first since this is faster
183+
if kwarg_name in node.kwargs:
189184
return node.kwargs[kwarg_name]
190-
else:
191-
return default
185+
186+
# If it's not found in kwargs, try to normalize the args
187+
normalized_args = node.normalized_arguments(
188+
node.graph.owning_module, normalize_to_only_use_kwargs=True
189+
)
190+
if not normalized_args:
191+
raise RuntimeError(
192+
f"get_arg: Node {node} does not support normalization of arguments"
193+
)
194+
195+
return normalized_args.kwargs[kwarg_name]
192196

193197

194198
def set_arg(
195-
node: torch.fx.Node, arg_index: int, kwarg_name: str, value: torch.fx.node.Argument
199+
node: torch.fx.Node, kwarg_name: str, value: torch.fx.node.Argument
196200
) -> None:
197201
"""
198-
Set the arg at arg_index if it exists, otherwise set the kwarg.
202+
Set the node's arg with its name to the given value.
199203
"""
200-
if arg_index < len(node.args):
201-
node.update_arg(arg_index, value)
204+
# Try to set the arg if it is present in kwargs first since this is faster
205+
if kwarg_name in node.kwargs:
206+
node.update_kwarg(kwarg_name, value)
207+
return
208+
209+
# If it's not found in kwargs, try to normalize the args and set the arg
210+
normalized_args = node.normalized_arguments(
211+
node.graph.owning_module, normalize_to_only_use_kwargs=True
212+
)
213+
if not normalized_args:
214+
raise RuntimeError(
215+
f"set_arg: Node {node} does not support normalization of arguments"
216+
)
217+
218+
kwargs = normalized_args.kwargs
219+
if kwarg_name not in kwargs:
220+
raise ValueError(f"set_arg: invalid arg name {kwarg_name} for node {node} used")
221+
222+
idx = list(kwargs.keys()).index(kwarg_name)
223+
if idx < len(node.args):
224+
node.update_arg(idx, value)
202225
else:
203226
node.update_kwarg(kwarg_name, value)

backends/cadence/aot/remove_ops.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -779,17 +779,17 @@ def _remove_unused_cat(self, graph_module: torch.fx.GraphModule) -> None:
779779
for slice_copy_node in graph_module.graph.find_nodes(
780780
op="call_function", target=exir_ops.edge.aten.slice_copy.Tensor
781781
):
782-
cat_node = cast(Node, get_arg(slice_copy_node, 0, "input"))
783-
slice_dim = cast(int, get_arg(slice_copy_node, 1, "dim", default=0))
784-
start_idx = cast(int, get_arg(slice_copy_node, 2, "start", default=None))
785-
end_idx = cast(int, get_arg(slice_copy_node, 3, "end", default=None))
786-
step = cast(int, get_arg(slice_copy_node, 4, "step", default=1))
782+
cat_node = cast(Node, get_arg(slice_copy_node, "input"))
783+
slice_dim = cast(int, get_arg(slice_copy_node, "dim"))
784+
start_idx = cast(int, get_arg(slice_copy_node, "start"))
785+
end_idx = cast(int, get_arg(slice_copy_node, "end"))
786+
step = cast(int, get_arg(slice_copy_node, "step"))
787787

788788
if cat_node.target != exir_ops.edge.aten.cat.default or step != 1:
789789
continue
790790

791791
# Make sure cat and slice happens on the same dimension.
792-
cat_dim = cast(Node, get_arg(cat_node, 1, "dim", default=0))
792+
cat_dim = cast(Node, get_arg(cat_node, "dim"))
793793
if cat_dim != slice_dim:
794794
continue
795795

@@ -805,14 +805,14 @@ def _remove_unused_cat(self, graph_module: torch.fx.GraphModule) -> None:
805805
end_idx += cat_output_shape[cat_dim]
806806

807807
offset = 0
808-
for cat_input_node in cast(List[Node], get_arg(cat_node, 0, "tensors")):
808+
for cat_input_node in cast(List[Node], get_arg(cat_node, "tensors")):
809809
cat_input_shape = cat_input_node.meta["val"].shape
810810

811811
# Check if the slice range overlaps with the cat input range.
812812
if offset <= start_idx and end_idx <= offset + cat_input_shape[cat_dim]:
813813
slice_copy_node.replace_input_with(cat_node, cat_input_node)
814-
set_arg(slice_copy_node, 2, "start", start_idx - offset)
815-
set_arg(slice_copy_node, 3, "end", end_idx - offset)
814+
set_arg(slice_copy_node, "start", start_idx - offset)
815+
set_arg(slice_copy_node, "end", end_idx - offset)
816816
break
817817

818818
offset += cat_input_shape[cat_dim]

backends/vulkan/op_registry.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ def register_ephemeral_op(features: OpFeatures):
259259
exir_ops.edge.aten.div.Tensor,
260260
exir_ops.edge.aten.div.Tensor_mode,
261261
exir_ops.edge.aten.pow.Tensor_Tensor,
262+
exir_ops.edge.aten.eq.Tensor,
263+
exir_ops.edge.aten.lt.Tensor,
264+
exir_ops.edge.aten.le.Tensor,
265+
exir_ops.edge.aten.gt.Tensor,
266+
exir_ops.edge.aten.ge.Tensor,
262267
]
263268
)
264269
def register_binary_op(features: OpFeatures):

backends/vulkan/runtime/gen_vulkan_spv.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,9 +728,16 @@ def parseTemplateYaml(self, yaml_file: str) -> None:
728728
)
729729

730730
for variant in params_dict["shader_variants"]:
731+
default_iterated_params_names = set(
732+
default_iterated_params.keys()
733+
if default_iterated_params is not None
734+
else {}
735+
)
731736
variant_params_names = set(variant.keys())
737+
732738
invalid_keys = (
733739
variant_params_names
740+
- default_iterated_params_names
734741
- params_names
735742
- {"generate_variant_forall"}
736743
)
@@ -758,6 +765,7 @@ def parseTemplateYaml(self, yaml_file: str) -> None:
758765
variant_name = f"{variant_name}_{param_value[1]}"
759766

760767
default_params_copy["NAME"] = variant_name
768+
default_params_copy["VARIANT_NAME"] = variant["NAME"]
761769

762770
self.shader_template_params[template_name].append(
763771
default_params_copy

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -549,20 +549,13 @@ vkapi::BufferBindInfo ComputeGraph::get_or_create_int_param_buffer(
549549
}
550550
}
551551

552-
int32_t ComputeGraph::get_or_create_int(const ValueRef idx) {
553-
if (values_.at(idx).isInt()) {
554-
return extract_scalar<int32_t>(idx);
555-
}
556-
VK_THROW("Cannot create a int param buffer for the given value");
557-
}
558-
559552
int32_t ComputeGraph::get_or_create_int(
560553
const ValueRef idx,
561554
const int32_t default_val) {
562555
if (values_.at(idx).isNone()) {
563556
return default_val;
564557
}
565-
return get_or_create_int(idx);
558+
return extract_scalar<int32_t>(idx);
566559
}
567560

568561
void ComputeGraph::set_symint(const ValueRef idx, const int32_t val) {

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -685,8 +685,6 @@ class ComputeGraph final {
685685
const ValueRef idx,
686686
const int32_t default_value);
687687

688-
int32_t get_or_create_int(const ValueRef idx);
689-
690688
int32_t get_or_create_int(const ValueRef idx, const int32_t default_value);
691689

692690
void set_symint(const ValueRef idx, const int32_t val);

backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,35 @@
1010

1111
#define PRECISION ${PRECISION}
1212

13+
// Binary comparison ops require that the output is boolean and not the same as input.
14+
$IS_COMPARISON_OP = (any([name in VARIANT_NAME for name in ["binary_eq", "binary_lt", "binary_le", "binary_gt", "binary_ge"]]))
15+
16+
#define NAME ${VARIANT_NAME}
17+
1318
#define VEC4_T ${texel_type(DTYPE)}
14-
#define T ${buffer_scalar_type(DTYPE)}
19+
$if IS_COMPARISON_OP:
20+
#define T ${buffer_scalar_type("uint8")}
21+
#define VEC4_OUT_T ${texel_type("uint8")}
22+
$else:
23+
#define T ${buffer_scalar_type(DTYPE)}
24+
#define VEC4_OUT_T VEC4_T
1525

1626
#define op(X, Y, A) ${OPERATOR}
1727

1828
${define_active_storage_type(STORAGE)}
1929
${define_required_extensions(DTYPE)}
2030

31+
32+
$if IS_COMPARISON_OP:
33+
${define_required_extensions("uint8")}
34+
2135
layout(std430) buffer;
2236

23-
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
37+
$if IS_COMPARISON_OP:
38+
${layout_declare_tensor(B, "w", "t_out", "uint8", STORAGE)}
39+
$else:
40+
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
41+
2442
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
2543
${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)}
2644

@@ -121,7 +139,7 @@ void main() {
121139
write_texel_lpos(
122140
t_out,
123141
lpos,
124-
VEC4_T(op(in_texel, other_texel, alpha)),
142+
VEC4_OUT_T(op(in_texel, other_texel, alpha)),
125143
out_axis_map);
126144
}
127145

0 commit comments

Comments (0)