|
| 1 | +# Copyright (c) Qualcomm Innovation Center, Inc. |
| 2 | +# Copyright 2025 Arm Limited and/or its affiliates. |
| 3 | +# |
| 4 | +# This source code is licensed under the BSD-style license found in the |
| 5 | +# LICENSE file in the root directory of this source tree. |
| 6 | + |
| 7 | +# This pass is based on backends/qualcomm/_passes/replace_inf_values.py |
| 8 | +# with some modifications to the values that replace inf. |
| 9 | + |
| 10 | +import torch |
| 11 | +from executorch.exir.pass_base import ExportPass, PassResult |
| 12 | + |
| 13 | + |
class ReplaceInfValues(ExportPass):
    """
    Due to limitation in Quantizer, we need to change inf/-inf to more
    quantizable values.

    The pass rewrites two places:
      * floating-point buffers of the graph module (edited in place), and
      * top-level positional arguments of graph nodes that are exactly
        inf / -inf.

    Keyword arguments and values nested inside list/tuple arguments are not
    inspected.
    """

    # 255 here is mainly for attention_mask in Llama for reasonable quant scale.
    _FINITE_LIMIT = 255

    def __init__(self):
        super().__init__()

    def call(self, graph_module: torch.fx.GraphModule):
        """
        Replace inf/-inf in buffers and node args of ``graph_module``.

        Args:
            graph_module: The graph module to rewrite; it is mutated in place.

        Returns:
            PassResult wrapping the same ``graph_module`` and a flag that is
            True only if at least one inf/-inf value was actually replaced.
        """
        pos_inf = float("inf")
        neg_inf = float("-inf")
        limit = self._FINITE_LIMIT
        modified = False

        for buf_name, tensor in graph_module.named_buffers():
            if not tensor.is_floating_point():
                continue
            pos_mask = tensor == pos_inf
            neg_mask = tensor == neg_inf
            # Only report a modification when an infinite entry really exists;
            # a float buffer without inf values is left completely untouched.
            if not (bool(pos_mask.any()) or bool(neg_mask.any())):
                continue
            tensor[pos_mask] = limit
            tensor[neg_mask] = -limit
            # Kept from the original implementation: re-assign the (already
            # in-place updated) tensor under its buffer name.
            setattr(graph_module, buf_name, tensor)
            modified = True

        for node in graph_module.graph.nodes:
            arg_list = list(node.args)
            node_changed = False
            for index, arg in enumerate(arg_list):
                if arg == neg_inf:
                    arg_list[index] = -limit
                    node_changed = True
                elif arg == pos_inf:
                    arg_list[index] = limit
                    node_changed = True
            # Avoid rewriting node.args when nothing was replaced.
            if node_changed:
                node.args = tuple(arg_list)
                modified = True

        if modified:
            graph_module.recompile()
        return PassResult(graph_module, modified)
0 commit comments