From fcafb83c15838ea723208a4b77c4c2801f3df2c6 Mon Sep 17 00:00:00 2001 From: Jacob Hegna Date: Sat, 22 Apr 2023 00:45:14 +0000 Subject: [PATCH 1/5] Add script to convert from .tflite to .cpp/.h. To get from TF saved model to tflite, see: llvm/lib/Analysis/models/saved-model-to-tflite.py --- compiler_opt/tools/tflite_to_cpp.py | 155 +++++++++++ compiler_opt/tools/tflite_to_cpp_lib.py | 341 ++++++++++++++++++++++++ 2 files changed, 496 insertions(+) create mode 100644 compiler_opt/tools/tflite_to_cpp.py create mode 100644 compiler_opt/tools/tflite_to_cpp_lib.py diff --git a/compiler_opt/tools/tflite_to_cpp.py b/compiler_opt/tools/tflite_to_cpp.py new file mode 100644 index 00000000..c6a79e5b --- /dev/null +++ b/compiler_opt/tools/tflite_to_cpp.py @@ -0,0 +1,155 @@ +# coding=utf-8 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Script for converting between TFLite and C++.""" +from absl import app +from absl import flags +from absl import logging + +from compiler_opt.tools import tflite_to_cpp_lib + +flags.DEFINE_string( + 'input', None, 'Input, which should be a path to a tflite model' +) +flags.mark_flag_as_required('input') + +flags.DEFINE_string( + 'output_dir', None, 'Output directory for the generated files' +) +flags.mark_flag_as_required('output_dir') + +flags.DEFINE_string( + 'name', + None, + ( + 'Name to use for the model. This will be in the filenames and also will' + ' be used to identify the model within LLVM. This should be unique' + ' between models' + ), +) +flags.mark_flag_as_required('name') + +flags.DEFINE_string( + 'base_class', + None, + ( + 'Base class to use for the generated model. This is used when' + ' registering the model in LLVM. This should be a fully-qualified name,' + ' e.g. ::llvm::MLInlineOzEmitCModel' + ), +) +flags.mark_flag_as_required('base_class') + +flags.DEFINE_multi_string( + 'additional_headers', + None, + ( + 'Additional headers to include for the model, for instance the header' + ' definining the base class. 
Should be of the form' + ' --additional_headers="llvm/Analysis/MyHeader.h"' + ), +) + +flags.DEFINE_string( + 'iree_import_tflite_path', + None, + 'Path to the iree-import-tflite binary from iree repository', +) +flags.mark_flag_as_required('iree_import_tflite_path') + +flags.DEFINE_string( + 'emitc_opt_path', + None, + 'Path to the emitc-opt binary from the emitc repository', +) +flags.mark_flag_as_required('emitc_opt_path') + +flags.DEFINE_string( + 'mlir_translate_path', + None, + 'Path to the mlir-translate binary from the llvm repository', +) +flags.mark_flag_as_required('mlir_translate_path') + +flags.DEFINE_string( + 'emitc_runtime_path', + None, + 'Path to the emitc runtime to embed in the generated c++ model', +) +flags.mark_flag_as_required('emitc_runtime_path') + +flags.DEFINE_string( + 'clang_format_path', + None, + ( + '(Optional) path to clang-format binary to use to format the resulting' + ' files' + ), +) +flags.DEFINE_string( + 'clang_format_style', + 'llvm', + 'Style argument to use for clang format', +) + +FLAGS = flags.FLAGS + + +def main(argv): + logging.info('Beginning conversion pipeline.') + tosa = tflite_to_cpp_lib.tflite_to_tosa( + tflite_path=FLAGS.input, + iree_import_tflite_path=FLAGS.iree_import_tflite_path, + ) + emitc_mlir = tflite_to_cpp_lib.tosa_to_emitc_mlir( + tosa=tosa, emitc_opt_path=FLAGS.emitc_opt_path + ) + model = tflite_to_cpp_lib.emitc_mlir_to_cpp( + emitc_mlir=emitc_mlir, + mlir_translate_path=FLAGS.mlir_translate_path, + name=FLAGS.name, + base_class=FLAGS.base_class, + ) + model = tflite_to_cpp_lib.embed_runtime( + model=model, + runtime_path=FLAGS.emitc_runtime_path, + ) + model = tflite_to_cpp_lib.add_additional_headers( + model=model, additional_headers=FLAGS.additional_headers + ) + + tflite_to_cpp_lib.print_llvm_registration_handle( + model=model, base_class=FLAGS.base_class + ) + + if FLAGS.clang_format_path: + model = tflite_to_cpp_lib.format_model( + model=model, + clang_format_path=FLAGS.clang_format_path, + clang_format_style=FLAGS.clang_format_style, + ) + + cpp_path = tflite_to_cpp_lib.get_model_cpp_path(model, FLAGS.output_dir) + hdr_path = tflite_to_cpp_lib.get_model_hdr_path(model, FLAGS.output_dir) + + logging.info('Writing generated files to [%s] and [%s].', cpp_path, hdr_path) + with open(cpp_path, 'wt') as f: + f.write(model.cpp) + with open(hdr_path, 'wt') as f: + f.write(model.hdr) + logging.info('Done.') + + +if __name__ == '__main__': + app.run(main) diff --git a/compiler_opt/tools/tflite_to_cpp_lib.py b/compiler_opt/tools/tflite_to_cpp_lib.py new file mode 100644 index 00000000..de6cd6a1 --- /dev/null +++ b/compiler_opt/tools/tflite_to_cpp_lib.py @@ -0,0 +1,341 @@ +# coding=utf-8 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Library for converting between TFLite and C++."""
+from __future__ import annotations
+
+import os
+import dataclasses
+import subprocess
+import tempfile
+import pathlib
+import re
+
+from absl import logging
+
+_UNUSED_TENSOR_NAME = '_UnusedTensorType'
+_TFAGENTS_POLICY_NAME = 'action'
+_MODEL_NAMESPACE = 'llvm::emitc::generated'
+
+
+def _fmt_includes(includes):
+  return '\n'.join([f'#include "{hdr}"' for hdr in includes]) + '\n'
+
+
+@dataclasses.dataclass
+class EmitCRuntime:
+  """Holds the runtime buffers in memory."""
+
+  # Maps [header name] -> [header contents]
+  headers: dict[str, str]
+
+  # Which is the primary header for the runtime? e.g., 'tosa.h'
+  primary: str
+
+
+def _load_emitc_runtime(path: str) -> EmitCRuntime:
+  """Load the EmitC runtime from a given path."""
+  headers = {}
+  pathlist = pathlib.Path(path).glob('*.h')
+  for path in pathlist:
+    with open(path, 'rt') as f:
+      headers[path.name] = f.read()
+  return EmitCRuntime(headers=headers, primary='tosa.h')
+
+
+def _create_local_emitc_runtime(runtime: EmitCRuntime) -> str:
+  """Create a "local" version of the EmitC runtime.
+
+  The "local" version is analogous to a single-header version of the runtime,
+  but intended to be put in a .cpp file. All relevant code is wrapped in an
+  anonymous namespace in the .cpp file, so each model will have its own copy of
+  the runtime.
+
+  This function modifies the runtime in the following ways:
+  1) removes all macros aside from includes
+  2) removes all comments/whitespace
+  3) renames the Tensor type to _UNUSED_TENSOR_NAME. This is because the
+  Tensor type is a part of the public API for the generated model, and to
+  maintain uniformity between each generated model interface we will use a
+  standard definition of Tensor in LLVM. This is the only part of the runtime
+  which is shared between each generated model.
+
+  This function depends on a particular implementation of the runtime which is
+  preferred by mlcompileropt. To generalize this code, the function should
+  topologically sort each header in the runtime by the inclusion ordering.
+
+  Args:
+    runtime: the runtime to create a local version of.
+
+  Returns:
+    the contents of the local runtime as a string.
+ """ + topsort_on_includes = [ + 'utility.h', + 'types.h', + 'core_ops.h', + 'tensor.h', + 'tosa.h', + ] + assert set(topsort_on_includes).issubset(set(runtime.headers.keys())) + # we don't currently support the eigen runtime, so set the file to zero + runtime.headers['tosa_eigen.h'] = '' + has_been_included = {key: False for key in topsort_on_includes} + for key in topsort_on_includes: + + def on_match(m): + group = m.group(1) + if group not in topsort_on_includes or has_been_included[group]: + return '' + has_been_included[group] = True + return runtime.headers[group] + + runtime.headers[key] = re.sub( + r'#include "emitc/(\w+\.h)"', + on_match, + runtime.headers[key], + ) + local_runtime = runtime.headers[runtime.primary] + # Remove all comments, they just take up space + local_runtime = re.sub(r'//.*', '', local_runtime) + + # Find any stdlib includes and store them + stdlib_includes = re.findall(r'#include <(\w+)>', local_runtime) + + # Remove all the remaining macros + local_runtime = re.sub(r'#.*', '', local_runtime) + + # Wrap the runtime in a local namespace to prevent ODR problems + local_runtime = 'namespace {\n' + local_runtime + '\n}' + + # Reinsert the stdlib includes + include_str = ( + '\n'.join([f'#include <{hdr}>' for hdr in stdlib_includes]) + '\n' + ) + + local_runtime = include_str + local_runtime + + # Rename the tensor type in the runtime, we will use LLVM's internal tensor + # type so that the interface of each model is uniform. Theoretically, it + # would be better to just remove this class, but renaming it is easier to + # reason about. + local_runtime = re.sub( + r'class Tensor', f'class {_UNUSED_TENSOR_NAME}', local_runtime + ) + + # We also need to rename the constructors of the class + local_runtime = re.sub(r'Tensor\(', f'{_UNUSED_TENSOR_NAME}(', local_runtime) + + # Remove all empty newlines and return + return '\n'.join( + [l for l in local_runtime.splitlines() if (l and not l.isspace())] + ) + + +@dataclasses.dataclass +class EmitCModel: + # TODO: document this + # TODO: get rid of cpp and hdr + name: str + cpp: str + hdr: str + + +def _run_clang_format( + buffer: str, clang_format_path: str, clang_format_style: str +) -> str: + """Formats the given buffer and returns the result""" + cmdline = [clang_format_path, f'--style={clang_format_style}'] + result = subprocess.run( + cmdline, stdout=subprocess.PIPE, text=True, input=buffer + ) + return result.stdout + + +def format_model( + model: EmitCModel, clang_format_path: str, clang_format_style: str +) -> str: + """Formats the given model and returns the result""" + logging.info( + 'Formatting the resulting model with style [%s].', clang_format_style + ) + return dataclasses.replace( + model, + cpp=_run_clang_format( + model.cpp, + clang_format_path=clang_format_path, + clang_format_style=clang_format_style, + ), + hdr=_run_clang_format( + model.hdr, + clang_format_path=clang_format_path, + clang_format_style=clang_format_style, + ), + ) + + +def get_model_cpp_path(model: EmitCModel, root: str) -> str: + return os.path.join(root, model.name + '.emitc.cpp') + + +def get_model_hdr_path(model: EmitCModel, root: str) -> str: + return os.path.join(root, model.name + '.emitc.h') + + +def tflite_to_tosa( + tflite_path: str, iree_import_tflite_path: str, *, convert_i48=True +) -> str: + """Converts TFLite to TOSA MLIR.""" + logging.info('Converting the TFLite model to TOSA MLIR.') + cmdline = [ + iree_import_tflite_path, + '-o', + '-', + tflite_path, + '--output-format=mlir-ir', + ] + result = 
subprocess.run(cmdline, stdout=subprocess.PIPE, text=True) + if convert_i48: + return re.sub(r'i48', 'i64', result.stdout) + return result.stdout + + +def tosa_to_emitc_mlir(tosa: str, emitc_opt_path: str) -> str: + """Converts TOSA MLIR to EmitC MLIR using emitc-opt.""" + logging.info('Converting the TOSA MLIR to EmitC MLIR.') + cmdline = [emitc_opt_path, '--convert-tosa-to-emitc', '-o', '-', '-'] + result = subprocess.run( + cmdline, stdout=subprocess.PIPE, text=True, input=tosa + ) + return result.stdout + + +def emitc_mlir_to_cpp( + emitc_mlir: str, + mlir_translate_path: str, + name: str, + base_class: str, +) -> EmitCModel: + """Converts EmitC MLIR to C++ files using mlir-translate.""" + logging.info('Converting the EmitC MLIR to C++.') + + def _get_cmdline(kind: str): + return [ + mlir_translate_path, + '-mlir-to-cpp', + '--emit-cpp-kind=stateful', + '--emit-cpp-arg-name-attr=tf_saved_model.index_path', + f'--emit-cpp-model-name={name}', + f'--emit-cpp-base-class={base_class}', + f'--emit-cpp-file-kind={kind}', + f'--emit-cpp-only-one-fn={_TFAGENTS_POLICY_NAME}', + '-o', + '-', + '-', + ] + + result_cpp = subprocess.run( + _get_cmdline('cpp'), stdout=subprocess.PIPE, text=True, input=emitc_mlir + ).stdout + result_hdr = subprocess.run( + _get_cmdline('header'), + stdout=subprocess.PIPE, + text=True, + input=emitc_mlir, + ).stdout + + # Wrap results in namespaces + result_cpp = f'namespace {_MODEL_NAMESPACE} {{' + '\n' + result_cpp + '}\n' + result_hdr = f'namespace {_MODEL_NAMESPACE} {{' + '\n' + result_hdr + '}\n' + + return EmitCModel(cpp=result_cpp, hdr=result_hdr, name=name) + + +def embed_runtime( + model: EmitCModel, + runtime_path: str, +) -> EmitCModel: + """Embed the emitc runtime in the model.cpp file. + + This also: + 1) renames any types that are coming from LLVM instead of the embedded + runtime, and + 2) includes all required headers + + Args: + model: the model which we are embedding the runtime into. + runtime_path: path to the emitc runtime to embed. + + Returns: + the new model + """ + logging.info('Embedding the EmitC runtime in the generated model.') + + runtime = _load_emitc_runtime(runtime_path) + local_runtime = _create_local_emitc_runtime(runtime) + + new_cpp = local_runtime + model.cpp + + # Rename any uses of the Tensor template type to the fully qualified LLVM name + # This regex uses a negative character lookbehind, so: + # `(Tensor<` and ` Tensor<` + # both match, but + # `IsTensor<` + # does not. the latter appears in the runtime, which we don't want to replace + new_cpp = re.sub(r'(? Date: Sat, 22 Apr 2023 00:53:43 +0000 Subject: [PATCH 2/5] Yapf formatting. 
--- compiler_opt/tools/tflite_to_cpp.py | 49 +++++++++--------------- compiler_opt/tools/tflite_to_cpp_lib.py | 50 +++++++++++-------------- 2 files changed, 39 insertions(+), 60 deletions(-) diff --git a/compiler_opt/tools/tflite_to_cpp.py b/compiler_opt/tools/tflite_to_cpp.py index c6a79e5b..2ba60e66 100644 --- a/compiler_opt/tools/tflite_to_cpp.py +++ b/compiler_opt/tools/tflite_to_cpp.py @@ -19,46 +19,38 @@ from compiler_opt.tools import tflite_to_cpp_lib -flags.DEFINE_string( - 'input', None, 'Input, which should be a path to a tflite model' -) +flags.DEFINE_string('input', None, + 'Input, which should be a path to a tflite model') flags.mark_flag_as_required('input') -flags.DEFINE_string( - 'output_dir', None, 'Output directory for the generated files' -) +flags.DEFINE_string('output_dir', None, + 'Output directory for the generated files') flags.mark_flag_as_required('output_dir') flags.DEFINE_string( 'name', None, - ( - 'Name to use for the model. This will be in the filenames and also will' - ' be used to identify the model within LLVM. This should be unique' - ' between models' - ), + ('Name to use for the model. This will be in the filenames and also will' + ' be used to identify the model within LLVM. This should be unique' + ' between models'), ) flags.mark_flag_as_required('name') flags.DEFINE_string( 'base_class', None, - ( - 'Base class to use for the generated model. This is used when' - ' registering the model in LLVM. This should be a fully-qualified name,' - ' e.g. ::llvm::MLInlineOzEmitCModel' - ), + ('Base class to use for the generated model. This is used when' + ' registering the model in LLVM. This should be a fully-qualified name,' + ' e.g. ::llvm::MLInlineOzEmitCModel'), ) flags.mark_flag_as_required('base_class') flags.DEFINE_multi_string( 'additional_headers', None, - ( - 'Additional headers to include for the model, for instance the header' - ' definining the base class. Should be of the form' - ' --additional_headers="llvm/Analysis/MyHeader.h"' - ), + ('Additional headers to include for the model, for instance the header' + ' definining the base class. 
Should be of the form' + ' --additional_headers="llvm/Analysis/MyHeader.h"'), ) flags.DEFINE_string( @@ -92,10 +84,8 @@ flags.DEFINE_string( 'clang_format_path', None, - ( - '(Optional) path to clang-format binary to use to format the resulting' - ' files' - ), + ('(Optional) path to clang-format binary to use to format the resulting' + ' files'), ) flags.DEFINE_string( 'clang_format_style', @@ -113,8 +103,7 @@ def main(argv): iree_import_tflite_path=FLAGS.iree_import_tflite_path, ) emitc_mlir = tflite_to_cpp_lib.tosa_to_emitc_mlir( - tosa=tosa, emitc_opt_path=FLAGS.emitc_opt_path - ) + tosa=tosa, emitc_opt_path=FLAGS.emitc_opt_path) model = tflite_to_cpp_lib.emitc_mlir_to_cpp( emitc_mlir=emitc_mlir, mlir_translate_path=FLAGS.mlir_translate_path, @@ -126,12 +115,10 @@ def main(argv): runtime_path=FLAGS.emitc_runtime_path, ) model = tflite_to_cpp_lib.add_additional_headers( - model=model, additional_headers=FLAGS.additional_headers - ) + model=model, additional_headers=FLAGS.additional_headers) tflite_to_cpp_lib.print_llvm_registration_handle( - model=model, base_class=FLAGS.base_class - ) + model=model, base_class=FLAGS.base_class) if FLAGS.clang_format_path: model = tflite_to_cpp_lib.format_model( diff --git a/compiler_opt/tools/tflite_to_cpp_lib.py b/compiler_opt/tools/tflite_to_cpp_lib.py index de6cd6a1..a74a022e 100644 --- a/compiler_opt/tools/tflite_to_cpp_lib.py +++ b/compiler_opt/tools/tflite_to_cpp_lib.py @@ -120,9 +120,8 @@ def on_match(m): local_runtime = 'namespace {\n' + local_runtime + '\n}' # Reinsert the stdlib includes - include_str = ( - '\n'.join([f'#include <{hdr}>' for hdr in stdlib_includes]) + '\n' - ) + include_str = ('\n'.join([f'#include <{hdr}>' for hdr in stdlib_includes]) + + '\n') local_runtime = include_str + local_runtime @@ -130,17 +129,15 @@ def on_match(m): # type so that the interface of each model is uniform. Theoretically, it # would be better to just remove this class, but renaming it is easier to # reason about. 
- local_runtime = re.sub( - r'class Tensor', f'class {_UNUSED_TENSOR_NAME}', local_runtime - ) + local_runtime = re.sub(r'class Tensor', f'class {_UNUSED_TENSOR_NAME}', + local_runtime) # We also need to rename the constructors of the class local_runtime = re.sub(r'Tensor\(', f'{_UNUSED_TENSOR_NAME}(', local_runtime) # Remove all empty newlines and return return '\n'.join( - [l for l in local_runtime.splitlines() if (l and not l.isspace())] - ) + [l for l in local_runtime.splitlines() if (l and not l.isspace())]) @dataclasses.dataclass @@ -152,24 +149,20 @@ class EmitCModel: hdr: str -def _run_clang_format( - buffer: str, clang_format_path: str, clang_format_style: str -) -> str: +def _run_clang_format(buffer: str, clang_format_path: str, + clang_format_style: str) -> str: """Formats the given buffer and returns the result""" cmdline = [clang_format_path, f'--style={clang_format_style}'] result = subprocess.run( - cmdline, stdout=subprocess.PIPE, text=True, input=buffer - ) + cmdline, stdout=subprocess.PIPE, text=True, input=buffer) return result.stdout -def format_model( - model: EmitCModel, clang_format_path: str, clang_format_style: str -) -> str: +def format_model(model: EmitCModel, clang_format_path: str, + clang_format_style: str) -> str: """Formats the given model and returns the result""" - logging.info( - 'Formatting the resulting model with style [%s].', clang_format_style - ) + logging.info('Formatting the resulting model with style [%s].', + clang_format_style) return dataclasses.replace( model, cpp=_run_clang_format( @@ -193,9 +186,10 @@ def get_model_hdr_path(model: EmitCModel, root: str) -> str: return os.path.join(root, model.name + '.emitc.h') -def tflite_to_tosa( - tflite_path: str, iree_import_tflite_path: str, *, convert_i48=True -) -> str: +def tflite_to_tosa(tflite_path: str, + iree_import_tflite_path: str, + *, + convert_i48=True) -> str: """Converts TFLite to TOSA MLIR.""" logging.info('Converting the TFLite model to TOSA MLIR.') cmdline = [ @@ -216,8 +210,7 @@ def tosa_to_emitc_mlir(tosa: str, emitc_opt_path: str) -> str: logging.info('Converting the TOSA MLIR to EmitC MLIR.') cmdline = [emitc_opt_path, '--convert-tosa-to-emitc', '-o', '-', '-'] result = subprocess.run( - cmdline, stdout=subprocess.PIPE, text=True, input=tosa - ) + cmdline, stdout=subprocess.PIPE, text=True, input=tosa) return result.stdout @@ -246,8 +239,8 @@ def _get_cmdline(kind: str): ] result_cpp = subprocess.run( - _get_cmdline('cpp'), stdout=subprocess.PIPE, text=True, input=emitc_mlir - ).stdout + _get_cmdline('cpp'), stdout=subprocess.PIPE, text=True, + input=emitc_mlir).stdout result_hdr = subprocess.run( _get_cmdline('header'), stdout=subprocess.PIPE, @@ -294,9 +287,8 @@ def embed_runtime( # `IsTensor<` # does not. the latter appears in the runtime, which we don't want to replace new_cpp = re.sub(r'(? Date: Sat, 22 Apr 2023 01:56:44 +0000 Subject: [PATCH 3/5] Fix pylint errors. 
--- compiler_opt/tools/tflite_to_cpp.py | 5 +++-- compiler_opt/tools/tflite_to_cpp_lib.py | 23 ++++++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/compiler_opt/tools/tflite_to_cpp.py b/compiler_opt/tools/tflite_to_cpp.py index 2ba60e66..47e7bcb3 100644 --- a/compiler_opt/tools/tflite_to_cpp.py +++ b/compiler_opt/tools/tflite_to_cpp.py @@ -97,6 +97,7 @@ def main(argv): + del argv logging.info('Beginning conversion pipeline.') tosa = tflite_to_cpp_lib.tflite_to_tosa( tflite_path=FLAGS.input, @@ -131,9 +132,9 @@ def main(argv): hdr_path = tflite_to_cpp_lib.get_model_hdr_path(model, FLAGS.output_dir) logging.info('Writing generated files to [%s] and [%s].', cpp_path, hdr_path) - with open(cpp_path, 'wt') as f: + with open(cpp_path, 'wt', encoding='utf-8') as f: f.write(model.cpp) - with open(hdr_path, 'wt') as f: + with open(hdr_path, 'wt', encoding='utf-8') as f: f.write(model.hdr) logging.info('Done.') diff --git a/compiler_opt/tools/tflite_to_cpp_lib.py b/compiler_opt/tools/tflite_to_cpp_lib.py index a74a022e..55b5799b 100644 --- a/compiler_opt/tools/tflite_to_cpp_lib.py +++ b/compiler_opt/tools/tflite_to_cpp_lib.py @@ -18,7 +18,6 @@ import os import dataclasses import subprocess -import tempfile import pathlib import re @@ -48,9 +47,9 @@ def _load_emitc_runtime(path: str) -> EmitCRuntime: """Load the EmitC runtime from a given path.""" headers = {} pathlist = pathlib.Path(path).glob('*.h') - for path in pathlist: - with open(path, 'rt') as f: - headers[path.name] = f.read() + for p in pathlist: + with open(p, 'rt', encoding='utf-8') as f: + headers[p.name] = f.read() return EmitCRuntime(headers=headers, primary='tosa.h') @@ -154,7 +153,7 @@ def _run_clang_format(buffer: str, clang_format_path: str, """Formats the given buffer and returns the result""" cmdline = [clang_format_path, f'--style={clang_format_style}'] result = subprocess.run( - cmdline, stdout=subprocess.PIPE, text=True, input=buffer) + cmdline, stdout=subprocess.PIPE, text=True, input=buffer, check=True) return result.stdout @@ -199,7 +198,8 @@ def tflite_to_tosa(tflite_path: str, tflite_path, '--output-format=mlir-ir', ] - result = subprocess.run(cmdline, stdout=subprocess.PIPE, text=True) + result = subprocess.run( + cmdline, stdout=subprocess.PIPE, text=True, check=True) if convert_i48: return re.sub(r'i48', 'i64', result.stdout) return result.stdout @@ -210,7 +210,7 @@ def tosa_to_emitc_mlir(tosa: str, emitc_opt_path: str) -> str: logging.info('Converting the TOSA MLIR to EmitC MLIR.') cmdline = [emitc_opt_path, '--convert-tosa-to-emitc', '-o', '-', '-'] result = subprocess.run( - cmdline, stdout=subprocess.PIPE, text=True, input=tosa) + cmdline, stdout=subprocess.PIPE, text=True, input=tosa, check=True) return result.stdout @@ -239,13 +239,18 @@ def _get_cmdline(kind: str): ] result_cpp = subprocess.run( - _get_cmdline('cpp'), stdout=subprocess.PIPE, text=True, - input=emitc_mlir).stdout + _get_cmdline('cpp'), + stdout=subprocess.PIPE, + text=True, + input=emitc_mlir, + check=True, + ).stdout result_hdr = subprocess.run( _get_cmdline('header'), stdout=subprocess.PIPE, text=True, input=emitc_mlir, + check=True, ).stdout # Wrap results in namespaces From 82a1285cd6608e773c743569779cef517b9e324c Mon Sep 17 00:00:00 2001 From: Jacob Hegna Date: Sat, 22 Apr 2023 02:04:32 +0000 Subject: [PATCH 4/5] Fix pytype errors. 
--- compiler_opt/tools/tflite_to_cpp_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler_opt/tools/tflite_to_cpp_lib.py b/compiler_opt/tools/tflite_to_cpp_lib.py index 55b5799b..964e3145 100644 --- a/compiler_opt/tools/tflite_to_cpp_lib.py +++ b/compiler_opt/tools/tflite_to_cpp_lib.py @@ -158,7 +158,7 @@ def _run_clang_format(buffer: str, clang_format_path: str, def format_model(model: EmitCModel, clang_format_path: str, - clang_format_style: str) -> str: + clang_format_style: str) -> EmitCModel: """Formats the given model and returns the result""" logging.info('Formatting the resulting model with style [%s].', clang_format_style) From b5e669f62798587b84f495b4def56903f4f52997 Mon Sep 17 00:00:00 2001 From: Jacob Hegna Date: Sat, 22 Apr 2023 03:31:43 +0000 Subject: [PATCH 5/5] Add license to autogenerated files. --- compiler_opt/tools/tflite_to_cpp.py | 2 ++ compiler_opt/tools/tflite_to_cpp_lib.py | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/compiler_opt/tools/tflite_to_cpp.py b/compiler_opt/tools/tflite_to_cpp.py index 47e7bcb3..6decc357 100644 --- a/compiler_opt/tools/tflite_to_cpp.py +++ b/compiler_opt/tools/tflite_to_cpp.py @@ -121,6 +121,8 @@ def main(argv): tflite_to_cpp_lib.print_llvm_registration_handle( model=model, base_class=FLAGS.base_class) + model = tflite_to_cpp_lib.add_license_and_notice(model=model) + if FLAGS.clang_format_path: model = tflite_to_cpp_lib.format_model( model=model, diff --git a/compiler_opt/tools/tflite_to_cpp_lib.py b/compiler_opt/tools/tflite_to_cpp_lib.py index 964e3145..ca2f7335 100644 --- a/compiler_opt/tools/tflite_to_cpp_lib.py +++ b/compiler_opt/tools/tflite_to_cpp_lib.py @@ -27,6 +27,25 @@ _TFAGENTS_POLICY_NAME = 'action' _MODEL_NAMESPACE = 'llvm::emitc::generated' +# pylint: disable=line-too-long +_LICENSE_AND_NOTICE = """// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +// This code was originally sourced from github.com/iml130/mlir-emitc and has +// been modified to fit the needs of generated C++ models in LLVM. +""" + def _fmt_includes(includes): return '\n'.join([f'#include "{hdr}"' for hdr in includes]) + '\n' @@ -316,6 +335,12 @@ def add_additional_headers(model: EmitCModel, additional_headers: list[str]): return dataclasses.replace(model, hdr=new_hdr) +def add_license_and_notice(model: EmitCModel) -> EmitCModel: + new_cpp = _LICENSE_AND_NOTICE + model.cpp + new_hdr = _LICENSE_AND_NOTICE + model.hdr + return dataclasses.replace(model, cpp=new_cpp, hdr=new_hdr) + + def print_llvm_registration_handle(model: EmitCModel, base_class: str): """Prints LLVM model registration code.
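
Note: the conversion pipeline added in this series can also be driven directly from Python. The sketch below mirrors the sequence in tflite_to_cpp.py's main() using only functions defined in tflite_to_cpp_lib; it is a minimal sketch, not part of the patches. Every filesystem path, the model name, and the output directory are placeholders to substitute with your own locations for the iree-import-tflite, emitc-opt, and mlir-translate binaries and the EmitC runtime headers; the base class is the example given in the --base_class help text.

from compiler_opt.tools import tflite_to_cpp_lib

# Placeholder values -- substitute your own model, tool binaries, and paths.
NAME = 'InlineOzTestModel'
BASE_CLASS = '::llvm::MLInlineOzEmitCModel'
OUTPUT_DIR = '/tmp/emitc_out'

tosa = tflite_to_cpp_lib.tflite_to_tosa(
    tflite_path='/tmp/model.tflite',
    iree_import_tflite_path='/path/to/iree-import-tflite')
emitc_mlir = tflite_to_cpp_lib.tosa_to_emitc_mlir(
    tosa=tosa, emitc_opt_path='/path/to/emitc-opt')
model = tflite_to_cpp_lib.emitc_mlir_to_cpp(
    emitc_mlir=emitc_mlir,
    mlir_translate_path='/path/to/mlir-translate',
    name=NAME,
    base_class=BASE_CLASS)
# Directory containing the EmitC runtime headers (tosa.h, tensor.h,
# core_ops.h, types.h, utility.h).
model = tflite_to_cpp_lib.embed_runtime(
    model=model, runtime_path='/path/to/emitc-runtime-headers')
tflite_to_cpp_lib.print_llvm_registration_handle(
    model=model, base_class=BASE_CLASS)
model = tflite_to_cpp_lib.add_license_and_notice(model=model)

with open(tflite_to_cpp_lib.get_model_cpp_path(model, OUTPUT_DIR), 'wt',
          encoding='utf-8') as f:
  f.write(model.cpp)
with open(tflite_to_cpp_lib.get_model_hdr_path(model, OUTPUT_DIR), 'wt',
          encoding='utf-8') as f:
  f.write(model.hdr)

The same run maps onto the script's flags (--input, --name, --base_class, --iree_import_tflite_path, --emitc_opt_path, --mlir_translate_path, --emitc_runtime_path, --output_dir); add_additional_headers and format_model (via --additional_headers and --clang_format_path) are the optional extra steps.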