Skip to content

Commit aceed2e

Browse files
committed
Remove the use of unsafeRunSync; Fix misuse of IO in core and JVM backend which was causing redundant op invocations
1 parent 5bb57df commit aceed2e

File tree

6 files changed

+206
-271
lines changed

6 files changed

+206
-271
lines changed

backends/.js/src/main/scala/ORTOperatorBackend.scala

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package org.emergentorder.onnx.backends
22

33
import scala.concurrent.duration._
4-
//import typings.onnxruntimeWeb.tensorMod._
5-
//import typings.onnxruntimeWeb.tensorMod.Tensor.FloatType
4+
//import typings.onnxruntimeWeb.tensorMod
5+
import org.emergentorder.onnx.onnxruntimeWeb.tensorMod
66
//import typings.onnxruntimeWeb.tensorMod.Tensor.DataType
77
//import typings.onnxjs.libTensorMod.Tensor.DataTypeMap.DataTypeMapOps
88
import org.emergentorder.onnx.onnxruntimeWeb.mod.{InferenceSession => OrtSession}
@@ -38,12 +38,17 @@ trait ORTOperatorBackend extends OpToONNXBytesConverter {
3838
val bytesArrayBuffer = bytes.toTypedArray.buffer
3939
val session: IO[
4040
InferenceSession
41-
] = IO.fromFuture(IO { OrtSession.create(bytesArrayBuffer, {
42-
val opts = InferenceSession.SessionOptions()
43-
opts.executionProviders = scala.scalajs.js.Array("wasm")
44-
opts
45-
}
46-
).toFuture })
41+
] = IO.fromFuture(IO {
42+
OrtSession
43+
.create(
44+
bytesArrayBuffer, {
45+
val opts = InferenceSession.SessionOptions()
46+
opts.executionProviders = scala.scalajs.js.Array("cpu")
47+
opts
48+
}
49+
)
50+
.toFuture
51+
})
4752
session
4853
}
4954

@@ -54,9 +59,10 @@ trait ORTOperatorBackend extends OpToONNXBytesConverter {
5459
Td <: TensorShapeDenotation,
5560
S <: Shape
5661
](
57-
opModel: Array[Byte],
5862
inputs: Tuple,
59-
input_node_names: IO[List[String]]
63+
input_node_names: IO[List[String]],
64+
opName: String,
65+
attrs: Map[String, Any]
6066
)(using
6167
s: ShapeOf[S],
6268
tt: ValueOf[Tt],
@@ -108,13 +114,20 @@ trait ORTOperatorBackend extends OpToONNXBytesConverter {
108114
.map(_.toArray)
109115
}
110116

117+
val opModel = for {
118+
tens <- inputTensors.memoize
119+
t <- tens
120+
} yield opToModelProto(
121+
opName,
122+
(t.map(_.asInstanceOf[tensorMod.Tensor].`type`.valueOf.asInstanceOf[Float].round)
123+
zip t.map(_.dims.map(_.toInt).toArray)),
124+
attrs
125+
).toByteArray
126+
111127
val res: Tensor[T, Tuple3[Tt, Td, S]] = {
112-
// val resource = cats.effect.Resource.make(IO{getSession(opModel)})(sess => IO{sess.close})
113-
// resource.use( sess =>
114-
inputTensors.flatMap { x =>
115-
// input_node_names.flatMap{y =>
128+
inputTensors.flatMap { x =>
116129
cats.effect.Resource
117-
.make(IO(getSession(opModel)))(sess => IO {})
130+
.make(opModel.map(getSession(_)))(sess => IO {})
118131
.use(sess =>
119132
runModel(
120133
sess,
@@ -127,8 +140,8 @@ trait ORTOperatorBackend extends OpToONNXBytesConverter {
127140
}
128141

129142
}
130-
// res.flatMap(IO.println("Post run").as(_))
131-
res
143+
res.flatMap(IO.println("opNAme = " + opName).as(_))
144+
//res
132145
}
133146

134147
def callOp[T <: Supported, Tt <: TensorTypeDenotation, Td <: TensorShapeDenotation, S <: Shape](
@@ -142,26 +155,15 @@ trait ORTOperatorBackend extends OpToONNXBytesConverter {
142155
td: TensorShapeDenotationOf[Td],
143156
s: ShapeOf[S]
144157
): Tensor[T, Tuple3[Tt, Td, S]] = {
145-
// TODO: prevent passing input to opToONNXBytes
146-
147-
// println("ATTR " + attrs)
148-
val modelProto = opToModelProto(opName, inputs, attrs)
149-
150-
val result: IO[Tensor[T, Tuple3[Tt, Td, S]]] =
151-
for {
152-
mp <- modelProto.flatMap(IO.println("OpName => " + opName).as(_))
153-
} yield {
154-
// println(mp)
155-
callByteArrayOp(
156-
mp.toByteArray,
158+
val inputNodeNames = (0 until inputs.size).toList.map(_.toString)
159+
val result: Tensor[T, Tuple3[Tt, Td, S]] =
160+
callByteArrayOp(
157161
inputs,
158-
IO.pure {
159-
mp.graph.map(_.input.map(_.name.getOrElse(""))).getOrElse(List[String]()).toList
160-
}
162+
IO{inputNodeNames},
163+
opName,
164+
attrs
161165
)
162-
}
163-
164-
result.flatten
166+
result
165167
}
166168

167169
def runModel[

backends/.js/src/test/scala/SqueezeNetTest.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class ONNXScalaSpec extends AsyncFreeSpec with AsyncIOSpec with Matchers {
2828
] = IO.fromFuture(IO { OrtSession.create("squeezenet1.0-12.onnx",
2929
{
3030
val opts = InferenceSession.SessionOptions()
31-
opts.executionProviders = scala.scalajs.js.Array("wasm")
31+
opts.executionProviders = scala.scalajs.js.Array("cpu")
3232
opts
3333
}
3434
).toFuture })

backends/.jvm/src/main/scala/ORTModelBackend.scala

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -74,17 +74,14 @@ class ORTModelBackend(onnxBytes: Array[Byte])
7474
val output = cats.effect.Resource
7575
.make(inputTensors)(inTens => IO { inTens.map(_.close) })
7676
.use(inTens =>
77-
IO {
78-
runModel[T, Tt, Td, S](
79-
session,
80-
inTens,
81-
allNodeNamesAndDims._1,
82-
allNodeNamesAndDims._3
83-
)
84-
}
77+
runModel[T, Tt, Td, S](
78+
session,
79+
inTens,
80+
allNodeNamesAndDims._1,
81+
allNodeNamesAndDims._3
82+
)
8583
)
86-
87-
output.unsafeRunSync()
84+
output
8885
}
8986

9087
override def close(): Unit = {}

backends/.jvm/src/main/scala/ORTOperatorBackend.scala

Lines changed: 47 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,14 @@ trait ORTOperatorBackend extends OpToONNXBytesConverter with AutoCloseable {
7171

7272
// TODO: Denotations
7373
val result: Tensor[T, Tuple3[Tt, Td, S]] = tensArr
74-
.map(x =>
74+
.flatMap(x =>
7575
Tensor(
7676
x,
7777
tensorTypeDenotationFromType,
7878
tensorShapeDenotationFromType,
7979
shapeFromType
8080
)
8181
)
82-
.unsafeRunSync()
8382
// result.flatMap(IO.println("Invoking run").as(_))
8483
result
8584
}
@@ -91,52 +90,37 @@ trait ORTOperatorBackend extends OpToONNXBytesConverter with AutoCloseable {
9190
Td <: TensorShapeDenotation,
9291
S <: Shape
9392
](
94-
opModel: Array[Byte],
9593
inputs: Tuple,
96-
input_node_names: IO[List[String]]
94+
input_node_names: List[String],
95+
opName: String,
96+
attrs: Map[String, Any]
9797
)(using
9898
s: ShapeOf[S],
9999
tt: ValueOf[Tt],
100100
td: TensorShapeDenotationOf[Td]
101101
): Tensor[T, Tuple3[Tt, Td, S]] = {
102-
/*
103-
val input_node_names = inputs.toArray.zipWithIndex.map { (e, i) =>
104-
val incr: String = if inputs.toArray.distinct.size == inputs.size then "" else i.toString
105-
val tensE = e.asInstanceOf[Tensor[T, Tuple3[Tt, Td, S]]]
106-
tensE.map{x =>
107-
val t = ((x.toString + incr).hashCode).toString
108-
println("ANESMMMS " + t + " " + i)
109-
t
110-
}
111-
}.toList.sequence
112-
*/
113-
114102
// TODO: more outputs
115-
val output_node_names = List(input_node_names.toString)
103+
val output_node_names = List(inputs.size.toString)
116104

117105
// Spurious warning here, see: https://github.com/lampepfl/dotty/issues/10318
118106
// TODO: don't mix up Options and Tensors here
119107
@annotation.nowarn
120-
def inputTensors: IO[Array[OnnxTensor]] = {
108+
val inputTensors: IO[Array[OnnxTensor]] = {
121109

122110
inputs.toArray
123111
.flatMap { elem =>
124112
elem match {
125113
case opt: Option[Tensor[T, Tuple3[Tt, Td, S]]] =>
126114
opt match {
127115
case Some(x) =>
128-
Some(x.data.flatMap { y =>
129-
x.shape.map { z =>
130-
getOnnxTensor(y, z, env)
131-
}
116+
Some(x.map { y =>
117+
getOnnxTensor(y._1, y._2._3.toSeq.toArray, env)
132118
})
133-
case None => None
134119
}
120+
case None => None
135121
case tens: Tensor[T, Tuple3[Tt, Td, S]] =>
136-
Some(tens.data.flatMap { x =>
137-
tens.shape.map { y =>
138-
getOnnxTensor(x, y, env)
139-
}
122+
Some(tens.map { x =>
123+
getOnnxTensor(x._1, x._2._3.toSeq.toArray, env)
140124
})
141125
}
142126
}
@@ -145,30 +129,40 @@ trait ORTOperatorBackend extends OpToONNXBytesConverter with AutoCloseable {
145129
.map(_.toArray)
146130
}
147131

148-
def res: Tensor[T, Tuple3[Tt, Td, S]] = {
149-
// val resource = cats.effect.Resource.make(IO{getSession(opModel)})(sess => IO{sess.close})
150-
// resource.use( sess =>
132+
def res(
133+
opModelBytes: Array[Byte],
134+
inputTensorss: IO[Array[OnnxTensor]]
135+
): Tensor[T, Tuple3[Tt, Td, S]] = {
151136
cats.effect.Resource
152-
.make(inputTensors)(inTens => IO { inTens.map(_.close) })
137+
.make(inputTensorss)(inTens => IO { inTens.map(_.close) })
153138
.use(inTens =>
154-
input_node_names.flatMap { y =>
155-
cats.effect.Resource
156-
.make(IO.blocking(getSession(opModel)))(sess => IO { sess.close })
157-
.use(sess =>
158-
IO {
159-
runModel(
160-
sess,
161-
inTens,
162-
y,
163-
output_node_names
164-
)
165-
}
139+
cats.effect.Resource
140+
.make(IO.blocking(getSession(opModelBytes)))(sess => IO { sess.close })
141+
.use(sess =>
142+
runModel(
143+
sess,
144+
inTens,
145+
input_node_names,
146+
output_node_names
166147
)
167-
}
148+
)
168149
)
169-
}.unsafeRunSync()
150+
}
151+
152+
val resFinal = for {
153+
tens <- inputTensors.memoize
154+
t <- tens
155+
} yield res(
156+
opToModelProto(
157+
opName,
158+
(t.map(_.getInfo.onnxType.value) zip t.map(_.getInfo.getShape.map(_.toInt))),
159+
attrs
160+
).toByteArray,
161+
tens
162+
)
163+
170164
// res.flatMap(IO.println("Post run").as(_))
171-
res
165+
resFinal.flatten
172166
}
173167

174168
def callOp[T <: Supported, Tt <: TensorTypeDenotation, Td <: TensorShapeDenotation, S <: Shape](
@@ -182,25 +176,16 @@ trait ORTOperatorBackend extends OpToONNXBytesConverter with AutoCloseable {
182176
td: TensorShapeDenotationOf[Td],
183177
s: ShapeOf[S]
184178
): Tensor[T, Tuple3[Tt, Td, S]] = {
185-
// TODO: prevent passing input to opToONNXBytes
186-
187-
val modelProto = opToModelProto(opName, inputs, attrs)
188-
189-
// val mp = opToModelProto(opName, inputs, attrs)
179+
val inputNodeNames = (0 until inputs.size).toList.map(_.toString)
190180

191-
val result: IO[Tensor[T, Tuple3[Tt, Td, S]]] =
192-
for {
193-
mp <- modelProto // modelProto.flatMap(IO.println("OpName => " + opName).as(_))
194-
} yield callByteArrayOp(
195-
mp.toByteArray,
181+
val result: Tensor[T, Tuple3[Tt, Td, S]] =
182+
callByteArrayOp(
196183
inputs,
197-
IO.pure {
198-
mp.graph.map(_.input.map(_.name.getOrElse(""))).getOrElse(List[String]()).toList
199-
}
184+
inputNodeNames,
185+
opName,
186+
attrs
200187
)
201-
val r =
202-
result.unsafeRunSync() // If don't use unsafe here, we get redundant callOp invocations. If we memoize w/ unsafe, we leak memory.
203-
r // This approach makes callOp sync/eager again.
188+
result.flatMap(IO.println("Real call opName => " + opName).as(_))
204189
}
205190

206191
def modelToPersist(mod: ModelProto, outName: String) = {

0 commit comments

Comments (0)