Commit 6db324b

Merge pull request #592 from joker-star-l/dynamic-shape
Support dynamic batch size on TensorFlow
2 parents: bcf2835 + 7cfb365

File tree: 3 files changed (+47, -25 lines)


wayang-platforms/wayang-tensorflow/src/main/java/org/apache/wayang/tensorflow/model/op/nn/TensorflowBatchNorm3D.java

Lines changed: 16 additions & 3 deletions
@@ -23,9 +23,13 @@
 import org.tensorflow.Graph;
 import org.tensorflow.Operand;
 import org.tensorflow.op.Ops;
+import org.tensorflow.op.core.Shape;
 import org.tensorflow.types.TBool;
+import org.tensorflow.types.TInt32;
 import org.tensorflow.types.family.TNumber;
 
+import java.util.Arrays;
+
 public class TensorflowBatchNorm3D<T extends TNumber> {
     private final Ops tf;
     private final BatchNorm3D op;
@@ -44,9 +48,18 @@ public TensorflowBatchNorm3D(Graph graph, Ops tf, BatchNorm3D op, Class<T> tClass
     }
 
     public Operand<T> call(Operand<T> input, Operand<TBool> trainingMode) {
-        long[] s = input.shape().asArray(); // N, C, D, H, W
-        Operand<T> input2D = tf.reshape(input, tf.array(s[0], s[1], s[2], -1)); // N, C, D, H * W
+        // input: N, C, D, H, W
+        Shape<TInt32> inputShape = tf.shape(input);
+        Operand<TInt32> square = tf.math.mul(tf.shape.size(inputShape, tf.constant(3)), tf.shape.size(inputShape, tf.constant(4)));
+        Operand<TInt32> newShape = tf.concat(
+                Arrays.asList(
+                        tf.shape.take(inputShape, tf.constant(3)),
+                        square
+                ),
+                tf.constant(0)
+        ); // N, C, D, H * W
+        Operand<T> input2D = tf.reshape(input, newShape);
         Operand<T> output = batchNorm2D.call(input2D, trainingMode);
-        return tf.withName(op.getName()).reshape(output, tf.constant(s));
+        return tf.withName(op.getName()).reshape(output, inputShape);
     }
 }
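Note: the change replaces the build-time shape lookup (input.shape().asArray(), which reports -1 for a dynamic batch dimension) with shape operators evaluated inside the graph, so the reshape now works for any batch size at run time. A minimal, self-contained sketch of the same pattern, using only the calls that appear in the diff (the class and method names below are illustrative, not part of the commit):

import org.tensorflow.Operand;
import org.tensorflow.op.Ops;
import org.tensorflow.op.core.Shape;
import org.tensorflow.types.TInt32;
import org.tensorflow.types.family.TNumber;

import java.util.Arrays;

public class DynamicReshapeSketch {

    // Collapses a 5-D tensor (N, C, D, H, W) into (N, C, D, H * W) without
    // knowing any of the dimensions at graph-construction time.
    public static <T extends TNumber> Operand<T> collapseLastTwoAxes(Ops tf, Operand<T> input) {
        Shape<TInt32> inputShape = tf.shape(input);                // runtime shape of the input
        Operand<TInt32> hw = tf.math.mul(
                tf.shape.size(inputShape, tf.constant(3)),         // H
                tf.shape.size(inputShape, tf.constant(4)));        // W
        Operand<TInt32> newShape = tf.concat(
                Arrays.asList(
                        tf.shape.take(inputShape, tf.constant(3)), // [N, C, D]
                        hw),                                       // [H * W]
                tf.constant(0));
        return tf.reshape(input, newShape);
    }
}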

wayang-platforms/wayang-tensorflow/src/main/java/org/apache/wayang/tensorflow/model/op/nn/TensorflowConvLSTM2D.java

Lines changed: 14 additions & 7 deletions
@@ -24,6 +24,7 @@
 import org.tensorflow.Output;
 import org.tensorflow.op.Ops;
 import org.tensorflow.op.core.Stack;
+import org.tensorflow.types.TInt32;
 import org.tensorflow.types.family.TNumber;
 
 import java.util.ArrayList;
@@ -45,15 +46,21 @@ public TensorflowConvLSTM2D(Ops tf, ConvLSTM2D op, Class<T> tClass) {
 
     public Operand<?> call(Operand<T> input) {
         // input: [batch_size, time_step, input_dim, height, width]
-        long batchSize = input.shape().get(0);
-        long seqLen = input.shape().get(1);
-        long height = input.shape().get(3);
-        long width = input.shape().get(4);
-
-        Operand<T> h = tf.zeros(tf.array(batchSize, op.getHiddenDim(), height, width), tClass);
-        Operand<T> c = tf.zeros(tf.array(batchSize, op.getHiddenDim(), height, width), tClass);
+        Operand<TInt32> shape = tf.concat(
+                Arrays.asList(
+                        tf.shape.size(tf.shape(input), tf.constant(0)), // batch_size
+                        tf.array(op.getHiddenDim()), // hidden_dim
+                        tf.shape.size(tf.shape(input), tf.constant(3)), // height
+                        tf.shape.size(tf.shape(input), tf.constant(4)) // width
+                ),
+                tf.constant(0)
+        );
+
+        Operand<T> h = tf.zeros(shape, tClass);
+        Operand<T> c = tf.zeros(shape, tClass);
 
         String outKey = op.getOutput();
+        long seqLen = input.shape().get(1);
         List<Operand<T>> outputs = new ArrayList<>((int) seqLen);
 
         for (long t = 0; t < seqLen; t++) {
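Note: the ConvLSTM state gets the same treatment: the batch size, height, and width of the zero-initialized h and c tensors are now read from the runtime shape of the input, while hidden_dim stays a compile-time constant. Only seqLen is still taken from the static shape, since the Java for loop above unrolls the recurrence and needs a concrete bound. A small sketch of the state initialization, assuming float tensors and a plain int hiddenDim (both illustrative, not part of the commit):

import org.tensorflow.Operand;
import org.tensorflow.op.Ops;
import org.tensorflow.types.TFloat32;
import org.tensorflow.types.TInt32;

import java.util.Arrays;

public class ConvLstmStateSketch {

    // Builds a zero tensor of shape (batch_size, hiddenDim, height, width), where
    // batch_size, height, and width come from the runtime shape of the input.
    public static Operand<TFloat32> zeroState(Ops tf, Operand<TFloat32> input, int hiddenDim) {
        Operand<TInt32> stateShape = tf.concat(
                Arrays.asList(
                        tf.shape.size(tf.shape(input), tf.constant(0)), // batch_size (dynamic)
                        tf.array(hiddenDim),                            // hidden_dim (static)
                        tf.shape.size(tf.shape(input), tf.constant(3)), // height (dynamic)
                        tf.shape.size(tf.shape(input), tf.constant(4))  // width (dynamic)
                ),
                tf.constant(0));
        return tf.zeros(stateShape, TFloat32.class);
    }
}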

wayang-tests-integration/src/test/java/org/apache/wayang/tests/TensorflowConvLSTMIT.java

Lines changed: 17 additions & 15 deletions
@@ -21,6 +21,7 @@
 import org.apache.wayang.api.DLTrainingDataQuantaBuilder;
 import org.apache.wayang.api.JavaPlanBuilder;
 import org.apache.wayang.api.LoadCollectionDataQuantaBuilder;
+import org.apache.wayang.api.PredictDataQuantaBuilder;
 import org.apache.wayang.basic.model.DLModel;
 import org.apache.wayang.basic.model.op.*;
 import org.apache.wayang.basic.model.op.nn.*;
@@ -32,22 +33,19 @@
 import org.apache.wayang.tensorflow.Tensorflow;
 import org.junit.Test;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Random;
+import java.util.*;
 
 /**
  * Test the Tensorflow ConvLSTM integration with Wayang.
  */
 public class TensorflowConvLSTMIT {
-    private int inputDim = 1;
+    private int inputDim = 2;
     private int hiddenDim = 64;
-    private int outputDim = 1;
-    private int inputFrames = 8;
+    private int outputDim = 2;
+    private int inputFrames = 6;
     private int outputFrames = 3;
-    private int height = 16;
-    private int width = 16;
+    private int height = 17;
+    private int width = 29;
 
     private int batchSize = 16;
 
@@ -59,8 +57,8 @@ public void test() {
         int[] stride = new int[]{1};
         int numLayers = 3;
 
-        Input features = new Input(new int[]{batchSize, inputFrames, inputDim, height, width}, Input.Type.FEATURES);
-        Input labels = new Input(new int[]{batchSize, outputFrames, outputDim, height, width}, Input.Type.LABEL);
+        Input features = new Input(new int[]{-1, inputFrames, inputDim, height, width}, Input.Type.FEATURES);
+        Input labels = new Input(new int[]{-1, outputFrames, outputDim, height, width}, Input.Type.LABEL);
 
         int[] perm = new int[]{0, 2, 1, 3, 4};
 
@@ -79,8 +77,8 @@ public void test() {
                     ;
         }
         builder.layer(new Slice(new int[][]{{0, -1}, {inputFrames - outputFrames, -1}, {0, -1}, {0, -1}, {0, -1}})) // Input only the last outputFrames from ConvLSTM
-                .layer(new Reshape(new int[]{batchSize, -1, height, width}))
-                .layer(new Conv2D(hiddenDim * outputFrames, outputFrames, kernelSize, stride, "SAME", true))
+                .layer(new Reshape(new int[]{-1, hiddenDim * outputFrames, height, width}))
+                .layer(new Conv2D(hiddenDim * outputFrames, outputDim * outputFrames, kernelSize, stride, "SAME", true))
                 // .layer(new Transpose(perm)) // change channels and timeStep
                 // .layer(new Conv3D(hiddenDim, outputDim, new int[]{3, 3, 3}, stride, "SAME", true)) // FIXME: The gradient of conv3D cannot be calculated, use conv2D as a substitute.
                 // .layer(new Transpose(perm)) // change channels and timeStep
@@ -110,13 +108,17 @@ public void test() {
         JavaPlanBuilder plan = new JavaPlanBuilder(wayangContext);
 
         LoadCollectionDataQuantaBuilder<float[][][][]> X = plan.loadCollection(mockData(inputFrames, inputDim));
+        LoadCollectionDataQuantaBuilder<float[][][][]> XTest = plan.loadCollection(mockData(inputFrames, inputDim));
         LoadCollectionDataQuantaBuilder<float[][][][]> Y = plan.loadCollection(mockData(outputFrames, outputDim));
 
         DLTrainingDataQuantaBuilder<float[][][][], float[][][][]> trainingOperator = X.dlTraining(Y, model, option);
 
-        Collection<DLModel> trainedModel = trainingOperator.collect();
+        // Collection<DLModel> trainedModel = trainingOperator.collect();
+        // System.out.println(trainedModel);
 
-        System.out.println(trainedModel);
+        PredictDataQuantaBuilder<float[][][][], float[][][][]> predictOperator = trainingOperator.predict(XTest, float[][][][].class);
+        Collection<float[][][][]> predicted = predictOperator.collect();
+        System.out.println(Arrays.deepToString(predicted.iterator().next()));
     }
 
     /**
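Note: with -1 in the batch position of the Input and Reshape declarations, the test now exercises the dynamic-batch path end to end: it trains on one collection and then runs inference on a second one through the predict operator. A condensed restatement of that flow follows; it is a fragment rather than a standalone program, and plan, model, option, and mockData are the ones built in the test above:

// training data, held-out data, and labels
LoadCollectionDataQuantaBuilder<float[][][][]> X = plan.loadCollection(mockData(inputFrames, inputDim));
LoadCollectionDataQuantaBuilder<float[][][][]> XTest = plan.loadCollection(mockData(inputFrames, inputDim));
LoadCollectionDataQuantaBuilder<float[][][][]> Y = plan.loadCollection(mockData(outputFrames, outputDim));

// train on (X, Y), then predict on the held-out collection
DLTrainingDataQuantaBuilder<float[][][][], float[][][][]> training = X.dlTraining(Y, model, option);
PredictDataQuantaBuilder<float[][][][], float[][][][]> predict = training.predict(XTest, float[][][][].class);
Collection<float[][][][]> predicted = predict.collect(); // one prediction per test quantum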
