apache · guan404ming · Jan 12, 2026 · Jan 2, 2026 · Jan 3, 2026 · 400Ping
diff --git a/qdp/Cargo.lock b/qdp/Cargo.lock
diff --git a/qdp/Cargo.toml b/qdp/Cargo.toml
@@ -31,6 +31,12 @@ parquet = "54"
 # NumPy file format support
 ndarray = "0.16"
 ndarray-npy = "0.9"
+# Protocol Buffer support for TensorFlow TensorProto
+prost = "0.12"
+prost-build = "0.12"
+bytes = "1.5"
+# Vendored protoc binary, used by the build script so builds do not fail when no system protoc is installed
+protoc-bin-vendored = "3"
 
 # Release profile optimizations
 [profile.release]

diff --git a/qdp/qdp-core/Cargo.toml b/qdp/qdp-core/Cargo.toml
@@ -13,6 +13,12 @@ arrow = { workspace = true }
 parquet = { workspace = true }
 ndarray = { workspace = true }
 ndarray-npy = { workspace = true }
+prost = { workspace = true }
+bytes = { workspace = true }
+
+[build-dependencies]
+prost-build = { workspace = true }
+protoc-bin-vendored = { workspace = true }
 
 [lib]
 name = "qdp_core"

diff --git a/qdp/qdp-core/build.rs b/qdp/qdp-core/build.rs
@@ -0,0 +1,36 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+fn main() {
+    // SAFETY: set_var is the first statement in main, before this script has started any threads. It points PROTOC at the vendored binary so CI/dev machines need no system protoc.
+    unsafe {
+        std::env::set_var("PROTOC", protoc_bin_vendored::protoc_bin_path().unwrap());
+    }
+
+    let mut config = prost_build::Config::new();
+
+    // Generate tensor_content as bytes::Bytes instead of Vec<u8> (avoids a copy during protobuf decode)
+    config.bytes([".tensorflow.TensorProto.tensor_content"]);
+
+    // Emit an include file with a fixed name so users need not guess the generated filename/module path
+    config.include_file("tensorflow_proto_mod.rs");
+
+    config
+        .compile_protos(&["proto/tensor.proto"], &["proto"])
+        .unwrap();
+
+    println!("cargo:rerun-if-changed=proto/tensor.proto");
+}
diff --git a/qdp/qdp-core/proto/tensor.proto b/qdp/qdp-core/proto/tensor.proto
@@ -0,0 +1,32 @@
+syntax = "proto3";
+
+package tensorflow;
+
+// TensorProto: minimal subset of TensorFlow's message. Only the necessary fields are declared; field numbers match the official TensorFlow definition.
+message TensorProto {
+  // Field 1: dtype (enum DataType in TF, but a varint in wire format, so int32 decodes it)
+  // DT_DOUBLE = 2 (see tensorflow/core/framework/types.proto)
+  int32 dtype = 1;
+
+  // Field 2: tensor_shape
+  TensorShapeProto tensor_shape = 2;
+
+  // Field 4: tensor_content (raw tensor bytes; preferred for efficient parsing)
+  bytes tensor_content = 4;
+
+  // Field 6: double_val (fallback, only used when tensor_content is empty)
+  repeated double double_val = 6 [packed = true];
+}
+
+message TensorShapeProto {
+  // Field 2: dim, one entry per tensor dimension (field number matches official)
+  repeated Dim dim = 2;
+  // Field 3: unknown_rank (optional; helps with better error messages)
+  bool unknown_rank = 3;
+}
+
+message Dim {
+  // Field 1: size of this dimension
+  int64 size = 1;
+  // The name field (field number 2) is intentionally not declared, to reduce parsing overhead
+}
diff --git a/qdp/qdp-core/src/io.rs b/qdp/qdp-core/src/io.rs
@@ -267,3 +267,22 @@ pub fn read_numpy_batch<P: AsRef<Path>>(path: P) -> Result<(Vec<f64>, usize, usi
 ///
 /// This is a type alias for backward compatibility. Use [`crate::readers::ParquetStreamingReader`] directly.
 pub type ParquetBlockReader = crate::readers::ParquetStreamingReader;
+
+/// Reads batch data from a TensorFlow TensorProto file.
+///
+/// Supports Float64 tensors with shape `[batch_size, feature_size]` or `[n]`.
+/// Prefers `tensor_content` for efficient parsing, but still requires one copy to `Vec<f64>`.
+///
+/// # Byte Order
+/// Assumes little-endian byte order (standard on x86_64).
+///
+/// # Returns
+/// Tuple of `(flattened_data, num_samples, sample_size)`; errors from creating the reader or reading the batch are propagated.
+///
+/// # TODO
+/// Add OOM protection for very large files.
+pub fn read_tensorflow_batch<P: AsRef<Path>>(path: P) -> Result<(Vec<f64>, usize, usize)> {
+    use crate::reader::DataReader; // trait import; presumably what provides `read_batch` (confirm)
+    let mut reader = crate::readers::TensorFlowReader::new(path)?;
+    reader.read_batch()
+}
diff --git a/qdp/qdp-core/src/lib.rs b/qdp/qdp-core/src/lib.rs
@@ -21,6 +21,7 @@ pub mod io;
 pub mod preprocessing;
 pub mod reader;
 pub mod readers;
+pub mod tf_proto;
 #[macro_use]
 mod profiling;
 
@@ -489,6 +490,40 @@ impl QdpEngine {
             encoding_method,
         )
     }
+
+    /// Loads data from a TensorFlow TensorProto file and encodes it into a quantum state.
+    ///
+    /// Supports Float64 tensors with shape `[batch_size, feature_size]` or `[n]`.
+    /// Uses efficient parsing with `tensor_content` when available.
+    ///
+    /// # Arguments
+    /// * `path` - Path to TensorProto file (.pb)
+    /// * `num_qubits` - Number of qubits
+    /// * `encoding_method` - Strategy: "amplitude", "angle", or "basis"
+    ///
+    /// # Returns
+    /// Single DLPack pointer containing all encoded states (shape: `[num_samples, 2^num_qubits]`); read and encode errors are propagated.
+    pub fn encode_from_tensorflow(
+        &self,
+        path: &str,
+        num_qubits: usize,
+        encoding_method: &str,
+    ) -> Result<*mut DLManagedTensor> {
+        crate::profile_scope!("Mahout::EncodeFromTensorFlow");
+
+        let (batch_data, num_samples, sample_size) = { // inner block keeps the IO marker local to the read (assuming profile_scope! is scope-based; confirm)
+            crate::profile_scope!("IO::ReadTensorFlowBatch");
+            crate::io::read_tensorflow_batch(path)?
+        };
+
+        self.encode_batch(
+            &batch_data,
+            num_samples,
+            sample_size,
+            num_qubits,
+            encoding_method,
+        )
+    }
 }
 
 // Re-export key types for convenience

diff --git a/qdp/qdp-core/src/readers/mod.rs b/qdp/qdp-core/src/readers/mod.rs
@@ -22,11 +22,14 @@
 //! # Fully Implemented Formats
 //! - **Parquet**: [`ParquetReader`], [`ParquetStreamingReader`]
 //! - **Arrow IPC**: [`ArrowIPCReader`]
+//! - **TensorFlow TensorProto**: [`TensorFlowReader`]
 
 pub mod arrow_ipc;
 pub mod numpy;
 pub mod parquet;
+pub mod tensorflow;
 
 pub use arrow_ipc::ArrowIPCReader;
 pub use numpy::NumpyReader;
 pub use parquet::{ParquetReader, ParquetStreamingReader};
+pub use tensorflow::TensorFlowReader;