Skip to content

Commit 593be85

Browse files
authored
feat: introduce manifest (#94)
1 parent 147f6d0 commit 593be85

File tree

3 files changed

+103
-3
lines changed

3 files changed

+103
-3
lines changed

crates/paimon/src/spec/manifest.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::io::FileIO;
19+
use crate::spec::manifest_entry::ManifestEntry;
20+
use apache_avro::types::Value;
21+
use apache_avro::{from_value, Reader};
22+
23+
use crate::Error;
24+
use crate::Result;
25+
26+
/// Manifest file reader and writer.
27+
///
28+
/// A manifest file contains a list of ManifestEntry records in Avro format.
29+
/// Each entry represents an addition or deletion of a data file.
30+
///
31+
/// Impl Reference: <https://github.com/apache/paimon/blob/release-1.3/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java>
32+
pub struct Manifest;
33+
34+
#[allow(dead_code)]
35+
impl Manifest {
36+
/// Read manifest entries from a file.
37+
///
38+
/// # Arguments
39+
/// * `file_io` - FileIO instance for reading files
40+
/// * `path` - Path to the manifest file
41+
///
42+
/// # Returns
43+
/// A vector of ManifestEntry records
44+
pub async fn read(file_io: &FileIO, path: &str) -> Result<Vec<ManifestEntry>> {
45+
let input_file = file_io.new_input(path)?;
46+
47+
if !input_file.exists().await? {
48+
return Ok(Vec::new());
49+
}
50+
51+
let content = input_file.read().await?;
52+
Self::read_from_bytes(&content)
53+
}
54+
55+
/// Read manifest entries from bytes.
56+
///
57+
/// # Arguments
58+
/// * `bytes` - Avro-encoded manifest file content
59+
///
60+
/// # Returns
61+
/// A vector of ManifestEntry records
62+
fn read_from_bytes(bytes: &[u8]) -> Result<Vec<ManifestEntry>> {
63+
let reader = Reader::new(bytes).map_err(Error::from)?;
64+
let records = reader
65+
.collect::<std::result::Result<Vec<Value>, _>>()
66+
.map_err(Error::from)?;
67+
let values = Value::Array(records);
68+
from_value::<Vec<ManifestEntry>>(&values).map_err(Error::from)
69+
}
70+
}
71+
72+
#[cfg(test)]
#[cfg(not(windows))] // Skip on Windows due to path compatibility issues
mod tests {
    use super::*;
    use crate::io::FileIO;
    use crate::spec::manifest_common::FileKind;
    use std::env::current_dir;

    /// Reads the checked-in manifest fixture through a local-filesystem `FileIO`
    /// and checks the kind and bucket of each of its two entries.
    #[tokio::test]
    async fn test_read_manifest_from_file() {
        let fixture = current_dir()
            .unwrap()
            .join("tests/fixtures/manifest/manifest-8ded1f09-fcda-489e-9167-582ac0f9f846-0");
        let file_io = FileIO::from_url("file://").unwrap().build().unwrap();

        let entries = Manifest::read(&file_io, fixture.to_str().unwrap())
            .await
            .unwrap();

        // Expected (kind, bucket) of every entry, in file order.
        let expected = [(FileKind::Delete, 1), (FileKind::Add, 2)];
        assert_eq!(entries.len(), expected.len());

        // verify manifest entry
        for (entry, (kind, bucket)) in entries.iter().zip(&expected) {
            assert_eq!(entry.kind(), kind);
            assert_eq!(entry.bucket(), *bucket);
        }
    }
}

crates/paimon/src/spec/manifest_entry.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,15 @@ pub struct ManifestEntry {
5555

5656
#[allow(dead_code)]
5757
impl ManifestEntry {
58-
fn kind(&self) -> &FileKind {
58+
pub(crate) fn kind(&self) -> &FileKind {
5959
&self.kind
6060
}
6161

6262
fn partition(&self) -> &Vec<u8> {
6363
&self.partition
6464
}
6565

66-
fn bucket(&self) -> i32 {
66+
pub(crate) fn bucket(&self) -> i32 {
6767
self.bucket
6868
}
6969

crates/paimon/src/spec/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ mod index_file_meta;
3838
pub use index_file_meta::*;
3939

4040
mod index_manifest;
41+
mod manifest;
4142
mod manifest_common;
4243
mod manifest_entry;
4344
mod objects_file;
4445
mod stats;
4546
mod types;
46-
4747
pub use types::*;

0 commit comments

Comments
 (0)