Skip to content
Open
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
e2226d9
feat: Includes xmp extensions when extracting xmp data
alextrnnn Jul 30, 2025
560844a
fix: Make sure that xmp extensions are correctly ordered by offset
alextrnnn Jul 31, 2025
e20cee9
fix: Verify that the extensions have the same guid, refactor large tu…
alextrnnn Jul 31, 2025
523e38e
fix: Add validation for offset/length aligning with actual data
alextrnnn Jul 31, 2025
21d8939
feat: Add method for extracting container Items
alextrnnn Aug 1, 2025
20cc54d
feat: Extract each individual item into separate file
alextrnnn Aug 5, 2025
c9adbf1
feat: Build out struct to store multi asset hash
alextrnnn Aug 5, 2025
dacfd9e
feat: Add verification of self for multi-asset hash
alextrnnn Aug 7, 2025
f234fef
fix: Update format, different approach to splitting
alextrnnn Aug 7, 2025
b244b1b
test: Add test to verify hash checks are correct
alextrnnn Aug 7, 2025
e72085a
Merge branch 'main' into alextrnnn/multi-asset-hash
alextrnnn Aug 8, 2025
101ca44
feat: Add validation for multi-asset hash if data hash fails
alextrnnn Aug 8, 2025
33bad03
fix: Doesn't correctly extract items, doesn't factor in optional leng…
alextrnnn Aug 11, 2025
da6e32b
refactor: Change names and return type of extract helper method
alextrnnn Aug 11, 2025
47c4532
fix: Validation status no longer adds incorrect status
alextrnnn Aug 11, 2025
a3580bf
refactor: Remove unnecessary code, change name
alextrnnn Aug 11, 2025
2730961
format: whitespace
alextrnnn Aug 11, 2025
0d4f995
Merge branch 'main' into alextrnnn/multi-asset-hash
alextrnnn Aug 11, 2025
a15f01a
fix: Represent locator map as an enum
alextrnnn Aug 11, 2025
37c8128
docs: Add documentation and remove new functions
alextrnnn Aug 11, 2025
4e9bb70
docs: More documentation
alextrnnn Aug 11, 2025
a784486
docs: Documentation
alextrnnn Aug 11, 2025
3306f4f
chore: Refactored code, add support for box hash, fix logical errors …
alextrnnn Aug 12, 2025
ff8db5c
fix: Return correct validation status for match, avoid searching twic…
alextrnnn Aug 12, 2025
589cd07
format: Formatting changes
alextrnnn Aug 12, 2025
87bb4e0
refactor: Move duplicated code into verify multi asset
alextrnnn Aug 12, 2025
9fa2ad2
chore: Remove unnecessary lifetime specifier
alextrnnn Aug 12, 2025
df18900
chore: Implement PR reviews
alextrnnn Aug 12, 2025
781ee06
test: Add more test cases
alextrnnn Aug 12, 2025
809a9ca
Merge branch 'main' into alextrnnn/multi-asset-hash
alextrnnn Aug 12, 2025
5375461
chore: yanked crate
alextrnnn Aug 13, 2025
bcb932b
Merge branch 'main' into alextrnnn/multi-asset-hash
alextrnnn Aug 13, 2025
ea2e998
Merge branch 'main' into alextrnnn/multi-asset-hash
alextrnnn Sep 5, 2025
287adae
format: Merge conflict artifacts
alextrnnn Sep 5, 2025
0f3e90d
chore: Address quick feedback
alextrnnn Sep 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions sdk/src/assertions/labels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ pub static METADATA_LABEL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
}
});

/// Label prefix for multi-asset hash assertion.
///
/// Re-exported on the assertion type as `MultiAssetHash::LABEL`.
///
/// See <https://spec.c2pa.org/specifications/specifications/2.2/specs/C2PA_Specification.html#_multi_asset_hash>
pub const MULTI_ASSET_HASH: &str = "c2pa.hash.multi-asset";

/// Return the version suffix from an assertion label if it exists.
///
/// When an assertion's schema is changed in a backwards-compatible manner,
Expand Down
3 changes: 3 additions & 0 deletions sdk/src/assertions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ pub use assertion_metadata::{
c2pa_source, Actor, AssertionMetadata, AssetType, DataBox, DataSource, ReviewCode, ReviewRating,
};

mod multi_asset_hash;
pub use multi_asset_hash::MultiAssetHash;

mod schema_org;
pub use schema_org::{SchemaDotOrg, SchemaDotOrgPerson};

Expand Down
328 changes: 328 additions & 0 deletions sdk/src/assertions/multi_asset_hash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
// Copyright 2022 Adobe. All rights reserved.
// This file is licensed to you under the Apache License,
// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
// or the MIT license (http://opensource.org/licenses/MIT),
// at your option.

// Unless required by applicable law or agreed to in writing,
// this software is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or
// implied. See the LICENSE-MIT and LICENSE-APACHE files for the
// specific language governing permissions and limitations under
// each license.

#[cfg(feature = "file_io")]
use std::fs::File;
use std::io::Cursor;

use serde::{Deserialize, Serialize};

use crate::{
assertion::{Assertion, AssertionBase, AssertionCbor},
assertions::{labels, BmffHash, BoxHash, DataHash},
asset_io::{AssetIO, CAIRead},
claim::{Claim, ClaimAssetData},
error::{Error, Result},
jumbf_io::get_assetio_handler,
utils::io_utils::{stream_len, ReaderUtils},
validation_status::{
ASSERTION_MULTI_ASSET_HASH_MALFORMED, ASSERTION_MULTI_ASSET_HASH_MISSING_PART,
},
HashedUri,
};

/// A `MultiAssetHash` assertion provides information on hash values for multiple parts of an asset.
///
/// This assertion contains a list of parts, each one declaring a location within the asset and
/// the corresponding hash assertion for that part.
///
/// See <https://spec.c2pa.org/specifications/specifications/2.2/specs/C2PA_Specification.html#_multi_asset_hash>
#[derive(Deserialize, Serialize, Debug, PartialEq)]
pub struct MultiAssetHash {
/// Parts of the asset, each pairing a location with the hash assertion that covers it.
/// `verify_self` requires byte-range parts to appear in file order, starting at offset 0,
/// and rejects an empty list.
pub parts: Vec<PartHashMap>,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should add docs for these fields from the spec.

}

impl MultiAssetHash {
pub const LABEL: &'static str = labels::MULTI_ASSET_HASH;

/// Checks the layout described by `parts` against the size of the asset.
///
/// The parts within the parts array shall be listed in the order in which they appear in the file,
/// and the parts shall be contiguous, non-overlapping, and cover every byte of the asset.
///
/// `total_size` is the asset length in bytes (the caller in this file passes the stream length).
///
/// # Errors
///
/// Returns `Error::C2PAValidation` carrying `ASSERTION_MULTI_ASSET_HASH_MALFORMED` when `parts`
/// is empty, when a byte-range part does not start exactly where the previous one ended, or when
/// the lengths of the non-optional parts add up to more than `total_size`. Returns
/// `Error::NotImplemented` as soon as a `BmffBox` locator is encountered.
///
/// NOTE(review): the final check is an upper bound only (`>`); a layout that covers fewer bytes
/// than `total_size` is accepted, which is looser than the "cover every byte" wording above.
/// Confirm whether that is intended.
///
/// NOTE(review): the running sums use plain `+=` on `u64`; lengths come from the assertion
/// data, so very large values could overflow. Consider `checked_add` and treating overflow as
/// malformed.
fn verify_self(&self, total_size: u64) -> Result<()> {
if self.parts.is_empty() {
return Err(Error::C2PAValidation(
ASSERTION_MULTI_ASSET_HASH_MALFORMED.to_string(),
));
}

// `expected_offset` is where the next part must begin; `optional_sizes` is the total
// length of the parts flagged optional.
let mut expected_offset: u64 = 0;
let mut optional_sizes: u64 = 0;

for part in &self.parts {
match &part.location {
LocatorMap::ByteRangeLocator(locator) => {
// Each part must begin exactly where the previous one ended (the first at 0),
// which enforces ordering, contiguity and non-overlap in one comparison.
if locator.byte_offset != expected_offset {
return Err(Error::C2PAValidation(
ASSERTION_MULTI_ASSET_HASH_MALFORMED.to_string(),
));
}
// Keep track of the size of optional parts.
if part.optional.unwrap_or(false) {
optional_sizes += locator.length;
}
expected_offset += locator.length;
Comment on lines +68 to +72
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also verify the byte offsets so that each part's length + byte offset doesn't overlap and also doesn't extend past the end of the file.

}
LocatorMap::BmffBox { .. } => {
return Err(Error::NotImplemented(
"BmffBox locators not yet implemented for Multi-Asset hashes".to_string(),
));
}
}
}

// Deduct optional sizes and ensure that the offsets are less than the total size.
if expected_offset - optional_sizes > total_size {
return Err(Error::C2PAValidation(
ASSERTION_MULTI_ASSET_HASH_MALFORMED.to_string(),
));
}

Ok(())
}

/// Verifies this multi-asset hash assertion against the supplied asset data.
///
/// Turns the `ClaimAssetData` variant (file path, in-memory bytes, or caller-provided stream)
/// into a reader, looks up the asset handler for it, and hands both to `verify_stream_hash`.
///
/// # Errors
///
/// Returns `Error::IoError` if a path-based asset cannot be opened, `Error::UnsupportedType`
/// for any other `ClaimAssetData` variant, and otherwise whatever `verify_stream_hash` returns.
pub fn verify_hash(&self, asset_data: &mut ClaimAssetData<'_>, claim: &Claim) -> Result<()> {
    match asset_data {
        #[cfg(feature = "file_io")]
        ClaimAssetData::Path(path) => {
            let mut asset_file = File::open(&path).map_err(Error::IoError)?;
            let handler = crate::jumbf_io::get_assetio_handler_from_path(path);
            self.verify_stream_hash(&mut asset_file, claim, handler)
        }
        ClaimAssetData::Bytes(bytes, format) => {
            let handler = get_assetio_handler(format);
            let mut in_memory = Cursor::new(*bytes);
            self.verify_stream_hash(&mut in_memory, claim, handler)
        }
        ClaimAssetData::Stream(stream, format) => {
            let handler = get_assetio_handler(format);
            self.verify_stream_hash(*stream, claim, handler)
        }
        _ => Err(Error::UnsupportedType),
    }
}

/// Verifies each part of the multi-asset hash through comparing computed hashes.
/// Validates part locations, reads the specified byte ranges, and verifies against referenced hash assertions.
///
/// * `reader` - seekable stream over the whole asset; its length is passed to `verify_self`.
/// * `claim` - used to resolve the `hash_assertion` link of each part and as the fallback
///   source of the hash algorithm.
/// * `asset_handler` - consulted only for box-hash parts; when it is `None` those parts fail
///   with `Error::UnsupportedType`.
///
/// # Errors
///
/// Propagates errors from `verify_self`, from seeking, and from the per-part hash checks.
/// Returns `Error::C2PAValidation` with `ASSERTION_MULTI_ASSET_HASH_MISSING_PART` when the hash
/// assertion of a part cannot be found in `claim` or the byte range of a part cannot be read,
/// `Error::HashMismatch` when the handler offers no box-hash support, and
/// `Error::NotImplemented` for BMFF hashes and `BmffBox` locators.
fn verify_stream_hash(
&self,
mut reader: &mut dyn CAIRead,
claim: &Claim,
asset_handler: Option<&dyn AssetIO>,
) -> Result<()> {
// Total stream length, used only for the structural check in `verify_self`.
let length = stream_len(reader)?;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may want to look into this for other parts of the SDK, there is mixed usage of this and .stream_position().

self.verify_self(length)?;

for part in &self.parts {
// Parts flagged optional are skipped unconditionally: their bytes are never hashed
// here, whether or not they are present in the stream.
if let Some(optional) = part.optional {
if optional {
continue;
};
}

// Retrieve the assertion linked in the multi-asset assertions.
let assertion = claim
.get_assertion_from_link(&part.hash_assertion.url())
.ok_or_else(|| {
Error::C2PAValidation(ASSERTION_MULTI_ASSET_HASH_MISSING_PART.to_string())
})?;

// The label of the linked assertion decides which kind of hash check runs below.
let label = assertion.label();

match &part.location {
LocatorMap::ByteRangeLocator(locator) => {
let offset = locator.byte_offset;
// Shadows the stream length above; from here `length` is the size of this part.
let length = locator.length;

// Read only the specified parts within the larger stream.
reader.seek(std::io::SeekFrom::Start(offset))?;
// A failed read of the range is reported as a missing part.
let buf = reader.read_to_vec(length).map_err(|_| {
Error::C2PAValidation(ASSERTION_MULTI_ASSET_HASH_MISSING_PART.to_string())
})?;
// The part is verified as a standalone in-memory stream starting at position 0.
let mut part_reader = Cursor::new(buf);
Comment on lines +144 to +149
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be able to use Read::take here instead of allocating a vec so we keep it purely streams.


// Perform validation on each part depending on type of hash.
match label.as_str() {
l if l.starts_with(DataHash::LABEL) => {
let dh = DataHash::from_assertion(assertion)?;
// Prefer the algorithm recorded on the data hash; otherwise use the claim algorithm.
let alg = match &dh.alg {
Some(alg) => alg,
None => claim.alg(),
};
dh.verify_stream_hash(&mut part_reader, Some(alg))?;
}
l if l.starts_with(BoxHash::LABEL) => {
let bh = BoxHash::from_assertion(assertion)?;
// Box hashes need a handler: no handler is an unsupported type, and a handler
// without box-hash support is reported as a hash mismatch.
let box_hash_processor = asset_handler
.ok_or(Error::UnsupportedType)?
.asset_box_hash_ref()
.ok_or(Error::HashMismatch("Box hash not supported".to_string()))?;
bh.verify_stream_hash(
&mut part_reader,
Some(claim.alg()),
box_hash_processor,
)?;
}
l if l.starts_with(BmffHash::LABEL) => {
return Err(Error::NotImplemented(
"BmffHash not yet implemented for Multi-Asset hashes".to_string(),
));
}
// NOTE(review): a label matching none of the prefixes above falls through with no
// hash check at all, so the part counts as verified. Confirm this is intended.
_ => {}
}
}
LocatorMap::BmffBox { .. } => {
return Err(Error::NotImplemented(
"BmffBox locators not yet implemented for Multi-Asset hashes".to_string(),
));
}
}
}

Ok(())
}
}

/// One entry of a `MultiAssetHash`: a region of the asset paired with the hash assertion
/// that covers it.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PartHashMap {
    /// Where this part is located within the asset.
    pub location: LocatorMap,

    /// Hashed URI of the hash assertion for this part (serialized as `hashAssertion`).
    pub hash_assertion: HashedUri,

    /// Whether the part is optional; left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub optional: Option<bool>,
}

/// Location of a part within the asset.
///
/// The representation is untagged, so on deserialization serde tries the variants in
/// declaration order and keeps the first one whose fields match.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum LocatorMap {
    /// A contiguous byte range of the asset.
    ByteRangeLocator(ByteRangeLocator),

    /// A BMFF box reference (serialized under the key `bmffBox`).
    #[serde(rename_all = "camelCase")]
    BmffBox {
        /// Identifies the box; the expected string format is not defined in this file.
        bmff_box: String,
    },
}

/// A contiguous run of bytes in the asset, given by its starting offset and its length.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ByteRangeLocator {
    /// Offset of the first byte, measured from the start of the asset stream
    /// (serialized as `byteOffset`).
    pub byte_offset: u64,

    /// Number of bytes in the range.
    pub length: u64,
}

// Opts `MultiAssetHash` into the CBOR assertion helpers; the empty body overrides nothing.
impl AssertionCbor for MultiAssetHash {}

impl AssertionBase for MultiAssetHash {
const LABEL: &'static str = Self::LABEL;

fn to_assertion(&self) -> Result<Assertion> {
Self::to_cbor_assertion(self)
}

fn from_assertion(assertion: &Assertion) -> Result<Self> {
Self::from_cbor_assertion(assertion)
}
}

#[cfg(test)]
pub mod tests {
    #![allow(clippy::expect_used)]
    #![allow(clippy::unwrap_used)]

    use std::io::Cursor;

    use crate::{
        assertion::AssertionBase, assertions::MultiAssetHash, status_tracker::StatusTracker,
        store::Store,
    };

    const ORIG_MOTION_PHOTO: &[u8] = include_bytes!("../../tests/fixtures/motion_photo.jpg");
    const NO_MOVIE_MOTION_PHOTO: &[u8] =
        include_bytes!("../../tests/fixtures/no_movie_motion_photo.jpg");

    /// Loads the store from a JPEG fixture, pulls the first multi-asset hash assertion out of
    /// the provenance claim, and checks that it verifies against the same bytes.
    fn assert_fixture_verifies(jpeg: &'static [u8]) {
        let mut log = StatusTracker::default();
        let store = Store::from_stream("image/jpeg", Cursor::new(jpeg), true, &mut log).unwrap();
        let claim = store.provenance_claim().unwrap();

        let raw = claim.get_assertion(MultiAssetHash::LABEL, 0).unwrap();
        let multi_asset_hash = MultiAssetHash::from_assertion(raw).unwrap();

        let mut asset = Cursor::new(jpeg);
        multi_asset_hash
            .verify_stream_hash(&mut asset, claim, None)
            .unwrap();
    }

    /// The unmodified motion photo verifies.
    #[test]
    fn test_validation() {
        assert_fixture_verifies(ORIG_MOTION_PHOTO);
    }

    /// The motion photo fixture without its movie part still verifies.
    #[test]
    fn test_validation_with_exclusion_of_optional_data_hash() {
        assert_fixture_verifies(NO_MOVIE_MOTION_PHOTO);
    }

    /// JSON -> struct -> assertion -> struct yields an equal value.
    #[test]
    fn test_json_round_trip() {
        let fixture = serde_json::json!({
            "parts": [
                {
                    "location": {
                        "byteOffset": 0,
                        "length": 3211426
                    },
                    "hashAssertion": {
                        "url": "self#jumbf=c2pa.assertions/c2pa.hash.data.part",
                        "hash": "Lq2kdBpPG002xct74CAEOb93d/aRhDHhwzK0EGj9y98="
                    },
                    "optional": false
                },
                {
                    "location": {
                        "byteOffset": 3211426,
                        "length": 38044
                    },
                    "hashAssertion": {
                        "url": "self#jumbf=c2pa.assertions/c2pa.hash.data.part__1",
                        "hash": "KlwzkqoUjclLdqKN0N+T3eGCd45iwGncE4lcwiGXlKs="
                    },
                    "optional": false
                },
                {
                    "location": {
                        "byteOffset": 3249470,
                        "length": 1403182
                    },
                    "hashAssertion": {
                        "url": "self#jumbf=c2pa.assertions/c2pa.hash.data.part__2",
                        "hash": "GykUNh5wHwRVpfsduK2ylqY5IfuHZLyuwIkUTuD7O0E="
                    },
                    "optional": true
                }
            ]
        });

        let expected: MultiAssetHash = serde_json::from_value(fixture).unwrap();
        let round_tripped =
            MultiAssetHash::from_assertion(&expected.to_assertion().unwrap()).unwrap();

        assert_eq!(round_tripped, expected);
    }
}
Loading