Skip to content

Commit 2a13c04

Browse files
committed
returning cid in ETag instead of MD5 hash
1 parent c8272db commit 2a13c04

File tree

6 files changed

+36
-23
lines changed

6 files changed

+36
-23
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
/target
22
.env
33
/target/release
4+
/.claude

crates/fula-cli/src/handlers/multipart.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ use axum::{
1313
use bytes::Bytes;
1414
use fula_blockstore::{BlockStore, PinStore};
1515
use fula_core::metadata::ObjectMetadata;
16-
use fula_crypto::hashing::md5_hash;
1716
use serde::Deserialize;
1817
use std::sync::Arc;
1918

@@ -116,7 +115,9 @@ pub async fn upload_part(
116115

117116
// Store part data
118117
let cid = state.block_store.put_block(&body).await?;
119-
let etag = md5_hash(&body);
118+
119+
// Use CID as ETag for parts (content-addressed identifier)
120+
let etag = cid.to_string();
120121

121122
let part = UploadPart::new(
122123
part_number,
@@ -158,13 +159,17 @@ pub async fn complete_multipart_upload(
158159
return Err(ApiError::s3(S3ErrorCode::InvalidArgument, "Bucket/key mismatch"));
159160
}
160161

161-
// Calculate final ETag (MD5 of ETags + "-N")
162+
// Calculate final ETag using CIDs (S3 multipart format: {hash}-{partCount})
163+
// We hash the concatenated part CIDs to create a unique identifier
162164
let part_count = upload.parts.len();
163-
let mut etag_concat = String::new();
165+
let mut cid_concat = String::new();
164166
for part in upload.sorted_parts() {
165-
etag_concat.push_str(&part.etag);
167+
cid_concat.push_str(&part.cid);
166168
}
167-
let final_etag = format!("{}-{}", md5_hash(etag_concat.as_bytes()), part_count);
169+
// Use BLAKE3 hash of concatenated CIDs, truncated to 32 hex chars for compatibility
170+
let hash = fula_crypto::hashing::hash(cid_concat.as_bytes());
171+
let hash_hex = hex::encode(&hash.as_bytes()[..16]); // First 16 bytes = 32 hex chars
172+
let final_etag = format!("{}-{}", hash_hex, part_count);
168173

169174
// Calculate total size
170175
let total_size: u64 = upload.parts.values().map(|p| p.size).sum();

crates/fula-cli/src/handlers/object.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,17 @@ pub async fn put_object(
3232

3333
// Store the data
3434
let cid = state.block_store.put_block(&body).await?;
35-
36-
// Calculate ETag (MD5)
37-
let etag = md5_hash(&body);
3835

39-
// Verify Content-MD5 if present
36+
// Use CID as ETag (content-addressed identifier)
37+
// This is S3-compliant: AWS docs state "The ETag may or may not be an MD5 digest"
38+
let etag = cid.to_string();
39+
40+
// Verify Content-MD5 if present (still uses MD5 for S3 compatibility)
4041
if let Some(md5_header) = headers.get("Content-MD5").and_then(|v| v.to_str().ok()) {
4142
if let Ok(expected_bytes) = general_purpose::STANDARD.decode(md5_header) {
4243
let expected_hex = hex::encode(expected_bytes);
43-
if etag != expected_hex {
44+
let actual_md5 = md5_hash(&body);
45+
if actual_md5 != expected_hex {
4446
return Err(ApiError::s3(S3ErrorCode::InvalidDigest, "The Content-MD5 you specified did not match what we received."));
4547
}
4648
} else {

crates/fula-cli/src/multipart.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ impl MultipartUpload {
7575
pub struct UploadPart {
7676
/// Part number (1-10000)
7777
pub part_number: u32,
78-
/// ETag (MD5 hash of part content)
78+
/// ETag (CID of part content - content-addressed identifier)
7979
pub etag: String,
8080
/// Part size in bytes
8181
pub size: u64,

crates/fula-core/src/metadata.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ pub struct ObjectMetadata {
6363
/// Size in bytes
6464
pub size: u64,
6565

66-
/// ETag (usually MD5 hash for S3 compatibility)
66+
/// ETag (CID for single uploads; {first 32 hex chars of the BLAKE3 hash of the concatenated part CIDs}-{partCount} for multipart)
67+
/// Using CID as ETag is S3-compliant: AWS docs state "The ETag may or may not be an MD5 digest"
6768
pub etag: String,
6869

6970
/// Last modified timestamp

docs/openAPI.yaml

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -268,14 +268,14 @@ paths:
268268
<Contents>
269269
<Key>photos/beach.jpg</Key>
270270
<LastModified>2025-12-07T01:42:00.000Z</LastModified>
271-
<ETag>"b2419b1e3fd45d596ee22bdf62aaaa2f"</ETag>
271+
<ETag>"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oc"</ETag>
272272
<Size>2048576</Size>
273273
<StorageClass>STANDARD</StorageClass>
274274
</Contents>
275275
<Contents>
276276
<Key>photos/sunset.png</Key>
277277
<LastModified>2025-12-07T03:15:00.000Z</LastModified>
278-
<ETag>"8f47e10b6a3c9d2e5f1a4b7c3d8e6f9a"</ETag>
278+
<ETag>"bafkreihxvh4d5pqhkhxqfdy3yzpfmam5a6t3wbmtj7d3iur"</ETag>
279279
<Size>1536000</Size>
280280
<StorageClass>STANDARD</StorageClass>
281281
</Contents>
@@ -456,10 +456,13 @@ paths:
456456
description: Object uploaded successfully
457457
headers:
458458
ETag:
459-
description: MD5 hash of the object or IPFS CID
459+
description: |
460+
IPFS CID of the uploaded object (content-addressed identifier).
461+
This is S3-compliant as AWS states "The ETag may or may not be an MD5 digest."
462+
The CID can be used directly to retrieve the object from IPFS.
460463
schema:
461464
type: string
462-
example: '"b2419b1e3fd45d596ee22bdf62aaaa2f"'
465+
example: '"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oc"'
463466
x-amz-version-id:
464467
description: Version ID (if versioning enabled)
465468
schema:
@@ -838,11 +841,11 @@ paths:
838841
headers:
839842
ETag:
840843
description: |
841-
ETag of the uploaded part (IPFS CID).
844+
IPFS CID of the uploaded part (content-addressed identifier).
842845
**Important**: Save this for CompleteMultipartUpload.
843846
schema:
844847
type: string
845-
example: '"a54357aff0632cce46d942af68356b38"'
848+
example: '"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oc"'
846849
'400':
847850
description: Bad Request (part too small, invalid upload ID)
848851
content:
@@ -894,11 +897,11 @@ paths:
894897
<CompleteMultipartUpload>
895898
<Part>
896899
<PartNumber>1</PartNumber>
897-
<ETag>"a54357aff0632cce46d942af68356b38"</ETag>
900+
<ETag>"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oc"</ETag>
898901
</Part>
899902
<Part>
900903
<PartNumber>2</PartNumber>
901-
<ETag>"0c78aef83f66abc1fa1e8477f296d394"</ETag>
904+
<ETag>"bafkreihxvh4d5pqhkhxqfdy3yzpfmam5a6t3wbmtj7d3iur"</ETag>
902905
</Part>
903906
</CompleteMultipartUpload>
904907
responses:
@@ -914,8 +917,9 @@ paths:
914917
<Location>https://gateway.example.com/my-bucket/large-file.bin</Location>
915918
<Bucket>my-bucket</Bucket>
916919
<Key>large-file.bin</Key>
917-
<ETag>"3858f62230ac3c915f300c664312c11f-2"</ETag>
920+
<ETag>"a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6-2"</ETag>
918921
</CompleteMultipartUploadResult>
922+
# Note: Multipart ETag format is {hash}-{partCount}, where {hash} is the first 32 hex characters of the BLAKE3 hash of the concatenated part CIDs
919923
'400':
920924
description: Invalid part list or missing parts
921925
content:
@@ -1132,7 +1136,7 @@ paths:
11321136
<?xml version="1.0" encoding="UTF-8"?>
11331137
<CopyObjectResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
11341138
<LastModified>2025-12-07T12:00:00.000Z</LastModified>
1135-
<ETag>"b2419b1e3fd45d596ee22bdf62aaaa2f"</ETag>
1139+
<ETag>"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oc"</ETag>
11361140
</CopyObjectResult>
11371141
'404':
11381142
$ref: '#/components/responses/NoSuchKey'

0 commit comments

Comments
 (0)