@@ -704,23 +704,23 @@ namespace rgw::dedup {
704704 // ---------------------------------------------------------------------------
705705 static void init_cmp_pairs (const disk_record_t *p_rec,
706706 const bufferlist &etag_bl,
707- bufferlist &sha256_bl , // OUT PARAM
707+ bufferlist &hash_bl , // OUT PARAM
708708 librados::ObjectWriteOperation *p_op)
709709 {
710710 p_op->cmpxattr (RGW_ATTR_ETAG, CEPH_OSD_CMPXATTR_OP_EQ, etag_bl);
711711 // TBD: do we really need the secondary compare using the full manifest?
712712 // Can replace it with something cheaper like size/version?
713713 p_op->cmpxattr (RGW_ATTR_MANIFEST, CEPH_OSD_CMPXATTR_OP_EQ, p_rec->manifest_bl );
714714
715- // SHA has 256 bit splitted into multiple 64bit units
715+ // BLAKE3 hash has 256 bit splitted into multiple 64bit units
716716 const unsigned units = (256 / (sizeof (uint64_t )*8 ));
717717 static_assert (units == 4 );
718718 for (unsigned i = 0 ; i < units; i++) {
719- ceph::encode (p_rec->s .sha256 [i], sha256_bl );
719+ ceph::encode (p_rec->s .hash [i], hash_bl );
720720 }
721721
722- if (!p_rec->s .flags .sha256_calculated ()) {
723- p_op->cmpxattr (RGW_ATTR_SHA256 , CEPH_OSD_CMPXATTR_OP_EQ, sha256_bl );
722+ if (!p_rec->s .flags .hash_calculated ()) {
723+ p_op->cmpxattr (RGW_ATTR_BLAKE3 , CEPH_OSD_CMPXATTR_OP_EQ, hash_bl );
724724 }
725725 }
726726
@@ -755,17 +755,17 @@ namespace rgw::dedup {
755755 ldpp_dout (dpp, 20 ) << __func__ << " ::num_parts=" << p_tgt_rec->s .num_parts
756756 << " ::ETAG=" << etag_bl.to_str () << dendl;
757757
758- bufferlist hash_bl, manifest_hash_bl, tgt_sha256_bl ;
758+ bufferlist hash_bl, manifest_hash_bl, tgt_hash_bl ;
759759 crypto::digest<crypto::SHA1>(p_src_rec->manifest_bl ).encode (hash_bl);
760760 // Use a shorter hash (64bit instead of 160bit)
761761 hash_bl.splice (0 , 8 , &manifest_hash_bl);
762762 librados::ObjectWriteOperation tgt_op;
763- init_cmp_pairs (p_tgt_rec, etag_bl, tgt_sha256_bl , &tgt_op);
763+ init_cmp_pairs (p_tgt_rec, etag_bl, tgt_hash_bl , &tgt_op);
764764 tgt_op.setxattr (RGW_ATTR_SHARE_MANIFEST, manifest_hash_bl);
765765 tgt_op.setxattr (RGW_ATTR_MANIFEST, p_src_rec->manifest_bl );
766- if (p_tgt_rec->s .flags .sha256_calculated ()) {
767- tgt_op.setxattr (RGW_ATTR_SHA256, tgt_sha256_bl );
768- p_stats->set_sha256_attrs ++;
766+ if (p_tgt_rec->s .flags .hash_calculated ()) {
767+ tgt_op.setxattr (RGW_ATTR_BLAKE3, tgt_hash_bl );
768+ p_stats->set_hash_attrs ++;
769769 }
770770
771771 std::string src_oid, tgt_oid;
@@ -800,13 +800,13 @@ namespace rgw::dedup {
800800 // disk-record (as require an expensive random-disk-write).
801801 // When deduping C we can trust the shared_manifest state in the table and
802802 // skip a redundant update to SRC object attribute
803- bufferlist src_sha256_bl ;
803+ bufferlist src_hash_bl ;
804804 librados::ObjectWriteOperation src_op;
805- init_cmp_pairs (p_src_rec, etag_bl, src_sha256_bl , &src_op);
805+ init_cmp_pairs (p_src_rec, etag_bl, src_hash_bl , &src_op);
806806 src_op.setxattr (RGW_ATTR_SHARE_MANIFEST, manifest_hash_bl);
807- if (p_src_rec->s .flags .sha256_calculated ()) {
808- src_op.setxattr (RGW_ATTR_SHA256, src_sha256_bl );
809- p_stats->set_sha256_attrs ++;
807+ if (p_src_rec->s .flags .hash_calculated ()) {
808+ src_op.setxattr (RGW_ATTR_BLAKE3, src_hash_bl );
809+ p_stats->set_hash_attrs ++;
810810 }
811811
812812 ldpp_dout (dpp, 20 ) << __func__ <<" ::send SRC CLS (Shared_Manifest)" << dendl;
@@ -824,57 +824,49 @@ namespace rgw::dedup {
824824 return ret;
825825 }
826826
827- using ceph::crypto::SHA256;
828827 // ---------------------------------------------------------------------------
829- int Background::calc_object_sha256 (const disk_record_t *p_rec, uint8_t *p_sha256 )
828+ int Background::calc_object_blake3 (const disk_record_t *p_rec, uint8_t *p_hash )
830829 {
831- ldpp_dout (dpp, 20 ) << __func__ << " ::p_rec->obj_name=" << p_rec->obj_name << dendl;
832- // Open questions -
833- // 1) do we need the secret if so what is the correct one to use?
834- // 2) are we passing the head/tail objects in the correct order?
830+ ldpp_dout (dpp, 20 ) << __func__ << " ::obj_name=" << p_rec->obj_name << dendl;
835831 RGWObjManifest manifest;
836832 try {
837833 auto bl_iter = p_rec->manifest_bl .cbegin ();
838834 decode (manifest, bl_iter);
839835 } catch (buffer::error& err) {
840- ldpp_dout (dpp, 1 ) << __func__ << " ::ERROR: bad src manifest" << dendl;
836+ ldpp_dout (dpp, 1 ) << __func__ << " ::ERROR: bad src manifest for: "
837+ << p_rec->obj_name << dendl;
841838 return -EINVAL;
842839 }
843- std::string oid;
844- build_oid (p_rec->bucket_id , p_rec->obj_name , &oid);
845- librados::IoCtx head_ioctx;
846- const char *secret = " 0555b35654ad1656d804f1b017cd26e9" ;
847- TOPNSPC::crypto::HMACSHA256 hmac ((const uint8_t *)secret, strlen (secret));
840+
841+ blake3_hasher hmac;
842+ blake3_hasher_init (&hmac);
848843 for (auto p = manifest.obj_begin (dpp); p != manifest.obj_end (dpp); ++p) {
849844 rgw_raw_obj raw_obj = p.get_location ().get_raw_obj (rados);
850845 rgw_rados_ref obj;
851846 int ret = rgw_get_rados_ref (dpp, rados_handle, raw_obj, &obj);
852847 if (ret < 0 ) {
853- ldpp_dout (dpp, 1 ) << __func__ << " ::failed rgw_get_rados_ref() for raw_obj= "
854- << raw_obj << dendl;
848+ ldpp_dout (dpp, 1 ) << __func__ << " ::failed rgw_get_rados_ref() for oid: "
849+ << raw_obj. oid << " , err is " << cpp_strerror (-ret) << dendl;
855850 return ret;
856851 }
857852
858- if (oid == raw_obj.oid ) {
859- ldpp_dout (dpp, 20 ) << __func__ << " ::manifest: head object=" << oid << dendl;
860- head_ioctx = obj.ioctx ;
861- }
862853 bufferlist bl;
863854 librados::IoCtx ioctx = obj.ioctx ;
864855 // read full object
865856 ret = ioctx.read (raw_obj.oid , bl, 0 , 0 );
866857 if (ret > 0 ) {
867858 for (const auto & bptr : bl.buffers ()) {
868- hmac. Update ( (const unsigned char *)bptr.c_str (), bptr.length ());
859+ blake3_hasher_update (&hmac, (const unsigned char *)bptr.c_str (), bptr.length ());
869860 }
870861 }
871862 else {
872- ldpp_dout (dpp, 1 ) << __func__ << " ::ERR: failed to read " << oid
863+ ldpp_dout (dpp, 1 ) << __func__ << " ::ERR: failed to read " << raw_obj. oid
873864 << " , error is " << cpp_strerror (-ret) << dendl;
874865 return ret;
875866 }
876867 }
877- hmac.Final (p_sha256);
868+
869+ blake3_hasher_finalize (&hmac, p_hash, BLAKE3_OUT_LEN);
878870 return 0 ;
879871 }
880872
@@ -977,33 +969,33 @@ namespace rgw::dedup {
977969 memset (&p_rec->s .shared_manifest , 0 , sizeof (p_rec->s .shared_manifest ));
978970 }
979971
980- itr = attrs.find (RGW_ATTR_SHA256 );
972+ itr = attrs.find (RGW_ATTR_BLAKE3 );
981973 if (itr != attrs.end ()) {
982974 try {
983975 auto bl_iter = itr->second .cbegin ();
984- // SHA has 256 bit splitted into multiple 64bit units
976+ // BLAKE3 hash 256 bit splitted into multiple 64bit units
985977 const unsigned units = (256 / (sizeof (uint64_t )*8 ));
986978 static_assert (units == 4 );
987979 for (unsigned i = 0 ; i < units; i++) {
988980 uint64_t val;
989981 ceph::decode (val, bl_iter);
990- p_rec->s .sha256 [i] = val;
982+ p_rec->s .hash [i] = val;
991983 }
992- p_stats->valid_sha256_attrs ++;
984+ p_stats->valid_hash_attrs ++;
993985 return 0 ;
994986 } catch (buffer::error& err) {
995- ldpp_dout (dpp, 1 ) << __func__ << " ::ERR: failed SHA256 decode" << dendl;
987+ ldpp_dout (dpp, 1 ) << __func__ << " ::ERR: failed HASH decode" << dendl;
996988 return -EINVAL;
997989 }
998990 }
999991
1000- p_stats->invalid_sha256_attrs ++;
992+ p_stats->invalid_hash_attrs ++;
1001993 // TBD: redundant memset...
1002- memset (p_rec->s .sha256 , 0 , sizeof (p_rec->s .sha256 ));
1003- // CEPH_CRYPTO_HMACSHA256_DIGESTSIZE is 32 Bytes (32*8=256)
1004- int ret = calc_object_sha256 (p_rec, (uint8_t *)p_rec->s .sha256 );
994+ memset (p_rec->s .hash , 0 , sizeof (p_rec->s .hash ));
995+ // BLAKE3_OUT_LEN is 32 Bytes
996+ int ret = calc_object_blake3 (p_rec, (uint8_t *)p_rec->s .hash );
1005997 if (ret == 0 ) {
1006- p_rec->s .flags .set_sha256_calculated ();
998+ p_rec->s .flags .set_hash_calculated ();
1007999 }
10081000
10091001 return ret;
@@ -1177,18 +1169,18 @@ namespace rgw::dedup {
11771169 }
11781170
11791171 // ---------------------------------------------------------------------------
1180- static int write_sha256_object_attribute (const DoutPrefixProvider* const dpp,
1172+ static int write_blake3_object_attribute (const DoutPrefixProvider* const dpp,
11811173 rgw::sal::Driver* driver,
11821174 RGWRados* rados,
11831175 const disk_record_t *p_rec)
11841176 {
11851177 bufferlist etag_bl;
1186- bufferlist sha256_bl ;
1178+ bufferlist hash_bl ;
11871179 librados::ObjectWriteOperation op;
11881180 etag_to_bufferlist (p_rec->s .md5_high , p_rec->s .md5_low , p_rec->s .num_parts ,
11891181 &etag_bl);
1190- init_cmp_pairs (p_rec, etag_bl, sha256_bl /* OUT PARAM*/ , &op);
1191- op.setxattr (RGW_ATTR_SHA256, sha256_bl );
1182+ init_cmp_pairs (p_rec, etag_bl, hash_bl /* OUT PARAM*/ , &op);
1183+ op.setxattr (RGW_ATTR_BLAKE3, hash_bl );
11921184
11931185 std::string oid;
11941186 librados::IoCtx ioctx;
@@ -1304,17 +1296,17 @@ namespace rgw::dedup {
13041296 return 0 ;
13051297 }
13061298
1307- if (memcmp (src_rec.s .sha256 , p_tgt_rec->s .sha256 , sizeof (src_rec.s .sha256 )) != 0 ) {
1308- p_stats->sha256_mismatch ++;
1309- ldpp_dout (dpp, 10 ) << __func__ << " ::SHA256 mismatch" << dendl;
1310- // TBD: set sha256 attributes on head objects to save calc next time
1311- if (src_rec.s .flags .sha256_calculated ()) {
1312- write_sha256_object_attribute (dpp, driver, rados, &src_rec);
1313- p_stats->set_sha256_attrs ++;
1299+ if (memcmp (src_rec.s .hash , p_tgt_rec->s .hash , sizeof (src_rec.s .hash )) != 0 ) {
1300+ p_stats->hash_mismatch ++;
1301+ ldpp_dout (dpp, 10 ) << __func__ << " ::HASH mismatch" << dendl;
1302+ // TBD: set hash attributes on head objects to save calc next time
1303+ if (src_rec.s .flags .hash_calculated ()) {
1304+ write_blake3_object_attribute (dpp, driver, rados, &src_rec);
1305+ p_stats->set_hash_attrs ++;
13141306 }
1315- if (p_tgt_rec->s .flags .sha256_calculated ()) {
1316- write_sha256_object_attribute (dpp, driver, rados, p_tgt_rec);
1317- p_stats->set_sha256_attrs ++;
1307+ if (p_tgt_rec->s .flags .hash_calculated ()) {
1308+ write_blake3_object_attribute (dpp, driver, rados, p_tgt_rec);
1309+ p_stats->set_hash_attrs ++;
13181310 }
13191311 return 0 ;
13201312 }
0 commit comments