Skip to content

Commit 3e5de8e

Browse files
Duplicate uploads of binary content for externalized attachments (#7184)
* Duplicate uploads of binary content for externalized attachments
1 parent f678b94 commit 3e5de8e

File tree

9 files changed

+650
-382
lines changed

9 files changed

+650
-382
lines changed

hapi-fhir-base/src/main/java/ca/uhn/fhir/util/AttachmentUtil.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,16 @@
2727
import ca.uhn.fhir.model.primitive.CodeDt;
2828
import org.apache.commons.lang3.Validate;
2929
import org.hl7.fhir.instance.model.api.IBase;
30+
import org.hl7.fhir.instance.model.api.IBaseExtension;
31+
import org.hl7.fhir.instance.model.api.IBaseHasExtensions;
3032
import org.hl7.fhir.instance.model.api.ICompositeType;
3133
import org.hl7.fhir.instance.model.api.IPrimitiveType;
3234

3335
import java.util.List;
36+
import java.util.Optional;
37+
import java.util.function.Predicate;
38+
39+
import static org.apache.commons.lang3.StringUtils.isNotBlank;
3440

3541
public class AttachmentUtil {
3642

@@ -93,6 +99,19 @@ public static void setSize(FhirContext theContext, ICompositeType theAttachment,
9399
}
94100
}
95101

102+
@SuppressWarnings("unchecked")
103+
public static Optional<String> getFirstExtension(
104+
IBaseHasExtensions theAttachment, String theExtension, Predicate<? super IBaseExtension<?, ?>> theFilter) {
105+
return theAttachment.getExtension().stream()
106+
.filter(theFilter)
107+
.filter(t -> theExtension.equals(t.getUrl()))
108+
.filter(t -> t.getValue() instanceof IPrimitiveType)
109+
.map(t -> (IPrimitiveType<String>) t.getValue())
110+
.map(t -> t.getValue())
111+
.filter(t -> isNotBlank(t))
112+
.findFirst();
113+
}
114+
96115
/**
97116
* This is internal API- Use with caution as it may change
98117
*/

hapi-fhir-base/src/main/java/ca/uhn/fhir/util/HapiExtensions.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ public class HapiExtensions {
8181
public static final String EXT_EXTERNALIZED_BINARY_ID =
8282
"http://hapifhir.io/fhir/StructureDefinition/externalized-binary-id";
8383

84+
/**
85+
* Extension ID for the SHA-256 hash of external binary content
86+
*/
87+
public static final String EXT_EXTERNALIZED_BINARY_HASH_SHA_256 =
88+
"http://hapifhir.io/fhir/StructureDefinition/externalized-binary-hash-sha256";
89+
8490
/**
8591
* For subscription, deliver a bundle containing a search result instead of just a single resource
8692
*/
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
type: fix
3+
issue: 7183
4+
jira: SMILE-10474
5+
title: "Previously, updating the binary content of a DocumentReference or Binary resource multiple times with
6+
the same data via the $binary-access-write operation or a resource update would cause a new resource version
7+
to be created and the binary data to be re-uploaded to external binary storage. This has been fixed."

hapi-fhir-jpaserver-test-r4/src/test/java/ca/uhn/fhir/jpa/packages/PackageInstallerSvcR4Test.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ public void testInstallR4PackageWithExternalizedBinaries() throws Exception {
374374
runInTransaction(() -> {
375375
SearchParameterMap map = SearchParameterMap.newSynchronous();
376376
map.add(StructureDefinition.SP_URL, new UriParam("http://hl7.org/fhir/uv/shorthand/CodeSystem/shorthand-code-system"));
377-
IBundleProvider result = myCodeSystemDao.search(map);
377+
IBundleProvider result = myCodeSystemDao.search(map, mySrd);
378378
assertEquals(1, result.sizeOrThrowNpe());
379379
IBaseResource resource = result.getResources(0, 1).get(0);
380380
assertEquals("CodeSystem/shorthand-code-system/_history/1", resource.getIdElement().toString());

hapi-fhir-jpaserver-test-r4/src/test/java/ca/uhn/fhir/jpa/provider/r4/BinaryAccessProviderR4Test.java

Lines changed: 171 additions & 249 deletions
Large diffs are not rendered by default.

hapi-fhir-jpaserver-test-r4/src/test/java/ca/uhn/fhir/jpa/provider/r4/BinaryStorageInterceptorR4Test.java

Lines changed: 179 additions & 41 deletions
Large diffs are not rendered by default.

hapi-fhir-storage/src/main/java/ca/uhn/fhir/jpa/binary/api/IBinaryTarget.java

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@
1919
*/
2020
package ca.uhn.fhir.jpa.binary.api;
2121

22+
import ca.uhn.fhir.util.AttachmentUtil;
2223
import ca.uhn.fhir.util.HapiExtensions;
24+
import org.hl7.fhir.instance.model.api.IBaseExtension;
2325
import org.hl7.fhir.instance.model.api.IBaseHasExtensions;
24-
import org.hl7.fhir.instance.model.api.IPrimitiveType;
2526

2627
import java.util.Optional;
27-
28-
import static org.apache.commons.lang3.StringUtils.isNotBlank;
28+
import java.util.function.Predicate;
2929

3030
/**
3131
* Wraps an Attachment datatype or Binary resource, since they both
@@ -45,14 +45,21 @@ public interface IBinaryTarget {
4545

4646
IBaseHasExtensions getTarget();
4747

48-
@SuppressWarnings("unchecked")
4948
default Optional<String> getAttachmentId() {
50-
return getTarget().getExtension().stream()
51-
.filter(t -> HapiExtensions.EXT_EXTERNALIZED_BINARY_ID.equals(t.getUrl()))
52-
.filter(t -> t.getValue() instanceof IPrimitiveType)
53-
.map(t -> (IPrimitiveType<String>) t.getValue())
54-
.map(t -> t.getValue())
55-
.filter(t -> isNotBlank(t))
56-
.findFirst();
49+
return AttachmentUtil.getFirstExtension(getTarget(), HapiExtensions.EXT_EXTERNALIZED_BINARY_ID, e -> true);
50+
}
51+
52+
default Optional<String> getAttachmentIdFiltered(Predicate<? super IBaseExtension<?, ?>> theFilter) {
53+
return AttachmentUtil.getFirstExtension(getTarget(), HapiExtensions.EXT_EXTERNALIZED_BINARY_ID, theFilter);
54+
}
55+
56+
default Optional<String> getHashExtension() {
57+
return AttachmentUtil.getFirstExtension(
58+
getTarget(), HapiExtensions.EXT_EXTERNALIZED_BINARY_HASH_SHA_256, e -> true);
59+
}
60+
61+
default Optional<String> getHashExtensionFiltered(Predicate<? super IBaseExtension<?, ?>> theFilter) {
62+
return AttachmentUtil.getFirstExtension(
63+
getTarget(), HapiExtensions.EXT_EXTERNALIZED_BINARY_HASH_SHA_256, theFilter);
5764
}
5865
}

hapi-fhir-storage/src/main/java/ca/uhn/fhir/jpa/binary/interceptor/BinaryStorageInterceptor.java

Lines changed: 133 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@
4545
import ca.uhn.fhir.util.IModelVisitor2;
4646
import jakarta.annotation.Nonnull;
4747
import org.apache.commons.io.FileUtils;
48-
import org.apache.commons.lang3.StringUtils;
4948
import org.hl7.fhir.instance.model.api.IBase;
49+
import org.hl7.fhir.instance.model.api.IBaseExtension;
5050
import org.hl7.fhir.instance.model.api.IBaseHasExtensions;
5151
import org.hl7.fhir.instance.model.api.IBaseResource;
5252
import org.hl7.fhir.instance.model.api.IIdType;
@@ -60,20 +60,41 @@
6060
import java.io.IOException;
6161
import java.io.InputStream;
6262
import java.util.ArrayList;
63-
import java.util.HashSet;
63+
import java.util.Collection;
64+
import java.util.HashMap;
6465
import java.util.List;
66+
import java.util.Map;
67+
import java.util.Objects;
6568
import java.util.Optional;
6669
import java.util.Set;
6770
import java.util.concurrent.atomic.AtomicInteger;
71+
import java.util.function.Predicate;
6872
import java.util.stream.Collectors;
6973

74+
import static ca.uhn.fhir.util.HapiExtensions.EXT_EXTERNALIZED_BINARY_HASH_SHA_256;
7075
import static ca.uhn.fhir.util.HapiExtensions.EXT_EXTERNALIZED_BINARY_ID;
7176
import static org.apache.commons.lang3.StringUtils.isNotBlank;
7277

7378
@Interceptor
7479
public class BinaryStorageInterceptor<T extends IPrimitiveType<byte[]>> {
7580

7681
private static final Logger ourLog = LoggerFactory.getLogger(BinaryStorageInterceptor.class);
82+
/**
83+
* UserData key that can be set in {@link RequestDetails#getUserData()} to override
84+
* the {@link #isAllowAutoInflateBinaries()} setting for a single request.
85+
* <p>
86+
* Possible values:
87+
* <ul>
88+
* <li>{@code Boolean.TRUE} – force binary inflation even if globally disabled</li>
89+
* <li>{@code Boolean.FALSE} – skip binary inflation even if globally enabled</li>
90+
* <li>Absent – use the global {@code isAllowAutoInflateBinaries()} setting</li>
91+
* </ul>
92+
*/
93+
public static final String AUTO_INFLATE_BINARY_CONTENT_KEY =
94+
BinaryStorageInterceptor.class.getName() + "_AUTO_INFLATE_BINARY_CONTENT";
95+
96+
private static final Predicate<IBaseExtension<?, ?>> EXTENSION_FILTER_PREDICATE =
97+
ext -> ext.getUserData(JpaConstants.EXTENSION_EXT_SYSTEMDEFINED) == null;
7798

7899
@Autowired
79100
private IBinaryStorageSvc myBinaryStorageSvc;
@@ -145,7 +166,7 @@ public void extractLargeBinariesBeforeCreate(
145166
IBaseResource theResource,
146167
Pointcut thePointcut)
147168
throws IOException {
148-
extractLargeBinaries(theRequestDetails, theTransactionDetails, theResource, thePointcut);
169+
extractLargeBinaries(theRequestDetails, theTransactionDetails, theResource, null, thePointcut);
149170
}
150171

151172
@Hook(Pointcut.STORAGE_PRESTORAGE_RESOURCE_UPDATED)
@@ -156,64 +177,60 @@ public void extractLargeBinariesBeforeUpdate(
156177
IBaseResource theResource,
157178
Pointcut thePointcut)
158179
throws IOException {
159-
blockIllegalExternalBinaryIds(thePreviousResource, theResource);
160-
extractLargeBinaries(theRequestDetails, theTransactionDetails, theResource, thePointcut);
180+
blockIllegalExternalExtensions(thePreviousResource, theResource);
181+
extractLargeBinaries(theRequestDetails, theTransactionDetails, theResource, thePreviousResource, thePointcut);
161182
}
162183

163184
/**
164-
* Don't allow clients to submit resources with binary storage attachments declared unless the ID was already in the
165-
* resource. In other words, only HAPI itself may add a binary storage ID extension to a resource unless that
166-
* extension was already present.
185+
* Prevents clients from submitting resources with binary storage ID or binary storage hash extensions,
186+
* unless those extensions were already present in the existing resource. In other words, only HAPI itself
187+
* may add these extensions to a resource.
167188
*/
168-
private void blockIllegalExternalBinaryIds(IBaseResource thePreviousResource, IBaseResource theResource) {
169-
Set<String> existingBinaryIds = new HashSet<>();
170-
if (thePreviousResource != null) {
171-
List<T> base64fields =
172-
myCtx.newTerser().getAllPopulatedChildElementsOfType(thePreviousResource, myBinaryType);
173-
for (IPrimitiveType<byte[]> nextBase64 : base64fields) {
174-
if (nextBase64 instanceof IBaseHasExtensions) {
175-
((IBaseHasExtensions) nextBase64)
176-
.getExtension().stream()
177-
.filter(t -> t.getUserData(JpaConstants.EXTENSION_EXT_SYSTEMDEFINED) == null)
178-
.filter(t -> EXT_EXTERNALIZED_BINARY_ID.equals(t.getUrl()))
179-
.map(t -> (IPrimitiveType<?>) t.getValue())
180-
.map(IPrimitiveType::getValueAsString)
181-
.filter(StringUtils::isNotBlank)
182-
.forEach(existingBinaryIds::add);
183-
}
189+
private void blockIllegalExternalExtensions(IBaseResource thePreviousResource, IBaseResource theResource) {
190+
Map<String, String> existingAttachmentIdToHashMap = buildHashAttachmentIdMap(thePreviousResource, true);
191+
Set<String> existingBinaryIds = existingAttachmentIdToHashMap.keySet();
192+
Collection<String> existingHashes = existingAttachmentIdToHashMap.values();
193+
194+
recursivelyScanResourceForBinaryData(theResource).forEach(target -> {
195+
String id =
196+
target.getAttachmentIdFiltered(EXTENSION_FILTER_PREDICATE).orElse(null);
197+
String hash =
198+
target.getHashExtensionFiltered(EXTENSION_FILTER_PREDICATE).orElse(null);
199+
200+
// binaryId check
201+
if (id != null && !existingBinaryIds.contains(id)) {
202+
throwIllegalExtension(EXT_EXTERNALIZED_BINARY_ID, id);
203+
}
204+
// hash check
205+
if (hash != null && !existingHashes.contains(hash)) {
206+
throwIllegalExtension(EXT_EXTERNALIZED_BINARY_HASH_SHA_256, hash);
184207
}
185-
}
186208

187-
List<T> base64fields = myCtx.newTerser().getAllPopulatedChildElementsOfType(theResource, myBinaryType);
188-
for (IPrimitiveType<byte[]> nextBase64 : base64fields) {
189-
if (nextBase64 instanceof IBaseHasExtensions) {
190-
Optional<String> hasExternalizedBinaryReference = ((IBaseHasExtensions) nextBase64)
191-
.getExtension().stream()
192-
.filter(t -> t.getUserData(JpaConstants.EXTENSION_EXT_SYSTEMDEFINED) == null)
193-
.filter(t -> t.getUrl().equals(EXT_EXTERNALIZED_BINARY_ID))
194-
.map(t -> (IPrimitiveType<?>) t.getValue())
195-
.map(IPrimitiveType::getValueAsString)
196-
.filter(StringUtils::isNotBlank)
197-
.filter(t -> !existingBinaryIds.contains(t))
198-
.findFirst();
199-
200-
if (hasExternalizedBinaryReference.isPresent()) {
201-
String msg = myCtx.getLocalizer()
202-
.getMessage(
203-
BinaryStorageInterceptor.class,
204-
"externalizedBinaryStorageExtensionFoundInRequestBody",
205-
EXT_EXTERNALIZED_BINARY_ID,
206-
hasExternalizedBinaryReference.get());
207-
throw new InvalidRequestException(Msg.code(1329) + msg);
209+
// binaryId - hash pair consistency check
210+
if (id != null && hash != null) {
211+
String expected = existingAttachmentIdToHashMap.get(id);
212+
if (!Objects.equals(expected, hash)) {
213+
throwIllegalExtension(EXT_EXTERNALIZED_BINARY_HASH_SHA_256, hash);
208214
}
209215
}
210-
}
216+
});
217+
}
218+
219+
private void throwIllegalExtension(String theExtensionUrl, String theValue) {
220+
String msg = myCtx.getLocalizer()
221+
.getMessage(
222+
BinaryStorageInterceptor.class,
223+
"externalizedBinaryStorageExtensionFoundInRequestBody",
224+
theExtensionUrl,
225+
theValue);
226+
throw new InvalidRequestException(Msg.code(1329) + msg);
211227
}
212228

213229
private void extractLargeBinaries(
214230
RequestDetails theRequestDetails,
215231
TransactionDetails theTransactionDetails,
216232
IBaseResource theResource,
233+
IBaseResource thePreviousResource,
217234
Pointcut thePointcut)
218235
throws IOException {
219236

@@ -223,6 +240,7 @@ private void extractLargeBinaries(
223240
resourceId = new IdType(resourceType + "/" + resourceId.getIdPart());
224241
}
225242

243+
Map<String, String> existingHashToAttachmentId = buildHashAttachmentIdMap(thePreviousResource, false);
226244
List<IBinaryTarget> attachments = recursivelyScanResourceForBinaryData(theResource);
227245
for (IBinaryTarget nextTarget : attachments) {
228246
byte[] data = nextTarget.getData();
@@ -233,13 +251,16 @@ private void extractLargeBinaries(
233251
boolean shouldStoreBlob =
234252
myBinaryStorageSvc.shouldStoreBinaryContent(nextPayloadLength, resourceId, nextContentType);
235253
if (shouldStoreBlob) {
236-
254+
String binaryContentHash = myBinaryAccessProvider.getBinaryContentHash(data);
237255
String newBinaryContentId;
238256
if (thePointcut == Pointcut.STORAGE_PRESTORAGE_RESOURCE_UPDATED) {
239-
ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
240-
StoredDetails storedDetails = myBinaryStorageSvc.storeBinaryContent(
241-
resourceId, null, nextContentType, inputStream, theRequestDetails);
242-
newBinaryContentId = storedDetails.getBinaryContentId();
257+
newBinaryContentId = storeBinaryContentIfRequired(
258+
theRequestDetails,
259+
existingHashToAttachmentId,
260+
binaryContentHash,
261+
data,
262+
resourceId,
263+
nextContentType);
243264
} else {
244265
assert thePointcut == Pointcut.STORAGE_PRESTORAGE_RESOURCE_CREATED : thePointcut.name();
245266
newBinaryContentId = myBinaryStorageSvc.newBinaryContentId();
@@ -264,11 +285,63 @@ private void extractLargeBinaries(
264285
}
265286

266287
myBinaryAccessProvider.replaceDataWithExtension(nextTarget, newBinaryContentId);
288+
myBinaryAccessProvider.addHashExtension(nextTarget, binaryContentHash);
267289
}
268290
}
269291
}
270292
}
271293

294+
/**
295+
* Builds a map of SHA-256 hashes to corresponding attachment IDs from the given FHIR resource.
296+
* @return A {@link Map} with keys as SHA-256 binary content hashes and values as attachment IDs.
297+
*/
298+
private Map<String, String> buildHashAttachmentIdMap(
299+
IBaseResource thePreviousResource, boolean isAttachmentIdToHash) {
300+
Map<String, String> result = new HashMap<>();
301+
302+
if (thePreviousResource == null) {
303+
return result;
304+
}
305+
306+
List<IBinaryTarget> previousAttachments = recursivelyScanResourceForBinaryData(thePreviousResource);
307+
for (IBinaryTarget attachment : previousAttachments) {
308+
Optional<String> hashOpt = attachment.getHashExtensionFiltered(EXTENSION_FILTER_PREDICATE);
309+
Optional<String> idOpt = attachment.getAttachmentIdFiltered(EXTENSION_FILTER_PREDICATE);
310+
311+
if (isAttachmentIdToHash) {
312+
idOpt.ifPresent(id -> result.put(id, hashOpt.orElse(null)));
313+
} else {
314+
hashOpt.ifPresent(hash -> result.put(hash, idOpt.orElse(null)));
315+
}
316+
}
317+
return result;
318+
}
319+
320+
/**
321+
* This method checks if the given binary content (based on its SHA-256 hash) is already stored in previous
322+
* resource version. If it is, it reuses the existing attachment ID to avoid saving the same content again.
323+
* If it's not found, it stores the new content and returns the newly generated attachment ID.
324+
*/
325+
private String storeBinaryContentIfRequired(
326+
RequestDetails theRequestDetails,
327+
Map<String, String> existingHashToAttachmentId,
328+
String binaryContentHash,
329+
byte[] data,
330+
IIdType resourceId,
331+
String nextContentType)
332+
throws IOException {
333+
if (existingHashToAttachmentId.get(binaryContentHash) != null) {
334+
// input binary content is the same as existing binary content, reuse existing binaryId
335+
return existingHashToAttachmentId.get(binaryContentHash);
336+
} else {
337+
// there is no existing binary content or content is different, store new content in binary storage
338+
ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
339+
StoredDetails storedDetails = myBinaryStorageSvc.storeBinaryContent(
340+
resourceId, null, nextContentType, inputStream, theRequestDetails);
341+
return storedDetails.getBinaryContentId();
342+
}
343+
}
344+
272345
/**
273346
* This invokes the {@link Pointcut#STORAGE_BINARY_ASSIGN_BLOB_ID_PREFIX} hook and returns the prefix to use for the blob ID, or null if there are no implementers.
274347
* @return A string, which will be used to prefix the blob ID. May be null.
@@ -350,8 +423,14 @@ public String getDeferredListKey() {
350423
}
351424

352425
@Hook(Pointcut.STORAGE_PRESHOW_RESOURCES)
353-
public void preShow(IPreResourceShowDetails theDetails) throws IOException {
354-
if (!isAllowAutoInflateBinaries()) {
426+
public void preShow(IPreResourceShowDetails theDetails, RequestDetails theRequestDetails) throws IOException {
427+
boolean isAllowAutoInflateBinaries = isAllowAutoInflateBinaries();
428+
// Override isAllowAutoInflateBinaries setting if AUTO_INFLATE_BINARY_CONTENT flag is present in userData
429+
if (theRequestDetails != null && theRequestDetails.getUserData().containsKey(AUTO_INFLATE_BINARY_CONTENT_KEY)) {
430+
isAllowAutoInflateBinaries =
431+
Boolean.TRUE.equals(theRequestDetails.getUserData().get(AUTO_INFLATE_BINARY_CONTENT_KEY));
432+
}
433+
if (!isAllowAutoInflateBinaries) {
355434
return;
356435
}
357436

0 commit comments

Comments
 (0)