Skip to content

Commit ba839a1

Browse files
authored
Merge pull request #11600 from Athemis/fix-json-ld
fix: work around breaking json-ld export by incomplete html tags in file descriptions
2 parents 8592e63 + 01c5681 commit ba839a1

File tree

3 files changed

+138
-2
lines changed

3 files changed

+138
-2
lines changed

src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2137,7 +2137,7 @@ public String getJsonLd() {
2137 2137
fileObject.add("name", fileMetadata.getLabel());
2138 2138
fileObject.add("encodingFormat", fileMetadata.getDataFile().getContentType());
2139 2139
fileObject.add("contentSize", fileMetadata.getDataFile().getFilesize());
2140-
fileObject.add("description", fileMetadata.getDescription());
2140+
fileObject.add("description", MarkupChecker.stripAllTags(fileMetadata.getDescription()));
2141 2141
fileObject.add("@id", filePidUrlAsString);
2142 2142
fileObject.add("identifier", filePidUrlAsString);
2143 2143
boolean hideFilesBoolean = JvmSettings.HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS.lookupOptional(Boolean.class).orElse(false);
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
package edu.harvard.iq.dataverse.api;
2+
3+
import static io.restassured.RestAssured.given;
4+
import static org.hamcrest.CoreMatchers.*;
5+
import static org.junit.jupiter.api.Assertions.*;
6+
7+
import io.restassured.RestAssured;
8+
import io.restassured.response.Response;
9+
import jakarta.json.Json;
10+
import jakarta.json.JsonObjectBuilder;
11+
import org.junit.jupiter.api.BeforeAll;
12+
import org.junit.jupiter.api.Test;
13+
14+
/**
15+
* Integration test for the schema.org (JSON-LD) dataset export when a file description contains incomplete HTML tags.
16+
*
17+
* Incomplete HTML tags like "<CSP" or "<img" cause JsonParsingException
18+
* due to MarkupChecker.stripAllTags() processing JSON-LD as HTML.
19+
*/
20+
public class JsonLDExportIT {
21+
22+
@BeforeAll
23+
public static void setUpClass() {
24+
RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();
25+
}
26+
27+
/**
28+
* Uploads a file whose description contains unterminated tags, publishes the dataset, and checks that the schema.org export neither returns HTTP 500 nor mentions a JsonParsingException; the body is parsed as JSON only when the status is 200.
29+
*/
30+
@Test
31+
public void testJsonLDExportWithIncompleteHtmlTagsInFileDescription() {
32+
// Create a random user and promote it to superuser
33+
Response createUserResponse = UtilIT.createRandomUser();
34+
createUserResponse.then().assertThat().statusCode(200);
35+
String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse);
36+
String username = UtilIT.getUsernameFromResponse(createUserResponse);
37+
UtilIT.makeSuperUser(username);
38+
39+
Integer datasetId = null;
40+
String dataverseAlias = null;
41+
42+
try {
43+
// Create test dataverse
44+
Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
45+
createDataverseResponse.then().assertThat().statusCode(201);
46+
dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
47+
48+
// Publish dataverse (required before publishing datasets)
49+
Response publishDataverse = UtilIT.publishDataverseViaNativeApi(
50+
dataverseAlias,
51+
apiToken
52+
);
53+
publishDataverse.then().assertThat().statusCode(200);
54+
55+
// Create test dataset
56+
Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(
57+
dataverseAlias,
58+
apiToken
59+
);
60+
createDatasetResponse.then().assertThat().statusCode(201);
61+
datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse);
62+
String datasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(
63+
createDatasetResponse
64+
);
65+
66+
// Upload file with incomplete HTML tags in description (several unterminated '<' sequences, including a trailing '<')
67+
String problematicDescription =
68+
"File contains <CSP data, <img tag, <script and text ending with <";
69+
70+
JsonObjectBuilder fileMetadata = Json.createObjectBuilder()
71+
.add("description", problematicDescription)
72+
.add("label", "test-file-with-csp-tag.tab");
73+
74+
String pathToFile = "src/test/resources/tab/test.tab";
75+
76+
Response uploadResponse = UtilIT.uploadFileViaNative(
77+
datasetId.toString(),
78+
pathToFile,
79+
fileMetadata.build(),
80+
apiToken
81+
);
82+
uploadResponse.then().assertThat().statusCode(200);
83+
84+
assertTrue(UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToFile);
85+
86+
// Publish dataset
87+
Response publishResponse = UtilIT.publishDatasetViaNativeApi(
88+
datasetPersistentId,
89+
"major",
90+
apiToken
91+
);
92+
publishResponse.then().assertThat().statusCode(200);
93+
94+
// Request the schema.org export of the published dataset
95+
Response jsonLdExportResponse = UtilIT.exportDataset(datasetPersistentId, "schema.org");
96+
97+
// Verify export does not fail: status must not be 500 and the body must not contain the parser error text
98+
jsonLdExportResponse
99+
.then()
100+
.assertThat()
101+
.statusCode(not(equalTo(500)))
102+
.body(not(containsString("JsonParsingException")))
103+
.body(not(containsString("Unexpected char -1")));
104+
105+
// Verify JSON structure if export succeeds; NOTE(review): any status other than 200 or 500 skips these checks and the test still passes - confirm that is intended
106+
if (jsonLdExportResponse.getStatusCode() == 200) {
107+
String responseBody = jsonLdExportResponse.getBody().asString();
108+
109+
// Verify valid JSON: the body must be readable as a JSON object
110+
assertDoesNotThrow(
111+
() -> {
112+
jakarta.json.Json.createReader(
113+
new java.io.StringReader(responseBody)
114+
).readObject();
115+
},
116+
"JSON-LD export should produce valid JSON"
117+
);
118+
119+
// Verify JSON-LD structure: at least one of the "@context" / "@type" markers must appear in the body
120+
assertTrue(
121+
responseBody.contains("@context") || responseBody.contains("@type"),
122+
"Response should contain JSON-LD structure"
123+
);
124+
}
125+
} finally {
126+
// Cleanup - delete in reverse order of creation; the responses of these cleanup calls are not asserted
127+
if (datasetId != null) {
128+
UtilIT.destroyDataset(datasetId, apiToken);
129+
}
130+
if (dataverseAlias != null) {
131+
UtilIT.deleteDataverse(dataverseAlias, apiToken);
132+
}
133+
UtilIT.deleteUser(username);
134+
}
135+
}
136+
}

tests/integration-tests.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT,DatasetFieldsIT,SavedSearchIT,DatasetTypesIT,DataverseFeaturedItemsIT,SendFeedbackApiIT,CustomizationIT
1+
DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT,DatasetFieldsIT,SavedSearchIT,DatasetTypesIT,DataverseFeaturedItemsIT,SendFeedbackApiIT,CustomizationIT,JsonLDExportIT

0 commit comments

Comments
 (0)