Skip to content

Commit 7701ae3

Browse files
pilis and claude committed
fix: Use ISO-8601 date format in MongoDB to BigQuery template
MongoDB ISODate fields were being serialized using the JVM's default locale, resulting in descriptive date formats like "Feb 3, 2026, 3:31:41 PM" instead of ISO-8601 format. This change configures MongoDB's JsonWriterSettings to use JsonMode.RELAXED, which outputs dates in ISO-8601 format: {"$date": "2026-02-03T15:31:41.924Z"}.

Changes:
- Added EXTENDED_JSON_WRITER_SETTINGS constant to MongoDbUtils
- Updated getTableSchema() to use MongoDB JsonWriterSettings for all user options (FLATTEN, JSON, NONE)
- Updated JavascriptDocumentTransformer to use consistent date serialization
- Added unit tests to verify ISO-8601 date format
- Updated integration test for consistency

Fixes locale-dependent date serialization issues in BigQuery output.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent ea80a3b commit 7701ae3

File tree

4 files changed

+88
-6
lines changed

4 files changed

+88
-6
lines changed

v2/mongodb-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/mongodb/templates/MongoDbUtils.java

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@
5454
import org.apache.beam.sdk.io.fs.MatchResult.Status;
5555
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.CharStreams;
5656
import org.bson.Document;
57+
import org.bson.json.JsonMode;
58+
import org.bson.json.JsonWriterSettings;
5759
import org.openjdk.nashorn.api.scripting.ScriptObjectMirror;
5860
import org.slf4j.Logger;
5961
import org.slf4j.LoggerFactory;
@@ -73,6 +75,13 @@ public class MongoDbUtils implements Serializable {
7375

7476
static final Gson GSON = new GsonBuilder().serializeSpecialFloatingPointValues().create();
7577

78+
/**
79+
* JsonWriterSettings configured to output Relaxed JSON format for consistent ISO-8601 date
80+
* serialization.
81+
*/
82+
public static final JsonWriterSettings EXTENDED_JSON_WRITER_SETTINGS =
83+
JsonWriterSettings.builder().outputMode(JsonMode.RELAXED).build();
84+
7685
public static TableSchema getTableFieldSchema(
7786
String uri, String database, String collection, String userOption) {
7887
List<TableFieldSchema> bigquerySchemaFields = new ArrayList<>();
@@ -141,16 +150,30 @@ public static TableRow getTableSchema(Document document, String userOption) {
141150
row.set(key, value);
142151
break;
143152
case "org.bson.Document":
144-
String data = GSON.toJson(value);
153+
String data = ((Document) value).toJson(EXTENDED_JSON_WRITER_SETTINGS);
145154
row.set(key, data);
146155
break;
156+
case "java.util.Date":
157+
// Format dates as ISO-8601 strings
158+
Document tempDoc = new Document("date", value);
159+
String dateJson = tempDoc.toJson(EXTENDED_JSON_WRITER_SETTINGS);
160+
// Extract just the date value from {"date":{"$date":"2026-02-03T15:31:41.924Z"}}
161+
try {
162+
JsonObject dateObj = GSON.fromJson(dateJson, JsonObject.class);
163+
String dateStr = dateObj.getAsJsonObject("date").get("$date").getAsString();
164+
row.set(key, dateStr);
165+
} catch (Exception e) {
166+
row.set(key, value.toString());
167+
}
168+
break;
147169
default:
148170
row.set(key, value.toString());
149171
}
150172
});
151173
row.set("timestamp", localDate.format(TIMEFORMAT));
152174
} else if (userOption.equals("JSON")) {
153-
JsonObject sourceDataJsonObject = GSON.toJsonTree(document).getAsJsonObject();
175+
String jsonString = document.toJson(EXTENDED_JSON_WRITER_SETTINGS);
176+
JsonObject sourceDataJsonObject = GSON.fromJson(jsonString, JsonObject.class);
154177

155178
// Convert to a Map
156179
Map<String, Object> sourceDataMap =
@@ -160,7 +183,7 @@ public static TableRow getTableSchema(Document document, String userOption) {
160183
.set("source_data", sourceDataMap)
161184
.set("timestamp", localDate.format(TIMEFORMAT));
162185
} else {
163-
String sourceData = GSON.toJson(document);
186+
String sourceData = document.toJson(EXTENDED_JSON_WRITER_SETTINGS);
164187

165188
row.set("id", document.get("_id").toString())
166189
.set("source_data", sourceData)
@@ -188,7 +211,8 @@ public static TableSchema getTableFieldSchemaForUDF(
188211
}
189212

190213
Document doc;
191-
Object result = invocable.invokeFunction(udfFunctionName, document.toJson());
214+
Object result =
215+
invocable.invokeFunction(udfFunctionName, document.toJson(EXTENDED_JSON_WRITER_SETTINGS));
192216
if (result == null || ScriptObjectMirror.isUndefined(result)) {
193217
return null;
194218
} else if (result instanceof Document) {

v2/mongodb-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/transforms/JavascriptDocumentTransformer.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument;
1919

2020
import com.google.auto.value.AutoValue;
21+
import com.google.cloud.teleport.v2.mongodb.templates.MongoDbUtils;
2122
import java.io.IOException;
2223
import java.io.Reader;
2324
import java.io.UncheckedIOException;
@@ -147,7 +148,9 @@ public Document invoke(Document data)
147148
throw new RuntimeException("No udf was loaded");
148149
}
149150

150-
Object result = getInvocable().invokeFunction(functionName(), data.toJson());
151+
Object result =
152+
getInvocable()
153+
.invokeFunction(functionName(), data.toJson(MongoDbUtils.EXTENDED_JSON_WRITER_SETTINGS));
151154
if (result == null || ScriptObjectMirror.isUndefined(result)) {
152155
return null;
153156
} else if (result instanceof Document) {

v2/mongodb-to-googlecloud/src/test/java/com/google/cloud/teleport/v2/mongodb/templates/MongoDbToBigQueryIT.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,8 @@ private void mongoDbToBigQueryBase(String userOption, boolean applyUdf, boolean
215215
Map<String, JSONObject> mongoMap = new HashMap<>();
216216
mongoDocuments.forEach(
217217
mongoDocument -> {
218-
JSONObject mongoDbJson = new JSONObject(mongoDocument.toJson());
218+
JSONObject mongoDbJson =
219+
new JSONObject(mongoDocument.toJson(MongoDbUtils.EXTENDED_JSON_WRITER_SETTINGS));
219220
String mongoId = mongoDbJson.getJSONObject(MONGO_DB_FLATTEN_ID).getString("$oid");
220221
if (applyUdf) {
221222
mongoDbJson.put("udf", "out");

v2/mongodb-to-googlecloud/src/test/java/com/google/cloud/teleport/v2/mongodb/templates/MongoDbUtilsTest.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515
*/
1616
package com.google.cloud.teleport.v2.mongodb.templates;
1717

18+
import static org.junit.Assert.assertEquals;
19+
import static org.junit.Assert.assertFalse;
1820
import static org.junit.Assert.assertNotNull;
1921
import static org.junit.Assert.assertTrue;
2022

2123
import com.google.api.services.bigquery.model.TableRow;
24+
import java.util.Date;
2225
import org.bson.Document;
2326
import org.junit.Test;
2427

@@ -69,4 +72,55 @@ public void testGsonSerializesSpecialFloatingPointValues() {
6972
assertTrue("JSON should contain Infinity", jsonString.contains("Infinity"));
7073
assertTrue("JSON should contain NaN", jsonString.contains("NaN"));
7174
}
75+
76+
/**
 * Checks that a {@link Date} field serialized with {@code
 * MongoDbUtils.EXTENDED_JSON_WRITER_SETTINGS} is emitted under a {@code "$date"} key as an
 * ISO-8601 UTC string with millisecond precision, and not in a "Mon d, yyyy" style format.
 *
 * <p>The round-trip step at the end only compares the {@code _id} field; the parsed date value
 * itself is not asserted.
 */
@Test
77+
public void testExtendedJsonDateSerialization() {
78+
// Create document with date field
79+
Date testDate = new Date(1738598501924L); // epoch millis for 2025-02-03T16:01:41.924Z
80+
Document document = new Document();
81+
document.put("_id", "test-id");
82+
document.put("createdAt", testDate);
83+
84+
// Serialize with EXTENDED_JSON_WRITER_SETTINGS
85+
String jsonString = document.toJson(MongoDbUtils.EXTENDED_JSON_WRITER_SETTINGS);
86+
87+
// Verify Extended JSON format with ISO-8601
88+
assertNotNull("JSON string should not be null", jsonString);
89+
assertTrue("Should contain $date key", jsonString.contains("\"$date\""));
90+
// Pattern: "$date" : "yyyy-MM-ddTHH:mm:ss.SSSZ" anywhere in the serialized string.
91+
assertTrue(
92+
"Should contain ISO-8601 formatted date",
93+
jsonString.matches(
94+
".*\"\\$date\"\\s*:\\s*\"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}Z\".*"));
95+
// Guard against text shaped like "Feb 3, 2026" (capitalized 3-letter word, day, comma, year).
96+
assertFalse(
97+
"Should NOT contain locale-based date format",
98+
jsonString.matches(".*[A-Z][a-z]{2}\\s+\\d+,\\s+\\d{4}.*"));
99+
100+
// Verify round-trip conversion
101+
Document parsedDocument = Document.parse(jsonString);
102+
assertEquals("Document ID should match", "test-id", parsedDocument.get("_id"));
103+
}
102+
103+
/**
 * Serializes a document holding three {@link Date} values (now, the epoch, and a far-future
 * instant) alongside a null and a plain string, and checks that exactly three {@code "$date"}
 * keys appear in the output and that the three date fields are non-null after parsing the JSON
 * back into a {@link Document}.
 */
@Test
104+
public void testExtendedJsonWithMultipleDateTypes() {
105+
Document document = new Document();
106+
document.put("_id", "test-id");
107+
document.put("currentDate", new Date());
108+
document.put("pastDate", new Date(0L)); // 1970-01-01T00:00:00.000Z
109+
document.put("futureDate", new Date(4102444800000L)); // 2100-01-01T00:00:00.000Z
110+
// Non-date entries: per the expected count of 3 below, neither should add a "$date" key.
111+
document.put("nullDate", null);
112+
document.put("stringField", "not a date");
113+
114+
String jsonString = document.toJson(MongoDbUtils.EXTENDED_JSON_WRITER_SETTINGS);
115+
116+
assertNotNull("JSON string should not be null", jsonString);
117+
// Verify all date fields use Extended JSON format
118+
// Splitting on the quoted "$date" key yields one more piece than there are occurrences.
119+
int dateCount = jsonString.split("\"\\$date\"").length - 1;
120+
assertEquals("Should have 3 $date entries", 3, dateCount);
121+
122+
// Verify round-trip conversion
123+
Document parsedDocument = Document.parse(jsonString);
124+
assertNotNull(parsedDocument.get("currentDate"));
125+
assertNotNull(parsedDocument.get("pastDate"));
126+
assertNotNull(parsedDocument.get("futureDate"));
127+
}
72126
}

0 commit comments

Comments
 (0)