2121import static org .apache .beam .vendor .guava .v32_1_2_jre .com .google .common .base .Preconditions .checkState ;
2222
2323import java .io .IOException ;
24+ import java .time .Duration ;
25+ import java .time .Instant ;
2426import java .time .LocalDateTime ;
2527import java .time .YearMonth ;
2628import java .time .ZoneOffset ;
@@ -135,7 +137,8 @@ class DestinationState {
135137 RuntimeException rethrow =
136138 new RuntimeException (
137139 String .format (
138- "Encountered an error when closing data writer for table '%s', path: %s" ,
140+ "Encountered an error when closing data writer for table '%s',"
141+ + " path: %s" ,
139142 icebergDestination .getTableIdentifier (), recordWriter .path ()),
140143 e );
141144 exceptions .add (rethrow );
@@ -256,8 +259,40 @@ static String getPartitionDataPath(
256259 private final Map <WindowedValue <IcebergDestination >, List <SerializableDataFile >>
257260 totalSerializableDataFiles = Maps .newHashMap ();
258261
262+ static final class LastRefreshedTable {
263+ final Table table ;
264+ volatile Instant lastRefreshTime ;
265+ static final Duration STALENESS_THRESHOLD = Duration .ofMinutes (2 );
266+
267+ LastRefreshedTable (Table table , Instant lastRefreshTime ) {
268+ this .table = table ;
269+ this .lastRefreshTime = lastRefreshTime ;
270+ }
271+
272+ /**
273+ * Refreshes the table metadata if it is considered stale (older than 2 minutes).
274+ *
275+ * <p>This method first performs a non-synchronized check on the table's freshness. This
276+ * provides a lock-free fast path that avoids synchronization overhead in the common case where
277+ * the table does not need to be refreshed. If the table might be stale, it then enters a
278+ * synchronized block to ensure that only one thread performs the refresh operation.
279+ */
280+ void refreshIfStale () {
281+ // Fast path: Avoid entering the synchronized block if the table is not stale.
282+ if (lastRefreshTime .isAfter (Instant .now ().minus (STALENESS_THRESHOLD ))) {
283+ return ;
284+ }
285+ synchronized (this ) {
286+ if (lastRefreshTime .isBefore (Instant .now ().minus (STALENESS_THRESHOLD ))) {
287+ table .refresh ();
288+ lastRefreshTime = Instant .now ();
289+ }
290+ }
291+ }
292+ }
293+
259294 @ VisibleForTesting
260- static final Cache <TableIdentifier , Table > TABLE_CACHE =
295+ static final Cache <TableIdentifier , LastRefreshedTable > LAST_REFRESHED_TABLE_CACHE =
261296 CacheBuilder .newBuilder ().expireAfterAccess (10 , TimeUnit .MINUTES ).build ();
262297
263298 private boolean isClosed = false ;
@@ -272,22 +307,22 @@ static String getPartitionDataPath(
272307 /**
273308 * Returns an Iceberg {@link Table}.
274309 *
275- * <p>First attempts to fetch the table from the {@link #TABLE_CACHE }. If it's not there, we
276- * attempt to load it using the Iceberg API. If the table doesn't exist at all, we attempt to
277- * create it, inferring the table schema from the record schema.
310+ * <p>First attempts to fetch the table from the {@link #LAST_REFRESHED_TABLE_CACHE }. If it's not
311+ * there, we attempt to load it using the Iceberg API. If the table doesn't exist at all, we
312+ * attempt to create it, inferring the table schema from the record schema.
278313 *
279314 * <p>Note that this is a best-effort operation that depends on the {@link Catalog}
280315 * implementation. Although it is expected, some implementations may not support creating a table
281316 * using the Iceberg API.
282317 */
283- private Table getOrCreateTable (IcebergDestination destination , Schema dataSchema ) {
318+ @ VisibleForTesting
319+ Table getOrCreateTable (IcebergDestination destination , Schema dataSchema ) {
284320 TableIdentifier identifier = destination .getTableIdentifier ();
285- @ Nullable Table table = TABLE_CACHE .getIfPresent (identifier );
286- if (table != null ) {
287- // If fetching from cache, refresh the table to avoid working with stale metadata
288- // (e.g. partition spec)
289- table .refresh ();
290- return table ;
321+ @ Nullable
322+ LastRefreshedTable lastRefreshedTable = LAST_REFRESHED_TABLE_CACHE .getIfPresent (identifier );
323+ if (lastRefreshedTable != null && lastRefreshedTable .table != null ) {
324+ lastRefreshedTable .refreshIfStale ();
325+ return lastRefreshedTable .table ;
291326 }
292327
293328 Namespace namespace = identifier .namespace ();
@@ -299,7 +334,8 @@ private Table getOrCreateTable(IcebergDestination destination, Schema dataSchema
299334 ? createConfig .getTableProperties ()
300335 : Maps .newHashMap ();
301336
302- synchronized (TABLE_CACHE ) {
337+ @ Nullable Table table = null ;
338+ synchronized (LAST_REFRESHED_TABLE_CACHE ) {
303339 // Create namespace if it does not exist yet
304340 if (!namespace .isEmpty () && catalog instanceof SupportsNamespaces ) {
305341 SupportsNamespaces supportsNamespaces = (SupportsNamespaces ) catalog ;
@@ -323,7 +359,8 @@ private Table getOrCreateTable(IcebergDestination destination, Schema dataSchema
323359 try {
324360 table = catalog .createTable (identifier , tableSchema , partitionSpec , tableProperties );
325361 LOG .info (
326- "Created Iceberg table '{}' with schema: {}\n , partition spec: {}, table properties: {}" ,
362+ "Created Iceberg table '{}' with schema: {}\n "
363+ + ", partition spec: {}, table properties: {}" ,
327364 identifier ,
328365 tableSchema ,
329366 partitionSpec ,
@@ -334,8 +371,8 @@ private Table getOrCreateTable(IcebergDestination destination, Schema dataSchema
334371 }
335372 }
336373 }
337-
338- TABLE_CACHE .put (identifier , table );
374+ lastRefreshedTable = new LastRefreshedTable ( table , Instant . now ());
375+ LAST_REFRESHED_TABLE_CACHE .put (identifier , lastRefreshedTable );
339376 return table ;
340377 }
341378
0 commit comments