2424public class ExternalFilePathUtil {
2525 // Suffix acts as a marker when appended to a file path that the path was created by an external system and not a Hudi writer.
2626 private static final String EXTERNAL_FILE_SUFFIX = "_hudiext" ;
27+ // Marker for file group prefix in external file names. URL-encoded "_fg=" to avoid conflicts with file names.
28+ private static final String FILE_GROUP_PREFIX_MARKER = "_fg%3D" ;
2729
2830 /**
2931 * Appends the commit time and external file marker to the file path. Hudi relies on the commit time in the file name for properly generating views of the files in a table.
@@ -35,6 +37,23 @@ public static String appendCommitTimeAndExternalFileMarker(String filePath, Stri
3537 return filePath + "_" + commitTime + EXTERNAL_FILE_SUFFIX ;
3638 }
3739
40+ /**
41+ * Appends the commit time, file group prefix, and external file marker to the file name.
42+ * Use this when the external file is located in a subdirectory within the partition (e.g., bucket-0/file.parquet).
43+ *
44+ * @param fileName The original file name (without any path prefix)
45+ * @param commitTime The time of the commit that added this file to the table
46+ * @param externalFileGroupPrefix The prefix path where the file is located (e.g., "bucket-0"). Can be null or empty.
47+ * @return The file name with commit time and markers appended
48+ */
49+ public static String appendCommitTimeAndExternalFileMarker (String fileName , String commitTime , String externalFileGroupPrefix ) {
50+ if (externalFileGroupPrefix == null || externalFileGroupPrefix .isEmpty ()) {
51+ return appendCommitTimeAndExternalFileMarker (fileName , commitTime );
52+ }
53+ String encodedPrefix = PartitionPathEncodeUtils .escapePathName (externalFileGroupPrefix );
54+ return fileName + "_" + commitTime + FILE_GROUP_PREFIX_MARKER + encodedPrefix + EXTERNAL_FILE_SUFFIX ;
55+ }
56+
3857 /**
3958 * Checks if the file name was created by an external system by checking for the external file marker at the end of the file name.
4059 * @param fileName The file name
@@ -43,4 +62,62 @@ public static String appendCommitTimeAndExternalFileMarker(String filePath, Stri
4362 public static boolean isExternallyCreatedFile (String fileName ) {
4463 return fileName .endsWith (EXTERNAL_FILE_SUFFIX );
4564 }
65+
66+ /**
67+ * Checks if the external file name contains a file group prefix.
68+ * @param fileName The file name
69+ * @return True if the file has a file group prefix encoded in its name
70+ */
71+ public static boolean hasExternalFileGroupPrefix (String fileName ) {
72+ return isExternallyCreatedFile (fileName ) && fileName .contains (FILE_GROUP_PREFIX_MARKER );
73+ }
74+
75+ /**
76+ * Extracts the file group prefix from an external file name.
77+ * @param fileName The external file name
78+ * @return Option containing the decoded file group prefix, or empty if not present
79+ */
80+ public static Option <String > getExternalFileGroupPrefix (String fileName ) {
81+ if (!hasExternalFileGroupPrefix (fileName )) {
82+ return Option .empty ();
83+ }
84+ int start = fileName .indexOf (FILE_GROUP_PREFIX_MARKER ) + FILE_GROUP_PREFIX_MARKER .length ();
85+ int end = fileName .lastIndexOf (EXTERNAL_FILE_SUFFIX );
86+ return Option .of (PartitionPathEncodeUtils .unescapePathName (fileName .substring (start , end )));
87+ }
88+
89+ /**
90+ * Gets the original file path including the file group prefix if present.
91+ * For example, "data.parquet_123_fg%3Dbucket-0_hudiext" returns "bucket-0/data.parquet"
92+ *
93+ * @param fileName The external file name
94+ * @return The original file path with prefix
95+ */
96+ public static String getOriginalFilePath (String fileName ) {
97+ if (!isExternallyCreatedFile (fileName )) {
98+ return fileName ;
99+ }
100+ String originalName = getOriginalFileName (fileName );
101+ Option <String > prefix = getExternalFileGroupPrefix (fileName );
102+ return prefix .map (p -> p + "/" + originalName ).orElse (originalName );
103+ }
104+
105+ /**
106+ * Extracts the original file name from an external file name (without commit time and markers).
107+ * For example, "data.parquet_123_hudiext" returns "data.parquet"
108+ * And "data.parquet_123_fg%3Dbucket-0_hudiext" also returns "data.parquet"
109+ *
110+ * @param fileName The external file name
111+ * @return The original file name
112+ */
113+ public static String getOriginalFileName (String fileName ) {
114+ if (!isExternallyCreatedFile (fileName )) {
115+ return fileName ;
116+ }
117+ int markerEnd = hasExternalFileGroupPrefix (fileName )
118+ ? fileName .indexOf (FILE_GROUP_PREFIX_MARKER )
119+ : fileName .lastIndexOf (EXTERNAL_FILE_SUFFIX );
120+ int commitTimeStart = fileName .lastIndexOf ('_' , markerEnd - 1 );
121+ return fileName .substring (0 , commitTimeStart );
122+ }
46123}
0 commit comments