@@ -21,14 +21,24 @@ use std::sync::Arc;
21
21
use std:: sync:: atomic:: AtomicU64 ;
22
22
23
23
use crate :: Result ;
24
- use crate :: spec:: { DataFileFormat , TableMetadata } ;
24
+ use crate :: spec:: { DataFileFormat , PartitionKey , TableMetadata } ;
25
25
26
26
/// `LocationGenerator` used to generate the location of data file.
27
27
pub trait LocationGenerator : Clone + Send + ' static {
28
- /// Generate an absolute path for the given file name.
29
- /// e.g.
30
- /// For file name "part-00000.parquet", the generated location maybe "/table/data/part-00000.parquet"
31
- fn generate_location ( & self , file_name : & str ) -> String ;
28
+ /// Generate an absolute path for the given file name that includes the partition path.
29
+ ///
30
+ /// # Arguments
31
+ ///
32
+ /// * `partition_key` - The partition key of the file. If None, generate a non-partitioned path.
33
+ /// * `file_name` - The name of the file
34
+ ///
35
+ /// # Returns
36
+ ///
37
+ /// An absolute path that includes the partition path, e.g.,
38
+ /// "/table/data/id=1/name=alice/part-00000.parquet"
39
+ /// or non-partitioned path:
40
+ /// "/table/data/part-00000.parquet"
41
+ fn generate_location ( & self , partition_key : Option < & PartitionKey > , file_name : & str ) -> String ;
32
42
}
33
43
34
44
/// Table property key that `DefaultLocationGenerator::new` looks up first when resolving the data location.
const WRITE_DATA_LOCATION : & str = "write.data.path" ;
@@ -39,29 +49,38 @@ const DEFAULT_DATA_DIR: &str = "/data";
39
49
/// `DefaultLocationGenerator` generates data file locations underneath a single data directory.
/// That directory is derived from the table location and the data location table properties.
pub struct DefaultLocationGenerator {
    /// Base location every generated path starts with (no trailing file name).
    data_location: String,
}
44
54
45
55
impl DefaultLocationGenerator {
46
56
/// Create a new `DefaultLocationGenerator`.
47
57
pub fn new ( table_metadata : TableMetadata ) -> Result < Self > {
48
58
let table_location = table_metadata. location ( ) ;
49
59
let prop = table_metadata. properties ( ) ;
50
- let data_location = prop
60
+ let configured_data_location = prop
51
61
. get ( WRITE_DATA_LOCATION )
52
62
. or ( prop. get ( WRITE_FOLDER_STORAGE_LOCATION ) ) ;
53
- let dir_path = if let Some ( data_location) = data_location {
63
+ let data_location = if let Some ( data_location) = configured_data_location {
54
64
data_location. clone ( )
55
65
} else {
56
66
format ! ( "{}{}" , table_location, DEFAULT_DATA_DIR )
57
67
} ;
58
- Ok ( Self { dir_path } )
68
+ Ok ( Self { data_location } )
59
69
}
60
70
}
61
71
62
72
impl LocationGenerator for DefaultLocationGenerator {
63
- fn generate_location ( & self , file_name : & str ) -> String {
64
- format ! ( "{}/{}" , self . dir_path, file_name)
73
+ fn generate_location ( & self , partition_key : Option < & PartitionKey > , file_name : & str ) -> String {
74
+ if PartitionKey :: is_effectively_none ( partition_key) {
75
+ format ! ( "{}/{}" , self . data_location, file_name)
76
+ } else {
77
+ format ! (
78
+ "{}/{}/{}" ,
79
+ self . data_location,
80
+ partition_key. unwrap( ) . to_path( ) ,
81
+ file_name
82
+ )
83
+ }
65
84
}
66
85
}
67
86
@@ -115,11 +134,15 @@ impl FileNameGenerator for DefaultFileNameGenerator {
115
134
#[ cfg( test) ]
116
135
pub ( crate ) mod test {
117
136
use std:: collections:: HashMap ;
137
+ use std:: sync:: Arc ;
118
138
119
139
use uuid:: Uuid ;
120
140
121
141
use super :: LocationGenerator ;
122
- use crate :: spec:: { FormatVersion , PartitionSpec , StructType , TableMetadata } ;
142
+ use crate :: spec:: {
143
+ FormatVersion , Literal , NestedField , PartitionKey , PartitionSpec , PrimitiveType , Schema ,
144
+ Struct , StructType , TableMetadata , Transform , Type ,
145
+ } ;
123
146
use crate :: writer:: file_writer:: location_generator:: {
124
147
FileNameGenerator , WRITE_DATA_LOCATION , WRITE_FOLDER_STORAGE_LOCATION ,
125
148
} ;
@@ -136,8 +159,17 @@ pub(crate) mod test {
136
159
}
137
160
138
161
impl LocationGenerator for MockLocationGenerator {
139
- fn generate_location ( & self , file_name : & str ) -> String {
140
- format ! ( "{}/{}" , self . root, file_name)
162
+ fn generate_location ( & self , partition : Option < & PartitionKey > , file_name : & str ) -> String {
163
+ if PartitionKey :: is_effectively_none ( partition) {
164
+ format ! ( "{}/{}" , self . root, file_name)
165
+ } else {
166
+ format ! (
167
+ "{}/{}/{}" ,
168
+ self . root,
169
+ partition. unwrap( ) . to_path( ) ,
170
+ file_name
171
+ )
172
+ }
141
173
}
142
174
}
143
175
@@ -169,7 +201,7 @@ pub(crate) mod test {
169
201
encryption_keys : HashMap :: new ( ) ,
170
202
} ;
171
203
172
- let file_name_genertaor = super :: DefaultFileNameGenerator :: new (
204
+ let file_name_generator = super :: DefaultFileNameGenerator :: new (
173
205
"part" . to_string ( ) ,
174
206
Some ( "test" . to_string ( ) ) ,
175
207
crate :: spec:: DataFileFormat :: Parquet ,
@@ -179,7 +211,7 @@ pub(crate) mod test {
179
211
let location_generator =
180
212
super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ;
181
213
let location =
182
- location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ;
214
+ location_generator. generate_location ( None , & file_name_generator . generate_file_name ( ) ) ;
183
215
assert_eq ! ( location, "s3://data.db/table/data/part-00000-test.parquet" ) ;
184
216
185
217
// test custom data location
@@ -190,7 +222,7 @@ pub(crate) mod test {
190
222
let location_generator =
191
223
super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ;
192
224
let location =
193
- location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ;
225
+ location_generator. generate_location ( None , & file_name_generator . generate_file_name ( ) ) ;
194
226
assert_eq ! (
195
227
location,
196
228
"s3://data.db/table/data_1/part-00001-test.parquet"
@@ -203,7 +235,7 @@ pub(crate) mod test {
203
235
let location_generator =
204
236
super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ;
205
237
let location =
206
- location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ;
238
+ location_generator. generate_location ( None , & file_name_generator . generate_file_name ( ) ) ;
207
239
assert_eq ! (
208
240
location,
209
241
"s3://data.db/table/data_2/part-00002-test.parquet"
@@ -217,7 +249,79 @@ pub(crate) mod test {
217
249
let location_generator =
218
250
super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ;
219
251
let location =
220
- location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ;
252
+ location_generator. generate_location ( None , & file_name_generator . generate_file_name ( ) ) ;
221
253
assert_eq ! ( location, "s3://data.db/data_3/part-00003-test.parquet" ) ;
222
254
}
255
+
256
#[test]
fn test_location_generate_with_partition() {
    // Schema with two columns: `id` (int) and `name` (string).
    let schema = Arc::new(
        Schema::builder()
            .with_schema_id(1)
            .with_fields(vec![
                NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
                NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
            ])
            .build()
            .unwrap(),
    );

    // Identity-partition on both columns.
    let spec = PartitionSpec::builder(schema.clone())
        .add_partition_field("id", "id", Transform::Identity)
        .unwrap()
        .add_partition_field("name", "name", Transform::Identity)
        .unwrap()
        .build()
        .unwrap();

    // Partition key carrying one value per partition field.
    let values = Struct::from_iter([Some(Literal::int(42)), Some(Literal::string("alice"))]);
    let partition_key = PartitionKey::new(spec, schema, values);

    // Table metadata without data-location properties, so the default data dir is used.
    let table_metadata = TableMetadata {
        format_version: FormatVersion::V2,
        table_uuid: Uuid::parse_str("fb072c92-a02b-11e9-ae9c-1bb7bc9eca94").unwrap(),
        location: "s3://data.db/table".to_string(),
        last_updated_ms: 1515100955770,
        last_column_id: 2,
        schemas: HashMap::new(),
        current_schema_id: 1,
        partition_specs: HashMap::new(),
        default_spec: PartitionSpec::unpartition_spec().into(),
        default_partition_type: StructType::new(vec![]),
        last_partition_id: 1000,
        default_sort_order_id: 0,
        sort_orders: HashMap::from_iter(vec![]),
        snapshots: HashMap::default(),
        current_snapshot_id: None,
        last_sequence_number: 1,
        properties: HashMap::new(),
        snapshot_log: Vec::new(),
        metadata_log: vec![],
        refs: HashMap::new(),
        statistics: HashMap::new(),
        partition_statistics: HashMap::new(),
        encryption_keys: HashMap::new(),
    };

    let file_name = "data-00000.parquet";

    // MockLocationGenerator: partition path sits between the root and the file name.
    let mock_generator = MockLocationGenerator::new("/base/path".to_string());
    let mock_location = mock_generator.generate_location(Some(&partition_key), file_name);
    assert_eq!(
        mock_location,
        "/base/path/id=42/name=alice/data-00000.parquet"
    );

    // DefaultLocationGenerator: partition path sits under the table's data directory.
    let default_generator = super::DefaultLocationGenerator::new(table_metadata).unwrap();
    let default_location = default_generator.generate_location(Some(&partition_key), file_name);
    assert_eq!(
        default_location,
        "s3://data.db/table/data/id=42/name=alice/data-00000.parquet"
    );
}
223
327
}
0 commit comments