3232import com .facebook .presto .hive .metastore .MetastoreContext ;
3333import com .facebook .presto .hive .metastore .Partition ;
3434import com .facebook .presto .hive .metastore .PartitionStatistics ;
35+ import com .facebook .presto .hive .metastore .StorageFormat ;
3536import com .facebook .presto .hive .metastore .Table ;
3637import com .facebook .presto .spi .ConnectorSession ;
38+ import com .facebook .presto .spi .PrestoException ;
3739import com .facebook .presto .spi .SchemaTableName ;
3840import com .google .common .base .Stopwatch ;
3941import com .google .common .cache .Cache ;
4042import com .google .common .cache .CacheBuilder ;
4143import com .google .common .collect .ImmutableList ;
4244import com .google .common .collect .ImmutableMap ;
4345import org .apache .hadoop .fs .Path ;
46+ import org .apache .hadoop .hive .ql .io .SymlinkTextInputFormat ;
47+ import org .apache .hadoop .mapred .InputFormat ;
4448import org .weakref .jmx .Managed ;
4549import org .weakref .jmx .Nested ;
4650
6165import java .util .concurrent .TimeoutException ;
6266import java .util .concurrent .atomic .AtomicLong ;
6367import java .util .concurrent .atomic .AtomicReference ;
68+ import java .util .stream .Collectors ;
6469
6570import static com .facebook .airlift .concurrent .Threads .daemonThreadsNamed ;
71+ import static com .facebook .presto .hive .HiveErrorCode .HIVE_BAD_DATA ;
6672import static com .facebook .presto .hive .HivePartition .UNPARTITIONED_ID ;
6773import static com .facebook .presto .hive .HiveSessionProperties .getQuickStatsBackgroundBuildTimeout ;
6874import static com .facebook .presto .hive .HiveSessionProperties .getQuickStatsInlineBuildTimeout ;
6975import static com .facebook .presto .hive .HiveSessionProperties .isQuickStatsEnabled ;
7076import static com .facebook .presto .hive .HiveSessionProperties .isSkipEmptyFilesEnabled ;
7177import static com .facebook .presto .hive .HiveSessionProperties .isUseListDirectoryCache ;
7278import static com .facebook .presto .hive .HiveUtil .buildDirectoryContextProperties ;
79+ import static com .facebook .presto .hive .HiveUtil .getInputFormat ;
80+ import static com .facebook .presto .hive .HiveUtil .getTargetPathsHiveFileInfos ;
81+ import static com .facebook .presto .hive .HiveUtil .readSymlinkPaths ;
7382import static com .facebook .presto .hive .NestedDirectoryPolicy .IGNORED ;
7483import static com .facebook .presto .hive .NestedDirectoryPolicy .RECURSE ;
7584import static com .facebook .presto .hive .metastore .PartitionStatistics .empty ;
@@ -323,15 +332,18 @@ private PartitionStatistics buildQuickStats(String partitionKey, String partitio
323332 Table resolvedTable = metastore .getTable (metastoreContext , table .getSchemaName (), table .getTableName ()).get ();
324333 Optional <Partition > partition ;
325334 Path path ;
335+ StorageFormat storageFormat ;
326336 if (UNPARTITIONED_ID .getPartitionName ().equals (partitionId )) {
327337 partition = Optional .empty ();
328338 path = new Path (resolvedTable .getStorage ().getLocation ());
339+ storageFormat = resolvedTable .getStorage ().getStorageFormat ();
329340 }
330341 else {
331342 partition = metastore .getPartitionsByNames (metastoreContext , table .getSchemaName (), table .getTableName (),
332343 ImmutableList .of (new PartitionNameWithVersion (partitionId , Optional .empty ()))).get (partitionId );
333344 checkState (partition .isPresent (), "getPartitionsByNames returned no partitions for partition with name [%s]" , partitionId );
334345 path = new Path (partition .get ().getStorage ().getLocation ());
346+ storageFormat = partition .get ().getStorage ().getStorageFormat ();
335347 }
336348
337349 HdfsContext hdfsContext = new HdfsContext (session , table .getSchemaName (), table .getTableName (), partitionId , false );
@@ -347,6 +359,37 @@ private PartitionStatistics buildQuickStats(String partitionKey, String partitio
347359
348360 Iterator <HiveFileInfo > fileList = directoryLister .list (fs , resolvedTable , path , partition , nameNodeStats , hiveDirectoryContext );
349361
362+ InputFormat <?, ?> inputFormat = getInputFormat (hdfsEnvironment .getConfiguration (hdfsContext , path ), storageFormat .getInputFormat (), storageFormat .getSerDe (), false );
363+ if (inputFormat instanceof SymlinkTextInputFormat ) {
364+ // For symlinks, follow the paths in the manifest file and create a new iterator of the target files
365+ try {
366+ List <Path > targetPaths = readSymlinkPaths (fs , fileList );
367+
368+ Map <Path , List <Path >> parentToTargets = targetPaths .stream ().collect (Collectors .groupingBy (Path ::getParent ));
369+
370+ ImmutableList .Builder <HiveFileInfo > targetFileInfoList = ImmutableList .builder ();
371+
372+ for (Map .Entry <Path , List <Path >> entry : parentToTargets .entrySet ()) {
373+ targetFileInfoList .addAll (getTargetPathsHiveFileInfos (
374+ path ,
375+ partition ,
376+ entry .getKey (),
377+ entry .getValue (),
378+ hiveDirectoryContext ,
379+ fs ,
380+ directoryLister ,
381+ resolvedTable ,
382+ nameNodeStats ,
383+ session ));
384+ }
385+
386+ fileList = targetFileInfoList .build ().iterator ();
387+ }
388+ catch (IOException e ) {
389+ throw new PrestoException (HIVE_BAD_DATA , "Error parsing symlinks" , e );
390+ }
391+ }
392+
350393 PartitionQuickStats partitionQuickStats = PartitionQuickStats .EMPTY ;
351394 Stopwatch buildStopwatch = Stopwatch .createStarted ();
352395 // Build quick stats one by one from statsBuilderStrategies. Do this until we get a non-empty PartitionQuickStats
0 commit comments