|
| 1 | +# Copyright 2025 AstroLab Software |
| 2 | +# Author: Julien Peloton |
| 3 | +# |
| 4 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +# you may not use this file except in compliance with the License. |
| 6 | +# You may obtain a copy of the License at |
| 7 | +# |
| 8 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +# |
| 10 | +# Unless required by applicable law or agreed to in writing, software |
| 11 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +# See the License for the specific language governing permissions and |
| 14 | +# limitations under the License. |
| 15 | +"""Contains functionalities to work with HDFS""" |
| 16 | + |
| 17 | +from pyspark.sql import SparkSession |
| 18 | + |
| 19 | + |
def path_exist(path: str) -> bool:
    """Check if a path exists on Spark shared filesystem (HDFS or S3)

    The lookup goes through the Hadoop ``FileSystem`` API of the JVM
    attached to the active Spark session, using that session's Hadoop
    configuration. The path is matched with ``globStatus``, so glob
    patterns are accepted: the function returns True as soon as at
    least one entry matches.

    Parameters
    ----------
    path : str
        Path (or glob pattern) to check

    Returns
    -------
    bool
        True if the path exists, False otherwise
    """
    spark = SparkSession.builder.getOrCreate()

    jvm = spark._jvm
    hadoop_conf = spark._jsc.hadoopConfiguration()

    hadoop_path = jvm.org.apache.hadoop.fs.Path(path)

    # Resolve the filesystem from the Hadoop Path rather than from
    # java.net.URI(path): the URI constructor rejects characters that are
    # valid in Hadoop paths and globs (e.g. spaces or `{a,b}`).
    fs = hadoop_path.getFileSystem(hadoop_conf)

    statuses = fs.globStatus(hadoop_path)

    # globStatus returns null (None through py4j) when the path contains
    # no glob and does not exist, and an empty array when a glob matches
    # nothing. Both cases mean the path does not exist.
    return statuses is not None and len(statuses) > 0
0 commit comments