Skip to content

Commit 12ee7e1

Browse files
committed
Update code comments
1 parent e4fd88d commit 12ee7e1

File tree

1 file changed

+15
-16
lines changed

1 file changed

+15
-16
lines changed

pointblank/validate.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2218,7 +2218,8 @@ class Validate:
22182218
- BigQuery table (`"bigquery"`)*
22192219
- Parquet table (`"parquet"`)*
22202220
- CSV files (string path or `pathlib.Path` object with `.csv` extension)
2221-
- Parquet files (string path, `pathlib.Path` object, glob pattern, directory with `.parquet` extension, or Spark-style partitioned dataset)
2221+
- Parquet files (string path, `pathlib.Path` object, glob pattern, directory with `.parquet`
2222+
extension, or partitioned dataset)
22222223

22232224
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
22242225
`ibis.expr.types.relations.Table`). Furthermore, the use of `Validate` with such tables requires
@@ -2735,11 +2736,11 @@ def _process_parquet_input(self, data: FrameT | Any) -> FrameT | Any:
27352736
Process data parameter to handle Parquet file inputs.
27362737

27372738
Supports:
2738-
- Single .parquet file (string or Path)
2739-
- Glob patterns for multiple .parquet files (e.g., "data/*.parquet")
2740-
- Directory containing .parquet files
2741-
- Spark-style partitioned datasets with automatic partition column inference
2742-
- List/sequence of .parquet file paths
2739+
- single .parquet file (string or Path)
2740+
- glob patterns for multiple .parquet files (e.g., "data/*.parquet")
2741+
- directory containing .parquet files
2742+
- partitioned Parquet datasets with automatic partition column inference
2743+
- list/sequence of .parquet file paths
27432744

27442745
Returns the original data if it's not a Parquet file input.
27452746
"""
@@ -2753,8 +2754,8 @@ def _process_parquet_input(self, data: FrameT | Any) -> FrameT | Any:
27532754
data_str = str(data)
27542755
path_obj = Path(data)
27552756

2756-
# Check if it's a glob pattern containing .parquet first
2757-
# Look for glob characters: *, ?, [, ]
2757+
# Check if it's a glob pattern containing .parquet first; look for glob
2758+
# characters: `*`, `?`, `[`, `]`
27582759
if ".parquet" in data_str.lower() and any(
27592760
char in data_str for char in ["*", "?", "[", "]"]
27602761
):
@@ -2773,9 +2774,8 @@ def _process_parquet_input(self, data: FrameT | Any) -> FrameT | Any:
27732774

27742775
# Check if it's a directory
27752776
elif path_obj.is_dir():
2776-
# First, try to read as a partitioned parquet dataset; This handles
2777-
# Spark-style partitioned datasets where parquet files are in subdirectories
2778-
# with partition columns encoded in paths
2777+
# First, try to read as a partitioned parquet dataset; this handles datasets where
2778+
# Parquet files are in subdirectories with partition columns encoded in paths
27792779
try:
27802780
# Both Polars and Pandas can handle partitioned datasets natively
27812781
if _is_lib_present(lib_name="polars"):
@@ -2826,8 +2826,7 @@ def _process_parquet_input(self, data: FrameT | Any) -> FrameT | Any:
28262826
if not parquet_paths:
28272827
return data
28282828

2829-
# Read the parquet file(s) using available libraries
2830-
# Prefer Polars, fallback to Pandas
2829+
# Read the parquet file(s) using available libraries; prefer Polars, fallback to Pandas
28312830
if _is_lib_present(lib_name="polars"):
28322831
try:
28332832
import polars as pl
@@ -2836,7 +2835,7 @@ def _process_parquet_input(self, data: FrameT | Any) -> FrameT | Any:
28362835
# Single file
28372836
return pl.read_parquet(parquet_paths[0])
28382837
else:
2839-
# Multiple files - concatenate them
2838+
# Multiple files: concatenate them
28402839
dfs = [pl.read_parquet(path) for path in parquet_paths]
28412840
return pl.concat(dfs, how="vertical_relaxed")
28422841
except Exception as e:
@@ -2847,7 +2846,7 @@ def _process_parquet_input(self, data: FrameT | Any) -> FrameT | Any:
28472846
if len(parquet_paths) == 1:
28482847
return pd.read_parquet(parquet_paths[0])
28492848
else:
2850-
# Multiple files - concatenate them
2849+
# Multiple files: concatenate them
28512850
dfs = [pd.read_parquet(path) for path in parquet_paths]
28522851
return pd.concat(dfs, ignore_index=True)
28532852
else:
@@ -2862,7 +2861,7 @@ def _process_parquet_input(self, data: FrameT | Any) -> FrameT | Any:
28622861
if len(parquet_paths) == 1:
28632862
return pd.read_parquet(parquet_paths[0])
28642863
else:
2865-
# Multiple files - concatenate them
2864+
# Multiple files: concatenate them
28662865
dfs = [pd.read_parquet(path) for path in parquet_paths]
28672866
return pd.concat(dfs, ignore_index=True)
28682867
except Exception as e:

0 commit comments

Comments
 (0)