1818"""The default input source for DataFusion."""
1919
2020import glob
21- import os
21+ from pathlib import Path
2222from typing import Any
2323
2424from datafusion .common import DataTypeMap , SqlTable
@@ -31,35 +31,36 @@ class LocationInputPlugin(BaseInputSource):
3131 This can be read in from a file (on disk, remote etc.).
3232 """
3333
def is_correct_input(self, input_item: Any, table_name: str, **kwargs: Any) -> bool:  # noqa: ARG002
    """Return `True` if the input is valid for this plugin.

    An input is accepted when it is a string; no check is made that the
    string refers to an existing or readable location.

    Args:
        input_item: The candidate input object.
        table_name: Name the table would be registered under. Not inspected
            by this check.
        **kwargs: Additional options. Not inspected by this check.

    Returns:
        `True` when ``input_item`` is a ``str``, otherwise `False`.
    """
    return isinstance(input_item, str)
3737
def build_table(
    self,
    input_item: str,
    table_name: str,
    **kwargs: Any,  # noqa: ARG002
) -> SqlTable:  # type: ignore[invalid-type-form]
    """Create a table from the input source.

    The file format is taken from the extension of ``input_item``
    (case-insensitive, without the leading dot).

    Args:
        input_item: Path (or glob pattern) of the file(s) backing the table.
        table_name: Name passed through to the resulting ``SqlTable``.
        **kwargs: Additional options. Not used by this implementation.

    Returns:
        A ``SqlTable`` built from ``table_name``, the ``(name, type)`` column
        pairs read from the Parquet schema, the row count from the Parquet
        metadata, and the list of files matching ``input_item``.

    Raises:
        RuntimeError: If the input is a CSV file (not supported yet) or has
            any extension other than ``parquet``.
    """
    extension = Path(input_item).suffix
    file_format = extension.lstrip(".").lower()
    num_rows = 0  # Total number of rows in the file. Used for statistics
    columns = []
    if file_format == "parquet":
        import pyarrow.parquet as pq

        # Only the Parquet metadata is read; it carries both the row count
        # and the schema, so the data pages are not touched here.
        metadata = pq.read_metadata(input_item)
        num_rows = metadata.num_rows
        # Iterate through the schema and build the SqlTable columns
        columns = [
            (
                col.name,
                DataTypeMap.from_parquet_type_str(col.physical_type),
            )
            for col in metadata.schema
        ]
    elif file_format == "csv":
        import csv

        # Counting rows needs a full pass over the file. This should only be
        # occurring at table creation time and therefore shouldn't slow down
        # query performance.
        # `newline=""` is what the csv module requires so that newlines
        # embedded in quoted fields are not miscounted as row boundaries.
        with Path(input_item).open(newline="") as file:
            reader = csv.reader(file)
            # Skip the header row; the default keeps an empty file from
            # escaping as a bare StopIteration.
            next(reader, None)
            num_rows = sum(1 for _ in reader)
        # TODO: Need to actually consume this row into reasonable columns
        msg = "TODO: Currently unable to support CSV input files."
        raise RuntimeError(msg)
    else:
        msg = (
            f"Input of format: `{file_format}` is currently not supported. "
            "Only Parquet and CSV."
        )
        raise RuntimeError(msg)

    # Input could possibly be multiple files. Create a list if so
    input_files = glob.glob(input_item)  # noqa: PTH207

    return SqlTable(table_name, columns, num_rows, input_files)
0 commit comments