@@ -117,6 +117,7 @@ def list_readers(self):
117117 print ("\n Valid Readers for `reader_name` in read():\n " + "-" * 50 )
118118 print ("Collection : Loads data from an Ordered Dict. If multiple tables, each table must be a nested OrderedDict." )
119119 print ("CSV : Loads data from CSV files (one table per call)" )
120+ print ("Parquet : Loads data from Parquet - a columnar storage format for Apache Hadoop (one table per call)" )
120121 print ("YAML1 : Loads data from YAML files of a certain structure" )
121122 print ("TOML1 : Loads data from TOML files of a certain structure" )
122123 print ("JSON : Loads single-table data from JSON files" )
@@ -139,6 +140,7 @@ def read(self, filenames, reader_name, table_name = None):
139140 The expected input type depends on the selected `reader_name`:
140141 - "Collection" → Ordered Dictionary of table(s)
141142 - "CSV" → .csv
143+ - "Parquet" → .pq
142144 - "YAML1" → .yaml or .yml
143145 - "TOML1" → .toml
144146 - "JSON" → .json
@@ -163,7 +165,7 @@ def read(self, filenames, reader_name, table_name = None):
163165
164166 Required when using the `Collection` reader to load an Ordered Dictionary representing only one table.
165167
166- Recommended when the input file contains a single table for the `CSV`, `JSON`, or `Ensemble` reader.
168+ Recommended when the input file contains a single table for the `CSV`, `Parquet`, `JSON`, or `Ensemble` reader.
167169 """
168170 if isinstance (filenames , str ) and not os .path .exists (filenames ):
169171 sys .exit ("read() ERROR: The input file must be a valid filepath. Please check again." )
@@ -234,6 +236,8 @@ def read(self, filenames, reader_name, table_name = None):
234236 self .t .load_module ('plugin' , 'Bueno' , 'reader' , filenames = filenames )
235237 elif reader_name .lower () == "csv" :
236238 self .t .load_module ('plugin' , 'Csv' , 'reader' , filenames = filenames , table_name = table_name )
239+ elif reader_name .lower () == "parquet" :
240+ self .t .load_module ('plugin' , 'Parquet' , 'reader' , filenames = filenames , table_name = table_name )
237241 elif reader_name .lower () == "yaml1" :
238242 self .t .load_module ('plugin' , 'YAML1' , 'reader' , filenames = filenames )
239243 elif reader_name .lower () == "toml1" :
@@ -257,7 +261,7 @@ def read(self, filenames, reader_name, table_name = None):
257261
258262 if correct_reader == False :
259263 print ("read() ERROR: Please check your spelling of the 'reader_name' argument as it does not exist in DSI\n " )
260- elg = "Collection, CSV, YAML1, TOML1, JSON, Ensemble, Cloverleaf, Bueno, DublinCoreDatacard, SchemaOrgDatacard"
264+ elg = "Collection, CSV, Parquet, YAML1, TOML1, JSON, Ensemble, Cloverleaf, Bueno, DublinCoreDatacard, SchemaOrgDatacard"
261265 sys .exit (f"Eligible readers are: { elg } , GoogleDatacard, Oceans11Datacard" )
262266
263267 table_keys = [k for k in self .t .new_tables if k not in ("dsi_relations" , "dsi_units" )]
@@ -672,6 +676,7 @@ def list_writers(self):
672676 print ("ER_Diagram : Creates a visual ER diagram image based on all tables in DSI." )
673677 print ("Table_Plot : Generates a plot of numerical data from a specified table." )
674678 print ("Csv : Exports the data of a specified table to a CSV file." )
679+ print ("Parquet : Exports the data of a specified table to a Parquet file." )
675680 print ()
676681
677682 def write (self , filename , writer_name , table_name = None ):
@@ -685,6 +690,7 @@ def write(self, filename, writer_name, table_name = None):
685690 - "ER_Diagram" → .png, .pdf, .jpg, .jpeg
686691 - "Table_Plot" → .png, .jpg, .jpeg
687692 - "Csv" → .csv
693+ - "Parquet" → .pq
688694
689695 `writer_name` : str
690696 Name of the DSI Writer to export data.
@@ -695,7 +701,7 @@ def write(self, filename, writer_name, table_name = None):
695701 For guidance on creating a DSI-compatible Writer, view :ref:`custom_writer`.
696702
697703 `table_name`: str, optional
698- Required when using "Table_Plot" or "Csv " to specify which table to export.
704+ Required when using "Table_Plot", "Csv" or "Parquet" to specify which table to export.
699705 """
700706 if not self .t .valid_backend (self .main_backend_obj , self .main_backend_obj .__class__ .__bases__ [0 ].__name__ ):
701707 sys .exit ("ERROR: Cannot write() data from an empty backend. Please ensure there is data in it." )
@@ -764,6 +770,8 @@ def write(self, filename, writer_name, table_name = None):
764770 self .t .load_module ('plugin' , 'Table_Plot' , 'writer' , filename = filename , table_name = table_name )
765771 elif writer_name .lower () in ["csv" , "csv writer" , "csv_writer" ]:
766772 self .t .load_module ('plugin' , 'Csv_Writer' , 'writer' , filename = filename , table_name = table_name )
773+ elif writer_name .lower () in ["parquet" , "parquet writer" , "parquet_writer" ]:
774+ self .t .load_module ('plugin' , 'Parquet_Writer' , 'writer' , filename = filename , table_name = table_name )
767775 else :
768776 correct_writer = False
769777 except Exception as e :
0 commit comments