# Names of the queries selected to run. run_queries() overwrites this global
# when it is given a non-empty selection.
# NOTE(review): None presumably means "no filter, run everything" - confirm
# against the code that reads this variable.
queries: Optional[Union[Set[str], List[str]]] = None
2626
2727
def load_lineitem(
    data_folder: str, use_arrow_dtype: Optional[bool] = None, gpu: bool = False
) -> md.DataFrame:
    """Load the ``lineitem`` table and parse its date columns.

    Reads ``<data_folder>/lineitem.pq`` with ``md.read_parquet`` and converts
    the ``L_SHIPDATE``, ``L_RECEIPTDATE`` and ``L_COMMITDATE`` columns with
    ``md.to_datetime`` using the ``%Y-%m-%d`` format.

    Args:
        data_folder: Folder containing ``lineitem.pq``; joined with ``/``.
        use_arrow_dtype: Forwarded unchanged to ``md.read_parquet``.
        gpu: Forwarded unchanged to ``md.read_parquet``.

    Returns:
        The loaded DataFrame with the three date columns converted.
    """
    data_path = f"{data_folder}/lineitem.pq"
    df = md.read_parquet(data_path, use_arrow_dtype=use_arrow_dtype, gpu=gpu)
    # All three date columns share the same on-disk string format.
    for column in ("L_SHIPDATE", "L_RECEIPTDATE", "L_COMMITDATE"):
        df[column] = md.to_datetime(df[column], format="%Y-%m-%d")
    return df
3537
3638
def load_part(
    data_folder: str, use_arrow_dtype: Optional[bool] = None, gpu: bool = False
) -> md.DataFrame:
    """Load the ``part`` table from ``<data_folder>/part.pq``.

    Args:
        data_folder: Folder containing ``part.pq``; joined with ``/``.
        use_arrow_dtype: Forwarded unchanged to ``md.read_parquet``.
        gpu: Forwarded unchanged to ``md.read_parquet``.

    Returns:
        The DataFrame returned by ``md.read_parquet``.
    """
    data_path = f"{data_folder}/part.pq"
    return md.read_parquet(data_path, use_arrow_dtype=use_arrow_dtype, gpu=gpu)
4145
4246
def load_orders(
    data_folder: str, use_arrow_dtype: Optional[bool] = None, gpu: bool = False
) -> md.DataFrame:
    """Load the ``orders`` table and parse its order-date column.

    Reads ``<data_folder>/orders.pq`` with ``md.read_parquet`` and converts
    ``O_ORDERDATE`` with ``md.to_datetime`` using the ``%Y-%m-%d`` format.

    Args:
        data_folder: Folder containing ``orders.pq``; joined with ``/``.
        use_arrow_dtype: Forwarded unchanged to ``md.read_parquet``.
        gpu: Forwarded unchanged to ``md.read_parquet``.

    Returns:
        The loaded DataFrame with ``O_ORDERDATE`` converted.
    """
    data_path = f"{data_folder}/orders.pq"
    df = md.read_parquet(data_path, use_arrow_dtype=use_arrow_dtype, gpu=gpu)
    df["O_ORDERDATE"] = md.to_datetime(df["O_ORDERDATE"], format="%Y-%m-%d")
    return df
4854
4955
def load_customer(
    data_folder: str, use_arrow_dtype: Optional[bool] = None, gpu: bool = False
) -> md.DataFrame:
    """Load the ``customer`` table from ``<data_folder>/customer.pq``.

    Args:
        data_folder: Folder containing ``customer.pq``; joined with ``/``.
        use_arrow_dtype: Forwarded unchanged to ``md.read_parquet``.
        gpu: Forwarded unchanged to ``md.read_parquet``.

    Returns:
        The DataFrame returned by ``md.read_parquet``.
    """
    data_path = f"{data_folder}/customer.pq"
    return md.read_parquet(data_path, use_arrow_dtype=use_arrow_dtype, gpu=gpu)
5462
5563
def load_nation(
    data_folder: str, use_arrow_dtype: Optional[bool] = None, gpu: bool = False
) -> md.DataFrame:
    """Load the ``nation`` table from ``<data_folder>/nation.pq``.

    Args:
        data_folder: Folder containing ``nation.pq``; joined with ``/``.
        use_arrow_dtype: Forwarded unchanged to ``md.read_parquet``.
        gpu: Forwarded unchanged to ``md.read_parquet``.

    Returns:
        The DataFrame returned by ``md.read_parquet``.
    """
    data_path = f"{data_folder}/nation.pq"
    return md.read_parquet(data_path, use_arrow_dtype=use_arrow_dtype, gpu=gpu)
6070
6171
def load_region(
    data_folder: str, use_arrow_dtype: Optional[bool] = None, gpu: bool = False
) -> md.DataFrame:
    """Load the ``region`` table from ``<data_folder>/region.pq``.

    Args:
        data_folder: Folder containing ``region.pq``; joined with ``/``.
        use_arrow_dtype: Forwarded unchanged to ``md.read_parquet``.
        gpu: Forwarded unchanged to ``md.read_parquet``.

    Returns:
        The DataFrame returned by ``md.read_parquet``.
    """
    data_path = f"{data_folder}/region.pq"
    return md.read_parquet(data_path, use_arrow_dtype=use_arrow_dtype, gpu=gpu)
6678
6779
def load_supplier(
    data_folder: str, use_arrow_dtype: Optional[bool] = None, gpu: bool = False
) -> md.DataFrame:
    """Load the ``supplier`` table from ``<data_folder>/supplier.pq``.

    Args:
        data_folder: Folder containing ``supplier.pq``; joined with ``/``.
        use_arrow_dtype: Forwarded unchanged to ``md.read_parquet``.
        gpu: Forwarded unchanged to ``md.read_parquet``.

    Returns:
        The DataFrame returned by ``md.read_parquet``.
    """
    data_path = f"{data_folder}/supplier.pq"
    return md.read_parquet(data_path, use_arrow_dtype=use_arrow_dtype, gpu=gpu)
7286
7387
def load_partsupp(
    data_folder: str, use_arrow_dtype: Optional[bool] = None, gpu: bool = False
) -> md.DataFrame:
    """Load the ``partsupp`` table from ``<data_folder>/partsupp.pq``.

    Args:
        data_folder: Folder containing ``partsupp.pq``; joined with ``/``.
        use_arrow_dtype: Forwarded unchanged to ``md.read_parquet``.
        gpu: Forwarded unchanged to ``md.read_parquet``.

    Returns:
        The DataFrame returned by ``md.read_parquet``.
    """
    data_path = f"{data_folder}/partsupp.pq"
    return md.read_parquet(data_path, use_arrow_dtype=use_arrow_dtype, gpu=gpu)
7894
7995
@@ -967,22 +983,25 @@ def q22(customer, orders):
967983
968984
969985def run_queries (
970- data_folder : str , select : List [str ] = None , use_arrow_dtype : bool = None
986+ data_folder : str ,
987+ select : List [str ] = None ,
988+ use_arrow_dtype : bool = None ,
989+ gpu : bool = False ,
971990):
972991 if select :
973992 global queries
974993 queries = select
975994
976995 # Load the data
977996 t1 = time .time ()
978- lineitem = load_lineitem (data_folder , use_arrow_dtype = use_arrow_dtype )
979- orders = load_orders (data_folder , use_arrow_dtype = use_arrow_dtype )
980- customer = load_customer (data_folder , use_arrow_dtype = use_arrow_dtype )
981- nation = load_nation (data_folder , use_arrow_dtype = use_arrow_dtype )
982- region = load_region (data_folder , use_arrow_dtype = use_arrow_dtype )
983- supplier = load_supplier (data_folder , use_arrow_dtype = use_arrow_dtype )
984- part = load_part (data_folder , use_arrow_dtype = use_arrow_dtype )
985- partsupp = load_partsupp (data_folder , use_arrow_dtype = use_arrow_dtype )
997+ lineitem = load_lineitem (data_folder , use_arrow_dtype = use_arrow_dtype , gpu = gpu )
998+ orders = load_orders (data_folder , use_arrow_dtype = use_arrow_dtype , gpu = gpu )
999+ customer = load_customer (data_folder , use_arrow_dtype = use_arrow_dtype , gpu = gpu )
1000+ nation = load_nation (data_folder , use_arrow_dtype = use_arrow_dtype , gpu = gpu )
1001+ region = load_region (data_folder , use_arrow_dtype = use_arrow_dtype , gpu = gpu )
1002+ supplier = load_supplier (data_folder , use_arrow_dtype = use_arrow_dtype , gpu = gpu )
1003+ part = load_part (data_folder , use_arrow_dtype = use_arrow_dtype , gpu = gpu )
1004+ partsupp = load_partsupp (data_folder , use_arrow_dtype = use_arrow_dtype , gpu = gpu )
9861005 mars .execute ([lineitem , orders , customer , nation , region , supplier , part , partsupp ])
9871006 print ("Reading time (s): " , time .time () - t1 )
9881007
@@ -1038,7 +1057,15 @@ def main():
10381057 "--use-arrow-dtype" ,
10391058 type = str ,
10401059 choices = ["true" , "false" ],
1041- help = ("Use arrow dtype to read parquet" ),
1060+ help = "Use arrow dtype to read parquet" ,
1061+ )
1062+ parser .add_argument (
1063+ "--gpu" , "-g" , action = "store_true" , help = "Use GPU to read parquet"
1064+ )
1065+ parser .add_argument (
1066+ "--cuda-devices" ,
1067+ type = str ,
1068+ help = "GPU devices to use, use comma to split, only available when using GPU" ,
10421069 )
10431070 args = parser .parse_args ()
10441071 folder = args .folder
@@ -1051,9 +1078,14 @@ def main():
10511078 queries = (
10521079 set (x .lower ().strip () for x in args .query .split ("," )) if args .query else None
10531080 )
1054- sess = mars .new_session (endpoint )
1081+ gpu = args .gpu
1082+ new_session_kwargs = dict ()
1083+ if gpu and args .cuda_devices :
1084+ cuda_devices = args .cuda_devices .split ("," )
1085+ new_session_kwargs ["cuda_devices" ] = [int (d ) for d in cuda_devices ]
1086+ sess = mars .new_session (endpoint , ** new_session_kwargs )
10551087 try :
1056- run_queries (folder , use_arrow_dtype = use_arrow_dtype )
1088+ run_queries (folder , use_arrow_dtype = use_arrow_dtype , gpu = gpu )
10571089 finally :
10581090 if endpoint is None :
10591091 sess .stop_server ()
0 commit comments