|  | 
| 47 | 47 | 
 | 
| 48 | 48 | ctx = SessionContext() | 
| 49 | 49 | 
 | 
| 50 |  | -df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns( | 
| 51 |  | -    "p_partkey", "p_type" | 
| 52 |  | -) | 
| 53 |  | -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( | 
|  | 50 | +df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_type") | 
|  | 51 | +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( | 
| 54 | 52 |     "s_suppkey", "s_nationkey" | 
| 55 | 53 | ) | 
| 56 |  | -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( | 
|  | 54 | +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( | 
| 57 | 55 |     "l_partkey", "l_extendedprice", "l_discount", "l_suppkey", "l_orderkey" | 
| 58 | 56 | ) | 
| 59 |  | -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( | 
|  | 57 | +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( | 
| 60 | 58 |     "o_orderkey", "o_custkey", "o_orderdate" | 
| 61 | 59 | ) | 
| 62 |  | -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns( | 
|  | 60 | +df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( | 
| 63 | 61 |     "c_custkey", "c_nationkey" | 
| 64 | 62 | ) | 
| 65 |  | -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns( | 
|  | 63 | +df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( | 
| 66 | 64 |     "n_nationkey", "n_name", "n_regionkey" | 
| 67 | 65 | ) | 
| 68 |  | -df_region = ctx.read_parquet(get_data_path("region.parquet")).select_columns( | 
|  | 66 | +df_region = ctx.read_parquet(get_data_path("region.parquet")).select( | 
| 69 | 67 |     "r_regionkey", "r_name" | 
| 70 | 68 | ) | 
| 71 | 69 | 
 | 
|  | 
| 133 | 131 | 
 | 
| 134 | 132 | # When we join to the customer dataframe, we don't want ambiguity with other columns, so | 
| 135 | 133 | # keep only the supplier key that we need | 
| 136 |  | -df_national_suppliers = df_national_suppliers.select_columns("s_suppkey") | 
|  | 134 | +df_national_suppliers = df_national_suppliers.select("s_suppkey") | 
| 137 | 135 | 
 | 
| 138 | 136 | 
 | 
| 139 | 137 | # Part 3: Combine suppliers and customers and compute the market share | 
|  | 
0 commit comments