| 
89 | 89 | 
 
  | 
90 | 90 | # After this join we have all of the possible sales nations  | 
91 | 91 | df_regional_customers = df_regional_customers.join(  | 
92 |  | -    df_nation, (["r_regionkey"], ["n_regionkey"]), how="inner"  | 
 | 92 | +    df_nation, left_on=["r_regionkey"], right_on=["n_regionkey"], how="inner"  | 
93 | 93 | )  | 
94 | 94 | 
 
  | 
95 | 95 | # Now find the possible customers  | 
96 | 96 | df_regional_customers = df_regional_customers.join(  | 
97 |  | -    df_customer, (["n_nationkey"], ["c_nationkey"]), how="inner"  | 
 | 97 | +    df_customer, left_on=["n_nationkey"], right_on=["c_nationkey"], how="inner"  | 
98 | 98 | )  | 
99 | 99 | 
 
  | 
100 | 100 | # Next find orders for these customers  | 
101 | 101 | df_regional_customers = df_regional_customers.join(  | 
102 |  | -    df_orders, (["c_custkey"], ["o_custkey"]), how="inner"  | 
 | 102 | +    df_orders, left_on=["c_custkey"], right_on=["o_custkey"], how="inner"  | 
103 | 103 | )  | 
104 | 104 | 
 
  | 
105 | 105 | # Find all line items from these orders  | 
106 | 106 | df_regional_customers = df_regional_customers.join(  | 
107 |  | -    df_lineitem, (["o_orderkey"], ["l_orderkey"]), how="inner"  | 
 | 107 | +    df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner"  | 
108 | 108 | )  | 
109 | 109 | 
 
  | 
110 | 110 | # Limit to the part of interest  | 
111 | 111 | df_regional_customers = df_regional_customers.join(  | 
112 |  | -    df_part, (["l_partkey"], ["p_partkey"]), how="inner"  | 
 | 112 | +    df_part, left_on=["l_partkey"], right_on=["p_partkey"], how="inner"  | 
113 | 113 | )  | 
114 | 114 | 
 
  | 
115 | 115 | # Compute the volume for each line item  | 
 | 
126 | 126 | 
 
  | 
127 | 127 | # Determine the suppliers by the limited nation key we have in our single row df above  | 
128 | 128 | df_national_suppliers = df_national_suppliers.join(  | 
129 |  | -    df_supplier, (["n_nationkey"], ["s_nationkey"]), how="inner"  | 
 | 129 | +    df_supplier, left_on=["n_nationkey"], right_on=["s_nationkey"], how="inner"  | 
130 | 130 | )  | 
131 | 131 | 
 
  | 
132 | 132 | # When we join to the customer dataframe, we don't want to confuse other columns, so only  | 
 | 
141 | 141 | # column only from suppliers in the nation we are evaluating.  | 
142 | 142 | 
 
  | 
143 | 143 | df = df_regional_customers.join(  | 
144 |  | -    df_national_suppliers, (["l_suppkey"], ["s_suppkey"]), how="left"  | 
 | 144 | +    df_national_suppliers, left_on=["l_suppkey"], right_on=["s_suppkey"], how="left"  | 
145 | 145 | )  | 
146 | 146 | 
 
  | 
147 | 147 | # Use a case statement to compute the volume sold by suppliers in the nation of interest  | 
 | 
0 commit comments