346346 (-> (hash-join colname lhs rhs (assoc options :lhs-missing? true ))
347347 :left-outer )))
348348
(defn- col-or-data->reader
  "Resolve the join key(s) `tuple-data` against dataset `ds` and return the
  object whose values are joined on.

  Arguments:
  * `tuple-data` - a single column name, or a sequential collection of
    column names.
  * `ds` - the dataset to read the key(s) from.
  * `outer?` - optional, defaults to false.  When truthy, a single key is
    looked up with `(get ds key [])`, so an absent key yields an empty
    vector instead of going through `ds-base/column`.

  Returns:
  * for a sequential `tuple-data` whose count is not 1: the result of
    `ds-readers/value-reader` (with `:copying? true`) over the selected
    columns;
  * otherwise: the single column (or `[]` in the `outer?` case described
    above)."
  ([tuple-data ds]
   ;; This arity previously had an empty body and therefore always returned
   ;; nil; delegate to the strict (non-outer) lookup instead.
   (col-or-data->reader tuple-data ds false))
  ([tuple-data ds outer?]
   (if (and (sequential? tuple-data)
            (not= 1 (count tuple-data)))
     (-> (ds-base/select-columns ds tuple-data)
         (ds-readers/value-reader {:copying? true}))
     ;; A one-element collection is treated exactly like a bare column name.
     (let [colname (if (sequential? tuple-data)
                     (first tuple-data)
                     tuple-data)]
       (if outer?
         (get ds colname [])
         ;; Else not having the column is an error (presumably raised by
         ;; ds-base/column - confirm against its implementation).
         (ds-base/column ds colname))))))
363+
(defn- ensure-sequential
  "Normalize a join-key specification to a sequential collection.

  * `nil` is returned unchanged, so that \"no key supplied\" stays
    distinguishable from a key list (wrapping it would produce `[nil]`,
    i.e. a bogus column name).
  * A value that is already `sequential?` is returned as-is.
  * Any other value (a single column name) is wrapped in a one-element
    vector."
  [colname]
  (cond
    (nil? colname) nil
    (sequential? colname) colname
    :else [colname]))
367+
(defn- filter-columns
  "Restrict the join-key list `collist` for dataset `ds`.

  * A falsey `collist` yields nil.
  * When `outer?` is truthy, returns a vector of the entries of `collist`
    that are among `(ds-base/column-names ds)`, in their original order.
  * Otherwise `collist` is returned untouched."
  [ds collist outer?]
  (cond
    (not collist) nil
    outer?        (let [present? (set (ds-base/column-names ds))]
                    ;; The set doubles as the membership predicate.
                    (filterv present? collist))
    :else         collist))
374+
349375
350376(defn pd-merge
351377 " Pandas-style [merge](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html).
@@ -419,15 +445,6 @@ outer-join [8 4]:
419445 ([left-ds right-ds options]
420446 (let [lhs-table-name (default-table-name left-ds " left" )
421447 rhs-table-name (default-table-name right-ds " right" )
422- col-or-data->reader
423- (fn [tuple-data ds]
424- (if (and (sequential? tuple-data)
425- (not= 1 (count tuple-data)))
426- (-> (ds-base/select-columns ds tuple-data)
427- (ds-readers/value-reader {:copying? true }))
428- (if (sequential? tuple-data)
429- (ds-base/column ds (first tuple-data))
430- (ds-base/column ds tuple-data))))
431448 how (get options :how :inner )]
432449 (if (identical? how :cross )
433450 (do
@@ -453,10 +470,10 @@ outer-join [8 4]:
453470 [lhs-table-name lhs-columns]
454471 [rhs-table-name rhs-columns]))
455472 (update-join-metadata lhs-table-name rhs-table-name))))
456- (let [left-on (get options :left-on (get options :on ))
457- right-on (get options :right-on (get options :on ))
458- left-on ( when left-on ( if-not ( sequential? left-on) [left-on] left-on))
459- right-on ( when right-on ( if-not ( sequential? right-on) [right-on] right-on ))
473+ (let [left-on (ensure-sequential ( get options :left-on (get options :on ) ))
474+ right-on (ensure-sequential ( get options :right-on (get options :on ) ))
475+
476+ outer? ( identical? :outer ( get options :how ))
460477 on-int (->> (concat left-on right-on)
461478 (filter (set/intersection (set left-on) (set right-on)))
462479 (distinct )
@@ -465,8 +482,10 @@ outer-join [8 4]:
465482 (== (count left-on) (count right-on))
466483 " Number of left join columns (%d) doesn't equal number of right join columns %d"
467484 (count left-on) (count right-on))
468- left-join-data (col-or-data->reader left-on left-ds)
469- right-join-data (col-or-data->reader right-on right-ds)
485+ left-on (filter-columns left-ds left-on outer?)
486+ right-on (filter-columns right-ds right-on outer?)
487+ left-join-data (col-or-data->reader left-on left-ds outer?)
488+ right-join-data (col-or-data->reader right-on right-ds outer?)
470489
471490
472491 {:keys [lhs-indexes rhs-indexes lhs-missing rhs-missing]}
@@ -524,30 +543,36 @@ outer-join [8 4]:
524543 [rhs-table-name rhs-cols]))
525544 (update-join-metadata lhs-table-name rhs-table-name)))
526545 :outer
527- (let [n-left-empty (count rhs-missing)
528- n-right-empty (count lhs-missing)
529- ; ;Order is intersection, left-missing, right-missing
530- lhs-indexes (add-all! (dtype/clone lhs-indexes) lhs-missing)
531- left-valid (ds-base/select-rows left-ds lhs-indexes)
532- right-valid (ds-base/select-rows right-ds rhs-indexes)
533- right-missing (ds-base/select-rows right-ds rhs-missing)
534- ; ;For the columns we perhaps joined on
535- intersection-ds (-> (ds-base/select-columns left-valid on-int)
536- (ds-base/concat-copying (ds-base/select-columns
537- right-missing on-int)))
538- left-full (-> (ds-base/remove-columns left-valid on-int)
539- (ds-base/extend-with-empty n-left-empty))
540- right-full (-> (ds-base/remove-columns right-valid on-int)
541- (ds-base/extend-with-empty n-right-empty)
542- (ds-base/concat-copying (ds-base/remove-columns
543- right-missing on-int)))]
544- (-> (ds-impl/new-dataset
545- " outer-join"
546- (nice-column-names
547- [lhs-table-name (concat (ds-base/columns intersection-ds)
548- (ds-base/columns left-full))]
549- [rhs-table-name (ds-base/columns right-full)]))
550- (update-join-metadata lhs-table-name rhs-table-name))))))))
546+ (cond
547+ (== 0 (ds-base/row-count left-ds))
548+ (vary-meta right-ds assoc :name " outer-join" )
549+ (== 0 (ds-base/row-count right-ds))
550+ (vary-meta left-ds assoc :name " outer-join" )
551+ :else
552+ (let [n-left-empty (count rhs-missing)
553+ n-right-empty (count lhs-missing)
554+ ; ;Order is intersection, left-missing, right-missing
555+ lhs-indexes (add-all! (dtype/clone lhs-indexes) lhs-missing)
556+ left-valid (ds-base/select-rows left-ds lhs-indexes)
557+ right-valid (ds-base/select-rows right-ds rhs-indexes)
558+ right-missing (ds-base/select-rows right-ds rhs-missing)
559+ ; ;For the columns we perhaps joined on
560+ intersection-ds (-> (ds-base/select-columns left-valid on-int)
561+ (ds-base/concat-copying (ds-base/select-columns
562+ right-missing on-int)))
563+ left-full (-> (ds-base/remove-columns left-valid on-int)
564+ (ds-base/extend-with-empty n-left-empty))
565+ right-full (-> (ds-base/remove-columns right-valid on-int)
566+ (ds-base/extend-with-empty n-right-empty)
567+ (ds-base/concat-copying (ds-base/remove-columns
568+ right-missing on-int)))]
569+ (-> (ds-impl/new-dataset
570+ " outer-join"
571+ (nice-column-names
572+ [lhs-table-name (concat (ds-base/columns intersection-ds)
573+ (ds-base/columns left-full))]
574+ [rhs-table-name (ds-base/columns right-full)]))
575+ (update-join-metadata lhs-table-name rhs-table-name)))))))))
551576 ([left-ds right-ds]
552577 (pd-merge left-ds right-ds {:on (set/intersection
553578 (set (ds-base/column-names left-ds))
0 commit comments