@@ -521,6 +521,80 @@ public actor DataFrame: Sendable {
}
}

+  /// Join with another `DataFrame`.
+  /// Behaves as an INNER JOIN and requires a subsequent join predicate.
+  /// - Parameter right: Right side of the join operation.
+  /// - Returns: A `DataFrame`.
+  public func join(_ right: DataFrame) async -> DataFrame {
+    let right = await (right.getPlan() as! Plan).root
+    let plan = SparkConnectClient.getJoin(self.plan.root, right, JoinType.inner)
+    return DataFrame(spark: self.spark, plan: plan)
+  }
+
+  /// Equi-join with another `DataFrame` using the given column. A cross join with a predicate is
+  /// specified as an inner join. If you would explicitly like to perform a cross join, use the
+  /// `crossJoin` method.
+  /// - Parameters:
+  ///   - right: Right side of the join operation.
+  ///   - usingColumn: Name of the column to join on. This column must exist on both sides.
+  ///   - joinType: Type of join to perform. Default `inner`.
+  /// - Returns: A `DataFrame`.
+  public func join(_ right: DataFrame, _ usingColumn: String, _ joinType: String = "inner") async -> DataFrame {
+    await join(right, [usingColumn], joinType)
+  }
+
+  /// Equi-join with another `DataFrame` using the given columns.
+  /// - Parameters:
+  ///   - other: Right side of the join operation.
+  ///   - usingColumns: Names of the columns to join on. These columns must exist on both sides.
+  ///   - joinType: A join type name. Default `inner`.
+  /// - Returns: A `DataFrame`.
+  public func join(_ other: DataFrame, _ usingColumns: [String], _ joinType: String = "inner") async -> DataFrame {
+    let right = await (other.getPlan() as! Plan).root
+    let plan = SparkConnectClient.getJoin(
+      self.plan.root,
+      right,
+      joinType.toJoinType,
+      usingColumns: usingColumns
+    )
+    return DataFrame(spark: self.spark, plan: plan)
+  }
+
+  /// Inner join with another `DataFrame` using the given join expression.
+  /// - Parameters:
+  ///   - right: Right side of the join operation.
+  ///   - joinExprs: A join expression string.
+  /// - Returns: A `DataFrame`.
+  public func join(_ right: DataFrame, joinExprs: String) async -> DataFrame {
+    return await join(right, joinExprs: joinExprs, joinType: "inner")
+  }
+
+  /// Join with another `DataFrame` using the given join expression and join type.
+  /// - Parameters:
+  ///   - right: Right side of the join operation.
+  ///   - joinExprs: A join expression string.
+  ///   - joinType: A join type name.
+  /// - Returns: A `DataFrame`.
+  public func join(_ right: DataFrame, joinExprs: String, joinType: String) async -> DataFrame {
+    let rightPlan = await (right.getPlan() as! Plan).root
+    let plan = SparkConnectClient.getJoin(
+      self.plan.root,
+      rightPlan,
+      joinType.toJoinType,
+      joinCondition: joinExprs
+    )
+    return DataFrame(spark: self.spark, plan: plan)
+  }
+
+  /// Explicit cartesian join with another `DataFrame`. Cartesian joins are very expensive
+  /// without an extra filter that can be pushed down.
+  /// - Parameter right: Right side of the join operation.
+  /// - Returns: A `DataFrame`.
+  public func crossJoin(_ right: DataFrame) async -> DataFrame {
+    let rightPlan = await (right.getPlan() as! Plan).root
+    let plan = SparkConnectClient.getJoin(self.plan.root, rightPlan, JoinType.cross)
+    return DataFrame(spark: self.spark, plan: plan)
+  }
+
  /// Returns a new `DataFrame` containing rows in this `DataFrame` but not in another `DataFrame`.
  /// This is equivalent to `EXCEPT DISTINCT` in SQL.
  /// - Parameter other: A `DataFrame` to exclude.
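A quick usage sketch of the new overloads for reviewers. Only the `join`/`crossJoin` calls come from this diff; `SparkSession.builder.getOrCreate()`, `sql(_:)`, and `show()` are assumed from the library's existing API, and the queries and column names are made up:

```swift
// Sketch only: session setup and sample frames are assumptions, not part of this diff.
let spark = try await SparkSession.builder.getOrCreate()
let users = try await spark.sql("SELECT 1 AS id, 'Alice' AS name")
let scores = try await spark.sql("SELECT 1 AS id, 96 AS score")

// Equi-join on a shared column name; joinType defaults to "inner".
let byColumn = await users.join(scores, "id")

// Join with an explicit expression string and join type.
let byExpr = await users.join(scores, joinExprs: "score > 90", joinType: "left")

// Explicit cartesian product.
let crossed = await users.crossJoin(scores)

try await byColumn.show()
try await byExpr.show()
try await crossed.show()
```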