@@ -521,6 +521,80 @@ public actor DataFrame: Sendable {
521521 }
522522 }
523523
/// Joins this `DataFrame` with another `DataFrame` without specifying a join condition.
/// Behaves as an INNER JOIN and requires a subsequent join predicate.
/// - Parameter right: Right side of the join operation.
/// - Returns: A new `DataFrame` built on the resulting join plan.
public func join(_ right: DataFrame) async -> DataFrame {
  // Distinct local names keep the `right` parameter and the actor's own `plan` unshadowed.
  let rightRoot = await (right.getPlan() as! Plan).root
  let joinPlan = SparkConnectClient.getJoin(self.plan.root, rightRoot, JoinType.inner)
  return DataFrame(spark: self.spark, plan: joinPlan)
}
533+
/// Equi-join with another `DataFrame` using a single column name.
/// A cross join with a predicate is specified as an inner join. To perform an explicit
/// cross join, use the `crossJoin` method instead.
/// - Parameters:
///   - right: Right side of the join operation.
///   - usingColumn: Name of the column to join on. This column must exist on both sides.
///   - joinType: Type of join to perform. Defaults to `inner`.
/// - Returns: A new `DataFrame` produced by the multi-column `join` overload this forwards to.
public func join(_ right: DataFrame, _ usingColumn: String, _ joinType: String = " inner ") async -> DataFrame {
  // Wrap the single name so the call resolves to the `[String]` overload.
  let usingColumns = [usingColumn]
  return await join(right, usingColumns, joinType)
}
545+
/// Equi-join with another `DataFrame` using the given column names and join type.
/// - Parameters:
///   - other: Right side of the join operation.
///   - usingColumns: Names of the columns to join on. These columns must exist on both sides.
///   - joinType: A join type name, converted through `toJoinType`. Defaults to `inner`.
/// - Returns: A new `DataFrame` built on the resulting join plan.
public func join(_ other: DataFrame, _ usingColumns: [String], _ joinType: String = " inner ") async -> DataFrame {
  let rightRoot = await (other.getPlan() as! Plan).root
  let joinPlan = SparkConnectClient.getJoin(
    self.plan.root, rightRoot, joinType.toJoinType, usingColumns: usingColumns)
  return DataFrame(spark: self.spark, plan: joinPlan)
}
562+
/// Inner join with another `DataFrame` using the given join expression.
/// - Parameters:
///   - right: Right side of the join operation.
///   - joinExprs: A join expression string.
/// - Returns: A new `DataFrame` produced by the `join(_:joinExprs:joinType:)` overload.
public func join(_ right: DataFrame, joinExprs: String) async -> DataFrame {
  // This overload always forwards the same fixed join type name.
  let defaultJoinType = " inner "
  return await join(right, joinExprs: joinExprs, joinType: defaultJoinType)
}
571+
/// Join with another `DataFrame` using the given join expression and join type.
/// - Parameters:
///   - right: Right side of the join operation.
///   - joinExprs: A join expression string, passed on as the join condition.
///   - joinType: A join type name, converted through `toJoinType`.
/// - Returns: A new `DataFrame` built on the resulting join plan.
public func join(_ right: DataFrame, joinExprs: String, joinType: String) async -> DataFrame {
  let rightRoot = await (right.getPlan() as! Plan).root
  let joinPlan = SparkConnectClient.getJoin(
    self.plan.root, rightRoot, joinType.toJoinType, joinCondition: joinExprs)
  return DataFrame(spark: self.spark, plan: joinPlan)
}
588+
/// Explicit cartesian join with another `DataFrame`.
/// Cartesian joins are very expensive without an extra filter that can be pushed down.
/// - Parameter right: Right side of the join operation.
/// - Returns: A new `DataFrame` built on the resulting cross-join plan.
public func crossJoin(_ right: DataFrame) async -> DataFrame {
  let rightRoot = await (right.getPlan() as! Plan).root
  let crossPlan = SparkConnectClient.getJoin(self.plan.root, rightRoot, JoinType.cross)
  return DataFrame(spark: self.spark, plan: crossPlan)
}
597+
524598 /// Returns a new `DataFrame` containing rows in this `DataFrame` but not in another `DataFrame`.
525599 /// This is equivalent to `EXCEPT DISTINCT` in SQL.
526600 /// - Parameter other: A `DataFrame` to exclude.
0 commit comments