@@ -522,6 +522,87 @@ public actor DataFrame: Sendable {
522522 }
523523 }
524524
/// Produces a new `DataFrame` holding the rows of this `DataFrame` that do not appear in `other`.
/// This corresponds to `EXCEPT DISTINCT` in SQL.
///
/// The result is built from an `except` set-operation plan over the plan roots of both
/// `DataFrame`s and is bound to this `DataFrame`'s `spark` session.
/// - Parameter other: The `DataFrame` whose rows are excluded.
/// - Returns: A `DataFrame` wrapping the set-operation plan.
public func except(_ other: DataFrame) async -> DataFrame {
  // `other` is a separate actor, so reading its plan requires an `await`.
  let otherRoot = await (other.getPlan() as! Plan).root
  return DataFrame(
    spark: spark,
    plan: SparkConnectClient.getSetOperation(plan.root, otherRoot, SetOpType.except)
  )
}
534+
/// Produces a new `DataFrame` holding the rows of this `DataFrame` that do not appear in `other`,
/// keeping duplicates. This corresponds to `EXCEPT ALL` in SQL.
///
/// The result is built from an `except` set-operation plan with `isAll` enabled, over the plan
/// roots of both `DataFrame`s.
/// - Parameter other: The `DataFrame` whose rows are excluded.
/// - Returns: A `DataFrame` wrapping the set-operation plan.
public func exceptAll(_ other: DataFrame) async -> DataFrame {
  // `other` is a separate actor, so reading its plan requires an `await`.
  let otherRoot = await (other.getPlan() as! Plan).root
  return DataFrame(
    spark: spark,
    plan: SparkConnectClient.getSetOperation(
      plan.root,
      otherRoot,
      SetOpType.except,
      isAll: true
    )
  )
}
544+
/// Produces a new `DataFrame` holding only the rows present in both this `DataFrame` and `other`.
/// This corresponds to `INTERSECT` in SQL.
///
/// The result is built from an `intersect` set-operation plan over the plan roots of both
/// `DataFrame`s and is bound to this `DataFrame`'s `spark` session.
/// - Parameter other: The `DataFrame` to intersect with.
/// - Returns: A `DataFrame` wrapping the set-operation plan.
public func intersect(_ other: DataFrame) async -> DataFrame {
  // `other` is a separate actor, so reading its plan requires an `await`.
  let otherRoot = await (other.getPlan() as! Plan).root
  return DataFrame(
    spark: spark,
    plan: SparkConnectClient.getSetOperation(plan.root, otherRoot, SetOpType.intersect)
  )
}
554+
/// Produces a new `DataFrame` holding only the rows present in both this `DataFrame` and `other`,
/// keeping duplicates. This corresponds to `INTERSECT ALL` in SQL.
///
/// The result is built from an `intersect` set-operation plan with `isAll` enabled, over the
/// plan roots of both `DataFrame`s.
/// - Parameter other: The `DataFrame` to intersect with.
/// - Returns: A `DataFrame` wrapping the set-operation plan.
public func intersectAll(_ other: DataFrame) async -> DataFrame {
  // `other` is a separate actor, so reading its plan requires an `await`.
  let otherRoot = await (other.getPlan() as! Plan).root
  return DataFrame(
    spark: spark,
    plan: SparkConnectClient.getSetOperation(
      plan.root,
      otherRoot,
      SetOpType.intersect,
      isAll: true
    )
  )
}
564+
/// Produces a new `DataFrame` holding the union of the rows of this `DataFrame` and `other`.
/// This corresponds to `UNION ALL` in SQL. For a SQL-style set union (with deduplication of
/// elements), follow this call with `distinct`.
/// As is standard in SQL, columns are resolved by position (not by name).
///
/// The result is built from a `union` set-operation plan with `isAll` enabled, over the plan
/// roots of both `DataFrame`s.
/// - Parameter other: The `DataFrame` to union with.
/// - Returns: A `DataFrame` wrapping the set-operation plan.
public func union(_ other: DataFrame) async -> DataFrame {
  // `other` is a separate actor, so reading its plan requires an `await`.
  let otherRoot = await (other.getPlan() as! Plan).root
  return DataFrame(
    spark: spark,
    plan: SparkConnectClient.getSetOperation(
      plan.root,
      otherRoot,
      SetOpType.union,
      isAll: true
    )
  )
}
576+
/// Produces a new `DataFrame` holding the union of the rows of this `DataFrame` and `other`.
/// This is an alias of `union(_:)` and simply forwards to it.
/// - Parameter other: The `DataFrame` to union with.
/// - Returns: The `DataFrame` returned by `union(_:)`.
public func unionAll(_ other: DataFrame) async -> DataFrame {
  await self.union(other)
}
584+
/// Produces a new `DataFrame` holding the union of the rows of this `DataFrame` and `other`.
/// Unlike `union(_:)`, this function resolves columns by name (not by position).
///
/// When `allowMissingColumns` is `true`, the sets of column names in this and the other
/// `DataFrame` can differ; missing columns will be filled with null. Further, the missing
/// columns of this `DataFrame` will be added at the end in the schema of the union result.
///
/// The result is built from a `union` set-operation plan with `isAll` and `byName` enabled,
/// over the plan roots of both `DataFrame`s.
/// - Parameters:
///   - other: The `DataFrame` to union with.
///   - allowMissingColumns: Whether the two `DataFrame`s may have differing sets of column
///     names. Defaults to `false`.
/// - Returns: A `DataFrame` wrapping the set-operation plan.
public func unionByName(_ other: DataFrame, _ allowMissingColumns: Bool = false) async -> DataFrame {
  // `other` is a separate actor, so reading its plan requires an `await`.
  let otherRoot = await (other.getPlan() as! Plan).root
  let unionPlan = SparkConnectClient.getSetOperation(
    plan.root, otherRoot, SetOpType.union,
    isAll: true, byName: true, allowMissingColumns: allowMissingColumns
  )
  return DataFrame(spark: spark, plan: unionPlan)
}
605+
525606 /// Returns a ``DataFrameWriter`` that can be used to write non-streaming data.
526607 public var write : DataFrameWriter {
527608 get {
0 commit comments