@@ -499,6 +499,86 @@ public actor DataFrame: Sendable {
499499 }
500500 }
501501
/// Computes the set difference between this `DataFrame` and `other`: the result keeps the rows of
/// this `DataFrame` that do not appear in `other`.
/// This corresponds to `EXCEPT DISTINCT` in SQL.
/// - Parameter other: The `DataFrame` whose rows are excluded.
/// - Returns: A new `DataFrame` wrapping the resulting set-operation plan.
public func except(_ other: DataFrame) async -> DataFrame {
  // `other` is a separate actor, so its plan has to be awaited before reading the root relation.
  let otherPlan = await other.getPlan() as! Plan
  let exceptPlan = SparkConnectClient.getSetOperation(
    self.plan.root,
    otherPlan.root,
    SetOpType.except
  )
  return DataFrame(spark: self.spark, plan: exceptPlan)
}
511+
/// Computes the difference between this `DataFrame` and `other` while keeping duplicate rows.
/// This corresponds to `EXCEPT ALL` in SQL.
/// - Parameter other: The `DataFrame` whose rows are excluded.
/// - Returns: A new `DataFrame` wrapping the resulting set-operation plan.
public func exceptAll(_ other: DataFrame) async -> DataFrame {
  // `other` is a separate actor, so its plan has to be awaited before reading the root relation.
  let otherPlan = await other.getPlan() as! Plan
  let exceptAllPlan = SparkConnectClient.getSetOperation(
    self.plan.root,
    otherPlan.root,
    SetOpType.except,
    isAll: true
  )
  return DataFrame(spark: self.spark, plan: exceptAllPlan)
}
521+
/// Computes the intersection of this `DataFrame` and `other`: the result keeps only the rows
/// present in both.
/// This corresponds to `INTERSECT` in SQL.
/// - Parameter other: The `DataFrame` to intersect with.
/// - Returns: A new `DataFrame` wrapping the resulting set-operation plan.
public func intersect(_ other: DataFrame) async -> DataFrame {
  // `other` is a separate actor, so its plan has to be awaited before reading the root relation.
  let otherPlan = await other.getPlan() as! Plan
  let intersectPlan = SparkConnectClient.getSetOperation(
    self.plan.root,
    otherPlan.root,
    SetOpType.intersect
  )
  return DataFrame(spark: self.spark, plan: intersectPlan)
}
531+
/// Computes the intersection of this `DataFrame` and `other` while keeping duplicate rows.
/// This corresponds to `INTERSECT ALL` in SQL.
/// - Parameter other: The `DataFrame` to intersect with.
/// - Returns: A new `DataFrame` wrapping the resulting set-operation plan.
public func intersectAll(_ other: DataFrame) async -> DataFrame {
  // `other` is a separate actor, so its plan has to be awaited before reading the root relation.
  let otherPlan = await other.getPlan() as! Plan
  let intersectAllPlan = SparkConnectClient.getSetOperation(
    self.plan.root,
    otherPlan.root,
    SetOpType.intersect,
    isAll: true
  )
  return DataFrame(spark: self.spark, plan: intersectAllPlan)
}
541+
/// Combines the rows of this `DataFrame` with the rows of `other`.
/// This corresponds to `UNION ALL` in SQL, so duplicates are kept. For a SQL-style set union
/// (with deduplication of elements), follow this call with `distinct`.
/// As is standard in SQL, columns are resolved by position (not by name).
/// - Parameter other: The `DataFrame` to union with.
/// - Returns: A new `DataFrame` wrapping the resulting set-operation plan.
public func union(_ other: DataFrame) async -> DataFrame {
  // `other` is a separate actor, so its plan has to be awaited before reading the root relation.
  let otherPlan = await other.getPlan() as! Plan
  let unionPlan = SparkConnectClient.getSetOperation(
    self.plan.root,
    otherPlan.root,
    SetOpType.union,
    isAll: true
  )
  return DataFrame(spark: self.spark, plan: unionPlan)
}
553+
/// Combines the rows of this `DataFrame` with the rows of `other`.
/// This is an alias of `union` and simply forwards to it.
/// - Parameter other: The `DataFrame` to union with.
/// - Returns: The `DataFrame` produced by `union`.
public func unionAll(_ other: DataFrame) async -> DataFrame {
  await self.union(other)
}
561+
/// Returns a new `DataFrame` containing the union of rows in this `DataFrame` and another
/// `DataFrame`.
/// The difference between this function and `union` is that this function resolves columns by
/// name (not by position).
/// When `allowMissingColumns` is `true`, the set of column names in this and the other
/// `DataFrame` can differ; missing columns will be filled with null. Further, the missing columns
/// of this `DataFrame` will be added at the end of the schema of the union result.
/// - Parameters:
///   - other: A `DataFrame` to union with.
///   - allowMissingColumns: Whether the two `DataFrame`s may have different sets of column names.
///     Defaults to `false`.
/// - Returns: A `DataFrame`.
public func unionByName(_ other: DataFrame, _ allowMissingColumns: Bool = false) async -> DataFrame {
  // `other` is a separate actor, so its plan has to be awaited before reading the root relation.
  let right = await (other.getPlan() as! Plan).root
  let plan = SparkConnectClient.getSetOperation(
    self.plan.root,
    right,
    SetOpType.union,
    // Request `isAll` explicitly, exactly as `union` does, so that duplicate rows are kept and
    // the only difference from `union` is by-name column resolution. Relying on the helper's
    // default for `isAll` would make this method's duplicate handling depend on that default.
    isAll: true,
    byName: true,
    allowMissingColumns: allowMissingColumns
  )
  return DataFrame(spark: self.spark, plan: plan)
}
581+
502582 /// Returns a ``DataFrameWriter`` that can be used to write non-streaming data.
503583 public var write : DataFrameWriter {
504584 get {
0 commit comments