@@ -522,6 +522,87 @@ public actor DataFrame: Sendable {
522
522
}
523
523
}
524
524
525
/// Computes the rows of this `DataFrame` that are absent from `other`.
/// The result follows the semantics of SQL `EXCEPT DISTINCT`.
/// - Parameter other: The `DataFrame` whose rows are excluded from the result.
/// - Returns: A new `DataFrame` backed by the resulting set-operation plan.
public func except(_ other: DataFrame) async -> DataFrame {
  // `getPlan()` is invoked on another `DataFrame` actor, hence the `await`.
  // NOTE(review): the force cast traps if `getPlan()` ever yields a non-`Plan` value.
  let otherPlan = await other.getPlan() as! Plan
  let differencePlan = SparkConnectClient.getSetOperation(plan.root, otherPlan.root, SetOpType.except)
  return DataFrame(spark: spark, plan: differencePlan)
}
534
+
535
/// Computes the rows of this `DataFrame` that are absent from `other`, keeping duplicates.
/// The result follows the semantics of SQL `EXCEPT ALL`.
/// - Parameter other: The `DataFrame` whose rows are excluded from the result.
/// - Returns: A new `DataFrame` backed by the resulting set-operation plan.
public func exceptAll(_ other: DataFrame) async -> DataFrame {
  // `getPlan()` is invoked on another `DataFrame` actor, hence the `await`.
  let otherPlan = await other.getPlan() as! Plan
  let differencePlan = SparkConnectClient.getSetOperation(
    plan.root,
    otherPlan.root,
    SetOpType.except,
    isAll: true  // `ALL` variant: duplicates are preserved.
  )
  return DataFrame(spark: spark, plan: differencePlan)
}
544
+
545
/// Computes the rows that appear in both this `DataFrame` and `other`.
/// The result follows the semantics of SQL `INTERSECT`.
/// - Parameter other: The `DataFrame` to intersect with.
/// - Returns: A new `DataFrame` backed by the resulting set-operation plan.
public func intersect(_ other: DataFrame) async -> DataFrame {
  // `getPlan()` is invoked on another `DataFrame` actor, hence the `await`.
  let otherPlan = await other.getPlan() as! Plan
  let intersectionPlan = SparkConnectClient.getSetOperation(plan.root, otherPlan.root, SetOpType.intersect)
  return DataFrame(spark: spark, plan: intersectionPlan)
}
554
+
555
/// Computes the rows that appear in both this `DataFrame` and `other`, keeping duplicates.
/// The result follows the semantics of SQL `INTERSECT ALL`.
/// - Parameter other: The `DataFrame` to intersect with.
/// - Returns: A new `DataFrame` backed by the resulting set-operation plan.
public func intersectAll(_ other: DataFrame) async -> DataFrame {
  // `getPlan()` is invoked on another `DataFrame` actor, hence the `await`.
  let otherPlan = await other.getPlan() as! Plan
  let intersectionPlan = SparkConnectClient.getSetOperation(
    plan.root,
    otherPlan.root,
    SetOpType.intersect,
    isAll: true  // `ALL` variant: duplicates are preserved.
  )
  return DataFrame(spark: spark, plan: intersectionPlan)
}
564
+
565
/// Combines the rows of this `DataFrame` with the rows of `other`.
/// The result follows the semantics of SQL `UNION ALL`. For a SQL-style set union
/// (with deduplication of elements), follow this call with `distinct`.
/// As is standard in SQL, columns are resolved by position (not by name).
/// - Parameter other: The `DataFrame` to union with.
/// - Returns: A new `DataFrame` backed by the resulting set-operation plan.
public func union(_ other: DataFrame) async -> DataFrame {
  // `getPlan()` is invoked on another `DataFrame` actor, hence the `await`.
  let otherPlan = await other.getPlan() as! Plan
  let unionPlan = SparkConnectClient.getSetOperation(
    plan.root,
    otherPlan.root,
    SetOpType.union,
    isAll: true  // `UNION ALL`: no deduplication is requested.
  )
  return DataFrame(spark: spark, plan: unionPlan)
}
576
+
577
/// Combines the rows of this `DataFrame` with the rows of `other`.
/// This is an alias of `union` and simply forwards to it.
/// - Parameter other: The `DataFrame` to union with.
/// - Returns: The `DataFrame` produced by `union`.
public func unionAll(_ other: DataFrame) async -> DataFrame {
  await union(other)
}
584
+
585
/// Combines the rows of this `DataFrame` with the rows of `other`, resolving columns by name.
/// Unlike `union`, which resolves columns by position, this function matches them by name.
/// When `allowMissingColumns` is `true`, the set of column names in this and the other
/// `DataFrame` can differ; missing columns will be filled with null. Further, the missing
/// columns of this `DataFrame` will be added at the end in the schema of the union result.
/// - Parameters:
///   - other: The `DataFrame` to union with.
///   - allowMissingColumns: Whether the two column sets are allowed to differ. Defaults to `false`.
/// - Returns: A new `DataFrame` backed by the resulting set-operation plan.
public func unionByName(_ other: DataFrame, _ allowMissingColumns: Bool = false) async -> DataFrame {
  // `getPlan()` is invoked on another `DataFrame` actor, hence the `await`.
  let otherPlan = await other.getPlan() as! Plan
  let leftRoot = plan.root
  let rightRoot = otherPlan.root
  let unionPlan = SparkConnectClient.getSetOperation(
    leftRoot,
    rightRoot,
    SetOpType.union,
    isAll: true,
    byName: true,
    allowMissingColumns: allowMissingColumns
  )
  return DataFrame(spark: spark, plan: unionPlan)
}
605
+
525
606
/// Returns a ``DataFrameWriter`` that can be used to write non-streaming data.
526
607
public var write : DataFrameWriter {
527
608
get {
0 commit comments