@@ -499,6 +499,86 @@ public actor DataFrame: Sendable {
499
499
}
500
500
}
501
501
502
/// Builds a `DataFrame` holding the rows of this `DataFrame` that do not appear in `other`.
/// Corresponds to SQL `EXCEPT DISTINCT`.
///
/// Traps at runtime if `other.getPlan()` does not yield a `Plan` (forced cast).
/// - Parameter other: The `DataFrame` whose rows are excluded.
/// - Returns: A `DataFrame` backed by the resulting set-operation plan.
public func except(_ other: DataFrame) async -> DataFrame {
  let otherPlan = await other.getPlan() as! Plan
  return DataFrame(
    spark: self.spark,
    plan: SparkConnectClient.getSetOperation(self.plan.root, otherPlan.root, SetOpType.except)
  )
}
511
+
512
/// Builds a `DataFrame` holding the rows of this `DataFrame` that do not appear in `other`,
/// keeping duplicates. Corresponds to SQL `EXCEPT ALL`.
///
/// Traps at runtime if `other.getPlan()` does not yield a `Plan` (forced cast).
/// - Parameter other: The `DataFrame` whose rows are excluded.
/// - Returns: A `DataFrame` backed by the resulting set-operation plan.
public func exceptAll(_ other: DataFrame) async -> DataFrame {
  let otherPlan = await other.getPlan() as! Plan
  return DataFrame(
    spark: self.spark,
    plan: SparkConnectClient.getSetOperation(
      self.plan.root,
      otherPlan.root,
      SetOpType.except,
      isAll: true
    )
  )
}
521
+
522
/// Builds a `DataFrame` holding only the rows present in both this `DataFrame` and `other`.
/// Corresponds to SQL `INTERSECT`.
///
/// Traps at runtime if `other.getPlan()` does not yield a `Plan` (forced cast).
/// - Parameter other: The `DataFrame` to intersect with.
/// - Returns: A `DataFrame` backed by the resulting set-operation plan.
public func intersect(_ other: DataFrame) async -> DataFrame {
  let otherPlan = await other.getPlan() as! Plan
  return DataFrame(
    spark: self.spark,
    plan: SparkConnectClient.getSetOperation(self.plan.root, otherPlan.root, SetOpType.intersect)
  )
}
531
+
532
/// Builds a `DataFrame` holding only the rows present in both this `DataFrame` and `other`,
/// keeping duplicates. Corresponds to SQL `INTERSECT ALL`.
///
/// Traps at runtime if `other.getPlan()` does not yield a `Plan` (forced cast).
/// - Parameter other: The `DataFrame` to intersect with.
/// - Returns: A `DataFrame` backed by the resulting set-operation plan.
public func intersectAll(_ other: DataFrame) async -> DataFrame {
  let otherPlan = await other.getPlan() as! Plan
  return DataFrame(
    spark: self.spark,
    plan: SparkConnectClient.getSetOperation(
      self.plan.root,
      otherPlan.root,
      SetOpType.intersect,
      isAll: true
    )
  )
}
541
+
542
/// Builds a `DataFrame` holding the union of the rows of this `DataFrame` and `other`.
/// Corresponds to SQL `UNION ALL`: duplicates are kept. For a SQL-style set union
/// (with de-duplication), follow this call with `distinct`.
/// As is standard in SQL, columns are resolved by position, not by name.
///
/// Traps at runtime if `other.getPlan()` does not yield a `Plan` (forced cast).
/// - Parameter other: The `DataFrame` to union with.
/// - Returns: A `DataFrame` backed by the resulting set-operation plan.
public func union(_ other: DataFrame) async -> DataFrame {
  let otherPlan = await other.getPlan() as! Plan
  return DataFrame(
    spark: self.spark,
    plan: SparkConnectClient.getSetOperation(
      self.plan.root,
      otherPlan.root,
      SetOpType.union,
      isAll: true
    )
  )
}
553
+
554
/// Builds a `DataFrame` holding the union of the rows of this `DataFrame` and `other`.
/// Alias of `union`; it simply forwards to it.
/// - Parameter other: The `DataFrame` to union with.
/// - Returns: A `DataFrame`.
public func unionAll(_ other: DataFrame) async -> DataFrame {
  await union(other)
}
561
+
562
/// Returns a new `DataFrame` containing the union of rows in this `DataFrame` and another
/// `DataFrame`, resolving columns by name rather than by position.
///
/// Apart from name-based column resolution this behaves like `union`: duplicates are kept.
/// When `allowMissingColumns` is `true`, the set of column names in this and the other
/// `DataFrame` can differ; missing columns will be filled with null. Further, the missing
/// columns of this `DataFrame` will be added at the end in the schema of the union result.
///
/// Traps at runtime if `other.getPlan()` does not yield a `Plan` (forced cast).
/// - Parameters:
///   - other: A `DataFrame` to union with.
///   - allowMissingColumns: Whether the two `DataFrame`s may have differing column sets.
///     Defaults to `false`.
/// - Returns: A `DataFrame`.
public func unionByName(_ other: DataFrame, _ allowMissingColumns: Bool = false) async -> DataFrame {
  let right = await (other.getPlan() as! Plan).root
  let plan = SparkConnectClient.getSetOperation(
    self.plan.root,
    right,
    SetOpType.union,
    // Keep duplicates, exactly as `union` does. The set operations that omit `isAll`
    // are the DISTINCT variants, so leaving it out here would silently de-duplicate.
    // NOTE(review): assumes `isAll` is declared before `byName` in `getSetOperation` — confirm.
    isAll: true,
    byName: true,
    allowMissingColumns: allowMissingColumns
  )
  return DataFrame(spark: self.spark, plan: plan)
}
581
+
502
582
/// Returns a ``DataFrameWriter`` that can be used to write non-streaming data.
503
583
public var write : DataFrameWriter {
504
584
get {
0 commit comments