@@ -413,7 +413,8 @@ def test_two_same_backend(pico_df):
413413
414414def test_cast_to_second_backend_with_concat (pico_df , cluster_df , caplog ):
415415 with caplog .at_level (level = logging .INFO , logger = DEFAULT_LOGGER_NAME ):
416- df3 = pd .concat ([pico_df , cluster_df ], axis = 1 )
416+ # We have to copy the input dataframes because of inplace merging
417+ df3 = pd .concat ([pico_df .copy (), cluster_df .copy ()], axis = 1 )
417418 assert pico_df .get_backend () == "Pico"
418419 assert cluster_df .get_backend () == "Cluster"
419420 assert df3 .get_backend () == "Cluster" # result should be on cluster
@@ -431,7 +432,10 @@ def test_cast_to_second_backend_with_concat_uses_second_backend_api_override(
431432 register_pd_accessor (name = "concat" , backend = "Cluster" )(
432433 lambda * args , ** kwargs : "custom_concat_result"
433434 )
434- assert pd .concat ([pico_df , cluster_df ], axis = 1 ) == "custom_concat_result"
435+ # copy dataframes for concat to allow for in-place merging
436+ assert (
437+ pd .concat ([pico_df .copy (), cluster_df .copy ()], axis = 1 ) == "custom_concat_result"
438+ )
435439 assert pico_df .get_backend () == "Pico"
436440 assert cluster_df .get_backend () == "Cluster"
437441
@@ -449,14 +453,14 @@ def test_moving_pico_to_cluster_in_place_calls_set_backend_only_once_github_issu
449453
450454def test_cast_to_second_backend_with___init__ (pico_df , cluster_df ):
451455 df3 = pd .DataFrame ({"pico" : pico_df .iloc [:, 0 ], "cluster" : cluster_df .iloc [:, 0 ]})
452- assert pico_df .get_backend () == "Pico"
456+ assert pico_df .get_backend () == "Cluster" # pico_df was cast inplace
453457 assert cluster_df .get_backend () == "Cluster"
454458 assert df3 .get_backend () == "Cluster" # result should be on cluster
455459
456460
457461def test_cast_to_first_backend (pico_df , cluster_df ):
458462 df3 = pd .concat ([cluster_df , pico_df ], axis = 1 )
459- assert pico_df .get_backend () == "Pico"
463+ assert pico_df .get_backend () == "Cluster" # pico_df was cast in place
460464 assert cluster_df .get_backend () == "Cluster"
461465 assert df3 .get_backend () == cluster_df .get_backend () # result should be on cluster
462466
@@ -468,7 +472,7 @@ def test_cast_to_first_backend_with_concat_uses_first_backend_api_override(
468472 lambda * args , ** kwargs : "custom_concat_result"
469473 )
470474 assert pd .concat ([cluster_df , pico_df ], axis = 1 ) == "custom_concat_result"
471- assert pico_df .get_backend () == "Pico"
475+ assert pico_df .get_backend () == "Cluster" # pico was cast inplace to cluster
472476 assert cluster_df .get_backend () == "Cluster"
473477
474478
@@ -479,7 +483,7 @@ def test_cast_to_first_backend_with___init__(pico_df, cluster_df):
479483 "pico" : pico_df .iloc [:, 0 ],
480484 }
481485 )
482- assert pico_df .get_backend () == "Pico"
486+ assert pico_df .get_backend () == "Cluster" # cluster was cast in place
483487 assert cluster_df .get_backend () == "Cluster"
484488 assert df3 .get_backend () == "Cluster" # result should be on cluster
485489
@@ -557,31 +561,33 @@ def test_two_two_qc_types_default_rhs(default_df, cluster_df):
557561 # so we default to the caller
558562 df3 = pd .concat ([default_df , cluster_df ], axis = 1 )
559563 assert default_df .get_backend () == "Test_casting_default"
560- assert cluster_df .get_backend () == "Cluster"
564+ assert (
565+ cluster_df .get_backend () == "Test_casting_default"
566+ ) # in place cast to default
561567 assert df3 .get_backend () == default_df .get_backend () # should move to default
562568
563569
564570def test_two_two_qc_types_default_lhs (default_df , cluster_df ):
565571 # none of the query compilers know about each other here
566572 # so we default to the caller
567573 df3 = pd .concat ([cluster_df , default_df ], axis = 1 )
568- assert default_df .get_backend () == "Test_casting_default"
574+ assert default_df .get_backend () == "Cluster" # in place cast to Cluster
569575 assert cluster_df .get_backend () == "Cluster"
570576 assert df3 .get_backend () == cluster_df .get_backend () # should move to cluster
571577
572578
573579def test_two_two_qc_types_default_2_rhs (default_df , cloud_df ):
574580 # cloud knows a bit about costing; so we prefer moving to there
575581 df3 = pd .concat ([default_df , cloud_df ], axis = 1 )
576- assert default_df .get_backend () == "Test_casting_default"
582+ assert default_df .get_backend () == "Cloud" # inplace cast to Cloud
577583 assert cloud_df .get_backend () == "Cloud"
578584 assert df3 .get_backend () == cloud_df .get_backend () # should move to cloud
579585
580586
581587def test_two_two_qc_types_default_2_lhs (default_df , cloud_df ):
582588 # cloud knows a bit about costing; so we prefer moving to there
583589 df3 = pd .concat ([cloud_df , default_df ], axis = 1 )
584- assert default_df .get_backend () == "Test_casting_default"
590+ assert default_df .get_backend () == "Cloud" # inplace cast to Cloud
585591 assert cloud_df .get_backend () == "Cloud"
586592 assert df3 .get_backend () == cloud_df .get_backend () # should move to cloud
587593
@@ -651,6 +657,18 @@ def test_qc_mixed_loc(pico_df, cloud_df):
651657 assert cloud_df1 [pico_df1 [0 ][0 ]][pico_df1 [0 ][1 ]] == 1
652658
653659
660+ def test_merge_in_place (default_df , lazy_df , cloud_df ):
661+ # lazy_df tries to pawn off work on other engines
662+ df = default_df .merge (lazy_df )
663+ assert type (df ) is type (default_df )
664+ # Both arguments now have the same qc type
665+ assert type (lazy_df ) is type (default_df )
666+
667+ df = cloud_df .merge (lazy_df )
668+ assert type (df ) is type (cloud_df )
669+ assert type (lazy_df ) is type (cloud_df )
670+
671+
654672def test_information_asymmetry (default_df , cloud_df , eager_df , lazy_df ):
655673 # normally, the default query compiler should be chosen
656674 # here, but since eager knows about default, but not
0 commit comments