1
1
""" Tests over datasource bootstrapping """
2
2
3
3
import json
4
+ import os
4
5
from pathlib import Path
5
6
from typing import Callable , List
6
7
from uuid import uuid4
@@ -341,7 +342,6 @@ def test_bootstrap_datasources(self, datasource_id_factory, tmp_path: Path):
341
342
class TestBootstrapDatasource :
342
343
@pytest .mark .parametrize ('sample_name' , SampleData .all_sample_names ())
343
344
def test_success (self , sample_name , datasource_id ):
344
-
345
345
# Clear out connections at the onset, else the get_engine() portion
346
346
# gets confused over human-name conflicts when we've bootstrapped
347
347
# the same sample data repeatedly, namely through having first
@@ -547,6 +547,208 @@ def test_had_bad_sqlite_database_files(self, datasource_id, bad_pathname: str):
547
547
# the real problem to the user.
548
548
549
549
550
class TestDatabricks:
    """Test ENG-5517 Very Special Behavior for databricks side-effects if
    databricks-connect script is in PATH and we have the optional 'cluster_id'
    datapoint in connect args.
    """

    @pytest.fixture()
    def tmp_home(self, tmp_path: Path) -> Path:
        """Point $HOME at a fresh temp directory for the duration of a test.

        The databricks post-processor writes $HOME/.databricks-connect, so
        tests must never touch the real home directory. Yields the new home
        directory and restores the original $HOME on teardown.
        """
        existing_home = os.environ['HOME']

        new_home = tmp_path / 'home'
        new_home.mkdir()

        os.environ['HOME'] = str(new_home)

        try:
            yield new_home
        finally:
            os.environ['HOME'] = existing_home

    @pytest.fixture()
    def databricks_connect_in_path(self, tmp_path: Path) -> Path:
        """Install a mock 'databricks-connect' executable onto $PATH.

        Gets a mock-ish executable 'databricks-connect' into an element of
        the path so that which('databricks-connect') will find something
        (see databricks post-processor). The mock script copies its stdin to
        a scratch file so a test can inspect exactly what was piped into it.

        Yields the path to that scratch output file; restores the original
        $PATH on teardown.
        """
        # Make a new subdir of tmp_path, add it to the path, create
        # executable shell script databricks-connect.
        bindir = tmp_path / 'scratch-bin'
        bindir.mkdir()

        orig_path = os.environ['PATH']
        os.environ['PATH'] = f"{orig_path}:{bindir}"

        scriptpath = bindir / 'databricks-connect'
        script_output_path = tmp_path / 'connect-inputs.txt'

        # Now make a 'databricks-connect' executable that echos all its
        # stdin to tmp_path/connect-inputs.txt.
        scriptpath.write_text(f'#!/bin/sh\ncat > {script_output_path}\nexit 0\n')
        scriptpath.chmod(0o755)

        try:
            # Yield the script output path so a test can inspect its contents.
            yield script_output_path
        finally:
            # Undo $PATH change
            os.environ['PATH'] = orig_path

    @pytest.fixture()
    def jsons_for_extra_behavior(self):
        """Return a DatasourceJSONs describing databricks that will tickle
        postprocess_databricks() into doing its extra behavior. Also returns
        dict of some of the fields within that JSON."""

        hostname = 'dbc-1bab80fc-a74b.cloud.databricks.com'
        password = 'dapie372d57cefdc078d8ce3936fcb0e22ee'
        port = 54321
        org_id = 65475674534576
        cluster_id = '0122-044839-vx2fk606'

        case_data = DatasourceJSONs(
            meta_dict={
                'required_python_modules': ['sqlalchemy-databricks'],
                'allow_datasource_dialect_autoinstall': False,
                'drivername': 'databricks+connector',
                'sqlmagic_autocommit': False,  # This one is special!
                'name': 'Databricks With Extras',
            },
            dsn_dict={
                'username': 'token',
                'host': hostname,
                'password': password,
            },
            connect_args_dict={
                "http_path": "sql/protocolv1/o/2414094324684936/0125-220758-m9pfb4c7",
                "cluster_id": cluster_id,
                "org_id": org_id,
                "port": port,
            },
        )

        return (
            case_data,
            {
                'hostname': hostname,
                'password': password,
                'port': port,
                'org_id': org_id,
                'cluster_id': cluster_id,
            },
        )

    def test_extra_behavior(
        self, datasource_id, databricks_connect_in_path, tmp_home, jsons_for_extra_behavior
    ):
        """Test creating databricks with extra keys to cause postprocess_databricks() to do its magic"""

        # Make a preexisting tmp_home/.databricks-connect, expect it to get unlinked
        # (see lines in postprocess_databricks)
        dotconnect = tmp_home / '.databricks-connect'
        with dotconnect.open('w') as of:
            of.write('exists')

        assert dotconnect.exists()

        case_data, case_dict = jsons_for_extra_behavior

        # Sanity check: cluster_id is what triggers the extra behavior.
        assert 'cluster_id' in case_data.connect_args_dict

        initial_len = len(Connection.connections)

        datasources.bootstrap_datasource(
            datasource_id, case_data.meta_json, case_data.dsn_json, case_data.connect_args_json
        )

        assert len(Connection.connections) == initial_len + 1

        # Preexisting file should have been unlinked. The real databricks-connect
        # script would have recreated it, but the mock version we create in fixture
        # databricks_connect_in_path will create a different file.
        assert not dotconnect.exists()

        # databricks_connect_in_path is the path where the fake script output was placed
        assert databricks_connect_in_path.exists()

        # Expect to find things in it. See ENG-5517.
        # We can only test that we ran this mock script and the known result
        # of our mock script. What the real one does ... ?
        contents = databricks_connect_in_path.read_text().split()
        assert len(contents) == 6
        assert contents[0] == 'y'
        assert contents[1] == f"https://{case_dict['hostname']}/"
        assert contents[2] == case_dict['password']
        assert contents[3] == case_dict['cluster_id']
        assert contents[4] == str(case_dict['org_id'])
        assert contents[5] == str(case_dict['port'])

    def test_skip_extra_behavior_if_no_databricks_connect(
        self, datasource_id, tmp_home, jsons_for_extra_behavior
    ):
        """No databricks-connect script on $PATH -> no extra behavior."""

        # Let's create a $HOME/.databricks-connect file. It should remain untouched
        # since we're not also using fixture databricks_connect_in_path putting the
        # script in our path.
        dotconnect = tmp_home / '.databricks-connect'
        with dotconnect.open('w') as of:
            of.write('preexists')

        assert dotconnect.exists()

        # Only the DatasourceJSONs half of the fixture tuple is needed here.
        case_data, _ = jsons_for_extra_behavior

        initial_len = len(Connection.connections)

        # Should not fail, but won't have done any extra behavior.
        datasources.bootstrap_datasource(
            datasource_id, case_data.meta_json, case_data.dsn_json, case_data.connect_args_json
        )

        assert len(Connection.connections) == initial_len + 1

        # Left unchanged
        assert dotconnect.exists()
        assert 'preexists' in dotconnect.read_text()

    def test_skip_extra_behavior_if_no_cluster_id(
        self, datasource_id, tmp_home, databricks_connect_in_path
    ):
        """databricks-connect is on $PATH, but connect args carry no
        'cluster_id' -> no extra behavior."""

        # Let's create a $HOME/.databricks-connect file. It should remain
        # untouched: even though databricks_connect_in_path puts the script in
        # our path, the sample data below lacks the 'cluster_id' trigger.
        dotconnect = tmp_home / '.databricks-connect'
        with dotconnect.open('w') as of:
            of.write('preexists')

        assert dotconnect.exists()

        case_data = SampleData.get_sample('databricks')
        assert 'cluster_id' not in case_data.connect_args_dict

        initial_len = len(Connection.connections)

        # Should not have triggered extra behavior -- cluster_id wasn't present (but
        # we do have a databricks-connect script in the PATH).
        datasources.bootstrap_datasource(
            datasource_id, case_data.meta_json, case_data.dsn_json, case_data.connect_args_json
        )

        assert len(Connection.connections) == initial_len + 1

        # But won't have breathed on dotconnect file.
        # Left unchanged
        assert dotconnect.exists()
        assert 'preexists' in dotconnect.read_text()
550
752
class TestEnsureRequirements :
551
753
def test_already_installed (self , datasource_id ):
552
754
requirements = ['pip' ]
@@ -569,7 +771,6 @@ def test_raises_when_disallowed_but_needs_to_install(
569
771
570
772
class TestInstallPackage :
571
773
def test_install (self , not_installed_package ):
572
-
573
774
pkgname = not_installed_package
574
775
575
776
# Better not be installed right now!
0 commit comments