3
3
import json
4
4
import os
5
5
from pathlib import Path
6
- from typing import Callable , List
6
+ from typing import Callable , List , Tuple
7
7
from uuid import uuid4
8
8
9
9
import pkg_resources
@@ -554,6 +554,7 @@ class TestDatabricks:
554
554
555
555
@pytest .fixture ()
556
556
def tmp_home (self , tmpdir : Path ) -> Path :
557
+ """Replace $HOME to be a new directory of $TMPDIR, yielding the new Path."""
557
558
existing_home = os .environ ['HOME' ]
558
559
559
560
new_home = tmpdir / 'home'
@@ -568,10 +569,13 @@ def tmp_home(self, tmpdir: Path) -> Path:
568
569
os .environ ['HOME' ] = existing_home
569
570
570
571
@pytest .fixture ()
571
- def databricks_connect_in_path (self , tmpdir : Path ) -> Path :
572
- # Get a mock-ish executable 'databricks-connect' into an element in the path
573
- # so that which('databricks-connect') will find something (see databricks post
574
- # processor)
572
+ def databricks_connect_in_path (self , tmpdir : Path ) -> Tuple [Path , Path ]:
573
+ """Get a mock-ish executable 'databricks-connect' into an element in the path
574
+ so that which('databricks-connect') will find something (see databricks post
575
+ processor)
576
+
577
+ Yields the new executable's path, plus where it will scribble its own output.
578
+ """
575
579
576
580
# Make a new subdir of tmpdir, add it to the path, create executable
577
581
# shell script databricks-connect
@@ -593,15 +597,14 @@ def databricks_connect_in_path(self, tmpdir: Path) -> Path:
593
597
scriptpath .chmod (0o755 )
594
598
595
599
try :
596
- # Yield the script output path so a test can inspect its contents.
597
- yield script_output_path
600
+ yield scriptpath , script_output_path
598
601
599
602
finally :
600
603
# Undo $PATH change
601
604
os .environ ['PATH' ] = orig_path
602
605
603
606
@pytest .fixture ()
604
- def jsons_for_extra_behavior (self ):
607
+ def jsons_for_extra_behavior (self ) -> Tuple [ DatasourceJSONs , dict ] :
605
608
"""Return a DatasourceJSONs describing databricks that will tickle postprocess_databricks()
606
609
into doing its extra behavior. Also returns dict of some of the fields within that JSON."""
607
610
@@ -643,6 +646,100 @@ def jsons_for_extra_behavior(self):
643
646
},
644
647
)
645
648
649
def test_postprocess_databricks_pops_correctly(self, datasource_id, jsons_for_extra_behavior):
    """Check that postprocess_databricks() pops its keys out of connect_args.

    The removal must happen on the connect_args dict itself, not on the
    create_engine_kwargs dict that contains it, and it must happen even when
    no databricks-connect executable is found in the $PATH.
    """
    doomed_keys = ['cluster_id', 'org_id', 'port']

    # Second member of the fixture (dict of selected fields) is not needed here.
    jsons_obj, _ = jsons_for_extra_behavior
    connect_args = jsons_obj.connect_args_dict

    # Precondition: every one of the keys is present before the call.
    absent_beforehand = [key for key in doomed_keys if key not in connect_args]
    assert not absent_beforehand

    datasource_postprocessing.postprocess_databricks(
        datasource_id,
        jsons_obj.dsn_dict,
        {'connect_args': connect_args},
    )

    # Postcondition: the call removed all of them as a side effect.
    # (Regression guard: they were originally popped from the wrong dict.)
    still_present = [key for key in doomed_keys if key in connect_args]
    assert not still_present
673
+
674
def test_errors_from_databricks_connect_are_surfaced(
    self, datasource_id, databricks_connect_in_path, tmp_home, jsons_for_extra_behavior
):
    """Prove that a failing databricks-connect script surfaces as a ValueError.

    The mock databricks-connect script is rewritten to print a known message
    to stderr and exit nonzero. postprocess_databricks() is then expected to
    raise ValueError whose message contains that stderr text.

    `self` is required: this is a method of the test class. Without it the
    instance is bound to the first parameter (`datasource_id`), so the real
    `datasource_id` fixture is never injected.

    `tmp_home` is requested only for its side effect of replacing $HOME; its
    value is not used in the body.
    """
    # First member is the mock executable's path; the output path is not needed.
    script_path, _ = databricks_connect_in_path

    expected_error_message = 'oh noes!'

    # Respell the script to bomb out with the message on stderr.
    with script_path.open('w') as of:
        of.write('#!/bin/sh\n ')
        of.write(f'echo "{expected_error_message} " 1>&2\n ')
        of.write('exit 1\n ')

    jsons_obj, _ = jsons_for_extra_behavior
    create_engine_kwargs = {'connect_args': jsons_obj.connect_args_dict}

    # The script's stderr text must appear in the raised error's message.
    with pytest.raises(ValueError, match=expected_error_message):
        datasource_postprocessing.postprocess_databricks(
            datasource_id,
            jsons_obj.dsn_dict,
            create_engine_kwargs,
        )
702
+
703
@pytest.fixture()
def short_script_timeout(self):
    """Temporarily set datasource_postprocessing.DATABRICKS_CONNECT_SCRIPT_TIMEOUT to 1.

    Yields the value now in effect, and puts the previous value back once the
    requesting test is done, whether or not it passed.
    """
    module = datasource_postprocessing
    saved_timeout = module.DATABRICKS_CONNECT_SCRIPT_TIMEOUT

    module.DATABRICKS_CONNECT_SCRIPT_TIMEOUT = 1

    try:
        # Read the value back from the module so the test sees what is really set.
        yield module.DATABRICKS_CONNECT_SCRIPT_TIMEOUT
    finally:
        module.DATABRICKS_CONNECT_SCRIPT_TIMEOUT = saved_timeout
714
+
715
def test_databricks_connect_taking_too_long(
    self, datasource_id, databricks_connect_in_path, short_script_timeout, jsons_for_extra_behavior
):
    """Prove that an overlong databricks-connect run surfaces as a ValueError.

    The mock databricks-connect script is rewritten to sleep for one second
    longer than the shortened timeout. postprocess_databricks() is then
    expected to raise ValueError with a 'took longer than' message.

    `self` is required: this is a method of the test class. Without it the
    instance is bound to the first parameter (`datasource_id`), so the real
    `datasource_id` fixture is never injected.
    """
    # NOTE(review): unlike the sibling databricks-connect tests, this one does
    # not request `tmp_home` -- confirm nothing is written under the real $HOME
    # before the timeout fires.

    # First member is the mock executable's path; the output path is not needed.
    script_path, _ = databricks_connect_in_path

    # Respell the script to outlast the new timeout, but to (try to) exit cleanly.
    with script_path.open('w') as of:
        of.write('#!/bin/sh\n ')
        of.write(f'sleep {short_script_timeout + 1} \n ')
        of.write('exit 0\n ')

    jsons_obj, _ = jsons_for_extra_behavior
    create_engine_kwargs = {'connect_args': jsons_obj.connect_args_dict}

    with pytest.raises(ValueError, match='databricks-connect took longer than'):
        datasource_postprocessing.postprocess_databricks(
            datasource_id,
            jsons_obj.dsn_dict,
            create_engine_kwargs,
        )
742
+
646
743
def test_extra_behavior (
647
744
self , datasource_id , databricks_connect_in_path , tmp_home , jsons_for_extra_behavior
648
745
):
@@ -673,13 +770,14 @@ def test_extra_behavior(
673
770
# databricks_connect_in_path will create a different file.
674
771
assert not dotconnect .exists ()
675
772
676
- # databricks_connect_in_path is the path where the fake script output was placed
677
- assert databricks_connect_in_path .exists ()
773
+ # databricks_connect_in_path second member is the path where the fake script output was placed
774
+ _ , script_output = databricks_connect_in_path
775
+ assert script_output .exists ()
678
776
679
777
# Expect to find things in it. See ENG-5517.
680
778
# We can only test that we ran this mock script and the known result
681
779
# of our mock script. What the real one does ... ?
682
- contents = databricks_connect_in_path .read ().split ()
780
+ contents = script_output .read ().split (' \n ' )
683
781
assert len (contents ) == 6
684
782
assert contents [0 ] == 'y'
685
783
assert contents [1 ] == f"https://{ case_dict ['hostname' ]} /"
0 commit comments