@@ -78,9 +78,23 @@ def __init__(
7878 self ._enable_hms_federation = enable_hms_federation
7979 self ._config = config
8080
81- # Supported databases and version for HMS Federation
82- supported_database_versions : ClassVar [dict [str , list [str ]]] = {
83- "mysql" : ["2.3" , "0.13" ],
81+ # Supported databases and associated ports
82+ # https://docs.databricks.com/en/data-governance/unity-catalog/hms-federation/hms-federation-external.html
83+ # https://dev.mysql.com/doc/mysql-port-reference/en/mysql-port-reference-tables.html
84+ # https://www.postgresql.org/docs/current/runtime-config-connection.html
85+ # https://learn.microsoft.com/en-us/sql/connect/jdbc/building-the-connection-url?view=sql-server-ver15
86+ supported_databases_port : ClassVar [dict [str , int ]] = {
87+ "mysql" : 3306 ,
88+ "postgresql" : 5432 ,
89+ "sqlserver" : 1433 ,
90+ }
91+
92+ # Supported HMS versions
93+ # https://docs.databricks.com/en/data-governance/unity-catalog/hms-federation/hms-federation-external.html
94+ supported_hms_versions : ClassVar [set [tuple [int , int ]]] = {
95+ (0 , 13 ),
96+ (2 , 3 ),
97+ (3 , 1 ),
8498 }
8599
86100 def create_from_cli (self , prompts : Prompts ) -> None :
@@ -127,19 +141,24 @@ def _external_hms(self) -> ExternalHmsInfo | None:
127141 if not version :
128142 logger .info ('Hive Metastore version not found' )
129143 return None
130- major_minor_match = re .match (r'(^\d+\. \d+)' , version )
131- if not major_minor_match :
144+ major_minor_match = re .match (r'(^(?P<major> \d+)\.(?P<minor> \d+) )' , version )
145+ if not major_minor_match or not major_minor_match . group ( 'major' ) or not major_minor_match . group ( 'minor' ) :
132146 logger .info (f'Wrong Hive Metastore Database Version Format: { version } ' )
133147 return None
134- major_minor_version = major_minor_match . group ( 1 )
135- external_hms = replace ( self . _split_jdbc_url ( jdbc_url ), version = major_minor_version )
136- supported_versions = self . supported_database_versions . get ( external_hms . database_type )
137- if not supported_versions :
138- logger .info (f'Unsupported Hive Metastore: { external_hms . database_type } ' )
148+ try :
149+ major = int ( major_minor_match . group ( 'major' ) )
150+ minor = int ( major_minor_match . group ( 'minor' ) )
151+ except ValueError :
152+ logger .info (f'Wrong Hive Metastore Database Version Format : { version } ' )
139153 return None
140- if major_minor_version not in supported_versions :
141- logger .info (f'Unsupported Hive Metastore Version: { external_hms .database_type } - { version } ' )
154+
155+ # Verify HMS version
156+ if (major , minor ) not in self .supported_hms_versions :
157+ logger .info (
158+ f'Unsupported Hive Metastore Version: { version } . We currently support: { self .supported_hms_versions } '
159+ )
142160 return None
161+ external_hms = replace (self ._split_jdbc_url (jdbc_url ), version = f'{ major } .{ minor } ' )
143162
144163 if not external_hms .user :
145164 external_hms = replace (
@@ -158,19 +177,33 @@ def _external_hms(self) -> ExternalHmsInfo | None:
@classmethod
def _split_jdbc_url(cls, jdbc_url: str) -> ExternalHmsInfo:
    """Split a metastore JDBC URL into its connection components.

    Supported URL shapes:
      1. jdbc:mysql://hostname:3306/metastore
      2. jdbc:mysql://hostname/metastore
      3. jdbc:mysql://hostname:3306/metastore?user=foo&password=bar
      4. jdbc:mysql://hostname/metastore?user=foo&password=bar
      5. jdbc:sqlserver://hostname:1433;database=database;user=foo;password=bar

    Args:
        jdbc_url: The JDBC connection string to parse.

    Returns:
        An ExternalHmsInfo built from the database type, host, port (kept as a
        string), database name, and the optional user and password; the last
        field (the version) is left as None.

    Raises:
        ValueError: If the URL does not match the expected layout, if no port is
            given and the database type has no entry in
            ``supported_databases_port``, or if no database name can be found in
            either the path or the ``database`` parameter.
    """
    # Port, path and parameter section are each optional; parameters may be
    # introduced by '?' (MySQL/PostgreSQL style) or ';' (SQL Server style).
    pattern = re.compile(
        r'jdbc:(?P<db_type>[a-zA-Z0-9]+)://(?P<host>[^:/?;]+)'
        r'(:(?P<port>\d+))?(/(?P<database>[^?;]+))?([?;](?P<parameters>.+))?'
    )
    match = pattern.match(jdbc_url)
    if not match:
        raise ValueError(f'Unsupported JDBC URL: {jdbc_url}')

    # Split each "key=value" pair on the FIRST '=' only, so values that contain
    # '=' (e.g. passwords) survive intact. Segments without a key or without '='
    # (such as the empty segment produced by a trailing ';') are ignored instead
    # of aborting the whole parse.
    params: dict[str, str] = {}
    for param in re.split(r'[;&]', match.group('parameters') or ''):
        key, separator, value = param.partition('=')
        if key and separator:
            params[key] = value

    db_type = match.group('db_type')
    port = match.group('port')
    if not port:
        # Fall back to the well-known default port of the database engine.
        # The lookup result is checked BEFORE converting to str: str(None) is
        # the truthy string 'None' and would otherwise slip through as a port.
        default_port = cls.supported_databases_port.get(db_type)
        if default_port is None:
            raise ValueError(f"Can't identify Port for {db_type}")
        port = str(default_port)
    host = match.group('host')
    # The database name comes from the URL path when present, otherwise from
    # the "database" connection parameter (SQL Server style URLs).
    database = match.group('database') or params.get('database')
    if not database:
        raise ValueError(f"Can't identify Database for {db_type}")
    user = params.get('user')
    password = params.get('password')

    return ExternalHmsInfo(db_type, host, port, database, user, password, None)
176209
0 commit comments