1- from databricks .labs .ucx .hive_metastore .mapping import Rule
1+ import itertools
2+ import json
3+ import logging
4+ from pathlib import Path
5+ from typing import TypeVar
6+
7+ import pytest
8+
9+ from databricks .labs .ucx .hive_metastore .mapping import Rule , TableToMigrate
210from databricks .labs .ucx .hive_metastore .migration_status import MigrationIndex , MigrationStatus
311from databricks .labs .ucx .hive_metastore .tables import Table
4- from databricks .labs .ucx .hive_metastore .view_migrate import ViewToMigrate
12+ from databricks .labs .ucx .hive_metastore .view_migrate import ViewsMigrationSequencer , ViewToMigrate
513
614
7- def test_view_to_migrate_sql_migrate_view ():
15+ def test_view_to_migrate_sql_migrate_view_sql ():
816 expected_query = "CREATE OR REPLACE VIEW IF NOT EXISTS `cat1`.`schema1`.`dest_view1` AS SELECT * FROM `cat1`.`schema1`.`dest_table1`"
917 view = Table (
1018 object_type = "VIEW" ,
@@ -27,3 +35,156 @@ def test_view_to_migrate_sql_migrate_view():
2735 sql = view_to_migrate .sql_migrate_view (migration_index )
2836
2937 assert sql == expected_query
38+
39+
@pytest.fixture(scope="session")
def samples() -> dict[str, dict[str, str]]:
    """Load the sample tables and views, keyed by ``<db>.<table>``.

    The samples are read from ``tables/tables_and_views.json`` located next to
    this test module. The fixture is session scoped, so the file is parsed once.
    """
    samples_file = Path(__file__).parent / "tables" / "tables_and_views.json"
    with samples_file.open(encoding="utf-8") as handle:
        entries = json.load(handle)
    # Later entries win when two samples share the same key
    return {entry["db"] + "." + entry["table"]: entry for entry in entries}
49+
50+
@pytest.fixture
def tables(request, samples) -> list[TableToMigrate]:
    """Build the tables to migrate for the sample keys passed through indirect parametrization.

    Each key in ``request.param`` is looked up in ``samples``. Samples with a
    ``view_text`` are marked ``"VIEW"``, all others ``"DELTA"``. Every table
    shares the same migration rule.
    """
    rule = Rule("ws1", "cat1", "schema", "db1", "table1", "table2")

    def to_table_to_migrate(sample: dict[str, str]) -> TableToMigrate:
        view_text = sample.get("view_text")
        kind = "VIEW" if view_text is not None else "DELTA"
        table = Table("hive_metastore", sample["db"], sample["table"], "type", kind, view_text=view_text)
        return TableToMigrate(table, rule)

    return [to_table_to_migrate(samples[key]) for key in request.param]
68+
69+
@pytest.mark.parametrize("tables", [("db1.t1", "db2.t1")], indirect=True)
def test_empty_sequence_without_views(tables):
    """Sequencing yields no batches; the parametrized samples are expected to contain no views."""
    # NOTE(review): the second status refers to `db2.t2` while the samples use `db2.t1` - confirm this is intended
    statuses = [
        MigrationStatus("db1", "t1", "cat1", "db2", "t1"),
        MigrationStatus("db2", "t2", "cat1", "db2", "t1"),
    ]
    index = MigrationIndex(statuses)

    batches = ViewsMigrationSequencer(tables, migration_index=index).sequence_batches()

    assert len(batches) == 0
82+
83+
T = TypeVar("T")


def flatten(lists: list[list[T]]) -> list[T]:
    """Concatenate the inner lists into one flat list, preserving element order."""
    return [item for inner in lists for item in inner]
89+
90+
@pytest.mark.parametrize("tables", [("db1.t1", "db1.v1")], indirect=True)
def test_sequence_direct_view(tables) -> None:
    """The sequenced batches contain exactly the view `db1.v1`."""
    index = MigrationIndex([MigrationStatus("db1", "t1", "cat1", "db1", "t1")])

    batches = ViewsMigrationSequencer(tables, migration_index=index).sequence_batches()

    sequenced_keys = [view.src.key for view in flatten(batches)]
    assert sequenced_keys == ["hive_metastore.db1.v1"]
100+
101+
@pytest.mark.parametrize("tables", [("db1.t1", "db1.v1", "db1.t2", "db1.v2")], indirect=True)
def test_sequence_direct_views(tables) -> None:
    """Both views `db1.v1` and `db1.v2` are sequenced together in a single batch."""
    statuses = [
        MigrationStatus("db1", "t1", "cat1", "db1", "t1"),
        MigrationStatus("db1", "t2", "cat1", "db1", "t2"),
    ]
    index = MigrationIndex(statuses)

    batches = ViewsMigrationSequencer(tables, migration_index=index).sequence_batches()

    assert len(batches) == 1
    # Sort because the order of the views is not guaranteed as they both depend on just tables
    sequenced_keys = sorted(view.src.key for view in flatten(batches))
    assert sequenced_keys == ["hive_metastore.db1.v1", "hive_metastore.db1.v2"]
115+
116+
@pytest.mark.parametrize("tables", [("db1.t1", "db1.v1", "db1.v4")], indirect=True)
def test_sequence_indirect_views(tables) -> None:
    """The views are sequenced over two batches, `db1.v1` before `db1.v4`."""
    index = MigrationIndex([MigrationStatus("db1", "t1", "cat1", "db1", "t1")])

    batches = ViewsMigrationSequencer(tables, migration_index=index).sequence_batches()

    assert len(batches) == 2
    sequenced_keys = [view.src.key for view in flatten(batches)]
    assert sequenced_keys == ["hive_metastore.db1.v1", "hive_metastore.db1.v4"]
127+
128+
@pytest.mark.parametrize("tables", [("db1.t1", "db1.v1", "db1.v4", "db1.v5", "db1.v6", "db1.v7")], indirect=True)
def test_sequence_deep_indirect_views(tables) -> None:
    """The five views are sequenced one per batch, in the expected order."""
    view_names_in_order = ("v1", "v4", "v7", "v6", "v5")
    index = MigrationIndex([MigrationStatus("db1", "t1", "cat1", "db1", "t1")])

    batches = ViewsMigrationSequencer(tables, migration_index=index).sequence_batches()

    assert len(batches) == 5
    sequenced_keys = [view.src.key for view in flatten(batches)]
    assert sequenced_keys == [f"hive_metastore.db1.{name}" for name in view_names_in_order]
145+
146+
@pytest.mark.parametrize("tables", [("db1.v1", "db1.v15")], indirect=True)
def test_sequence_view_with_view_and_table_dependency(tables) -> None:
    """The views are sequenced over two batches, `db1.v1` before `db1.v15`."""
    index = MigrationIndex([MigrationStatus("db1", "t1", "cat1", "db1", "t1")])

    batches = ViewsMigrationSequencer(tables, migration_index=index).sequence_batches()

    assert len(batches) == 2
    sequenced_keys = [view.src.key for view in flatten(batches)]
    assert sequenced_keys == ["hive_metastore.db1.v1", "hive_metastore.db1.v15"]
157+
158+
@pytest.mark.parametrize("tables", [("db1.v8",)], indirect=True)
def test_sequence_view_with_invalid_query_raises_value_error(tables) -> None:
    """Sequencing the sample view `db1.v8` raises a `ValueError` about the view SQL."""
    sequencer = ViewsMigrationSequencer(tables)

    # `match` checks the exception message itself; `str()` of the object bound by
    # `pytest.raises(...) as error` is the `ExceptionInfo` wrapper, not the message.
    with pytest.raises(ValueError, match="Could not analyze view SQL:"):
        sequencer.sequence_batches()
166+
167+
@pytest.mark.parametrize("tables", [("db1.v9",)], indirect=True)
def test_sequencing_logs_unresolved_dependencies(caplog, tables) -> None:
    """Sequencing the sample view `db1.v9` logs an unresolved dependencies message."""
    logger_name = "databricks.labs.ucx.hive_metastore.view_migrate"
    sequencer = ViewsMigrationSequencer(tables)

    with caplog.at_level(logging.ERROR, logger=logger_name):
        sequencer.sequence_batches()

    captured = caplog.text
    assert "Unresolved dependencies prevent batch sequencing:" in captured
175+
176+
@pytest.mark.parametrize(
    "tables",
    [
        ("db1.v10", "db1.v11"),
        ("db1.v12", "db1.v13", "db1.v14"),
    ],
    indirect=True,
)
def test_sequencing_logs_circular_dependency(caplog, tables) -> None:
    """Sequencing either parametrized set of sample views logs a circular dependency message."""
    logger_name = "databricks.labs.ucx.hive_metastore.view_migrate"
    sequencer = ViewsMigrationSequencer(tables)

    with caplog.at_level(logging.ERROR, logger=logger_name):
        sequencer.sequence_batches()

    captured = caplog.text
    assert "Circular dependency detected starting from:" in captured
0 commit comments