1+ """Test live mask chunk size calculation."""
2+
3+ import numpy as np
4+ import pytest
5+
6+ from mdio .converters .segy import _calculate_live_mask_chunksize
7+ from mdio .core import Grid , Dimension
8+ from mdio .constants import INT32_MAX
9+
10+
def test_small_grid_no_chunking():
    """Check that a 100 x 100 live mask yields the ``-1`` result.

    The mask holds 10,000 elements, far fewer than ``INT32_MAX``; the
    function under test is expected to answer ``-1`` for such a grid.
    """
    axis_len = 100
    # Two mask axes plus the trailing "sample" axis, all the same length.
    dims = [
        Dimension(coords=range(axis_len), name=label)
        for label in ("dim1", "dim2", "sample")
    ]
    grid = Grid(dims=dims)
    # The live mask spans every axis except "sample".
    grid.live_mask = np.ones((axis_len, axis_len), dtype=bool)

    assert _calculate_live_mask_chunksize(grid) == -1
24+
25+
def test_large_2d_grid_chunking():
    """Check the exact chunk tuple for a 2D live mask larger than INT32_MAX.

    The mask is 50,000 x 50,000 = 2,500,000,000 elements, which is above
    ``INT32_MAX`` (2,147,483,647). The expected tuple is recomputed here
    with the same arithmetic the test has always used.
    """
    side = 50000
    dims = [
        Dimension(coords=range(side), name="dim1"),
        Dimension(coords=range(side), name="dim2"),
        Dimension(coords=range(100), name="sample"),
    ]
    grid = Grid(dims=dims)
    grid.live_mask = np.ones((side, side), dtype=bool)

    result = _calculate_live_mask_chunksize(grid)

    # Rebuild the expected value one step at a time.
    n_elements = side * side
    n_chunks = np.ceil(n_elements / INT32_MAX).astype(int)
    per_axis_split = np.ceil(np.power(n_chunks, 1 / 2))  # square root for 2 axes
    dim_chunks = int(np.ceil(side / per_axis_split))
    expected = int(np.ceil(side / dim_chunks))

    assert result == (expected,) * 2
47+
48+
def test_large_3d_grid_chunking():
    """Check the exact chunk tuple for a 3D live mask larger than INT32_MAX.

    The mask is 1500 x 1500 x 1500 = 3,375,000,000 elements, which is
    above ``INT32_MAX``. The expected tuple is recomputed here with the
    same arithmetic the test has always used.
    """
    side = 1500
    dims = [
        Dimension(coords=range(side), name="dim1"),
        Dimension(coords=range(side), name="dim2"),
        Dimension(coords=range(side), name="dim3"),
        Dimension(coords=range(100), name="sample"),
    ]
    grid = Grid(dims=dims)
    grid.live_mask = np.ones((side, side, side), dtype=bool)

    result = _calculate_live_mask_chunksize(grid)

    # Rebuild the expected value one step at a time.
    n_elements = side * side * side
    n_chunks = np.ceil(n_elements / INT32_MAX).astype(int)
    per_axis_split = np.ceil(np.power(n_chunks, 1 / 3))  # cube root for 3 axes
    dim_chunks = int(np.ceil(side / per_axis_split))
    expected = int(np.ceil(side / dim_chunks))

    assert result == (expected,) * 3
71+
72+
def test_uneven_dimensions_chunking():
    """Check the exact chunk tuple for the "uneven dimensions" scenario.

    NOTE(review): despite the test name, both mask axes built here have
    50,000 coordinates (2,500,000,000 elements in total, above
    ``INT32_MAX``), so the grid is square. Confirm whether a genuinely
    uneven shape was intended.
    """
    side = 50000
    dims = [
        Dimension(coords=range(side), name="dim1"),
        Dimension(coords=range(side), name="dim2"),
        Dimension(coords=range(100), name="sample"),
    ]
    grid = Grid(dims=dims)
    grid.live_mask = np.ones((side, side), dtype=bool)

    result = _calculate_live_mask_chunksize(grid)

    # Rebuild the expected value one step at a time.
    n_elements = side * side
    n_chunks = np.ceil(n_elements / INT32_MAX).astype(int)
    per_axis_split = np.ceil(np.power(n_chunks, 1 / 2))  # square root for 2 axes
    dim_chunks = int(np.ceil(side / per_axis_split))
    expected = int(np.ceil(side / dim_chunks))

    assert result == (expected,) * 2
95+
96+
def test_prestack_land_survey_chunking():
    """Check the exact chunk tuple for a dense 4D pre-stack land survey grid.

    Mask axes: 1000 shot points x 1000 receiver points x 100 offsets x
    36 azimuths = 3,600,000,000 elements, which is above ``INT32_MAX``.

    The expected value is derived from the 1000-long axes only and is
    then required for all four axes, exactly as this test has always
    asserted.
    """
    n_shots, n_receivers, n_offsets, n_azimuths = 1000, 1000, 100, 36
    dims = [
        Dimension(coords=range(n_shots), name="shot_point"),
        Dimension(coords=range(n_receivers), name="receiver_point"),
        Dimension(coords=range(n_offsets), name="offset"),
        Dimension(coords=range(n_azimuths), name="azimuth"),
        Dimension(coords=range(1000), name="sample"),
    ]
    grid = Grid(dims=dims)
    grid.live_mask = np.ones(
        (n_shots, n_receivers, n_offsets, n_azimuths), dtype=bool
    )

    result = _calculate_live_mask_chunksize(grid)

    # Rebuild the expected value one step at a time.
    n_elements = n_shots * n_receivers * n_offsets * n_azimuths
    n_chunks = np.ceil(n_elements / INT32_MAX).astype(int)
    per_axis_split = np.ceil(np.power(n_chunks, 1 / 4))  # fourth root for 4 axes
    dim_chunks = int(np.ceil(1000 / per_axis_split))
    expected = int(np.ceil(1000 / dim_chunks))

    # One identical entry per mask axis.
    assert result == (expected,) * 4
127+
128+
def test_edge_case_empty_grid():
    """Check that a grid with zero-length mask axes yields ``-1``.

    Both mask axes have no coordinates, so the live mask has shape
    ``(0, 0)``; only the "sample" axis is populated.
    """
    dims = [
        Dimension(coords=range(0), name="dim1"),
        Dimension(coords=range(0), name="dim2"),
        Dimension(coords=range(100), name="sample"),
    ]
    grid = Grid(dims=dims)
    grid.live_mask = np.zeros((0, 0), dtype=bool)

    # An empty grid is expected to get the same -1 answer as a small one.
    assert _calculate_live_mask_chunksize(grid) == -1
0 commit comments