Commit f5cb64c

[FIX] Fix PyKeops tensor operations and ensure contiguous arrays (#22)
# Fix PyKeops tensor operations and improve performance

This PR addresses several issues with tensor operations, particularly when using PyKeops:

1. Add a contiguous-array check and conversion for PyKeops tensors
2. Fix matrix multiplication in the symbolic evaluator by correcting axis parameters
3. Remove commented-out code in fancy_triangulation
4. Add a comment noting that the CUDA device check slows down the code
5. Ensure tensors are contiguous before passing them to LazyTensor

These changes improve the reliability of tensor operations across different backends and fix potential issues with non-contiguous arrays that could cause PyKeops to fail.
2 parents d6ba230 + 88af28a commit f5cb64c
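For context on the contiguity points above: transposing or slicing an array returns a strided view rather than a C-contiguous block, and PyKeops expects contiguous input. A minimal sketch in plain NumPy and PyTorch, independent of gempy_engine:

```python
import numpy as np
import torch

# Transposes are views with non-standard strides, not C-contiguous copies.
a = np.ones((4, 3)).T
print(a.flags["C_CONTIGUOUS"])                         # False
print(np.ascontiguousarray(a).flags["C_CONTIGUOUS"])   # True: copied into C order

# The same applies to torch tensors; .contiguous() makes a contiguous copy.
t = torch.ones(4, 3).T
print(t.is_contiguous())                               # False
print(t.contiguous().is_contiguous())                  # True
```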

File tree

5 files changed: +54 −51 lines changed


gempy_engine/core/backend_tensor.py

Lines changed: 7 additions & 3 deletions
@@ -21,7 +21,7 @@
 class BackendTensor:
     engine_backend: AvailableBackends

-    pykeops_enabled: bool
+    pykeops_enabled: bool = False
     use_pykeops: bool = False
     use_gpu: bool = True
     dtype: str = DEFAULT_TENSOR_DTYPE
@@ -46,7 +46,7 @@ def get_backend_string(cls) -> str:
        return "CPU"

    @classmethod
-   def change_backend_gempy(cls, engine_backend: AvailableBackends, use_gpu: bool = True, dtype: Optional[str] = None):
+   def change_backend_gempy(cls, engine_backend: AvailableBackends, use_gpu: bool = False, dtype: Optional[str] = None):
        cls._change_backend(engine_backend, use_pykeops=PYKEOPS, use_gpu=use_gpu, dtype=dtype)

    @classmethod
@@ -110,7 +110,10 @@ def _change_backend(cls, engine_backend: AvailableBackends, use_pykeops: bool =
            # Check if CUDA is available
            if not pytorch_copy.cuda.is_available():
                raise RuntimeError("GPU requested but CUDA is not available in PyTorch")
-           if False:
+           if False:  # * (Miguel) this slows down the code a lot
+               # Check if CUDA device is available
+               if not pytorch_copy.cuda.device_count():
+                   raise RuntimeError("GPU requested but no CUDA device is available in PyTorch")
            # Set default device to CUDA
            cls.device = pytorch_copy.device("cuda")
            pytorch_copy.set_default_device("cuda")
@@ -293,6 +296,7 @@ def _fill_diagonal(tensor, value):
        cls.tfnp.tile = lambda tensor, repeats: tensor.repeat(repeats)
        cls.tfnp.ravel = lambda tensor: tensor.flatten()
        cls.tfnp.packbits = _packbits
+       cls.tfnp.ascontiguousarray = lambda tensor: tensor.contiguous()
        cls.tfnp.fill_diagonal = _fill_diagonal
        cls.tfnp.isclose = lambda a, b, rtol=1e-05, atol=1e-08, equal_nan=False: isclose(
            a,
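The new `tfnp.ascontiguousarray` alias mirrors NumPy's function on the PyTorch side, so calling code can stay backend-agnostic. A minimal sketch of that aliasing pattern, using a stand-in namespace rather than gempy_engine's actual `BackendTensor`:

```python
import types

import torch

# Stand-in for a torch-backed, NumPy-like namespace (illustrative only).
tfnp = types.SimpleNamespace()
tfnp.ascontiguousarray = lambda tensor: tensor.contiguous()

t = torch.ones(4, 3).T                              # transposed view: not contiguous
assert not t.is_contiguous()
assert tfnp.ascontiguousarray(t).is_contiguous()    # same call name as in NumPy
```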

gempy_engine/modules/dual_contouring/fancy_triangulation.py

Lines changed: 21 additions & 32 deletions
@@ -231,36 +231,25 @@ def check_voxels_exist_next_to_edge(coord_col, edge_vector, _left_right_array_ac
        raise ValueError("n must be smaller than 12")

    # flip triangle order if normal is negative
-   if False:
-       indices = BackendTensor.tfnp.stack([x[normal >= 0], y[normal >= 0], z[normal >= 0]]).T
-       flipped_indices = BackendTensor.tfnp.stack(
-           [
-               x[normal < 0],
-               y[normal < 0],
-               z[normal < 0]]).T[:, [0, 2, 1]
-       ]
-       indices = BackendTensor.tfnp.stack([indices, flipped_indices])
-   else:
-       # flip triangle order if normal is negative
-       # Create masks for positive and negative normals
-       positive_mask = normal >= 0
-       negative_mask = normal < 0
-
-       # Extract indices for positive normals (keep original order)
-       x_pos = x[positive_mask]
-       y_pos = y[positive_mask]
-       z_pos = z[positive_mask]
-
-       # Extract indices for negative normals (flip order: x, z, y instead of x, y, z)
-       x_neg = x[negative_mask]
-       y_neg = y[negative_mask]
-       z_neg = z[negative_mask]
-
-       # Combine all indices
-       all_x = BackendTensor.tfnp.concatenate([x_pos, x_neg], axis=0)
-       all_y = BackendTensor.tfnp.concatenate([y_pos, z_neg], axis=0)  # Note: z_neg for flipped triangles
-       all_z = BackendTensor.tfnp.concatenate([z_pos, y_neg], axis=0)  # Note: y_neg for flipped triangles
-
-       # Stack into final indices array
-       indices = BackendTensor.tfnp.stack([all_x, all_y, all_z], axis=1)
+   # Create masks for positive and negative normals
+   positive_mask = normal >= 0
+   negative_mask = normal < 0
+
+   # Extract indices for positive normals (keep original order)
+   x_pos = x[positive_mask]
+   y_pos = y[positive_mask]
+   z_pos = z[positive_mask]
+
+   # Extract indices for negative normals (flip order: x, z, y instead of x, y, z)
+   x_neg = x[negative_mask]
+   y_neg = y[negative_mask]
+   z_neg = z[negative_mask]
+
+   # Combine all indices
+   all_x = BackendTensor.tfnp.concatenate([x_pos, x_neg], axis=0)
+   all_y = BackendTensor.tfnp.concatenate([y_pos, z_neg], axis=0)  # Note: z_neg for flipped triangles
+   all_z = BackendTensor.tfnp.concatenate([z_pos, y_neg], axis=0)  # Note: y_neg for flipped triangles
+
+   # Stack into final indices array
+   indices = BackendTensor.tfnp.stack([all_x, all_y, all_z], axis=1)
    return indices
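The mask-and-concatenate logic kept by this diff flips the winding of triangles with a negative normal by swapping the last two vertex indices. A toy NumPy illustration of that behaviour, with illustrative arrays rather than the module's real inputs:

```python
import numpy as np

x = np.array([0, 3, 6])
y = np.array([1, 4, 7])
z = np.array([2, 5, 8])
normal = np.array([1.0, -1.0, 1.0])

pos, neg = normal >= 0, normal < 0
all_x = np.concatenate([x[pos], x[neg]])
all_y = np.concatenate([y[pos], z[neg]])   # z takes y's place for flipped faces
all_z = np.concatenate([z[pos], y[neg]])   # y takes z's place for flipped faces
indices = np.stack([all_x, all_y, all_z], axis=1)
print(indices)   # [[0 1 2] [6 7 8] [3 5 4]] -> the negative-normal face is reordered
```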

gempy_engine/modules/evaluator/symbolic_evaluator.py

Lines changed: 10 additions & 6 deletions
@@ -21,11 +21,15 @@ def symbolic_evaluator(solver_input: SolverInput, weights: np.ndarray, options:
    eval_kernel = yield_evaluation_kernel(solver_input, options.kernel_options)
    if BackendTensor.engine_backend == gempy_engine.config.AvailableBackends.numpy:
        from pykeops.numpy import LazyTensor
-       lazy_weights = LazyTensor(np.asfortranarray(weights), axis=1)
+       # Create lazy_weights with correct dimensions: we want (16, 1) to match eval_kernel's nj dimension
+       lazy_weights = LazyTensor(np.asfortranarray(weights.reshape(-1, 1)), axis=0)  # axis=0 means this is the 'i' dimension
+       scalar_field: np.ndarray = (eval_kernel * lazy_weights).sum(axis=0, backend=backend_string).reshape(-1)
    else:
        from pykeops.torch import LazyTensor
-       lazy_weights = LazyTensor(weights.view((-1, 1)), axis=1)
-       scalar_field: np.ndarray = (eval_kernel.T * lazy_weights).sum(axis=1, backend=backend_string).reshape(-1)
+       lazy_weights = LazyTensor(weights.view((-1, 1)), axis=0)  # axis=0 for 'i' dimension
+       # Use element-wise multiplication and sum over the correct axis
+       scalar_field: np.ndarray = (eval_kernel * lazy_weights).sum(axis=0, backend=backend_string).reshape(-1)
+
    gx_field: Optional[np.ndarray] = None
    gy_field: Optional[np.ndarray] = None
    gz_field: Optional[np.ndarray] = None
@@ -34,12 +38,12 @@ def symbolic_evaluator(solver_input: SolverInput, weights: np.ndarray, options:
        eval_gx_kernel = yield_evaluation_grad_kernel(solver_input, options.kernel_options, axis=0)
        eval_gy_kernel = yield_evaluation_grad_kernel(solver_input, options.kernel_options, axis=1)

-       gx_field = (eval_gx_kernel.T * lazy_weights).sum(axis=1, backend=backend_string).reshape(-1)
-       gy_field = (eval_gy_kernel.T * lazy_weights).sum(axis=1, backend=backend_string).reshape(-1)
+       gx_field = (eval_gx_kernel * lazy_weights).sum(axis=0, backend=backend_string).reshape(-1)
+       gy_field = (eval_gy_kernel * lazy_weights).sum(axis=0, backend=backend_string).reshape(-1)

        if options.number_dimensions == 3:
            eval_gz_kernel = yield_evaluation_grad_kernel(solver_input, options.kernel_options, axis=2)
-           gz_field = (eval_gz_kernel.T * lazy_weights).sum(axis=1, backend=backend_string).reshape(-1)
+           gz_field = (eval_gz_kernel * lazy_weights).sum(axis=0, backend=backend_string).reshape(-1)
        elif options.number_dimensions == 2:
            gz_field = None
        else:
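The axis fix above relies on the PyKeops convention that a `LazyTensor` built from a 2D array is tagged as an 'i' variable with `axis=0` or a 'j' variable with `axis=1`, and that a reduction such as `.sum(axis=0)` collapses the 'i' dimension. A self-contained toy weighted kernel sum showing those semantics; it requires pykeops, and the shapes and names are illustrative, not gempy_engine's kernels:

```python
import numpy as np
from pykeops.numpy import LazyTensor

data = np.random.rand(16, 3)       # 16 "data" points        -> 'i' variable
grid = np.random.rand(1000, 3)     # 1000 evaluation points  -> 'j' variable
weights = np.random.rand(16)       # one weight per data point

x_i = LazyTensor(np.ascontiguousarray(data), axis=0)
y_j = LazyTensor(np.ascontiguousarray(grid), axis=1)
k_ij = (-((x_i - y_j) ** 2).sum(-1)).exp()        # symbolic (16, 1000) kernel

# axis=0 tags the weights as an 'i' variable so they broadcast along 'i';
# .sum(axis=0) then reduces over 'i', leaving one value per grid point.
w_i = LazyTensor(np.ascontiguousarray(weights.reshape(-1, 1)), axis=0)
field = (k_ij * w_i).sum(axis=0).reshape(-1)      # shape (1000,)
```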

gempy_engine/modules/kernel_constructor/_structs.py

Lines changed: 3 additions & 0 deletions
@@ -22,6 +22,9 @@ def _upgrade_kernel_input_to_keops_tensor_pytorch(struct_data_instance):

    for key, val in struct_data_instance.__dict__.items():
        if key == "n_faults_i": continue
+       if (val.is_contiguous() is False):
+           raise ValueError("Input tensors are not contiguous")
+
        struct_data_instance.__dict__[key] = LazyTensor(val.type(BackendTensor.dtype_obj))


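The guard added here fails fast when a non-contiguous tensor reaches the PyKeops path, instead of letting LazyTensor fail later with a harder-to-read error. A minimal sketch of that guard as a standalone helper; the name is hypothetical and not part of gempy_engine:

```python
import torch

def check_contiguous(t: torch.Tensor) -> torch.Tensor:
    # Mirror of the guard above: raise early on non-contiguous input.
    # An alternative (not what this diff does) would be to return
    # t.contiguous() and silently copy instead of raising.
    if not t.is_contiguous():
        raise ValueError("Input tensors are not contiguous")
    return t

check_contiguous(torch.ones(4, 3))        # passes: freshly created tensors are contiguous
# check_contiguous(torch.ones(4, 3).T)    # would raise ValueError
```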
gempy_engine/modules/kernel_constructor/_vectors_preparation.py

Lines changed: 13 additions & 10 deletions
@@ -48,8 +48,8 @@ def cov_vectors_preparation(interp_input: SolverInput, kernel_options: KernelOpt
    return KernelInput(
        ori_sp_matrices=orientations_sp_matrices,
        cartesian_selector=cartesian_selector,
-       nugget_scalar= interp_input.sp_internal.nugget_effect_ref_rest,
-       nugget_grad= interp_input.ori_internal.nugget_effect_grad,
+       nugget_scalar=interp_input.sp_internal.nugget_effect_ref_rest,
+       nugget_grad=interp_input.ori_internal.nugget_effect_grad,
        # Drift
        ori_drift=dips_ug,
        ref_drift=dips_ref_ui,
@@ -61,11 +61,11 @@
    )


-def evaluation_vectors_preparations(interp_input: SolverInput, kernel_options: KernelOptions,
-                                    axis: Optional[int] = None, slice_array = None) -> KernelInput:
+def evaluation_vectors_preparations(interp_input: SolverInput, kernel_options: KernelOptions,
+                                    axis: Optional[int] = None, slice_array=None) -> KernelInput:
    sp_: SurfacePointsInternals = interp_input.sp_internal
    ori_: OrientationsInternals = interp_input.ori_internal
-
+
    # if is none just get the whole array
    if slice_array is not None:
        grid: np.ndarray = interp_input.xyz_to_interpolate[slice_array]
@@ -129,10 +129,10 @@ def evaluation_vectors_preparations(interp_input: SolverInput, kernel_options: K
def _assembly_dips_points_tensors(matrices_size: MatricesSizes, ori_, sp_) -> OrientationSurfacePointsCoords:
    dips_ref_coord = assembly_dips_points_tensor(ori_.dip_positions_tiled, sp_.ref_surface_points, matrices_size)
    dips_rest_coord = assembly_dips_points_tensor(ori_.dip_positions_tiled, sp_.rest_surface_points, matrices_size)
-
+
    orientations_sp_matrices = OrientationSurfacePointsCoords(dips_ref_coord, dips_ref_coord, dips_rest_coord,
                                                              dips_rest_coord)  # When we create que core covariance these are the repeated since the distance are with themselves
-
+
    return orientations_sp_matrices


@@ -196,7 +196,6 @@ def _assembly_drift_grid_tensors(grid: np.ndarray, options: KernelOptions, matri
    # region UG
    dips_ug_d1, dips_ug_d2a, dips_ug_d2b, second_degree_selector = assembly_dips_ug_coords(ori_, options, matrices_size)

-
    grid_1 = BackendTensor.t.zeros_like(grid)
    grid_1[:, axis] = 1

@@ -223,7 +222,10 @@

def _assembly_fault_grid_tensors(fault_values_on_grid, options: KernelOptions, faults_val: FaultsData, ori_size: int) -> FaultDrift:
    fault_vector_ref, fault_vector_rest = _assembly_fault_internals(faults_val, options, ori_size)
-   fault_drift = FaultDrift(fault_vector_ref, fault_values_on_grid.T)
+   fault_drift = FaultDrift(
+       x_degree_1=fault_vector_ref,
+       y_degree_1=BackendTensor.t.ascontiguousarray(fault_values_on_grid.T)
+   )
    return fault_drift


@@ -244,6 +246,7 @@ def _assembler(matrix_val, ori_size_: int, uni_drift_size: int):  # TODO: This f

    ref_matrix_val = faults_val.fault_values_ref
    rest_matrix_val = faults_val.fault_values_rest
-   fault_vector_ref = _assembler(ref_matrix_val.T, ori_size, options.n_uni_eq)
+   ref_matrix_contig = BackendTensor.t.ascontiguousarray(ref_matrix_val.T)
+   fault_vector_ref = _assembler(ref_matrix_contig, ori_size, options.n_uni_eq)
    fault_vector_rest = _assembler(rest_matrix_val.T, ori_size, options.n_uni_eq)
    return fault_vector_ref, fault_vector_rest
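As in the fault-drift changes above, wrapping a transposed array in `ascontiguousarray` only copies when the layout actually needs fixing; on an already C-contiguous array the call passes the data through. A small NumPy check of that behaviour, using an illustrative array rather than the real fault values:

```python
import numpy as np

fault_values = np.arange(12, dtype=float).reshape(3, 4)

transposed = fault_values.T
print(transposed.flags["C_CONTIGUOUS"])          # False: a strided view
contig = np.ascontiguousarray(transposed)
print(contig.flags["C_CONTIGUOUS"])              # True: data copied into C order

# Already-contiguous input passes through without a copy.
print(np.shares_memory(np.ascontiguousarray(fault_values), fault_values))   # True
```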
