
Commit 3b33588

Merge pull request #45 from dbinfrago/fix/horizon-calculation-rewrite
fix: complete rewrite of horizon calculation algorithm
2 parents 5ba73bf + 601197e commit 3b33588

File tree

3 files changed (+278 / -342 lines)
Lines changed: 89 additions & 275 deletions
@@ -1,60 +1,45 @@
 # Copyright DB InfraGO AG and contributors
 # SPDX-License-Identifier: MIT

+"""Horizon calculation for camera sensors based on extrinsics and intrinsics."""
+
 import typing as t

 import numpy as np
 import raillabel
 from scipy.spatial.transform import Rotation


-def _generate_line_function(
-    p1: raillabel.format.Point2d, p2: raillabel.format.Point2d
-) -> t.Callable[[float], float]:
-    """Generate a callable line function from two given points in 2D space.
-
-    Parameters
-    ----------
-    p1 : raillabel.format.Point2d
-        The first point.
-    p2 : raillabel.format.Point2d
-        The second point. Please note that p2.x has to be different from p1.x.
-
-    Returns
-    -------
-    t.Callable[[float], float]
-        A callable line function f(x: float) -> float that returns the y for a given x.
-        The calculated line is the line that connects p1 and p2 so that y = f(x) = mx + n.
-    """
-    if p2.x == p1.x:
-        msg = "The two points must have different x values!"
-        raise ValueError(msg)
-
-    m: float = (p2.y - p1.y) / (p2.x - p1.x)
-    n: float = p1.y - m * p1.x
-
-    def f(x: float) -> float:
-        return float(m * x + n)
-
-    return f
-
-
 class _HorizonCalculator:
-    extrinsics: np.ndarray
-    extrinsics_inv: np.ndarray
-    intrinsics: np.ndarray
-    distortion_coefficients: np.ndarray
-    sensor_resolution: np.ndarray
-    alternative_calibration_workaround: bool
+    """Calculate the horizon line position in camera images.
+
+    The horizon is calculated based on the camera's pitch angle (tilt) derived
+    from the extrinsics rotation matrix. This approach is robust across different
+    calibration conventions (OSDAR23, OSDAR26, etc.) as it directly uses the
+    geometric relationship between camera orientation and the image plane.
+
+    The horizon line in the image represents where points at camera height
+    in the far distance would project. Points above this line in the image
+    represent real-world points above camera height (sky, distant hills, etc.).
+    Track annotations should always be below the horizon since rails are on
+    the ground below camera height.
+    """

     def __init__(
         self,
         camera: raillabel.format.Camera,
-        alternative_calibration_workaround: bool = False,
+        alternative_calibration_workaround: bool = False,  # noqa: ARG002
     ) -> None:
-        # Apply workarounds
-        self.alternative_calibration_workaround = alternative_calibration_workaround
-
+        """Initialize the horizon calculator with camera parameters.
+
+        Parameters
+        ----------
+        camera : raillabel.format.Camera
+            The camera sensor with extrinsics and intrinsics.
+        alternative_calibration_workaround : bool, optional
+            Legacy parameter kept for backwards compatibility.
+            The new implementation handles all calibration formats automatically.
+        """
        if camera.extrinsics is None:
            msg = "Only sensors with extrinsics != None are supported."
            raise ValueError(msg)
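
The new class docstring above summarizes the pitch-based approach; the actual pitch extraction lands in `__init__` in the hunk below. As a minimal standalone sketch of that computation (the quaternion here is hypothetical, not taken from any real calibration):

```python
import numpy as np
from scipy.spatial.transform import Rotation

# Hypothetical extrinsics: the -90 deg part aligns the world frame with an
# optical frame (z forward), the extra -5 deg pitches the view downward.
quat = Rotation.from_euler("y", -95.0, degrees=True).as_quat()  # (x, y, z, w)

rotation_matrix = Rotation.from_quat(quat).as_matrix()

# Optical axis [0, 0, 1] of the camera frame, expressed in world coordinates
camera_forward_world = rotation_matrix.T @ np.array([0.0, 0.0, 1.0])

# A negative world-Z component of the forward vector means the camera looks down
pitch_radians = float(np.arcsin(-camera_forward_world[2]))
print(round(float(np.degrees(pitch_radians)), 1))  # -> 5.0 degrees downward
```

The forward direction is just the third row of the rotation matrix (`R.T @ [0, 0, 1]`), so no translation information is needed for the horizon.
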
@@ -65,247 +50,76 @@ def __init__(
             msg = "Only sensors with intrinsics != None with IntrinsicsPinhole format are supported."
             raise ValueError(msg)

-        extrinsics: raillabel.format.Transform = camera.extrinsics
-        intrinsics: raillabel.format.IntrinsicsPinhole = camera.intrinsics
-
-        # Store extrinsics/intrinsics/distortion as numpy matrices
-        self.extrinsics = np.zeros((4, 4))
-        self.intrinsics = np.zeros((3, 3))
-        self.distortion_coefficients = np.zeros((1, 5))
+        extrinsics = camera.extrinsics
+        intrinsics = camera.intrinsics

-        # Get extrinsics translation vector
-        extrinsics_translation: np.ndarray = np.array(
-            [[extrinsics.pos.x], [extrinsics.pos.y], [extrinsics.pos.z]]
-        )
-        # Get extrinsics rotation matrix (from quaternion)
-        extrinsics_rotation: np.ndarray = Rotation.from_quat(
-            [
-                extrinsics.quat.x,
-                extrinsics.quat.y,
-                extrinsics.quat.z,
-                extrinsics.quat.w,
-            ]
+        # Get rotation matrix from quaternion
+        self._rotation_matrix: np.ndarray = Rotation.from_quat(
+            [extrinsics.quat.x, extrinsics.quat.y, extrinsics.quat.z, extrinsics.quat.w]
         ).as_matrix()
-        # Combine extrinsics rotation matrix and translation vector into extrinsics matrix
-        extrinsics_helper = np.zeros((3, 4))
-        extrinsics_helper = np.hstack((extrinsics_rotation, extrinsics_translation))
-        self.extrinsics = np.vstack((extrinsics_helper, np.array([0, 0, 0, 1])))
-        self.extrinsics_inv = np.linalg.inv(self.extrinsics)
-
-        # Get intrinsics matrix and convert it into 3x3
-        cm: tuple = intrinsics.camera_matrix
-        self.intrinsics = np.array(
-            [[cm[0], cm[1], cm[2]], [cm[4], cm[5], cm[6]], [cm[8], cm[9], cm[10]]]
-        )
-
-        # Get distortion coefficients and convert to numpy array
-        distortion_coefficients = intrinsics.distortion
-        self.distortion_coefficients = np.array(distortion_coefficients)
-
-        # Store sensor resolution for future use
-        self.sensor_resolution = np.array([intrinsics.width_px, intrinsics.height_px])
-
-    def transform_coordinates_from_world_to_camera(
-        self, world_coordinates: raillabel.format.Point3d
-    ) -> raillabel.format.Point3d:
-        # Convert world coordinates into vector (numpy 4x1 matrix)
-        world_coordinates_converted: np.ndarray = np.array(
-            [world_coordinates.x, world_coordinates.y, world_coordinates.z, 1.0]
-        )
-
-        # Transform coordinates from world to camera coordinates
-        camera_coordinates: np.ndarray = self.extrinsics_inv.dot(world_coordinates_converted)
-
-        # Convert camera coordinates back into raillabel Point3d format and return them
-        return raillabel.format.Point3d(
-            camera_coordinates[0], camera_coordinates[1], camera_coordinates[2]
-        )
-
-    def transform_coordinates_from_camera_to_image(
-        self, camera_coordinates: raillabel.format.Point3d
-    ) -> raillabel.format.Point2d:
-        # Convert camera coordinates into vector (numpy 1x3 matrix)
-        # x forward, y left, z up
-        camera_coordinates_converted: np.ndarray = np.array(
-            [camera_coordinates.x, camera_coordinates.y, camera_coordinates.z]
-        )
-
-        # For OSDaR23, adjust/flip axes
-        # so that it matches the conventions used for the following calculations
-        # x right, y down, z forward
-        if not self.alternative_calibration_workaround:
-            camera_coordinates_converted = np.array(
-                [
-                    -camera_coordinates_converted[1],
-                    -camera_coordinates_converted[2],
-                    camera_coordinates_converted[0],
-                ]
-            )
-
-        # Transform coordinates from camera to image coordinates 3d (u',v',w')
-        image_coordinates_3d: np.ndarray = self.intrinsics.dot(camera_coordinates_converted)
-
-        # Get the corresponding 2d image coordinates (u,v), where u=u'/w' and v=v'/w'
-        image_coordinates_2d: np.ndarray = np.array(
-            [
-                image_coordinates_3d[0] / image_coordinates_3d[2],
-                image_coordinates_3d[1] / image_coordinates_3d[2],
-            ]
-        )
-
-        # NOTE: Distortion coefficients are ignored

-        # Return the 2d image coordinates as Point2d
-        return raillabel.format.Point2d(image_coordinates_2d[0], image_coordinates_2d[1])
+        # Get intrinsics parameters
+        cm = intrinsics.camera_matrix
+        self._fy: float = float(cm[5])  # Focal length in y direction (pixels)
+        self._cy: float = float(cm[6])  # Principal point y coordinate

-    def get_translation_vector_from_extrinsics(self) -> np.ndarray:
-        translation_vector: np.ndarray = np.array(
-            [self.extrinsics[0][3], self.extrinsics[1][3], self.extrinsics[2][3]]
-        )
-        return translation_vector
+        # Calculate camera forward direction in world coordinates
+        # Camera Z-axis (optical axis) in camera frame is [0, 0, 1]
+        # In world coordinates: R^T @ [0, 0, 1]
+        self._camera_forward_world: np.ndarray = self._rotation_matrix.T @ np.array([0.0, 0.0, 1.0])

-    def get_rotation_matrix_from_extrinsics(self) -> np.ndarray:
-        rotation_matrix: np.ndarray = np.array(
-            [
-                [self.extrinsics[0][0], self.extrinsics[0][1], self.extrinsics[0][2]],
-                [self.extrinsics[1][0], self.extrinsics[1][1], self.extrinsics[1][2]],
-                [self.extrinsics[2][0], self.extrinsics[2][1], self.extrinsics[2][2]],
-            ]
-        )
-        return rotation_matrix
-
-    def get_sensor_position_in_world_coordinates_flat(self) -> np.ndarray:
-        # Get the translation vector
-        sensor_position: np.ndarray = self.get_translation_vector_from_extrinsics()
-
-        # Negate the vector (thus making it the position of the sensor
-        # in the world coordinate system)
-        sensor_position *= -1
-
-        # Discard the z axis (flat world assumption)
-        sensor_position[2] = 0
-
-        # Return the vector
-        return sensor_position
-
-    def get_sensor_orientation_vector_in_world_coordinates_flat(self) -> np.ndarray:
-        # The orientation vector shall be the vector that, in the world coordinate system,
-        # points in the direction that the sensor is facing
-        # Assumes flat world assumption, so z is thrown away (set to 0)
-
-        # Create a vector that points directly foward (in the world coordinate system)
-        front_vector_world: np.ndarray = np.array([1, 0, 0])
-
-        # Get the rotation matrix
-        rotation_matrix: np.ndarray = self.get_rotation_matrix_from_extrinsics()
-        if rotation_matrix.shape != (3, 3):
-            raise AssertionError
-
-        # Rotate the front vector using the rotation matrix from the extrinsics
-        # (without the translation vector)
-        rotated_front_vector: np.ndarray = rotation_matrix.dot(front_vector_world)
-
-        # Swap/flip rotation vector if alternative calibration conventions are used
-        if self.alternative_calibration_workaround:
-            rotated_front_vector = np.array(
-                [
-                    -rotated_front_vector[1],
-                    -rotated_front_vector[0],
-                    rotated_front_vector[2],
-                ]
-            )
-
-        # Mirror the vector along the x axis (by negating y)
-        rotated_front_vector[1] *= -1
-
-        # Discard the z axis (flat world assumption)
-        rotated_front_vector[2] = 0
-
-        # Normalize the vector to unit length (length = 1)
-        normalized_orientation_vector: np.ndarray = rotated_front_vector / np.linalg.norm(
-            rotated_front_vector
-        )
-
-        # Return the vector (that, in the world coordinate system,
-        # should now point inthe orientation that the sensor is facing)
-        return normalized_orientation_vector
+        # Calculate pitch angle (angle below horizontal)
+        # Negative Z component of forward vector means looking down
+        self._pitch_radians: float = float(np.arcsin(-self._camera_forward_world[2]))

     def calculate_horizon(
         self,
-        center_distance: float,
-        side_distance: float,
+        center_distance: float = 10000.0,  # noqa: ARG002
+        side_distance: float = 1000.0,  # noqa: ARG002
         inclination: float = 0.0,
     ) -> t.Callable[[float], float]:
-        # Select points in the distance (within world, aka lidar coordinate system)
-
-        # Calculate a center point of the horizon that is far away (10km)
-        # based on the camera's position and orientation
-        camera_position: np.ndarray = self.get_sensor_position_in_world_coordinates_flat()
-        camera_orientation_vector: np.ndarray = (
-            self.get_sensor_orientation_vector_in_world_coordinates_flat()
-        )
-        horizon_center_point_far_away: np.ndarray = camera_position + (
-            center_distance * camera_orientation_vector
-        )
-
-        # Add buffer height based on the maximum inclination a train can have
-        horizon_center_point_far_away[2] += inclination * center_distance
-
-        # In 2D, given the orientation vector (a,b), the normal vectors (orthogonal to the
-        # orientation vector) are left=(-b,a) and right=(b,-a).
-        # Remember that z is ignored because of the flat world assumption.
-        left_vector: np.ndarray = np.array(
-            [
-                -camera_orientation_vector[1],
-                camera_orientation_vector[0],
-                camera_orientation_vector[2],
-            ]
-        )
-        right_vector: np.ndarray = np.array(
-            [
-                camera_orientation_vector[1],
-                -camera_orientation_vector[0],
-                camera_orientation_vector[2],
-            ]
-        )
-
-        # Calculate two points left and right of the center point of the horizon (e.g. 1km each)
-        horizon_left_point_far_away: np.ndarray = horizon_center_point_far_away + (
-            side_distance * left_vector
-        )
-        horizon_right_point_far_away: np.ndarray = horizon_center_point_far_away + (
-            side_distance * right_vector
-        )
-
-        # Convert points into raillabel Point3d format
-        p1_world: raillabel.format.Point3d = raillabel.format.Point3d(
-            horizon_left_point_far_away[0],
-            horizon_left_point_far_away[1],
-            horizon_left_point_far_away[2],
-        )
-        p2_world: raillabel.format.Point3d = raillabel.format.Point3d(
-            horizon_right_point_far_away[0],
-            horizon_right_point_far_away[1],
-            horizon_right_point_far_away[2],
-        )
-
-        # Convert points from world to camera coordinate system
-        p1_camera: raillabel.format.Point3d = self.transform_coordinates_from_world_to_camera(
-            p1_world
-        )
-        p2_camera: raillabel.format.Point3d = self.transform_coordinates_from_world_to_camera(
-            p2_world
-        )
-
-        # Convert points from camera to image coordinate system
-        p1_image: raillabel.format.Point2d = self.transform_coordinates_from_camera_to_image(
-            p1_camera
-        )
-        p2_image: raillabel.format.Point2d = self.transform_coordinates_from_camera_to_image(
-            p2_camera
-        )
-
-        # Calculate line function between selected points in the image coordinate system
-        horizon_line: t.Callable[[float], float] = _generate_line_function(p1_image, p2_image)
-
-        return horizon_line
+        """Calculate a function that returns the horizon Y coordinate for any X.
+
+        The horizon is calculated based on the camera's pitch angle. For a camera
+        pointing horizontally (pitch=0), the horizon is at the principal point (cy).
+        For a camera tilted down, the horizon moves up in the image (smaller Y).
+
+        Parameters
+        ----------
+        center_distance : float, optional
+            Legacy parameter, kept for backwards compatibility. Not used.
+        side_distance : float, optional
+            Legacy parameter, kept for backwards compatibility. Not used.
+        inclination : float, optional
+            Additional angular offset to add to the horizon calculation (in radians).
+            Positive values move the horizon up (more permissive for tracks).
+            This represents the expected maximum track inclination.
+            Default is 0.0.
+
+        Returns
+        -------
+        Callable[[float], float]
+            A function that takes an X coordinate and returns the horizon Y coordinate.
+            Since the pitch-based horizon is horizontal in the image, this returns
+            the same Y value for all X coordinates.
+        """
+        # Calculate horizon Y position based on pitch angle
+        # If camera looks down by pitch radians, horizon is above cy by tan(pitch) * fy
+        # The inclination parameter adds additional upward shift for tolerance
+        total_angle = self._pitch_radians + inclination
+        horizon_y = float(self._cy - np.tan(total_angle) * self._fy)
+
+        # Return a function that gives the same horizon_y for all x
+        # (horizon line is horizontal in image for level cameras)
+        def horizon_function(x: float) -> float:  # noqa: ARG001
+            return horizon_y
+
+        return horizon_function
+
+    @property
+    def pitch_degrees(self) -> float:
+        """Return the camera pitch angle in degrees.
+
+        Positive values mean the camera is pointing downward.
+        """
+        return float(np.degrees(self._pitch_radians))
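
In numbers, the added `calculate_horizon` boils down to `horizon_y = cy - tan(pitch + inclination) * fy`. A minimal sketch of that arithmetic with hypothetical intrinsics (none of the values below come from the commit):

```python
import numpy as np

# Hypothetical values, for illustration only
fy, cy = 2000.0, 1100.0          # focal length and principal point y, in pixels
pitch_radians = np.radians(5.0)  # camera tilted 5 degrees downward
inclination = np.radians(1.0)    # extra tolerance for uphill track

# Same arithmetic as calculate_horizon(): tilting down moves the horizon up
horizon_y = cy - np.tan(pitch_radians + inclination) * fy
print(round(horizon_y, 1))  # -> 889.8, i.e. about 210 px above the principal point
```

Because the returned function ignores `x`, the horizon is a horizontal line in the image; a camera pitched upward gives a negative tangent and moves the horizon below `cy`.
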

0 commit comments
