Skip to content

Commit e74a00f

Browse files
DuinoDuPICO-XR-Dev
andauthored
Feat/update rerun sample (#8)
* update rerun sample * Fix sm.head_to_imu binding bug. --------- Co-authored-by: bingwen.ai <[email protected]>
1 parent 8f6091f commit e74a00f

File tree

3 files changed

+145
-34
lines changed

3 files changed

+145
-34
lines changed

bindings/spatialmp4.cpp

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ PYBIND11_MODULE(spatialmp4, m) {
8585
m.def("get_major_version", &SpatialML::GetMajorVersion, "Get the major version number");
8686
m.def("get_minor_version", &SpatialML::GetMinorVersion, "Get the minor version number");
8787
m.def("get_patch_version", &SpatialML::GetPatchVersion, "Get the patch version number");
88+
m.attr("HEAD_MODEL_OFFSET") = Eigen::Vector3d(-0.05057, -0.01874, 0.04309);
8889

8990
// Bind StreamType enum
9091
py::enum_<SpatialML::StreamType>(m, "StreamType")
@@ -108,9 +109,7 @@ PYBIND11_MODULE(spatialmp4, m) {
108109
.def_readwrite("qx", &SpatialML::pose_frame::qx)
109110
.def_readwrite("qy", &SpatialML::pose_frame::qy)
110111
.def_readwrite("qz", &SpatialML::pose_frame::qz)
111-
.def("as_se3", [](const SpatialML::pose_frame &pose) {
112-
return pose.as_se3().matrix();
113-
})
112+
.def("as_se3", [](const SpatialML::pose_frame &pose) { return pose.as_se3().matrix(); })
114113
.def("__repr__", [](const SpatialML::pose_frame &p) {
115114
std::ostringstream ss;
116115
ss << p;
@@ -161,9 +160,8 @@ PYBIND11_MODULE(spatialmp4, m) {
161160
.def_readwrite("fy", &SpatialML::camera_intrinsics::fy)
162161
.def_readwrite("cx", &SpatialML::camera_intrinsics::cx)
163162
.def_readwrite("cy", &SpatialML::camera_intrinsics::cy)
164-
.def("as_cvmat", [](const SpatialML::camera_intrinsics &intrinsics) {
165-
return MatxToEigen(intrinsics.as_cvmat());
166-
})
163+
.def("as_cvmat",
164+
[](const SpatialML::camera_intrinsics &intrinsics) { return MatxToEigen(intrinsics.as_cvmat()); })
167165
.def("__repr__", [](const SpatialML::camera_intrinsics &i) {
168166
std::ostringstream ss;
169167
ss << i;
@@ -179,12 +177,9 @@ PYBIND11_MODULE(spatialmp4, m) {
179177
[](SpatialML::camera_extrinsics &extrinsics, const Eigen::Matrix4d &matrix) {
180178
extrinsics.extrinsics = EigenToMatx(matrix);
181179
})
182-
.def("as_cvmat", [](const SpatialML::camera_extrinsics &extrinsics) {
183-
return MatxToEigen(extrinsics.as_cvmat());
184-
})
185-
.def("as_se3", [](const SpatialML::camera_extrinsics &extrinsics) {
186-
return extrinsics.as_se3().matrix();
187-
})
180+
.def("as_cvmat",
181+
[](const SpatialML::camera_extrinsics &extrinsics) { return MatxToEigen(extrinsics.as_cvmat()); })
182+
.def("as_se3", [](const SpatialML::camera_extrinsics &extrinsics) { return extrinsics.as_se3().matrix(); })
188183
.def("__repr__", [](const SpatialML::camera_extrinsics &e) {
189184
std::ostringstream ss;
190185
ss << e;
@@ -312,7 +307,8 @@ PYBIND11_MODULE(spatialmp4, m) {
312307

313308
m.def(
314309
"head_to_imu",
315-
[](py::array_t<double> head_pose_array, py::array_t<double> head_model_offset_array) {
310+
[](py::array_t<double, py::array::forcecast> head_pose_array,
311+
py::array_t<double, py::array::forcecast> head_model_offset_array) {
316312
py::buffer_info pose_info = head_pose_array.request();
317313
if (pose_info.ndim != 2 || pose_info.shape[0] != 4 || pose_info.shape[1] != 4) {
318314
throw std::invalid_argument("head_pose must be a 4x4 matrix");
@@ -322,12 +318,36 @@ PYBIND11_MODULE(spatialmp4, m) {
322318
throw std::invalid_argument("head_model_offset must be a 3-element vector");
323319
}
324320

325-
Eigen::Map<const Eigen::Matrix<double, 4, 4, Eigen::RowMajor>> head_pose_map(
326-
static_cast<double *>(pose_info.ptr));
327-
Eigen::Map<const Eigen::Vector3d> head_model_offset_map(static_cast<double *>(offset_info.ptr));
321+
Eigen::Matrix4d head_pose_matrix;
322+
const auto row_stride = pose_info.strides[0] / static_cast<py::ssize_t>(sizeof(double));
323+
const auto col_stride = pose_info.strides[1] / static_cast<py::ssize_t>(sizeof(double));
324+
const double *pose_ptr = static_cast<const double *>(pose_info.ptr);
325+
if (pose_info.strides[1] == static_cast<py::ssize_t>(sizeof(double))) {
326+
Eigen::Map<const Eigen::Matrix<double, 4, 4, Eigen::RowMajor>> head_pose_map(pose_ptr);
327+
head_pose_matrix = head_pose_map;
328+
} else if (pose_info.strides[0] == static_cast<py::ssize_t>(sizeof(double))) {
329+
Eigen::Map<const Eigen::Matrix<double, 4, 4, Eigen::ColMajor>> head_pose_map(pose_ptr);
330+
head_pose_matrix = head_pose_map;
331+
} else {
332+
head_pose_matrix.setZero();
333+
for (py::ssize_t i = 0; i < 4; ++i) {
334+
for (py::ssize_t j = 0; j < 4; ++j) {
335+
head_pose_matrix(i, j) = pose_ptr[i * row_stride + j * col_stride];
336+
}
337+
}
338+
}
328339

329-
Eigen::Matrix4d head_pose_matrix = head_pose_map;
330-
Eigen::Vector3d head_model_offset = head_model_offset_map;
340+
Eigen::Vector3d head_model_offset;
341+
const double *offset_ptr = static_cast<const double *>(offset_info.ptr);
342+
if (offset_info.strides[0] == static_cast<py::ssize_t>(sizeof(double))) {
343+
Eigen::Map<const Eigen::Vector3d> head_model_offset_map(offset_ptr);
344+
head_model_offset = head_model_offset_map;
345+
} else {
346+
const auto offset_stride = offset_info.strides[0] / static_cast<py::ssize_t>(sizeof(double));
347+
for (py::ssize_t i = 0; i < 3; ++i) {
348+
head_model_offset[i] = offset_ptr[i * offset_stride];
349+
}
350+
}
331351

332352
Eigen::Matrix3d R = head_pose_matrix.block<3, 3>(0, 0);
333353
Eigen::Vector3d t = head_pose_matrix.block<3, 1>(0, 3);
@@ -338,9 +358,14 @@ PYBIND11_MODULE(spatialmp4, m) {
338358
Sophus::SE3d imu_pose;
339359
Utilities::HeadToImu(head_pose, head_model_offset, imu_pose);
340360

341-
Eigen::Matrix<double, 4, 4, Eigen::RowMajor> result = imu_pose.matrix();
361+
const Eigen::Matrix4d result = imu_pose.matrix();
342362
py::array_t<double> output({4, 4});
343-
std::memcpy(output.mutable_data(), result.data(), sizeof(double) * 16);
363+
auto mutable_result = output.mutable_unchecked<2>();
364+
for (py::ssize_t i = 0; i < 4; ++i) {
365+
for (py::ssize_t j = 0; j < 4; ++j) {
366+
mutable_result(i, j) = result(i, j);
367+
}
368+
}
344369
return output;
345370
},
346371
py::arg("head_pose"), py::arg("head_model_offset"));

examples/python/visualize_rerun.py

Lines changed: 83 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
import os
33
import cv2
44
import numpy as np
5+
from typing import Optional
56
from scipy.spatial.transform import Rotation
67
import spatialmp4 as sm
78
import rerun as rr
89
import rerun.blueprint as rrb
910

1011

1112
def pico_pose_to_open3d(extrinsic):
13+
extrinsic = np.array(extrinsic, dtype=np.float64, copy=True)
1214
convert = np.array([
1315
[0, 0, 1, 0],
1416
[1, 0, 0, 0],
@@ -19,26 +21,68 @@ def pico_pose_to_open3d(extrinsic):
1921
return output
2022

2123

24+
def ensure_right_handed_rotation(rotation_matrix: np.ndarray) -> np.ndarray:
25+
r = np.array(rotation_matrix, dtype=np.float64, copy=True)
26+
u, _, vh = np.linalg.svd(r)
27+
corrected = u @ vh
28+
if np.linalg.det(corrected) < 0:
29+
u[:, -1] *= -1
30+
corrected = u @ vh
31+
return corrected
32+
33+
34+
def head_to_imu(head_pose: np.ndarray, head_model_offset: np.ndarray) -> np.ndarray:
35+
"""Convert head pose (T_W_H) into IMU pose (T_W_I) matching Utilities::HeadToImu."""
36+
head_pose = np.asarray(head_pose, dtype=np.float64)
37+
if head_pose.shape != (4, 4):
38+
raise ValueError("head_pose must be a 4x4 matrix")
39+
40+
head_model_offset = np.asarray(head_model_offset, dtype=np.float64)
41+
if head_model_offset.shape != (3,):
42+
raise ValueError("head_model_offset must be a 3-element vector")
43+
44+
rotation_matrix = head_pose[:3, :3]
45+
translation = head_pose[:3, 3].copy()
46+
47+
rotation = Rotation.from_matrix(rotation_matrix)
48+
translation -= rotation.apply(head_model_offset)
49+
50+
quat_x, quat_y, quat_z, quat_w = rotation.as_quat() # scipy returns xyzw
51+
imu_quat = np.array([quat_y, -quat_x, quat_z, quat_w], dtype=np.float64) # xyzw
52+
imu_rotation = Rotation.from_quat(imu_quat).as_matrix()
53+
imu_translation = np.array([translation[1], -translation[0], translation[2]], dtype=np.float64)
54+
55+
imu_pose = np.eye(4, dtype=np.float64)
56+
imu_pose[:3, :3] = imu_rotation
57+
imu_pose[:3, 3] = imu_translation
58+
return imu_pose
59+
60+
2261
def main(
2362
video_file: str,
2463
depth_only: bool = False,
64+
rgb_only: bool = False,
65+
topk: Optional[int] = typer.Option(None, help="Limit visualization to the first K frames."),
2566
):
2667
"""Visualize spatialmp4 using rerun."""
68+
2769
reader = sm.Reader(video_file)
2870

2971
if depth_only:
3072
reader.set_read_mode(sm.ReadMode.DEPTH_ONLY)
73+
elif rgb_only:
74+
reader.set_read_mode(sm.ReadMode.RGB_ONLY)
3175
else:
3276
reader.set_read_mode(sm.ReadMode.DEPTH_FIRST)
3377

34-
if not reader.has_depth():
78+
if not rgb_only and not reader.has_depth():
3579
typer.echo(typer.style(f"No depth found in input file", fg=typer.colors.RED))
3680
return
3781
if not reader.has_pose():
38-
typer.echo(typer.style(f"No depth found in input file", fg=typer.colors.RED))
82+
typer.echo(typer.style(f"No pose found in input file", fg=typer.colors.RED))
3983
return
4084

41-
blueprint=rrb.Horizontal(
85+
blueprint = rrb.Horizontal(
4286
rrb.Vertical(
4387
rrb.Spatial3DView(name="3D", origin="world"),
4488
rrb.TextDocumentView(name="Description", origin="/description"),
@@ -80,6 +124,8 @@ def main(
80124
cx = float(reader.get_rgb_intrinsics_left().cx)
81125
cy = float(reader.get_rgb_intrinsics_left().cy)
82126

127+
processed = 0
128+
83129
while reader.has_next():
84130
if depth_only:
85131
depth_frame = reader.load_depth()
@@ -91,22 +137,35 @@ def main(
91137
extrinsic = np.eye(4)
92138
extrinsic[:3, 3] = [TWH.x, TWH.y, TWH.z]
93139
extrinsic[:3, :3] = Rotation.from_quat((TWH.qx, TWH.qy, TWH.qz, TWH.qw)).as_matrix()
140+
elif rgb_only:
141+
frame_rgb = reader.load_rgb()
142+
timestamp = frame_rgb.timestamp
143+
144+
T_I_Srgb = reader.get_rgb_extrinsics_left().as_se3()
145+
T_W_Hrgb = frame_rgb.pose.as_se3()
146+
T_W_Irgb = sm.head_to_imu(T_W_Hrgb, sm.HEAD_MODEL_OFFSET)
147+
T_W_Srgb = T_W_Irgb @ T_I_Srgb
148+
extrinsic = T_W_Srgb
149+
94150
else:
95151
rgbd = reader.load_rgbd(True)
96152
timestamp = rgbd.timestamp
97153
depth_np = rgbd.depth
98154
extrinsic = rgbd.T_W_S
99-
print(f"Loading frame {reader.get_index() + 1} / {reader.get_frame_count()}, timestamp: {timestamp}")
100155

101-
# preprocess on depthmap
102-
depth_uint16 = (depth_np * 1000).astype(np.uint16)
103-
sobelx = cv2.Sobel(depth_uint16, cv2.CV_32F, 1, 0, ksize=3)
104-
sobely = cv2.Sobel(depth_uint16, cv2.CV_32F, 0, 1, ksize=3)
105-
grad_mag = np.sqrt(sobelx**2 + sobely**2)
106-
depth_np[grad_mag > 500] = 0
107-
depth_np[(depth_np < 0.2) | (depth_np > 5)] = 0
156+
print(f"Loading frame {reader.get_index() + 1} / {reader.get_frame_count()}, timestamp: {timestamp}")
157+
158+
if not rgb_only:
159+
# preprocess on depthmap
160+
depth_uint16 = (depth_np * 1000).astype(np.uint16)
161+
sobelx = cv2.Sobel(depth_uint16, cv2.CV_32F, 1, 0, ksize=3)
162+
sobely = cv2.Sobel(depth_uint16, cv2.CV_32F, 0, 1, ksize=3)
163+
grad_mag = np.sqrt(sobelx**2 + sobely**2)
164+
depth_np[grad_mag > 500] = 0
165+
depth_np[(depth_np < 0.2) | (depth_np > 5)] = 0
108166

109167
extrinsic = pico_pose_to_open3d(extrinsic)
168+
extrinsic[:3, :3] = ensure_right_handed_rotation(extrinsic[:3, :3])
110169

111170
rr.set_time_seconds("time", timestamp)
112171
rr.log("world/xyz", rr.Arrows3D(vectors=[[1, 0, 0], [0, 1, 0], [0, 0, 1]], colors=[[255, 0, 0], [0, 255, 0], [0, 0, 255]]))
@@ -119,9 +178,19 @@ def main(
119178
position = extrinsic[:3, 3]
120179
rotation = Rotation.from_matrix(extrinsic[:3, :3]).as_quat()
121180
rr.log("world/camera", rr.Transform3D(translation=position, rotation=rr.Quaternion(xyzw=rotation)))
122-
rr.log("world/camera/image/depth", rr.DepthImage(depth_np, meter=1.0))
123-
if not depth_only:
124-
rr.log("world/camera/image/rgb", rr.Image(rgbd.rgb, color_model="BGR").compress(jpeg_quality=95))
181+
182+
if rgb_only:
183+
rr.log("world/camera/image/rgb", rr.Image(frame_rgb.left_rgb, color_model="BGR").compress(jpeg_quality=95))
184+
else:
185+
rr.log("world/camera/image/depth", rr.DepthImage(depth_np, meter=1.0))
186+
if not depth_only:
187+
rr.log("world/camera/image/rgb", rr.Image(rgbd.rgb, color_model="BGR").compress(jpeg_quality=95))
188+
189+
processed += 1
190+
if topk is not None and processed >= topk:
191+
break
192+
if topk and reader.get_index() > topk:
193+
break
125194

126195

127196
if __name__ == "__main__":#

python/tests/test_utilities.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,20 @@ def test_head_to_imu_general_pose():
8585
imu_pose = spatialmp4.head_to_imu(head_pose, offset)
8686
expected = compute_expected_head_to_imu(head_pose, offset)
8787
assert np.allclose(imu_pose, expected, atol=1e-10)
88+
89+
90+
def test_head_to_imu_fortran_order_input():
91+
angle = np.deg2rad(30.0)
92+
Rx = np.array([
93+
[1.0, 0.0, 0.0],
94+
[0.0, np.cos(angle), -np.sin(angle)],
95+
[0.0, np.sin(angle), np.cos(angle)],
96+
])
97+
head_pose = np.eye(4, order="F")
98+
head_pose[:3, :3] = Rx
99+
head_pose[:3, 3] = np.array([0.3, -0.4, 0.5])
100+
offset = np.array([0.12, -0.08, 0.04])
101+
102+
expected = compute_expected_head_to_imu(np.array(head_pose), offset)
103+
imu_pose = spatialmp4.head_to_imu(head_pose, offset)
104+
assert np.allclose(imu_pose, expected, atol=1e-12)

0 commit comments

Comments
 (0)