Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 45 additions & 20 deletions bindings/spatialmp4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ PYBIND11_MODULE(spatialmp4, m) {
m.def("get_major_version", &SpatialML::GetMajorVersion, "Get the major version number");
m.def("get_minor_version", &SpatialML::GetMinorVersion, "Get the minor version number");
m.def("get_patch_version", &SpatialML::GetPatchVersion, "Get the patch version number");
m.attr("HEAD_MODEL_OFFSET") = Eigen::Vector3d(-0.05057, -0.01874, 0.04309);

// Bind StreamType enum
py::enum_<SpatialML::StreamType>(m, "StreamType")
Expand All @@ -108,9 +109,7 @@ PYBIND11_MODULE(spatialmp4, m) {
.def_readwrite("qx", &SpatialML::pose_frame::qx)
.def_readwrite("qy", &SpatialML::pose_frame::qy)
.def_readwrite("qz", &SpatialML::pose_frame::qz)
.def("as_se3", [](const SpatialML::pose_frame &pose) {
return pose.as_se3().matrix();
})
.def("as_se3", [](const SpatialML::pose_frame &pose) { return pose.as_se3().matrix(); })
.def("__repr__", [](const SpatialML::pose_frame &p) {
std::ostringstream ss;
ss << p;
Expand Down Expand Up @@ -161,9 +160,8 @@ PYBIND11_MODULE(spatialmp4, m) {
.def_readwrite("fy", &SpatialML::camera_intrinsics::fy)
.def_readwrite("cx", &SpatialML::camera_intrinsics::cx)
.def_readwrite("cy", &SpatialML::camera_intrinsics::cy)
.def("as_cvmat", [](const SpatialML::camera_intrinsics &intrinsics) {
return MatxToEigen(intrinsics.as_cvmat());
})
.def("as_cvmat",
[](const SpatialML::camera_intrinsics &intrinsics) { return MatxToEigen(intrinsics.as_cvmat()); })
.def("__repr__", [](const SpatialML::camera_intrinsics &i) {
std::ostringstream ss;
ss << i;
Expand All @@ -179,12 +177,9 @@ PYBIND11_MODULE(spatialmp4, m) {
[](SpatialML::camera_extrinsics &extrinsics, const Eigen::Matrix4d &matrix) {
extrinsics.extrinsics = EigenToMatx(matrix);
})
.def("as_cvmat", [](const SpatialML::camera_extrinsics &extrinsics) {
return MatxToEigen(extrinsics.as_cvmat());
})
.def("as_se3", [](const SpatialML::camera_extrinsics &extrinsics) {
return extrinsics.as_se3().matrix();
})
.def("as_cvmat",
[](const SpatialML::camera_extrinsics &extrinsics) { return MatxToEigen(extrinsics.as_cvmat()); })
.def("as_se3", [](const SpatialML::camera_extrinsics &extrinsics) { return extrinsics.as_se3().matrix(); })
.def("__repr__", [](const SpatialML::camera_extrinsics &e) {
std::ostringstream ss;
ss << e;
Expand Down Expand Up @@ -312,7 +307,8 @@ PYBIND11_MODULE(spatialmp4, m) {

m.def(
"head_to_imu",
[](py::array_t<double> head_pose_array, py::array_t<double> head_model_offset_array) {
[](py::array_t<double, py::array::forcecast> head_pose_array,
py::array_t<double, py::array::forcecast> head_model_offset_array) {
py::buffer_info pose_info = head_pose_array.request();
if (pose_info.ndim != 2 || pose_info.shape[0] != 4 || pose_info.shape[1] != 4) {
throw std::invalid_argument("head_pose must be a 4x4 matrix");
Expand All @@ -322,12 +318,36 @@ PYBIND11_MODULE(spatialmp4, m) {
throw std::invalid_argument("head_model_offset must be a 3-element vector");
}

Eigen::Map<const Eigen::Matrix<double, 4, 4, Eigen::RowMajor>> head_pose_map(
static_cast<double *>(pose_info.ptr));
Eigen::Map<const Eigen::Vector3d> head_model_offset_map(static_cast<double *>(offset_info.ptr));
Eigen::Matrix4d head_pose_matrix;
const auto row_stride = pose_info.strides[0] / static_cast<py::ssize_t>(sizeof(double));
const auto col_stride = pose_info.strides[1] / static_cast<py::ssize_t>(sizeof(double));
const double *pose_ptr = static_cast<const double *>(pose_info.ptr);
if (pose_info.strides[1] == static_cast<py::ssize_t>(sizeof(double))) {
Eigen::Map<const Eigen::Matrix<double, 4, 4, Eigen::RowMajor>> head_pose_map(pose_ptr);
head_pose_matrix = head_pose_map;
} else if (pose_info.strides[0] == static_cast<py::ssize_t>(sizeof(double))) {
Eigen::Map<const Eigen::Matrix<double, 4, 4, Eigen::ColMajor>> head_pose_map(pose_ptr);
head_pose_matrix = head_pose_map;
} else {
head_pose_matrix.setZero();
for (py::ssize_t i = 0; i < 4; ++i) {
for (py::ssize_t j = 0; j < 4; ++j) {
head_pose_matrix(i, j) = pose_ptr[i * row_stride + j * col_stride];
}
}
}

Eigen::Matrix4d head_pose_matrix = head_pose_map;
Eigen::Vector3d head_model_offset = head_model_offset_map;
Eigen::Vector3d head_model_offset;
const double *offset_ptr = static_cast<const double *>(offset_info.ptr);
if (offset_info.strides[0] == static_cast<py::ssize_t>(sizeof(double))) {
Eigen::Map<const Eigen::Vector3d> head_model_offset_map(offset_ptr);
head_model_offset = head_model_offset_map;
} else {
const auto offset_stride = offset_info.strides[0] / static_cast<py::ssize_t>(sizeof(double));
for (py::ssize_t i = 0; i < 3; ++i) {
head_model_offset[i] = offset_ptr[i * offset_stride];
}
}

Eigen::Matrix3d R = head_pose_matrix.block<3, 3>(0, 0);
Eigen::Vector3d t = head_pose_matrix.block<3, 1>(0, 3);
Expand All @@ -338,9 +358,14 @@ PYBIND11_MODULE(spatialmp4, m) {
Sophus::SE3d imu_pose;
Utilities::HeadToImu(head_pose, head_model_offset, imu_pose);

Eigen::Matrix<double, 4, 4, Eigen::RowMajor> result = imu_pose.matrix();
const Eigen::Matrix4d result = imu_pose.matrix();
py::array_t<double> output({4, 4});
std::memcpy(output.mutable_data(), result.data(), sizeof(double) * 16);
auto mutable_result = output.mutable_unchecked<2>();
for (py::ssize_t i = 0; i < 4; ++i) {
for (py::ssize_t j = 0; j < 4; ++j) {
mutable_result(i, j) = result(i, j);
}
}
return output;
},
py::arg("head_pose"), py::arg("head_model_offset"));
Expand Down
97 changes: 83 additions & 14 deletions examples/python/visualize_rerun.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
import os
import cv2
import numpy as np
from typing import Optional
from scipy.spatial.transform import Rotation
import spatialmp4 as sm
import rerun as rr
import rerun.blueprint as rrb


def pico_pose_to_open3d(extrinsic):
extrinsic = np.array(extrinsic, dtype=np.float64, copy=True)
convert = np.array([
[0, 0, 1, 0],
[1, 0, 0, 0],
Expand All @@ -19,26 +21,68 @@ def pico_pose_to_open3d(extrinsic):
return output


def ensure_right_handed_rotation(rotation_matrix: np.ndarray) -> np.ndarray:
r = np.array(rotation_matrix, dtype=np.float64, copy=True)
u, _, vh = np.linalg.svd(r)
corrected = u @ vh
if np.linalg.det(corrected) < 0:
u[:, -1] *= -1
corrected = u @ vh
return corrected


def head_to_imu(head_pose: np.ndarray, head_model_offset: np.ndarray) -> np.ndarray:
"""Convert head pose (T_W_H) into IMU pose (T_W_I) matching Utilities::HeadToImu."""
head_pose = np.asarray(head_pose, dtype=np.float64)
if head_pose.shape != (4, 4):
raise ValueError("head_pose must be a 4x4 matrix")

head_model_offset = np.asarray(head_model_offset, dtype=np.float64)
if head_model_offset.shape != (3,):
raise ValueError("head_model_offset must be a 3-element vector")

rotation_matrix = head_pose[:3, :3]
translation = head_pose[:3, 3].copy()

rotation = Rotation.from_matrix(rotation_matrix)
translation -= rotation.apply(head_model_offset)

quat_x, quat_y, quat_z, quat_w = rotation.as_quat() # scipy returns xyzw
imu_quat = np.array([quat_y, -quat_x, quat_z, quat_w], dtype=np.float64) # xyzw
imu_rotation = Rotation.from_quat(imu_quat).as_matrix()
imu_translation = np.array([translation[1], -translation[0], translation[2]], dtype=np.float64)

imu_pose = np.eye(4, dtype=np.float64)
imu_pose[:3, :3] = imu_rotation
imu_pose[:3, 3] = imu_translation
return imu_pose


def main(
video_file: str,
depth_only: bool = False,
rgb_only: bool = False,
topk: Optional[int] = typer.Option(None, help="Limit visualization to the first K frames."),
):
"""Visualize spatialmp4 using rerun."""

reader = sm.Reader(video_file)

if depth_only:
reader.set_read_mode(sm.ReadMode.DEPTH_ONLY)
elif rgb_only:
reader.set_read_mode(sm.ReadMode.RGB_ONLY)
else:
reader.set_read_mode(sm.ReadMode.DEPTH_FIRST)

if not reader.has_depth():
if not rgb_only and not reader.has_depth():
typer.echo(typer.style(f"No depth found in input file", fg=typer.colors.RED))
return
if not reader.has_pose():
typer.echo(typer.style(f"No depth found in input file", fg=typer.colors.RED))
typer.echo(typer.style(f"No pose found in input file", fg=typer.colors.RED))
return

blueprint=rrb.Horizontal(
blueprint = rrb.Horizontal(
rrb.Vertical(
rrb.Spatial3DView(name="3D", origin="world"),
rrb.TextDocumentView(name="Description", origin="/description"),
Expand Down Expand Up @@ -80,6 +124,8 @@ def main(
cx = float(reader.get_rgb_intrinsics_left().cx)
cy = float(reader.get_rgb_intrinsics_left().cy)

processed = 0

while reader.has_next():
if depth_only:
depth_frame = reader.load_depth()
Expand All @@ -91,22 +137,35 @@ def main(
extrinsic = np.eye(4)
extrinsic[:3, 3] = [TWH.x, TWH.y, TWH.z]
extrinsic[:3, :3] = Rotation.from_quat((TWH.qx, TWH.qy, TWH.qz, TWH.qw)).as_matrix()
elif rgb_only:
frame_rgb = reader.load_rgb()
timestamp = frame_rgb.timestamp

T_I_Srgb = reader.get_rgb_extrinsics_left().as_se3()
T_W_Hrgb = frame_rgb.pose.as_se3()
T_W_Irgb = sm.head_to_imu(T_W_Hrgb, sm.HEAD_MODEL_OFFSET)
T_W_Srgb = T_W_Irgb @ T_I_Srgb
extrinsic = T_W_Srgb

else:
rgbd = reader.load_rgbd(True)
timestamp = rgbd.timestamp
depth_np = rgbd.depth
extrinsic = rgbd.T_W_S
print(f"Loading frame {reader.get_index() + 1} / {reader.get_frame_count()}, timestamp: {timestamp}")

# preprocess on depthmap
depth_uint16 = (depth_np * 1000).astype(np.uint16)
sobelx = cv2.Sobel(depth_uint16, cv2.CV_32F, 1, 0, ksize=3)
sobely = cv2.Sobel(depth_uint16, cv2.CV_32F, 0, 1, ksize=3)
grad_mag = np.sqrt(sobelx**2 + sobely**2)
depth_np[grad_mag > 500] = 0
depth_np[(depth_np < 0.2) | (depth_np > 5)] = 0
print(f"Loading frame {reader.get_index() + 1} / {reader.get_frame_count()}, timestamp: {timestamp}")

if not rgb_only:
# preprocess on depthmap
depth_uint16 = (depth_np * 1000).astype(np.uint16)
sobelx = cv2.Sobel(depth_uint16, cv2.CV_32F, 1, 0, ksize=3)
sobely = cv2.Sobel(depth_uint16, cv2.CV_32F, 0, 1, ksize=3)
grad_mag = np.sqrt(sobelx**2 + sobely**2)
depth_np[grad_mag > 500] = 0
depth_np[(depth_np < 0.2) | (depth_np > 5)] = 0

extrinsic = pico_pose_to_open3d(extrinsic)
extrinsic[:3, :3] = ensure_right_handed_rotation(extrinsic[:3, :3])

rr.set_time_seconds("time", timestamp)
rr.log("world/xyz", rr.Arrows3D(vectors=[[1, 0, 0], [0, 1, 0], [0, 0, 1]], colors=[[255, 0, 0], [0, 255, 0], [0, 0, 255]]))
Expand All @@ -119,9 +178,19 @@ def main(
position = extrinsic[:3, 3]
rotation = Rotation.from_matrix(extrinsic[:3, :3]).as_quat()
rr.log("world/camera", rr.Transform3D(translation=position, rotation=rr.Quaternion(xyzw=rotation)))
rr.log("world/camera/image/depth", rr.DepthImage(depth_np, meter=1.0))
if not depth_only:
rr.log("world/camera/image/rgb", rr.Image(rgbd.rgb, color_model="BGR").compress(jpeg_quality=95))

if rgb_only:
rr.log("world/camera/image/rgb", rr.Image(frame_rgb.left_rgb, color_model="BGR").compress(jpeg_quality=95))
else:
rr.log("world/camera/image/depth", rr.DepthImage(depth_np, meter=1.0))
if not depth_only:
rr.log("world/camera/image/rgb", rr.Image(rgbd.rgb, color_model="BGR").compress(jpeg_quality=95))

processed += 1
if topk is not None and processed >= topk:
break
if topk and reader.get_index() > topk:
break


if __name__ == "__main__":#
Expand Down
17 changes: 17 additions & 0 deletions python/tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,20 @@ def test_head_to_imu_general_pose():
imu_pose = spatialmp4.head_to_imu(head_pose, offset)
expected = compute_expected_head_to_imu(head_pose, offset)
assert np.allclose(imu_pose, expected, atol=1e-10)


def test_head_to_imu_fortran_order_input():
angle = np.deg2rad(30.0)
Rx = np.array([
[1.0, 0.0, 0.0],
[0.0, np.cos(angle), -np.sin(angle)],
[0.0, np.sin(angle), np.cos(angle)],
])
head_pose = np.eye(4, order="F")
head_pose[:3, :3] = Rx
head_pose[:3, 3] = np.array([0.3, -0.4, 0.5])
offset = np.array([0.12, -0.08, 0.04])

expected = compute_expected_head_to_imu(np.array(head_pose), offset)
imu_pose = spatialmp4.head_to_imu(head_pose, offset)
assert np.allclose(imu_pose, expected, atol=1e-12)
Loading