Source code for projkit.camops

from typing import Any, Tuple

import cv2
import numpy as np


[docs]def to_homogeneous(points: np.ndarray) -> np.ndarray:
    """
    Convert points in 2D to homogeneous coordinates.

    Args:
        points (np.ndarray): 2D array containing points in Cartesian coordinates.

    Returns:
        np.ndarray: 2D array with points in homogeneous coordinates.
    """

    assert points.ndim == 2, f"Expected points to have dim = 2, received {points.ndim}"

    n, d = points.shape
    return np.c_[points, np.ones(n)]


[docs]def de_homogenize(points: np.ndarray) -> np.ndarray:
    """
    Convert points from homogeneous coordinates to Cartesian coordinates.

    Args:
        points (np.ndarray): 2D array containing points in homogeneous coordinates.

    Returns:
        np.ndarray: 2D array with points in Cartesian coordinates.
    """

    assert points.ndim == 2, f"Expected points to have dim = 2, received {points.ndim}"
    _dh = points[:, 0:-1] / points[:, -1][:, np.newaxis]
    return _dh


[docs]def get_t_from_R_C(R: np.ndarray, C: np.ndarray) -> np.ndarray:
    """
    Calculate the translation vector t from given rotation matrix R and camera center C.

    Args:
        R (np.ndarray): 3x3 rotation matrix representing camera orientation.
        C (np.ndarray): Camera center coordinates as a 1D array (shape: (3,)).

    Returns:
        np.ndarray: 3x1 translation vector representing camera position.
    """
    return -R @ C.reshape(3, 1)


[docs]def get_P_from_K_R_t(K: np.ndarray, R: np.ndarray, t: np.ndarray) -> np.ndarray:
    """
    Calculate the projection matrix P = K[R|t] from intrinsic matrix K, rotation matrix R, and translation vector t.

    Args:
        K (np.ndarray): 3x3 intrinsic camera matrix.
        R (np.ndarray): 3x3 rotation matrix representing camera orientation.
        t (np.ndarray): 3x1 translation vector representing camera position.

    Returns:
        np.ndarray: 3x4 projection matrix representing the full camera projection.
    """
    return K @ np.hstack([R, t])


[docs]def get_P(int_mat: np.ndarray, ext_mat: np.ndarray):
    """
    Calculate the projection matrix P = K[R|t] from intrinsic matrix and extrinsic matrix.

    Args:
        int_mat (np.ndarray): 3x3 intrinsic camera matrix.
        ext_mat (np.ndarray): 3x4 extrinsic matrix representing camera pose.

    Returns:
        np.ndarray: 3x4 projection matrix representing the full camera projection.
    """
    return int_mat @ ext_mat


[docs]def get_C_from_R_t(R, t):
    """
    Calculate the origin of the coordinate system (camera center) C from given rotation matrix R and translation vector t.

    Args:
        R (np.ndarray): 3x3 rotation matrix representing camera orientation.
        t (np.ndarray): 3x1 translation vector representing camera position.

    Returns:
        np.ndarray: 3x1 camera center coordinates.
    """
    return -(R.T @ t)


[docs]def project_in_3d_with_K_R_t_scale(
    K: np.ndarray, R: np.ndarray, t: np.ndarray, s: np.ndarray, ic: np.ndarray
) -> np.ndarray:
    """
    Project 2D image coordinates to 3D world coordinates with scale factor s, intrinsic matrix K, rotation matrix R,
    and translation vector t.

    Args:
        K (np.ndarray): 3x3 intrinsic camera matrix.
        R (np.ndarray): 3x3 rotation matrix representing camera orientation.
        t (np.ndarray): 3x1 translation vector representing camera position.
        s (np.ndarray): Scale factor applied to the image coordinates.
        ic (np.ndarray): 2D array of shape (Nx2) containing 2D image coordinates.

    Returns:
        np.ndarray: 2D array of shape (Nx3) containing 3D points in world coordinates.
    """

    return np.linalg.inv(R) @ ((np.linalg.inv(K) @ (s * to_homogeneous(ic)).T) - t)


[docs]def project_in_2d_with_K_R_t(
    K: np.ndarray, R: np.ndarray, t: np.ndarray, wc: np.ndarray
) -> np.ndarray:
    """
    Project 3D points in world coordinates to 2D image coordinates using intrinsic matrix K, rotation matrix R, and
    translation vector t.

    Args:
        K (np.ndarray): 3x3 intrinsic camera matrix.
        R (np.ndarray): 3x3 rotation matrix representing camera orientation.
        t (np.ndarray): 3x1 translation vector representing camera position.
        wc (np.ndarray): 2D array of shape (Nx3) containing 3D points in world coordinates.

    Returns:
        np.ndarray: 2D array of shape (Nx2) containing 2D points in image coordinates.
    """

    assert (
        wc.shape[-1] == 3 and wc.ndim == 2
    ), f"Expected shape for wc = (Nx3), received {wc.shape}"

    wc_homogeneous = to_homogeneous(wc)
    ic = de_homogenize((get_P_from_K_R_t(K, R, t) @ wc_homogeneous.T).T)

    return ic


[docs]def get_ic_wc_z_from_proj(inp: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Extract image coordinates (ic), world coordinates (wc), and z-coordinates from the given projection array.

    Args:
        inp (np.ndarray): 2D array with shape (Nx3) representing a projection array.

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]: A tuple containing the image coordinates (ic),
        world coordinates (wc), and z-coordinates extracted from the input projection array.
    """
    assert (
        inp.ndim == 2 and inp.shape[-1] == 5
    ), f"Expected inp to have shape (Nx5), Got {inp.shape}"
    return inp[:, :2], inp[:, 2:], inp[:, -1:]


[docs]def project_in_2d_with_K_R_t_dist_coeff(
    K: np.ndarray,
    R: np.ndarray,
    t: np.ndarray,
    d: np.ndarray,
    wc: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Project 3D points in world coordinates to 2D image coordinates with distortion correction.

    Args:
        K (np.ndarray): 3x3 intrinsic camera matrix.
        R (np.ndarray): 3x3 rotation matrix representing camera orientation.
        t (np.ndarray): 3x1 translation vector representing camera position.
        d (np.ndarray): Distortion coefficients.
        wc (np.ndarray): 2D array of shape (Nx3) containing 3D points in world coordinates.

    Returns:
        np.ndarray: 2D array of shape (Nx5) containing 2D points in image coordinates and its corresponding 3d coordinates
         with distortion correction.
    """

    # https://answers.opencv.org/question/20138/projectpoints-fails-with-points-behind-the-camera/

    assert (
        wc.shape[-1] == 3 and wc.ndim == 2
    ), f"Expected shape for wc = (Nx3), received {wc.shape}"

    wc_homogeneous = to_homogeneous(wc)
    cc = (np.hstack([R, t]) @ wc_homogeneous.T).T
    z = cc[:, -1]

    cc = de_homogenize(cc)
    cc = cv2.undistortPoints(cc[np.newaxis, :, :], K, d, P=K).squeeze()

    # Remove points which lie behind the camera
    cc = cc[z > 0]

    # Project from camera coordinates to image coordinates
    ic = (K @ to_homogeneous(cc).T).T
    ic = de_homogenize(ic)

    return get_ic_wc_z_from_proj(np.hstack([ic, wc[z > 0]]))


[docs]def get_int_mat(fx: float, fy: float, cx: float, cy: float) -> np.ndarray:
    """
    Generate the camera intrinsic matrix (3x3) from focal lengths (fx, fy) and principal point (cx, cy).

    Args:
        fx (float): Focal length along the x-axis.
        fy (float): Focal length along the y-axis.
        cx (float): Principal point x-coordinate.
        cy (float): Principal point y-coordinate.

    Returns:
        np.ndarray: 3x3 intrinsic matrix representing the camera parameters.
    """

    return np.array(
        [
            [fx, 0, cx],
            [0, fy, cy],
            [0, 0, 1],
        ]
    )


[docs]def get_ext_mat(R: np.ndarray, t: np.ndarray):
    """
    Generate the camera extrinsic matrix (3x4) from rotation matrix R and translation vector t.

    Args:
        R (np.ndarray): 3x3 rotation matrix representing camera orientation.
        t (np.ndarray): 3x1 translation vector representing camera position.

    Returns:
        np.ndarray: 3x4 extrinsic matrix representing the camera pose.
    """

    return np.hstack([R, t])


[docs]def Rx(rx: float):
    """
    Generate a 3x3 rotation matrix around the x-axis.

    Args:
        rx (float): Rotation angle in radians around the x-axis.

    Returns:
        np.ndarray: 3x3 rotation matrix representing the rotation around the x-axis.
    """

    return np.array(
        [[1, 0, 0], [0, np.cos(rx), -np.sin(rx)], [0, np.sin(rx), np.cos(rx)]]
    )


[docs]def Ry(ry: float):
    """
    Generate a 3x3 rotation matrix around the y-axis.

    Args:
        ry (float): Rotation angle in radians around the y-axis.

    Returns:
        np.ndarray: 3x3 rotation matrix representing the rotation around the y-axis.
    """

    return np.array(
        [[np.cos(ry), 0, np.sin(ry)], [0, 1, 0], [-np.sin(ry), 0, np.cos(ry)]]
    )


[docs]def Rz(rz: float):
    """
    Generate a 3x3 rotation matrix around the z-axis.

    Args:
        rz (float): Rotation angle in radians around the z-axis.

    Returns:
        np.ndarray: 3x3 rotation matrix representing the rotation around the z-axis.
    """

    return np.array(
        [[np.cos(rz), -np.sin(rz), 0], [np.sin(rz), np.cos(rz), 0], [0, 0, 1]]
    )


[docs]def get_R(rx: float, ry: float, rz: float, to_rad=True) -> np.ndarray:
    """
    Calculate the 3x3 camera rotation matrix from the given rotation angles around x, y, and z axes.

    Args:
        rx (float): Rotation angle in radians around the x-axis.
        ry (float): Rotation angle in radians around the y-axis.
        rz (float): Rotation angle in radians around the z-axis.

    Returns:
        np.ndarray: 3x3 rotation matrix representing the camera orientation.
    """

    # https://staff.fnwi.uva.nl/r.vandenboomgaard/IPCV20162017/LectureNotes/CV/PinholeCamera/PinholeCamera.html
    if to_rad:
        rx = np.deg2rad(rx)
        ry = np.deg2rad(ry)
        rz = np.deg2rad(rz)

    return (Rz(rz) @ Ry(ry) @ Rx(rx)).T