# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

import torch
from collections.abc import Sequence
from typing import TYPE_CHECKING, ClassVar, Literal

import omni.isaac.core.utils.stage as stage_utils
import omni.physics.tensors.impl.api as physx
from omni.isaac.core.prims import XFormPrimView

import omni.isaac.lab.utils.math as math_utils
from omni.isaac.lab.sensors.camera import CameraData
from omni.isaac.lab.utils.warp import raycast_mesh

from .ray_caster import RayCaster

if TYPE_CHECKING:
    from .ray_caster_camera_cfg import RayCasterCameraCfg

class RayCasterCamera(RayCaster):
    """A ray-casting camera sensor.

    The ray-caster camera uses a set of rays to get the distances to meshes in the scene. The rays are
    defined in the sensor's local coordinate frame. The sensor has the same interface as the
    :class:`omni.isaac.lab.sensors.Camera` class that implements the camera through USD camera prims.
    However, this class provides faster image generation. The sensor converts meshes from the list of
    primitive paths provided in the configuration to Warp meshes. The camera then ray-casts against
    these Warp meshes only.

    Currently, only the following annotators are supported:

    - ``"distance_to_camera"``: An image containing the distance to the camera optical center.
    - ``"distance_to_image_plane"``: An image containing distances of 3D points from the camera plane
      along the camera's z-axis.
    - ``"normals"``: An image containing the local surface normal vectors at each pixel.

    .. note::
        Currently, only static meshes are supported. Extending the warp mesh to support dynamic meshes
        is a work in progress.
    """

    cfg: RayCasterCameraCfg
    """The configuration parameters."""

    UNSUPPORTED_TYPES: ClassVar[set[str]] = {
        "rgb",
        "instance_id_segmentation",
        "instance_id_segmentation_fast",
        "instance_segmentation",
        "instance_segmentation_fast",
        "semantic_segmentation",
        "skeleton_data",
        "motion_vectors",
        "bounding_box_2d_tight",
        "bounding_box_2d_tight_fast",
        "bounding_box_2d_loose",
        "bounding_box_2d_loose_fast",
        "bounding_box_3d",
        "bounding_box_3d_fast",
    }
    """A set of sensor types that are not supported by the ray-caster camera."""
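
    # Minimal configuration sketch (hedged): the exact fields and defaults live in
    # RayCasterCameraCfg and the pattern configs; the values below are illustrative only.
    # This class reads, among others, ``pattern_cfg`` (with ``width``/``height``),
    # ``data_types``, ``mesh_prim_paths``, ``offset`` and ``depth_clipping_behavior``.
    #
    #   from omni.isaac.lab.sensors.ray_caster import RayCasterCameraCfg, patterns
    #
    #   camera_cfg = RayCasterCameraCfg(
    #       prim_path="/World/envs/env_.*/Robot/base",
    #       mesh_prim_paths=["/World/ground"],
    #       pattern_cfg=patterns.PinholeCameraPatternCfg(width=640, height=480),
    #       data_types=["distance_to_image_plane", "normals"],
    #   )
    #   camera = RayCasterCamera(cfg=camera_cfg)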

    def __init__(self, cfg: RayCasterCameraCfg):
        """Initializes the camera object.

        Args:
            cfg: The configuration parameters.

        Raises:
            ValueError: If the provided data types are not supported by the ray-caster camera.
        """
        # perform check on supported data types
        self._check_supported_data_types(cfg)
        # initialize base class
        super().__init__(cfg)
        # create empty variables for storing output data
        self._data = CameraData()

    def __str__(self) -> str:
        """Returns: A string containing information about the instance."""
        return (
            f"Ray-Caster-Camera @ '{self.cfg.prim_path}': \n"
            f"\tview type            : {self._view.__class__}\n"
            f"\tupdate period (s)    : {self.cfg.update_period}\n"
            f"\tnumber of meshes     : {len(self.meshes)}\n"
            f"\tnumber of sensors    : {self._view.count}\n"
            f"\tnumber of rays/sensor: {self.num_rays}\n"
            f"\ttotal number of rays : {self.num_rays * self._view.count}\n"
            f"\timage shape          : {self.image_shape}"
        )

    """
    Properties
    """

    @property
    def data(self) -> CameraData:
        # update sensors if needed
        self._update_outdated_buffers()
        # return the data
        return self._data

    @property
    def image_shape(self) -> tuple[int, int]:
        """A tuple containing (height, width) of the camera sensor."""
        return (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width)

    @property
    def frame(self) -> torch.Tensor:
        """Frame number when the measurement took place."""
        return self._frame

    """
    Operations.
    """

    def set_intrinsic_matrices(
        self, matrices: torch.Tensor, focal_length: float = 1.0, env_ids: Sequence[int] | None = None
    ):
        """Set the intrinsic matrices of the camera.

        Args:
            matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3).
            focal_length: Focal length to use when computing aperture values (in cm). Defaults to 1.0.
            env_ids: The sensor indices to manipulate. Defaults to None, which means all sensor indices.
        """
        # resolve env_ids
        if env_ids is None:
            env_ids = slice(None)
        # save new intrinsic matrices and focal length
        self._data.intrinsic_matrices[env_ids] = matrices.to(self._device)
        self._focal_length = focal_length
        # recompute ray directions
        self.ray_starts[env_ids], self.ray_directions[env_ids] = self.cfg.pattern_cfg.func(
            self.cfg.pattern_cfg, self._data.intrinsic_matrices[env_ids], self._device
        )
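
    # Usage sketch (hypothetical values): build a batch of pinhole intrinsics
    # K = [[f_x, 0, c_x], [0, f_y, c_y], [0, 0, 1]] and push them to an initialized sensor.
    # ``num_envs`` and the pixel values below are assumptions for illustration only.
    #
    #   K = torch.tensor([
    #       [500.0, 0.0, 320.0],
    #       [0.0, 500.0, 240.0],
    #       [0.0, 0.0, 1.0],
    #   ]).repeat(num_envs, 1, 1)
    #   camera.set_intrinsic_matrices(K, focal_length=24.0)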

    def reset(self, env_ids: Sequence[int] | None = None):
        # reset the timestamps
        super().reset(env_ids)
        # resolve None
        if env_ids is None:
            env_ids = slice(None)
        # reset the data
        # note: this recomputation is useful if one performs events such as randomizations on the camera poses.
        pos_w, quat_w = self._compute_camera_world_poses(env_ids)
        self._data.pos_w[env_ids] = pos_w
        self._data.quat_w_world[env_ids] = quat_w
        # reset the frame count
        self._frame[env_ids] = 0

    def set_world_poses(
        self,
        positions: torch.Tensor | None = None,
        orientations: torch.Tensor | None = None,
        env_ids: Sequence[int] | None = None,
        convention: Literal["opengl", "ros", "world"] = "ros",
    ):
        """Set the pose of the camera w.r.t. the world frame using the specified convention.

        Since different fields use different conventions for camera orientations, the method allows
        users to set the camera poses in the specified convention. Possible conventions are:

        - :obj:`"opengl"` - forward axis: -Z - up axis: +Y - Offset is applied in the OpenGL (Usd.Camera) convention
        - :obj:`"ros"` - forward axis: +Z - up axis: -Y - Offset is applied in the ROS convention
        - :obj:`"world"` - forward axis: +X - up axis: +Z - Offset is applied in the World Frame convention

        See :meth:`omni.isaac.lab.utils.math.convert_camera_frame_orientation_convention` for more
        details on the conventions.

        Args:
            positions: The cartesian coordinates (in meters). Shape is (N, 3).
                Defaults to None, in which case the camera position is not changed.
            orientations: The quaternion orientation in (w, x, y, z). Shape is (N, 4).
                Defaults to None, in which case the camera orientation is not changed.
            env_ids: The sensor indices to manipulate. Defaults to None, which means all sensor indices.
            convention: The convention in which the poses are fed. Defaults to "ros".

        Raises:
            RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
        """
        # resolve env_ids
        if env_ids is None:
            env_ids = self._ALL_INDICES
        # get current positions
        pos_w, quat_w = self._compute_view_world_poses(env_ids)
        if positions is not None:
            # transform to camera frame
            pos_offset_world_frame = positions - pos_w
            self._offset_pos[env_ids] = math_utils.quat_apply(math_utils.quat_inv(quat_w), pos_offset_world_frame)
        if orientations is not None:
            # convert rotation matrix from input convention to world
            quat_w_set = math_utils.convert_camera_frame_orientation_convention(
                orientations, origin=convention, target="world"
            )
            self._offset_quat[env_ids] = math_utils.quat_mul(math_utils.quat_inv(quat_w), quat_w_set)
        # update the data
        pos_w, quat_w = self._compute_camera_world_poses(env_ids)
        self._data.pos_w[env_ids] = pos_w
        self._data.quat_w_world[env_ids] = quat_w
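
    # Example (illustrative values only): the identity quaternion means different viewing
    # directions in different conventions. In the "world" convention it faces +X with +Z up,
    # while in the "ros" convention it faces +Z. The call below places all cameras at z = 2 m
    # looking along the world +X axis:
    #
    #   positions = torch.tensor([[0.0, 0.0, 2.0]]).repeat(num_envs, 1)
    #   orientations = torch.tensor([[1.0, 0.0, 0.0, 0.0]]).repeat(num_envs, 1)
    #   camera.set_world_poses(positions, orientations, convention="world")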

    def set_world_poses_from_view(
        self, eyes: torch.Tensor, targets: torch.Tensor, env_ids: Sequence[int] | None = None
    ):
        """Set the poses of the camera from the eye position and look-at target position.

        Args:
            eyes: The positions of the camera's eye. Shape is (N, 3).
            targets: The target locations to look at. Shape is (N, 3).
            env_ids: The sensor indices to manipulate. Defaults to None, which means all sensor indices.

        Raises:
            RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
            NotImplementedError: If the stage up-axis is not "Y" or "Z".
        """
        # get up axis of current stage
        up_axis = stage_utils.get_stage_up_axis()
        # camera position and rotation in opengl convention
        orientations = math_utils.quat_from_matrix(
            math_utils.create_rotation_matrix_from_view(eyes, targets, up_axis=up_axis, device=self._device)
        )
        self.set_world_poses(eyes, orientations, env_ids, convention="opengl")
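
    # Usage sketch (hypothetical values): aim every camera at the world origin from a set of
    # eye positions; the orientation is derived internally in the OpenGL convention.
    #
    #   eyes = torch.tensor([[2.0, 0.0, 1.5], [0.0, 2.0, 1.5]])
    #   targets = torch.zeros_like(eyes)
    #   camera.set_world_poses_from_view(eyes, targets)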
""" Implementation. """def_initialize_rays_impl(self):# Create all indices bufferself._ALL_INDICES=torch.arange(self._view.count,device=self._device,dtype=torch.long)# Create frame count bufferself._frame=torch.zeros(self._view.count,device=self._device,dtype=torch.long)# create buffersself._create_buffers()# compute intrinsic matricesself._compute_intrinsic_matrices()# compute ray stars and directionsself.ray_starts,self.ray_directions=self.cfg.pattern_cfg.func(self.cfg.pattern_cfg,self._data.intrinsic_matrices,self._device)self.num_rays=self.ray_directions.shape[1]# create buffer to store ray hitsself.ray_hits_w=torch.zeros(self._view.count,self.num_rays,3,device=self._device)# set offsetsquat_w=math_utils.convert_camera_frame_orientation_convention(torch.tensor([self.cfg.offset.rot],device=self._device),origin=self.cfg.offset.convention,target="world")self._offset_quat=quat_w.repeat(self._view.count,1)self._offset_pos=torch.tensor(list(self.cfg.offset.pos),device=self._device).repeat(self._view.count,1)def_update_buffers_impl(self,env_ids:Sequence[int]):"""Fills the buffers of the sensor data."""# increment frame countself._frame[env_ids]+=1# compute poses from current viewpos_w,quat_w=self._compute_camera_world_poses(env_ids)# update the dataself._data.pos_w[env_ids]=pos_wself._data.quat_w_world[env_ids]=quat_w# note: full orientation is consideredray_starts_w=math_utils.quat_apply(quat_w.repeat(1,self.num_rays),self.ray_starts[env_ids])ray_starts_w+=pos_w.unsqueeze(1)ray_directions_w=math_utils.quat_apply(quat_w.repeat(1,self.num_rays),self.ray_directions[env_ids])# ray cast and store the hits# note: we set max distance to 1e6 during the ray-casting. THis is because we clip the distance# to the image plane and distance to the camera to the maximum distance afterwards in-order to# match the USD camera behavior.# TODO: Make ray-casting work for multiple meshes?# necessary for regular dictionaries.self.ray_hits_w,ray_depth,ray_normal,_=raycast_mesh(ray_starts_w,ray_directions_w,mesh=self.meshes[self.cfg.mesh_prim_paths[0]],max_dist=1e6,return_distance=any([nameinself.cfg.data_typesfornamein["distance_to_image_plane","distance_to_camera"]]),return_normal="normals"inself.cfg.data_types,)# update output buffersif"distance_to_image_plane"inself.cfg.data_types:# note: data is in camera frame so we only take the first component (z-axis of camera frame)distance_to_image_plane=(math_utils.quat_apply(math_utils.quat_inv(quat_w).repeat(1,self.num_rays),(ray_depth[:,:,None]*ray_directions_w),))[:,:,0]# apply the maximum distance after the 
            # apply the maximum distance after the transformation
            if self.cfg.depth_clipping_behavior == "max":
                distance_to_image_plane = torch.clip(distance_to_image_plane, max=self.cfg.max_distance)
                distance_to_image_plane[torch.isnan(distance_to_image_plane)] = self.cfg.max_distance
            elif self.cfg.depth_clipping_behavior == "zero":
                distance_to_image_plane[distance_to_image_plane > self.cfg.max_distance] = 0.0
                distance_to_image_plane[torch.isnan(distance_to_image_plane)] = 0.0
            self._data.output["distance_to_image_plane"][env_ids] = distance_to_image_plane.view(
                -1, *self.image_shape, 1
            )
        if "distance_to_camera" in self.cfg.data_types:
            if self.cfg.depth_clipping_behavior == "max":
                ray_depth = torch.clip(ray_depth, max=self.cfg.max_distance)
            elif self.cfg.depth_clipping_behavior == "zero":
                ray_depth[ray_depth > self.cfg.max_distance] = 0.0
            self._data.output["distance_to_camera"][env_ids] = ray_depth.view(-1, *self.image_shape, 1)
        if "normals" in self.cfg.data_types:
            self._data.output["normals"][env_ids] = ray_normal.view(-1, *self.image_shape, 3)

    def _debug_vis_callback(self, event):
        # in case it crashes, be safe
        if not hasattr(self, "ray_hits_w"):
            return
        # show ray hit positions
        self.ray_visualizer.visualize(self.ray_hits_w.view(-1, 3))

    """
    Private Helpers
    """

    def _check_supported_data_types(self, cfg: RayCasterCameraCfg):
        """Checks if the data types are supported by the ray-caster camera."""
        # check if there is any intersection with the unsupported types
        # reason: we cannot obtain this data from the simplified warp-based ray caster
        common_elements = set(cfg.data_types) & RayCasterCamera.UNSUPPORTED_TYPES
        if common_elements:
            raise ValueError(
                f"RayCasterCamera class does not support the following sensor types: {common_elements}."
                "\n\tThis is because these sensor types cannot be obtained in a fast way using 'warp'."
                "\n\tHint: If you need to work with these sensor types, we recommend using the USD camera"
                " interface from the omni.isaac.lab.sensors.camera module."
            )

    def _create_buffers(self):
        """Create buffers for storing data."""
        # prepare drift
        self.drift = torch.zeros(self._view.count, 3, device=self.device)
        # create the data object
        # -- pose of the cameras
        self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device)
        self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device)
        # -- intrinsic matrix
        self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device)
        self._data.intrinsic_matrices[:, 2, 2] = 1.0
        self._data.image_shape = self.image_shape
        # -- output data
        # create the buffers to store the annotator data
        self._data.output = {}
        self._data.info = [{name: None for name in self.cfg.data_types}] * self._view.count
        for name in self.cfg.data_types:
            if name in ["distance_to_image_plane", "distance_to_camera"]:
                shape = (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width, 1)
            elif name in ["normals"]:
                shape = (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width, 3)
            else:
                raise ValueError(f"Received unknown data type: {name}. Please check the configuration.")
            # allocate tensor to store the data
            self._data.output[name] = torch.zeros((self._view.count, *shape), device=self._device)
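
    # Worked example for the intrinsics computed below (illustrative numbers): for a 640x480
    # pattern with focal_length = 24.0 and horizontal_aperture = 20.955, the auto-computed
    # vertical aperture is 20.955 * 480 / 640, so
    #   f_x = 640 * 24.0 / 20.955 ≈ 733.0
    #   f_y = 480 * 24.0 / (20.955 * 480 / 640) = f_x ≈ 733.0
    # and with zero aperture offsets the principal point is the image center
    # (c_x, c_y) = (320, 240), i.e. the auto-computed aperture preserves square pixels.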

    def _compute_intrinsic_matrices(self):
        """Computes the intrinsic matrices for the camera based on the config provided."""
        # get the sensor properties
        pattern_cfg = self.cfg.pattern_cfg
        # check if the vertical aperture is provided
        # if not, it is auto-computed based on the aspect ratio to preserve square pixels
        if pattern_cfg.vertical_aperture is None:
            pattern_cfg.vertical_aperture = pattern_cfg.horizontal_aperture * pattern_cfg.height / pattern_cfg.width
        # compute the intrinsic matrix
        f_x = pattern_cfg.width * pattern_cfg.focal_length / pattern_cfg.horizontal_aperture
        f_y = pattern_cfg.height * pattern_cfg.focal_length / pattern_cfg.vertical_aperture
        c_x = pattern_cfg.horizontal_aperture_offset * f_x + pattern_cfg.width / 2
        c_y = pattern_cfg.vertical_aperture_offset * f_y + pattern_cfg.height / 2
        # populate the intrinsic matrices
        self._data.intrinsic_matrices[:, 0, 0] = f_x
        self._data.intrinsic_matrices[:, 0, 2] = c_x
        self._data.intrinsic_matrices[:, 1, 1] = f_y
        self._data.intrinsic_matrices[:, 1, 2] = c_y
        # save focal length
        self._focal_length = pattern_cfg.focal_length

    def _compute_view_world_poses(self, env_ids: Sequence[int]) -> tuple[torch.Tensor, torch.Tensor]:
        """Obtains the pose of the view the camera is attached to, in the world frame.

        Returns:
            A tuple of the position (in meters) and quaternion (w, x, y, z).
        """
        # obtain the poses of the sensors
        # note: the clone arg doesn't exist for the xform prim view, so we need to do this manually
        if isinstance(self._view, XFormPrimView):
            pos_w, quat_w = self._view.get_world_poses(env_ids)
        elif isinstance(self._view, physx.ArticulationView):
            pos_w, quat_w = self._view.get_root_transforms()[env_ids].split([3, 4], dim=-1)
            quat_w = math_utils.convert_quat(quat_w, to="wxyz")
        elif isinstance(self._view, physx.RigidBodyView):
            pos_w, quat_w = self._view.get_transforms()[env_ids].split([3, 4], dim=-1)
            quat_w = math_utils.convert_quat(quat_w, to="wxyz")
        else:
            raise RuntimeError(f"Unsupported view type: {type(self._view)}")
        # return the pose
        return pos_w.clone(), quat_w.clone()

    def _compute_camera_world_poses(self, env_ids: Sequence[int]) -> tuple[torch.Tensor, torch.Tensor]:
        """Computes the pose of the camera in the world frame.

        This function applies the offset pose to the pose of the view the camera is attached to.

        Returns:
            A tuple of the position (in meters) and quaternion (w, x, y, z) in the "world" convention.
        """
        # get the pose of the view the camera is attached to
        pos_w, quat_w = self._compute_view_world_poses(env_ids)
        # apply offsets
        # note: the quaternion is applied because the offset is relative to the parent frame
        pos_w += math_utils.quat_apply(quat_w, self._offset_pos[env_ids])
        quat_w = math_utils.quat_mul(quat_w, self._offset_quat[env_ids])
        return pos_w, quat_w
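
# Pose-composition note (not part of the class): _compute_camera_world_poses chains rigid
# transforms, T_world_camera = T_world_view * T_view_camera. With illustrative values, a view at
# position (1, 0, 0) with identity orientation and an offset position of (0, 0, 0.5) yields a
# camera position of (1, 0, 0.5): quat_apply with the identity quaternion returns the offset
# unchanged, and quat_mul with the identity returns the offset quaternion itself.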