
Saving rendered images and 3D re-projection#

This guide is accompanied by the run_usd_camera.py script in the IsaacLab/scripts/tutorials/04_sensors directory.

Code for run_usd_camera.py
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

 11"""
 12This script shows how to use the camera sensor from the Isaac Lab framework.
 13
 14The camera sensor is created and interfaced through the Omniverse Replicator API. However, instead of using
 15the simulator or OpenGL convention for the camera, we use the robotics or ROS convention.
 16
 17.. code-block:: bash
 18
 19    # Usage with GUI
 20    ./isaaclab.sh -p scripts/tutorials/04_sensors/run_usd_camera.py --enable_cameras
 21
 22    # Usage with headless
 23    ./isaaclab.sh -p scripts/tutorials/04_sensors/run_usd_camera.py --headless --enable_cameras
 24
 25"""
 26
 27"""Launch Isaac Sim Simulator first."""
 28
 29import argparse
 30
 31from isaaclab.app import AppLauncher
 32
 33# add argparse arguments
 34parser = argparse.ArgumentParser(description="This script demonstrates how to use the camera sensor.")
 35parser.add_argument(
 36    "--draw",
 37    action="store_true",
 38    default=False,
 39    help="Draw the pointcloud from camera at index specified by ``--camera_id``.",
 40)
 41parser.add_argument(
 42    "--save",
 43    action="store_true",
 44    default=False,
 45    help="Save the data from camera at index specified by ``--camera_id``.",
 46)
 47parser.add_argument(
 48    "--camera_id",
 49    type=int,
 50    choices={0, 1},
 51    default=0,
 52    help=(
 53        "The camera ID to use for displaying points or saving the camera data. Default is 0."
 54        " The viewport will always initialize with the perspective of camera 0."
 55    ),
 56)
 57# append AppLauncher cli args
 58AppLauncher.add_app_launcher_args(parser)
 59# parse the arguments
 60args_cli = parser.parse_args()
 61
 62# launch omniverse app
 63app_launcher = AppLauncher(args_cli)
 64simulation_app = app_launcher.app
 65
 66"""Rest everything follows."""
 67
 68import numpy as np
 69import os
 70import random
 71import torch
 72
 73import isaacsim.core.utils.prims as prim_utils
 74import omni.replicator.core as rep
 75
 76import isaaclab.sim as sim_utils
 77from isaaclab.assets import RigidObject, RigidObjectCfg
 78from isaaclab.markers import VisualizationMarkers
 79from isaaclab.markers.config import RAY_CASTER_MARKER_CFG
 80from isaaclab.sensors.camera import Camera, CameraCfg
 81from isaaclab.sensors.camera.utils import create_pointcloud_from_depth
 82from isaaclab.utils import convert_dict_to_backend
 83
 84
 85def define_sensor() -> Camera:
 86    """Defines the camera sensor to add to the scene."""
 87    # Setup camera sensor
 88    # In contrast to the ray-cast camera, we spawn the prim at these locations.
 89    # This means the camera sensor will be attached to these prims.
 90    prim_utils.create_prim("/World/Origin_00", "Xform")
 91    prim_utils.create_prim("/World/Origin_01", "Xform")
 92    camera_cfg = CameraCfg(
 93        prim_path="/World/Origin_.*/CameraSensor",
 94        update_period=0,
 95        height=480,
 96        width=640,
 97        data_types=[
 98            "rgb",
 99            "distance_to_image_plane",
100            "normals",
101            "semantic_segmentation",
102            "instance_segmentation_fast",
103            "instance_id_segmentation_fast",
104        ],
105        colorize_semantic_segmentation=True,
106        colorize_instance_id_segmentation=True,
107        colorize_instance_segmentation=True,
108        spawn=sim_utils.PinholeCameraCfg(
109            focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1.0e5)
110        ),
111    )
112    # Create camera
113    camera = Camera(cfg=camera_cfg)
114
115    return camera
116
117
118def design_scene() -> dict:
119    """Design the scene."""
120    # Populate scene
121    # -- Ground-plane
122    cfg = sim_utils.GroundPlaneCfg()
123    cfg.func("/World/defaultGroundPlane", cfg)
124    # -- Lights
125    cfg = sim_utils.DistantLightCfg(intensity=3000.0, color=(0.75, 0.75, 0.75))
126    cfg.func("/World/Light", cfg)
127
128    # Create a dictionary for the scene entities
129    scene_entities = {}
130
131    # Xform to hold objects
132    prim_utils.create_prim("/World/Objects", "Xform")
133    # Random objects
134    for i in range(8):
135        # sample random position
136        position = np.random.rand(3) - np.asarray([0.05, 0.05, -1.0])
137        position *= np.asarray([1.5, 1.5, 0.5])
138        # sample random color
139        color = (random.random(), random.random(), random.random())
140        # choose random prim type
141        prim_type = random.choice(["Cube", "Cone", "Cylinder"])
142        common_properties = {
143            "rigid_props": sim_utils.RigidBodyPropertiesCfg(),
144            "mass_props": sim_utils.MassPropertiesCfg(mass=5.0),
145            "collision_props": sim_utils.CollisionPropertiesCfg(),
146            "visual_material": sim_utils.PreviewSurfaceCfg(diffuse_color=color, metallic=0.5),
147            "semantic_tags": [("class", prim_type)],
148        }
149        if prim_type == "Cube":
150            shape_cfg = sim_utils.CuboidCfg(size=(0.25, 0.25, 0.25), **common_properties)
151        elif prim_type == "Cone":
152            shape_cfg = sim_utils.ConeCfg(radius=0.1, height=0.25, **common_properties)
153        elif prim_type == "Cylinder":
154            shape_cfg = sim_utils.CylinderCfg(radius=0.25, height=0.25, **common_properties)
155        # Rigid Object
156        obj_cfg = RigidObjectCfg(
157            prim_path=f"/World/Objects/Obj_{i:02d}",
158            spawn=shape_cfg,
159            init_state=RigidObjectCfg.InitialStateCfg(pos=position),
160        )
161        scene_entities[f"rigid_object{i}"] = RigidObject(cfg=obj_cfg)
162
163    # Sensors
164    camera = define_sensor()
165
166    # return the scene information
167    scene_entities["camera"] = camera
168    return scene_entities
169
170
171def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict):
172    """Run the simulator."""
173    # extract entities for simplified notation
174    camera: Camera = scene_entities["camera"]
175
176    # Create replicator writer
177    output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output", "camera")
178    rep_writer = rep.BasicWriter(
179        output_dir=output_dir,
180        frame_padding=0,
181        colorize_instance_id_segmentation=camera.cfg.colorize_instance_id_segmentation,
182        colorize_instance_segmentation=camera.cfg.colorize_instance_segmentation,
183        colorize_semantic_segmentation=camera.cfg.colorize_semantic_segmentation,
184    )
185
186    # Camera positions, targets, orientations
187    camera_positions = torch.tensor([[2.5, 2.5, 2.5], [-2.5, -2.5, 2.5]], device=sim.device)
188    camera_targets = torch.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], device=sim.device)
189    # These orientations are in ROS-convention, and will position the cameras to view the origin
190    camera_orientations = torch.tensor(  # noqa: F841
191        [[-0.1759, 0.3399, 0.8205, -0.4247], [-0.4247, 0.8205, -0.3399, 0.1759]], device=sim.device
192    )
193
194    # Set pose: There are two ways to set the pose of the camera.
195    # -- Option-1: Set pose using view
196    camera.set_world_poses_from_view(camera_positions, camera_targets)
197    # -- Option-2: Set pose using ROS
198    # camera.set_world_poses(camera_positions, camera_orientations, convention="ros")
199
200    # Index of the camera to use for visualization and saving
201    camera_index = args_cli.camera_id
202
203    # Create the markers for the --draw option outside of is_running() loop
204    if sim.has_gui() and args_cli.draw:
205        cfg = RAY_CASTER_MARKER_CFG.replace(prim_path="/Visuals/CameraPointCloud")
206        cfg.markers["hit"].radius = 0.002
207        pc_markers = VisualizationMarkers(cfg)
208
209    # Simulate physics
210    while simulation_app.is_running():
211        # Step simulation
212        sim.step()
213        # Update camera data
214        camera.update(dt=sim.get_physics_dt())
215
216        # Print camera info
217        print(camera)
218        if "rgb" in camera.data.output.keys():
219            print("Received shape of rgb image        : ", camera.data.output["rgb"].shape)
220        if "distance_to_image_plane" in camera.data.output.keys():
221            print("Received shape of depth image      : ", camera.data.output["distance_to_image_plane"].shape)
222        if "normals" in camera.data.output.keys():
223            print("Received shape of normals          : ", camera.data.output["normals"].shape)
224        if "semantic_segmentation" in camera.data.output.keys():
225            print("Received shape of semantic segm.   : ", camera.data.output["semantic_segmentation"].shape)
226        if "instance_segmentation_fast" in camera.data.output.keys():
227            print("Received shape of instance segm.   : ", camera.data.output["instance_segmentation_fast"].shape)
228        if "instance_id_segmentation_fast" in camera.data.output.keys():
229            print("Received shape of instance id segm.: ", camera.data.output["instance_id_segmentation_fast"].shape)
230        print("-------------------------------")
231
232        # Extract camera data
233        if args_cli.save:
234            # Save images from camera at camera_index
235            # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
236            single_cam_data = convert_dict_to_backend(
237                {k: v[camera_index] for k, v in camera.data.output.items()}, backend="numpy"
238            )
239
240            # Extract the other information
241            single_cam_info = camera.data.info[camera_index]
242
243            # Pack data back into replicator format to save them using its writer
244            rep_output = {"annotators": {}}
245            for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
246                if info is not None:
247                    rep_output["annotators"][key] = {"render_product": {"data": data, **info}}
248                else:
249                    rep_output["annotators"][key] = {"render_product": {"data": data}}
250            # Save images
251            # Note: We need to provide On-time data for Replicator to save the images.
252            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
253            rep_writer.write(rep_output)
254
255        # Draw pointcloud if there is a GUI and --draw has been passed
256        if sim.has_gui() and args_cli.draw and "distance_to_image_plane" in camera.data.output.keys():
257            # Derive pointcloud from camera at camera_index
258            pointcloud = create_pointcloud_from_depth(
259                intrinsic_matrix=camera.data.intrinsic_matrices[camera_index],
260                depth=camera.data.output["distance_to_image_plane"][camera_index],
261                position=camera.data.pos_w[camera_index],
262                orientation=camera.data.quat_w_ros[camera_index],
263                device=sim.device,
264            )
265
266            # In the first few steps, things are still being instanced and Camera.data
267            # can be empty. If we attempt to visualize an empty pointcloud it will crash
268            # the sim, so we check that the pointcloud is not empty.
269            if pointcloud.size()[0] > 0:
270                pc_markers.visualize(translations=pointcloud)
271
272
273def main():
274    """Main function."""
275    # Load simulation context
276    sim_cfg = sim_utils.SimulationCfg(device=args_cli.device)
277    sim = sim_utils.SimulationContext(sim_cfg)
278    # Set main camera
279    sim.set_camera_view([2.5, 2.5, 2.5], [0.0, 0.0, 0.0])
280    # design the scene
281    scene_entities = design_scene()
282    # Play simulator
283    sim.reset()
284    # Now we are ready!
285    print("[INFO]: Setup complete...")
286    # Run simulator
287    run_simulator(sim, scene_entities)
288
289
290if __name__ == "__main__":
291    # run the main function
292    main()
293    # close sim app
294    simulation_app.close()

Saving using Replicator Basic Writer#

To save camera outputs, we use the BasicWriter class from Omniverse Replicator. This class allows us to save the images in the NumPy format. For more information on the basic writer, please check the documentation.

    rep_writer = rep.BasicWriter(
        output_dir=output_dir,
        frame_padding=0,
        colorize_instance_id_segmentation=camera.cfg.colorize_instance_id_segmentation,
        colorize_instance_segmentation=camera.cfg.colorize_instance_segmentation,
        colorize_semantic_segmentation=camera.cfg.colorize_semantic_segmentation,
    )
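
The writer's constructor arguments control where and how the files are written. As a minimal, hedged sketch (the custom_output_dir path below is only an example), the same writer could be pointed at a different folder and asked to zero-pad the frame numbers in the output file names via frame_padding:

    import os

    import omni.replicator.core as rep

    # Hypothetical variation of the writer above: a custom output folder and
    # zero-padded frame numbers in the saved file names.
    custom_output_dir = os.path.join(os.getcwd(), "output", "camera_padded")  # example path
    rep_writer = rep.BasicWriter(output_dir=custom_output_dir, frame_padding=4)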

While stepping the simulator, the images can be saved to the defined folder. Since the BasicWriter only supports saving data in the NumPy format, we first need to convert the PyTorch tensors to NumPy arrays before packing them in a dictionary.

            # Save images from camera at camera_index
            # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
            single_cam_data = convert_dict_to_backend(
                {k: v[camera_index] for k, v in camera.data.output.items()}, backend="numpy"
            )

            # Extract the other information
            single_cam_info = camera.data.info[camera_index]
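
Conceptually, the conversion step just moves each tensor to the CPU and exposes it as a NumPy array. A minimal sketch of an equivalent manual conversion (the to_numpy_dict helper below is hypothetical, not part of Isaac Lab) looks like this:

    import torch

    # Hypothetical helper mirroring what the conversion step accomplishes:
    # detach each tensor, move it to the CPU, and convert it to a NumPy array.
    def to_numpy_dict(tensor_dict: dict) -> dict:
        return {key: value.detach().cpu().numpy() for key, value in tensor_dict.items()}

    # Example with dummy data shaped like a single camera's RGB output
    example = {"rgb": torch.zeros((480, 640, 3), dtype=torch.uint8)}
    print(type(to_numpy_dict(example)["rgb"]))  # <class 'numpy.ndarray'>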

After this step, we can save the images using the BasicWriter.

            # Pack data back into replicator format to save them using its writer
            rep_output = {"annotators": {}}
            for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
                if info is not None:
                    rep_output["annotators"][key] = {"render_product": {"data": data, **info}}
                else:
                    rep_output["annotators"][key] = {"render_product": {"data": data}}
            # Save images
            # Note: We need to provide On-time data for Replicator to save the images.
            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
            rep_writer.write(rep_output)
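
Once the simulation has been stepped with --save, the written files can be inspected offline. The snippet below is only a sketch: the exact file names (for example rgb_0.png and distance_to_image_plane_0.npy) are an assumption based on the annotator names and frame_padding=0, so adjust them to whatever appears in the output folder.

    import os

    import numpy as np
    from PIL import Image

    # Assumed output location and file names; check the generated folder for the real ones.
    output_dir = os.path.join("scripts", "tutorials", "04_sensors", "output", "camera")
    rgb = np.asarray(Image.open(os.path.join(output_dir, "rgb_0.png")))
    depth = np.load(os.path.join(output_dir, "distance_to_image_plane_0.npy"))
    print("rgb:", rgb.shape, rgb.dtype)
    print("depth:", depth.shape, depth.dtype)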

Projection into 3D Space#

We include utilities to project the depth image into 3D space. The re-projection operations are implemented with PyTorch operations, which allows for faster computation.

from isaaclab.utils.math import transform_points, unproject_depth

# Pointcloud in world frame
points_3d_cam = unproject_depth(
    camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices
)

points_3d_world = transform_points(points_3d_cam, camera.data.pos_w, camera.data.quat_w_ros)
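
For intuition, this back-projection is the standard pinhole model: a pixel (u, v) with depth d maps to x = (u - cx) d / fx, y = (v - cy) d / fy, z = d in the camera frame, where fx, fy, cx, cy come from the intrinsic matrix. Below is a conceptual, single-camera sketch of that math (the unproject_single helper is illustrative, not the library implementation):

    import torch

    def unproject_single(depth: torch.Tensor, K: torch.Tensor) -> torch.Tensor:
        """Back-project a (H, W) or (H, W, 1) z-depth image using K = [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]."""
        height, width = depth.shape[0], depth.shape[1]
        fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
        # pixel coordinate grid
        v, u = torch.meshgrid(
            torch.arange(height, device=depth.device, dtype=torch.float32),
            torch.arange(width, device=depth.device, dtype=torch.float32),
            indexing="ij",
        )
        d = depth.reshape(height, width)
        # x = (u - cx) * d / fx, y = (v - cy) * d / fy, z = d (camera frame)
        points = torch.stack(((u - cx) * d / fx, (v - cy) * d / fy, d), dim=-1)
        return points.reshape(-1, 3)

The library call performs the same computation batched over cameras, and transform_points then expresses the resulting points in the world frame using the camera pose.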

Alternatively, we can use the isaaclab.sensors.camera.utils.create_pointcloud_from_depth() function to create a point cloud from the depth image and transform it to the world frame.

            # Derive pointcloud from camera at camera_index
            pointcloud = create_pointcloud_from_depth(
                intrinsic_matrix=camera.data.intrinsic_matrices[camera_index],
                depth=camera.data.output["distance_to_image_plane"][camera_index],
                position=camera.data.pos_w[camera_index],
                orientation=camera.data.quat_w_ros[camera_index],
                device=sim.device,
            )
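
The function returns a tensor of 3D points in the world frame. If you want to inspect the points outside the simulator, one option (a sketch, assuming the pointcloud variable from the snippet above) is to dump them to a plain XYZ text file that most point-cloud viewers can open:

    import numpy as np

    # Save the Nx3 world-frame points as a whitespace-separated .xyz file for external viewers.
    np.savetxt("pointcloud.xyz", pointcloud.detach().cpu().numpy(), fmt="%.4f")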

The resulting point cloud can be visualized in the viewport. In this script, the points are drawn using the VisualizationMarkers utility (configured from RAY_CASTER_MARKER_CFG), which makes it easy to inspect the point cloud in 3D space.

            # In the first few steps, things are still being instanced and Camera.data
            # can be empty. If we attempt to visualize an empty pointcloud it will crash
            # the sim, so we check that the pointcloud is not empty.
            if pointcloud.size()[0] > 0:
                pc_markers.visualize(translations=pointcloud)

Executing the script#

To run the accompanying script, execute one of the following commands:

# Usage with saving and drawing
./isaaclab.sh -p scripts/tutorials/04_sensors/run_usd_camera.py --save --draw --enable_cameras

# Usage with saving only in headless mode
./isaaclab.sh -p scripts/tutorials/04_sensors/run_usd_camera.py --save --headless --enable_cameras

The simulation should start, and you can observe the different objects falling down. An output folder will be created in the IsaacLab/scripts/tutorials/04_sensors directory, and the images are saved there when --save is passed. Additionally, if the GUI is enabled and --draw is passed, you should see the point cloud drawn in the viewport.

To stop the simulation, close the window, or use Ctrl+C in the terminal.