Saving rendered images and 3D re-projection

Saving rendered images and 3D re-projection#

This guide accompanied with the run_usd_camera.py script in the IsaacLab/scripts/tutorials/04_sensors directory.

Code for run_usd_camera.py
  1# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
  2# All rights reserved.
  3#
  4# SPDX-License-Identifier: BSD-3-Clause
  5
  6"""
  7This script shows how to use the camera sensor from the Isaac Lab framework.
  8
  9The camera sensor is created and interfaced through the Omniverse Replicator API. However, instead of using
 10the simulator or OpenGL convention for the camera, we use the robotics or ROS convention.
 11
 12.. code-block:: bash
 13
 14    # Usage with GUI
 15    ./isaaclab.sh -p scripts/tutorials/04_sensors/run_usd_camera.py --enable_cameras
 16
 17    # Usage with headless
 18    ./isaaclab.sh -p scripts/tutorials/04_sensors/run_usd_camera.py --headless --enable_cameras
 19
 20"""
 21
 22"""Launch Isaac Sim Simulator first."""
 23
 24import argparse
 25
 26from isaaclab.app import AppLauncher
 27
 28# add argparse arguments
 29parser = argparse.ArgumentParser(description="This script demonstrates how to use the camera sensor.")
 30parser.add_argument(
 31    "--draw",
 32    action="store_true",
 33    default=False,
 34    help="Draw the pointcloud from camera at index specified by ``--camera_id``.",
 35)
 36parser.add_argument(
 37    "--save",
 38    action="store_true",
 39    default=False,
 40    help="Save the data from camera at index specified by ``--camera_id``.",
 41)
 42parser.add_argument(
 43    "--camera_id",
 44    type=int,
 45    choices={0, 1},
 46    default=0,
 47    help=(
 48        "The camera ID to use for displaying points or saving the camera data. Default is 0."
 49        " The viewport will always initialize with the perspective of camera 0."
 50    ),
 51)
 52# append AppLauncher cli args
 53AppLauncher.add_app_launcher_args(parser)
 54# parse the arguments
 55args_cli = parser.parse_args()
 56
 57# launch omniverse app
 58app_launcher = AppLauncher(args_cli)
 59simulation_app = app_launcher.app
 60
 61"""Rest everything follows."""
 62
 63import numpy as np
 64import os
 65import random
 66import torch
 67
 68import omni.replicator.core as rep
 69
 70import isaaclab.sim as sim_utils
 71from isaaclab.assets import RigidObject, RigidObjectCfg
 72from isaaclab.markers import VisualizationMarkers
 73from isaaclab.markers.config import RAY_CASTER_MARKER_CFG
 74from isaaclab.sensors.camera import Camera, CameraCfg
 75from isaaclab.sensors.camera.utils import create_pointcloud_from_depth
 76from isaaclab.utils import convert_dict_to_backend
 77
 78
 79def define_sensor() -> Camera:
 80    """Defines the camera sensor to add to the scene."""
 81    # Setup camera sensor
 82    # In contrast to the ray-cast camera, we spawn the prim at these locations.
 83    # This means the camera sensor will be attached to these prims.
 84    sim_utils.create_prim("/World/Origin_00", "Xform")
 85    sim_utils.create_prim("/World/Origin_01", "Xform")
 86    camera_cfg = CameraCfg(
 87        prim_path="/World/Origin_.*/CameraSensor",
 88        update_period=0,
 89        height=480,
 90        width=640,
 91        data_types=[
 92            "rgb",
 93            "distance_to_image_plane",
 94            "normals",
 95            "semantic_segmentation",
 96            "instance_segmentation_fast",
 97            "instance_id_segmentation_fast",
 98        ],
 99        colorize_semantic_segmentation=True,
100        colorize_instance_id_segmentation=True,
101        colorize_instance_segmentation=True,
102        spawn=sim_utils.PinholeCameraCfg(
103            focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1.0e5)
104        ),
105    )
106    # Create camera
107    camera = Camera(cfg=camera_cfg)
108
109    return camera
110
111
112def design_scene() -> dict:
113    """Design the scene."""
114    # Populate scene
115    # -- Ground-plane
116    cfg = sim_utils.GroundPlaneCfg()
117    cfg.func("/World/defaultGroundPlane", cfg)
118    # -- Lights
119    cfg = sim_utils.DistantLightCfg(intensity=3000.0, color=(0.75, 0.75, 0.75))
120    cfg.func("/World/Light", cfg)
121
122    # Create a dictionary for the scene entities
123    scene_entities = {}
124
125    # Xform to hold objects
126    sim_utils.create_prim("/World/Objects", "Xform")
127    # Random objects
128    for i in range(8):
129        # sample random position
130        position = np.random.rand(3) - np.asarray([0.05, 0.05, -1.0])
131        position *= np.asarray([1.5, 1.5, 0.5])
132        # sample random color
133        color = (random.random(), random.random(), random.random())
134        # choose random prim type
135        prim_type = random.choice(["Cube", "Cone", "Cylinder"])
136        common_properties = {
137            "rigid_props": sim_utils.RigidBodyPropertiesCfg(),
138            "mass_props": sim_utils.MassPropertiesCfg(mass=5.0),
139            "collision_props": sim_utils.CollisionPropertiesCfg(),
140            "visual_material": sim_utils.PreviewSurfaceCfg(diffuse_color=color, metallic=0.5),
141            "semantic_tags": [("class", prim_type)],
142        }
143        if prim_type == "Cube":
144            shape_cfg = sim_utils.CuboidCfg(size=(0.25, 0.25, 0.25), **common_properties)
145        elif prim_type == "Cone":
146            shape_cfg = sim_utils.ConeCfg(radius=0.1, height=0.25, **common_properties)
147        elif prim_type == "Cylinder":
148            shape_cfg = sim_utils.CylinderCfg(radius=0.25, height=0.25, **common_properties)
149        # Rigid Object
150        obj_cfg = RigidObjectCfg(
151            prim_path=f"/World/Objects/Obj_{i:02d}",
152            spawn=shape_cfg,
153            init_state=RigidObjectCfg.InitialStateCfg(pos=position),
154        )
155        scene_entities[f"rigid_object{i}"] = RigidObject(cfg=obj_cfg)
156
157    # Sensors
158    camera = define_sensor()
159
160    # return the scene information
161    scene_entities["camera"] = camera
162    return scene_entities
163
164
165def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict):
166    """Run the simulator."""
167    # extract entities for simplified notation
168    camera: Camera = scene_entities["camera"]
169
170    # Create replicator writer
171    output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output", "camera")
172    rep_writer = rep.BasicWriter(
173        output_dir=output_dir,
174        frame_padding=0,
175        colorize_instance_id_segmentation=camera.cfg.colorize_instance_id_segmentation,
176        colorize_instance_segmentation=camera.cfg.colorize_instance_segmentation,
177        colorize_semantic_segmentation=camera.cfg.colorize_semantic_segmentation,
178    )
179
180    # Camera positions, targets, orientations
181    camera_positions = torch.tensor([[2.5, 2.5, 2.5], [-2.5, -2.5, 2.5]], device=sim.device)
182    camera_targets = torch.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], device=sim.device)
183    # These orientations are in ROS-convention, and will position the cameras to view the origin
184    camera_orientations = torch.tensor(  # noqa: F841
185        [[-0.1759, 0.3399, 0.8205, -0.4247], [-0.4247, 0.8205, -0.3399, 0.1759]], device=sim.device
186    )
187
188    # Set pose: There are two ways to set the pose of the camera.
189    # -- Option-1: Set pose using view
190    camera.set_world_poses_from_view(camera_positions, camera_targets)
191    # -- Option-2: Set pose using ROS
192    # camera.set_world_poses(camera_positions, camera_orientations, convention="ros")
193
194    # Index of the camera to use for visualization and saving
195    camera_index = args_cli.camera_id
196
197    # Create the markers for the --draw option outside of is_running() loop
198    if sim.has_gui() and args_cli.draw:
199        cfg = RAY_CASTER_MARKER_CFG.replace(prim_path="/Visuals/CameraPointCloud")
200        cfg.markers["hit"].radius = 0.002
201        pc_markers = VisualizationMarkers(cfg)
202
203    # Simulate physics
204    while simulation_app.is_running():
205        # Step simulation
206        sim.step()
207        # Update camera data
208        camera.update(dt=sim.get_physics_dt())
209
210        # Print camera info
211        print(camera)
212        if "rgb" in camera.data.output.keys():
213            print("Received shape of rgb image        : ", camera.data.output["rgb"].shape)
214        if "distance_to_image_plane" in camera.data.output.keys():
215            print("Received shape of depth image      : ", camera.data.output["distance_to_image_plane"].shape)
216        if "normals" in camera.data.output.keys():
217            print("Received shape of normals          : ", camera.data.output["normals"].shape)
218        if "semantic_segmentation" in camera.data.output.keys():
219            print("Received shape of semantic segm.   : ", camera.data.output["semantic_segmentation"].shape)
220        if "instance_segmentation_fast" in camera.data.output.keys():
221            print("Received shape of instance segm.   : ", camera.data.output["instance_segmentation_fast"].shape)
222        if "instance_id_segmentation_fast" in camera.data.output.keys():
223            print("Received shape of instance id segm.: ", camera.data.output["instance_id_segmentation_fast"].shape)
224        print("-------------------------------")
225
226        # Extract camera data
227        if args_cli.save:
228            # Save images from camera at camera_index
229            # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
230            single_cam_data = convert_dict_to_backend(
231                {k: v[camera_index] for k, v in camera.data.output.items()}, backend="numpy"
232            )
233
234            # Extract the other information
235            single_cam_info = camera.data.info[camera_index]
236
237            # Pack data back into replicator format to save them using its writer
238            rep_output = {"annotators": {}}
239            for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
240                if info is not None:
241                    rep_output["annotators"][key] = {"render_product": {"data": data, **info}}
242                else:
243                    rep_output["annotators"][key] = {"render_product": {"data": data}}
244            # Save images
245            # Note: We need to provide On-time data for Replicator to save the images.
246            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
247            rep_writer.write(rep_output)
248
249        # Draw pointcloud if there is a GUI and --draw has been passed
250        if sim.has_gui() and args_cli.draw and "distance_to_image_plane" in camera.data.output.keys():
251            # Derive pointcloud from camera at camera_index
252            pointcloud = create_pointcloud_from_depth(
253                intrinsic_matrix=camera.data.intrinsic_matrices[camera_index],
254                depth=camera.data.output["distance_to_image_plane"][camera_index],
255                position=camera.data.pos_w[camera_index],
256                orientation=camera.data.quat_w_ros[camera_index],
257                device=sim.device,
258            )
259
260            # In the first few steps, things are still being instanced and Camera.data
261            # can be empty. If we attempt to visualize an empty pointcloud it will crash
262            # the sim, so we check that the pointcloud is not empty.
263            if pointcloud.size()[0] > 0:
264                pc_markers.visualize(translations=pointcloud)
265
266
267def main():
268    """Main function."""
269    # Load simulation context
270    sim_cfg = sim_utils.SimulationCfg(device=args_cli.device)
271    sim = sim_utils.SimulationContext(sim_cfg)
272    # Set main camera
273    sim.set_camera_view([2.5, 2.5, 2.5], [0.0, 0.0, 0.0])
274    # Design scene
275    scene_entities = design_scene()
276    # Play simulator
277    sim.reset()
278    # Now we are ready!
279    print("[INFO]: Setup complete...")
280    # Run simulator
281    run_simulator(sim, scene_entities)
282
283
284if __name__ == "__main__":
285    # run the main function
286    main()
287    # close sim app
288    simulation_app.close()

Saving using Replicator Basic Writer#

To save camera outputs, we use the basic write class from Omniverse Replicator. This class allows us to save the images in a numpy format. For more information on the basic writer, please check the documentation.

    rep_writer = rep.BasicWriter(
        output_dir=output_dir,
        frame_padding=0,
        colorize_instance_id_segmentation=camera.cfg.colorize_instance_id_segmentation,
        colorize_instance_segmentation=camera.cfg.colorize_instance_segmentation,
        colorize_semantic_segmentation=camera.cfg.colorize_semantic_segmentation,
    )

While stepping the simulator, the images can be saved to the defined folder. Since the BasicWriter only supports saving data using NumPy format, we first need to convert the PyTorch sensors to NumPy arrays before packing them in a dictionary.

            # Save images from camera at camera_index
            # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
            single_cam_data = convert_dict_to_backend(
                {k: v[camera_index] for k, v in camera.data.output.items()}, backend="numpy"
            )

            # Extract the other information
            single_cam_info = camera.data.info[camera_index]

After this step, we can save the images using the BasicWriter.

            # Pack data back into replicator format to save them using its writer
            rep_output = {"annotators": {}}
            for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
                if info is not None:
                    rep_output["annotators"][key] = {"render_product": {"data": data, **info}}
                else:
                    rep_output["annotators"][key] = {"render_product": {"data": data}}
            # Save images
            # Note: We need to provide On-time data for Replicator to save the images.
            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
            rep_writer.write(rep_output)

Projection into 3D Space#

We include utilities to project the depth image into 3D Space. The re-projection operations are done using PyTorch operations which allows faster computation.

from isaaclab.utils.math import transform_points, unproject_depth

# Pointcloud in world frame
points_3d_cam = unproject_depth(
   camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices
)

points_3d_world = transform_points(points_3d_cam, camera.data.pos_w, camera.data.quat_w_ros)

Alternately, we can use the isaaclab.sensors.camera.utils.create_pointcloud_from_depth() function to create a point cloud from the depth image and transform it to the world frame.

            # Derive pointcloud from camera at camera_index
            pointcloud = create_pointcloud_from_depth(
                intrinsic_matrix=camera.data.intrinsic_matrices[camera_index],
                depth=camera.data.output["distance_to_image_plane"][camera_index],
                position=camera.data.pos_w[camera_index],
                orientation=camera.data.quat_w_ros[camera_index],
                device=sim.device,
            )

The resulting point cloud can be visualized using the isaacsim.util.debug_draw extension from Isaac Sim. This makes it easy to visualize the point cloud in the 3D space.

            # In the first few steps, things are still being instanced and Camera.data
            # can be empty. If we attempt to visualize an empty pointcloud it will crash
            # the sim, so we check that the pointcloud is not empty.
            if pointcloud.size()[0] > 0:
                pc_markers.visualize(translations=pointcloud)

Executing the script#

To run the accompanying script, execute the following command:

# Usage with saving and drawing
./isaaclab.sh -p scripts/tutorials/04_sensors/run_usd_camera.py --save --draw --enable_cameras

# Usage with saving only in headless mode
./isaaclab.sh -p scripts/tutorials/04_sensors/run_usd_camera.py --save --headless --enable_cameras

The simulation should start, and you can observe different objects falling down. An output folder will be created in the IsaacLab/scripts/tutorials/04_sensors directory, where the images will be saved. Additionally, you should see the point cloud in the 3D space drawn on the viewport.

To stop the simulation, close the window, or use Ctrl+C in the terminal.