How to Build High-Performance GPU-Accelerated Simulations and Differentiable Physics Workflows Using NVIDIA Warp Kernels

by CryptoExpert
synthesia


angles = np.linspace(0.0, 2.0 * np.pi, n_particles, endpoint=False, dtype=np.float32)
px0_np = 0.4 * np.cos(angles).astype(np.float32)
py0_np = (0.7 + 0.15 * np.sin(angles)).astype(np.float32)
vx0_np = (-0.8 * np.sin(angles)).astype(np.float32)
vy0_np = (0.8 * np.cos(angles)).astype(np.float32)

px0_wp = wp.array(px0_np, dtype=wp.float32, device=device)
py0_wp = wp.array(py0_np, dtype=wp.float32, device=device)
vx0_wp = wp.array(vx0_np, dtype=wp.float32, device=device)
vy0_wp = wp.array(vy0_np, dtype=wp.float32, device=device)

state_size = (steps + 1) * n_particles
px_wp = wp.empty(state_size, dtype=wp.float32, device=device)
py_wp = wp.empty(state_size, dtype=wp.float32, device=device)
vx_wp = wp.empty(state_size, dtype=wp.float32, device=device)
vy_wp = wp.empty(state_size, dtype=wp.float32, device=device)

wp.launch(
kernel=init_particles_kernel,
dim=n_particles,
inputs=[n_particles, px0_wp, py0_wp, vx0_wp, vy0_wp],
outputs=[px_wp, py_wp, vx_wp, vy_wp],
device=device,
)

synthesia

wp.launch(
kernel=simulate_particles_kernel,
dim=steps * n_particles,
inputs=[n_particles, dt, gravity, damping, bounce, radius],
outputs=[px_wp, py_wp, vx_wp, vy_wp],
device=device,
)
wp.synchronize()

px_traj = px_wp.numpy().reshape(steps + 1, n_particles)
py_traj = py_wp.numpy().reshape(steps + 1, n_particles)

sample_ids = np.linspace(0, n_particles – 1, 16, dtype=int)
plt.figure(figsize=(8, 6))
for idx in sample_ids:
plt.plot(px_traj[:, idx], py_traj[:, idx], linewidth=1.5)
plt.axhline(radius, linestyle=”–“)
plt.xlim(-1.05, 1.05)
plt.ylim(0.0, 1.25)
plt.title(f”Warp particle trajectories on {device}”)
plt.xlabel(“x”)
plt.ylabel(“y”)
plt.show()

proj_steps = 180
proj_dt = np.float32(0.025)
proj_g = np.float32(-9.8)
target_x = np.float32(3.8)
target_y = np.float32(0.0)

vx_value = np.float32(2.0)
vy_value = np.float32(6.5)
lr = 0.08
iters = 60

loss_history = []
vx_history = []
vy_history = []

for it in range(iters):
init_vx_wp = wp.array(np.array([vx_value], dtype=np.float32), dtype=wp.float32, device=device, requires_grad=True)
init_vy_wp = wp.array(np.array([vy_value], dtype=np.float32), dtype=wp.float32, device=device, requires_grad=True)

x_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
y_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
vx_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
vy_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
loss_wp = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)

tape = wp.Tape()
with tape:
wp.launch(
kernel=init_projectile_kernel,
dim=1,
inputs=[],
outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp, init_vx_wp, init_vy_wp],
device=device,
)
wp.launch(
kernel=projectile_step_kernel,
dim=proj_steps,
inputs=[proj_dt, proj_g],
outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp],
device=device,
)
wp.launch(
kernel=projectile_loss_kernel,
dim=1,
inputs=[proj_steps, target_x, target_y],
outputs=[x_hist_wp, y_hist_wp, loss_wp],
device=device,
)

tape.backward(loss=loss_wp)
wp.synchronize()

current_loss = float(loss_wp.numpy()[0])
grad_vx = float(init_vx_wp.grad.numpy()[0])
grad_vy = float(init_vy_wp.grad.numpy()[0])

vx_value = np.float32(vx_value – lr * grad_vx)
vy_value = np.float32(vy_value – lr * grad_vy)

loss_history.append(current_loss)
vx_history.append(float(vx_value))
vy_history.append(float(vy_value))

if it % 10 == 0 or it == iters – 1:
print(f”iter={it:02d} loss={current_loss:.6f} vx={vx_value:.4f} vy={vy_value:.4f} grad=({grad_vx:.4f}, {grad_vy:.4f})”)

final_init_vx_wp = wp.array(np.array([vx_value], dtype=np.float32), dtype=wp.float32, device=device)
final_init_vy_wp = wp.array(np.array([vy_value], dtype=np.float32), dtype=wp.float32, device=device)
x_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)
y_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)
vx_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)
vy_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)

wp.launch(
kernel=init_projectile_kernel,
dim=1,
inputs=[],
outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp, final_init_vx_wp, final_init_vy_wp],
device=device,
)
wp.launch(
kernel=projectile_step_kernel,
dim=proj_steps,
inputs=[proj_dt, proj_g],
outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp],
device=device,
)
wp.synchronize()

x_path = x_hist_wp.numpy()
y_path = y_hist_wp.numpy()

fig = plt.figure(figsize=(15, 4))

ax1 = fig.add_subplot(1, 3, 1)
ax1.plot(loss_history)
ax1.set_title(“Optimization loss”)
ax1.set_xlabel(“Iteration”)
ax1.set_ylabel(“Squared distance”)

ax2 = fig.add_subplot(1, 3, 2)
ax2.plot(vx_history, label=”vx”)
ax2.plot(vy_history, label=”vy”)
ax2.set_title(“Learned initial velocity”)
ax2.set_xlabel(“Iteration”)
ax2.legend()

ax3 = fig.add_subplot(1, 3, 3)
ax3.plot(x_path, y_path, linewidth=2)
ax3.scatter([target_x], [target_y], s=80, marker=”x”)
ax3.set_title(“Differentiable projectile trajectory”)
ax3.set_xlabel(“x”)
ax3.set_ylabel(“y”)
ax3.set_ylim(-0.1, max(1.0, float(np.max(y_path)) + 0.3))

plt.tight_layout()
plt.show()

final_dx = float(x_path[-1] – target_x)
final_dy = float(y_path[-1] – target_y)
final_dist = math.sqrt(final_dx * final_dx + final_dy * final_dy)
print(f”Final target miss distance: {final_dist:.6f}”)
print(f”Optimized initial velocity: vx={vx_value:.6f}, vy={vy_value:.6f}”)



Source link

synthesia

You may also like

Subscribe To Our Newsletter

Join our mailing list to receive the latest news and updates from our team.

You have Successfully Subscribed!

Verified by MonsterInsights