A Coding Implementation of MolmoAct for Depth-Aware Spatial Reasoning, Visual Trajectory Tracing, and Robotic Action Prediction

by CryptoExpert
aistudios


class MolmoActVisualizer:
    """Visualization utilities for MolmoAct outputs.

    Draws two kinds of matplotlib figure: a point trace overlaid on an image
    (``plot_trace``) and a bar chart of action values (``plot_action``).

    Attributes:
        figsize: Figure size used by ``plot_trace``.
        colors: Ten RGBA colours sampled evenly from the viridis colormap;
            trace points are coloured from first to last along this ramp.
    """

    # Trace coordinates are multiplied by image_size / 256 to obtain pixel
    # positions, i.e. they are treated as living on a 256-unit grid.
    _TRACE_GRID_SIZE = 256

    # Labels and bar colours used when the caller supplies no labels.
    _DEFAULT_ACTION_LABELS = (
        "Δx (forward)", "Δy (left)", "Δz (up)",
        "Rx (roll)", "Ry (pitch)", "Rz (yaw)",
        "Gripper",
    )
    _POSITION_COLOR = "#3498db"
    _ROTATION_COLOR = "#e74c3c"
    _GRIPPER_COLOR = "#2ecc71"
    # Neutral colour for action components beyond the seven default ones.
    _EXTRA_COLOR = "#95a5a6"

    def __init__(self, figsize: Tuple[int, int] = (12, 8)):
        """Store the figure size and precompute the trace colour ramp.

        Args:
            figsize: ``(width, height)`` passed to ``plt.subplots`` by
                ``plot_trace``.
        """
        self.figsize = figsize
        self.colors = plt.cm.viridis(np.linspace(0, 1, 10))

    @classmethod
    def _trace_to_pixels(cls, trace, width, height):
        """Scale grid-space trace points to pixel x/y coordinate arrays.

        Raises:
            ValueError: If ``trace`` is not a 2-D sequence of (x, y, ...) rows.
        """
        points = np.asarray(trace, dtype=float)
        if points.ndim != 2 or points.shape[1] < 2:
            raise ValueError(
                "trace must be a sequence of [x, y] points, got array of "
                f"shape {points.shape}"
            )
        x_coords = points[:, 0] * width / cls._TRACE_GRID_SIZE
        y_coords = points[:, 1] * height / cls._TRACE_GRID_SIZE
        return x_coords, y_coords

    @staticmethod
    def _color_index(i: int, n_points: int, n_colors: int) -> int:
        """Map point ``i`` of ``n_points`` onto an index into ``n_colors``."""
        # max(..., 1) keeps a single-point trace from dividing by zero.
        return int(i * (n_colors - 1) / max(n_points - 1, 1))

    @classmethod
    def _resolve_action_labels(cls, n: int, action_labels=None) -> List[str]:
        """Return exactly ``n`` tick labels for an ``n``-component action.

        Raises:
            ValueError: If explicit ``action_labels`` do not have length ``n``.
        """
        if action_labels is not None:
            if len(action_labels) != n:
                raise ValueError(
                    f"action_labels has {len(action_labels)} entries but "
                    f"action has {n} values"
                )
            return list(action_labels)
        defaults = list(cls._DEFAULT_ACTION_LABELS[:n])
        # Components past the default set get a generic positional label.
        defaults.extend(f"Dim {i + 1}" for i in range(len(defaults), n))
        return defaults

    @classmethod
    def _action_colors(cls, n: int) -> List[str]:
        """Return ``n`` bar colours: 3 position, 3 rotation, 1 gripper, extras."""
        palette = (
            [cls._POSITION_COLOR] * 3
            + [cls._ROTATION_COLOR] * 3
            + [cls._GRIPPER_COLOR]
        )
        colors = palette[:n]
        colors.extend([cls._EXTRA_COLOR] * (n - len(colors)))
        return colors

    def plot_trace(
        self,
        image: "Image.Image",
        trace: List[List[int]],
        title: str = "Visual Reasoning Trace",
        save_path: Optional[str] = None,
    ) -> None:
        """Plot a visual trace overlaid on an image.

        The trace is drawn as a connected line with numbered, colour-graded
        points; the first and last points are additionally marked "Start" and
        "End". An empty or ``None`` trace shows the image alone.

        Args:
            image: Image to display; converted with ``np.array``.
            trace: Sequence of ``[x, y]`` points on a 256-unit grid, rescaled
                to the image's pixel width and height.
            title: Figure title.
            save_path: If truthy, the figure is also written to this path.

        Raises:
            ValueError: If ``trace`` is non-empty but not a 2-D point list.
        """
        img_array = np.array(image)

        # ``len`` rather than truthiness so array-like traces work too.
        has_trace = trace is not None and len(trace) > 0
        if has_trace:
            # Validate and scale before creating the figure so bad input does
            # not leave an empty figure behind.
            h, w = img_array.shape[:2]
            x_coords, y_coords = self._trace_to_pixels(trace, w, h)

        fig, ax = plt.subplots(figsize=self.figsize)
        ax.imshow(img_array)

        if has_trace:
            # Wide translucent white line under a thin cyan one.
            ax.plot(x_coords, y_coords, "w-", linewidth=2, alpha=0.7)
            ax.plot(x_coords, y_coords, "c-", linewidth=1, alpha=0.9)

            n_points = len(x_coords)
            n_colors = len(self.colors)
            for i, (x, y) in enumerate(zip(x_coords, y_coords)):
                color_idx = self._color_index(i, n_points, n_colors)
                ax.scatter(x, y, c=[self.colors[color_idx]], s=100,
                           edgecolors="white", linewidths=2, zorder=5)
                ax.annotate(f"{i+1}", (x, y), textcoords="offset points",
                            xytext=(5, 5), fontsize=10, color="white",
                            fontweight="bold")

            ax.scatter(x_coords[0], y_coords[0], c="lime", s=200,
                       marker="o", edgecolors="white", linewidths=3,
                       zorder=6, label="Start")
            ax.scatter(x_coords[-1], y_coords[-1], c="red", s=200,
                       marker="X", edgecolors="white", linewidths=3,
                       zorder=6, label="End")

        ax.set_title(title, fontsize=14, fontweight="bold")
        ax.axis("off")
        if has_trace:
            # Only the Start/End markers carry labels; without them there is
            # nothing to put in a legend.
            ax.legend(loc="upper right")

        plt.tight_layout()

        if save_path:
            fig.savefig(save_path, dpi=150, bbox_inches="tight")
            print(f"💾 Saved visualization to {save_path}")

        plt.show()

    def plot_action(
        self,
        action: List[float],
        action_labels: Optional[List[str]] = None,
        title: str = "Predicted Robot Action",
        save_path: Optional[str] = None,
    ) -> None:
        """Plot action values as a bar chart.

        Each bar is annotated with its value to three decimals. With the
        default labels, the first three bars are coloured as position, the
        next three as rotation and the seventh as gripper.

        Args:
            action: Action values, one bar per element.
            action_labels: Tick labels, one per action value. Defaults to the
                seven built-in labels, truncated or extended with generic
                ``"Dim N"`` labels to match ``len(action)``.
            title: Figure title.
            save_path: If truthy, the figure is also written to this path.

        Raises:
            ValueError: If ``action_labels`` is given with a length different
                from ``len(action)``.
        """
        n_values = len(action)
        # Resolve labels/colours first so a length mismatch fails before any
        # figure is created.
        labels = self._resolve_action_labels(n_values, action_labels)
        colors = self._action_colors(n_values)

        # This chart uses its own fixed size rather than ``self.figsize``.
        fig, ax = plt.subplots(figsize=(10, 5))

        x = np.arange(n_values)
        bars = ax.bar(x, action, color=colors, edgecolor="white", linewidth=1.5)

        for bar, val in zip(bars, action):
            height = bar.get_height()
            is_up = height >= 0
            # Put the value just above positive bars and just below negative
            # ones.
            ax.annotate(f"{val:.3f}",
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3 if is_up else -12),
                        textcoords="offset points",
                        ha="center", va="bottom" if is_up else "top",
                        fontsize=9, fontweight="bold")

        ax.set_xticks(x)
        ax.set_xticklabels(labels, rotation=45, ha="right")
        ax.set_ylabel("Value", fontsize=12)
        ax.set_title(title, fontsize=14, fontweight="bold")
        ax.axhline(y=0, color="gray", linestyle="--", alpha=0.5)
        ax.grid(axis="y", alpha=0.3)

        from matplotlib.patches import Patch
        legend_elements = [
            Patch(facecolor=self._POSITION_COLOR, label="Position"),
            Patch(facecolor=self._ROTATION_COLOR, label="Rotation"),
            Patch(facecolor=self._GRIPPER_COLOR, label="Gripper"),
        ]
        ax.legend(handles=legend_elements, loc="upper right")

        plt.tight_layout()

        if save_path:
            fig.savefig(save_path, dpi=150, bbox_inches="tight")

        plt.show()



Source link

livechat

You may also like

Subscribe To Our Newsletter

Join our mailing list to receive the latest news and updates from our team.

You have Successfully Subscribed!

Verified by MonsterInsights