A Coding Tutorial on Using Datashader to Render Massive Datasets with High-Performance Python Visual Analytics

by CryptoExpert
synthesia


import subprocess
import sys

# Install the third-party packages with pip, using the interpreter that is
# running this script (sys.executable) so they land in the same environment
# as the imports that follow. check_call raises CalledProcessError if pip
# exits with a non-zero status.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                       "datashader", "colorcet", "numba", "scipy"])

import numpy as np
import pandas as pd
import datashader as ds
import datashader.transfer_functions as tf
from datashader import reductions as rd
import colorcet as cc
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.gridspec import GridSpec
from scipy.stats import multivariate_normal
import time, warnings
# Suppress every warning category for the remainder of the run.
warnings.filterwarnings("ignore")

print("Datashader version:", ds.__version__)

def show(img, title="", ax=None, figsize=(6, 5)):
    """Draw a shaded image on a Matplotlib axes with a bold title and no axis.

    Args:
        img: Object exposing ``to_pil()``; the returned image is passed to
            ``ax.imshow``.
        title: Text placed above the image.
        ax: Axes to draw on. When ``None``, a new figure is created with
            ``figsize`` and shown immediately after drawing.
        figsize: Size of the figure created when ``ax`` is ``None``; unused
            when an axes is supplied.

    Returns:
        None. When an axes is supplied, laying out and showing the figure is
        left to the caller.
    """
    standalone = ax is None
    if standalone:
        _, ax = plt.subplots(figsize=figsize)

    rgba = img.to_pil()
    # origin="upper" puts row 0 of the image at the top of the axes.
    ax.imshow(rgba, origin="upper", aspect="auto")
    ax.set_title(title, fontsize=11, fontweight="bold")
    ax.axis("off")

    if standalone:
        plt.tight_layout()
        plt.show()

aistudios

print("\n=== SECTION 1: Core Pipeline ===")

rng = np.random.default_rng(42)
N = 2_000_000

# Three Gaussian clusters of N // 3 points each (so the frame holds
# 3 * (N // 3) rows, slightly under N). The first two are tight blobs at
# (-1, -1) and (1, 1); the third is centred at the origin and wider in x
# (sigma 1.5) than in y (sigma 0.5).
x = np.concatenate([rng.normal(-1, 0.5, N // 3),
                    rng.normal( 1, 0.5, N // 3),
                    rng.normal( 0, 1.5, N // 3)])
y = np.concatenate([rng.normal(-1, 0.5, N // 3),
                    rng.normal( 1, 0.5, N // 3),
                    rng.normal( 0, 0.5, N // 3)])
df_base = pd.DataFrame({"x": x, "y": y})

# Canvas: a 600 x 500 pixel grid covering [-4, 4] on both axes.
canvas = ds.Canvas(plot_width=600, plot_height=500,
                   x_range=(-4, 4), y_range=(-4, 4))

# Aggregate: count the points that fall into each pixel.
agg = canvas.points(df_base, "x", "y", agg=rd.count())

# Shade: colour the same aggregate with three different `how` settings.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
combos = [
    ("Linear / blues", tf.shade(agg, cmap=cc.blues, how="linear")),
    ("Log / fire",     tf.shade(agg, cmap=cc.fire,  how="log")),
    ("Eq-hist / bmy",  tf.shade(agg, cmap=cc.bmy,   how="eq_hist")),
]
for ax, (title, img) in zip(axes, combos):
    show(img, title, ax=ax)
plt.suptitle("Section 1 – 2 M points: Linear vs Log vs Eq-Hist normalisation",
             fontsize=13, fontweight="bold")
plt.tight_layout()
plt.show()

print("\n=== SECTION 2: Reduction Types ===")

# Size the new columns from the frame itself rather than from N.
n_actual = len(df_base)
df_base["value"] = rng.exponential(scale=2, size=n_actual)
df_base["label"] = pd.Categorical(
    rng.choice(["A", "B", "C"], size=n_actual),
    categories=["A", "B", "C"],
)

canvas2 = ds.Canvas(plot_width=400, plot_height=350,
                    x_range=(-4, 4), y_range=(-4, 4))

# (panel title, reduction, colormap). A colormap of None marks the
# categorical reduction, which is shaded with a colour key instead.
reductions_cfg = [
    ("count()",          rd.count(),            cc.kbc),
    ("sum(value)",       rd.sum("value"),       cc.CET_L3),
    ("mean(value)",      rd.mean("value"),      cc.CET_D4),
    ("std(value)",       rd.std("value"),       cc.CET_L16),
    ("min(value)",       rd.min("value"),       cc.CET_L17),
    ("max(value)",       rd.max("value"),       cc.bgyw),
    ("var(value)",       rd.var("value"),       cc.CET_L18),
    ("count_cat(label)", rd.count_cat("label"), None),
]

# One colour per category of the "label" column.
label_colors = {"A": "#e41a1c", "B": "#377eb8", "C": "#4daf4a"}

fig, axes = plt.subplots(2, 4, figsize=(18, 9))
axes = axes.flat

for ax, (name, agg_fn, cmap) in zip(axes, reductions_cfg):
    agg_r = canvas2.points(df_base, "x", "y", agg=agg_fn)
    if cmap is None:
        img = tf.shade(agg_r, color_key=label_colors)
    else:
        img = tf.shade(agg_r, cmap=cmap, how="eq_hist")
    show(img, name, ax=ax)

plt.suptitle("Section 2 – All Reduction Types on 2 M points", fontsize=14, fontweight="bold")
plt.tight_layout()
plt.show()

print("\n=== SECTION 3: Categorical Visualisation ===")

N_cat = 500_000
categories = ["Cluster A", "Cluster B", "Cluster C", "Cluster D"]
centers = [(-2, -2), (-2, 2), (2, -2), (2, 2)]
colors = {"Cluster A": "#e41a1c", "Cluster B": "#377eb8",
          "Cluster C": "#4daf4a", "Cluster D": "#ff7f00"}

# Every cluster gets the same number of points, so compute it once.
n = N_cat // len(categories)

# One frame per cluster: Gaussian scatter (sigma 0.8) around its centre.
# All frames share the same category list for the "cat" column.
frames = []
for cat, (cx, cy) in zip(categories, centers):
    frames.append(pd.DataFrame({
        "x": rng.normal(cx, 0.8, n),
        "y": rng.normal(cy, 0.8, n),
        "cat": pd.Categorical([cat] * n, categories=categories),
    }))
df_cat = pd.concat(frames, ignore_index=True)

canvas3 = ds.Canvas(plot_width=500, plot_height=500,
                    x_range=(-5, 5), y_range=(-5, 5))
agg_cat = canvas3.points(df_cat, "x", "y", agg=rd.count_cat("cat"))

fig, axes = plt.subplots(1, 3, figsize=(16, 5))

# Panel 0: the categorical aggregate shaded with the colour key only.
img_raw = tf.shade(agg_cat, color_key=colors)
show(img_raw, "Raw (no spread)", ax=axes[0])

# Panel 1: the same shading passed through tf.spread with px=1.
img_sp1 = tf.spread(tf.shade(agg_cat, color_key=colors), px=1)
show(img_sp1, "Spread px=1", ax=axes[1])

# Panel 2: the same shading composited onto a black background.
img_bg = tf.set_background(tf.shade(agg_cat, color_key=colors), color="black")
show(img_bg, "Black background", ax=axes[2])

# Empty plots exist only to register one legend entry per cluster colour.
for cat, col in colors.items():
    axes[2].plot([], [], "o", color=col, label=cat, markersize=8)
axes[2].legend(loc="lower right", fontsize=8, framealpha=0.6)

plt.suptitle("Section 3 – Categorical Rendering (500 k points)", fontsize=13, fontweight="bold")
plt.tight_layout()
plt.show()



Source link

livechat

You may also like

Subscribe To Our Newsletter

Join our mailing list to receive the latest news and updates from our team.

You have Successfully Subscribed!

Verified by MonsterInsights