Big data scatterplot adding lines

237 views Asked by At

I need a scatterplot of a dataset with 77M+ rows, with straight lines overlaid on it like the ones drawn by `plt.axline`.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Synthetic stand-in for the real data: two columns of standard-normal samples.
n_rows = 77000000
df = pd.DataFrame({
    'x': np.random.normal(0, 1, n_rows),
    'y': np.random.normal(0, 1, n_rows),
})

# Each reference line is described by an (x, y) point it passes through and a
# slope; the plotting snippets hand these straight to plt.axline.
intercept_a, slope_a = (-2, 0.5), 0.5
intercept_b, slope_b = (-0.5, -1.5), 0.1

print(df.shape)  # expected output: (77000000, 2)

Expected result

# (but w/ 77M rows)

# Only rows labelled 1..3000 are drawn here, to illustrate the desired result.
subset = df.loc[1:3000, ['x', 'y']]
plt.scatter(x=subset['x'], y=subset['y'])

# Overlay both reference lines, each given as a point plus a slope.
for point, slope, color, label in (
    (intercept_a, slope_a, 'red', 'dry'),
    (intercept_b, slope_b, 'blue', 'edge'),
):
    plt.axline(point, slope=slope, color=color, label=label)

enter image description here

I have tried with datashader and vaex, unsuccessfully.

Datashader

# DATASHADER
import datashader as ds

# Pin the data ranges explicitly so the very same bounds can be given to
# imshow below (Canvas takes `x_range` / `y_range`; it auto-ranges if omitted).
x_range = (df['x'].min(), df['x'].max())
y_range = (df['y'].min(), df['y'].max())

cvs = ds.Canvas(plot_width=800, plot_height=500,
                x_range=x_range, y_range=y_range)
agg = cvs.points(df, 'x', 'y')
# NOTE(review): `cc` is not imported in this snippet -- presumably
# `import colorcet as cc`; confirm it is in scope before running.
img = ds.tf.set_background(ds.tf.shade(agg, cmap=cc.fire), "white").to_pil()

# Without `extent`, imshow lays the image out in pixel coordinates, so lines
# specified in data coordinates do not line up with the rendered points.
# Mapping the image onto the data ranges puts both in the same coordinates.
plt.imshow(img, extent=(*x_range, *y_range), aspect='auto')
plt.axline(intercept_a, slope=slope_a, color='red', label='dry')
plt.axline(intercept_b, slope=slope_b, color='blue', label='edge')

enter image description here

Vaex

# VAEX
import vaex

# Wrap the existing pandas frame as a vaex DataFrame.
df_vaex = vaex.from_pandas(df=df)

# Heatmap of x against y; the options are collected here and passed through unchanged.
heatmap_options = dict(limits='97%', figsize=(15, 7), colorbar=False)
df_vaex.viz.heatmap("x", "y", **heatmap_options);

# Overlay both reference lines, each given as a point plus a slope.
for point, slope, color, label in (
    (intercept_a, slope_a, 'red', 'dry'),
    (intercept_b, slope_b, 'blue', 'edge'),
):
    plt.axline(point, slope=slope, color=color, label=label)

enter image description here

PS: Here is the datashader plot for my real data. I need a white background so that the saturated points are clearly visible.

enter image description here

0

There are 0 answers