绘制散点热图以展示散点密度(by python)

前言

使用 Python 为散点图添加色带（colorbar），用颜色表示散点密度。
正文

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from matplotlib import rcParams
from statistics import mean
from sklearn.metrics import explained_variance_score, r2_score, mean_squared_error, mean_absolute_error
from scipy.stats import pearsonr
from matplotlib.colors import LinearSegmentedColormap
import os

# ====================== User-adjustable parameters ======================
# Output directory (the figure is saved here); created if it does not exist
output_dir = "./output"
os.makedirs(output_dir, exist_ok=True)

# Number of data points (change to match the length of your own data)
n_points = 5000

# Replace with your own data; random demo data is generated here
# (x is the observed/true value, y is the predicted value)
np.random.seed(42)  # fixed seed so the demo data is reproducible
x = np.random.rand(n_points) * 100            # observed values, uniform in [0, 100)
y = x + np.random.normal(0, 5, size=n_points)  # predicted values: x plus Gaussian noise (std 5)

# What the scatter colour encodes
color_type = "frequency"  # "error": colour by |x - y|; any other value (e.g. "frequency"): colour by point density
# ===========================================================

# === Font and style configuration ===
config = {"font.family": 'Times New Roman', "font.size": 16, "mathtext.fontset": 'stix'}
rcParams.update(config)

# === 统计指标函数 ===
def nash_sutcliffe_efficiency(observed, predicted):
    """Return the Nash-Sutcliffe efficiency (NSE) of a prediction.

    NSE = 1 - sum((observed - predicted)^2) / sum((observed - mean(observed))^2)

    Args:
        observed: Array-like of observed (reference) values.
        predicted: Array-like of predicted values, broadcastable against
            ``observed``.

    Returns:
        The NSE as a NumPy float. 1.0 means a perfect match; 0.0 means the
        prediction is no better than the mean of ``observed``.

    Note:
        If ``observed`` is constant the denominator is zero and the result
        follows NumPy division semantics (non-finite value plus a
        RuntimeWarning).
    """
    # Coerce to float arrays so plain lists/tuples work, not only ndarrays.
    observed = np.asarray(observed, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    residual_ss = np.sum((observed - predicted) ** 2)
    total_ss = np.sum((observed - np.mean(observed)) ** 2)
    return 1 - (residual_ss / total_ss)

# === Compute evaluation metrics (x = observed, y = predicted) ===
# np.mean keeps the reduction vectorised; statistics.mean would iterate the
# ndarray element by element in pure Python.
BIAS = np.mean(x - y)  # mean of (observed - predicted)
MSE = mean_squared_error(x, y)
RMSE = np.sqrt(MSE)
R2 = r2_score(x, y)
# Index the result rather than reading `.statistic`, so this also runs on
# SciPy releases where pearsonr returns a plain (r, p-value) tuple.
PCC = pearsonr(x, y)[0]
# NOTE(review): the "- 2 - 1" term assumes two predictors in the degrees of
# freedom; confirm this matches the model being evaluated.
adjR2 = 1 - ((1 - R2) * (len(x) - 1)) / (len(x) - 2 - 1)
MAE = mean_absolute_error(x, y)
EV = explained_variance_score(x, y)
# NSE (closed form from MSE and the population variance of x) and nse
# (explicit sums of squares) are the same quantity computed two ways.
NSE = 1 - (RMSE ** 2 / np.var(x))
nse = nash_sutcliffe_efficiency(x, y)

print(f"R2: {R2:.3f}, NSE: {NSE:.3f}, MAE: {MAE:.3f}, RMSE: {RMSE:.3f}, nse: {nse:.3f}, PCC: {PCC:.3f}")

# === Choose the value that drives each point's colour ===
if color_type != "error":
    # Density mode: evaluate a Gaussian kernel density estimate of the
    # (x, y) samples at the samples themselves.
    xy = np.vstack((x, y))
    z = stats.gaussian_kde(xy).evaluate(xy)
else:
    # Error mode: absolute difference between observed and predicted.
    z = np.abs(x - y)

# Reorder the points by ascending colour value so the largest values are
# drawn last and therefore end up on top.
idx = np.argsort(z)
x = x[idx]
y = y[idx]
z = z[idx]

# === Regression line (first-degree least-squares fit of y on x) ===
k, b = np.polyfit(x, y, 1)
regression_line = k * x + b  # fitted value at every sample (kept for reuse)

# === Draw the scatter plot ===
# Shared upper limit for both axes, derived from x only.
# NOTE(review): y values below 0 or above `scale` fall outside the axes and
# are clipped — confirm that is intended for your data.
scale = np.ceil(np.max(x)).astype(int)
fig, ax = plt.subplots(figsize=(7, 6), dpi=300)

# Custom light-to-dark blue colormap, quantised to 6 levels
colors = ['#b3daff', '#062756']
cmap = LinearSegmentedColormap.from_list('custom_blues', colors, N=6)

scatter = ax.scatter(x, y, c=z, cmap=cmap, s=15, alpha=0.8)
cbar = fig.colorbar(scatter, ax=ax, shrink=1, orientation='vertical', extend='both',
                    pad=0.015, aspect=30, label='error' if color_type == "error" else 'frequency')

# 1:1 reference line and regression line.
# x is not in ascending order at this point, so plotting (x, regression_line)
# directly would trace a path that doubles back on itself once per sample.
# A straight line only needs its two endpoints over the same x extent.
x_span = np.array([np.min(x), np.max(x)])
ax.plot([0, scale], [0, scale], 'red', lw=0.5, linestyle='--', label='1:1 line')
ax.plot(x_span, k * x_span + b, 'black', lw=0.5, label='Regression Line')

# Grid
ax.grid(True, linestyle='--', alpha=0.2)

# Statistics annotations, right-aligned in the lower-right corner
ax.text(scale * 0.95, scale * 0.35, f'$R^2={R2:.3f}$', ha='right', color='#071952')
ax.text(scale * 0.95, scale * 0.275, f'$NSE={NSE:.3f}$', ha='right', color='#088395')
ax.text(scale * 0.95, scale * 0.2, f'$MAE={MAE:.3f}$', ha='right')
ax.text(scale * 0.95, scale * 0.125, f'$RMSE={RMSE:.3f}$', ha='right', color='#088395')
ax.text(scale * 0.95, scale * 0.05, f'y = {k:.3f}x + {b:.3f}', ha='right', color='#071952')

# Axis limits: square [0, scale] x [0, scale] window
ax.axis([0, scale, 0, scale])
fig.tight_layout()

# Save the figure, then release it
save_path = os.path.join(output_dir, "scatter_plot_example.png")
fig.savefig(save_path, dpi=300, bbox_inches='tight')
plt.close(fig)
print("Scatter plot saved to:", save_path)
备注

注意：
代码中的点密度是通过核密度估计（Kernel Density Estimation, KDE）计算的。
color_type = "error"：散点颜色表示预测误差 |x - y|。
color_type = "frequency"：散点颜色表示点的密度，适合数据量大时突出密集区域。