python - seaborn/matplotlib 中的直方图显示 x 轴上的所有分箱数据索引

标签 python matplotlib plot data-visualization seaborn

我需要制作给定列表中所有值的直方图。我使用了seaborn的distplot,但其中一个轴不显示从0到列表中最后一个元素的索引,而是显示某种形式的分布。

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's "whitegrid" theme to the plots created below.
sns.set(style="whitegrid")
# Print the installed seaborn version.
print(sns.__version__)

# Request a font scale of 1.5 for the plotting context.
sns.set_context(font_scale=1.5)

data = [1.1992198, 1.2429917, 0.7515156, 1.7279389, -0.16715668, 0.94465995, 0.5149495, 0.94465995, 0.94465995, 3.8740897, 7.453125, 7.453125, 1.0201894, 1.444468, 0.06495813, 0.18581325, -0.69003785, 3.1213043, 0.24899049, -0.5395518, 2.6421795, 2.238052, -0.42627642, 0.689369, 1.0177083, 0.0021173293, 0.19708821, -0.6978323, -0.27355388, -1.0527502, -1.2287112, -0.73426425, -1.5779951, -1.4275085, -0.72636086, 0.49798694, 0.5233074, -0.8736689, -1.5343369, 0.83868057, 0.14993721, -1.5746347, -1.1844425, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328, 1.9799328]

data2 = [0.34094468, -1.8498722, -0.35345584, -1.0018779, 0.18884292, -2.6028345, 0.39934048, -0.119069986, -0.20210052, -0.2972668, -0.8640028, -0.6174464, 0.096682094, -1.9147822, -0.7738649, 0.6141649, 0.86409974, -0.5216787, -0.78182876, 0.22742827, -0.840597, -0.97359276, -0.018100848, -0.5059276, -1.7152423, -0.07815174, 0.18345535, -0.76344514, -0.39645284, 0.18889628, -0.5543669, -0.18788649, -0.13553666, 0.1379985, 0.65224963, 0.5777133, -0.9204392, -0.91472155, -0.58848035, -1.6883624, -0.58383256, 0.25340325, -0.09143271, 0.50240713, 0.8944117, 0.07218201, 1.1128205, 1.3817745, -0.09530114, 0.56783175, -0.12059356, 0.43868077, -0.2728266, 0.61756617, -0.51779836, -0.39096248, -0.635239, -0.635239, -0.5384383, -0.635239, -0.6920986, -0.9351034, -0.9254051, -0.842712, -1.1218141]

data3 = [0.72135484, -0.706092, 0.36165744, 0.40211153, 0.14495818, 0.9395333, 1.450367, 0.32213485, 0.52471924, 3.3083296, 6.7051606, 6.1889296, -0.210258, -0.09990394, -0.85894525, -0.36614275, -1.5075212, 1.8715478, 0.29819223, -1.0022302, 2.108101, 1.8913394, -0.24430388, 0.059003413, -0.39443398, -0.0057572527, 0.5327027, -1.4999104, -0.60988855, -0.95330614, -1.9033353, -0.93208313, -1.7135317, -1.2041125, 0.007865965, 1.0655571, -0.42969102, -1.9678588, -2.165072, -1.1763439, -0.4736237, -0.8522189, -1.073197, 2.3406122, 2.8758054, 1.9956598, 3.3263054, 3.0907226, 1.8059512, 2.533312, 2.1851382, 2.1604633, 1.7256155, 2.912341, 2.0519354, 2.0519354, 2.0519354, 2.0519354, 2.0519354, 2.0519354, 2.0519354, 2.0519354, 2.0519354, 2.0519354, 2.0519354]

sns.set_style("whitegrid")

# bins don't seem to do anything here
# NOTE(review): `bins` presumably only configures the histogram layer, which
# hist=False turns off, so only the shaded KDE curves are drawn. The x axis
# then shows the data values themselves, not the list indexes - confirm
# against the seaborn distplot documentation.
sns.distplot(data, norm_hist=0, hist=False, kde_kws={"shade":True, "bw": 0.05}, bins=100, color="b", label="attn")
sns.distplot(data2, norm_hist=0, hist=False, kde_kws={"shade":True, "bw": 0.05}, bins=100, color="g", label="attn_rel_pos")
# Keep the object returned by the last call so the title and axis labels can be set on it.
pls = sns.distplot(data3, norm_hist=0, hist=False, kde_kws={"shade":True, "bw": 0.05}, bins=100, color="r", label="attn_comb")
plt.legend()


pls.axes.set_title("Title")
# The x label states the desired behaviour (list indexes on the x axis).
pls.set(xlabel='Show indexes from 0 to last here', ylabel='My Weight')

结果如下:

enter image description here

我需要显示权重从列表的索引 0 到最后一个索引是如何分布的(三个列表的长度相同)。由于每个列表大约有 60 个元素,我可能会对它们进行分箱,但我找不到任何在这里真正起作用的 bins 参数。

这是我用图像编辑软件修改出来的示意图,展示了我想要的效果(但我需要图中正确呈现实际数据,而不是某种密度分布): enter image description here

在这一点上,我也不关心它是否是seaborn,如果在matplotlib中更容易做到,那么我也同意这个解决方案。非常感谢!

编辑:下面是一个值始终大于 0 的示例。另外,能否不使用条形图/直方图,而是将这些值显示为连续的线:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's "whitegrid" theme to the plots created below.
sns.set(style="whitegrid")
# print(sns.__version__)

# Create the figure that the bar/marker plot is drawn into.
fig = plt.figure(figsize=(8, 6))
# The data lists (data, data2, data3) are defined further down in this snippet.

def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    Args:
        x: Array-like of real numbers (a plain Python list works).

    Returns:
        numpy.ndarray of floats with the same shape as ``x``. Along axis 0
        the entries are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # An empty input has nothing to normalise; return an empty float array.
        return np.exp(x)
    # Subtracting the maximum does not change the result mathematically, but
    # it keeps np.exp from overflowing to inf (which would turn the ratio
    # into nan) when the inputs are large.
    exps = np.exp(x - x.max(axis=0))
    return exps / exps.sum(axis=0)

# attn
data = [1.2227119, 2.3106432, 0.3175479, 2.6548655, 0.87468535, -2.5941365, -2.8492305, -2.5941365, -2.5941365, 0.78577393, -3.1803715, -3.1803715, -2.527419, -0.32336473, -0.23149578, 0.1958992, -1.1464257, -1.3171446, -0.82610035, -0.6265811, -0.04922826, 1.268781, -0.63436747, 1.6067829, -0.12655944, 0.30039954, -0.16766489, -2.2401857, -0.036131226, -0.22972624, -0.041365635, 0.6901127, -1.3901691, -0.87032473, 0.13755159, 0.013177752, 1.343483, 0.17142272, -0.08306693, 0.9223409, -0.43641013, -1.2699138, -1.0307136, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937, -0.91284937]

# attn_pos
data2 = [ 0.41763785, -2.3683536, -0.10784631, -0.18569931, 0.20513327, -2.017535, -0.13072428, -1.423852, -0.037444096, -0.29240212, -0.33224604, -0.8710863, -0.2023627, -0.72792727, -0.15152885, 0.20426698, 0.9366691, -0.04617013, 0.03217609, -1.2339046, -0.54832625, -1.0488682, -0.13601255, 0.46882343, -1.1635672, -0.98546046, -0.5023038, -1.7461116, -0.38710907, -0.75636834, -0.077942476, -1.5930176, -0.8428734, 0.36083415, -0.235654, 0.27319083, 0.2481483, -0.8994429, 0.51432747, 0.3907901, -1.0105598, -0.3258296, -0.53897303, 1.3885188, -0.8589748, -0.25751373, 0.9007931, 0.64612645, -0.9111746, -0.5629407, 0.5113405, -0.5913975, 0.8435228, -0.0041873083, 1.2919891, 0.49669626, 0.96742135, 0.96742135, 0.9375946, 0.96742135, 0.62772936, 0.2486542, 0.59175867, 0.13839966, 0.33540285]

# combined
data3 = [1.2788991, 1.8618634, -0.39062586, 2.4721837, 3.1994174, -2.539834, -1.7466183, -2.7143257, -2.5147946, 2.0141108, -3.282056, -3.381072, -3.5476263, -0.7556571, -0.9092123, 0.39682359, -0.7735753, -0.5908251, -1.0364372, -1.0283178, -0.7909626, 1.2081728, -0.96530867, 2.3020573, 0.84674144, 0.7156407, 0.1626791, -1.6639395, 0.27049372, 0.5723161, -0.89840394, 1.5462611, -2.1371794, -1.70861, 0.30733263, 0.22821441, 0.7041679, -0.36799663, 0.27130017, 0.98303056, -1.142178, -2.035885, -1.7442997, -0.49790236, -1.6333843, -0.09258777, -1.971946, -1.0179313, -1.1023216, -0.7061392, -1.0784137, -1.8746437, -0.015198052, -0.9757373, -0.43436813, -0.43436813, -0.43436813, -0.43436813, -0.43436813, -0.43436813, -0.43436813, -0.43436813, -0.43436813, -0.43436813, -0.43436813]

softmax_flag = True

# One row per series: (raw weights, colour, legend label, marker format, extra marker options).
series = [
    (data, 'b', 'attn', 'bx', {}),
    (data2, 'g', 'attn_rel_pos', 'go', {'ms': 4}),
    (data3, 'r', 'attn_comb', 'r+', {}),
]

# With the flag set, plot softmax-normalised weights; otherwise plot the raw values.
heights = [softmax(values) if softmax_flag else values for values, *_ in series]

# Translucent bars first, one bar per list index ...
for (values, colour, name, _fmt, _extra), height in zip(series, heights):
    plt.bar(range(len(values)), height, color=colour, alpha=0.3, label=name)

# ... then a marker on top of every bar so overlapping series stay distinguishable.
for (values, _colour, _name, fmt, extra), height in zip(series, heights):
    plt.plot(range(len(values)), height, fmt, alpha=0.5, **extra)

plt.title("Head 3")
plt.xlabel("Word Position in the Sentence")
plt.ylabel("Attention Weight")

plt.legend(fontsize=16)

# Write the finished figure to disk.
plt.savefig('head_3_in_32_softmax.png', dpi=350)

图片:enter image description here

最佳答案

我不确定您是否想要在每个索引处用某种离散的峰值来构成类似直方图的图形。但从您的问题来看,我猜您想要的是下面这样:

# Draw the three weight lists as overlapping translucent bars, one bar per list index.
for values, colour, name in (
    (data, 'b', 'attn'),
    (data2, 'g', 'attn_rel_pos'),
    (data3, 'r', 'attn_comb'),
):
    plt.bar(range(len(values)), values, color=colour, alpha=0.3, label=name)

输出

enter image description here

如果您想以某种方式区分同一索引的每个高度/值,除了条形图之外,您还可以使用以下一些标记(这里我没有使用 sns):

import matplotlib.pyplot as plt

fig = plt.figure(figsize=(8, 6))
# data, data2 and data3 are expected to be defined already (lists omitted here).

bar_specs = (
    (data, 'b', 'attn'),
    (data2, 'g', 'attn_rel_pos'),
    (data3, 'r', 'attn_comb'),
)
marker_specs = (
    (data, 'bx', {}),
    (data2, 'go', {'ms': 4}),
    (data3, 'r+', {}),
)

# Translucent bars: x is the list index, height is the raw value.
for values, colour, name in bar_specs:
    plt.bar(range(len(values)), values, color=colour, alpha=0.3, label=name)

# A distinct marker per series at the top of each bar.
for values, fmt, extra in marker_specs:
    plt.plot(range(len(values)), values, fmt, alpha=0.5, **extra)

plt.grid()
plt.legend(fontsize=16)

输出

enter image description here

如果您希望它们作为连续线,您可以使用以下内容:

import numpy as np
import matplotlib.pyplot as plt

# Create the figure and a single Axes; `ax` is used below for the filled areas.
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)

def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    Args:
        x: Array-like of real numbers (a plain Python list works).

    Returns:
        numpy.ndarray of floats with the same shape as ``x``. Along axis 0
        the entries are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # An empty input has nothing to normalise; return an empty float array.
        return np.exp(x)
    # Subtracting the maximum does not change the result mathematically, but
    # it keeps np.exp from overflowing to inf (which would turn the ratio
    # into nan) when the inputs are large.
    exps = np.exp(x - x.max(axis=0))
    return exps / exps.sum(axis=0)

# Not showing the data lists here

softmax_flag = True

if softmax_flag:
    # Normalise each series once and reuse the result for both its line and
    # its filled area.
    attn = softmax(data)
    attn_rel_pos = softmax(data2)
    attn_comb = softmax(data3)

    # Continuous lines: x is the list index, y is the softmax weight.
    plt.plot(range(len(attn)), attn, '-b', alpha=0.5, label='attn')
    plt.plot(range(len(attn_rel_pos)), attn_rel_pos, '-g', alpha=0.5, lw=2, label='attn_rel_pos')
    plt.plot(range(len(attn_comb)), attn_comb, '-r', alpha=0.5, lw=2, label='attn_comb')

    # Shade the area between zero and each line. Every fill uses the index
    # range of its own series, so the x and y lengths always match even if
    # the three lists differ in length.
    ax.fill_between(range(len(attn)), 0, attn, color='dodgerblue', alpha=0.4)
    ax.fill_between(range(len(attn_rel_pos)), 0, attn_rel_pos, color='mediumseagreen', alpha=0.4)
    ax.fill_between(range(len(attn_comb)), 0, attn_comb, color='indianred', alpha=0.4)
else:
    # Raw values: translucent bars with a marker at the top of each bar.
    plt.bar(range(len(data)), data, color='b', alpha=0.3, label='attn')
    plt.bar(range(len(data2)), data2, color='g', alpha=0.3, label='attn_rel_pos')
    plt.bar(range(len(data3)), data3, color='r', alpha=0.3, label='attn_comb')
    plt.plot(range(len(data)), data, 'bx', alpha=0.5)
    plt.plot(range(len(data2)), data2, 'go', alpha=0.5, ms=4)
    plt.plot(range(len(data3)), data3, 'r+', alpha=0.5)

plt.title("Head 3")
plt.xlabel("Word Position in the Sentence")
plt.ylabel("Attention Weight")

plt.grid()
plt.legend(fontsize=16)

输出

enter image description here

关于python - seaborn/matplotlib 中的直方图显示 x 轴上的所有分箱数据索引,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52020347/

相关文章:

Python Pandas CSV 导入/Unicode 问题

python - 将颜色图添加到图表中

pandas - 绘图图表的问题

r - 世界地图 - 使用 ggplot2 将国家的一半映射到不同的颜色

matplotlib - 直方图的对数 x Axis

python - 名称错误 : global name is not defined

python - 'tensorflow' 没有属性 'to_int32'

python - 使 Seaborn 热图之间的单元格大小相同

python - 将索引数组映射到坐标数组

python - Vowpal Wabbit Python Sklearn - 预测 vw 格式