python - 在 altair 图表中引用本地 .csv 文件

标签 python visualization altair vega-lite

我正在尝试使用 altair 来可视化数据,但很难按我想要的方式使用它:我不想把数据嵌入到生成的 .html 图表中,而是想让图表引用包含数据的本地 .csv 文件。否则,数据会被重复存储,从而使所需的存储空间翻倍。

这是我尝试过的:

import os
from pathlib import Path

import altair as alt
import numpy as np
import pandas as pd

# Dummy data: n rows with an integer "x" column and a random "y" column.

n = 100
df = pd.DataFrame({
    "x": np.arange(n),
    "y": np.random.randn(n),
})

# Using the dataframe directly.

chartdf = alt.Chart(df).mark_line().encode(
    x='x:Q',
    y='y:Q',
)
chartdf.save('chartdf.html')

# Now referencing the local .csv file.

filename = "data.csv"
# index=False: the chart only reads the "x" and "y" columns, so do not store
# pandas' row index as an extra unnamed column in the file.
df.to_csv(filename, index=False)
# Path.as_uri() produces a well-formed, percent-encoded file:// URL on every
# platform; plain '"file://" + path' yields an invalid URL for Windows paths
# and for paths containing spaces or non-ASCII characters.
fullpath = Path(os.path.abspath(filename)).as_uri()

alt.data_transformers.enable('csv')
chartcsv = alt.Chart(fullpath).mark_line().encode(
    x='x:Q',
    y='y:Q',
)
chartcsv.save('chartcsv.html')

结果,我得到了两个 .html 文件。这是 chartdf.html,正确显示了图表,但将数据存储在源代码中。

<!DOCTYPE html>
<html>
<head>
  <style>
    .error {
        color: red;
    }
  </style>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega@5"></script>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega-lite@4.8.1"></script>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega-embed@6"></script>
</head>
<body>
  <div id="vis"></div>
  <script>
    (function(vegaEmbed) {
      var spec = {"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}}, "data": {"name": "data-1727235c61a7b22ee660b7640869d48d"}, "mark": "line", "encoding": {"x": {"type": "quantitative", "field": "x"}, "y": {"type": "quantitative", "field": "y"}}, "$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json", "datasets": {"data-1727235c61a7b22ee660b7640869d48d": [{"x": 0, "y": 0.2112352635569061}, {"x": 1, "y": -0.6297931563725685}, {"x": 2, "y": -1.6669551411704615}, {"x": 3, "y": -0.8590377939062339}, {"x": 4, "y": -2.362675608121853}, {"x": 5, "y": 0.1816769959879393}, {"x": 6, "y": 0.21964717520484453}, {"x": 7, "y": 0.5924276442771413}, {"x": 8, "y": 2.062942588202062}, {"x": 9, "y": -1.411207827356746}, {"x": 10, "y": -2.2266024022172957}, {"x": 11, "y": 0.47229354076715996}, {"x": 12, "y": -0.14474440245785294}, {"x": 13, "y": 0.051738284319541146}, {"x": 14, "y": -0.4838552098939953}, {"x": 15, "y": 0.29366971719427554}, {"x": 16, "y": 0.18361573516404703}, {"x": 17, "y": 0.6734430961408209}, {"x": 18, "y": -1.6084727160210788}, {"x": 19, "y": 0.457742541113758}, {"x": 20, "y": -1.4873380353155474}, {"x": 21, "y": 0.6118088150042575}, {"x": 22, "y": 0.9889323424386781}, {"x": 23, "y": -0.4539168151678354}, {"x": 24, "y": 0.5438198860906843}, {"x": 25, "y": 0.47011997666216776}, {"x": 26, "y": -0.7562418295269627}, {"x": 27, "y": -0.27690138025681765}, {"x": 28, "y": 0.49827375786911876}, {"x": 29, "y": 1.0153141593800437}, {"x": 30, "y": 1.1782385627274985}, {"x": 31, "y": 0.0424730271073153}, {"x": 32, "y": -1.4185058816513483}, {"x": 33, "y": -1.8114268346098452}, {"x": 34, "y": -1.29594887684236}, {"x": 35, "y": 0.09894597002376446}, {"x": 36, "y": -1.4631390515148475}, {"x": 37, "y": -2.1926825852861485}, {"x": 38, "y": 1.1519292846453788}, {"x": 39, "y": -0.5818573231859363}, {"x": 40, "y": -0.02064804413385189}, {"x": 41, "y": 0.5006162765747295}, {"x": 42, "y": -0.9784096515241409}, {"x": 43, "y": 1.7117580547861466}, {"x": 44, 
"y": 0.8677907294763252}, {"x": 45, "y": -1.3285577876719292}, {"x": 46, "y": 0.385574566953019}, {"x": 47, "y": 1.0251802025168757}, {"x": 48, "y": 0.2255080418305147}, {"x": 49, "y": 0.8012860491915356}, {"x": 50, "y": -1.3742866681331305}, {"x": 51, "y": -0.5160585231146249}, {"x": 52, "y": 1.0086907194824801}, {"x": 53, "y": 1.8441072855180702}, {"x": 54, "y": 0.10647348146379314}, {"x": 55, "y": -0.9950070073653883}, {"x": 56, "y": -0.1209797393395647}, {"x": 57, "y": 1.2957688874221422}, {"x": 58, "y": 1.5377004050922511}, {"x": 59, "y": 0.14393885533903575}, {"x": 60, "y": -1.440499604914547}, {"x": 61, "y": 1.4762530684041733}, {"x": 62, "y": 0.3962335303842206}, {"x": 63, "y": 0.15195105628021952}, {"x": 64, "y": 1.274568257852339}, {"x": 65, "y": -1.9517895004128185}, {"x": 66, "y": -1.6847841535930692}, {"x": 67, "y": 1.3936389293839209}, {"x": 68, "y": -0.11370298418438128}, {"x": 69, "y": 1.8052343535396553}, {"x": 70, "y": -0.3443348662400949}, {"x": 71, "y": 0.8838398407281737}, {"x": 72, "y": 0.8666912228874258}, {"x": 73, "y": 2.33181288689919}, {"x": 74, "y": -1.7370617166021098}, {"x": 75, "y": -0.8447406898036955}, {"x": 76, "y": -0.27812566279331796}, {"x": 77, "y": 0.6939513790408266}, {"x": 78, "y": 1.2769804660273463}, {"x": 79, "y": 1.275176541502504}, {"x": 80, "y": -0.3843801173353608}, {"x": 81, "y": -0.14199480129344477}, {"x": 82, "y": -0.7443885322363791}, {"x": 83, "y": 0.392135684892227}, {"x": 84, "y": -1.4963957824987753}, {"x": 85, "y": -0.05395082651327829}, {"x": 86, "y": -1.1640386653528847}, {"x": 87, "y": -0.8740477495639896}, {"x": 88, "y": 2.367892815244499}, {"x": 89, "y": -1.1861757645950304}, {"x": 90, "y": 0.4862085488887814}, {"x": 91, "y": -1.907728993442288}, {"x": 92, "y": 0.6304124541928168}, {"x": 93, "y": 1.0704432779754836}, {"x": 94, "y": -0.805919444706216}, {"x": 95, "y": 0.829176968267078}, {"x": 96, "y": 0.16050957366472954}, {"x": 97, "y": 1.1663653159345964}, {"x": 98, "y": 0.33908181024409456}, {"x": 
99, "y": -0.04008707863910976}]}};
      var embedOpt = {"mode": "vega-lite"};

      /**
       * Replace the contents of `el` with an error banner, then rethrow.
       *
       * @param {{innerHTML: string}} el - Element whose innerHTML is overwritten.
       * @param {Error} error - Error to report; its `message` is shown in the banner.
       * @throws Always rethrows `error` after `el` has been updated.
       */
      function showError(el, error){
          const bannerParts = [
              '<div class="error" style="color:red;">',
              `<p>JavaScript Error: ${error.message}</p>`,
              "<p>This usually means there's a typo in your chart specification. ",
              "See the javascript console for the full traceback.</p>",
              '</div>',
          ];
          el.innerHTML = bannerParts.join('');
          // Rethrow so the failure also reaches the javascript console.
          throw error;
      }
      // Look up the #vis container, render the spec into it, and hand any
      // rejection from vegaEmbed to showError so it is displayed in the page.
      const el = document.getElementById('vis');
      vegaEmbed("#vis", spec, embedOpt)
        .catch(error => showError(el, error));
    })(vegaEmbed);

  </script>
</body>
</html>

这里是 chartcsv.html 文件,不显示图表。

<!DOCTYPE html>
<html>
<head>
  <style>
    .error {
        color: red;
    }
  </style>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega@5"></script>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega-lite@4.8.1"></script>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega-embed@6"></script>
</head>
<body>
  <div id="vis"></div>
  <script>
    (function(vegaEmbed) {
      var spec = {"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}}, "data": {"url": "file:///Users/MY_USER_NAME_WAS_HERE/Projects/ds-python/utilities/plotlibrary/data.csv"}, "mark": "line", "encoding": {"x": {"type": "quantitative", "field": "x"}, "y": {"type": "quantitative", "field": "y"}}, "$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json"};
      var embedOpt = {"mode": "vega-lite"};

      /**
       * Replace the contents of `el` with an error banner, then rethrow.
       *
       * @param {{innerHTML: string}} el - Element whose innerHTML is overwritten.
       * @param {Error} error - Error to report; its `message` is shown in the banner.
       * @throws Always rethrows `error` after `el` has been updated.
       */
      function showError(el, error){
          const bannerParts = [
              '<div class="error" style="color:red;">',
              `<p>JavaScript Error: ${error.message}</p>`,
              "<p>This usually means there's a typo in your chart specification. ",
              "See the javascript console for the full traceback.</p>",
              '</div>',
          ];
          el.innerHTML = bannerParts.join('');
          // Rethrow so the failure also reaches the javascript console.
          throw error;
      }
      // Look up the #vis container, render the spec into it, and hand any
      // rejection from vegaEmbed to showError so it is displayed in the page.
      const el = document.getElementById('vis');
      vegaEmbed("#vis", spec, embedOpt)
        .catch(error => showError(el, error));
    })(vegaEmbed);

  </script>
</body>
</html>

使用存储在本地文件中的数据实现 altair 图表的正确方法是什么?

最佳答案

要在 Altair 图表中使用本地数据文件,需要满足两个条件:

  1. 文件必须指定为对显示图表的前端(即浏览器)有效的 URL
  2. URL 必须满足浏览器的安全要求(例如必须满足跨源策略)

不幸的是,如何同时满足 (1) 和 (2) 在很大程度上取决于您使用的前端(即 JupyterLab、Jupyter Notebook、Colab、Streamlit 等);而在每个前端中,这又取决于您运行的笔记本/服务器的版本、您使用的浏览器、您启用的浏览器安全设置、您使用的是 http 还是 https、您是否使用广告拦截器、您使用的操作系统、您启动笔记本服务器或打开 HTML 文件的具体方式,以及可能存在的许多其他变量。

出于这个原因,作为 Altair 的主要作者,我通常不鼓励人们尝试使用本地数据文件,因为像“我如何在 Altair 图表中引用本地 CSV 文件?”这样看似简单的问题,其实很难回答。

如果您仍想继续尝试,我建议您打开浏览器的 javascript 控制台,您会在其中看到警告或错误,它们可以帮助您诊断需要如何调整,才能在您自己的环境中满足上述 (1) 和 (2) 的要求。如果您只想要一个能够正常工作的方案,我建议使用 pandas 将文件加载到数据框中,然后用该数据框创建图表。

关于python - 在 altair 图表中引用本地 .csv 文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/67391359/

相关文章:

Python无法分割数据帧时间戳错误

python - 如何根据特定规则标记一系列值?

python - 什么是为(视觉上吸引人的)3D 物理模拟/可视化创建 python 程序的好库?

visualization - google.load 导致 dom/screen 为空

python - 根据组统计对 Altair 图的各个方面进行排序

altair - 在 Altair 中设置条形图上的标签格式

python - PySide:QAbstractItemModel - 连接 dataChanged()

Python Gtk TextView 在末尾插入文本

postgresql - 如何在 Kibana 中可视化来自 Postgresql 的数据?

python - Altair Ridgeline 不会创建标称组图