我正在尝试将 COO 类型的稀疏矩阵(来自 Scipy.Sparse)转换为 Pandas 稀疏序列。在文档(http://pandas.pydata.org/pandas-docs/stable/sparse.html)中,它说使用命令 SparseSeries.from_coo(A)
。这似乎没问题,但当我尝试查看系列的属性时,会发生这种情况。
10x10 似乎还可以。
import pandas as pd
import scipy.sparse as ss
import numpy as np
row = (np.random.random(10)*10).astype(int)
col = (np.random.random(10)*10).astype(int)
val = np.random.random(10)*10
sparse = ss.coo_matrix((val,(row,col)),shape=(10,10))
pss = pd.SparseSeries.from_coo(sparse)
print pss
0 7 1.416631
9 5.833902
1 0 4.131919
2 3 2.820531
7 2.227009
3 1 9.205619
4 4 8.309077
6 0 4.376921
7 6 8.444013
7 7.383886
dtype: float64
BlockIndex
Block locations: array([0])
Block lengths: array([10])
但不是 100x100。
import pandas as pd
import scipy.sparse as ss
import numpy as np
row = (np.random.random(100)*100).astype(int)
col = (np.random.random(100)*100).astype(int)
val = np.random.random(100)*100
sparse = ss.coo_matrix((val,(row,col)),shape=(100,100))
pss = pd.SparseSeries.from_coo(sparse)
print pss
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-790-f0c22a601b93> in <module>()
7 sparse = ss.coo_matrix((val,(row,col)),shape=(100,100))
8 pss = pd.SparseSeries.from_coo(sparse)
----> 9 print pss
10
C:\Users\ej\AppData\Local\Continuum\Anaconda\lib\site-packages\pandas\core\base.pyc in __str__(self)
45 if compat.PY3:
46 return self.__unicode__()
---> 47 return self.__bytes__()
48
49 def __bytes__(self):
C:\Users\ej\AppData\Local\Continuum\Anaconda\lib\site-packages\pandas\core\base.pyc in __bytes__(self)
57
58 encoding = get_option("display.encoding")
---> 59 return self.__unicode__().encode(encoding, 'replace')
60
61 def __repr__(self):
C:\Users\ej\AppData\Local\Continuum\Anaconda\lib\site-packages\pandas\sparse\series.pyc in __unicode__(self)
287 def __unicode__(self):
288 # currently, unicode is same as repr...fixes infinite loop
--> 289 series_rep = Series.__unicode__(self)
290 rep = '%s\n%s' % (series_rep, repr(self.sp_index))
291 return rep
C:\Users\ej\AppData\Local\Continuum\Anaconda\lib\site-packages\pandas\core\series.pyc in __unicode__(self)
895
896 self.to_string(buf=buf, name=self.name, dtype=self.dtype,
--> 897 max_rows=max_rows)
898 result = buf.getvalue()
899
C:\Users\ej\AppData\Local\Continuum\Anaconda\lib\site-packages\pandas\core\series.pyc in to_string(self, buf, na_rep, float_format, header, length, dtype, name, max_rows)
960 the_repr = self._get_repr(float_format=float_format, na_rep=na_rep,
961 header=header, length=length, dtype=dtype,
--> 962 name=name, max_rows=max_rows)
963
964 # catch contract violations
C:\Users\ej\AppData\Local\Continuum\Anaconda\lib\site-packages\pandas\core\series.pyc in _get_repr(self, name, header, length, dtype, na_rep, float_format, max_rows)
989 na_rep=na_rep,
990 float_format=float_format,
--> 991 max_rows=max_rows)
992 result = formatter.to_string()
993
C:\Users\ej\AppData\Local\Continuum\Anaconda\lib\site-packages\pandas\core\format.pyc in __init__(self, series, buf, length, header, na_rep, name, float_format, dtype, max_rows)
145 self.dtype = dtype
146
--> 147 self._chk_truncate()
148
149 def _chk_truncate(self):
C:\Users\ej\AppData\Local\Continuum\Anaconda\lib\site-packages\pandas\core\format.pyc in _chk_truncate(self)
158 else:
159 row_num = max_rows // 2
--> 160 series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
161 self.tr_row_num = row_num
162 self.tr_series = series
C:\Users\ej\AppData\Local\Continuum\Anaconda\lib\site-packages\pandas\tools\merge.pyc in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
752 keys=keys, levels=levels, names=names,
753 verify_integrity=verify_integrity,
--> 754 copy=copy)
755 return op.get_result()
756
C:\Users\ej\AppData\Local\Continuum\Anaconda\lib\site-packages\pandas\tools\merge.pyc in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
803 for obj in objs:
804 if not isinstance(obj, NDFrame):
--> 805 raise TypeError("cannot concatenate a non-NDFrame object")
806
807 # consolidate
TypeError: cannot concatenate a non-NDFrame object
我不太理解错误消息 - 我认为我完全按照文档中的示例操作,只是使用我自己的 COO 矩阵(可能是大小?)
问候
最佳答案
我有一个旧的 pandas
。它有稀疏代码,但没有tocoo
。
与此相关的 Pandas 问题是:
https://github.com/pydata/pandas/issues/10818
但我在 github
上发现:
def _coo_to_sparse_series(A, dense_index=False):
""" Convert a scipy.sparse.coo_matrix to a SparseSeries.
Use the defaults given in the SparseSeries constructor. """
s = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
s = s.sort_index()
s = s.to_sparse() # TODO: specify kind?
# ...
return s
使用一个较小的稀疏矩阵,我可以毫无问题地构建和显示:
In [259]: Asml=sparse.coo_matrix(np.arange(10*5).reshape(10,5))
In [260]: s=pd.Series(Asml.data,pd.MultiIndex.from_arrays((Asml.row,Asml.col)))
In [261]: s=s.sort_index()
In [262]: s
Out[262]:
0 1 1
2 2
3 3
4 4
1 0 5
1 6
2 7
[... mine]
3 48
4 49
dtype: int32
In [263]: ssml=s.to_sparse()
In [264]: ssml
Out[264]:
0 1 1
2 2
3 3
4 4
1 0 5
[... mine]
2 47
3 48
4 49
dtype: int32
BlockIndex
Block locations: array([0])
Block lengths: array([49])
但是对于更大的数组(更多的非零元素),我得到了一个显示错误。我猜它发生在(普通)系列的显示开始使用省略号(...)时。我在 Py3 中运行,所以我收到不同的错误消息。
....\pandas\core\base.pyc in __str__(self)
45 if compat.PY3:
46 return self.__unicode__() # py3
47 return self.__bytes__() # py2 route
例如:
In [265]: Asml=sparse.coo_matrix(np.arange(10*7).reshape(10,7))
In [266]: s=pd.Series(Asml.data,pd.MultiIndex.from_arrays((Asml.row,Asml.col)))
In [267]: s=s.sort_index()
In [268]: s
Out[268]:
0 1 1
2 2
3 3
4 4
5 5
6 6
1 0 7
1 8
2 9
3 10
4 11
5 12
6 13
2 0 14
1 15
...
7 6 55
8 0 56
1 57
[... mine]
Length: 69, dtype: int32
In [269]: ssml=s.to_sparse()
In [270]: ssml
Out[270]: <repr(<pandas.sparse.series.SparseSeries at 0xaff6bc0c>)
failed: AttributeError: 'SparseArray' object has no attribute '_get_repr'>
我对 pandas 代码和结构还不够熟悉,暂时无法推断更多。
关于python - 使用 pandas.SparseSeries.from_coo() 函数的非 NDFFrame 对象错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/31970070/