我的数据框包含一个分类特征“Street”,它可以采用 2 个可能值“Grvl”或“Pave”中的一个。我试图在拟合机器学习算法之前将这个分类特征转换为数值。我的代码如下所示
dataset['Street']=dataset['Street'].map({'Grvl':0,'Pave':1}).astype(int)
我已用数据框中最常出现的值填充缺失值
dataset['Street'].isnull().sum()
我收到以下错误
ValueError Traceback (most recent call last)
<ipython-input-59-86f0b031335a> in <module>()
2 print dataset['Street'].isnull().sum()
3 #dataset['MSZoning'] = dataset['MSZoning'].map( {'A': 0, 'C': 1,'FV': 2,'I':3,'RH':4,'RL':5,'RP':6,'RM':7} ).astype(int)
----> 4 dataset['Street']=dataset['Street'].map({'Grvl':0,'Pave':1}).astype(int)
5 dataset['LotShape']=dataset['LotShape'].map({'Reg':0,'IR1':1,'IR2':2,'IR3':3}).astype(int)
6 dataset['LandContour']=dataset['LandContour'].map({'Lvl':0,'Bnk':1,'HLS':2,'Low':3}).astype(int)
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\generic.pyc in astype(self, dtype, copy, raise_on_error, **kwargs)
2948
2949 mgr = self._data.astype(dtype=dtype, copy=copy,
-> 2950 raise_on_error=raise_on_error, **kwargs)
2951 return self._constructor(mgr).__finalize__(self)
2952
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in astype(self, dtype, **kwargs)
2936
2937 def astype(self, dtype, **kwargs):
-> 2938 return self.apply('astype', dtype=dtype, **kwargs)
2939
2940 def convert(self, **kwargs):
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in apply(self, f, axes, filter, do_integrity_check, consolidate, raw, **kwargs)
2888
2889 kwargs['mgr'] = self
-> 2890 applied = getattr(b, f)(**kwargs)
2891 result_blocks = _extend_blocks(applied, result_blocks)
2892
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in astype(self, dtype, copy, raise_on_error, values, **kwargs)
432 **kwargs):
433 return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
--> 434 values=values, **kwargs)
435
436 def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\internals.pyc in _astype(self, dtype, copy, raise_on_error, values, klass, mgr, **kwargs)
475
476 # _astype_nansafe works fine with 1-d only
--> 477 values = com._astype_nansafe(values.ravel(), dtype, copy=True)
478 values = values.reshape(self.shape)
479
C:\Users\JAYASHREE\Anaconda2\lib\site-packages\pandas\core\common.pyc in _astype_nansafe(arr, dtype, copy)
1912
1913 if np.isnan(arr).any():
-> 1914 raise ValueError('Cannot convert NA to integer')
1915 elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
1916 # work around NumPy brokenness, #1987
ValueError: Cannot convert NA to integer
最佳答案
您的数据框中有 NaN 值!由于如果存在缺失值,您无法将系列从对象转换为整数(使用 asType(int)),因此您应该在之前填充缺失值!
数据集['Street'].isnull().sum()
不填充缺失值
你可以使用pandas.DataFrame.fillna来做到这一点或sklearn.preprocessing.Imputer
关于python - 在 Pandas 中将分类特征转换为数字特征时出错,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/45286289/