我使用 `groupby` 和 `unstack` 创建了一个数据框,并希望新增一列来保存订阅率(Subscriber 与 Customer 的比值)。
df_sub = df.groupby(['time_section', 'day_type', 'user_type']).size().unstack()
df_sub['ratio'] = df_sub['Subscriber'] / df_sub['Customer']
结果出现错误:“TypeError: 'fill_value=ratio' is not present in this Categorical's categories”。
有人可以帮助我吗?
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'ratio'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in _set_item(self, key, value)
3825 try:
-> 3826 loc = self._info_axis.get_loc(key)
3827 except KeyError:
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3077 )
-> 3078 casted_key = self._maybe_cast_indexer(key)
3079 try:
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/category.py in _maybe_cast_indexer(self, key)
495 def _maybe_cast_indexer(self, key) -> int:
--> 496 return self._data._unbox_scalar(key)
497
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/arrays/categorical.py in _unbox_scalar(self, key)
1720 # to same dtype as self.codes, we get much faster performance.
-> 1721 code = self.categories.get_loc(key)
1722 code = self._codes.dtype.type(code)
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
KeyError: 'ratio'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-80-ef99a6bd1acd> in <module>
----> 1 df_sub['ratio'] = df_sub['Subscriber'] / df_sub['Customer']
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
3161 else:
3162 # set column
-> 3163 self._set_item(key, value)
3164
3165 def _setitem_slice(self, key: slice, value):
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in _set_item(self, key, value)
3241 self._ensure_valid_index(value)
3242 value = self._sanitize_column(key, value)
-> 3243 NDFrame._set_item(self, key, value)
3244
3245 # check if we are modifying a copy
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in _set_item(self, key, value)
3827 except KeyError:
3828 # This item wasn't present, just insert at end
-> 3829 self._mgr.insert(len(self._info_axis), key, value)
3830 return
3831
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/internals/managers.py in insert(self, loc, item, value, allow_duplicates)
1195
1196 # insert to the axis; this could possibly raise a TypeError
-> 1197 new_axis = self.items.insert(loc, item)
1198
1199 if value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype):
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/extension.py in insert(self, loc, item)
377 """
378 arr = self._data
--> 379 code = arr._validate_scalar(item)
380
381 new_vals = np.concatenate((arr._ndarray[:loc], [code], arr._ndarray[loc:]))
/opt/anaconda3/lib/python3.8/site-packages/pandas/core/arrays/categorical.py in _validate_fill_value(self, fill_value)
1249 fill_value = self._unbox_scalar(fill_value)
1250 else:
-> 1251 raise TypeError(
1252 f"'fill_value={fill_value}' is not present "
1253 "in this Categorical's categories"
TypeError: 'fill_value=ratio' is not present in this Categorical's categories
df_sub.index
MultiIndex([('10am-12pm', 'weekday'),
('10am-12pm', 'weekend'),
('10pm-12am', 'weekday'),
('10pm-12am', 'weekend'),
( '1am-3am', 'weekday'),
( '1am-3am', 'weekend'),
( '1pm-3pm', 'weekday'),
( '1pm-3pm', 'weekend'),
( '4am-6am', 'weekday'),
( '4am-6am', 'weekend'),
( '4pm-6pm', 'weekday'),
( '4pm-6pm', 'weekend'),
( '7am-9am', 'weekday'),
( '7am-9am', 'weekend'),
( '7pm-9pm', 'weekday'),
( '7pm-9pm', 'weekend')],
names=['time_section', 'day_type'])
df_sub.columns
CategoricalIndex(['Customer', 'Subscriber'], categories=['Customer', 'Subscriber'], ordered=False, name='user_type', dtype='category')
最佳答案
由于 `unstack` 之后的列索引 `df_sub.columns` 是一个 `CategoricalIndex`,新列名 'ratio' 不在其类别中,所以无法直接插入。请先使用 `CategoricalIndex.add_categories` 把新类别添加到列索引中,然后再赋值:
df_sub = df.groupby(['time_section', 'day_type', 'user_type']).size().unstack()
df_sub.columns = df_sub.columns.add_categories(['ratio'])
df_sub['ratio'] = df_sub['Subscriber'] / df_sub['Customer']
关于 python - TypeError: 'fill_value=xx' is not present in this Categorical's categories,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/68903668/