# Sample input: one row per redemption event. A voucher that was redeemed in
# several steps (ugp_0008, ugp_0007) appears on several rows, each carrying
# the same nominal value.
df = pd.DataFrame(
    [
        ('ugp_0008', 'REDEEMED', 'OTHERS', 200000, 10000),
        ('ugp_0008', 'REDEEMED', 'VOUCHER', 200000, 30000),
        ('ugp_0008', 'REDEEMED', 'VOUCHER', 200000, 150000),
        ('ugp_0001', 'REDEEMED', 'BILLER', 100000, 20000),
        ('ugp_0009', 'AVAILABLE', 'exp_or_avail', 100000, 0),
        ('ugp_0002', 'AVAILABLE', 'exp_or_avail', 100000, 0),
        ('ugp_0003', 'REDEEMED', 'VOUCHER', 500000, 500000),
        ('ugp_0004', 'EXPIRED', 'exp_or_avail', 100000, 0),
        ('ugp_0005', 'EXPIRED', 'exp_or_avail', 100000, 0),
        ('ugp_0006', 'REDEEMED', 'VOUCHER', 100000, 20000),
        ('ugp_0007', 'REDEEMED', 'OTHERS', 100000, 20000),
        ('ugp_0007', 'REDEEMED', 'DIRECT-MERCHANT', 100000, 80000),
    ],
    columns=['voucher_id', 'status', 'redeem_type', 'nominal', 'value_used'],
)
# Expected result: one row per voucher, one column per redeem type holding the
# total value_used for that type, and the balance still left on the voucher
# (remaining_nominal = nominal - sum of the redeem-type columns).
df_expect = pd.DataFrame(
    {
        'voucher_id': ['ugp_0008', 'ugp_0001', 'ugp_0009', 'ugp_0002', 'ugp_0003', 'ugp_0004', 'ugp_0005', 'ugp_0006', 'ugp_0007'],
        'status': ['REDEEMED', 'REDEEMED', 'AVAILABLE', 'AVAILABLE', 'REDEEMED', 'EXPIRED', 'EXPIRED', 'REDEEMED', 'REDEEMED'],
        'nominal': [200000, 100000, 100000, 100000, 500000, 100000, 100000, 100000, 100000],
        # ugp_0003 spent exactly its 500000 nominal; the previous 5000000 was a
        # typo that contradicted its remaining_nominal of 0.
        'VOUCHER': [180000, 0, 0, 0, 500000, 0, 0, 20000, 0],
        'BILLER': [0, 20000, 0, 0, 0, 0, 0, 0, 0],
        'OTHERS': [10000, 0, 0, 0, 0, 0, 0, 0, 20000],
        'DIRECT-MERCHANT': [0, 0, 0, 0, 0, 0, 0, 0, 80000],
        'remaining_nominal': [10000, 80000, 100000, 100000, 0, 100000, 100000, 80000, 0],
    }
)
我希望按 voucher_id 分组,并根据 redeem_type 的取值生成新列(每个取值对应一列,列中的值为相应的 value_used)
我尝试使用 np.where
# First attempt: add one column per redeem type to a copy of the input frame.
# Each new column holds the row's value_used where redeem_type matches the
# column name and 0 everywhere else. The rows are NOT grouped by voucher yet.
# (The statements are kept on single logical lines: the earlier hard wraps
# after `=` and inside the 'DIRECT-MERCHANT' literal were syntax errors.)
df2 = df.copy()
df2['BILLER'] = np.where(df['redeem_type'] == 'BILLER', df['value_used'], 0)
df2['VOUCHER'] = np.where(df['redeem_type'] == 'VOUCHER', df['value_used'], 0)
df2['OTHERS'] = np.where(df['redeem_type'] == 'OTHERS', df['value_used'], 0)
df2['DIRECT-MERCHANT'] = np.where(
    df['redeem_type'] == 'DIRECT-MERCHANT', df['value_used'], 0
)
df2['exp_or_avail'] = np.where(
    df['redeem_type'] == 'exp_or_avail', df['value_used'], 0
)
然后我使用了 pivot(透视):
# Second attempt: total the remaining columns for every
# (voucher_id, redeem_type) pair, then turn the group keys back into columns.
ss = (
    df.groupby(by=['voucher_id', 'redeem_type'])
    .sum()
    .reset_index()
)
# Spread the redeem types across the columns (one row per voucher) and put 0
# where a voucher has no entry for a type. The result is only displayed here;
# it is not assigned to a name.
ss.pivot(columns='redeem_type', index='voucher_id', values='value_used').fillna(value=0)
希望大家能帮帮我,谢谢!祝大家度过愉快的一天
最佳答案
# Step 1: collapse the frame to one entry per (voucher_id, status) and pick up
# the voucher's nominal value. `nominal` is repeated on every redemption row
# of a voucher, so take the first entry of each group. Summing it would count
# it once per row (3 x 200000 for ugp_0008) and inflate the remaining balance.
keys = ['voucher_id', 'status']
nominal = df.groupby(keys)['nominal'].first()
# voucher_id  status
# ugp_0001    REDEEMED     100000
# ugp_0002    AVAILABLE    100000
# ugp_0003    REDEEMED     500000
# ugp_0004    EXPIRED      100000
# ugp_0005    EXPIRED      100000
# ugp_0006    REDEEMED     100000
# ugp_0007    REDEEMED     100000
# ugp_0008    REDEEMED     200000
# ugp_0009    AVAILABLE    100000
# Name: nominal, dtype: int64

# Step 2: pivot the frame so that each redeem type becomes a column holding
# the summed value_used per (voucher_id, status); missing combinations are
# filled with 0.
df1 = df.pivot_table(index=keys, columns='redeem_type', values='value_used', aggfunc='sum', fill_value=0)
# redeem_type            BILLER  DIRECT-MERCHANT  OTHERS  VOUCHER  exp_or_avail
# voucher_id status
# ugp_0001   REDEEMED     20000                0       0        0             0
# ugp_0002   AVAILABLE        0                0       0        0             0
# ugp_0003   REDEEMED         0                0       0   500000             0
# ugp_0004   EXPIRED          0                0       0        0             0
# ugp_0005   EXPIRED          0                0       0        0             0
# ugp_0006   REDEEMED         0                0       0    20000             0
# ugp_0007   REDEEMED         0            80000   20000        0             0
# ugp_0008   REDEEMED         0                0   10000   180000             0
# ugp_0009   AVAILABLE        0                0       0        0             0

# Step 3: subtract the row-wise total of all value_used columns from the
# nominal value to get what is left on each voucher. Both keyword arguments
# are evaluated before assign() runs, so df1.sum(axis=1) only covers the
# redeem-type columns and not the new `nominal` column.
df1 = df1.assign(nominal=nominal, remaining=nominal - df1.sum(axis=1))
# redeem_type            BILLER  DIRECT-MERCHANT  OTHERS  VOUCHER  exp_or_avail  nominal  remaining
# voucher_id status
# ugp_0001   REDEEMED     20000                0       0        0             0   100000      80000
# ugp_0002   AVAILABLE        0                0       0        0             0   100000     100000
# ugp_0003   REDEEMED         0                0       0   500000             0   500000          0
# ugp_0004   EXPIRED          0                0       0        0             0   100000     100000
# ugp_0005   EXPIRED          0                0       0        0             0   100000     100000
# ugp_0006   REDEEMED         0                0       0    20000             0   100000      80000
# ugp_0007   REDEEMED         0            80000   20000        0             0   100000          0
# ugp_0008   REDEEMED         0                0   10000   180000             0   200000      10000
# ugp_0009   AVAILABLE        0                0       0        0             0   100000     100000
关于python - Pandas 根据列的值创建新列,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/77295606/