python - 如何向行添加值?

标签 python pandas

如何向行添加值:

  1. 我在数据框中创建了一个新列,并将其值初始化为 0。
  2. 编写了更新这个新列的值的逻辑,但更新没有反映到数据框中。

输入:

>>> parafix_df = main_df[["line_width", "para_num", "bbox" ]]
>>> parafix_df
   line_width para_num                             bbox
0     238.546      NaN  (50.0, 579.3, 288.546, 598.022)
1         318        1    (64.0, 564.9, 382.0, 583.622)
2         332        2    (50.0, 550.5, 382.0, 569.222)
3         332        2    (50.0, 536.1, 382.0, 554.822)
4     328.977        2  (50.0, 521.7, 378.977, 540.422)
5         318        3    (64.0, 507.3, 382.0, 526.022)
6         332        3    (50.0, 492.9, 382.0, 511.622)
7         332        3    (50.0, 478.5, 382.0, 497.222)
8         332        3    (50.0, 464.1, 382.0, 482.822)
9         332        3    (50.0, 449.7, 382.0, 468.422)
10      59.04        3   (50.0, 435.3, 109.04, 454.022)
11    304.007        4  (64.0, 420.9, 368.007, 439.622)
12        318        5    (64.0, 406.5, 382.0, 425.222)
13        332        5    (50.0, 392.1, 382.0, 410.822)
14        332        5    (50.0, 377.7, 382.0, 396.422)
15        332        5    (50.0, 363.3, 382.0, 382.022)
16     43.252        5   (50.0, 348.9, 93.252, 367.622)
17        318        6    (64.0, 334.5, 382.0, 353.222)
18        332        6    (50.0, 320.1, 382.0, 338.822)
19        332        6    (50.0, 305.7, 382.0, 324.422)
20        332        6    (50.0, 291.3, 382.0, 310.022)
21        332        6    (50.0, 276.9, 382.0, 295.622)
22     317.02        6   (50.0, 262.5, 367.02, 281.222)
23        318        7    (64.0, 248.1, 382.0, 266.822)
24        332        7    (50.0, 233.7, 382.0, 252.422)
25     47.014        7   (50.0, 219.3, 97.014, 238.022)
26        318        8    (64.0, 204.9, 382.0, 223.622)
27    316.723        8  (50.0, 190.5, 366.723, 209.222)
28        318        9    (64.0, 176.1, 382.0, 194.822)
29    326.766        9  (50.0, 161.7, 376.766, 180.422)
30        318       10    (64.0, 147.3, 382.0, 166.022)
31        332       10    (50.0, 132.9, 382.0, 151.622)
32        332       10    (50.0, 118.5, 382.0, 137.222)
33    305.393       11  (64.0, 104.1, 369.393, 122.822)
34        318       12     (64.0, 89.7, 382.0, 108.422)
35        318       13      (64.0, 75.3, 382.0, 94.022)
36    319.165       13    (50.0, 60.9, 369.165, 79.622)
37    308.165       14    (64.0, 46.5, 372.165, 65.222)
38        318       15      (64.0, 32.1, 382.0, 50.822)
39    329.153       15    (50.0, 17.7, 379.153, 36.422)
40        318       16       (64.0, 3.3, 382.0, 22.022)
41    324.335       16    (50.0, -11.1, 374.335, 7.622)

代码:

# First attempt (Python 2): try to assign a running paragraph number to every
# row of parafix_df by walking the rows pairwise.
parafix_df = main_df[["line_text", "line_width", "para_num", "bbox" ]]
parafix_df["new_para_num"] = 0

# bbox of the first row whose line_width equals the maximum width.
# NOTE(review): `selected` is not defined anywhere in this snippet; the mask
# was presumably meant to be built from parafix_df -- confirm.
max_width = parafix_df['line_width'].max()
bbox_max_width = parafix_df.loc[selected['line_width'] == max_width].iloc[0]["bbox"]

previous = None
para1 = 1
# iterrows() yields (index, row) pairs, so current[1] / next[1] are the row
# Series.  Zipping against .iloc[1:] stops at the shorter iterator, so the
# last row is never visited as `current`.
for current, next in izip(parafix_df.iterrows(), parafix_df.iloc[1:].iterrows()):
    if previous==None:
        # NOTE: this assigns into the row object produced by iterrows(), not
        # into parafix_df; the printed result below still shows 0 everywhere.
        current[1]["new_para_num"] = para1
    else:
        bbox_current = current[1]["bbox"]
        # bbox_next and bbox_previous are assigned but not used afterwards.
        bbox_next = next[1]["bbox"]
        bbox_previous = previous[1]["bbox"]
        # A row whose bbox[0] exceeds that of the widest line bumps the
        # counter (presumably bbox[0] is the left x-coordinate, i.e. an
        # indented first line of a paragraph -- confirm).
        if bbox_current[0]>bbox_max_width[0]:
            para1 += 1
            print "para1:", para1
        # Same caveat as above: the write does not reach parafix_df.
        current[1]["new_para_num"] = para1

    previous = current

以上代码的输出:

                              bbox  new_para_num  
0   (50.0, 579.3, 288.546, 598.022)             0  
1     (64.0, 564.9, 382.0, 583.622)             0  
2     (50.0, 550.5, 382.0, 569.222)             0  
3     (50.0, 536.1, 382.0, 554.822)             0  
4   (50.0, 521.7, 378.977, 540.422)             0  
5     (64.0, 507.3, 382.0, 526.022)             0  
6     (50.0, 492.9, 382.0, 511.622)             0  
7     (50.0, 478.5, 382.0, 497.222)             0  
8     (50.0, 464.1, 382.0, 482.822)             0  
9     (50.0, 449.7, 382.0, 468.422)             0  
10   (50.0, 435.3, 109.04, 454.022)             0  
11  (64.0, 420.9, 368.007, 439.622)             0  
12    (64.0, 406.5, 382.0, 425.222)             0  
13    (50.0, 392.1, 382.0, 410.822)             0  
14    (50.0, 377.7, 382.0, 396.422)             0  
15    (50.0, 363.3, 382.0, 382.022)             0  
16   (50.0, 348.9, 93.252, 367.622)             0  
17    (64.0, 334.5, 382.0, 353.222)             0  
18    (50.0, 320.1, 382.0, 338.822)             0  
19    (50.0, 305.7, 382.0, 324.422)             0  
20    (50.0, 291.3, 382.0, 310.022)             0  
21    (50.0, 276.9, 382.0, 295.622)             0  
22   (50.0, 262.5, 367.02, 281.222)             0  
23    (64.0, 248.1, 382.0, 266.822)             0  
24    (50.0, 233.7, 382.0, 252.422)             0  
25   (50.0, 219.3, 97.014, 238.022)             0  
26    (64.0, 204.9, 382.0, 223.622)             0  
27  (50.0, 190.5, 366.723, 209.222)             0  
28    (64.0, 176.1, 382.0, 194.822)             0  
29  (50.0, 161.7, 376.766, 180.422)             0  
30    (64.0, 147.3, 382.0, 166.022)             0  
31    (50.0, 132.9, 382.0, 151.622)             0  
32    (50.0, 118.5, 382.0, 137.222)             0  
33  (64.0, 104.1, 369.393, 122.822)             0  
34     (64.0, 89.7, 382.0, 108.422)             0  
35      (64.0, 75.3, 382.0, 94.022)             0  
36    (50.0, 60.9, 369.165, 79.622)             0  
37    (64.0, 46.5, 372.165, 65.222)             0  
38      (64.0, 32.1, 382.0, 50.822)             0  
39    (50.0, 17.7, 379.153, 36.422)             0  
40       (64.0, 3.3, 382.0, 22.022)             0  
41    (50.0, -11.1, 374.335, 7.622)             0  

但我想要新的 para 值:

para1: 2
para1: 3
para1: 4
para1: 5
para1: 6
para1: 7
para1: 8
para1: 9
para1: 10
para1: 11
para1: 12
para1: 13
para1: 14
para1: 15
para1: 16

你能帮帮我吗?

以下是我最终的工作代码:

# Assign a running paragraph number to every row of parafix_df.
#
# Take an explicit copy so that adding a column does not write to an object
# derived from main_df (this avoids pandas' SettingWithCopyWarning).
parafix_df = main_df[["line_text", "line_width", "para_num", "bbox"]].copy()
parafix_df["new_para_num"] = 0

# bbox of the first row whose line_width equals the maximum width.  The mask
# is built from parafix_df itself; the earlier `selected[...]` referred to a
# name that is not defined in this snippet.
max_width = parafix_df['line_width'].max()
bbox_max_width = parafix_df.loc[parafix_df['line_width'] == max_width].iloc[0]["bbox"]

# Look the target column up by name instead of hard-coding position 4.
new_para_col = parafix_df.columns.get_loc("new_para_num")

para1 = 1
for indx, (_, row) in enumerate(parafix_df.iterrows()):
    # The first row always gets paragraph 1; every later row whose bbox[0]
    # exceeds that of the widest line starts a new paragraph.
    if indx != 0 and row["bbox"][0] > bbox_max_width[0]:
        para1 += 1
    # Write through .iloc on the frame itself: assigning into the row object
    # yielded by iterrows() would not update parafix_df.
    parafix_df.iloc[indx, new_para_col] = para1

这段代码还能进一步优化吗?

最佳答案

更新:

IIUC,你可以这样做:

df.new_para_num = 1

In [210]: df.loc[df.line_width == df.line_width.max(), 'new_para_num'].cumsum() + 1
Out[210]:
2      2
3      3
6      4
7      5
8      6
9      7
13     8
14     9
15    10
18    11
19    12
20    13
21    14
24    15
31    16
32    17
Name: new_para_num, dtype: int64

如果您想有条件地更新原始 DF 中的 new_para_num 列:

In [223]: df.new_para_num = 1

In [224]: selected = df.loc[df.line_width == df.line_width.max()].copy()

In [226]: selected.new_para_num = selected.new_para_num.cumsum() + 1

In [227]: selected
Out[227]:
    line_width  para_num                           bbox  new_para_num
2        332.0       2.0  [50.0, 550.5, 382.0, 569.222]             2
3        332.0       2.0  [50.0, 536.1, 382.0, 554.822]             3
6        332.0       3.0  [50.0, 492.9, 382.0, 511.622]             4
7        332.0       3.0  [50.0, 478.5, 382.0, 497.222]             5
8        332.0       3.0  [50.0, 464.1, 382.0, 482.822]             6
9        332.0       3.0  [50.0, 449.7, 382.0, 468.422]             7
13       332.0       5.0  [50.0, 392.1, 382.0, 410.822]             8
14       332.0       5.0  [50.0, 377.7, 382.0, 396.422]             9
15       332.0       5.0  [50.0, 363.3, 382.0, 382.022]            10
18       332.0       6.0  [50.0, 320.1, 382.0, 338.822]            11
19       332.0       6.0  [50.0, 305.7, 382.0, 324.422]            12
20       332.0       6.0  [50.0, 291.3, 382.0, 310.022]            13
21       332.0       6.0  [50.0, 276.9, 382.0, 295.622]            14
24       332.0       7.0  [50.0, 233.7, 382.0, 252.422]            15
31       332.0      10.0  [50.0, 132.9, 382.0, 151.622]            16
32       332.0      10.0  [50.0, 118.5, 382.0, 137.222]            17

In [228]: df.loc[df.line_width == df.line_width.max(), 'new_para_num'] = selected

In [229]: df
Out[229]:
    line_width  para_num                             bbox  new_para_num
0      238.546       NaN  [50.0, 579.3, 288.546, 598.022]             1
1      318.000       1.0    [64.0, 564.9, 382.0, 583.622]             1
2      332.000       2.0    [50.0, 550.5, 382.0, 569.222]             2
3      332.000       2.0    [50.0, 536.1, 382.0, 554.822]             3
4      328.977       2.0  [50.0, 521.7, 378.977, 540.422]             1
5      318.000       3.0    [64.0, 507.3, 382.0, 526.022]             1
6      332.000       3.0    [50.0, 492.9, 382.0, 511.622]             4
7      332.000       3.0    [50.0, 478.5, 382.0, 497.222]             5
8      332.000       3.0    [50.0, 464.1, 382.0, 482.822]             6
9      332.000       3.0    [50.0, 449.7, 382.0, 468.422]             7
10      59.040       3.0   [50.0, 435.3, 109.04, 454.022]             1
11     304.007       4.0  [64.0, 420.9, 368.007, 439.622]             1
12     318.000       5.0    [64.0, 406.5, 382.0, 425.222]             1
13     332.000       5.0    [50.0, 392.1, 382.0, 410.822]             8
14     332.000       5.0    [50.0, 377.7, 382.0, 396.422]             9
15     332.000       5.0    [50.0, 363.3, 382.0, 382.022]            10
16      43.252       5.0   [50.0, 348.9, 93.252, 367.622]             1
17     318.000       6.0    [64.0, 334.5, 382.0, 353.222]             1
18     332.000       6.0    [50.0, 320.1, 382.0, 338.822]            11
19     332.000       6.0    [50.0, 305.7, 382.0, 324.422]            12
20     332.000       6.0    [50.0, 291.3, 382.0, 310.022]            13
21     332.000       6.0    [50.0, 276.9, 382.0, 295.622]            14
22     317.020       6.0   [50.0, 262.5, 367.02, 281.222]             1
23     318.000       7.0    [64.0, 248.1, 382.0, 266.822]             1
24     332.000       7.0    [50.0, 233.7, 382.0, 252.422]            15
25      47.014       7.0   [50.0, 219.3, 97.014, 238.022]             1
26     318.000       8.0    [64.0, 204.9, 382.0, 223.622]             1
27     316.723       8.0  [50.0, 190.5, 366.723, 209.222]             1
28     318.000       9.0    [64.0, 176.1, 382.0, 194.822]             1
29     326.766       9.0  [50.0, 161.7, 376.766, 180.422]             1
30     318.000      10.0    [64.0, 147.3, 382.0, 166.022]             1
31     332.000      10.0    [50.0, 132.9, 382.0, 151.622]            16
32     332.000      10.0    [50.0, 118.5, 382.0, 137.222]            17
33     305.393      11.0  [64.0, 104.1, 369.393, 122.822]             1
34     318.000      12.0     [64.0, 89.7, 382.0, 108.422]             1
35     318.000      13.0      [64.0, 75.3, 382.0, 94.022]             1
36     319.165      13.0    [50.0, 60.9, 369.165, 79.622]             1
37     308.165      14.0    [64.0, 46.5, 372.165, 65.222]             1
38     318.000      15.0      [64.0, 32.1, 382.0, 50.822]             1
39     329.153      15.0    [50.0, 17.7, 379.153, 36.422]             1
40     318.000      16.0       [64.0, 3.3, 382.0, 22.022]             1
41     324.335      16.0    [50.0, -11.1, 374.335, 7.622]             1

PS:不过我仍然不确定自己是否正确理解了您的目标。

旧答案:

你可以使用 shift 函数来访问上一行和下一行:

df.shift(-1)  # df will be shifted one row backwards (will show `next` row) 

df.shift(1)  # df will be shifted one row forwards (will show `prev` row)

例子:

In [142]: df
Out[142]:
   a  b  c
0  8  3  0
1  8  3  4
2  9  4  1
3  2  1  8
4  5  6  3

In [147]: df['prev_a'] = df.a.shift(1)

In [148]: df['next_a'] = df.a.shift(-1)

In [149]: df
Out[149]:
   a  b  c  prev_a  next_a
0  8  3  0     NaN     8.0
1  8  3  4     8.0     9.0
2  9  4  1     8.0     2.0
3  2  1  8     9.0     5.0
4  5  6  3     2.0     NaN

关于python - 如何向行添加值?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37459572/

相关文章:

python - 使用 (wx)python 记录外部程序的输出

python - 将报告格式转换为Python数据集

python - 用补丁模拟两个函数以进行单元测试

python - 无法在 Pandas 中创建锯齿状数据框?

Python:并行修改数组的简单方法

python - 包括基于一组分组数据的缺失值组合

python - Pinguin rcorr 热图

python - 从所有数据框列中删除子字符串

python - 在 panda 数据框中用更好的替代方案替换 for 循环以进行相似性测量

python - 使用不同的功能获得不同的对象大小