python - 尝试使用 get_loc 时出现 Pandas 错误

标签 python csv pandas dataframe

我一直在开发一个用来解析 CSV 的程序,为此安装了 Anaconda2,并通过 pip 把 Pandas 更新到了最新版本。但不知为何,无论程序多么简单,只要调用 get_loc 就会报错。

import sys
import pandas as pd

# Load the CSV file into a DataFrame.
infile = pd.read_csv("Path/to/CSV")

# This is the failing line: it raises
# AttributeError: 'DataFrame' object has no attribute 'get_loc'.
# get_loc belongs to Index objects (e.g. infile.columns), not to DataFrame.
loc_num = infile.get_loc('ColumnName')

# Python 2 print statement.
print loc_num

这是错误:

    loc_num = infile.get_loc('Item')
AttributeError: 'DataFrame' object has no attribute 'get_loc'



有人知道我哪里做错了吗?会不会是 Pandas 安装得有问题?

编辑:这是我正在处理的完整程序,因此您可以了解我正在处理的内容。
import pandas as pd  # import pandas library for csv processing, abbreviating it pd
import sys  # import sys for writing to terminal
import Tkinter as tk
import tkFileDialog, ttk

# Paths returned by the open/save file dialogs, appended in the order the
# user picked them (Files.getcsvfile and Files.gettextfile both append here).
file_loc_list = []
# NOTE(review): Files.__init__ creates its own local master_list, so the GUI
# callbacks fill that local list and never this module-level one.
master_list=[]

class Files(tk.Frame):
   """Tk frame that gathers image/CSV options and triggers the column lookup.

   The class relies on two module-level names: ``root`` (used as the parent
   of the file dialogs) and ``file_loc_list`` (receives the chosen paths).
   """

   def __init__(self):
       """Build all input widgets, the action buttons and the dialog options.

       ``root`` must already exist as a global when this runs, because the
       dialog option dicts store it as their parent.
       """

       tk.Frame.__init__(self)

       # Local list captured by the nested data_listing/doit closures below.
       # It shadows the module-level master_list, which is left untouched.
       master_list=[]

       # define final column entry
       final_col_l = tk.Label(self, text="What is the final column you want before the images?", wraplength=150)
       final_col_l.grid(row=0)
       final_col_e = tk.Entry(self)
       final_col_e.grid(row=0, column=1)

       # define what is used to count the images
       img_count_l = tk.Label(self, text="Is this a calendar strip?")
       img_count_l.grid(row=1)
       # The three radio buttons share this variable; its value is one of
       # "number", "day5" or "day7".
       img_count = tk.StringVar()
       img_countnumber = ttk.Radiobutton(self, text="No", variable=img_count, value="number")
       img_countday5 = ttk.Radiobutton(self, text='Yes (5 Days)', variable=img_count, value='day5')
       img_countday7 = ttk.Radiobutton(self, text='Yes (7 Days)', variable=img_count, value='day7')
       img_countnumber.grid(row=2, column=1)
       img_countday5.grid(row=3, column=1)
       img_countday7.grid(row=4, column=1)


       # define number of images selection
       n_images_l = tk.Label(self, text="How many images are you creating?", wraplength=150)
       n_images_l.grid(row=5, column=0)
       # Kept on self (unlike the other widget variables, which are locals).
       self.n_images_box_value = tk.IntVar()
       n_images = ttk.Combobox(self, textvariable=self.n_images_box_value)
       n_images.grid(row=5, column=1)
       n_images['values'] = (3, 4, 5, 6, 9)

       # define text before image numbers
       img_name1_l = tk.Label(self, text="What is before the number/day in the image columns?", wraplength=150)
       img_name1_l.grid(row=6)
       img_name1_e = tk.Entry(self)
       img_name1_e.grid(row=6, column=1)

       # define text after image numbers
       img_name2_l = tk.Label(self, text="What is after the number/day in the image columns (if none, enter nothing)?", wraplength=150)
       img_name2_l.grid(row=7)
       img_name2_e = tk.Entry(self)
       img_name2_e.grid(row=7, column=1)

       # define image width
       width_l = tk.Label(self, text="What is the width of images you want?", wraplength=150)
       width_l.grid(row=8)
       width_e = tk.Entry(self)
       width_e.grid(row=8, column=1)

       def data_listing():
           """Append the current widget values to master_list.

           Order: final column, image-count mode, number of images,
           text before the number, text after the number, width.
           Every call appends six more items; nothing is cleared first.
           """
           master_list.append(final_col_e.get())
           master_list.append(img_count.get())
           master_list.append(self.n_images_box_value.get())
           master_list.append(img_name1_e.get())
           master_list.append(img_name2_e.get())
           master_list.append(width_e.get())

       def doit():
           """Return the CSV column names located before the chosen column.

           Reads the first path in file_loc_list as CSV and uses
           master_list[0] (the final-column entry) as the column to find.
           """
           # file_loc_list[0] is whichever path was appended first; the CSV
           # and text-file dialogs share that list, so the order of button
           # clicks decides which path is read here.
           infile = pd.read_csv(file_loc_list[0])
           # This call raises AttributeError: DataFrame has no get_loc.
           # The lookup exists on the column Index: infile.columns.get_loc(...).
           loc_num = infile.get_loc(master_list[0])
           cols = list(infile.columns.values)
           # Collect cols[0] .. cols[loc_num - 1], i.e. every column before
           # position loc_num.
           c = 0
           cols_wanted = []
           while c < loc_num:
               new_col = cols[c]
               cols_wanted.append(new_col)
               c += 1
           return cols_wanted

       # define button
       tk.Button(self, text='Input Data', command=data_listing).grid(row=9, column=0)
       tk.Button(self, text='Select CSV File', command=self.getcsvfile).grid(row=9, column=1)
       tk.Button(self, text='Save Text File', command=self.gettextfile).grid(row=10, column=1)
       tk.Button(self, text='Process', command=doit).grid(row=11, column=1)

       # define options for opening a file
       # self.open_opt and the local name options refer to the same dict.
       self.open_opt = options = {}
       options['defaultextension'] = '.csv'
       options['filetypes'] = [('all files', '.*'), ('CSV files', '.csv')]
       options['parent'] = root
       options['title'] = 'Open a CSV file.'

       # define options for saving a file
       # options is rebound to a fresh dict here, so open_opt is not modified.
       self.save_opt = options = {}
       options['defaultextension'] = '.txt'
       options['filetypes'] = [('all files', '.*'), ('text files', '.txt')]
       options['parent'] = root
       options['title'] = 'Save a text file.'

   def getcsvfile(self):
       """Show the open-file dialog and record the returned path."""
       # get filename
       csvfile = tkFileDialog.askopenfilename(**self.open_opt)

       # NOTE(review): the dialog result is appended unconditionally.
       file_loc_list.append(csvfile)


   def gettextfile(self):
       """Show the save-file dialog and record the returned path."""
       textfile = tkFileDialog.asksaveasfilename(**self.save_opt)

       # Appended to the same list as the CSV path.
       file_loc_list.append(textfile)

if __name__=='__main__':
    # root must be bound as a global before Files() is created, because
    # Files.__init__ stores root as the parent of its file dialogs.
    root = tk.Tk()
    # Build the frame and place it with the grid geometry manager.
    Files().grid()
    # Enter the Tk event loop.
    root.mainloop()

最佳答案

我认为您需要加上 columns,因为 get_loc 是 Index 的方法(Index.get_loc),而不是 DataFrame 的方法。最后通过 iloc 选择所需的列:

import pandas as pd
import io

# Sample CSV: the header names four columns but each row has five fields,
# so the first field of every row ends up as the index (see the output below).
temp=u"""A,B,C,D
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15"""
# After testing, replace io.StringIO(temp) with the filename.
infile = pd.read_csv(io.StringIO(temp))
print (infile)
    A   B   C   D
0   0   1   2   3
1   4   5   6   7
2   8   9  10  11
3  12  13  14  15

# get_loc is defined on the column Index, so call it through infile.columns.
loc_num = infile.columns.get_loc('C')
print (loc_num)
2
# Columns positioned before 'C' (the slice end loc_num is exclusive).
print (infile.iloc[:, :loc_num])
    A   B
0   0   1
1   4   5
2   8   9
3  12  13

# Adding 1 to the slice end also includes column 'C' itself.
print (infile.iloc[:, :loc_num + 1])
    A   B   C
0   0   1   2
1   4   5   6
2   8   9  10
3  12  13  14

关于python - 尝试使用 get_loc 时出现 Pandas 错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37707011/

相关文章:

python - python中的语音识别api "bing"非常慢

python - 无法让我的 Sprite 沿着路径点移动

python - 使用 pandas 从 CSV 中读取十进制表示形式的 float

python - 比较csv的两列并在另一个csv中输出字符串相似度

python - 总结 5 年跨度内的数据

python - 如何根据另一列分别统计一列中的数据?

python - Web2Py - Python 语法

python - 在python django中解析csv文件

python - Plotly:如何使用折线图的下拉列表更新绘图数据?

python - 使用boto3 python计算s3中csv文件的行数