c - 使用 C 和动态分配从 CSV 读取和保存值

标签 c arrays csv

底部编辑(我将其写为编辑而不是它自己的问题,因为它是如此相关):

我正在编写一个 CSV 阅读器,它应该将所有值保存为大型多维数组中的字符。在之前的帖子中,我被警告说我的精简代码过于模糊,所以我将发布更多内容。我想为其长度道歉,因为我仍在尝试衡量该网站上合适的长度。

最终这个程序将成为我创建的用于执行数据分析的头文件中的头条新闻。我用于该程序的头文件是:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

我遇到的问题是阅读器似乎在名为 read_csv() 的函数内工作。我知道这一点是因为该函数中放置了 printf() 语句。当我在 main() 中 printf() 多维字符数组“缓冲区”时,数据显示不正确。

在 main() 中,第一列将正确打印,但接下来的三列则不会。另外,我的目标是能够读取任何单元格最多包含 20 个字符的 MxN CSV 文件。使用 malloc() 创建通用代码是我在此任务中的下一个目标。

我的 main() 结构如下:

int main(){

  FILE *f;
  char fname[20];
  int i, j;

  printf("enter name of csv file : ") ;
  scanf("%s",fname) ;       

  f = fopen(fname, "r");

  //find row/col
  int find_c_r[2];
  int * pfrc = &find_c_r[0];
  pfrc = find_col_row(f);
  printf("Find_c_r[0] = %d \t Find_c_r[1] = %d\n", *pfrc, *(pfrc+1));

  int numCol = *pfrc;
  int numRow = *(pfrc+1);

  char buffer[50][50][20];// ideally size buffer[numCol][numRow][20]

  //sets all values to NULL
  for(j = 0 ; j < 50 ; j++){
    for(i = 0 ; i < 10 ; i++){   
      memset(buffer[i][j],'\0', 20);
    }
  }

  read_csv(f, numCol, numRow, buffer); 
  /////
  printf("\n\n");

 for(j = 0 ; j < numRow ; j++){
    for(i = 0 ; i < numCol; i++){      
      printf("[%d][%d]",i, j);
      printf("_%s_  ",buffer[i][j]);
    } 
    printf("\n");
  }

  printf("END OF PROGRAM.\n");
 }

其中一部分是我的数组“缓冲区”的动态分配。我不太确定如何以这种格式进行 malloc() 。

main() 调用的第一个函数是 find_col_row(FILE *f)。它工作没有问题,但人们在问题中要求更多我的代码。它返回一个指向 int 数组的指针,该数组保存正在读取的 CSV 文件中的列数和行数:

int * find_col_row(FILE *f){
 //Find numCol and numRow
  int numCol, numRow;
  char c;
  int new_line= 0;
  int comma = 0;
  int z = 0;
  numCol = 0;
  numRow = 0;
  while (c != EOF) {
    c = fgetc(f) ; 
    if(c == ','){ //WORDS MUST BE SEPARATED BY COMMAS
      comma++;
    }
    if(c == ';'){ //LINES MUST BE SEPARATED BY SEMI-COLONS
      new_line++;
      if(numCol == 0){
         numCol = comma + 1;
      }
    } 
  }


  numRow = new_line - 1;

  int a[2] = {numCol, numRow};
  int * pa = &a[0];

  return pa;
}

被调用的第二个函数是 read_csv(...)。此函数的目标是“读取”CSV 文件并将每个单元格的值“保存”在多维字符数组“缓冲区”中:

void read_csv(FILE *f, int numCol, int numRow, char buffer[numCol][numRow][20])  //cells split by ',', row split by ';'
{
  char fname[100];
  int i = 0, j = 0;
  int c = 0,n = 0, z = 0;

  if (f == NULL) {
    printf("can't open file, %s\n", fname) ;
    exit(1) ;
  }

  n = 0 ;

  fseek(f, 0, SEEK_SET); //starts reading the file from the start
  c = fgetc(f) ;

  i = 0;
  j = 0;


  char temp[20];
  memset(temp, '\0', 20);
  int tc = 0; //temp counter
  int mv_temp = 0; //this aids in removing the first character if == ' '
  temp[tc] = c;
  while (c != EOF) {

    if(c == ','){
      if(temp[0] == ' '){
         for(mv_temp = 0 ; mv_temp < tc ; mv_temp++){
           temp[mv_temp] = temp[mv_temp + 1];
         }
      }
      strncpy(buffer[i][j], temp, 20);
      i++; 
      tc = 0;
      memset(temp, '\0', 20);
    }else if(c == ';'){
      if(temp[0] == ' '){
         for(mv_temp = 0 ; mv_temp < tc ; mv_temp++){
           temp[mv_temp] = temp[mv_temp + 1];
         }
      }
      strncpy(buffer[i][j], temp, 20);
      j++;
      i = 0;
      tc = 0;
      memset(temp, '\0', 20);
       c = fgetc(f);

    }else{
      temp[tc] = c;
      tc++;
    }
    c = fgetc(f);    
  }  /////while loop over


  for(j = 0 ; j < numRow ; j++){
    for(i = 0 ; i < numCol; i++){      
      printf("[%d][%d]",i, j);
      printf("_%s_  ",buffer[i][j]);
    } 
    printf("\n");
  }

}

尚未尝试使用任何其他 CSV 文件运行此程序,这是我使用的 CSV。运行程序时,第一步是 scanf() 文件名。我称之为

simp.csv

仅供引用,此数据指美式足球的基本数据:阵型、阵型变化、下移、距离。该文件如下所示:

OFF_FORM,FORM_VAR, DN, DIST;
DEUCE,RIGHT, 1, 10;
DEUCE,LEFT, 2, 7;
TRIO,RIGHT, 3, 3;
TREY,LEFT, 1, 10;
TRIO,RODDY, 1, 10;
TREY,LION, 2, 3;
DEEP,LEFT, 1, 10;
DEUCE,LION, 2, 15;
DEUCE,RIGHT, 3, 4;
DEEP,RODDY, 1, 10;
TREY,RIGHT, 1, 10;
TRIO,RAM, 2, 8;
TRIO,RAM, 3, 8;
DEEP,ROCK, 1, 10;
DEUCE,LION, 1, 10;
TRIO,LOUIE, 1, 10;
TRIO,RIGHT, 2,4;
DEUCE,RIGHT, 3, 6;
DEUCE,LION, 4, 2;
TREY,LION,1,10;

再次,我对问题的长度表示歉意。我希望我有足够的信息来提供帮助。作为一名年轻/新手程序员,我愿意接受任何和所有反馈。如果您能回答我的问题并指出优化我的代码以更有效地工作的方法,我将非常感谢您的反馈。

//////////////////////////////////////////////////////////// /////////////////////////////////

编辑:

@BLUEPIXY 在他们的答案中分享的代码运行得很好。现在,我只是想把它变成一个基本的头文件,我不知道如何修改我看到的一些问题。我对代码所做的只是更改函数的名称并将它们传输到头文件中。

#ifndef bp_csv_reader
#define bp_csv_reader    

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <ctype.h>

//https://tools.ietf.org/html/rfc4180
char *csv_get_field(FILE *fp, char separator, int *state)

char ***csv_read(const char *filename, size_t *rows, size_t *cols)

char *csv_trim(char *s)

#endif

csv.c 看起来像:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include "csv.h"

//https://tools.ietf.org/html/rfc4180
char *csv_get_field(FILE *fp, char separator, int *state){
    int ch = fgetc(fp);

    if(ch == EOF)
        return NULL;

    size_t size = 1, index = 0;
    char *field = malloc(size);
    bool quoted_in = false;

    for(;ch != EOF; ch = fgetc(fp)){
        if(ch == '"'){
            if(quoted_in){
                int prefetch = fgetc(fp);
                if(prefetch == '"'){
                    ch = prefetch;
                } else {
                    quoted_in = false;
                    ungetc(prefetch, fp);
                    continue;
                }
            } else {
                quoted_in = true;
                continue;
            }
        } else if(!quoted_in && (ch == separator || ch == '\n')){
            break;
        }
        field[index++] = ch;
        char *temp = realloc(field, ++size);
        if(!temp){
            perror("realloc:");
            free(field);
            exit(EXIT_FAILURE);
        }
        field = temp;
    }
    field[index] = 0;
    *state = ch;
    if(quoted_in){
        fprintf(stderr, "The quotes is not closed.\n");
        free(field);
        return NULL;
    }
    return field;
}

char ***csv_read(const char *filename, size_t *rows, size_t *cols){
    *rows = *cols = 0;

    FILE *fp = fopen(filename, "r");
    if(!fp){
        fprintf(stderr, "%s can't open in %s\n", filename, __func__);
        perror("fopen");
        return NULL;
    }


    char *field;
    int state;
    size_t r = 0, c = 0;
    char ***mat = NULL;
    void *temp;

    while(field = csv_get_field(fp, ',', &state)){
        if(c == 0){
            mat = realloc(mat, (r + 1)*sizeof(*mat));
            if(!mat){
                fprintf(stderr, "realloc failed in %s\n", __func__);
                exit(EXIT_FAILURE);
            }
            mat[r] = NULL;
        }
        mat[r] = realloc(mat[r], (c + 1)*sizeof(**mat));
        if(!mat[r]){
            fprintf(stderr, "realloc failed in %s\n", __func__);
            exit(EXIT_FAILURE);
        }
        mat[r][c++] = field;
        if(state == '\n' || state == EOF){
            if(*cols == 0){
                *cols = c;
            } else if(c != *cols){
                fprintf(stderr, "line %zu doesn't match number of columns in %s\n", r, filename);
                exit(EXIT_FAILURE);
            }
            c  = 0;
            *rows = ++r;
        }
    }
    fclose(fp);

    return mat;
}

#include <ctype.h>

char *csv_trim(char *s){
    if(!s || !*s)
        return s;

    char *from, *to;

    for(from = s; *from && isspace((unsigned char)*from); ++from);
    for(to = s; *from;){
        *to++ = *from++;
    }
    *to = 0;
    while(s != to && isspace((unsigned char)to[-1])){
        *--to = 0;
    }
    return s;
}

调用它的代码如下所示:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <csv.h>


int main(void){
    size_t rows, cols;


    char ***mat = csv_read("simp.csv", &rows, &cols);

    size_t r, c;

    for(r = 0; r < rows; ++r){
        for(c = 0; c < cols; ++c){
            if(c)
                putchar(',');
            printf("%s", csv_trim(mat[r][c]));
            free(mat[r][c]);
        }
        puts("");
        free(mat[r]);
    }
    free(mat);
    return 0;
}

我不确定为什么会遇到这些错误。我得到的代码在它自己的文件中完美运行。直到我将它们放入头文件中才出现问题。这就是我在终端中编译的方式:

 acom test_csv.c csv.c -I. csv.h

这些是我看到的错误。

In file included from test_cesv.c:5:0:
./csv.h: In function ‘csv_get_field’:
./csv.h:15:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘char’
 char *csv_trim(char *s)
 ^
test_cesv.c:142:1: error: expected ‘{’ at end of input
 }
 ^
In file included from csv.c:5:0:
csv.h: In function ‘csv_get_field’:
csv.h:15:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘char’
 char *csv_trim(char *s)
 ^
csv.c:55:67: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token
 char ***csv_read(const char *filename, size_t *rows, size_t *cols){
                                                                   ^
csv.c:105:24: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token
 char *csv_trim(char *s){
                        ^
csv.c:120:1: error: expected ‘{’ at end of input
 }
 ^
csv.h: In function ‘csv_get_field’:
csv.h:15:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘char’
 char *csv_trim(char *s)
 ^

最佳答案

试试这个

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

//https://tools.ietf.org/html/rfc4180
char *getCSVField(FILE *fp, char separator, int *state){
    int ch = fgetc(fp);

    if(ch == EOF)
        return NULL;

    size_t size = 1, index = 0;
    char *field = malloc(size);
    bool quoted_in = false;

    for(;ch != EOF; ch = fgetc(fp)){
        if(ch == '"'){
            if(quoted_in){
                int prefetch = fgetc(fp);
                if(prefetch == '"'){
                    ch = prefetch;
                } else {
                    quoted_in = false;
                    ungetc(prefetch, fp);
                    continue;
                }
            } else {
                quoted_in = true;
                continue;
            }
        } else if(!quoted_in && (ch == separator || ch == '\n')){
            break;
        }
        field[index++] = ch;
        char *temp = realloc(field, ++size);
        if(!temp){
            perror("realloc:");
            free(field);
            exit(EXIT_FAILURE);
        }
        field = temp;
    }
    field[index] = 0;
    *state = ch;
    if(quoted_in){
        fprintf(stderr, "The quotes is not closed.\n");
        free(field);
        return NULL;
    }
    return field;
}

char ***read_csv(const char *filename, size_t *rows, size_t *cols){
    *rows = *cols = 0;

    FILE *fp = fopen(filename, "r");
    if(!fp){
        fprintf(stderr, "%s can't open in %s\n", filename, __func__);
        perror("fopen");
        return NULL;
    }

    char *field;
    int state;
    size_t r = 0, c = 0;
    char ***mat = NULL;
    void *temp;

    while(field = getCSVField(fp, ',', &state)){
        if(c == 0){
            mat = realloc(mat, (r + 1)*sizeof(*mat));
            if(!mat){
                fprintf(stderr, "realloc failed in %s\n", __func__);
                exit(EXIT_FAILURE);
            }
            mat[r] = NULL;
        }
        mat[r] = realloc(mat[r], (c + 1)*sizeof(**mat));
        if(!mat[r]){
            fprintf(stderr, "realloc failed in %s\n", __func__);
            exit(EXIT_FAILURE);
        }
        mat[r][c++] = field;
        if(state == '\n' || state == EOF){
            if(*cols == 0){
                *cols = c;
            } else if(c != *cols){
                fprintf(stderr, "line %zu doesn't match number of columns in %s\n", r, filename);
                exit(EXIT_FAILURE);
            }
            c  = 0;
            *rows = ++r;
        }
    }
    fclose(fp);

    return mat;
}

#include <ctype.h>

char *trim(char *s){
    if(!s || !*s)
        return s;

    char *from, *to;

    for(from = s; *from && isspace((unsigned char)*from); ++from);
    for(to = s; *from;){
        *to++ = *from++;
    }
    *to = 0;
    while(s != to && isspace((unsigned char)to[-1])){
        *--to = 0;
    }
    return s;
}

int main(void){
    size_t rows, cols;
    char ***mat = read_csv("simp.csv", &rows, &cols);
    for(size_t r = 0; r < rows; ++r){
        for(size_t c = 0; c < cols; ++c){
            if(c)
                putchar(',');
            printf("%s", trim(mat[r][c]));
            free(mat[r][c]);
        }
        puts("");
        free(mat[r]);
    }
    free(mat);
    return 0;
}

关于c - 使用 C 和动态分配从 CSV 读取和保存值,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/45952122/

相关文章:

c - 当大的正数相乘时,为什么结果是负数?

php - 将数组键及其值拆分为新变量

python - 按对计数/数据透视表

python - 如何解析多个xml文件并将属性传递给csv?

php - 使用 PHP 将单列从 CSV 转换为简单数组

c - 内核清零内存?

c - 如何在 C 中不访问此变量?

c - 如何在Windows中获取C中的调用堆栈?

python - 将关心索引的函数应用于 numpy 数组的每个元素

javascript - 代码不显示 json 页面中的数据