c - 从指针数组中读取字符串

标签 c arrays pointers

如何从通过指针数组访问的字符串中读取每个单独的字符?在下面的代码中,我目前在 makeCodes 函数中生成了一个指向名为 symCodes 的字符串的指针数组。我想一次读取 8 个字符的字符串,我考虑过将每个字符串连接在一起,然后逐个字符地循环,但 symCodes 中的字符串每个最多可以有 255 个字符,所以我觉得这可能太多了全部一次性处理。相反,我认为我可以从字符串中逐个字符地读取每个字符。

我尝试过 scanf 或只是循环,但总是以段错误结束。在 headerEncode() 的末尾,它靠近底部。我为每个单独的字符串分配了足够的内存,我尝试循环遍历指针数组并打印出每个单独的字符,但最终出现了段错误。

任何关于以不同方式读取字符串指针数组(逐字符读取最多 n 个字符)的建议,我们将不胜感激。

编辑 1:我已更新程序,在使用 -Wall 和 -W 标志时不再输出警告。我不再遇到段错误(耶!),但我仍然不确定如何解决我的问题,如何逐个字符读取最多 n 个字符的字符串指针数组?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "huffman.h"

#define FAIL 0
#define SUCCESS 1

/* global 1 day arrays that hold chars and their freqs from file */

unsigned long globalFreqs[256] = {0};
unsigned char globalUsedCh[256] = {0};
char globalCodes[256] = {0};
unsigned char globalUniqueSymbols;
unsigned long totalCount = 0;

typedef struct HuffmanTreeNode* HTNode;



struct HuffmanTreeNode* globalSortedLL;
/*
  struct has the input letter, the letters frequency, and the left and irght childs
*/
struct HuffmanTreeNode
{
  char symbol;
  unsigned long freq;
  char *code;
  struct HuffmanTreeNode *left, *right;
  struct HuffmanTreeNode* next;
};

/* does it make sense to have a struct for the entire huffman tree to see its size? */
struct HuffmanTree
{
  unsigned size;
};

/*generate new node with given symbol and freq */
struct HuffmanTreeNode* newNode(char symbol, int freq)
{
  struct HuffmanTreeNode* newNode = malloc(sizeof(struct HuffmanTreeNode));
  newNode->symbol = symbol;
  newNode->freq = freq;
  newNode->left = newNode->right = NULL;
  return newNode;
}

/*current work in progress, i believe this is the way to insert it for a BST
/* will change for HuffmanTreenode once working
/*
*/

struct HuffmanTreeNode* insert(struct HuffmanTreeNode* node, struct HuffmanTreeNode* htnNew)
{

  struct HuffmanTreeNode* currentNode = node;

  if(currentNode == NULL || compareTwoNodes(htnNew, currentNode))
  {
    htnNew->next = currentNode;
    return htnNew;
  }
  else
  {
    while(currentNode->next != NULL && compareTwoNodes(currentNode->next, htnNew))
    {
      currentNode = currentNode->next;
    }
    htnNew->next = currentNode->next;
    currentNode->next = htnNew;
    return node;
  }
}

int compareTwoNodes(struct HuffmanTreeNode* a, struct HuffmanTreeNode* b)
{
  if(b->freq < a->freq)
  {
    return 0;
  }
  if(a->freq == b->freq)
  {
    if(a->symbol > b->symbol)
      return 1;
    return 0;
  }
  if(b->freq > a->freq)
    return 1;
}

struct HuffmanTreeNode* popNode(struct HuffmanTreeNode** head)
{
  struct HuffmanTreeNode* node = *head;
  *head = (*head)->next;
  return node;
}

/*convert output to bytes from bits*/
/*use binary fileio to output */
/*put c for individual character byte*/
/*fwrite each individual byte for frequency of symbol(look at fileio slides) */

/*
  @function:
  @param:
  @return:
*/
int listLength(struct HuffmanTreeNode* node)
{
  struct HuffmanTreeNode* current = node;
  int length = 0;
  while(current != NULL)
  {
    length++;
    current = current->next;
  }
  return length;
}

/*
  @function:
  @param:
  @return:
*/
void printList(struct HuffmanTreeNode* node)
{
  struct HuffmanTreeNode* currentNode = node;

  while(currentNode != NULL)
  {
    if(currentNode->symbol <= ' ' || currentNode->symbol > '~')
      printf("=%d", currentNode->symbol);
    else
      printf("%c", currentNode->symbol);
    printf("%lu ", currentNode->freq);
    currentNode = currentNode->next;
  }
  printf("\n");
}

/*
  @function:
  @param:
  @return:
*/
void buildSortedList()
{
  int i;
  for(i = 0; i < 256; i++)
  {
    if(!globalFreqs[i] == 0)
    {
      globalSortedLL = insert(globalSortedLL, newNode(i, globalFreqs[i]));
    }
  }

  printf("Sorted freqs: ");
  printList(globalSortedLL);
  printf("listL: %d\n", listLength(globalSortedLL));
}
/*
  @function: isLeaf()
  will test to see if the current node is a leaf or not
  @param:
  @return
*/

int isLeaf(struct HuffmanTreeNode* node)
{
  if((node->left == NULL) && (node->right == NULL))
    return SUCCESS;
  else
    return FAIL;
}

/*where I plan to build the actual huffmantree */
/*
  @function:
  @param:
  @return:
*/
struct HuffmanTreeNode* buildHuffmanTree(struct HuffmanTreeNode* node)
{
  int top = 0;
  struct HuffmanTreeNode *left, *right, *topNode, *huffmanTree;
  struct HuffmanTreeNode* head = node;
  struct HuffmanTreeNode *newChildNode, *firstNode, *secondNode;

  while(head->next != NULL)
  {
    /*grab first two items from linkedL, and remove two items*/
    firstNode = popNode(&head);
    secondNode = popNode(&head);
    /*combine sums, use higher symbol, create new node*/
    newChildNode = newNode(secondNode->symbol, (firstNode->freq +   secondNode->freq));
    newChildNode->left = firstNode;
    newChildNode->right = secondNode;
    /*insert new node, decrement total symbols in use */
    head = insert(head, newChildNode);
  }

  return head;
}

void printTable(char *codesArray[])
{
  int i;
  printf("Symbol\tFreq\tCode\n");
  for(i = 0; i < 256; i++)
  {
    if(globalFreqs[i] != 0)
    {
      if(i <= ' ' || i > '~')
      {
        printf("=%d\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
      }
      else
      {
        printf("%c\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
      }
    }
  }
  printf("Total chars = %lu\n", totalCount);
}

void makeCodes(
   struct HuffmanTreeNode *node,        /* Pointer to some tree node */
   char *code,          /* The *current* code in progress */
   char *symCodes[256], /* The array to hold the codes for all the symbols */
   int depth)           /* How deep in the tree we are (code length) */
{
    char *copiedCode;
    int i = 0;

    if(isLeaf(node))
    {
        code[depth] = '\0';
        symCodes[node->symbol] = code;
        return;
    }

    copiedCode = malloc(255*sizeof(char));
    memcpy(copiedCode, code, 255*sizeof(char));

    code[depth] = '0';
    copiedCode[depth] = '1';
    makeCodes(node->left, code, symCodes, depth+1);
    makeCodes(node->right, copiedCode, symCodes, depth+1);
}

/*
  @function: getFileFreq()
  gets the frequencies of each character in the given
  file from the command line, this function will also
  create two global 1d arrays, one for the currently
  used characters in the file, and then one with those
  characters frequencies, the two arrays will line up
  parallel
  @param: FILE* in, FILE* out,
  the current file being processed
  @return: void
*/
void getFileFreq(FILE* in, FILE* out)
{  
    unsigned long freqs[256] = {0};
    int i, t, fileCh;

    while((fileCh = fgetc(in)) != EOF)
    {
        freqs[fileCh]++;
        totalCount++;
    }
    for(i = 0; i < 256; i++)
    {
        if(freqs[i] != 0)
        {
            globalUsedCh[i] = i;
            globalFreqs[i] = freqs[i];
            if(i <= ' ' || i > '~')
            {
                globalUniqueSymbols++;
            }
            else
            {
                globalUniqueSymbols++;
            }
        }
    }
    /* below code until total count is for debugging purposes */
    printf("Used Ch: ");
    for(t = 0; t < 256; t++)
    { 
    if(globalUsedCh[t] != 0)
            {  
         if(t <= ' ' || t > '~')
        {
            printf("%d ", globalUsedCh[t]);
        }
        else
            printf("%c ", globalUsedCh[t]);
        }
    }
    printf("\n");
    printf("Freq Ch: ");
    for(t = 0; t < 256; t++)
    {
        if(globalFreqs[t] != 0)
        {  
            printf("%lu ", globalFreqs[t]);
        }
    }
    printf("\n");
    /* end of code for debugging/vizualazation of arrays*/
    printf("Total Count %lu\n", totalCount);
    printf("globalArrayLength: %d\n", globalUniqueSymbols);
}


void headerEncode(FILE* in, FILE* out, char *symCodes[256])
{
    char c;
    int i, ch, t, q, b, z;
    char *a;
    char *fileIn;
    unsigned char *uniqueSymbols;
    unsigned char *byteStream;
    unsigned char *tooManySym = 0;
    unsigned long totalEncodedSym;


    *uniqueSymbols = globalUniqueSymbols;


    totalEncodedSym = ftell(in);
    rewind(in);

    fileIn = malloc((totalEncodedSym+1)*sizeof(char));
    fread(fileIn, totalEncodedSym, 1, in);
    if(globalUniqueSymbols == 256)
    {
        fwrite(tooManySym, 1, sizeof(char), out);
    }
    else
    {
        fwrite(uniqueSymbols, 1, sizeof(uniqueSymbols)-7, out);
    }
    for(i = 0; i < 256; i++)
    {
        if(globalFreqs[i] != 0)
        {
            fwrite(globalUsedCh+i, 1, sizeof(char), out);
            fwrite(globalFreqs+i, 8, sizeof(char), out);
        }
    }
    for(t = 0; t < totalEncodedSym; t++)
    {
            fwrite(symCodes[fileIn[t]], 8, sizeof(char), out);
    }
    for(q = 0; q < totalEncodedSym; q++)
    {
        symCodes[q] = malloc(255*sizeof(char));
        a = symCodes[q];
        while(*a != '\0')
            printf("%c\n", *(a++));
    }

    printf("Total encoded symbols: %lu\n", totalEncodedSym);
    printf("%s\n", fileIn);
}

void encodeFile(FILE* in, FILE* out)
{
    int top = 0;
    int i;
    char *code;
    char *symCodes[256] = {0};
    int depth = 0;

    code = malloc(255*sizeof(char));

    getFileFreq(in, out);
    buildSortedList();
    makeCodes(buildHuffmanTree(globalSortedLL), code, symCodes, depth);
    printTable(symCodes);
    headerEncode(in, out, symCodes);



    free(code);
}

/*
void decodeFile(FILE* in, FILE* out)
{

}*/

最佳答案

您的代码中有很多问题:

  • [major] 函数 compareTwoNodes 并不总是返回值。如果指示编译器输出更多警告,则可以检测到此类问题。

  • [major]HuffmanTreeNode 中的成员 symbol 类型应为 intchar 类型作为索引值是有问题的,因为它可以是有符号的或无符号的,具体取决于编译器配置和平台特性。您假设 char 的值从 0255,这对于大多数 char 实际上具有-128 .. 127 范围。使用 unsigned charint,但将 char 值强制转换为 unsigned char 以确保正确提升。

  • [major] 比较 if (globalUniqueSymbols == 256) 始终为 false,因为 globalUniqueSymbols 是一个 unsigned char 。对于 8 位字节,可能的字节值的最大数量确实是 256,但它不适合 unsigned char,请将 globalUniqueSymbols 设为int

  • [major] *uniqueSymbols = globalUniqueSymbols; 函数 headerEncode 中将 globalUniqueSymbols 存储到未初始化的变量中指针,肯定是未定义的行为,可能是段错误。

  • [major] sizeof(uniqueSymbols) 是指针的大小,而不是数组的大小,也不是类型的大小。不要将其破解为 sizeof(uniqueSymbols)-7fputc(globalUniqueSymbols, out);

  • [major] fwrite(tooManySym, 1, sizeof(char), out); 也不正确,因为 tooManySym被初始化为0,即:它是一个NULL指针。您需要一个特殊值来告诉源流中使用了所有字节值,请使用 0 并使用 fputc(0, out); 写入。 p>

  • 您在函数 insert 之前嵌套了 C 风格注释,这不是错误,而是容易出错,被认为是不好的风格。

  • 函数 newNode 应采用 unsigned long 类型作为 freq 以保持一致性。

  • 函数buildHuffmanTree具有未使用的局部变量:righttoptopNode

  • 变量i在函数makeCodes中未使用。

  • headerEncode 中许多未使用的变量:byteStreamcchb ...

  • totalEncodedSym 是一个unsigned long,请在停止于 totalEncodedSym 的循环中使用正确类型的索引。

  • encodeFile 中未使用的变量:itop...

编译器可以通过适当的警告级别检测到其中的大多数:gcc -Wall -Wclang -Weverything...

程序逻辑中也可能存在错误,但在解决上述主要问题之前您无法看到这些错误。

关于c - 从指针数组中读取字符串,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41012039/

相关文章:

c++ - 字符数组地址的意外输出

c - 如何根据大小取消引用?

ios - 获取创建套接字的进程的 pid

c - 无法在 system() 中输入字符串 "echo "$(cat test.txt )""

c - 是否保证类型 T[x][y] 与 C 中的 T[x*y] 具有相同的内存布局?

java - 从逗号分隔属性列表创建数组的更优雅的解决方案?

c - 为什么编译器把C程序汇编成这个?

c - C 的流行图表工具或方法

arrays - 为什么 "array"在 Visual-C++ 中被标记为保留字?

python - 将 2d 数组与 3d 数组的每个切片相乘 - Numpy