c - 为什么这会在一个系统上给我带来段错误,而在另一个系统上却不会?

标签 c dictionary pointers segmentation-fault trie

我正在运行一个程序,该程序通过读取“words.txt”中的单词来创建字典树,然后可以搜索以查看树中是否存在某些单词。在 https://www.onlinegdb.com/online_c_compiler 上运行此程序工作完美,但是当我尝试在我自己的 Linux 系统上运行它时,出现段错误。有什么想法吗?这是代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Node structure of trie */
struct node
{
    struct node *next[27]; // 26 for a-z and last one(26th index) is for apostrophe
    int end; // value as 1 denotes the end of the word.
};

/* This method is for creating a new node */
struct node *createNode()
{
    struct node *newNode = (struct node *)malloc(sizeof(struct node));
    newNode->end = 0; // set end as false i.e. 0
    for (int i = 0; i < 27; i++) // set all children as NULL
        newNode->next[i] = NULL;
    return newNode;
}

/* This method is for inserting a word in the trie */
void insert(struct node *root, char *word)
{
    struct node *curr = root;
    for (int i = 0; i < strlen(word); i++) // iterating character by character
    {
        int index;
        if (word[i] == '\'') // if character is apostrophe index is 26
            index = 26;
        else
            index = tolower(word[i]) - 'a'; // else the index as the alphabet sequence number starting from 0.
// for a - 0, b - 1, ..... z - 25
        if (!curr->next[index])
            curr->next[index] = createNode(); // create node of that character if not created yet
        curr = curr->next[index]; // then go for next character
    }
    curr->end = 1; // mark end as 1 to denote the ending of the word
}

/* This method is for searching a word in the trie */
int search(struct node *root, char *word)
{
    struct node *curr = root;
    for (int i = 0; i < strlen(word); i++) // iterating character by character
    {
/* Getting index same as insert function */
        int index;
        if (word[i] == '\'')
            index = 26;
        else
            index = tolower(word[i]) - 'a';
        if (!curr->next[index]) // if node of current character not found means the word doesn't exist in trie.
            return 0;
        curr = curr->next[index];
    }
    if (curr != NULL && curr->end) // if iterated all the characters and end is 1 then the word exists.
        return 1;
    else
        return 0; // otherwise doesn't exist.
}

int main()
{
/* Reading the file line by line */
    FILE *file;
    size_t len = 1000;
    char *word = (char *)malloc(len);
    file = fopen("word.txt", "r");
    struct node *root = createNode();
    while (fgets(word, len, file) != NULL) // iterating line by line
    {
        int len = strlen(word);
        if (word[len - 1] == '\n') // removing the newline which is at the end of the every line
            word[len - 1] = '\0';
        insert(root, word); // inserting every word
    }
    int ans;
    word = (char *)("error's"); // checking the existence of the word "error's"
    ans = search(root, word);
    if (ans == 1)
        printf("\"%s\" found!\n", word);
    else
        printf("\"%s\" not found!\n", word);
    word = (char *)("hilli");// checking the existence of the word "hilli"
    ans = search(root, word);
    if (ans == 1)
        printf("\"%s\" found!\n", word);
    else
        printf("\"%s\" not found!\n", word);
    return 0;
}

最佳答案

这是应该可以工作的代码。它确实可以在使用 GCC 9.2.0 和 XCode 11.3.1 的 macOS 10.15.2 Catalina 上运行,并启用编译器设置 fussy 和许多内存调试选项。它不会尝试释放它构建的特里树;它应该(能够释放您构建的结构是一个很好的练习)。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Node structure of trie */
struct node
{
    struct node *next[27]; // 26 for a-z and last one(26th index) is for apostrophe
    int end; // value as 1 denotes the end of the word.
};

/* This method is for creating a new node */
static struct node *createNode(void)
{
    struct node *newNode = (struct node *)malloc(sizeof(struct node));
    newNode->end = 0; // set end as false i.e. 0
    for (int i = 0; i < 27; i++) // set all children as NULL
        newNode->next[i] = NULL;
    return newNode;
}

/* This method is for inserting a word in the trie */
static void insert(struct node *root, char *word)
{
    struct node *curr = root;
    int length = strlen(word);
    for (int i = 0; i < length; i++) // iterating character by character
    {
        int index;
        if (word[i] == '\'') // if character is apostrophe index is 26
            index = 26;
        else
            index = tolower(word[i]) - 'a'; // else the index as the alphabet sequence number starting from 0.
// for a - 0, b - 1, ..... z - 25
        if (!curr->next[index])
            curr->next[index] = createNode(); // create node of that character if not created yet
        curr = curr->next[index]; // then go for next character
    }
    curr->end = 1; // mark end as 1 to denote the ending of the word
}

/* This method is for searching a word in the trie */
static int search(struct node *root, char *word)
{
    struct node *curr = root;
    int length = strlen(word);
    for (int i = 0; i < length; i++) // iterating character by character
    {
/* Getting index same as insert function */
        int index;
        if (word[i] == '\'')
            index = 26;
        else
            index = tolower(word[i]) - 'a';
        if (!curr->next[index]) // if node of current character not found means the word doesn't exist in trie.
            return 0;
        curr = curr->next[index];
    }
    if (curr != NULL && curr->end) // if iterated all the characters and end is 1 then the word exists.
        return 1;
    else
        return 0; // otherwise doesn't exist.
}

int main(void)
{
/* Reading the file line by line */
    FILE *file;
    size_t len = 1000;
    char *word = (char *)malloc(len);
    const char filename[] = "word.txt";
    file = fopen(filename, "r");
    if (file == 0)
    {
        fprintf(stderr, "Failed to open file '%s' for reading\n", filename);
        exit(EXIT_FAILURE);
    }
    struct node *root = createNode();
    while (fgets(word, len, file) != NULL) // iterating line by line
    {
        //int len = strlen(word);
        //if (word[len - 1] == '\n') // removing the newline which is at the end of the every line
        //    word[len - 1] = '\0';
        word[strcspn(word, "\r\n")] = '\0';
        printf("Word: [%s]\n", word);
        insert(root, word); // inserting every word
    }
    int ans;
    word = (char *)("error's"); // checking the existence of the word "error's"
    ans = search(root, word);
    if (ans == 1)
        printf("\"%s\" found!\n", word);
    else
        printf("\"%s\" not found!\n", word);
    word = (char *)("hilli");// checking the existence of the word "hilli"
    ans = search(root, word);
    if (ans == 1)
        printf("\"%s\" found!\n", word);
    else
        printf("\"%s\" not found!\n", word);
    return 0;
}

给定包含这些行的合适子集的数据文件,代码可以正确运行:

enough
abracadabra
acid
test
hilli
error's
tests
testing
tested
tester
testosterone
acidly
acidic

它使用 DOS(CRLF)和 Unix(NL 或 LF)行结尾进行了测试,并且对这两种行结尾都是安全的,因为它使用 strcspn() 来切换任一类型的行结尾:

word[strcspn(word, "\r\n")] = '\0';

如果你有旧的 Mac 风格的行结尾(仅限 CR),那么你会遇到 fgets() 无法识别行结尾的问题 - 但如果你修复了这个问题(使用 POSIX getdelim()例如),它也可以正确地处理这样的行。

对代码所做的更改基本上是装饰性的,但允许使用相当严格的选项干净地编译代码(源trie79.c;程序trie79):

$ gcc -O3 -g -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes -Wstrict-prototypes \
>     trie79.c -o trie79 
$

关于c - 为什么这会在一个系统上给我带来段错误,而在另一个系统上却不会?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59941386/

相关文章:

Clang 的 ASan 不检测悬空指针的使用

c - 无法将C中的模拟放到网页上

缺少 Python.h header

java - 如何使用 jSTL 迭代 map ?

python - 根据字典中的键对在数据框中创建不同的列

c++ - 指针操作导致 printf 以相反的顺序打印参数?

java - Java 引用和 C++ 引用有什么区别

c - 为什么当输入为 2、3、4 时 s 返回 0?

python - 如何在 Python 中创建映射到列表的键字典,其中列表中的每个元素都指向其他列表?

c - 这是一个有效的声明吗?