c - Bison:当我尝试在函数语法规则中添加操作时,出现段错误 11

标签 c compiler-construction bison

我有一个关于段错误 11 的问题。

每次在 function 语法规则的动作块中添加动作(Action)时,都会出现段错误 11。

因此,我无法获取 dump.out,这是一个为我记录标识符的文件。

我认为问题不是由扫描器(scanner)文件引起的,但也不能完全排除这种可能。

当然,问题应该与符号表有关,但确实很奇怪。

问题就这样发生:

function: FN ID '(' ')' {if ($2->st_type == UNDEF) $2->st_type = FUNCTION_TYPE;};

当我在动作块中添加上述操作时,就会发生段错误 11。

不过,下面这样写(动作为空)就没有问题:function: FN ID '(' ')' {};

这里贴出的解析器文件并不完整,因为内容太多了。

我使用 mac 操作系统

希望有人能帮助我。

先谢谢大家。

<hr/>

错误发生的位置

1: // Hello World Example
<fn>
<id: main>
<(>
<)>
<{>
2: fn main() {
3:   // Print text to the console
<let>
<mut>
<id: a>
<:>
<int>
<=>
<integer: 10>
<;>
4:   let mut a:int = 10;
<let>
<mut>
<id: b>
<=>
<string: 1199>
<;>
5:   let mut b = "1199";
<let>
<mut>
<id: sum>
<[>
<str>
<,>
<integer: 10>
<]>
<;>
6:   let mut sum[str, 10];
<id: sum>
<[>
<integer: 0>
<]>
<=>
<string: 100>
<;>
7:   sum[0] = "100";
<id: b>
<=>
<string: 123>
<+>
<id: b>
<;>
8:   b = "123" + b;
<println>
<(>
<string: Hello World>
<)>
<;>
9:   println ("Hello World");
<}>
10: }

Symbol table:
a
b
sum
main
Segmentation fault: 11
<hr/>

输入文件

// Hello World Example
fn main() {
  // Print text to the console
  let mut a:int = 10;
  let mut b = "1199";
  let mut sum[str, 10];
  sum[0] = "100";
  b = "123" + b;
  println ("Hello World");
}
<hr/>

这是我的符号表头文件。

#include <stdio.h>
/* maximum size of hash table */
#define SIZE 211

/* maximum size of tokens-identifiers */
#define MAXTOKENLEN 40

/* token types */
#define UNDEF 0
#define INT_TYPE 1
#define REAL_TYPE 2
#define STR_TYPE 3
#define LOGIC_TYPE 4
#define ARRAY_TYPE 5
#define FUNCTION_TYPE 6
/* new type for parser */
#define CONST_INT_TYPE 7
#define CONST_REAL_TYPE 8
#define CONST_STR_TYPE 9
#define CONST_LOGIC_TYPE 10

/* how parameter is passed */
#define BY_VALUE 1
#define BY_REFER 2

/*
* Originally here, now it is in the symbols.c
* current scope
* int cur_scope = 0;
*/

/* One function parameter; parameters are chained into a singly linked list via `next`. */
typedef struct Parameter{
    int par_type; // type code of the parameter (presumably one of the *_TYPE constants -- TODO confirm)
    char *param_name; // name of the parameter
    // to store value
    int ival; double fval; char *st_sval; int bval; // one value slot per kind: int, real, string, boolean
    int passing; // value or reference (presumably BY_VALUE / BY_REFER -- TODO confirm)
    struct Parameter *next; // link to next one
}Param;

/* a linked list of references (lineno's) for each variable */
typedef struct Ref{ 
    int lineno; // source line on which the identifier was seen
    struct Ref *next; // next reference, or NULL at the end of the list
    int type; // NOTE(review): never written or read by the symbol-table code shown here -- purpose unconfirmed
}RefList;

// One symbol-table entry; entries that hash to the same bucket are chained via `next`.
typedef struct list{
    char st_name[MAXTOKENLEN]; // identifier text (fixed-size buffer)
    int st_size;
    int scope; // value of cur_scope when the entry was last inserted/seen by insert()
    RefList *lines; // list of line numbers where the identifier occurs
    // to store value and sometimes more information
    int st_ival; double st_fval; char *st_sval; int st_bval;
    // type
    int st_type; // one of the *_TYPE codes (UNDEF until the parser assigns one)
    int inf_type; // for arrays (info type) and functions (return type)
    // array stuff
    int *i_vals; double *f_vals; char **s_vals; int *b_vals; // boolean type
    int array_size;
    // function parameters
    Param *parameters;
    int num_of_pars; // number of parameters; also records the current position while they are collected
    // pointer to next item in the list
    struct list *next;
}list_t;

/* the hash table: an array of SIZE bucket heads, allocated by create().
 * NOTE(review): this is a `static` definition inside a header, so every
 * translation unit that includes the header gets its own private copy. */
static list_t **hash_table;

// Function Declarations
void create(); // initialize hash table (allocates it and empties every bucket)
unsigned int hash(char *key); // hash function for insert; result is in [0, SIZE)
void insert(char *name, int len, int type, int lineno); // insert entry, or add a line reference if it already exists
list_t *lookup(char *name); // search for entry by name; NULL if absent
list_t *lookup_scope(char *name, int scope); // search for entry in scope
void hide_scope(); // hide the current scope (decrements the scope counter, never below 0)
void incr_scope(); // go to next scope (increments the scope counter)
void dump(FILE *of); // dump file: write the whole table to `of`
<hr/>

这是符号表代码文件。

#include "symbols.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* current scope */
int cur_scope = 0;

/*
 * Allocate the hash table (SIZE buckets) and mark every bucket as empty.
 * insert(), lookup(), lookup_scope() and dump() all index hash_table, so
 * this must run before any of them.  Terminates the program with a
 * diagnostic if the table cannot be allocated, instead of dereferencing
 * a NULL pointer in the initialisation loop.
 */
void create()
{
    hash_table = malloc(SIZE * sizeof *hash_table);
    if (hash_table == NULL) {
        fprintf(stderr, "symbol table: out of memory\n");
        exit(EXIT_FAILURE);
    }
    for (int i = 0; i < SIZE; i++) hash_table[i] = NULL;
}

/*
 * Hash a NUL-terminated identifier into a bucket index in [0, SIZE).
 *
 * The hash is the sum of all characters plus a mixing term derived from
 * the FIRST character.  The first character is captured before the
 * summing loop: the loop walks the pointer to the terminating '\0', so
 * reading key[0] afterwards (as the code used to) always yielded 0 and
 * the mixing term never contributed anything.
 * Characters are read as unsigned char so bytes above 0x7F are not
 * sign-extended.
 */
unsigned int hash(char *key)
{
    const unsigned char *p = (const unsigned char *)key;
    const unsigned int first = p[0];
    unsigned int hashval = 0;

    for (; *p != '\0'; p++) hashval += *p;
    hashval += first % 11 + (first << 3) - first;
    return hashval % SIZE;
}

void insert(char *name, int len, int type, int lineno)
{
    unsigned int hashval = hash(name); // hash function used
    list_t *l = hash_table[hashval];

    while ((l != NULL) && (strcmp(name,l->st_name) != 0)) l = l->next;

    /* variable not yet in table */
    if (l == NULL){
        l = (list_t*) malloc(sizeof(list_t));
        strncpy(l->st_name, name, len);  
        /* add to hashtable */
        l->st_type = type;
        l->scope = cur_scope;
        l->lines = (RefList*) malloc(sizeof(RefList));
        l->lines->lineno = lineno;
        l->lines->next = NULL;
        l->next = hash_table[hashval];
        hash_table[hashval] = l; 
        // printf("Inserted %s for the first time with linenumber %d!\n", name, lineno); // error checking
    }
    /* found in table, so just add line number */
    else{
        l->scope = cur_scope;
        RefList *t = l->lines;
        while (t->next != NULL) t = t->next;
        /* add linenumber to reference list */
        t->next = (RefList*) malloc(sizeof(RefList));
        t->next->lineno = lineno;
        t->next->next = NULL;
        // printf("Found %s again at line %d!\n", name, lineno);
    }
}

list_t *lookup(char *name)
{ /* return symbol if found or NULL if not found */
    unsigned int hashval = hash(name);
    list_t *l = hash_table[hashval];
    while ((l != NULL) && (strcmp(name,l->st_name) != 0)) l = l->next;
    return l; // NULL is not found
}

list_t *lookup_scope(char *name, int scope)
{ /* return symbol if found or NULL if not found */
    unsigned int hashval = hash(name);
    list_t *l = hash_table[hashval];
    while ((l != NULL) && (strcmp(name,l->st_name) != 0) && (scope != l->scope)) l = l->next;
    return l; // NULL is not found
}

/* Leave the current scope: step cur_scope down by one, but never below 0. */
void hide_scope()
{
    if (cur_scope <= 0) {
        return;
    }
    cur_scope -= 1;
}
/* Enter a nested scope: step cur_scope up by one. */
void incr_scope()
{
    cur_scope += 1;
}

/* print to stdout by default */ 
void dump(FILE * of)
{  
  int i; int count; // record whether first line prints or not.

  fprintf(of,"------------ ----------------- -------------\n");
  fprintf(of,"Name         Type              Line Numbers\n");
  fprintf(of,"------------ ----------------- -------------\n");

  for (i=0; i < SIZE; ++i){ 
    if (hash_table[i] != NULL){ 
        list_t *l = hash_table[i];
        while (l != NULL){ 
            RefList *t = l->lines;
            fprintf(of,"%-12s ",l->st_name);

            printf("%s\n", l->st_name); // print out all the names in the symbol table

            if (l->st_type == INT_TYPE) fprintf(of,"%-7s","int");
            else if (l->st_type == REAL_TYPE) fprintf(of,"%-7s","real");
            else if (l->st_type == STR_TYPE) fprintf(of,"%-7s","string");
            else if (l->st_type == LOGIC_TYPE)  fprintf(of,"%-7s","bool");
            else if (l->st_type == CONST_INT_TYPE) fprintf(of, "%-7s", "const_int"); // constant_int_type
            else if (l->st_type == CONST_REAL_TYPE) fprintf(of, "%-7s", "const_real"); // constant_real_type
            else if (l->st_type == CONST_STR_TYPE) fprintf(of, "%-7s", "const_string"); // constant_string_type
            else if (l->st_type == CONST_LOGIC_TYPE) fprintf(of, "%-7s", "const_bool"); // const_logic_type
            else if (l->st_type == ARRAY_TYPE){
                fprintf(of,"array of ");
                if (l->inf_type == INT_TYPE)           fprintf(of,"%-7s","int");
                else if (l->inf_type  == REAL_TYPE)    fprintf(of,"%-7s","real");
                else if (l->inf_type  == STR_TYPE)     fprintf(of,"%-7s","string");
                else if (l->inf_type == LOGIC_TYPE)    fprintf(of,"%-7s","bool");
                else fprintf(of,"%-7s","undef");
            }
            else if (l->st_type == FUNCTION_TYPE){
                fprintf(of,"%-7s %s","function returns ");
                if (l->inf_type == INT_TYPE)           fprintf(of,"%-7s","int");
                else if (l->inf_type  == REAL_TYPE)    fprintf(of,"%-7s","real");
                else if (l->inf_type  == STR_TYPE)     fprintf(of,"%-7s","string");
                else if (l->inf_type == LOGIC_TYPE)    fprintf(of,"-7%s","bool");
                else fprintf(of,"%-7s","undef");
            }
            else fprintf(of,"%-7s","undef"); // if UNDEF or 0

            count = 0;
            while (t != NULL){
                if (count == 0)
                {
                if (l->st_type == INT_TYPE || l->st_type == REAL_TYPE || l->st_type == STR_TYPE || l->st_type == UNDEF)
                    fprintf(of,"%13d ", t->lineno);
                else if (l->st_type == CONST_INT_TYPE || l->st_type == CONST_REAL_TYPE || l->st_type == CONST_STR_TYPE || l->st_type == CONST_LOGIC_TYPE)
                    fprintf(of,"%10d", t->lineno);
                else if (l->st_type == ARRAY_TYPE || l->st_type == FUNCTION_TYPE)
                    fprintf(of,"%4d", t->lineno);
                }
                else
                    fprintf(of,"%3d", t->lineno);
                count++;
                t = t->next;
            }
            fprintf(of,"\n");
            l = l->next;
        }
    }
  }
}
<hr/>

扫描器文件

%option noyywrap

%{
#include "symbols.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.tab.h"

/* Append the current lexeme to the line buffer.  The copy is bounded by
   the space left in buf, so an over-long source line is truncated in the
   listing instead of overflowing the buffer. */
#define LIST     strncat(buf, yytext, sizeof(buf) - strlen(buf) - 1)
/* Echo a token as "<t>".  The format has one %s, so the label t must be
   passed; calling printf without it is undefined behaviour. */
#define token(t) {LIST; printf("<%s>\n", t);}
/* Echo a token together with its integer / real / string value. */
#define tokenInteger(t, i) {LIST; printf("<%s: %d>\n", t, i);}
#define tokenReal(t, r) {LIST; printf("<%s: %lf>\n", t, r);}
#define tokenString(t, s) {LIST; printf("<%s: %s>\n", t, s);}

#define MAX_LINE_LENG 256

extern FILE* yyin;
extern FILE* yyout;
int linenum = 1;              /* number of the source line being scanned */
char buf[MAX_LINE_LENG];      /* text of the current line, for the listing */
char* tempStr;                /* scratch buffer used by the string-literal rule */
int indexForStr = 1;          /* read index used by the string-literal rule */
list_t* temp;
%}
%x Comment
%%
"//".*              {/* line comment: kept for the listing, no token is returned */ LIST;}

"/*"            {/* enter the exclusive Comment state */ BEGIN(Comment); LIST;}
<Comment>"*/"       {/* end of block comment: back to the initial state */ LIST; BEGIN(0);}
<Comment>\n             {/* newline inside a block comment: print the numbered line, reset the buffer */ LIST; printf("%d: %s\n", linenum++, buf); buf[0] = '\0';}
<Comment>.      {LIST;}

"bool"      {
                token("BOOL");
                return BOOL;
                }
"break"     {token("BREAK"); return BREAK;}
"char"      {token("CHAR"); return CHAR;}
"continue"  {token("CONTINUE"); return CONTINUE;}
"do"        {token("DO"); return DO;}
"else"      {token("ELSE"); return ELSE;}
"enum"      {token("ENUM"); return ENUM;}
"extern"    {token("EXTERN"); return EXTERN;}
"false"     {token("FALSE"); yylval.boolVal = 0; return FALSE;}
"float"     {
                token("FLOAT");
                return FLOAT;
                }
"for"       {token("FOR"); return FOR;}
"fn"        {token("FN"); return FN;}
"if"        {token("IF"); return IF;}
"in"        {token("IN"); return IN;}
"int"       {
                token("INT");
                return INT;
                }
"let"       {token("LET"); return LET;}
"loop"      {token("LOOP"); return LOOP;}
"match"     {token("MATCH"); return MATCH;}
"mut"       {token("MUT"); return MUT;}
"print"     {token("PRINT"); return PRINT;}
"println"   {token("PRINTLN"); return PRINTLN;}
"pub"       {token("PUB"); return PUB;}
"return"    {token("RETURN"); return RETURN;}
"self"      {token("SELF"); return SELF;}
"static"    {token("STATIC"); return STATIC;}
"str"       {
                token("STR");
                return STR;
                }
"struct"    {token("STRUCT"); return STRUCT;}
"true"      {token("TRUE"); yylval.boolVal = 1; return TRUE;}
"use"       {token("USE"); return USE;}
"where"     {token("WHERE"); return WHERE;}
"while"     {token("WHILE"); return WHILE;}

","         {token("','"); return ',';}
":"         {token("':'"); return ':';}
";"         {token("';'"); return ';';}
"("         {token("'('"); return '(';}
")"         {token("')'"); return ')';}
"["         {token("'['"); return '[';}
"]"         {token("']'"); return ']';}
"{"         {token("'{'"); return '{';}
"}"         {token("'}'"); return '}';}

"+"         {token("'+'"); return '+';}
"-"         {token("'-'"); return '-';}
"*"         {token("'*'"); return '*';}
"/"         {token("'/'"); return '/';}
"++"        {
            /* NOTE(review): '++' and the other two-character returns in the
               rules below are multi-character constants, whose int value is
               implementation-defined in C; confirm they match the token
               numbers the parser expects, or switch to named tokens. */
            token("'++'"); return '++';}
"--"        {token("'--'"); return '--';}
"%"     {token("'%'"); return '%';}

"<"         {token("'<'"); return LESS;}
"<="        {token("'<='"); return '<=';}
">="        {token("'>='"); return '>=';}
">"         {token("'>'"); return GREATER;}
"=="        {token("'=='"); return '==';}
"!="        {token("'!='"); return '!=';}

"&&"        {token("'&&'"); return '&&';}
"||"        {token("'||'"); return '||';}
"!"         {token("'!'"); return EXCLAMATION;}

"="         {token("'='"); return ASSIGN;}

"+="        {token("'+='"); return '+=';}
"-="        {token("'-='"); return '-=';}
"*="        {token("'*='"); return '*=';}
"/="        {token("'/='"); return '/=';}

"->"            {token("'->'"); return ARROW;}
"read"          {token("'READ'"); return READ;}

[A-Z_a-z]([A-Z_a-z]|[0-9])*             {
                                        /* Identifier: record this occurrence in the symbol table,
                                           then hand the parser the table entry itself. */
                                        insert(yytext, yyleng, UNDEF, linenum);
                                        yylval.symptr = lookup(yytext);
                                        tokenString("id", yylval.symptr->st_name);
                                        return ID;
                                        }

"0"|[0-9][0-9]*                         {
                                        /* Integer constant: decimal digits only. */
                                        sscanf(yytext, "%d", &yylval.intVal);
                                        tokenInteger("integer", yylval.intVal);
                                    return INTEGER;
                                        }

[0-9_]+"."[0-9_]+([Ee][+-]?[0-9_]+)?|[0-9_]+[Ee][+-]?[0-9_]+ {
                                        /* Real constant: digits '.' digits with an optional exponent,
                                           or digits followed by an exponent.  The earlier pattern
                                           accepted only ONE digit after the point and only a
                                           ONE-digit mantissa before an exponent, so "3.14" was
                                           split into the real 3.1 and the integer 4. */
                                        yylval.floatVal = atof(yytext);
                                        tokenReal("real", yylval.floatVal);
                                    return REAL;
                                        }

\"([\\.]|[^\\"])*\"                     {
                                        /* String constant.  The listing shows the text without its
                                           surrounding quotes; the parser receives a heap copy of the
                                           full lexeme (quotes included) in yylval.stringVal.

                                           The pattern guarantees both quotes are present, so the
                                           body is yyleng - 2 characters long.  The buffer holds the
                                           body plus its terminator; the terminator used to be
                                           written one byte past the end of the allocation, leaving
                                           the last in-bounds byte uninitialised. */
                                        size_t bodyLen = (size_t)yyleng - 2;

                                        tempStr = malloc(bodyLen + 1);
                                        if (tempStr == NULL)
                                        {
                                                fprintf(stderr, "line %d: out of memory\n", linenum);
                                                exit(EXIT_FAILURE);
                                        }
                                        memcpy(tempStr, yytext + 1, bodyLen);
                                        tempStr[bodyLen] = '\0';

                                        yylval.stringVal = strdup(yytext);
                                        tokenString("string", tempStr);
                                        free(tempStr);
                                        tempStr = NULL;

                                    return STRING;
                                        }

\n      {
        /* End of a source line: print it with its line number, advance the
           counter and start collecting the next line. */
        LIST;
        printf("%d: %s", linenum++, buf);
        buf[0] = '\0';
        }

[ \t]*  {/* blanks and tabs: kept for the listing only */ LIST;}

.       {
        /* Any character no other rule accepts: print what was collected of the
           line, report the character and stop.
           NOTE(review): this prints linenum+1 although the other rules print
           linenum for the current line -- confirm which is intended. */
        LIST;
        printf("%d:%s\n", linenum+1, buf);
        printf("bad character:'%s'\n",yytext);
        exit(-1);
        }
%%
<hr/>

解析器文件

%{
/* NOTE(review): this includes the symbol-table implementation file itself,
   so its functions are compiled as part of the parser's translation unit. */
#include "symbols.c"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>

#define Trace(t, line)        printf(t, line) // Trace where the error occurs and print the line number

#ifndef STRSIZE
#define STRSIZE     40
#endif

#ifndef PARAMSIZE
#define PARAMSIZE   40
#endif

extern FILE* yyin;      /* scanner input stream */
extern FILE* yyout;     /* scanner output stream; main() reuses it for dump.out */
extern int linenum;     /* current source line, maintained by the scanner */
extern int yylex();
void yyerror(char* msg);
%}

%union{
    char* stringVal;
    double floatVal;
    int intVal;
    int boolVal;
    list_t* symptr;
}

/* tokens */
%token <symptr> ID
%token <intVal> INTEGER
%token <floatVal> REAL
%token <stringVal> STRING
%token <boolVal> TRUE FALSE
%token INT FLOAT STR BOOL
%token BREAK CHAR CONTINUE DO ELSE
%token ENUM EXTERN FOR
%token FN IF IN LET 
%token LOOP MATCH MUT PRINT PRINTLN
%token RETURN SELF STATIC STRUCT
%token USE WHERE WHILE
%token READ PUB
%token LESS GREATER ASSIGN EXCLAMATION ARROW

/* precedence for operators: all left-associative, listed from lowest to
 * highest precedence.
 * NOTE(review): '||', '&&', '<=', '>=', '==' and '!=' are written as
 * multi-character literals; a Bison literal token is a single character,
 * and newer Bison versions may reject these -- consider named tokens. */
%left '||'
%left '&&'
%left EXCLAMATION
%left LESS '<=' '>=' GREATER '==' '!='
%left '+' '-'
%left '*' '/'
%left UMINUS

/* types */
%type <intVal> integer_exp
%type <floatVal> real_exp
%type <stringVal> string_exp
%type <boolVal> bool_exp

%start program              /* the initial entry point */

%%
program:        functions | global_declaration functions
                ;

global_declaration:     global_declaration constant_declaration
                        | global_declaration variable_declaration
                        | global_declaration array_declaration
                        | constant_declaration
                        | variable_declaration
                        | array_declaration
                        ;

local_declaration:      local_declaration constant_declaration
                        | local_declaration variable_declaration
                        | local_declaration array_declaration
                        | constant_declaration
                        | variable_declaration
                        | array_declaration
                        ;

block:          start local_declaration statements end               
                | start local_declaration end                         
                | start statements end                                
                | start end                                           
                ;

start:          '{'                                                 {
                                                                    incr_scope();
                                                                    }

end:            '}'                                                 {
                                                                    hide_scope();
                                                                    }
                ;

functions:      functions function                                      
                | function
                ;

function:       FN ID '(' ')' block                                 {
                                                                    /* A parameterless function definition.  `block` already
                                                                       covers the four body shapes (declarations and/or
                                                                       statements, or neither), so one alternative replaces
                                                                       the four copies of the same action, the last of which
                                                                       never closed its action brace.

                                                                       $2 is the symbol-table entry the scanner created for
                                                                       the name: still UNDEF means first definition, anything
                                                                       else means the name was already given a type. */
                                                                    if ($2->st_type == UNDEF)
                                                                    {
                                                                        $2->st_type = FUNCTION_TYPE;
                                                                        $2->inf_type = UNDEF;   /* no return type known */
                                                                    }
                                                                    else
                                                                    {
                                                                        Trace("line %d: Redeclaration of identifier.\n", linenum);
                                                                    }
                                                                    }
                ;

%%
/* Report `msg` on stderr, prefixed with the current source line number. */
void yyerror(char* msg)
{
    FILE *err = stderr;

    fprintf(err, "line %d: ", linenum);
    fprintf(err, "%s\n", msg);
}

int main(int argc, char** argv)
{
    /* create the hash table */
    create();

    /* open the source program file */
    if (argc != 2) {
        printf ("Usage: sc filename\n");
        exit(1);
    }
    yyin = fopen(argv[1], "r");         /* open input file */

    int flag;
    flag = yyparse();

    /* perform parsing */
    if (flag == 1)                      /* parsing */
        yyerror("Parsing error !");     /* syntax error */

    fclose(yyin);                       /* close input file */

    /* output symbol table */
    printf("\nSymbol table:\n");
    yyout = fopen("dump.out", "w");
    dump(yyout);
    fclose(yyout);

    return 0;
}

最佳答案

显然,问题发生在 dump 函数执行期间,并且是在 token 类型为 FUNCTION_TYPE 的时候。从调试输出(发生段错误时显然正在执行 dump)和您描述的改动(当动作把 st_type 字段设置为 FUNCTION_TYPE 时才出现问题)可以清楚地看出这一点。

目视检查 dump() 中与该条件对应的 if 分支,就能发现以下明显错误:

fprintf(of,"%-7s %s","function returns ");

这次 fprintf 调用的格式字符串中有两个 %s 转换,然而只提供了一个待插入的参数。

您应该问自己的真正问题是“如何在不花费大量时间或求助于外部专家的情况下轻松找到这样的愚蠢的拼写错误?”

首先,这类错误非常常见且易于检测,大多数编译器都会对此发出警告。因此,您的第一步是确保始终在启用警告的情况下进行编译(如果使用 gcc 或 clang,就加上 -Wall),并且认真阅读这些警告。

即使没有警告,使用调试器(如 gdb)也可以直接找到错误:只需在 dump 处设置断点并单步执行,直到出现段错误。

最后,如果您先把一个包含大量组件的大型复杂程序全部写完,然后才开始调试,您的生活会变得更加复杂。从长远来看,您会发现值得花时间用某种测试工具单独测试每个组件(例如符号表),并且只有在确信各个部分都能单独正常工作之后,才把它们组装成更复杂的程序。这样可以避免难以定位错误发生位置的情况(就像在本例中,您显然因为怀疑解析器生成器而分心,从而错过了与解析器无关的实际问题)。

<hr/>

顺便说一下,代码中出现 strncpy 是一个明确的危险信号,尽管在这种情况下,您似乎很幸运(或不幸)没有触发这个错误。strncpy 几乎从来都不是您想要的;即使它确实是您想要的,长度参数也应该是目标缓冲区能容纳的最长字符串长度,而不是输入字符串的长度。(strncpy 是为固定长度格式设计的,这就是它会把输出填充到指定长度的原因。)

如果使用输入字符串的长度,则会遇到两个问题:(1) 保证副本不会以 NUL 结尾,从而导致未定义的行为; (2) 在输入字符串太长的情况下,没有什么可以阻止副本溢出输出缓冲区。

即使使用正确,strncpy要求您手动 NUL 终止输出,这很麻烦。更好的解决方案是首先检查字符串是否太长( length < SIZE ),然后使用 strcpy ,这将正确地以 NUL 结尾。更好的是使 name 字段成为 char* 而不是数组,并动态分配正确长度的字符串(例如,参见 strdup ),从而避免任意限制标识符的大小。

关于c - Bison:当我尝试在函数语法规则中添加操作时,出现段错误 11,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/50137766/

相关文章:

c - malloc 链表时出现段错误

c - C中的函数内函数

c - 为什么返回一个负的errno? (例如返回-EIO)

c - 为什么编译器将变量存储在寄存器中?

c++ - 为什么 g++ 编译这个?

c - Bison 语法错误 mac

C:递归打开子目录并创建新文件

编译器相关 - 这两个 C 代码真的相同吗?

c - x1 + x2 +...+xn 的解析器和扫描器

c++ - Bison C++ - 减法