c - 关于 Flex、Bison 和 Segmentation Fault

标签 c bison flex-lexer yacc lex

我正在尝试为简化的 Java 语言运行我自制的编译器,但我总是以段错误告终。 目标是构建抽象语法树。我也尝试使用 valgrind 进行调试,但看起来该工具在我的 PC 上对 bison 和 flex 有一些问题。请帮助我。

Lex 文件:

%{
#include <stdio.h>
#include "abst.h"
#include "yacc.tab.h"
int yylineno;
int column = 0;
int nesting_comments = 0;
int max_comment_level = 0;
int total_comments_number = 0;

void count() {
    int i;
    for(i = 0; yytext[i] != '\0'; i++){
        if(yytext[i] == '\n')
        column = 0;
        else if(yytext[i] == '\t')
        column += 8 - (column % 8);
        else
        column++;
    }
    //printf("%s", yytext);     Uncomment this Line for printing the input stream
}

void yyerror(char const *s);

%}

DIGIT       [0-9]
LETTER      [a-zA-Z]
WHITESPACE  [ \t\r]

VINT         {DIGIT}+
VFLOAT       {VINT}+"."{DIGIT}*
VDOUBLE      {VFLOAT}E{VINT}
VSTRING      \".*\"

COMMENT1    "//".*
%x          COMMENT2
COMMENT3    "/""*"([^{"*""/"}])*"*""/"

REL_OP      "<"|"<="|"=="|"=>"|">"|"&&"|"||"|"&"|"|"|"!="
ADD_OP       "+"|"-"|"%"
MUL_OP       "*"|"/"
UNARY_OP     "++"|"--"
ASSIGN_OP    "+="|"-="|"*="|"/="|"%="
TYPE        "boolean"|"int"|"float"|"double"

ID {LETTER}({LETTER}|{DIGIT})*

%%
"if"                            { count(); return IF; }
"else"                          { count(); return ELSE; }
"while"                         { count(); return WHILE; }
"for"                           { count(); return FOR; }
"read"                          { count(); return READ; }
"write"                         { count(); return WRITE; }
"final"                         { count(); return FINAL; }

{VINT}                          { count(); yylval.ival = atoi(yytext); return IVALUE; }
{VFLOAT}|{VDOUBLE}              { count(); yylval.fval = atof(yytext); return FVALUE; }
{VSTRING}                       { count(); yylval.identifier = strdup(yytext); return STRING; }
"true"                          { count(); return TRUE; }
"false"                         { count(); return FALSE; }
"boolean"                       { count(); return BOOLEAN; }
"int"                           { count(); return INT; }
"float"                         { count(); return FLOAT; }
"double"                        { count(); return DOUBLE; }


{ID}                            { count(); yylval.identifier = strdup(yytext); return IDENT; }
{REL_OP}                        { count(); yylval.ival = getEnum(yytext); return RELOP; }
{ADD_OP}                        { count(); yylval.ival = getEnum(yytext); return ADDOP; }
{MUL_OP}                        { count(); yylval.ival = getEnum(yytext); return MULOP; }
{UNARY_OP}                      { count(); yylval.ival = getEnum(yytext); return UNARYOP; }

"="                             { count(); return '='; }
"!"                             { count(); return '!'; }
"("                             { count(); return '('; }
")"                             { count(); return ')'; }
"["                             { count(); return '['; }
"]"                             { count(); return ']'; }
"{"                             { count(); return '{'; }
"}"                             { count(); return '}'; }
","                             { count(); return ','; }
";"                             { count(); return ';'; }

{COMMENT1}  {
    total_comments_number++;
    count();
}

"(*"  {
    total_comments_number++;
    BEGIN(COMMENT2);
    printf("(* [BEGIN NESTED %d]\n", nesting_comments++);
    count();
}

<COMMENT2>[^*()]*   {
    printf("%s", yytext);  /* Eat other characters */
    count();
}
<COMMENT2>"(*"   {
    printf("(* [BEGIN NESTED %d]\n", nesting_comments);
    if(++nesting_comments > max_comment_level) {
        max_comment_level = nesting_comments;
    }
    count();
}
<COMMENT2>"*)"   {
    printf("\n*) [END NESTED %d]", --nesting_comments);
    if (nesting_comments == 0){
        BEGIN(INITIAL);
    }
    count();
}
<COMMENT2>[*()] {
    printf("%s", yytext);  /* Eat the other characters if it is not a comment delimiter */
    count();
}

{COMMENT3}  {
    total_comments_number++;
    count();
}
{WHITESPACE}+                     { count(); }
\n                                { count(); yylineno++; }
.                                 { yyerror("Lex"); }

%%

Yacc 文件:

%{
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdarg.h>
    #include "abst.h"

    extern char *yytext;
    extern int yylineno;
    extern FILE *yyin;
    extern int column;

    extern int nesting_comments;
    extern int max_comment_level;
    extern int total_comments_number;

    int yylex(void);
    //int yydebug=1;

    node *root;

    void yyerror (char const *s){
        fprintf (stderr, "%s Error at line %d:%d\n", s, yylineno, column);
        fprintf (stderr, "Text was: %s\n", yytext);
        exit(EXIT_FAILURE);
    }
%}

%union {
    char *identifier;
    int ival;
    float fval;

    struct _node *node;
}

%type <identifier> IDENT STRING
%type <ival> IVALUE MULOP RELOP ADDOP UNARYOP
%type <fval> FVALUE
%type <node> program compStmt varDecList varListType type varList ident identArray stmtList
%type <node> statement assignStmt ifStmt elsePart whileStmt forStmt ioStmt ioCall paramList
%type <node> expr simpleExpr term factor mulop addop relop

%token IF ELSE WHILE FOR WRITE READ FINAL
%token IVALUE FVALUE TRUE FALSE STRING
%token BOOLEAN INT FLOAT DOUBLE
%token IDENT

%right UNARYOP '!' '='
%left MULOP ADDOP RELOP
%left '[' ']' '(' ')'

%start program

%%
program
:   compStmt                        { $$ = $1; root = $1;}
;

compStmt
:   '{' varDecList stmtList '}'     { $$ = new_node(COMP_STMT, (node*[]){ $2, $3, NULL });}
;


varDecList
:   varListType varDecList          { $$ = new_node(DECLARATION, (node*[]){ $1, $2, NULL }); }
|
;

varListType
:   type varList                    { $$ = new_node(VAR_LIST_TYPE, (node*[]){ $1, $2, NULL }); }
|   FINAL type varList              { $$ = new_node(VAR_LIST_TYPE, (node*[]){ new_string_node(TYPE, "final") , $2, $3, NULL }); }
;

type
:   BOOLEAN                         { $$ = new_string_node(TYPE, "boolean"); }
|   INT                             { $$ = new_string_node(TYPE, "int"); }
|   FLOAT                           { $$ = new_string_node(TYPE, "float"); }
|   DOUBLE                          { $$ = new_string_node(TYPE, "double"); }
;

varList
:   ident ',' varList               { $$ = new_node(VAR_LIST, (node*[]){ $1, $3, NULL }); }
|   ident ';'                       { $$ = new_node(VAR_LIST, (node*[]){ $1, NULL }); }
|   ident '=' expr ';'              { $$ = new_node(VAR_LIST, (node*[]){ $1, $3, NULL }); }
;

ident
:   IDENT                           { $$ = new_string_node(VAR, $1); }
|   identArray '[' simpleExpr ']'   { $$ = new_node(ARRAY, (node*[]){ $1, $3, NULL }); }
;

identArray
:   IDENT                           { $$ = new_string_node(VAR, $1); }
;

stmtList
:   statement stmtList              { $$ = new_node(STATEMENT_LIST, (node*[]){ $1, $2, NULL }); }
|   statement                       { $$ = new_node(STATEMENT_LIST, (node*[]){ $1, NULL }); }
;

statement
:   assignStmt ';'                  { $$ = $1; }
|   compStmt                        { $$ = $1; }
|   ifStmt                          { $$ = $1; }
|   whileStmt                       { $$ = $1; }
|   forStmt                         { $$ = $1; }
|   ioStmt                          { $$ = $1; }
;

assignStmt
:   ident '=' expr                  { $$ = new_node(ASSIGN_STMT, (node*[]){ $1, $3, NULL }); }
|   ident UNARYOP                   { $$ = new_node(ASSIGN_STMT, (node*[]){ $1, new_int_node(OP, $2), NULL }); }
;

ifStmt
:   IF '(' expr ')' statement elsePart {
        if($6->type != NONE){
            $$ = new_node(IF_STMT, (node*[]){ $3, $5, $6, NULL });
        }
        else {
            $$ = new_node(IF_STMT, (node*[]){ $3, $5, NULL });
        }
    }
;

elsePart
:   ELSE statement                  { $$ = $2; }
|                                   { $$ = new_none(); }
;

whileStmt
:   WHILE '(' expr ')' statement    { $$ = new_node(WHILE_STMT, (node*[]){ $3, $5, NULL }); }
;

forStmt
:   FOR '(' assignStmt ';' expr ';' assignStmt ')' statement {
                                      $$ = new_node(FOR_STMT, (node*[]){ $3, $5, $7, $9, NULL });
    }
;

ioStmt
:   ioCall '(' paramList ')' ';'    { $$ = new_node(IOFUNC, (node*[]){ $1, $3, NULL }); }
;

ioCall
:   READ                            { $$ = new_int_node(IOFUNCCALL, 0); }
|   WRITE                           { $$ = new_int_node(IOFUNCCALL, 1); }
;

paramList
:   ident ',' paramList             { $$ = new_node(PARAM_LIST, (node*[]){ $1, $3, NULL }); }
|   STRING ',' paramList            { $$ = new_node(PARAM_LIST, (node*[]){ new_string_node(STRING_CONST, $1), $3, NULL }); }
|   ident                           { $$ = new_node(PARAM_LIST, (node*[]){ $1, NULL }); }
|   STRING                          { $$ = new_string_node(STRING_CONST, $1); }
;

expr
:   simpleExpr relop simpleExpr     { $$ = new_node(EXPR, (node*[]){ $1, $2, $3, NULL }); }
|   simpleExpr                      { $$ = new_node(EXPR, (node*[]){ $1, NULL }); }
;

simpleExpr
:   term addop simpleExpr           { $$ = new_node(EXPR, (node*[]){ $1, $2 ,$3, NULL }); }
|   term                            { $$ = new_node(EXPR, (node*[]){ $1, NULL }); }
;

term
:   factor mulop term               { $$ = new_node(EXPR, (node*[]){ $1, $2, $3, NULL }); }
|   factor                          { $$ = new_node(EXPR, (node*[]){ $1, NULL }); }
;

factor
:   IVALUE                          { $$ = new_int_node(INT_CONST, $1); }
|   FVALUE                          { $$ = new_float_node(REAL_CONST, $1); }
|   TRUE                            { $$ = new_int_node(BOOL_CONST, 1); }
|   FALSE                           { $$ = new_int_node(BOOL_CONST, 0); }
|   ident                           { $$ = $1; }
|   '!' factor                      { $$ = new_node(EXPR, (node*[]){ $2, NULL }); }
|   '-' factor                      { $$ = new_node(EXPR, (node*[]){ $2, NULL }); }
|   '(' expr ')'                    { $$ = new_node(EXPR, (node*[]){ $2, NULL }); }
;

mulop
:   MULOP                           { $$ = new_int_node(OP, $1); }
;

addop
:   ADDOP                           { $$ = new_int_node(OP, $1); }
;

relop
:   RELOP                           { $$ = new_int_node(OP, $1); }
;

%%

int main(int argc, char **argv) {    
    if ( argc > 1 )
        yyin = fopen( argv[1], "r" );
    else
        yyin = stdin;
    yyparse();
    printf("\nCOMMENT-ANALYSIS:\n");
    printf("Total number of comments: %d\n", total_comments_number);
    printf("Maximum nested comment level: %d\n", max_comment_level);
    printf("\n");

    printNode(root);

    return 0;
}

我在其中清除了一些结构和枚举的 header :

#ifndef AST_TYPES_H
#define AST_TYPES_H

typedef enum {
    COMP_STMT = 0,
    ASSIGN,
    ASSIGN_STMT,
    IF_STMT,
    FOR_STMT,
    WHILE_STMT,
    STATEMENT,
    STATEMENT_LIST,
    CONST,
    VAR,
    ARRAY,
    VAR_LIST,
    TYPE,
    EXPR,
    SIMPLE_EXPR,
    INT_CONST,
    REAL_CONST,
    BOOL_CONST,
    STRING_CONST,
    IDENTIFIER,
    OP,
    IOFUNC,
    IOFUNCCALL,
    PARAM_LIST,
    DECLARATION,
    VAR_LIST_TYPE,
    NONE,
} node_type;

typedef enum {
    PLUS = 0,
    MINUS,
    MUL,
    DIV,
    MOD,
    LT,
    LE,
    GT,
    GE,
    EQ,
    NE,
    AND,
    OR,
    PLUSPLUS,
    MINUSMINUS,
    NOT,
} operator;

typedef union _node_value{
    int iValue; /*integer, true, false, compOp, addOp, mulOp */
    float fValue; /*number*/
    char* identifier; /*identifier*/
    /* list of BNF right-hand side symbols of nonterminal type */
    struct _node **body;
} node_value;

typedef struct _node {
    node_type type;
    int size;
    node_value value;
} node;

void printNode(node *the_node);
operator getEnum(char *value);
char *printEnum(node_type type);
node* new_none();
node* new_int_node(node_type type, int value);
node* new_float_node(node_type type, float value);
node* new_string_node(node_type type, char *value);
node* new_node(node_type type, node **nodes);

#endif //AST_TYPES_H

实现头函数的c文件:

#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include "abst.h"

node* new_none(){
    node *the_node = (node*) malloc(sizeof(node));
    if (the_node == NULL) {
        fprintf(stderr, "could not allocated memory for node\n");
        exit(EXIT_FAILURE);
    }
    the_node->type = NONE;
    the_node->size = 0;
    return the_node;
}

operator getEnum(char *value){
    switch (value[0]) {
        case '+':
            if(value[1] == '+')
                return PLUSPLUS;
            else
                return PLUS;
        case '-':
            if(value[1] == '-')
                return MINUSMINUS;
            else
                return MINUS;
        case '*':
            return MUL;
        case '/':
            return DIV;
        case '%':
            return MOD;
        case '<':
            if(value[1] == '=')
                return LE;
            else
                return LT;
        case '>':
            if(value[1] == '=')
                return GE;
            else
                return GT;
        case '=':
            if(value[1] == '!')
                return NE;
            else
                return EQ;
        case '&':
            return AND;
        case '|':
            return OR;
        case '!':
            return NOT;
    }
    return -1;
}

node* new_int_node(node_type type, int value){
    node *the_node = (node*) malloc(sizeof(node));
    if (the_node == NULL) {
        fprintf(stderr, "could not allocated memory for node\n");
        exit(EXIT_FAILURE);
    }
    the_node->type = type;
    the_node->size = 0;

    switch(type){
            case INT_CONST:
            case OP:
            case BOOL_CONST:
            case IOFUNCCALL:
            the_node->value.iValue = value;
            break;
        default:
            fprintf (stderr, "Node-Type %s is wrong for int value %d\n", printEnum(type), value);
    }
    return the_node;
}

node* new_float_node(node_type type, float value){
    node *the_node = (node*) malloc(sizeof(node));
    if (the_node == NULL) {
        fprintf(stderr, "could not allocated memory for node\n");
        exit(EXIT_FAILURE);
    }
    the_node->type = type;
    the_node->size = 0;

    if(type == REAL_CONST)
        the_node->value.fValue = value;
    else
        fprintf (stderr, "Node-Type %s is wrong for float value %f\n", printEnum(type), value);
    return the_node;
}

node* new_string_node(node_type type, char* value){
    node *the_node = (node*) malloc(sizeof(node));
    if (the_node == NULL) {
        fprintf(stderr, "could not allocated memory for node\n");
        exit(EXIT_FAILURE);
    }
    the_node->type = type;
    the_node->size = 0;

    switch(type){
        case VAR:
        case STRING_CONST:
        case TYPE:
            the_node->value.identifier = value;
            break;
        default:
            fprintf (stderr, "Node-Type %s is wrong for String %s\n", printEnum(type), value);
    }
    return the_node;
}

node* new_node(node_type type, node **nodes){
    int counter = 0;
    while(nodes[counter] != NULL){
        counter++;
        if(counter > 4){
            fprintf (stderr, "Could not find NULL for %s\n", printEnum(type));
            exit(EXIT_FAILURE);
        }
    }

    node *the_node = (node*) malloc(sizeof(node));
    the_node->type = type;
/*
    switch(type){
        case STATEMENT_LIST:
        case DECLARATION:
        case VAR_LIST:
            if(counter == 2 && nodes[1]->type != EXPR){
                int size = nodes[1]->size + 1;
                the_node->size = size;

                the_node->value.body = malloc(size*sizeof(node*));
                the_node->value.body[0] = nodes[0];

                for (int i = 1; i < size; i++) {
                    the_node->value.body[i] = nodes[1]->value.body[i-1];
                }
                return the_node;
            }
    }
   */ 
    the_node->value.body = (node**) malloc(counter*sizeof(node*));
    the_node->size = counter;

    for (int i = 0; i < counter; i++) {
        the_node->value.body[i] = nodes[i];
    }
    return the_node;
}

void printNode(node *the_node){
    if(the_node->type > NONE){
        fprintf (stderr, "Wrong Type %d\n", the_node->type);
        return;
    }

    printEnum(the_node->type);
    switch(the_node->type){
        case INT_CONST:
            printf("%s with INT-value: %d\n", printEnum(the_node->type), the_node->value.iValue);
            break;
        case REAL_CONST:
            printf("%s with FLOAT-value: %f\n", printEnum(the_node->type), the_node->value.fValue);
            break;
        case BOOL_CONST:
            printf("%s with BOOL-value: %d\n", printEnum(the_node->type), the_node->value.iValue);
            break;
        case VAR:
            printf("%s with Identifier: %s\n", printEnum(the_node->type), the_node->value.identifier);
            break;
        case OP:
            printf("%s with Operation: %d\n", printEnum(the_node->type), the_node->value.iValue);
            break;
        default:
            printf("%s with size %d\n", printEnum(the_node->type), the_node->size);
            for (int i = 0; i < the_node->size; i++) {
                printNode(the_node->value.body[i]);
            }
    }
}

char *printEnum(node_type type){
    switch (type) {
        case COMP_STMT:
            return "COMP_STMT";
        case ASSIGN:
            return "ASSIGN";
        case ASSIGN_STMT:
            return "ASSIGN_STMT";
        case IF_STMT:
            return "IF_STMT";
        case FOR_STMT:
            return "FOR_STMT";
        case WHILE_STMT:
            return "WHILE_STMT";
        case STATEMENT:
            return "STATEMENT";
        case STATEMENT_LIST:
            return "STATEMENT_LIST";
        case CONST:
            return "CONST";
        case VAR:
            return "VAR";
        case ARRAY:
            return "ARRAY";
        case VAR_LIST:
            return "VAR_LIST";
        case TYPE:
            return "TYPE";
        case EXPR:
            return "EXPR";
        case INT_CONST:
            return "INT_CONST";
        case REAL_CONST:
            return "REAL_CONST";
        case BOOL_CONST:
            return "BOOL_CONST";
        case STRING_CONST:
            return "STRING_CONST";
        case IDENTIFIER:
            return "IDENTIFIER";
        case OP:
            return "OP";
        case IOFUNC:
            return "IOFUNC";
        case IOFUNCCALL:
            return "IOFUNCCALL";
        case PARAM_LIST:
            return "PARAM_LIST";
        case DECLARATION:
            return "DECLARATION";
        case VAR_LIST_TYPE:
            return "VAR_LIST_TYPE";
        case NONE:
            return "NONE";
        default:
            printf("\nUNDEFINED: %d\n", type);
            return "UNDEFINED";
    }
}

我这样编译程序:

bison -d --debug --verbose yacc.y
flex lex.l
gcc yacc.tab.c lex.yy.c abst.c -std=c11 -Wall -o run -ll -g
./run < sample.java

这是应该编译的 java 语法的示例文件:

/* This program that implements a few well-known algorithms */

{ int x, y, z;
  final int n = 10;
  int a[n];

  // greatest common divisor
  { read(x, y);
    while(x * y != 0)
      if(x > y)
        x = x - y;
      else y = y - x;
    if(x == 0)
      write("gcd = ", y);
    else write("gcd = ", x);
  }

  // factorial calculation
  { read(x);
    y = 1;
    for(z = 2; z <= x; z++)
      y = y * z;
    write(x, " != ", y);
  }


  // prime number verification
  { int k;
    boolean prime;
    read(x);
    k = 2;
    prime = true;
    while((k < x/2) && prime) {
      if(x % k == 0)
        prime = false;
      k++;
    }
    write(x, " is prime is ",  prime);
  }

  // maximum array element
  { int i, max;
    for(i = 0; i < n; i++)
      read(a[i]);
    max = a[0];
    for(i = 0; i < n; i++)
      if(max > a[i])
        max = a[i];
    write("max = ", max);
  }


  // bubble sort
  { boolean k;
    float r;
    int i;
    k = true;
    while(k) {
      k = false;
      for(i = 0; i < n-1; i++)
        if(a[i] > a[i+1]) {
          b = a[i];
          a[i] = a[i+1];
          a[i+1] = b;
          k = true;
        }
    }
    write("sorted array: ");
    for(i = 0; i < n; i++)
      write(a[i]);
  }
}

最佳答案

不要直接从命令行运行程序,而是:

gdb run

然后在 (gdb) 提示下执行

run < sample.java

这将产生以类似结尾的输出:

Program received signal SIGSEGV, Segmentation fault.
0x00007ffff7a66c36 in vfprintf () from /lib/x86_64-linux-gnu/libc.so.6
(gdb)

告诉您问题出在传递给 printf 的内容中。从这里,使用 bt 命令获取堆栈回溯:

#0  0x00007ffff7a66c36 in vfprintf () from /lib/x86_64-linux-gnu/libc.so.6
#1  0x00007ffff7a6f299 in printf () from /lib/x86_64-linux-gnu/libc.so.6
#2  0x0000000000404b14 in printNode (the_node=0x60c870) at abst.c:178
#3  0x0000000000404b94 in printNode (the_node=0x60c930) at abst.c:186
#4  0x0000000000404b94 in printNode (the_node=0x60c970) at abst.c:186
#5  0x0000000000404b94 in printNode (the_node=0x60ca70) at abst.c:186
#6  0x0000000000404b94 in printNode (the_node=0x60d190) at abst.c:186
#7  0x0000000000404b94 in printNode (the_node=0x60d6d0) at abst.c:186
#8  0x0000000000404b94 in printNode (the_node=0x60d710) at abst.c:186
#9  0x0000000000404b94 in printNode (the_node=0x60d750) at abst.c:186
#10 0x0000000000404b94 in printNode (the_node=0x613500) at abst.c:186
#11 0x0000000000404b94 in printNode (the_node=0x613540) at abst.c:186
#12 0x00000000004024a7 in main (argc=1, argv=0x7fffffffe0a8) at yacc.y:208

看看你是怎么进入这个状态的。现在使用 up 命令两次(在堆栈跟踪中“向上”移动两帧——在上面的列表中向下),你在:

#2  0x0000000000404b14 in printNode (the_node=0x60c870) at abst.c:178
178             printf("%s with Identifier: %s\n", printEnum(the_node->type), the_node->value.identifier);

从这里您可以使用 p 命令打印事物的值:

(gdb) p the_node->value
$1 = {
  iValue = 2, 
  fValue = 2.80259693e-45, 
  identifier = 0x2 <error: Cannot access memory at address 0x2>, 
  body = 0x2
}

它告诉您(紧迫的)问题是什么——您正在从 union 访问 identifier,而它实际上持有可能是 iValue 的东西,所以您重新将垃圾指针传递给导致崩溃的 printf

有关 gdb 命令的更多信息,请尝试使用 help 命令...

关于c - 关于 Flex、Bison 和 Segmentation Fault,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37766122/

相关文章:

regex - 如何使用 Flex 处理不同的换行约定

c - C中的 'short'数据类型是什么?

c - 如何使用条件#error

grammar - 如何在不允许所有字符的情况下处理 Bison Grammar 中的换行符?

c++ - Qt项目如何集成flex和bison?

parsing - 耶尔瓦尔和联盟

compiler-errors - flex无法识别的期权名词

c - 向客户端 C 显示文件夹服务器的内容

c - 使用 fputc() 写入一个字节

从 INTEGER 到 int 的代码生成