bison - Lexx 和 Yacc 将 C 语法解析为符号表

标签 bison yacc lex

我正在尝试使用 bison 和 lex 创建一个 c 类型解析器。是的,这是一项学校作业,但我很迷失,而且我在网上完成学校作业,所以我没有得到太多帮助。我需要这个来将信息解析到表中,但我很确定我的 parse.y 文件中有错误,因为它没有将信息加载到表中,它说没有代码。我知道我的 .y 文件缺少一些语法,但我认为其中一些应该按原样加载到符号表中。

运行 make 文件时的输出为:

default:
    clear
    yacc -d -v --debug parse.y
    lex -l scan.l
    gcc -o cfc symtab.c lex.yy.c y.tab.c
clean:
    $(RM) cfc *.o lex.yy.c parse.tab.c y.tab.h y.output dump.symtab

输出到屏幕:

> yacc -d -v --debug parse.y parse.y:39 parser name defined to default
> :"parse" conflicts:  2 reduce/reduce lex -l scan.l gcc -o cfc symtab.c
> lex.yy.c y.tab.c

通过解析器传递 60.cf:

> semantic error cnt: 0     lines of code: -1

解析器的结果是符号表为空,输出内容如下:

> Starting parse 
> Entering state 0 
> Reading a token: Next token is 292 (INT)
> parse error: line 0: Shifting error token, Entering state 1
> Reducing via rule 3 (line 57), error  -> block

加载60.cf的示例代码:

int foo()
{
    while ( i >= 0 )  {
       i = i * 1;
       c = c + 1;
    } 
    if ( c >= 0 )  
    { ; }
    else
    { i = 7; 
      c = i;
    }
    return 0;
} 

这是 scan.l 文件:

%option yylineno

%{
#ifndef TRUE
#define TRUE 1 
#endif

#ifndef FALSE 
#define FALSE 0 
#endif

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "symtab.h"
#include "y.tab.h"

int badtoken_cnt = 0;
int token_cnt = 0;
int col_cnt = 0;
int lineno = 0;

%}

comment     \/\*([^*]|\n)*\*\/
digit       [0-9]
ichar       [A-Z_a-z]
integer     {digit}+
newline     \n
strchar     ([ ~]|\\n)
identifier  {ichar}([0-9]|{ichar})*
whitespace  [ \t]+
float       ([+-]?{digit}+)?\.{digit}*(e?[+-]?{digit}+)?
chrliteral  '([!*]|\\n)'
nullstring  \"\"
escquote    [^"]*\\\"[^"]*
strliteral  \"[^"]*{escquote}*\"
%%

"if"            { return IF;}
"then"          { return THEN;}
"else"          { return ELSE;}
"while"         { return WHILE;}
"return"        { return RETURN;}
"break"         { return GOTO;}
"goto"          { return GOTO;}
"read"          { return READ;}
"write"         { return WRITE;}
"float"         { return REAL;}
"int"           { return INT;}
"void"          { return VOID;}
"char"          { return CHAR;}

"="             { return ASSIGN;}
"!="            { return NE;}
"=="            { return EQ;}
"<"             { return LT;}
"<="            { return LE;}
">"             { return GT;}
">="            { return GE;}
"&&"            { return AND;}
"||"            { return OR;}

"+"             { return PLUS;}
"-"             { return MINUS;}
"*"             { return TIMES;}
"/"             { return OVER;}
"%"             { return MOD;}

"{"             { return LBRACE;}
"}"             { return RBRACE;}
"["             { return LBRACK;}
"]"             { return RBRACK;}
"("             { return LPAREN;}
")"             { return RPAREN;}
";"             { return SEMI;}
","             { return COMMA;}

{float}         { 
                  yylval.tokname = malloc(sizeof(yytext));
                  strncpy(yylval.tokname,yytext,yyleng);
                  printf("yylval: %s\n",yylval.tokname);
                  insert(yytext, yyleng, REAL_TYPE, lineno);
                  printf("yytext: %s\n",yytext);
                  return FLOAT;
                }
{integer}       { 
                  yylval.tokname = malloc(sizeof(yytext));
                  printf("yylval: %s\n",yylval.tokname);
                  strncpy(yylval.tokname,yytext,yyleng);
                  insert(yytext, yyleng, INT_TYPE, lineno);
                  printf("yytext: %s\n",yytext); 
                  return INTEGER;
                }


{chrliteral}    { 
                  yylval.tokname = malloc(sizeof(yytext));
                  strncpy(yylval.tokname,yytext,yyleng);
                  printf("yylval: %s\n",yylval.tokname);
                  insert(yytext, yyleng, -1, lineno);
                  printf("yytext: %s\n",yytext);
                  return CHRLIT;
                }

{nullstring}    { 
                  yylval.tokname = malloc(sizeof(yytext));
                  strncpy(yylval.tokname,yytext,yyleng);
                  printf("yylval: %s\n",yylval.tokname);
                  insert(yytext, yyleng, -1, lineno);
                  printf("yytext: %s\n",yytext);
                  return STRLIT;
                }

{strliteral}    { 
                  yylval.tokname = malloc(sizeof(yytext));
                  strncpy(yylval.tokname,yytext,yyleng);
                  printf("yylval: %s\n",yylval.tokname);
                  insert(yytext, yyleng, STR_TYPE, lineno);
                  printf("yytext: %s\n",yytext);
                  return STRLIT;
                }

{identifier}    { 

                  return IDENT;
                }
{newline}       { col_cnt = 1; }

{whitespace}    { col_cnt+=yyleng; }

{comment}       { col_cnt = 0; }

"//"            { /* handle C++ style comments */
                    char c;
                    do { c = input();
                    } while (c != '\n');
                    lineno++;
                }

.               { return ERROR;}

%%

这是 parse.y 文件,我认为这是错误所在:

%{

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.h"

#define DEBUG 0
#define TRUE 1
#define FALSE 0
#define MAX_MSG_LEN 50
#define YYDEBUG 1

int errcnt = 0;
char errmsg[40];
extern char *yytext;
extern FILE *yyin;
extern FILE *yyout;
extern int yyparse();
extern int lineno;
int yydebug = 1;
int t;
%}

/* no warning for fewer than 1 shift/reduce conflicts and 0 reduce/reduce */ 
%expect 1 
%union { int tokid;
         char *tokname;
       }

%token <tokname> IDENT NUMBER
%token <tokid> ASSIGN PLUS LBRACE RBRACE LPAREN RPAREN SEMI ERROR FLOAT INTEGER 
/* ADDED */
%token <tokid> IF THEN ELSE WHILE RETURN GOTO READ WRITE VOID CHAR
/* ADDED */
%token <tokid> NE EQ LT LE GT GE AND OR MINUS TIMES OVER MOD INT REAL
/* ADDED */
%token <tokid> LBRACK RBRACK COMMA CHRLIT STRLIT
%type <tokid> block stmt_seq stmt decl expr term assignmnt decltype error

%start block

%% 

block       : LBRACE stmt_seq RBRACE  
            | LPAREN stmt_seq RPAREN  
            | error { yyerrok; return 0; } 
            ;

stmt_seq    : stmt_seq stmt SEMI
            | stmt SEMI
            | error  { yyerrok; return 0;} 
            ;

stmt        : expr
            | decl
            | assignmnt { $$ = $1; }
            | error  { yyerrok; return 0;} 
            ;

decl        : decltype IDENT { 
                setType($2,$1);
                fprintf(stdout,"set decltype to: %d for %s\n",$$,$2);  
            }
            ;

expr        : expr PLUS term 
              { /* add constraint here */ }

            | term { $$ = $1; } 
            | error    { yyerrok; return 0;} 
            ;

assignmnt   : IDENT ASSIGN expr  
              { /* add constraint here */ }
            ;

term        : NUMBER { $$ = lookupType($1); }

            | IDENT  { $$ = lookupType($1); }
            ;

decltype    : INTEGER  { $$ = INT_TYPE; }
            | FLOAT { $$ = REAL_TYPE; }
            ;

%%

int main( int argc,char *argv[] )
{
   strcpy(errmsg,"type error\n");
   int i;
   if(argc < 2) {
      printf("Usage: ./cfc <source filename>\n");
      exit(0);
   }
   FILE *fp = fopen(argv[1],"r");
   if(!fp) {
     printf("Unable to open file for reading\n");
     exit(0);
   }
   yyin = fp;

   fp = fopen("dump.symtab","w");
   if(!fp) {
     printf("Unable to open file for writing\n");
     exit(0);
   }

   int flag = yyparse();

   /* dump symtab for debugging if necessary  */
   symtab_dump(fp);  
   lineno--;  /* don't count the last newline */ 
   printf("\nsemantic error cnt: %d \tlines of code: %d\n",errcnt,lineno);

   /* cleanup */
   fclose(fp);
   fclose(yyin);

   return flag;
}


yywrap()
{
   return(1);
}

int yyerror(char * msg)
{ fprintf(stderr,"%s: line %d: \n",msg,lineno);
  return 0;
}

如果您需要它,这是我正在使用的符号表:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.h"

/* maximum size of hash table */
#define SIZE 200 
#define MAXTOKENLEN 40

/* power of two multiplier in hash function */
#define SHIFT 4

/* the hash function */
static int hash ( char * key )
{ int temp = 0;
  int i = 0;
  while (key[i] != '\0')
  { temp = ((temp << SHIFT) + key[i]) % SIZE;
    ++i;
  }
  return temp;
}

/* a linked list of references (line nos) for each variable */
typedef struct RefListRec { 
     int lineno;
     struct RefListRec * next;
     /* ADDED */
     int type;
} * RefList;


/* hash entry holds variable name and its reference list */
typedef struct HashRec { 
     char st_name[MAXTOKENLEN];
     int st_size;
     RefList lines;
     int st_value;
     /* ADDED */
     int st_type;
     struct HashRec * next;
} * Node;

/* the hash table */
static Node hashTable[SIZE];

 /* insert an entry with its line number - if entry
  *  already exists just add its reference line no.  
  */
void insert( char * name, int len, int type, int lineno )
{ 
  /* ADDED */
  /*int len = strlen(name);*/
  int h = hash(name);
  Node l =  hashTable[h];
  while ((l != NULL) && (strcmp(name,l->st_name) != 0))
    l = l->next;
  if (l == NULL) /* variable not yet in table */
  { l = (Node) malloc(sizeof(struct HashRec));
    strncpy(l->st_name, name, len);  
    /* ADDED */
    l->st_type = type;
    l->lines = (RefList) malloc(sizeof(struct RefListRec));
    l->lines->lineno = lineno;
    l->lines->next = NULL;
    l->next = hashTable[h];
    hashTable[h] = l; }
  else /* found in table, so just add line number */
  { RefList t = l->lines;
    while (t->next != NULL) t = t->next;
    t->next = (RefList) malloc(sizeof(struct RefListRec));
    t->next->lineno = lineno;
    t->next->next = NULL;
  }
} 

/* return value (address) of symbol if found or -1 if not found */
int lookup ( char * name )
{ int h = hash(name);
  Node l =  hashTable[h];
  while ((l != NULL) && (strcmp(name,l->st_name) != 0))
    l = l->next;
  if (l == NULL) return -1;
  else return l->st_value;
}

/* return type value of symbol or -1 if symbol not found */
int lookupType( char * name )
{
  int h = hash(name);
  Node l =  hashTable[h];
  while ((l != NULL) && (strcmp(name,l->st_name) != 0))
    l = l->next;
  if (l == NULL) return -1;
  else return l->st_type;
}

/* set datatype of symbol returns 0 if symbol not found */
int setType( char * name, int t )
{
   int h = hash(name);
   Node l =  hashTable[h];
   while ((l != NULL) && (strcmp(name,l->st_name) != 0))
     l = l->next;
   if (l == NULL) return -1;
   else {
     l->st_type = t;
     return 0;
   }
}

/* print to stdout by default */ 
void symtab_dump(FILE * of) {  
  int i;
  fprintf(of,"------------ ------ ------------\n");
  fprintf(of,"Name         Type   Line Numbers\n");
  fprintf(of,"------------ ------ -------------\n");
  for (i=0; i < SIZE; ++i)
  { if (hashTable[i] != NULL)
    { Node l = hashTable[i];
      while (l != NULL)
      { RefList t = l->lines;
        fprintf(of,"%-12s ",l->st_name);

        if (l->st_type == INT_TYPE)
           fprintf(of,"%-7s","int ");
        if (l->st_type == REAL_TYPE)
           fprintf(of,"%-7s","real");
        if (l->st_type == STR_TYPE)
           fprintf(of,"%-7s","string");


        while (t != NULL)
        { fprintf(of,"%4d ",t->lineno);
          t = t->next;
        }
        fprintf(of,"\n");
        l = l->next;
      }
    }
  }
} 

最佳答案

由于竞争的错误产生而发生冲突。您的 stmt 会产生 error。但是stmt也派生了expr,而expr派生了error。将其放入 expr 中。

查看 yacc 生成的 y.output 文件(因为您传递了 -v)。

您的语法不可能与输入程序匹配,因为您的起始符号派生出大括号或括号括起来的语句 block ,而不是外部函数定义的序列。您需要添加将处理函数定义的语法片段:decltype IDENT arglist body,等等。

一件小事立即脱颖而出:

comment     \/\*([^*]|\n)*\*\/

您是说评论不能包含星号。

匹配真实 C 注释的正则表达式为:[/][*]([^*]|[*]*[^*/])*[*]+[/] (但请注意,这没有考虑 Lex 的换行约定)。

您不需要为单字符标记提供标记名称。这只会增加语法的冗长,你必须写 LBRACE 之类的东西,而不仅仅是“{”。

关于bison - Lexx 和 Yacc 将 C 语法解析为符号表,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/10640290/

相关文章:

c - 弹性默认规则

在 centos 9 上找不到 libl -/usr/bin/ld : cannot find -ll

c - 弹性/Bison : segmentation fault core dump

parsing - Bison 和语法 : replaying the parse stack

c - 我们如何定义用于识别给定系列中特定序列的规则?

bison - Lex/Yacc : Print message before input

c - 为什么 Visual Studio 2012 在未更改源文件时运行自定义构建步骤?

parsing - yacc/bison LALR(1) 算法如何处理 "empty"规则?

java - 识别 JFlex 1.4.3 中的小数

Javascript 词法分析器/分词器(在 Python 中?)