java - 使用 Java 从文件中读取算术表达式中的自然数

标签 java analyzer lexical

我正在用 Java 编写一个词法分析器。这是我目前的代码:

import java.io.*;

/**
 * Kinds of token the lexer can produce: a number, the sum and multiplication
 * operators, opening and closing parentheses, and the end-of-file marker.
 */
enum TokenType {
    NUM,
    SOMA,
    MULT,
    APar,
    FPar,
    EOF
}

/**
 * A single token: the character that was read together with its category.
 */
class Token {

  /** The character read from the input. */
  char lexema;

  /** The category assigned to {@link #lexema}. */
  TokenType token;

  /**
   * Creates a token.
   *
   * @param l the character that forms the token
   * @param t the category of the token
   */
  Token (char l, TokenType t)
  {
    this.lexema = l;
    this.token = t;
  }

}

class AnaliseLexica {

BufferedReader arquivo;

/**
 * Opens the file to be analyzed through a buffered reader.
 *
 * @param a path of the file to read
 * @throws Exception if the file cannot be opened
 */
AnaliseLexica(String a) throws Exception
{
    FileReader origem = new FileReader(a);
    this.arquivo = new BufferedReader(origem);
}

/**
 * Reads the next token from the file, skipping spaces, tabs, line feeds and
 * carriage returns that precede it.
 *
 * <p>A single digit yields a NUM token; {@code (}, {@code )}, {@code +} and
 * {@code *} yield APar, FPar, SOMA and MULT. When the end of the stream is
 * reached the file is closed and an EOF token is returned.
 *
 * @return the token that was read
 * @throws Exception if reading fails or an unexpected character is found
 */
Token getNextToken() throws Exception
{
    final int fimDeArquivo = -1;
    int codigo;
    char simbolo;

    // Consume characters until one that is not a blank shows up.
    for (;;) {
        codigo = arquivo.read();
        simbolo = (char) codigo;
        boolean branco = simbolo == ' ' || simbolo == '\t'
                      || simbolo == '\n' || simbolo == '\r';
        if (!branco)
            break;
    }

    // End of the stream (or code 10): release the file and report EOF.
    if (codigo == fimDeArquivo || codigo == 10) {
        arquivo.close();
        return new Token(simbolo, TokenType.EOF);
    }

    if ('0' <= simbolo && simbolo <= '9')
        return new Token(simbolo, TokenType.NUM);

    // Everything else must be one of the four known symbols.
    TokenType tipo;
    switch (simbolo) {
        case '(':
            tipo = TokenType.APar;
            break;
        case ')':
            tipo = TokenType.FPar;
            break;
        case '+':
            tipo = TokenType.SOMA;
            break;
        case '*':
            tipo = TokenType.MULT;
            break;
        default:
            throw new Exception("Caractere inválido: " + ((int) simbolo));
    }

    return new Token(simbolo, tipo);
}

借助这段代码中的以下部分,我可以读取从“0”到“9”的数字以及“*”、“+”等运算符:

// Excerpt of the reading loop: keeps reading one character at a time until the
// character read is not a line feed, space, tab or carriage return.
do{
        // read() returns an int; the raw value is kept in currchar1 and a
        // char copy in currchar, which is what the loop condition tests.
        currchar1 =  arquivo.read();
        currchar = (char) currchar1;
    } while (currchar == '\n' || currchar == ' ' || currchar =='\t' || currchar == '\r');

如何从文件中读取自然数并继续读取算术运算符?

最佳答案

由于空格是 token 的有效分隔符,因此可以让代码更简单。默认情况下,Scanner 类会以空白字符为分隔符拆分读取到的内容,你只需要逐个读取 token 即可。当 Scanner 没有更多数据可读时,我们将其关闭并返回 EOF token。

import java.io.FileReader;
import java.io.IOException;
import java.util.Scanner;

/**
 * Lexical analyzer for arithmetic expressions read from a text file, backed by
 * a {@link Scanner}.
 *
 * <p>Recognized tokens are natural numbers (one or more decimal digits of any
 * length), {@code +}, {@code *}, {@code (} and {@code )}. Whitespace separates
 * tokens but is not required between them: {@code (12+3)} and
 * {@code ( 12 + 3 )} produce the same token sequence.
 *
 * <p>The underlying file is closed automatically once the end of the input is
 * reached; call {@link #close()} (or use try-with-resources) to release it
 * when lexing stops earlier, e.g. after an invalid character.
 */
public class AnalisadorLexico implements AutoCloseable {

    /** Kinds of token produced by the analyzer. */
    public enum TokenType {
        NUM,
        SOMA,
        MULT,
        APar,
        FPar,
        EOF
    }

    /** A token: the text that was read (lexeme) and its category. */
    public class Token {

        String lexema;
        TokenType token;

        /**
         * @param l the text of the token
         * @param t the category of the token
         */
        Token( String l, TokenType t ) {
            lexema = l;
            token = t;
        }

        /**
         * @param l the single character that forms the token
         * @param t the category of the token
         */
        Token( char l, TokenType t ) {
            lexema = String.valueOf( l );
            token = t;
        }

        @Override
        public String toString() {
            return lexema + " (" + token + ")";
        }

    }

    private Scanner fileReader;
    private boolean scannerClosed;

    // Whitespace-delimited word currently being split into tokens, and the
    // index of the next character of it that has not been consumed yet.
    private String pendingWord = "";
    private int pendingPos;

    /**
     * Opens the file to be analyzed.
     *
     * @param filePath path of the file containing the expressions
     * @throws IOException if the file cannot be opened
     */
    public AnalisadorLexico( String filePath ) throws IOException {
        fileReader = new Scanner( new FileReader( filePath ) );
    }

    /**
     * Returns the next token of the input.
     *
     * <p>After the end of the input has been reached the scanner is closed and
     * every further call returns an EOF token with an empty lexeme.
     *
     * @return the next token, never {@code null}
     * @throws IOException if reading the file failed or a character that does
     *         not belong to any token was found; the offending character is
     *         consumed, so lexing may be resumed by calling this method again
     */
    public Token getNextToken() throws IOException {

        if ( pendingPos >= pendingWord.length() ) {

            if ( scannerClosed || !fileReader.hasNext() ) {
                // Scanner hides read failures and reports them as "no more
                // input"; surface them instead of pretending a clean EOF.
                IOException failure = scannerClosed ? null : fileReader.ioException();
                close();
                if ( failure != null ) {
                    throw failure;
                }
                return new Token( "", TokenType.EOF );
            }

            pendingWord = fileReader.next();
            pendingPos = 0;

        }

        char c = pendingWord.charAt( pendingPos );

        if ( isDigit( c ) ) {
            // A natural number is the longest run of digits. Checking the
            // digits directly (instead of Integer.parseInt) rejects signs and
            // accepts numbers larger than Integer.MAX_VALUE.
            int start = pendingPos;
            while ( pendingPos < pendingWord.length()
                    && isDigit( pendingWord.charAt( pendingPos ) ) ) {
                pendingPos++;
            }
            return new Token( pendingWord.substring( start, pendingPos ), TokenType.NUM );
        }

        // Consume the character before classifying it so that an invalid one
        // is not read again on the next call.
        pendingPos++;

        switch ( c ) {
            case '(':
                return new Token( c, TokenType.APar );
            case ')':
                return new Token( c, TokenType.FPar );
            case '+':
                return new Token( c, TokenType.SOMA );
            case '*':
                return new Token( c, TokenType.MULT );
            default:
                throw new IOException(
                        "Invalid character '" + c + "' (code " + (int) c + ")" );
        }

    }

    /** Closes the underlying scanner; calling it more than once is harmless. */
    @Override
    public void close() {
        if ( !scannerClosed ) {
            scannerClosed = true;
            fileReader.close();
        }
    }

    /** Tells whether {@code c} is one of the decimal digits '0' to '9'. */
    private static boolean isDigit( char c ) {
        return c >= '0' && c <= '9';
    }

    public static void main( String[] args ) throws IOException {

        try ( AnalisadorLexico al = new AnalisadorLexico( "testAL.txt" ) ) {

            Token t;

            while ( ( t = al.getNextToken() ).token != TokenType.EOF ) {
                System.out.println( t );
            }

            // Once the input is exhausted every further call keeps returning EOF.
            for ( int i = 0; i < 4; i++ ) {
                System.out.println( al.getNextToken() );
            }

        }

    }

}

如果您无法使用 Scanner 类,也可以继续使用 BufferedReader,自行将其数据切分为 token:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * Lexical analyzer for arithmetic expressions read from a text file, backed by
 * a {@link BufferedReader}.
 *
 * <p>Recognized tokens are natural numbers (one or more decimal digits of any
 * length), {@code +}, {@code *}, {@code (} and {@code )}. Whitespace separates
 * tokens but is not required between them: {@code (12+3)} and
 * {@code ( 12 + 3 )} produce the same token sequence.
 *
 * <p>The underlying file is closed automatically once the end of the input is
 * reached; call {@link #close()} (or use try-with-resources) to release it
 * when lexing stops earlier, e.g. after an invalid character.
 */
public class AnalisadorLexico2 implements AutoCloseable {

    /** Kinds of token produced by the analyzer. */
    public enum TokenType {
        NUM,
        SOMA,
        MULT,
        APar,
        FPar,
        EOF
    }

    /** A token: the text that was read (lexeme) and its category. */
    public class Token {

        String lexema;
        TokenType token;

        /**
         * @param l the text of the token
         * @param t the category of the token
         */
        Token( String l, TokenType t ) {
            lexema = l;
            token = t;
        }

        /**
         * @param l the single character that forms the token
         * @param t the category of the token
         */
        Token( char l, TokenType t ) {
            lexema = String.valueOf( l );
            token = t;
        }

        @Override
        public String toString() {
            return lexema + " (" + token + ")";
        }

    }

    private BufferedReader fileReader;
    private boolean fileReaderClosed;

    // Whitespace-delimited word currently being split into tokens, and the
    // index of the next character of it that has not been consumed yet.
    private String pendingWord = "";
    private int pendingPos;

    /**
     * Opens the file to be analyzed.
     *
     * @param filePath path of the file containing the expressions
     * @throws IOException if the file cannot be opened
     */
    public AnalisadorLexico2( String filePath ) throws IOException {
        fileReader = new BufferedReader( new FileReader( filePath ) );
    }

    /**
     * Returns the next token of the input.
     *
     * <p>After the end of the input has been reached the reader is closed and
     * every further call returns an EOF token with an empty lexeme.
     *
     * @return the next token, never {@code null}
     * @throws IOException if reading the file failed or a character that does
     *         not belong to any token was found; the offending character is
     *         consumed, so lexing may be resumed by calling this method again
     */
    public Token getNextToken() throws IOException {

        if ( pendingPos >= pendingWord.length() ) {

            String word = nextBufferedReaderToken();

            if ( word == null ) {
                close();
                return new Token( "", TokenType.EOF );
            }

            pendingWord = word;
            pendingPos = 0;

        }

        char c = pendingWord.charAt( pendingPos );

        if ( isDigit( c ) ) {
            // A natural number is the longest run of digits. Checking the
            // digits directly (instead of Integer.parseInt) rejects signs and
            // accepts numbers larger than Integer.MAX_VALUE.
            int start = pendingPos;
            while ( pendingPos < pendingWord.length()
                    && isDigit( pendingWord.charAt( pendingPos ) ) ) {
                pendingPos++;
            }
            return new Token( pendingWord.substring( start, pendingPos ), TokenType.NUM );
        }

        // Consume the character before classifying it so that an invalid one
        // is not read again on the next call.
        pendingPos++;

        switch ( c ) {
            case '(':
                return new Token( c, TokenType.APar );
            case ')':
                return new Token( c, TokenType.FPar );
            case '+':
                return new Token( c, TokenType.SOMA );
            case '*':
                return new Token( c, TokenType.MULT );
            default:
                throw new IOException(
                        "Invalid character '" + c + "' (code " + (int) c + ")" );
        }

    }

    /**
     * Reads the next run of non-whitespace characters from the file.
     *
     * <p>Leading line feeds, spaces, tabs and carriage returns are discarded;
     * the run ends at the next such character or at the end of the stream.
     *
     * @return the characters read, or {@code null} when the reader is closed
     *         or the end of the stream is reached before any non-whitespace
     *         character
     * @throws IOException if reading the file fails
     */
    public String nextBufferedReaderToken() throws IOException {

        // Created lazily so that "nothing read" can be reported as null.
        StringBuilder data = null;

        while ( !fileReaderClosed ) {

            int d = fileReader.read();

            if ( d == -1 ) {
                break;
            }

            char c = (char) d;

            if ( c == '\n' || c == ' ' || c == '\t' || c == '\r' ) {
                if ( data != null ) {
                    // Whitespace after the word started: the word is complete.
                    break;
                }
                // Whitespace before the word: discard it.
            } else {
                if ( data == null ) {
                    data = new StringBuilder();
                }
                data.append( c );
            }

        }

        return data == null ? null : data.toString();

    }

    /**
     * Closes the underlying reader; calling it more than once is harmless.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        if ( !fileReaderClosed ) {
            fileReaderClosed = true;
            fileReader.close();
        }
    }

    /** Tells whether {@code c} is one of the decimal digits '0' to '9'. */
    private static boolean isDigit( char c ) {
        return c >= '0' && c <= '9';
    }

    public static void main( String[] args ) throws IOException {

        try ( AnalisadorLexico2 al = new AnalisadorLexico2( "testAL.txt" ) ) {

            Token t;

            while ( ( t = al.getNextToken() ).token != TokenType.EOF ) {
                System.out.println( t );
            }

            // Once the input is exhausted every further call keeps returning EOF.
            for ( int i = 0; i < 4; i++ ) {
                System.out.println( al.getNextToken() );
            }

        }

    }

}

我的testAL.txt文件内容是:

    1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) )
1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) )
    1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) )
1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) ) 

关于java - 使用 Java 从文件中读取算术表达式中的自然数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46895053/

相关文章:

java - SpringMVC ajax 请求 - java.io.EOFException : No content to map to Object due to end of input

java - 如何打印没有最后一个逗号的数组字符串?

c++ - 为什么我的词法分析器无法识别引号 ""

java - Lucene:基于字典术语索引文档/实现自定义分析器

javascript - ES6/Node 中的词法作用域

javascript - 是否有一个 JavaScript 等价于 Python pass 语句,它什么都不做?

java - java.lang.UnsatisfiedLinkError:否GurobiJni/Tomcat

java写入文本文件

在 Elasticsearch 中搜索带空格的名称(文本)

reactjs - 将词汇数据发送到服务器并使用新的词汇组件读取它