代码是将字符串分解为标记。调试我的 C 代码会引发异常。我不知道如何处理 C 中的异常

标签 c visual-studio-2015

/*
    * tokenizer.c
    */

    // Header files
    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    #include <ctype.h>

    // constants for the white spaces
    // and the unprintable values
    #define BLANK 0x20
    #define TAB 0x09
    #define VTAB 0x0b
    #define FORM_FEED 0x0c
    #define NEW_LINE 0x0a
    #define CARRIAGE_RETURN 0x0d

    // define the boolean variables for ease of access
    #define TRUE 1
    #define FALSE 0

    /*
    * Tokenizer type.  You need to fill in the type as part of your implementation.
    */
    struct TokenizerT_
    {
        // to hold the actual string provided at the argument
        char *actual_string;

        // to mover the pointer with in the provided
        // string
        char *pointer;
    };

    typedef struct TokenizerT_ TokenizerT;

    /*
    * TKCreate creates a new TokenizerT object for a given token stream
    * (given as a string).
    *
    * TKCreate should copy the arguments so that it is not dependent on
    * them staying immutable after returning. (In the future, this may change
    * to increase efficiency.)
    *
    * If the function succeeds, it returns a non-NULL TokenizerT.
    * Else it returns NULL.
    *
    * You need to fill in this function as part of your implementation.
    */
    TokenizerT *TKCreate(char *ts)
    {
        TokenizerT *token = (TokenizerT *)malloc(sizeof(TokenizerT));

        // condition to check whether the provided string is 
        // empty or not. If it is empty return NULL
        if (strlen(ts) == 0)
        {
            return NULL;
        }

        token->actual_string = (char *)malloc(sizeof(char) * (strlen(ts) + 1));

        // copy the provided string into structure's actual string
        strcpy(token->actual_string, ts);

        // set the actual string value to the pointer
        token->pointer = token->actual_string;

        // return the token
        return token;
    }

    /*
    * TKDestroy destroys a TokenizerT object. It should free all dynamically
    * allocated memory that is part of the object being destroyed.
    *
    * You need to fill in this function as part of your implementation.
    */
    void TKDestroy(TokenizerT *tk)
    {
        // set the pointer to null
        tk->pointer = NULL;

        // free the memory of the pointer
        free(tk->actual_string);

        // free the structure pointer
        free(tk);
    }

    /*
    * isWhiteSpace used to determine if the parameter character
    * is a specified  whitespace constant or not. If it contains
    * returns TRUE else returns FALSE
    */
    int isWhiteSpace(char ch)
    {
        if (ch == BLANK || ch == NEW_LINE || ch == VTAB || ch == TAB || ch   == CARRIAGE_RETURN || ch == FORM_FEED)
        {
            return TRUE;
        }
        else
        {
            return FALSE;
        }
    }

    /*
    * getWhiteSpace: Returns the type of error is the character is?
    */

getWhitespace function gives me the "error: not all control paths return value". I know this error has been asked previously but I did not get how to fix in my code here.

    char *getWhiteSpace(char ch)
    {
        switch (ch)
        {
        case BLANK:
            return "0x20";

        case TAB:
            return "TAB";

        case VTAB:
            return "0x0b";

        case NEW_LINE:
            return "0x0a";

        case CARRIAGE_RETURN:
            return "0x0d";

        case FORM_FEED:
            return "0x0c";
        }
    }
    /*
    * TKGetNextToken returns the next token from the token stream as a
    * character string. Space for the returned token should be dynamically
    * allocated. The caller is responsible for freeing the space once it is
    * no longer needed.
    *
    * If the function succeeds, it returns a C string (delimited by '\0')
    * containing the token. Else it returns 0.
    *
    * You need to fill in this function as part of your implementation.
    */
    char *TKGetNextToken(TokenizerT *tk)
    {
        // Copy the pointer of the structure to the temporary variable
        // so as to not to modify the original
        char *temp_ptr = tk->pointer;

        // define length variable
        int length = 0;

        // condition to check whether the given character contains
        // constant whitespace
        if (isWhiteSpace(*temp_ptr))
        {
            while (isWhiteSpace(*temp_ptr))
            {
                char *whiteChar = getWhiteSpace(*temp_ptr);

                // print the respective error message
                printf("Error : [%s]\n", whiteChar);
                // increment the  pointer
                *temp_ptr++;
            }
        }
        // store the original length of temp_ptr
        length = strlen(temp_ptr);

        // define a new pointer to allocate the token that
        // is been tokenized
        char *token = (char *)malloc(length + 1);

        // copy the string from the pointer of the structure 
        // till the length
        int i = 0;

        // to know the length of the string to token at
        while (!isWhiteSpace(temp_ptr[i]) && i != length)
        {
            i++;
        }

        // copy the string into the token
        strncpy(token, temp_ptr, i);

        // set the end of the string with string terminator character
        token[i] = '\0';

        int j = 0;

In the for loop I am getting error: signed/unsigned mismatch error. I have declared the type of the j as 'int' so what is really giving me the error and how to fix it.

        // modify the pointer of the struture TokenizerT
        for (j = 0; j < length - strlen(token); j++)
        {
            tk->pointer[j] = temp_ptr[i];
            i++;
        }
        tk->pointer[j] = '\0';

        // return the token         
        return token;
    }


    /*
    * isDecimal: This accepts a string returns a boolean value if the given string is
    * a decimal value.
    */
    int isDecimalValue(char *s)
    {
        // copy the parameter string into local string pointer
        char *temp_ptr = s;

        // loop through the string until string terminator character is found
        while (*temp_ptr != '\0')
        {
            // by using isdigit function find whether the given
            // character is not a digit.
            if (!isdigit(*temp_ptr++))
            {
                // then return false
                return FALSE;
            }
        }

        // then return true value
        return TRUE;
    }

    /*
    * isFloat: This accepts a string and return a boolean value
    * This is used to check whether provided string is a floating pointing.
    * This also checks for decimal pointer and/or for an e-notation.
    */
    int isFloatValue(char *s)
    {
        // define a temporary string variable
        char *temp_ep = NULL;

        // convert the string to long 
        long i = strtol(s, &temp_ep, 0);

        // condition to check whether the temporary value is
        // not converted to long or int
        if (!*temp_ep)
        {
            // if the condition is true return false
            return FALSE;
        }

        // condition to check whether the temporary pointer
        // contains 'e' or 'E' or '.'(period)
        if (*temp_ep == 'e' || *temp_ep == 'E' || *temp_ep == '.')
        {
            // if the condition is true return true
            return TRUE;
        }

        // other wise return false
        return FALSE;
    }


    /*
    * isOctal: That accepts a string and returns a boolean value
    * This is used to check if the string is an octal number.
    * Whereas the first digit is a zero and the proceeding digits are 0-7
    */
    int isOctalValue(char *s)
    {
        // copy the string to local variable
        char *temp_ptr = s;

        // condition to check whether the first character is '0' or not
        if (*temp_ptr != '0')
        {
            // if true return false
            return FALSE;
        }

        // increment the pointer to check the rest of the characters
        // contain the octal value(0-7)
        *temp_ptr++;

        // loop though until the character is not 8 and 9
        // and is it also a digit 
        while (isdigit(*temp_ptr) && *temp_ptr != '8' && *temp_ptr != '9')
        {
            // increment the pointer
            *temp_ptr++;

            // Check whether it is a string terminator.
            if (*temp_ptr == '\0')
            {
                // if string satisfies all the conditions
                // then return true
                return TRUE;
            }
        }

        // return false, if the string in a not an octal
        // number
        return FALSE;
    }


    /*
    * isHex: This accepts a string and returns a boolean value
    * This checks if the string begins with '0x' or '0X' and contains
    * digits 0-9 and alpha A-F characters only
    */
    int isHexValue(char *s)
    {
        // copy the string to local variable
        char *temp_ptr = s;

        // condition to check whether the starting value contains
        // '0x' or '0X'
        if (*temp_ptr++ == '0' && (*temp_ptr == 'x' || *temp_ptr == 'X'))
        {
            // loop through the pointer until it reaches end of 
            // the string
            while (*(++temp_ptr) != '\0')
            {
                // condition to check the character contains
                // hexa decimal value
                if (!isxdigit(*temp_ptr))
                {
                    // if it is not a hexa decimal value return false
                    return FALSE;
                }
            }

            // if the string is hexa decimal value, return true
            return TRUE;
        }

        // if the string is not hexa decimal value, return false
        return FALSE;
    }

    /*
    * displayTypeOfToken: Accepts the string and prints the type of token
    *
    * Token types:
    * decimal integer - token is a digit (0-9) followed by any number of digits.
    * octal integer - token is a 0 followed by any number of octal digits (i.e. 0-7).
    * hexadecimal integer - token is 0x (or 0X) followed by any number of hexadecimal digits (i.e. 0-9, a-f, A-F).
    * floating point - token is follows the rules for floating-point constants in Java or C.
    */
    void displayTypeOfToken(char *token)
    {
        // condition to check if token is floating point and 
        // display the value 
        if (isFloatValue(token))
        {
            printf("Float: %s\n", token);
        }

        // condition to check if token is octal value and 
        // display the value
        else if (isOctalValue(token))
        {
            printf("Octal: %s\n", token);
        }

        // condition to check if token is hexa value and 
        // display the value
        else if (isHexValue(token))
        {
            printf("Hex: %s\n", token);
        }

        // condition to check if token is a decimal value and 
        // display the value
        else if (isDecimalValue(token))
        {
            printf("Decimal: %s\n", token);
        }
        // display if all the above conditon fails  
        else
        {
            printf("Invalid: '%s'\n", token);
        }

        //Free token memory
        free(token);
    }

    /*
    * main will have a string argument (in argv[1]).
    * The string argument contains the tokens.
    * Print out the tokens in the second string in left-to-right order.
    * Each token should be printed on a separate line.
    */
    int main(int argc, char **argv)
    {
        // declare a char pointer to hold the input string
        char *input = NULL;

        // declare a pointer to the structure TokenizerT 
        TokenizerT *tokenizer;

        // declare a character pointer to hold the tokens called at
        // each call of TKGetNextToken function
        char *token;

        // if the length of the argument at command line is greater than 1
        // then read the input from command line
        if (argc > 1)
        {
            input = (char *)malloc(strlen(argv[1]) + 1);
            strcpy(input, argv[1]);
            input[strlen(argv[1])] = '\0';
        }

        // call the function TKCreate by passing the
        // input string
        tokenizer = TKCreate(input);

        //condition to check if there is any problem in setting the value
        if (tokenizer == NULL)
        {
            return 1;
        }

        // Print the input value present 
        printf("Provided input is: \"%s\"\n\n", tokenizer->actual_string);


        // loop through end of the string
        while (*(token = TKGetNextToken(tokenizer)))
        {
            //token = TKGetNextToken(tokenizer);
            displayTypeOfToken(token);
        }

        // free the memory allocated to the structure pointer
        TKDestroy(tokenizer);

        return 0;
    }

最佳答案

您的函数 getWhitespace 返回一个 char*。如果 ch 不匹配任何大小写怎么办?您必须在开关中添加默认情况才能处理任何其他值。

函数strlen返回一个类型size_t,它是一个无符号类型。这就是警告的原因,您比较 int (有符号)和 size_t。

关于代码是将字符串分解为标记。调试我的 C 代码会引发异常。我不知道如何处理 C 中的异常,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39796122/

相关文章:

debugging - 如何有条件地内联 F# 中的成员,或完全关闭调试版本中的内联

c# - 在 native C++ 项目中导入 C# dll 库

c - Arduino C - 无法创建新方法和数组

c - 如何在函数中分配数组,然后重新分配它

typescript TS5042 内部版本 : Option 'project' cannot be mixed with source files on a command line

javascript - 扩展类时 typescript 文件顺序问题

c# - 无法使用 ADB 在 VS 中调试 Xamarin Android 应用程序

c - 使用 typedef 和结构时出错

c++ - 远程 GDB 调试

unsigned int可以替换C中的指针吗