我已经为此做了一些工作,但遇到了一些奇怪的问题。最终目标是按空格和引号拆分输入字符串(例如 this "is a" very "very complex" example 应拆分为 {this, is a, very, very complex, example})。现在,除了第一个字符串之外,其余内容似乎都已正确拆分。
代码如下(buff 中存放的是由 getline 读入的内容):
// Fragment that splits the text in `buff` into tokens on spaces and double
// quotes, terminating each token in place and recording a pointer to it.
// NOTE(review): `buff`, `command`, `running` and `WORD_SIZE` are defined
// outside this fragment; their types/values cannot be confirmed from here.
//
// Initial allocation requests sizeof(char) bytes, although the result is
// used as an array of char pointers.
char **tokens = (char **)malloc(sizeof(char));
// NOTE(review): declared as `char *`, yet below it receives (char **) values
// and is dereferenced to store char pointers -- presumably meant to be
// `char **`; confirm against the real source.
char *temp;
// count: number of tokens recorded so far.
int count = 0;
// prev: index in buff where the token currently being scanned starts.
int prev = 0;
// Get tokens
// The bound is inclusive (i <= length) so the terminator branch at the bottom
// can run on the final NUL.
// NOTE(review): the bound is taken from `command` while the body indexes `buff`.
for (int i = 0; i <= strlen(command) && running; i++) {
// Case 1: a space at an index past `prev` ends an unquoted word.
if (i > prev && strncmp((buff + i), " ", 1) == 0) {
// Grow the array to sizeof(char) * WORD_SIZE * count bytes (count is
// incremented first).
temp = (char **)realloc(tokens, (sizeof(char)) * WORD_SIZE * (++count));
if (temp == NULL) {
fprintf(stderr, "Error in parsing: ran out of memory\n");
// Clearing `running` makes the loop condition fail on the next check.
running = false;
free(tokens);
}
else {
tokens = temp;
// temp still equals tokens here, so this store lands in the FIRST slot of
// the array, replacing whatever pointer was there.
*(temp) = (buff + i);
// Copies one character from "\0", i.e. writes a NUL at the stored address
// (buff + i), which terminates the word in place.
strncpy(*(temp), "\0", 1);
// Move to the slot used for this token.
// NOTE(review): pointer arithmetic on a char ** advances in units of
// pointers, so this steps WORD_SIZE pointers per token, while the realloc
// above is sized in bytes -- verify the intended layout.
temp = tokens + WORD_SIZE * (count - 1);
// Record where the token starts, then begin the next token after the space.
*(temp) = buff+prev;
prev = i+1;
}
}
// Case 2: an opening double quote starts a quoted token.
else if (strncmp((buff + i), "\"", 1) == 0) {
// NOTE(review): temp has not been reassigned in this branch yet, so this
// writes through whatever value it was left with earlier (it has no value
// at all if this is the first branch taken).
*(temp) = (buff + i);
// Writes a NUL over the opening quote.
strncpy(*(temp), "\0", 1);
// Skip the quote; the quoted text starts at the next index.
i++;
prev = i;
// Advance i to the next double quote.
// NOTE(review): no check for the terminating NUL is visible here, so an
// unmatched quote would keep scanning past the end of the string.
for (; strncmp((buff + i), "\"", 1) != 0; i++) { }
temp = (char **)realloc(tokens, (sizeof(char)) * WORD_SIZE * (++count));
if (temp == NULL) {
fprintf(stderr, "Error in parsing: ran out of memory\n");
running = false;
free(tokens);
}
else {
tokens = temp;
// Same sequence as in the space branch: store into the first slot, write a
// NUL over the closing quote, then record the token start in its own slot.
*(temp) = (buff + i);
strncpy(*(temp), "\0", 1);
temp = tokens + WORD_SIZE * (count - 1);
*(temp) = buff+prev;
prev = i+1;
}
}
// Case 3: the NUL terminator ends the final token.
else if (strncmp((buff + i), "\0", 1) == 0) {
temp = (char **)realloc(tokens, (sizeof(char)) * WORD_SIZE * (++count));
if (temp == NULL) {
fprintf(stderr, "Error in parsing: ran out of memory\n");
running = false;
free(tokens);
}
else {
tokens = temp;
// No NUL needs to be written here; only the token start is recorded.
temp = tokens + WORD_SIZE * (count - 1);
*(temp) = buff+prev;
prev = i+1;
}
}
}
// Print every token with its index.
// NOTE(review): `*tokens` is evaluated first (the pointer held in the first
// slot) and the offset is then added to that char pointer; this does not
// read the slot that the loop above wrote for token i.
for (int i = 0; i < count; i++)
printf("\t%i: %s\n", i, *tokens + sizeof(char) * WORD_SIZE * i);
现在,如果我输入 this is a test(不带引号),我会得到:
0:
1: is
2: a
3: test
带引号的情况有点混乱:对于输入 this "is a" very "very complex" test,我得到:
0:
1: is a
2:
3: very complex
4: test
最佳答案
您说过可以接受替代代码。对于简单的字符串解析算法,如果用确定性有限自动机(DFA)模型来思考,几乎总能更容易地写出更易于维护的代码。网络上有许多关于 DFA 的免费参考资料。
这是一个可以解决您的问题的 DFA。
[any] 的意思是“所有其他字符”。换句话说,如果没有其他转换匹配,则采用这一条;它对应 C 语言 switch 中的 default 分支。[eos] 的含义是“字符串结尾”,即空字符。
请注意,DFA 可以让您系统地处理所有情况,例如出现在单词中间的引号。在这里,我把它视为当前单词的结尾,同时也是一个新的带引号单词的开头。如果规范发生变化,DFA 也很容易修改,而且无需费太多心思就能把改动转化为代码。
剩下的就是添加“动作代码”,用来记录每个 token 的起始位置,并在相应位置写入空终止符。用 C 语言实现如下:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Split `str` into tokens using a small deterministic finite automaton.
 *
 * A token is either a run of characters delimited by space, tab or newline,
 * or the text between a pair of double quotes (which may contain whitespace
 * and may be empty). A double quote inside a word ends that word and opens a
 * new quoted token.
 *
 * The string is modified in place: the delimiter that ends each token is
 * overwritten with '\0', and the returned pointers point into `str`. The
 * caller must therefore keep `str` alive while the tokens are in use.
 *
 * Parameters:
 *   str          - writable, NUL-terminated string to split; must not be NULL.
 *   n_tokens_rtn - if non-NULL, receives the number of tokens found.
 *
 * Returns a heap-allocated, NULL-terminated array of token pointers that the
 * caller releases with free() (the array only, not the tokens).
 *
 * On a missing closing quote "Syntax error." is written to stderr and the
 * tokens found so far are returned, the last one being the unterminated
 * quoted text. If the array cannot be allocated, a message is written to
 * stderr and the process exits with EXIT_FAILURE.
 */
char **tokenize(char *str, int *n_tokens_rtn)
{
    // State of the DFA.
    enum { Error = -1, Start, InQuoted, InWord } state = Start;

    // Every token is opened by a distinct character of the input (the first
    // character of a word, or an opening quote), so there can never be more
    // than strlen(str) tokens. One extra slot holds the trailing NULL.
    size_t capacity = strlen(str) + 1;
    char **tokens = NULL;
    if (capacity <= (size_t)-1 / sizeof *tokens) {
        tokens = malloc(capacity * sizeof *tokens);
    }
    if (tokens == NULL) {
        fprintf(stderr, "tokenize: out of memory\n");
        exit(EXIT_FAILURE);
    }

    size_t n_tokens = 0;
    char *p = str;      // current scan position
    int scanning = 1;

    // Each iteration is one DFA transition.
    while (scanning) {
        const char c = *p;

        switch (state) {
        case Start:
            if (c == '\0') {
                scanning = 0;
            } else if (c == ' ' || c == '\t' || c == '\n') {
                ++p;                        // skip separators between tokens
            } else if (c == '"') {
                state = InQuoted;
                ++p;                        // token begins after the quote
                tokens[n_tokens++] = p;
            } else {
                state = InWord;
                tokens[n_tokens++] = p;     // token begins at this character
                ++p;
            }
            break;

        case InQuoted:
            if (c == '\0') {
                state = Error;              // missing close quote
            } else if (c == '"') {
                state = Start;
                *p++ = '\0';                // closing quote becomes terminator
            } else {
                ++p;
            }
            break;

        case InWord:
            if (c == '\0') {
                scanning = 0;               // word ends at end of string
            } else if (c == ' ' || c == '\t' || c == '\n') {
                state = Start;
                *p++ = '\0';
            } else if (c == '"') {
                // Word ended in a quote, not a space: terminate the word and
                // open a quoted token right after the quote.
                state = InQuoted;
                *p++ = '\0';
                tokens[n_tokens++] = p;
            } else {
                ++p;
            }
            break;

        case Error:
            fprintf(stderr, "Syntax error.\n");
            scanning = 0;
            break;
        }
    }

    // Return number of tokens if caller is interested.
    if (n_tokens_rtn) {
        *n_tokens_rtn = (int)n_tokens;
    }

    // Append a null terminator so callers can iterate without the count.
    tokens[n_tokens++] = NULL;

    // Trim the array to the size actually used. If the shrink fails the
    // original (larger) block is still valid, so hand that back instead of
    // leaking it and returning NULL.
    char **trimmed = realloc(tokens, n_tokens * sizeof *tokens);
    return trimmed != NULL ? trimmed : tokens;
}
/*
 * Demo driver: tokenizes a fixed sample string and prints one token per line.
 *
 * The sample lives in a local array (not a string literal) because tokenize
 * writes terminators into the buffer it is given.
 */
int main(void)
{
    char str[] = "this \"is a\" very \"very complex\" example";

    // The token array is heap-allocated by tokenize; make sure we actually
    // got one before walking it.
    char **tokens = tokenize(str, NULL);
    if (tokens == NULL) {
        fprintf(stderr, "tokenize failed\n");
        return EXIT_FAILURE;
    }

    // The array is NULL-terminated, so no count is needed.
    for (char **tok = tokens; *tok != NULL; tok++) {
        printf("%s\n", *tok);
    }

    // Only the array is owned here; the tokens themselves point into str.
    free(tokens);
    return 0;
}
关于c - 尝试用空格和引号标记字符串,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23587423/