搜索数组以查找字符序列的 C 程序

标签 c arrays function for-loop do-while

我只是 C 编程的初学者。请帮我解决以下问题。

问题:搜索包含字符序列的给定数组的程序。这些字符仅限于字母 A、G、T 或 C。序列中的最后一个字符设置为代码 0,以便轻松检测到结束。

我找不出自己哪里做错了,但程序一直报错。

/*A program that searches through a given array that contains a sequence of characters. These characters are restricted 
to be the letters A, G, T, or C. The last character in the sequence is set to be the code 0, so that the end is easily
detected. That array should be declared and initialized.*/

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
void input_sequence(int length,char input[]);
void search(char C[],char DNA[],int length);

/*
 * Interactive driver: repeatedly asks for the length of a search sequence and
 * the sequence itself, validates the characters, and searches the fixed DNA
 * array for it. Ends when the entered length is zero or less, when the length
 * cannot be read, or when an invalid character is entered.
 */
int main(void) {
    //Given array
    char DNA[] = {'A', 'G', 'C', 'G', 'G', 'G', 'A', 'C', 'C', 'G', 'T', 'C', 
          'C', 'C', 'G', 'A', 'C', 'A', 'T', 'T', 'G', 'A', 'T', 'G', 
          'A', 'A', 'G', 'G', 'G', 'T', 'C', 'A', 'T', 'A', 'G', 'A', 
          'C', 'C', 'C', 'A', 'A', 'T', 'A', 'C', 'G', 'C', 'C', 'A', 
          'C', 'C', 'A', 'C', 'C', 'C', 'C', 'A', 'A', 'G', 'T', 'T', 
          'T', 'T', 'C', 'C', 'T', 'G', 'T', 'G', 'T', 'C', 'T', 'T', 
          'C', 'C', 'A', 'T', 'T', 'G', 'A', 'G', 'T', 'A', 'G', 'A', 
          'T', 'T', 'G', 'A', 'C', 'A', 'C', 'T', 'C', 'C', 'C', 'A', 
          'G', 'A', 'T', 'G', '\0'};
    int length,i,valid;
    /*Program should repeatedly ask the user for two things: the length of a search sequence,
    and the search sequence itself*/
    /*The program should terminate when the length of the input sequence is zero or less*/
    do{
        printf("Enter length of DNA sequence to match: ");
        //A failed read (end of input or non-numeric text) would leave length unset and
        //make the loop spin, so treat it like a request to stop
        if(scanf("%d",&length)!=1)
            length=0;
        //input sequence length has to be >0
        if(length>0){
            //Search sequence array. Declared only after the length check because a
            //variable-length array must have a size greater than zero
            char input[length];
            input_sequence(length,input);
            /*The elements of the search sequence may take on one of five characters: A,G,T,C and *. The
            meaning of the ‘*’ character is that it matches all four nucleotides: A,G,T and C.*/
            valid=1;
            for(i=0; i<length; i++){
                if(input[i]!='A'&&input[i]!='G'&&input[i]!='T'&&input[i]!='C'&&input[i]!='*'){
                    printf("Erroneous character input ’%c’ exiting\n",input[i]);
                    valid=0;
                    break;
                }
            }
            //The message above announces an exit, so leave the prompt loop
            if(!valid)
                break;
            search(input,DNA,length);
        }
    }
    while(length>0);
    printf("Goodbye");

    return (EXIT_SUCCESS);
}

//Function to search for input sequence in the given array
/*
 * C      - search sequence of `length` characters; '*' matches any character
 * DNA    - sequence to scan, terminated by the character code 0
 * length - number of characters in C
 *
 * Tries every starting position in DNA and prints the index of the first
 * position at which the whole search sequence matches. Prints nothing when
 * there is no match or when length is not positive.
 */
void search(char C[],char DNA[],int length){
    int start,offset;
    if(length<=0)
        return;
    for(start=0; DNA[start]!='\0'; start++){
        for(offset=0; offset<length; offset++){
            char have=DNA[start+offset];
            //Reached the terminator: the sequence no longer fits here or at any later start
            if(have=='\0')
                return;
            if(C[offset]!='*' && C[offset]!=have)
                break;
        }
        if(offset==length){
            printf("Match of search sequence found at element %d\n",start);
            return;
        }
    }
}

/*
 * Prompts for and reads `length` non-whitespace characters from standard
 * input into input[0..length-1]. No terminator is appended.
 * If input ends before `length` characters were read, the remaining slots are
 * set to the character code 0 so the caller never reads indeterminate values.
 */
void input_sequence(int length,char input[]){
    int i;
    printf("Enter %d characters (one of AGTC*) as a search sequence: ",length);
    for(i=0; i<length; i++){
        //The leading space in the format skips whitespace, including a newline left by earlier input
        if(scanf(" %c", &input[i])!=1)
            break;
    }
    //Only runs when the read loop stopped early
    for(; i<length; i++)
        input[i]='\0';
}

最佳答案

下面是一个使用 GNU C 库(POSIX <regex.h>)正则表达式的示例:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>

/*
 * Prints every non-overlapping match of a POSIX basic regular expression
 * inside DNA, one per line, followed by "No more matches.".
 *
 * regexp_str - pattern passed to regcomp() with no flags
 * DNA        - NUL-terminated text to scan
 * length     - unused; kept so the signature stays compatible with callers
 *
 * If the pattern does not compile, an error line is printed and nothing is
 * searched.
 */
void search(const char *regexp_str, const char *DNA, int length)
{
    int reti;
    const char *p = DNA;
    regmatch_t m[1]; /* only the whole-match slot is used */
    regex_t regex;
    (void)length;

    reti = regcomp(&regex, regexp_str, 0);
    if(reti) {
        printf("Could not compile regex: %s\n", regexp_str);
        return;
    }

    while(1) {//based on http://www.lemoda.net/c/unix-regex/
        int nomatch = regexec(&regex, p, 1, m, 0);
        if(nomatch) {
            printf ("No more matches.\n");
            break; /* fall through to regfree() instead of leaking the compiled pattern */
        }
        int start = (int)(m[0].rm_so + (p - DNA));
        int finish = (int)(m[0].rm_eo + (p - DNA));
        /* the precision argument of %.*s must be an int */
        printf("'%.*s' (bytes %d:%d)\n",
                (int)(m[0].rm_eo - m[0].rm_so), p + m[0].rm_so,
                start, finish);
        if(m[0].rm_eo == 0) {
            /* empty match at the current position: step forward manually,
               otherwise the loop would never advance */
            if(*p == '\0') {
                printf ("No more matches.\n");
                break;
            }
            p++;
        } else {
            p += m[0].rm_eo;
        }
    }
    regfree(&regex);
}

/*
 * Interactive driver: repeatedly reads a length and a search sequence made of
 * the characters AGTC*, converts each '*' to the regex wildcard '.', and
 * prints all matches in the fixed DNA string via search().
 * Ends on a length of zero or less, an over-long length, or end of input.
 */
int main(void) {
    const char *DNA = "AGCGGGACCGTCCCGACATTGATGAAGGGTCATAGACCCA"
                      "ATACGCCACCACCCCAAGTTTTCCTGTGTCTTCCATTGAG"
                      "TAGATTGACACTCCCAGATG";
    const char *validInputs = "AGTC*";
    while(1) {
        int length;
        char input[256];
        /* the buffer must also hold the trailing newline and the terminator */
        const long maxLength = (long)sizeof(input) - 2;

        printf("Enter length of DNA sequence to match: ");
        if(fgets(input, sizeof(input), stdin) == NULL) {
            break; /* end of input */
        }
        long parsed = strtol(input, NULL, 10);
        if(parsed <= 0) {//input sequence length has to be >0
            break;
        } else if(parsed > maxLength) {
            printf("ERROR: Too big length=%ld, max supported length=%ld\n",
                   parsed, maxLength);
            break;
        }
        length = (int)parsed;

        int gotSequence = 0;
        while(1) {
            printf("Enter %d characters (one of AGTC*) as a search sequence: ",length);
            if(fgets(input, sizeof(input), stdin) == NULL) {
                break; /* end of input: stop asking instead of looping forever */
            }

            int valid = 1;
            for(int i = 0; i < length; i++) {
                /* strchr() also finds the terminator of validInputs, so a
                   too-short line has to be rejected explicitly */
                if(input[i] == '\0' || strchr(validInputs, input[i]) == NULL) {
                  printf("Erroneous character input '%c' in '%s'\n", input[i], input);
                  valid = 0;
                  break;
                }
            }
            if(valid) {
                gotSequence = 1;
                break;
            }
        }
        if(!gotSequence) {
            break;
        }
        /* cut off the newline and anything typed beyond the requested length */
        input[length] = 0;
        //substitute '*' on '.' for using in regexp
        char *ptr = input;
        while((ptr = strchr(ptr, '*')) != NULL) {
            *ptr = '.';
        }
        printf("search for: %s\n", input);
        search(input, DNA, length);
    }
    printf("Goodbye\n");
    return (EXIT_SUCCESS);
}

另外,也可以使用 C++11 的 std::regex(只需要更改 search() 函数):

#include <regex>
#include <iterator>

/// Prints the number of matches of the regular expression `C` in `DNA`,
/// followed by one line per match giving the matched text and its offset.
///
/// @param C    pattern, compiled with std::regex's default grammar
/// @param DNA  NUL-terminated text to scan
/// The third (unnamed) parameter is unused; it keeps the signature compatible
/// with existing callers.
void search(const char *C, const char *DNA, int )
{
    const std::regex pattern(C);
    const std::string text(DNA);
    const auto first = std::sregex_iterator(text.begin(), text.end(), pattern);
    const auto last = std::sregex_iterator();
    // std::distance and position() return ptrdiff_t-like types, which do not
    // match "%d"; convert explicitly and print with "%ld".
    printf("Found %ld matches:\n", static_cast<long>(std::distance(first, last)));
    for(auto it = first; it != last; ++it) {
        printf(" match: %s, pos=%ld\n", it->str().c_str(),
               static_cast<long>(it->position()));
    }
}

关于搜索数组以查找字符序列的 C 程序,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/33405375/

相关文章:

c - 理解为什么换行符会在下一次迭代中被捕获

javascript - 从数组值中获取值以形成聊天

function - 将函数添加到 Matlab 路径

javascript - 一个简单的 JavaScript 函数由于奇怪的错误而拒绝工作

c++ - 多级继承/多态和虚函数

c - 段错误 - 32/64 位问题?

c - C 中的局部 i 变量?

c - 为什么我的霍夫曼代码的节点没有正确排序? C

ios - 通过 swift 重新排序字典数组

java - 具有多于一列的动态数组