我只是 C 编程的初学者。请帮我解决以下问题。
问题:搜索包含字符序列的给定数组的程序。这些字符仅限于字母 A、G、T 或 C。序列中的最后一个字符设置为代码 0,以便轻松检测到结束。
找不到我在这里做错了什么,但不断出现错误。
/*A program that searches through a given array that contains a sequence of characters. These characters are restricted
to be the letters A, G, T, or C. The last character in the sequence is set to be the code 0, so that the end is easily
detected. That array should be declared and initialized.*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
void input_sequence(int length,char input[]);
void search(char C[],char DNA[],int length);
/*
 * Interactive driver. Repeatedly asks for the length of a search sequence and
 * then for the sequence itself, validates the characters (A, G, T, C or '*')
 * and hands valid sequences to search() together with the built-in DNA array.
 * The loop ends when the entered length is zero or less, or when no length
 * can be read at all.
 */
int main(void) {
    //Given array
    char DNA[] = {'A', 'G', 'C', 'G', 'G', 'G', 'A', 'C', 'C', 'G', 'T', 'C',
                  'C', 'C', 'G', 'A', 'C', 'A', 'T', 'T', 'G', 'A', 'T', 'G',
                  'A', 'A', 'G', 'G', 'G', 'T', 'C', 'A', 'T', 'A', 'G', 'A',
                  'C', 'C', 'C', 'A', 'A', 'T', 'A', 'C', 'G', 'C', 'C', 'A',
                  'C', 'C', 'A', 'C', 'C', 'C', 'C', 'A', 'A', 'G', 'T', 'T',
                  'T', 'T', 'C', 'C', 'T', 'G', 'T', 'G', 'T', 'C', 'T', 'T',
                  'C', 'C', 'A', 'T', 'T', 'G', 'A', 'G', 'T', 'A', 'G', 'A',
                  'T', 'T', 'G', 'A', 'C', 'A', 'C', 'T', 'C', 'C', 'C', 'A',
                  'G', 'A', 'T', 'G', '\0'};
    int length,i=0,k;
    /*Program should repeatedly ask the user for two things: the length of a search sequence,
    and the search sequence itself*/
    /*The program should terminate when the length of the input sequence is zero or less*/
    do{
        printf("Enter length of DNA sequence to match: ");
        /* EOF or non-numeric text would leave length indeterminate and make the
           loop spin forever; treat it as a request to stop. */
        if(scanf("%d",&length)!=1){
            length=0;
        }
        //input sequence length has to be >0
        if(length>0){
            /* Search sequence array. Declared only after the length check because a
               variable-length array with a non-positive size is undefined behaviour. */
            char input[length];
            input_sequence(length,input);
            /*The elements of the search sequence may take on one of five characters: A,G,T,C and *. The
            meaning of the ‘*’ character is that it matches all four nucleotides: A,G,T and C.*/
            k=0;
            for(i=0; i<length; i++){
                if(input[i]!='A'&&input[i]!='G'&&input[i]!='T'&&input[i]!='C'&&input[i]!='*'){
                    printf("Erroneous character input ’%c’ exiting\n",input[i]);
                    k=1; /* remember that this sequence must not be searched */
                    break;
                }
            }
            if(k==0){
                search(input,DNA,length);
            }
        }
    }
    while(length>0);
    printf("Goodbye");
    return (EXIT_SUCCESS);
}
//Function to search for input sequence in the given array
/*
 * Reports the first position in DNA at which the search sequence matches.
 *
 * C      - search sequence of `length` characters; a '*' element matches any
 *          single character of DNA.
 * DNA    - sequence to search, terminated by the code 0.
 * length - number of characters in C.
 *
 * Prints "Match of search sequence found at element N" for the first match
 * and prints nothing when the sequence does not occur (or length <= 0).
 */
void search(char C[],char DNA[],int length){
    int start,i;
    bool found = false;
    if(length<=0)
        return;
    /* Try every starting element of DNA until the terminator is reached. */
    for(start=0; DNA[start]!='\0' && !found; start++){
        for(i=0; i<length; i++){
            /* Reaching the terminator means fewer than `length` characters are
               left, so no match can start here. */
            if(DNA[start+i]=='\0')
                break;
            if(C[i]!='*' && C[i]!=DNA[start+i])
                break;
        }
        if(i==length){
            found = true;
            printf("Match of search sequence found at element %d\n",start);
        }
    }
}
/*
 * Prompts for and reads `length` non-whitespace characters from standard input
 * into input[0..length-1]. The array is not null-terminated.
 *
 * If input ends (or a read error occurs) before `length` characters were read,
 * the remaining elements are set to '\0' so the caller never inspects
 * uninitialised memory.
 */
void input_sequence(int length,char input[]){
    int i;
    printf("Enter %d characters (one of AGTC*) as a search sequence: ",length);
    for(i=0; i<length; i++){
        /* The leading space in the format skips blanks and newlines. */
        if(scanf(" %c", &input[i])!=1){
            for(; i<length; i++){
                input[i]='\0';
            }
            break;
        }
    }
}
最佳答案
下面是使用 GNU C 库(glibc)提供的 POSIX 正则表达式接口(<regex.h>)的示例:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
/*
 * Prints every non-overlapping match of a POSIX basic regular expression
 * in DNA.
 *
 * regexp_str - pattern passed to regcomp() with flags 0.
 * DNA        - null-terminated string to search.
 * length     - unused; kept so the signature matches the caller.
 *
 * Each match is printed as "'<text>' (bytes <start>:<finish>)" with offsets
 * relative to the start of DNA, followed by "No more matches." once the
 * search is exhausted. If the pattern does not compile, a message is printed
 * and the function returns.
 */
void search(const char *regexp_str, const char *DNA, int length)
{
    int reti;
    const char *p = DNA;
    enum { n_matches = 5 };
    regmatch_t m[n_matches];
    regex_t regex;
    (void)length;
    reti = regcomp(&regex, regexp_str, 0);
    if(reti) {
        printf("Could not compile regex: %s\n", regexp_str);
        return;
    }
    for(;;) {//based on http://www.lemoda.net/c/unix-regex/
        int nomatch = regexec(&regex, p, n_matches, m, 0);
        if(nomatch) {
            printf ("No more matches.\n");
            break;
        }
        if(m[0].rm_so != -1) {
            int start = (int)(m[0].rm_so + (p - DNA));
            int finish = (int)(m[0].rm_eo + (p - DNA));
            /* The precision argument of %.*s must be an int. */
            printf("'%.*s' (bytes %d:%d)\n",
                   (int)(m[0].rm_eo - m[0].rm_so), p + m[0].rm_so,
                   start, finish);
        }
        if(m[0].rm_eo > 0) {
            /* Continue right after the end of this match. */
            p += m[0].rm_eo;
        } else if(*p != '\0') {
            /* Empty match at the current position: step one character so the
               loop always makes progress. */
            p++;
        } else {
            printf ("No more matches.\n");
            break;
        }
    }
    /* Leave the loop with break (not return) so the compiled pattern is
       always released. */
    regfree(&regex);
}
/*
 * Interactive driver for the regex-based search(). Repeatedly reads a length
 * and a search sequence (characters A, G, T, C or '*'), converts every '*' to
 * the regex wildcard '.', and calls search() on the built-in DNA string.
 * The loop ends when the length is zero or less, when it is too large for the
 * input buffer, or when standard input is exhausted.
 */
int main(void) {
    const char *DNA = "AGCGGGACCGTCCCGACATTGATGAAGGGTCATAGACCCA"
                      "ATACGCCACCACCCCAAGTTTTCCTGTGTCTTCCATTGAG"
                      "TAGATTGACACTCCCAGATG";
    while(1) {
        int length;
        long requested;
        int valid = 0;
        char input[256];
        printf("Enter length of DNA sequence to match: ");
        if(fgets(input, sizeof(input), stdin) == NULL) {
            break; /* EOF or read error: nothing more to process */
        }
        /* Range-check as long before narrowing to int. */
        requested = strtol(input, NULL, 10);
        if(requested <= 0) {//input sequence length has to be >0
            break;
        } else if(requested >= (long)(sizeof(input) - 1)) {
            printf("ERROR: Too big length=%ld, max supported length=%d\n",
                   requested, (int)(sizeof(input) - 1));
            break;
        }
        length = (int)requested;
        /* Keep asking until a line with `length` valid characters is entered. */
        while(!valid) {
            const char *validInputs = "AGTC*";
            printf("Enter %d characters (one of AGTC*) as a search sequence: ",length);
            if(fgets(input, sizeof(input), stdin) == NULL) {
                break; /* EOF: leave with valid == 0 */
            }
            valid = 1;
            for(int i = 0; i < length; i++) {
                /* strchr() also finds the terminator of validInputs, so a line
                   that ends early must be rejected explicitly. */
                if(input[i] == '\0' || strchr(validInputs, input[i]) == NULL) {
                    printf("Erroneous character input '%c' in '%s'\n", input[i], input);
                    valid = 0;
                    break;
                }
            }
        }
        if(!valid) {
            break; /* input ended before a valid sequence was read */
        }
        input[length] = 0;
        //substitute '*' on '.' for using in regexp
        char *ptr = input;
        while((ptr = strchr(ptr, '*')) != NULL) {
            *ptr = '.';
        }
        printf("search for: %s\n", input);
        search(input, DNA, length);
    }
    printf("Goodbye\n");
    return (EXIT_SUCCESS);
}
另外,也可以使用 C++11 的 std::regex(只需要修改 search() 函数):
#include <regex>
#include <iterator>
/// @brief Prints every match of a regular expression in a DNA string.
///
/// Prints "Found N matches:" followed by one line per match giving the
/// matched text and its offset from the start of DNA.
///
/// @param C    Pattern used to construct the std::regex (default ECMAScript
///             grammar).
/// @param DNA  Null-terminated string to search.
/// @note The third (unnamed) parameter is unused.
/// @throws std::regex_error if C is not a valid regular expression.
void search(const char *C, const char *DNA, int )
{
    const std::regex regex(C);
    const std::string str(DNA);
    const auto words_begin = std::sregex_iterator(str.begin(), str.end(), regex);
    const auto words_end = std::sregex_iterator();
    // std::distance() and match.position() return a signed difference type
    // that is wider than int on LP64 platforms, so convert explicitly and use
    // a matching printf length modifier.
    printf("Found %ld matches:\n",
           static_cast<long>(std::distance(words_begin, words_end)));
    for(std::sregex_iterator i = words_begin; i != words_end; ++i) {
        const std::smatch &match = *i;
        printf(" match: %s, pos=%ld\n", match.str().c_str(),
               static_cast<long>(match.position()));
    }
}
关于搜索数组以查找字符序列的 C 程序,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/33405375/