c - 在 C 中没有匹配项时在字符串中插入一个字符

标签 c string algorithm

我正在尝试找出一种方法,在该字符不匹配的情况下将字符插入到字符串中。

假设我有这两个字符串:

s1: CGGGTATCCAA
s2: CCCTAGGTCCCA

它应该输出这个:

s1: C----GGGTATCC-AA
s2: CCCTAGG-T--CCCA-

算法如下:

if(lengthOfs1 > lengthOfs2)
    if character mismatch
       put a dash on s2
    else
       put the original character
else if(lengthOfs1 <= lengthOfs2)
    if character mismatch
       put a dash on s1
    else
       put the original character

我尝试用 for 循环遍历这两个原始字符串,直到在字符串中遇到 '\0' 为止。在循环中我对字符进行比较,最后使用类似下面的代码:

strncpy(&s1_final_string[i + 1], &s1[i], 1) // if they are equal
strncpy(&s1_final_string[i], "-", 1); // if I need to put a dash

是否有一种简单的方法来处理这种情况并在我们不匹配时复制一个“-”字符?

最佳答案

这是一个与您给出的输入和输出相匹配的贪心差分算法。请注意,此算法并不保证为任意两个字符串找到不匹配数最少的对齐结果。它的做法是:在每个不匹配点,分别在两个字符串中向前扫描以找到下一个匹配点,并选择距离更近的那个。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Greedily align two NUL-terminated strings by padding with '-'.
 *
 * The inputs are walked left to right.  Equal characters are copied to both
 * outputs.  On a mismatch, the function measures how far ahead in s2 the
 * current s1 character next occurs (or the end of s2), and likewise for s1,
 * then takes the shorter skip: the skipped characters are copied to their own
 * output while the other output receives the same number of dashes.  Once one
 * input is exhausted, the rest of the other is emitted opposite dashes.
 * The result is not guaranteed to be a minimal alignment.
 *
 * s1, s2        input strings (read only)
 * s1_final_ptr  receives the padded version of s1
 * s2_final_ptr  receives the padded version of s2
 *
 * Returns 0 on success; both outputs then have equal length and are
 * calloc-allocated buffers that this function does not free, so the caller
 * is expected to release them with free().  Returns -1 if an allocation
 * fails, in which case both output pointers are set to NULL.
 */
int greedy_diff_str(const char *s1, const char *s2, char **s1_final_ptr, char **s2_final_ptr)
{
  /* Every output column consumes at least one input character, so the
     combined input length (plus the terminator) bounds the output. */
  const size_t capacity = strlen(s1) + strlen(s2) + 1;
  const char  *p1 = s1;
  const char  *p2 = s2;
  size_t       out = 0;
  size_t       rest1, rest2;
  char        *a1;
  char        *a2;

  a1 = calloc(capacity, 1);
  *s1_final_ptr = a1;
  if (NULL == a1)
  {
    *s2_final_ptr = NULL;
    return -1;
  }

  a2 = calloc(capacity, 1);
  *s2_final_ptr = a2;
  if (NULL == a2)
  {
    free(a1);
    *s1_final_ptr = NULL;
    return -1;
  }

  while ('\0' != *p1 && '\0' != *p2)
  {
    size_t gap1, gap2;

    if (*p1 == *p2)
    {
      a1[out] = *p1++;
      a2[out] = *p2++;
      ++out;
      continue;
    }

    /* dashes s1 would need: distance in s2 to the next *p1, or to its end */
    gap1 = 1;
    while ('\0' != p2[gap1] && *p1 != p2[gap1])
    {
      ++gap1;
    }

    /* dashes s2 would need: distance in s1 to the next *p2, or to its end */
    gap2 = 1;
    while ('\0' != p1[gap2] && *p2 != p1[gap2])
    {
      ++gap2;
    }

    /* fewer dashes wins; on a tie, pad s1 when at least as much of s1 as of
       s2 has been consumed so far */
    if (gap1 < gap2 || (gap1 == gap2 && (p1 - s1) >= (p2 - s2)))
    {
      memset(a1 + out, '-', gap1);
      memcpy(a2 + out, p2, gap1);
      p2 += gap1;
      out += gap1;
    }
    else
    {
      memcpy(a1 + out, p1, gap2);
      memset(a2 + out, '-', gap2);
      p1 += gap2;
      out += gap2;
    }
  }

  /* At most one input still has characters left; flush it against dashes. */
  rest1 = strlen(p1);
  memcpy(a1 + out, p1, rest1);
  memset(a2 + out, '-', rest1);
  out += rest1;

  rest2 = strlen(p2);
  memset(a1 + out, '-', rest2);
  memcpy(a2 + out, p2, rest2);
  out += rest2;

  a1[out] = '\0';
  a2[out] = '\0';

  return 0;
}

/*
 * Demo driver: aligns the two sample strings from the question with
 * greedy_diff_str() and prints the inputs followed by the padded outputs.
 *
 * Returns 0 on success, or EXIT_FAILURE if greedy_diff_str() reports an
 * error (in which case the output pointers are NULL and must not be printed).
 */
int main(void)
{
  char s1[] = "CGGGTATCCAA", s2[] = "CCCTAGGTCCCA";
  char *s1_fin = NULL, *s2_fin = NULL;

  printf("Input:\n");
  printf("s1: '%s'\n", s1);
  printf("s2: '%s'\n", s2);

  /* Passing NULL to a %s conversion is undefined behaviour, so bail out
     before printing if the alignment could not be produced. */
  if (0 != greedy_diff_str(s1, s2, &s1_fin, &s2_fin))
  {
    fprintf(stderr, "greedy_diff_str failed\n");
    return EXIT_FAILURE;
  }

  printf("Output:\n");
  printf("s1: '%s'\n", s1_fin);
  printf("s2: '%s'\n", s2_fin);

  /* The result buffers are heap-allocated by greedy_diff_str(). */
  free(s1_fin);
  free(s2_fin);

  return 0;
}

这是运行的输出:

john-schultzs-macbook-pro:~ jschultz$ ./a.out
Input:
s1: 'CGGGTATCCAA'
s2: 'CCCTAGGTCCCA'
Output:
s1: 'C----GGGTATCC-AA'
s2: 'CCCTAGG-T--CCCA-'

关于c - 在 C 中没有匹配项时在字符串中插入一个字符,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/28620773/

相关文章:

c - Strlwr 函数 - 在 xcode 9.2 中出现错误

java - 关于推荐引擎

algorithm - 纯函数图连通性

c - gcc undefined reference 当我知道引用是正确的

我可以在字符串中存储 NULL 吗?

R - 仅用一个替换多个出现的字符。正则表达式

java - 为简单阈值分类器寻找多类阈值

c - printf 和 fprintf 之间的不同行为

c - 函数 gets() 不会停止接受输入

c - 打印双指针字符数组时出现运行时错误