c - 在 C 中,我应该如何在字符和另一个字符之间剪切文本文件?

标签 c file gcc cygwin text-files

我尝试编写一个程序,在一个字符和另一个字符之间剪切文本文件。为了说明这一点,我想要剪切的 example.txt 包含数字和一些标记字符,例如 '[[[' 和 ']]]'(标记字符的种类和个数可能会有所不同)。这些字符标记剪切过程的开始或结束。

这个过程看起来很简单;然而,做起来可能很繁琐,也很容易出错。不管怎样,在撰写这个问题的过程中,除了保存到另一个文本文件这一步之外,我已经解决了其中的大部分问题。[[[ 的数量是不确定的:也就是说,我事先不知道要创建多少个文件。我对新的思路和想法持开放态度,使用哪种编程语言都可以。

输入文件示例及其输出:

示例.txt

005076,-0.00008389,0.00039446,-0.00007079,0.00024138,0.00059449,-0.000 24860,[[[ 0.00032513,0.00000142,0.00001302,0.00000965,0.00005201,0.00001 395,-0.00003729,-0.00001774,-0.00002842,0.00000745,0.00143565,-0.00014 975,0.00166676,0.00086413,-0.00071107,-0.00081336,-0.10930688,0.062458 9,-0.00000142,0.00001301,0.00000965,-0.00005203,-0.00001394,-0.0000373 0,0.00001775,0.00002840,0.00000747,0.00143637,0.00015079,0.00166945 ]]] 0.00086406,0.00071132,-0.00081439,0.10930956,-0.06242029,-0.21353303,0 00003668,-0.00000001,0.00088168,-0.03589109,-0.00000584,0.00366844,0.0 [[[ 0.3708,-0.00000623,-0.00000093,-0.00001597,-0.00010373,-0.00001235,- 0.00008982,0.00008547,0.00000138,0.00001692,-0.00005915,0.00000005,0.0 090,0.00000182,-0.00000396,-0.00000575,0.00000367,0.00000364,-0.000012 73,0.00004112,-0.00000056,0.00007256,0.00001237,-0.00000176 ]]]

cut1.txt

0.00032513,0.00000142,0.00001302,0.00000965,0.00005201,0.00001 395,-0.00003729,-0.00001774,-0.00002842,0.00000745,0.00143565,-0.00014 975,0.00166676,0.00086413,-0.00071107,-0.00081336,-0.10930688,0.062458 9,-0.00000142,0.00001301,0.00000965,-0.00005203,-0.00001394,-0.0000373 0,0.00001775,0.00002840,0.00000747,0.00143637,0.00015079,0.00166945

cut2.txt

0.3708,-0.00000623,-0.00000093,-0.00001597,-0.00010373,-0.00001235,- 0.00008982,0.00008547,0.00000138,0.00001692,-0.00005915,0.00000005,0.0 090,0.00000182,-0.00000396,-0.00000575,0.00000367,0.00000364,-0.000012 73,0.00004112,-0.00000056,0.00007256,0.00001237,-0.00000176

编译器系统信息:

$ cygcheck --version
cygcheck (cygwin) 2.0.4
System Checker for Cygwin
Copyright (C) 1998 - 2015 Red Hat, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

$ gcc --version
gcc (GCC) 4.9.2
Copyright (C) 2014 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

编译信息

$ gcc -o cutt cutt.c

$ ./cutt

0.00032513,0.00000142,0.00001302,0.00000965,0.00005201,0.00001 395,-0.00003729,-0.00001774,-0.00002842,0.00000745,0.00143565,-0.00014 975,0.00166676,0.00086413,-0.00071107,-0.00081336,-0.10930688,0.062458 9,-0.00000142,0.00001301,0.00000965,-0.00005203,-0.00001394,-0.0000373 0,0.00001775,0.00002840,0.00000747,0.00143637,0.00015079,0.00166945 ]][ 0.3708,-0.00000623,-0.00000093,-0.00001597,-0.00010373,-0.00001235,- 0.00008982,0.00008547,0.00000138,0.00001692,-0.00005915,0.00000005,0.0 090,0.00000182,-0.00000396,-0.00000575,0.00000367,0.00000364,-0.000012 73,0.00004112,-0.00000056,0.00007256,0.00001237,-0.00000176

有多少对 [[[ ]]],就需要生成多少个 cutN.txt 文件。

cutt.c 代码:

#include<stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#define MAX_LEN 1024

/*
 * State shared between main() and Cutt():
 *   testp - 0 initially; Cutt() sets it to 1 after seeing three consecutive
 *           '[' characters and to 2 after three consecutive ']' characters.
 *   r     - index of the next free slot in cx.
 */
int testp=0, r=0;
/*
 * cx collects the characters copied by Cutt(); main() copies a trimmed
 * range of cx into cz and prints cz.
 * NOTE(review): nothing in this program checks r against MAX_LEN, so input
 * with more than MAX_LEN copied characters would overflow cx.
 */
char cx[MAX_LEN],cz[MAX_LEN];
/* Cutt(line, marker, index): examine line[index]; defined after main(). */
void Cutt(char[],char[],int);

/*
 * Reads example.txt line by line, feeds every character position of every
 * line to Cutt(), then prints the collected text (minus its first character
 * and its last two characters) to standard output.
 *
 * Returns 1 if example.txt or cut1.txt cannot be opened, 0 otherwise.
 *
 * NOTE(review): the return type is omitted (implicit int), which is not
 * valid in C99 and later.
 */
main()
{
    FILE *fpR, *fpW;
    /* ch holds one input line; sEArch4 is meant to hold the "[[[" marker. */
    char ch[MAX_LEN],sEArch4[3];
    int chc,i=0,cxc;
    /*
     * 0x5b is '['.
     * NOTE(review): sEArch4 has 3 elements (valid indices 0..2), so the write
     * to sEArch4[3] is out of bounds; sEArch4[0] is never assigned and there
     * is no terminating '\0', yet Cutt() calls strlen() on this array.
     */
    sEArch4[1]=0x5b;sEArch4[2]=0x5b;sEArch4[3]=0x5b;

    fpR = fopen("example.txt","r");
    if (fpR==NULL) { 
        printf("Could not open example.txt !\n"); 
        return 1;
    }

    /*
     * NOTE(review): cut1.txt is created/truncated here, but fpW is never
     * written to or closed in this function; fpR is not closed either.
     */
    fpW = fopen("cut1.txt","w");
    if (fpW==NULL) { 
        printf("Could not open cut1.txt!\n"); 
        return 1;
    }

    /* Process the input one line (at most MAX_LEN-1 characters) at a time. */
    while(fgets(ch,MAX_LEN,fpR)) {
        //printf("%s",ch);
        chc = strlen(ch);
        /* Call Cutt() once for every character index of the current line. */
        for(i=0;i<chc;i++)
            //printf("%s %d %d",ch,i,chc);
            Cutt(ch,sEArch4,i);
    }
    //printf("%s",cx);
    cxc = strlen(cx);
    /*
     * Copy cx[1] .. cx[cxc-3] into cz, i.e. drop the first character and the
     * last two characters of everything Cutt() collected. cz is a zeroed
     * global, so the copied text stays NUL-terminated.
     */
    for(i=1;i<cxc-2;i++)
        cz[i-1] = cx[i];
    printf("%s",cz);
//Jump; 
    return 0;
}
/*
 * Examine the character(s) of ch starting at index i and update the global
 * state (testp, cx, r).
 *
 *   ch      - the current input line.
 *   sEArch4 - marker buffer supplied by the caller; only sEArch4[k] with
 *             k == 0 is ever compared, because k is never incremented
 *             (see the note on k++ below).
 *   i       - index in ch at which to start.
 *
 * For the first position j >= i where ch[j] differs from sEArch4[0]:
 *   - if ch[j-2..j] are all '[' (0x5b), testp becomes 1;
 *   - if ch[j-2..j] are all ']' (0x5d), testp becomes 2;
 *   - if testp is 1 afterwards, ch[j] is appended to cx;
 *   - the function then returns in every case.
 *
 * Because testp is updated before the copy decision, the '[' that completes
 * "[[[" is copied to cx, and the first two ']' of "]]]" are copied as well
 * (testp is still 1 when they are seen); only the third ']' is not.
 *
 * NOTE(review): ch[j-1] and ch[j-2] are read without checking that j >= 2,
 * and r is not checked against the size of cx.
 */
void Cutt(char ch[],char sEArch4[],int i)
{
    int j,k=0;
    /* NOTE(review): strlen() requires sEArch4 to be NUL-terminated. */
    for (j=i ; j<i+strlen(sEArch4) ; j++ ) {
        if(ch[j]!=sEArch4[k]){
            /* Three consecutive '[' ending at j: start copying. */
            if ( ch [j] == 0x5b && ch [j-1] == 0x5b && ch [j-2] == 0x5b ) {
                testp = 1;
            }
            /* Three consecutive ']' ending at j: stop copying. */
            if ( ch [j] == 0x5d && ch [j-1] == 0x5d && ch [j-2] == 0x5d ) {
                testp = 2;
            }   
            /* While in copying state, append the current character to cx. */
            if(testp==1) {  
                cx[r] = ch[j];
                r++;
                return;
            }
            //printf("testp:%d \nch:%s\n",testp,x);
            if(testp==2)
                return;
            //goto Jump;
            else
                return; 
            /* Unreachable: both branches of the if/else above return. */
            k++;
        }
    }
}

注意: 对于单次切割,它运行没有问题。对于多次剪切,有 ]][ 字符。查看编译信息。我认为它将用于剪切。

最佳答案

有几种方法可以解决这个问题。一种是像您一样逐行阅读,另一种是逐个字符地阅读。逐个字符的方法有一些优点,但主要优点是,如果一次处理一个字符,则不必担心特殊情况,例如包含以下内容的单行:

0.345[[[0.456]]]0.001[[[0.002]]]0.999[[[

如果你一次扫描一行,就必须相当小心地处理这类棘手的数据。从问题中复制出来的数据,并不能完全看出它是否全部在一行上。此外,代码必须小心处理 [[ 后跟 [ 之外的其他内容的情况,同样也要处理 ]] 后跟 ] 以外的内容的情况。

您似乎没有每次识别 [[[ 序列时打开不同文件的代码;您也没有明确关闭辅助文件的代码。据推测,如果 ]]] 序列之前的最后一个字符不是换行符,则代码应输出换行符,以便文本文件实际上以换行符结尾。

通用的命令应当带有参数来指定起始序列、结束序列、输出文件名的前缀(可能还有后缀),并且会读取命令行上指定的文件;如果没有指定文件,则默认读取标准输入。出于本练习的目的,我们把所有内容都硬编码在程序里,但从长远来看,这不是应有的做法。

这就是我要做的。提交它作为答案时要小心——它会做一些你可能没有想到的事情。

#include <stdio.h>
#include <stdlib.h>

static int filenum = 0;
static const char s_mark[] = "[[[";
static const char e_mark[] = "]]]";
static const char f_prefix[] = "cut";
static const char f_suffix[] = ".txt";
static const char datafile[] = "example.txt";

static FILE *open_file(void)
{
    char filename[32];
    snprintf(filename, sizeof(filename), "%s%d%s", f_prefix, ++filenum, f_suffix);
    FILE *fp = fopen(filename, "w");
    if (fp == 0)
    {
        fprintf(stderr, "Failed to create file %s for writing\n", filename);
        exit(EXIT_FAILURE);
    }
    return fp;
}

static void dump_to_file(FILE *ifp)
{
    FILE *ofp = open_file();
    int ch;
    char last = '\0';
    int e_mark_idx = 0;

    while ((ch = getc(ifp)) != EOF)
    {
        if (ch == e_mark[e_mark_idx])
        {
            e_mark_idx++;
            if (e_mark[e_mark_idx] == '\0')
            {
                /* All done */
                if (last != '\n')
                    putc('\n', ofp);
                break;
            }
        }
        else
        {
            if (e_mark_idx > 0)
                fprintf(ofp, "%.*s", e_mark_idx, e_mark);
            e_mark_idx = 0;
            putc(ch, ofp);
            last = ch;
        }
    }

    fclose(ofp);
}

/*
 * Split the hard-wired input file into sections.
 *
 * Text outside start/end markers is echoed to standard output; each section
 * that follows a start marker (s_mark) is handed to dump_to_file(), which
 * writes it to its own numbered file. Characters that looked like the
 * beginning of a start marker but were not followed by the rest of it are
 * echoed unchanged, including when the input ends in the middle of such a
 * partial marker.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if the input file cannot be
 * opened.
 */
int main(void)
{
    int s_mark_idx = 0;     /* number of start-marker characters matched so far */
    int ch;
    FILE *ifp = fopen(datafile, "r");

    if (ifp == NULL)
    {
        fprintf(stderr, "Failed to open file %s for reading\n", datafile);
        exit(EXIT_FAILURE);
    }

    while ((ch = getc(ifp)) != EOF)
    {
        if (ch == s_mark[s_mark_idx])
        {
            s_mark_idx++;
            if (s_mark[s_mark_idx] == '\0')
            {
                /* Found start marker - dup following text to file */
                dump_to_file(ifp);
                s_mark_idx = 0;
            }
        }
        else
        {
            /* False alarm: echo the marker characters that were held back */
            if (s_mark_idx > 0)
                printf("%.*s", s_mark_idx, s_mark);
            s_mark_idx = 0;
            putchar(ch);
        }
    }

    /*
     * Input ended while a start marker was only partially matched: those
     * characters are ordinary data and must still be echoed.
     */
    if (s_mark_idx > 0)
        printf("%.*s", s_mark_idx, s_mark);

    fclose(ifp);

    return 0;
}

给定此示例输入文件:

005076,-0.00008389,0.00039446,-0.00007079,0.00024138,0.00059449,-0.000 24860,[[[ 0.00032513,0.00000142,0.00001302,0.00000965,0.00005201,0.00001 395,-0.00003729,-0.00001774,-0.00002842,0.00000745,0.00143565,-0.00014 975,0.00166676,0.00086413,-0.00071107,-0.00081336,-0.10930688,0.062458 9,-0.00000142,0.00001301,0.00000965,-0.00005203,-0.00001394,-0.0000373 0,0.00001775,0.00002840,0.00000747,0.00143637,0.00015079,0.00166945 ]]] 0.00086406,0.00071132,-0.00081439,0.10930956,-0.06242029,-0.21353303,0 00003668,-0.00000001,0.00088168,-0.03589109,-0.00000584,0.00366844,0.0 [[[ 0.3708,-0.00000623,-0.00000093,-0.00001597,-0.00010373,-0.00001235,- 0.00008982,0.00008547,0.00000138,0.00001692,-0.00005915,0.00000005,0.0 090,0.00000182,-0.00000396,-0.00000575,0.00000367,0.00000364,-0.000012 73,0.00004112,-0.00000056,0.00007256,0.00001237,-0.00000176 ]]]
0.345[[[0.456]]]0.001[[[0.002]]]0.999[[[
0.2333]]]
[[0.234[234.567]]]0.000[[[0.123]0.234]]0.345]]]

它生成标准输出:

005076,-0.00008389,0.00039446,-0.00007079,0.00024138,0.00059449,-0.000 24860, 0.00086406,0.00071132,-0.00081439,0.10930956,-0.06242029,-0.21353303,0 00003668,-0.00000001,0.00088168,-0.03589109,-0.00000584,0.00366844,0.0 
0.3450.0010.999
[[0.234[234.567]]]0.000

并写出 cut1.txt 到 cut6.txt 这些文件:

cut1.txt: 0.00032513,0.00000142,0.00001302,0.00000965,0.00005201,0.00001 395,-0.00003729,-0.00001774,-0.00002842,0.00000745,0.00143565,-0.00014 975,0.00166676,0.00086413,-0.00071107,-0.00081336,-0.10930688,0.062458 9,-0.00000142,0.00001301,0.00000965,-0.00005203,-0.00001394,-0.0000373 0,0.00001775,0.00002840,0.00000747,0.00143637,0.00015079,0.00166945 
cut2.txt: 0.3708,-0.00000623,-0.00000093,-0.00001597,-0.00010373,-0.00001235,- 0.00008982,0.00008547,0.00000138,0.00001692,-0.00005915,0.00000005,0.0 090,0.00000182,-0.00000396,-0.00000575,0.00000367,0.00000364,-0.000012 73,0.00004112,-0.00000056,0.00007256,0.00001237,-0.00000176 
cut3.txt:0.456
cut4.txt:0.002
cut5.txt:
cut5.txt:0.2333
cut6.txt:0.123]0.234]]0.345

关于c - 在 C 中,我应该如何在字符和另一个字符之间剪切文本文件?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/31307441/

相关文章:

C 程序卡在 strcmp 附近

perror 示例可以改进吗?

gcc - 构建 R 包时如何链接静态库

c++ - GCC(通过CUDA)内部函数的编译器错误,但我没有使用任何

c - 如何在 C 中通过 malloc 为字符串分配指针数组?

c - 为什么我的二进制更新打印乱码/垃圾而不是我插入的信息?

c++ - 为什么我的程序中的第二个 while 循环在它前面有一个 while 循环时不起作用?

C# FileInfo - 查找重复文件

c++ - 是否有使用 C 库中的符号而不是通过命名空间 std 的 GCC 警告?

c - sleep 系统调用,默认 sleep 时间是多少?