c++ - C语言在不同架构上的文件操作

作为一个研究项目，我们正在标准 C (BINARY) 文件处理库 (stdio) 之上编写一个抽象层，通过提供一些用于事务处理文件的额外功能。

工作流程如下:

用户使用我们的 API(或标准 fopen)打开一个文件。两者都返回 FILE*。文件以二进制模式打开!

用户使用标准库命令(例如 fwrite)将数据写入文件

用户使用我们的 API 在打开的文件上打开一个交易:TRANSACTION a = trans_start(FILE* )

用户为 TRANSACTION 设置数据验证器对象( set_validator(TRANSACTION, int(*)(char*)) )

用户使用我们自己的 API ( int trans_write_string(TRANSACTION*, char*, length) ) 向事务“写入”数据

实际上，这个“写入”将其数据放在上面定义的验证器的内存中，它可能对数据进行操作并在某处设置一些标志......与问题无关。

用户使用 trans_commit(TRANSACTION)以便实际将数据写入文件。现在，根据验证器设置的标志，这可能不会将数据写入文件，而是向用户报告错误(可以通过编程解决。...与问题无关)。

用户使用标准 API 关闭文件 fclose .

到目前为止，我们只有 API 的字符串处理方法( trans_write_string )，它运行良好。它构建自己的内存数据缓冲区，根据需要修改内容，调用验证器等......在连续调用中，它将新数据附加到其内部内存缓冲区中，处理分配等......并在成功提交时使用 fwrite 将数据写入文件(是的，这主要是一个 C 项目，但也不排斥 C++ 答案)。

但是现在我们想要(...必须)扩展 API，使其也能写入数字(16 位、32 位、64 位)以及 float ......做法与标准 C 的 stdio API 非常相似。沿用已经存在的字符串实现，假设我们在内存中有一个数据缓冲区，其中先保存 N 字节的字符(字符串本身)，接着是 16 位值所需的 2 个字节，然后是另一个字符串的 M 个字节，64 位值的 8 个字节，16 位值的 2 个字节，等等......

我们陷入了“如何在文件中表示数字以便其他使用不同计算机/架构/操作系统/字节序的人也能读取”的问题。

理论上，可以把数字的地址转换为字符指针( char* addr = &my_16bit_int )，再把 *(addr) 和 *(addr + 1) 放到所需的地址(即:在字符串的 N 个字符之后)，从而将数字插入内存流中；把它写入文件也是可行的。但是，如果我想在字节序不同的另一种体系结构上读取结果文件怎么办？如果“另一台”计算机只是一堆 16 位的古老金属怎么办？在这种情况下，写入文件的 64 位值会发生什么？

有哪些好的做法可以解决此类问题？

编辑 : 目标文件必须是二进制文件，它会附带一个描述其格式的文本文件(XML)(例如:N 8 字节字符，1 16 位值等)(此文本文件是基于在我们心爱的验证器的输出上)。验证器“说”这样的话，是的，我接受这个 16 位值，不，我拒绝这个长字符串，等等......其他人正在根据这个“输出”创建数据格式 XML。

编辑2 :是的，我们需要在各种平台上共享文件，即使是 20 年的冰箱大小的巨大盒子:)

编辑3 : 是的，我们也需要 float !

最佳答案

类型转换不够，我觉得sockets方法htons和 htonl将是 int16 和 int32 的足够解决方案。对于 int64，您应该自己构建它，因为没有官方方法:

请注意，所有函数都仅在需要时才颠倒字节顺序，因此您也可以使用相同的函数将数字“修复”回正常的字节顺序。

/* Overlay of a 16-bit value (`s`) and its two individual bytes (`c`). */
typedef union {
    unsigned char c[2];
    unsigned short s;
} U2;

/*
 * Convert a 16-bit value from host byte order to network (big-endian)
 * byte order. The standard htons can be used instead of this one.
 *
 * The constant 0x0001 is stored in a U2 so that its in-memory bytes act as
 * a permutation table: order.c[i] is the destination index for source
 * byte i. On a little-endian host the table is {1, 0} (bytes are swapped);
 * on a big-endian host it is {0, 1} (bytes are copied unchanged).
 * Applying the function twice therefore restores the original value.
 *
 * Assumes unsigned short is exactly two bytes wide.
 */
unsigned short htons(unsigned short s)
{
    U2 order;
    U2 out;
    const unsigned char *src = (const unsigned char *)&s;
    unsigned int i;

    order.s = 0x0001;
    for (i = 0; i < sizeof out.c; i++) {
        out.c[order.c[i]] = src[i];
    }
    return out.s;
}

/*
 * Overlay of a 32-bit value with its bytes and 16-bit halves.
 *
 * NOTE: `l` is an unsigned long, which is 8 bytes on LP64 platforms; there
 * the overlay with c[4] / s[2] only covers part of `l`. Code that relies on
 * an exact 4-byte overlay must not assume it on such platforms.
 */
typedef union{
    unsigned char c[4];
    unsigned short s[2];
    unsigned long l;
}U4;

/*
 * Convert a 32-bit value from host byte order to network (big-endian)
 * byte order. The standard htonl can be used instead of this one.
 *
 * Only the low 32 bits of `l` take part in the conversion; any higher bits
 * (possible when unsigned long is wider than 32 bits) are discarded and the
 * upper bits of the result are always zero. Storing the result in a 32-bit
 * object yields the bytes of `l` in most-significant-first order.
 *
 * The conversion is done with shifts rather than by overlaying a byte array
 * on an unsigned long, so it does not depend on sizeof(unsigned long).
 * Supports pure little-endian and pure big-endian hosts.
 */
unsigned long htonl(unsigned long l)
{
    const unsigned short probe = 0x0001;
    const unsigned long v = l & 0xFFFFFFFFUL;

    /* First byte of 0x0001 is 0 only on a big-endian host: already in
     * network order, nothing to swap. */
    if (*(const unsigned char *)&probe == 0) {
        return v;
    }

    return ((v & 0x000000FFUL) << 24)
         | ((v & 0x0000FF00UL) << 8)
         | ((v & 0x00FF0000UL) >> 8)
         | ((v & 0xFF000000UL) >> 24);
}

/*
 * Overlay of a 64-bit value (`ll`) with several narrower views of the same
 * storage: single bytes, two groups of four bytes, 16-bit and long words.
 *
 * NOTE: where unsigned long is 8 bytes, `l[2]` is larger than `ll`, so only
 * `c`, `c2` and `s` overlay `ll` exactly on every platform.
 */
typedef union{
    unsigned char c[8];
    unsigned char c2[2][4];
    unsigned short s[4];
    unsigned long l[2];
    unsigned long long ll;
}U8;

/*
 * Convert a 64-bit value from host byte order to network (big-endian)
 * byte order.
 *
 * Each output byte is extracted from `ll` with a shift, most significant
 * byte first, and stored at increasing addresses. Shifts operate on the
 * value rather than on its memory layout, so the result is correct for any
 * host byte order and does not depend on sizeof(unsigned long). Applying
 * the function twice restores the original value.
 *
 * Assumes unsigned long long is exactly eight bytes wide.
 */
unsigned long long htonll(unsigned long long ll)
{
    U8 res;
    unsigned int i;

    for (i = 0; i < sizeof res.c; i++) {
        /* i == 0 takes bits 63..56, i == 7 takes bits 7..0. */
        res.c[i] = (unsigned char)((ll >> (8 * (sizeof res.c - 1 - i))) & 0xFFu);
    }
    return res.ll;
}
/*
 * Alternative 64-bit host-to-network conversion built on top of htonl.
 *
 * The value is split into its high and low 32-bit halves with shifts, each
 * half is converted with htonl, and the halves are recombined so that the
 * converted high half occupies the lower addresses of the result.
 *
 * The halves are taken from the value itself instead of reading `ll`
 * through an `unsigned long *`: that cast violated strict aliasing and,
 * where unsigned long is 8 bytes, read past the end of `ll`.
 *
 * Each htonl result is masked to 32 bits so that nothing above bit 31 can
 * leak into the result when unsigned long is wider than 32 bits.
 */
unsigned long long htonll2(unsigned long long ll)
{
    U2 probe;
    unsigned long long hi_net;
    unsigned long long lo_net;

    probe.s = 0x0001;
    hi_net = htonl((unsigned long)((ll >> 32) & 0xFFFFFFFFUL)) & 0xFFFFFFFFUL;
    lo_net = htonl((unsigned long)(ll & 0xFFFFFFFFUL)) & 0xFFFFFFFFUL;

    /* probe.c[0] == 0 means the host stores the most significant byte
     * first, so the high half belongs in the upper bits of the value. */
    if (probe.c[0] == 0) {
        return (hi_net << 32) | lo_net;
    }
    /* Little-endian host: the lower addresses hold the low bits of the
     * value, so the converted high half goes there. */
    return (lo_net << 32) | hi_net;
}

// Demo driver: runs each conversion on a fixed sample value and prints the
// result in hexadecimal, one value per line.
int main()
{
    const unsigned short sample16 = 0x1122;
    const unsigned long sample32 = 0x11223344;
    const unsigned long long sample64 = 0x1122334455667788;

    // The hex manipulator is sticky, so setting it once covers every
    // value printed below.
    cout << hex;

    cout << htons(sample16) << endl;
    cout << htonl(sample32) << endl;
    cout << htonll(sample64) << endl;
    cout << htonll2(sample64) << endl;

    return 0;
}

关于c++ - C语言在不同架构上的文件操作，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/19904170/

c++ - C语言在不同架构上的文件操作

上一篇：c++ - 如何删除包含结构的 map 或包含结构的 map ？

下一篇：c++ - 带有 Protocol Buffer 的 RPC