c - 为什么 C 中的结构指针(方法)比普通函数慢得多?

标签 c pointers struct

我最近在越来越多的项目中使用 C,并且几乎最终用结构指针创建了我自己的“对象实现”。但是,我很好奇纯函数式风格(带有结构)与以更现代的面向对象风格调用函数指针的结构之间的速度差异。



代码本身只是在 for 循环中把三个数字反复相加,循环次数由宏 LOOP_LEN 指定。

请注意,我测量的函数都是内联,编译器优化从无到完全优化 (/Ox)(我在 Visual Studio 中运行它作为纯 .c 文件)。


// MAGIC object
//
// Three integer properties, each held through its own pointer, plus a small
// table of function pointers that act as methods.
//
// The struct carries the tag `magic` so that `struct magic*` in the method
// signatures names this very type. Without the tag, `struct magic` would be a
// separate incomplete type and functions taking `magic*` would not match the
// method slots.
typedef struct magic {

    // Properties
    int* x;
    int* y;
    int* z;

    // Methods (the instance is passed explicitly as `self`)
    void(*init)(struct magic* self, int x, int y, int z);
    int(*sum)(struct magic* self);

} magic;


// Variable init function
//
// Stores the three values into an existing instance.
//
//   self    - instance whose x, y and z members already point to valid ints
//   x, y, z - values to store
//
// Returns nothing: the `init` method slot in the struct is declared as
// returning void, and the previous `void*` version never returned a value.
void init(magic* self, int x, int y, int z) {

    // Assign variables to properties
    *self->x = x;
    *self->y = y;
    *self->z = z;   // previously assigned `y`, so z never received its own value
}



// Add all variables together
//
// Returns *x + *y + *z of the given instance.
//
// `static` gives the function internal linkage. Its address is stored in the
// object's `sum` method slot, and a plain C99/C11 `inline` definition does not
// by itself provide the external definition that such a reference needs.
static inline int sum(magic* self) {
    return ((*self->x) + (*self->y) + (*self->z));
}

// Magic object constructor
//
// Allocates an instance together with its three integer properties, stores
// x, y and z in them and wires up the method pointers.
//
// Returns the new instance, or NULL if any allocation fails (nothing is leaked
// in that case). The caller owns the result and releases it with delete_m().
magic* new_m(int x, int y, int z) {

    // Allocate self
    magic* self = malloc(sizeof *self);
    if (self == NULL) {
        return NULL;
    }

    // Allocate member pointers
    self->x = malloc(sizeof *self->x);
    self->y = malloc(sizeof *self->y);
    self->z = malloc(sizeof *self->z);
    if (self->x == NULL || self->y == NULL || self->z == NULL) {
        // free(NULL) is a no-op, so a partial failure can be undone uniformly
        free(self->x);
        free(self->y);
        free(self->z);
        free(self);
        return NULL;
    }

    // Store the constructor arguments; otherwise sum() would read
    // uninitialized heap memory
    *self->x = x;
    *self->y = y;
    *self->z = z;

    // Assign method pointers
    self->init = init;
    self->sum = sum;

    // Return instance
    return self;
}

// Destructor
//
// Releases the three property allocations and then the instance itself.
// Passing NULL is allowed and does nothing, mirroring free().
//
// `self` is passed by value, so the caller's pointer is left dangling and must
// not be used afterwards.
void delete_m(magic* self) {

    if (self == NULL) {
        return;
    }

    // Deallocate memory from constructor
    free(self->x);
    free(self->y);
    free(self->z);
    free(self);
}




// Non object oriented approach
//
// Same three pointer-held integer properties as the object version, but with
// no method pointers: the functions operating on it are called directly.
typedef struct str_magic {
    int* x;
    int* y;
    int* z;
} str_magic;

// Magic struct constructor
//
// Allocates a str_magic together with its three integer properties and stores
// x, y and z in them.
//
// Returns the new instance, or NULL if any allocation fails (nothing is leaked
// in that case). The caller owns the result and releases it with
// delete_m_str().
str_magic* new_m_str(int x, int y, int z) {

    // Allocate self
    str_magic* self = malloc(sizeof *self);
    if (self == NULL) {
        return NULL;
    }

    // Allocate member pointers
    self->x = malloc(sizeof *self->x);
    self->y = malloc(sizeof *self->y);
    self->z = malloc(sizeof *self->z);
    if (self->x == NULL || self->y == NULL || self->z == NULL) {
        // free(NULL) is a no-op, so a partial failure can be undone uniformly
        free(self->x);
        free(self->y);
        free(self->z);
        free(self);
        return NULL;
    }

    // Store the constructor arguments; otherwise sum_str() would read
    // uninitialized heap memory
    *self->x = x;
    *self->y = y;
    *self->z = z;

    // Return instance
    return self;
}

// Destructor
//
// Releases the three property allocations and then the struct itself.
// Passing NULL is allowed and does nothing, mirroring free().
//
// `self` is passed by value, so the caller's pointer is left dangling and must
// not be used afterwards.
void delete_m_str(str_magic* self) {

    if (self == NULL) {
        return;
    }

    // Deallocate memory from constructor
    free(self->x);
    free(self->y);
    free(self->z);
    free(self);
}



// Sum using normal structure type
inline int sum_str(str_magic* self) {
    return ((*self->x) + (*self->y) + (*self->z));


#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define LOOP_LEN 1000000000

// Main entry point
//
// Times LOOP_LEN calls of sum() made through the object's function pointer
// (task 1) against LOOP_LEN direct calls of sum_str() (task 2), then prints
// both CPU times and their difference.
//
// Returns 0 on success, EXIT_FAILURE if an instance cannot be allocated.
int main(void) {

    // Timestamps and elapsed CPU seconds for both tasks
    clock_t start1, end1, start2, end2;
    double cpu_time_used1, cpu_time_used2;

    // Init instances before timer
    magic* object1 = new_m(1, 2, 3);
    if (object1 == NULL) {
        fprintf(stderr, "new_m failed\n");
        return EXIT_FAILURE;
    }

    // Start task1 clock
    start1 = clock();

    for (int i = 0; i < LOOP_LEN; i++) {
        // Perform method sum and store result
        int result1 = object1->sum(object1);
        // The result is deliberately never read; the cast only silences the
        // unused-variable warning and does not keep the call alive.
        (void)result1;
    }

    // Stop task1 clock
    end1 = clock();

    // Remove from memory
    delete_m(object1);

    // Calculate task1 execution time
    cpu_time_used1 = ((double)(end1 - start1)) / CLOCKS_PER_SEC;

    // Init instances before timer
    str_magic* object2 = new_m_str(1, 2, 3);
    if (object2 == NULL) {
        fprintf(stderr, "new_m_str failed\n");
        return EXIT_FAILURE;
    }

    // Start task2 clock
    start2 = clock();

    for (int i = 0; i < LOOP_LEN; i++) {
        // Perform function and store result
        int result2 = sum_str(object2);
        // Unused on purpose, as above; an optimizing compiler may remove this
        // whole loop because nothing observes the result.
        (void)result2;
    }

    // Stop task2 clock
    end2 = clock();

    // Remove from memory
    delete_m_str(object2);

    // Calculate task 2 execution time
    cpu_time_used2 = ((double)(end2 - start2)) / CLOCKS_PER_SEC;

    // Print time results.
    // "%e" replaces "%.*e": the '*' precision consumes an extra int argument
    // that was never passed, which is undefined behaviour.
    printf("----------------------\n    Task 1 : %e\n----------------------\n    Task 2 : %e\n----------------------\n", cpu_time_used1, cpu_time_used2);

    if (cpu_time_used1 < cpu_time_used2) {
        printf("Object Oriented Approach was faster by %e\n", cpu_time_used2 - cpu_time_used1);
    }
    else {
        printf("Functional Oriented Approach was faster by %e\n", cpu_time_used1 - cpu_time_used2);
    }

    // Wait for keyboard interrupt
    (void)getchar();

    return 0;
}




使用 /O2 编译时,第二个循环被编译成:

    ; /O2 output for the second timed region: only the two clock() calls remain
    call     clock    ; start2 = clock()
    mov      edi, eax ; this is used later to calculate time
    call     clock    ; end2 = clock(); no sum_str loop is left in between

也就是说,两次 clock 调用之间根本没有任何代码。编译器能够理解 sum_str 函数的结果未被使用,因此将其完全删除。对于第一种情况(通过函数指针进行的间接调用),编译器无法执行相同的操作。

在不启用优化时,两个循环分别被编译成下面两段代码(先是第一个循环,然后是第二个循环):




    ; Unoptimized first loop: object1->sum(object1), called through the function pointer
    cmp      DWORD PTR i$1[rsp], 1000000000  ; compare i with LOOP_LEN
    jge      SHORT $LN3@main                 ; loop exit
    mov      rcx, QWORD PTR object1$[rsp]    ; rcx = object1 (the self argument)
    mov      rax, QWORD PTR object1$[rsp]    ; extra instruction
    call     QWORD PTR [rax+32]              ; indirect call
    mov      DWORD PTR result1$3[rsp], eax   ; result1 = returned sum
    jmp      SHORT $LN2@main                 ; jump to the next iteration


    ; Unoptimized second loop: sum_str(object2), called directly by name
    cmp      DWORD PTR i$2[rsp], 1000000000  ; compare i with LOOP_LEN
    jge      SHORT $LN6@main                 ; loop exit
    mov      rcx, QWORD PTR object2$[rsp]    ; rcx = object2 (the self argument)
    call     sum_str                         ; direct call
    mov      DWORD PTR result2$4[rsp], eax   ; result2 = returned sum
    jmp      SHORT $LN5@main                 ; jump to the next iteration

sum 和 sum_str 都被编译成等效的指令序列。

区别在于循环中的一条指令,而且间接调用速度较慢。总的来说,没有优化的两个版本之间应该不会有太大差异 - 都应该很慢。

关于c - 为什么 C 中的结构指针(方法)比普通函数慢得多?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/48647130/


c - 在 C 中收到 "member not in struct"错误

c++ - 删除列表中的结构元素

c - Linux驱动编写,file_operations incompatible pointer type错误

c++ - 直接访问指针是否比通过结构访问指针更快?

c - 如何让 gcc 吐出从流程图到源代码行号的映射

c++ - C函数反转char数组字符串

c - 指针、结构和 malloc()

c - 将地址或指针传递给 C 函数

c - memcpy函数的实现

objective-c - 多个if条件