c - gcc 内置的 __atomic 或 __sync 函数无法提供对全局变量的一致访问

标签 c linux multithreading gcc

我有一个全局整数(下面程序中 struct LogBufferDescriptor 的 input_level 字段),它由一个线程写入,并由多个线程读取。我使用 gcc 内置的 __atomic_store_n()/__atomic_load_n() 函数,或遗留的 __sync_lock_test_and_set()/__sync_fetch_and_add() 函数来访问它,但两者都不能提供一致的结果。

gcc 内置的 __atomic 函数和遗留的 __sync 函数都不能提供对整数的一致访问。目前,只有信号量和 pthread_mutex 工作正常。

/*
gcc -g -o simulate_case simulate_case.c -lpthread
*/

#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/time.h>
#include <semaphore.h>

#define LOG_BUFFER_INPUT_LEVEL_DEFAULT     2
#define LOG_BUFFER_INPUT_LEVEL_NO_PRINTS   6
#define MS_TO_US(ms)                ((ms) * 1000)
#define TIMEOUT_MS                  100
#define SYNC_GCC_SYNC


/*
 * Shared state of the simulated log buffer.  A single file-scope instance,
 * G_log_buffer, is used by every thread in this program.
 */
typedef struct LogBufferDescriptor
{
    /* Current input level: written through set_input_level(), read through
     * get_input_level(); write_log() drops a message whose severity is
     * lower than this value. */
    int                          input_level;
    /* Held while the buffer is being dumped (dump_log_buffer) and while a
     * message is being written (write_log). */
    pthread_mutex_t              log_buffer_lock;
    /* Guards input_level when the SYNC_GCC_MUTEX variant is compiled in. */
    pthread_mutex_t              input_level_lock;
    /* Guards input_level when the SYNC_SEM variant is compiled in;
     * initialised with a count of 1 in init(). */
    sem_t                        input_level_sem;
} LogBufferDescriptor;

static LogBufferDescriptor       G_log_buffer;


/********** choose one implementation by defining corresponding macro ***********/

#ifdef SYNC_NONE
/* Variant: input_level is accessed with plain loads and stores. */

/* Print the name of the mechanism selected at compile time. */
static void print_sync_mechanism(void)
{
    fputs("Sync mechanism = NONE\n", stdout);
}

/* Store `level` into the shared descriptor with an ordinary assignment. */
static void set_input_level(int level)
{
    LogBufferDescriptor *desc = &G_log_buffer;

    desc->input_level = level;
}

/* Return the current input level with an ordinary read. */
static int get_input_level(void)
{
    const LogBufferDescriptor *desc = &G_log_buffer;

    return desc->input_level;
}
#endif

#ifdef SYNC_GCC_MUTEX
/* Variant: every access to input_level happens with input_level_lock held. */

/* Print the name of the mechanism selected at compile time. */
static void print_sync_mechanism(void)
{
    fputs("Sync mechanism = MUTEX\n", stdout);
}

/* Store `level` into the shared descriptor while holding input_level_lock. */
static void set_input_level(int level)
{
    pthread_mutex_t *guard = &G_log_buffer.input_level_lock;

    pthread_mutex_lock(guard);
    G_log_buffer.input_level = level;
    pthread_mutex_unlock(guard);
}

/* Return a copy of input_level taken while holding input_level_lock. */
static int get_input_level(void)
{
    pthread_mutex_t *guard = &G_log_buffer.input_level_lock;

    pthread_mutex_lock(guard);
    const int snapshot = G_log_buffer.input_level;
    pthread_mutex_unlock(guard);

    return snapshot;
}
#endif

#ifdef SYNC_SEM
/* Variant: input_level is accessed between sem_wait() and sem_post() on
 * input_level_sem. */

/* Print the name of the mechanism selected at compile time. */
static void print_sync_mechanism(void)
{
    fputs("Sync mechanism = semaphore\n", stdout);
}

/* Store `level` into the shared descriptor while holding the semaphore. */
static void set_input_level(int level)
{
    sem_t *gate = &G_log_buffer.input_level_sem;

    sem_wait(gate);
    G_log_buffer.input_level = level;
    sem_post(gate);
}

/* Return a copy of input_level taken while holding the semaphore. */
static int get_input_level(void)
{
    sem_t *gate = &G_log_buffer.input_level_sem;

    sem_wait(gate);
    const int snapshot = G_log_buffer.input_level;
    sem_post(gate);

    return snapshot;
}
#endif

#ifdef SYNC_GCC_ATOMIC
/* Variant: input_level is accessed through GCC __atomic builtins with
 * sequentially consistent ordering. */

/* Print the name of the mechanism selected at compile time. */
static void print_sync_mechanism(void)
{
    fputs("Sync mechanism = GCC ATOMIC\n", stdout);
}

/* Atomically store `level` into the shared descriptor. */
static void set_input_level(int level)
{
    __atomic_store(&G_log_buffer.input_level, &level, __ATOMIC_SEQ_CST);
}

/* Atomically load and return the current input level. */
static int get_input_level(void)
{
    int snapshot;

    __atomic_load(&G_log_buffer.input_level, &snapshot, __ATOMIC_SEQ_CST);
    return snapshot;
}
#endif

#ifdef SYNC_GCC_SYNC
/* Variant: input_level is accessed through the legacy GCC __sync builtins. */

/* Print the name of the mechanism selected at compile time. */
static void print_sync_mechanism(void)
{
    fputs("Sync mechanism = GCC LEGACY SYNC\n", stdout);
}

/*
 * Atomically exchange `level` into the shared descriptor; the previous
 * value is discarded.
 * NOTE(review): the GCC manual describes __sync_lock_test_and_set as an
 * acquire barrier rather than a full barrier -- confirm that is sufficient
 * for its use as a plain store here.
 */
static void set_input_level(int level)
{
    int *slot = &G_log_buffer.input_level;

    (void)__sync_lock_test_and_set(slot, level);
}

/* Read the current input level by atomically adding zero to it. */
static int get_input_level(void)
{
    int *slot = &G_log_buffer.input_level;
    const int snapshot = __sync_fetch_and_add(slot, 0);

    return snapshot;
}
#endif


/********** log collecting thread ***********/

/*
 * Simulates one dump of the log buffer.
 *
 * The input level is raised to LOG_BUFFER_INPUT_LEVEL_NO_PRINTS for the
 * duration of the dump and put back to LOG_BUFFER_INPUT_LEVEL_DEFAULT
 * afterwards.  The dump itself is a sleep of TIMEOUT_MS + 1 milliseconds
 * performed with log_buffer_lock held.
 */
static void dump_log_buffer()
{
    pthread_mutex_t *buffer_lock = &G_log_buffer.log_buffer_lock;

    /* Refuse new input while the dump is in progress. */
    set_input_level(LOG_BUFFER_INPUT_LEVEL_NO_PRINTS);

    pthread_mutex_lock(buffer_lock);
    usleep(MS_TO_US(TIMEOUT_MS + 1));  /* stand-in for the real dump work */
    pthread_mutex_unlock(buffer_lock);

    /* Accept input again. */
    set_input_level(LOG_BUFFER_INPUT_LEVEL_DEFAULT);
}

/*
 * Log collecting thread entry point.
 *
 * Loops forever: dumps the log buffer, sleeps a pseudo-random 0..4 ms, and
 * prints a progress line after every 160 dumps.
 *
 * arg: unused.
 * Returns NULL; not reached in practice because the loop never exits.
 */
static void *log_thread(void *arg)
{
    (void)arg;

    unsigned long count = 0;
    /* rand_r() takes an `unsigned int *` and keeps all of its state in the
     * caller's seed.  No srand() call is needed: nothing in this program
     * calls rand(), and a seed of 1 is rand()'s default anyway. */
    unsigned int seedp = 1;

    for (; ;)
    {
        dump_log_buffer();
        usleep(MS_TO_US(rand_r(&seedp) % 5));
        ++count;
        if (count % 160 == 0) printf("Dumped %lu times of logs.\n", count);  // prove thread is running
    }
    return NULL;
}

/********** business thread ***********/

/*
 * Simulates one log write from a business thread and reports when the call
 * took too long.
 *
 * severity: compared against get_input_level(); when it is lower, the
 *           function returns at once without touching the log buffer.
 *
 * Otherwise the function takes log_buffer_lock, sleeps 100 us in place of a
 * real buffer write, releases the lock, and prints a "Business delayed"
 * line if the whole call lasted TIMEOUT_MS milliseconds or more.
 *
 * The level check and the mutex acquisition are two separate steps.  The
 * level can be changed through set_input_level() after the check has
 * passed, in which case this call blocks in pthread_mutex_lock() until the
 * current holder releases log_buffer_lock.
 */
static void write_log(int severity)
{
    // Number of delayed calls reported so far; `static`, so one counter is
    // shared by every caller of this function.
    static unsigned int count = 0;
    struct timeval      start, end;

    gettimeofday(&start, NULL);

    if (severity < get_input_level()) return;  // below the current input level: drop the message
    pthread_mutex_lock(&G_log_buffer.log_buffer_lock);
    usleep(100);  // simulate writing to log buffer
    pthread_mutex_unlock(&G_log_buffer.log_buffer_lock);

    gettimeofday(&end, NULL);
    int diff = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_usec - start.tv_usec) / 1000;  // elapsed time in ms
    if (diff >= TIMEOUT_MS) printf("***** Business delayed %u time(s)! *****\n", ++count);
}

/* business thread prints logs randomly */
/*
 * Business thread entry point.
 *
 * Calls write_log() with LOG_BUFFER_INPUT_LEVEL_DEFAULT in an endless loop
 * and prints a progress line after every 131072000 calls.
 *
 * arg: unused.
 * Returns NULL; not reached in practice because the loop never exits.
 */
static void *business_thread(void *arg)
{
    unsigned long lines = 0;

    while (1)
    {
        write_log(LOG_BUFFER_INPUT_LEVEL_DEFAULT);
        lines += 1;

        /* Only report progress on exact multiples of the interval. */
        if (lines % 131072000 != 0)
        {
            continue;
        }
        printf("Printed %lu lines of logs.\n", lines);
    }
    return NULL;
}

/********** init ***********/

static void init()
{
    G_log_buffer.input_level = LOG_BUFFER_INPUT_LEVEL_DEFAULT;
    pthread_mutex_init(&G_log_buffer.log_buffer_lock, NULL);
    pthread_mutex_init(&G_log_buffer.input_level_lock, NULL);
    sem_init(&G_log_buffer.input_level_sem, 0, 1);  // binary semaphore
}

static void deinit()
{
    pthread_mutex_destroy(&G_log_buffer.log_buffer_lock);
    pthread_mutex_destroy(&G_log_buffer.input_level_lock);
    sem_destroy(&G_log_buffer.input_level_sem);
}

/*
 * Program entry point.
 *
 * Initialises the shared descriptor, prints which synchronisation variant
 * was compiled in, starts the log collecting thread and one business
 * thread, and waits for both.
 *
 * argc, argv: unused.
 * Returns 0 after both threads have been joined, or EXIT_FAILURE if a
 * thread could not be created.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    init();
    print_sync_mechanism();

    pthread_t thread_id1, thread_id2;
    int rc;

    /* pthread_create() returns an error number on failure and leaves the
     * thread id undefined, so joining must not be attempted in that case. */
    rc = pthread_create(&thread_id1, NULL, &log_thread, NULL);
    if (rc != 0)
    {
        fprintf(stderr, "pthread_create(log_thread) failed: error %d\n", rc);
        deinit();  /* no thread is running yet, so tearing down is safe */
        return EXIT_FAILURE;
    }

    rc = pthread_create(&thread_id2, NULL, &business_thread, NULL);
    if (rc != 0)
    {
        fprintf(stderr, "pthread_create(business_thread) failed: error %d\n", rc);
        /* The log thread is already running and using the locks, so they
         * are not destroyed here; process exit reclaims them. */
        return EXIT_FAILURE;
    }

    pthread_join(thread_id1, NULL);
    pthread_join(thread_id2, NULL);

    deinit();
    return 0;
}

在尝试锁定 log_buffer_lock 之前,我已经检查了 input_level,并且对 input_level 的访问是受保护的。理论上,业务线程不应该因为等待 log_buffer_lock 互斥量而被延迟。

但它实际上延迟了。使用 gcc 遗留 __sync 函数时的程序输出:

$ ./simulate_case
Sync mechanism = GCC LEGACY SYNC
......
Dumped 2080 times of logs.
Printed 5373952000 lines of logs.
***** Business delayed 1 time(s)! *****

操作系统和 gcc 信息:

$ cat /etc/redhat-release 
Red Hat Enterprise Linux Server release 6.5 (Santiago)
$ uname -a
Linux *** 3.17.8-13.el6.x86_64 #1 SMP Tue Mar 28 20:56:38 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux
$ gcc -v
Using built-in specs.
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-java-awt=gtk --disable-dssi --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-1.5.0.0/jre --enable-libgcj-multifile --enable-java-maintainer-mode --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --disable-libjava-multilib --with-ppl --with-cloog --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux
Thread model: posix
gcc version 4.4.7 20120313 (Red Hat 4.4.7-4) (GCC) 

也发生在 Ubuntu 机器上(Windows 10 Linux 子系统):

$ uname -a
Linux N-5CG8205MFD 4.4.0-43-Microsoft #1-Microsoft Wed Dec 31 14:42:53 PST 2014 x86_64 x86_64 x86_64 GNU/Linux
$ cat /etc/issue
Ubuntu 18.04.1 LTS \n \l
$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/7/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 7.3.0-27ubuntu1~18.04' --with-bugurl=file:///usr/share/doc/gcc-7/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++ --prefix=/usr --with-gcc-major-version-only --program-suffix=-7 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --enable-default-pie --with-system-zlib --with-target-system-zlib --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 7.3.0 (Ubuntu 7.3.0-27ubuntu1~18.04)

最佳答案

谢谢大家的评论。

当我创建 2 个业务线程时,即使使用二值信号量或互斥量保护 input_level,也不能保证不超时。在 log_buffer_lock 加锁/解锁前后添加日志之后,我找到了原因:

# prefix B = business thread, 155 and 156 below; L = log collecting thread, 154 below.
# first column is sequence.
1D <00:24:49.617510> tid 156; B will lock.   # at this time, input level is 2
1E <00:24:49.617995> tid 155; B will unlock.
1F <00:24:49.617998> tid 154; L will lock.   # log collecting thread acquired mutex, even thread 156 waits first.
20 <00:24:49.618004> tid 154; L locked.
21 <00:24:49.618005> tid 155; B unlocked.
22 <00:24:49.719956> tid 154; L will unlock.
23 <00:24:49.719974> tid 154; L unlocked.
24 <00:24:49.720066> tid 155; B locked without waiting.
25 <00:24:49.721051> tid 155; B will unlock.
26 <00:24:49.721065> tid 155; B unlocked.
27 <00:24:49.721069> tid 156; B locked.      # finally thread 156 got the mutex.
28 <00:24:49.721073> tid 155; B will lock.
29 <00:24:49.721538> tid 156; B will unlock.
2A <00:24:49.721561> tid 156; B unlocked.
2B <00:24:49.721644> tid 155; B locked.
2C <00:24:49.722047> tid 155; B will unlock.
2D <00:24:49.722083> tid 155; B unlocked.
2E <00:24:49.722093> tid 155; B locked without waiting.
2F <00:24:49.722119> tid 156; ***** Business delayed 1 time(s)! *****

当有 >= 2 个线程在等待同一个互斥量时,无论其优先级如何,都无法确定哪个线程先得到互斥量(我尝试过将业务线程的调度策略设置为 REALTIME、优先级设置为 99,但这并不能缓解问题)。如果 log_buffer_lock 被业务线程 155 获取,而业务线程 156 和日志收集线程都在等待,内核可能会在互斥量释放后把它交给日志收集线程。在这种情况下,业务线程 156 会饿死并超时。

@Hasturkun 你是对的,在检查输入级别(input_level)和获取互斥锁之间存在一个时间窗口。

  • 当只有一个业务线程时,使用 pthread_mutex_trylock() 并在失败时重新检查输入级别将起作用,因为如果 pthread_mutex_trylock() 失败,我们可以 100 % 确定它是由日志收集线程获取的。
  • 当有>= 2个业务线程时,这将不起作用,因为log_buffer_lock不仅可能被日志收集线程获取,还可能被另一个业务线程获取。

所以这个问题与 gcc 内置原子函数关系不大。要修复它,必须使用 pthread_mutex_timedlock()。使用以下 write_log() 函数后,即使不对 input_level 做任何保护,运行 1 小时也没有发生超时。

    /*
     * Writes one simulated log entry from a business thread without ever
     * blocking on log_buffer_lock for a long time.
     *
     * severity: compared against get_input_level(); when it is lower, the
     *           function returns at once.
     *
     * BUSINESS_THREADS_COUNT == 1: the lock is tried first; if that fails,
     * the input level is checked again and the call either aborts or blocks
     * on the lock.
     * Otherwise: the lock is requested with an absolute deadline of
     * TIMEOUT_MS / 10 milliseconds from now, and the call aborts if the
     * deadline passes (or the timed lock fails for any other reason).
     *
     * Once the lock is held, the function sleeps 100 us in place of a real
     * buffer write, releases the lock, and logs a "Business delayed" line if
     * the whole call lasted TIMEOUT_MS milliseconds or more.
     *
     * NOTE(review): `count` is a plain static that is incremented without
     * synchronisation; with several business threads the reported number
     * may be inexact -- confirm whether that matters.
     */
    static void write_log(int severity)
    {
        static unsigned int count = 0;
        struct timeval      start, end;

        gettimeofday(&start, NULL);

        if (severity < get_input_level()) return;  // abort

        #if BUSINESS_THREADS_COUNT == 1
        if (0 != pthread_mutex_trylock(&G_log_buffer_desc.log_buffer_lock))
        {
            // `log_buffer_lock` is 100% acquired by log collecting thread.
            if (severity < get_input_level()) return;  // abort
            else {
                papillon_log("B will lock.\n");
                pthread_mutex_lock(&G_log_buffer_desc.log_buffer_lock);
                papillon_log("B locked.\n");
            }
        }
        else papillon_log("B locked without waiting.\n");
        #else
        // When there are >= 2 business threads, testing `log_buffer_lock` will not work, because it may be not only acquired by log collecting thread, but also by another business thread.
        struct timespec wait_time;
        clock_gettime(CLOCK_REALTIME , &wait_time);
        // Deadline = now + TIMEOUT_MS / 10 milliseconds (1 ms = 1000000 ns).
        wait_time.tv_nsec += (TIMEOUT_MS / 10) * 1000000L;
        // tv_nsec must stay below one second, otherwise the timed lock
        // rejects the deadline as invalid; carry the overflow into tv_sec.
        if (wait_time.tv_nsec >= 1000000000L)
        {
            wait_time.tv_sec  += wait_time.tv_nsec / 1000000000L;
            wait_time.tv_nsec %= 1000000000L;
        }
        papillon_log("B will lock.\n");
        if (0 != pthread_mutex_timedlock(&G_log_buffer_desc.log_buffer_lock, &wait_time)) return;  // abort
        papillon_log("B locked.\n");
        #endif  /* BUSINESS_THREADS_COUNT */

        usleep(100);  // to simulate writing to the log buffer
        papillon_log("B will unlock.\n");
        pthread_mutex_unlock(&G_log_buffer_desc.log_buffer_lock);
        papillon_log("B unlocked.\n");

        gettimeofday(&end, NULL);
        int diff = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_usec - start.tv_usec) / 1000;  // in ms
        if (diff >= TIMEOUT_MS) papillon_log("***** Business delayed %u time(s)! *****\n", ++count);
    }

关于c - gcc 内置的 __atomic 或 __sync 函数无法提供对全局变量的一致访问,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57491133/

相关文章:

来回发送的条件变量

c++ - 方法不占用 long long int 值显示错误

c - 如何从 C 中的函数返回二维数组

c - 将多字节存储到 char 数组中

python - 如何在多线程环境中使用requests-html渲染异步页面?

android - 如何使用上下文访问/操作另一个类/Activity

c - 当我尝试运行黄道十二宫程序时,Windows 卡住

python - 查找名称相同但内容不同的文件

php - 通过 Cron 显示 PHP 脚本运行的次数

python - 获取进程的 PID 号,然后用 Python 杀死它