c++ - 为什么 MSVC 静态局部变量的线程安全初始化使用 TLS

标签 c++ ssl visual-c++ thread-safety

在 VS 2015 及更高版本中,启用 /Zc:threadSafeInit 后,静态局部变量的初始化是线程安全的,但 MSDN 上说:

Thread-safe static local variables use thread-local storage (TLS) internally to provide efficient execution when the static has already been initialized.

如果使用 LoadLibrary 动态加载 DLL,Windows XP 将忽略 .tls 段,因此我有以下疑问:

  1. 为什么要使用 TLS,它是如何高效地工作的?
  2. std::call_once 是否使用 TLS?
  3. 如何在没有 TLS 的情况下实现线程安全初始化?

引文出处在这里:Thread-safe Local Static Initialization

The implementation of this feature relies on Windows operating system support functions in Windows Vista and later operating systems. Windows XP, Windows Server 2003, and older operating systems do not have this support, so they do not get the efficiency advantage.

我的测试代码:

// Minimal test type: a single int member with a default member initializer.
// The disassembly listings show its constructor being called as AA::AA.
class AA
{
public:
    int m_a{1};
};

// Returns the address of a function-local static AA object.  The static's
// name `a` is the symbol that appears in the disassembly listings, and its
// initialization is the code path this example studies.
AA* getAA()
{
    static AA a; // block-scope static whose initialization is being examined
    return &a;
}

int main()
{
    AA* pa = getAA();
    return 0;
}

使用 /Zc:threadSafeInit,windbg 中的函数 getAA 反汇编为:

008c1000 55              push    ebp
008c1001 8bec            mov     ebp,esp
008c1003 64a12c000000    mov     eax,dword ptr fs:[0000002Ch]
008c1009 8b08            mov     ecx,dword ptr [eax]
008c100b 8b15b8338c00    mov     edx,dword ptr [testStatic!__favor+0x4 (008c33b8)]
008c1011 3b9104000000    cmp     edx,dword ptr [ecx+4]
008c1017 7e2d            jle     testStatic!getAA+0x46 (008c1046)
008c1019 68b8338c00      push    offset testStatic!__favor+0x4 (008c33b8)
008c101e e809020000      call    testStatic!_Init_thread_header (008c122c)
008c1023 83c404          add     esp,4
008c1026 833db8338c00ff  cmp     dword ptr [testStatic!__favor+0x4 (008c33b8)],0FFFFFFFFh
008c102d 7517            jne     testStatic!getAA+0x46 (008c1046)
008c102f b9bc338c00      mov     ecx,offset testStatic!a (008c33bc)
008c1034 e817000000      call    testStatic!AA::AA (008c1050)
008c1039 68b8338c00      push    offset testStatic!__favor+0x4 (008c33b8)
008c103e e89f010000      call    testStatic!_Init_thread_footer (008c11e2)
008c1043 83c404          add     esp,4
008c1046 b8bc338c00      mov     eax,offset testStatic!a (008c33bc)
008c104b 5d              pop     ebp
008c104c c3              ret

而使用 /Zc:threadSafeInit-(关闭该功能)时,函数 getAA 的反汇编为:

010e1000 55              push    ebp
010e1001 8bec            mov     ebp,esp
010e1003 a180330e01      mov     eax,dword ptr [testStatic!a+0x4 (010e3380)]
010e1008 83e001          and     eax,1
010e100b 7519            jne     testStatic!getAA+0x26 (010e1026)
010e100d 8b0d80330e01    mov     ecx,dword ptr [testStatic!a+0x4 (010e3380)]
010e1013 83c901          or      ecx,1
010e1016 890d80330e01    mov     dword ptr [testStatic!a+0x4 (010e3380)],ecx
010e101c b97c330e01      mov     ecx,offset testStatic!a (010e337c)
010e1021 e80a000000      call    testStatic!AA::AA (010e1030)
010e1026 b87c330e01      mov     eax,offset testStatic!a (010e337c)
010e102b 5d              pop     ebp
010e102c c3              ret

最佳答案

Why MSVC thread-safe initialization of static local variables use TLS

TLS 仅用作优化。对于静态初始化,它使用单个全局临界区或在可用时使用 SRW 锁。

可以在VC++ CRT源码crt/src/vcruntime/thread_safe_statics.cpp中看到。

它是开源的,可作为 MSVC 安装的一部分使用。我在此处包含相关片段以供引用:

//
// thread_safe_statics.cpp
//
//      Copyright (c) Microsoft Corporation. All rights reserved.
//
// Helper functions used by thread-safe static initialization.
//
#ifdef _M_CEE
    #error This file cannot be built as managed
#endif

#include <vcstartup_internal.h>
#include <vcruntime_internal.h>
#include <limits.h>

// Timeout passed to _Init_thread_wait by _Init_thread_header.
static DWORD const xp_timeout = 100; // ms
// Guard value that _Init_thread_header treats as "no thread has claimed the
// initialization yet"; _Init_thread_abort resets the guard to this value.
static int const uninitialized = 0;
// Guard value stored by _Init_thread_header while one thread runs the initializer.
static int const being_initialized = -1;
// Initial value of both _Init_global_epoch and _Init_thread_epoch.
static int const epoch_start = INT_MIN;

// Access to these variables is guarded in the below functions.  They may only
// be modified while the lock is held.  _Tss_epoch is readable from user
// code and is read without taking the lock.
// NOTE(review): no variable named _Tss_epoch is defined in the visible source;
// the sentence above presumably refers to one of the epoch variables declared
// here -- confirm which one.
extern "C"
{
    // Incremented by _Init_thread_footer each time an initialization completes.
    int _Init_global_epoch = epoch_start;
    // Per-thread value (declared __declspec(thread)); assigned from
    // _Init_global_epoch by _Init_thread_header and _Init_thread_footer.
    __declspec(thread) int _Init_thread_epoch = epoch_start;
}

// On Vista or newer, the native CONDITION_VARIABLE type is used.  On XP, we use a simple
// Windows event.  This is not safe to use as a complete condition variable, but for the purposes
// of this feature the event is sufficient but not optimal.  See the code in _Tss_wait
// below.
// NOTE(review): no function named _Tss_wait is defined in the visible source; the
// reference presumably means _Init_thread_wait -- confirm.
//
// For Windows OS components:  The OS supports APISets downlevel to Windows 7,
// and OS components that run downlevel to Windows 7 may build against APISets.
// However, these components cannot use CONDITION_VARIABLE directly because it
// is not available via APISets until Windows 8.  Thus, for Windows OS components,
// we use the "ancient" code path and first try the APISet and then fall back to
// kernel32.dll.
//
// _USE_VISTA_THREAD_SAFE_STATICS == 1 selects the code paths that call the SRWLOCK
// and CONDITION_VARIABLE functions directly; 0 selects the CRITICAL_SECTION paths
// that look up the condition variable functions at run time and otherwise use
// an event.
#if defined _SCRT_ENCLAVE_BUILD || defined _CRT_APP || defined _CRT_WINDOWS_USE_VISTA_TSS \
    || (!defined _CRT_WINDOWS && (defined _ONECORE || defined _M_ARM || defined _M_ARM64))
#define _USE_VISTA_THREAD_SAFE_STATICS 1
#else
#define _USE_VISTA_THREAD_SAFE_STATICS 0
#endif

// Condition variable that _Init_thread_wait sleeps on and _Init_thread_notify wakes.
static CONDITION_VARIABLE g_tss_cv;

#if _USE_VISTA_THREAD_SAFE_STATICS
// Lock acquired by _Init_thread_lock and released by _Init_thread_unlock in this configuration.
static SRWLOCK g_tss_srw;
#else // ^^^ _USE_VISTA_THREAD_SAFE_STATICS ^^^ // vvv !_USE_VISTA_THREAD_SAFE_STATICS vvv //
// Event used for waiting/notification; created at startup only when the
// condition variable functions could not both be resolved.
static HANDLE             g_tss_event;
// Lock entered by _Init_thread_lock and left by _Init_thread_unlock in this configuration.
static CRITICAL_SECTION   g_tss_mutex;
// Function pointers looked up with GetProcAddress at startup; they are assigned
// only when both lookups succeed.
static decltype(SleepConditionVariableCS)* g_sleep_condition_variable_cs;
static decltype(WakeAllConditionVariable)* g_wake_all_condition_variable;

// Prepares the synchronization objects for the configuration in which
// _USE_VISTA_THREAD_SAFE_STATICS is 0: initializes g_tss_mutex, looks up the
// condition variable functions at run time and, when either one is missing,
// creates g_tss_event to be used instead.
static void __cdecl __scrt_initialize_thread_safe_statics_platform_specific() noexcept
{
    // The call may fail on pre-Vista systems; the result is deliberately ignored.
    InitializeCriticalSectionAndSpinCount(&g_tss_mutex, 4000);

    // CONDITION_VARIABLE is exported by this APISet starting with Windows 8;
    // when the APISet module is not loaded, fall back to kernel32.dll.
    HMODULE synch_module = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
    if (synch_module == nullptr)
    {
        synch_module = GetModuleHandleW(L"kernel32.dll");
        if (synch_module == nullptr)
        {
            __scrt_fastfail(FAST_FAIL_FATAL_APP_EXIT);
        }
    }

    #define GET_PROC_ADDRESS(m, f) reinterpret_cast<decltype(f)*>(GetProcAddress(m, _CRT_STRINGIZE(f)))

    auto const sleep_fn    = GET_PROC_ADDRESS(synch_module, SleepConditionVariableCS);
    auto const wake_all_fn = GET_PROC_ADDRESS(synch_module, WakeAllConditionVariable);

    #undef GET_PROC_ADDRESS

    // Both functions must be present for the condition variable path to be used.
    if (sleep_fn != nullptr && wake_all_fn != nullptr)
    {
        g_sleep_condition_variable_cs = sleep_fn;
        g_wake_all_condition_variable = wake_all_fn;
        return;
    }

    // Otherwise create the event that the wait/notify helpers use instead.
    g_tss_event = CreateEventW(nullptr, TRUE, FALSE, nullptr);
    if (g_tss_event == nullptr)
    {
        __scrt_fastfail(FAST_FAIL_FATAL_APP_EXIT);
    }
}

// Tears down the synchronization objects: deletes g_tss_mutex and closes
// g_tss_event when one was created.
static void __cdecl __scrt_uninitialize_thread_safe_statics() noexcept
{
    DeleteCriticalSection(&g_tss_mutex);

    // The event exists only when the condition variable functions were unavailable.
    if (g_tss_event == nullptr)
    {
        return;
    }

    CloseHandle(g_tss_event);
}

// Sets up the synchronization objects and registers their teardown with
// atexit.  Always returns 0.
static int __cdecl __scrt_initialize_thread_safe_statics() noexcept
{
    __scrt_initialize_thread_safe_statics_platform_specific();

    // When CRT initialization was skipped, the atexit tables still have to be
    // initialized here.  That is only needed for a managed DLL built with /NOENTRY.
    auto const onexit_tables_ready = __scrt_initialize_onexit_tables(__scrt_module_type::dll);
    if (!onexit_tables_ready)
    {
        __scrt_fastfail(FAST_FAIL_FATAL_APP_EXIT);
    }

    atexit(__scrt_uninitialize_thread_safe_statics);
    return 0;
}

// Stores a pointer to __scrt_initialize_thread_safe_statics in the ".CRT$XIC"
// section.  NOTE(review): presumably the CRT startup code walks this section
// and calls each entry, which is how the initializer gets run -- confirm.
_CRTALLOC(".CRT$XIC") static _PIFV __scrt_initialize_tss_var = __scrt_initialize_thread_safe_statics;
#endif // _USE_VISTA_THREAD_SAFE_STATICS

// Helper functions for accessing the mutex and condition variable.  Can be replaced with
// more suitable data structures provided by the CRT, preferably ones that use the most
// efficient synchronization primitives available on the platform.
// This is not intended to be a recursive lock.
//
// _Init_thread_lock acquires the single global lock used by the thread-safe
// static helpers: g_tss_srw (exclusive) or g_tss_mutex, depending on
// _USE_VISTA_THREAD_SAFE_STATICS.
extern "C" void __cdecl _Init_thread_lock()
{
#if _USE_VISTA_THREAD_SAFE_STATICS
    AcquireSRWLockExclusive(&g_tss_srw);
#else // ^^^ _USE_VISTA_THREAD_SAFE_STATICS ^^^ // vvv !_USE_VISTA_THREAD_SAFE_STATICS vvv
    EnterCriticalSection(&g_tss_mutex);
#endif // _USE_VISTA_THREAD_SAFE_STATICS
}

// Releases the lock taken by _Init_thread_lock: g_tss_srw or g_tss_mutex,
// depending on _USE_VISTA_THREAD_SAFE_STATICS.
extern "C" void __cdecl _Init_thread_unlock()
{
#if _USE_VISTA_THREAD_SAFE_STATICS
    ReleaseSRWLockExclusive(&g_tss_srw);
#else // ^^^ _USE_VISTA_THREAD_SAFE_STATICS ^^^ // vvv !_USE_VISTA_THREAD_SAFE_STATICS vvv
    LeaveCriticalSection(&g_tss_mutex);
#endif // _USE_VISTA_THREAD_SAFE_STATICS
}

// Waits for a notification for at most `timeout` milliseconds.
//
// When a condition variable is available, the wait is done on g_tss_cv
// together with the global lock.  The event-only (XP) implementation cannot
// guarantee that the notification is ever received, so it requires a finite
// timeout.  That is not optimal: the waiter may wake up early while a
// long-running initializer is still in progress, or it may miss the signal and
// sleep until the timeout expires.  The signal can be missed because a sleeping
// thread may be taken by the kernel to service an APC, or because of the race
// between the unlock call and the WaitForSingleObjectEx call.
extern "C" void __cdecl _Init_thread_wait(DWORD const timeout)
{
#if _USE_VISTA_THREAD_SAFE_STATICS
    SleepConditionVariableSRW(&g_tss_cv, &g_tss_srw, timeout, 0);
#else // ^^^ _USE_VISTA_THREAD_SAFE_STATICS ^^^ // vvv !_USE_VISTA_THREAD_SAFE_STATICS vvv //
    if (g_sleep_condition_variable_cs)
    {
        // Vista+ path, tested first because it is the most likely one.
        g_sleep_condition_variable_cs(&g_tss_cv, &g_tss_mutex, timeout);
    }
    else
    {
        // Event path: release the lock, wait on the event, then take the lock again.
        _ASSERT(timeout != INFINITE);
        _Init_thread_unlock();
        WaitForSingleObjectEx(g_tss_event, timeout, FALSE);
        _Init_thread_lock();
    }
#endif // _USE_VISTA_THREAD_SAFE_STATICS
}

// Wakes the threads waiting in _Init_thread_wait: through g_tss_cv when a
// condition variable is available, otherwise by setting and then immediately
// resetting g_tss_event.
extern "C" void __cdecl _Init_thread_notify()
{
#if _USE_VISTA_THREAD_SAFE_STATICS
    WakeAllConditionVariable(&g_tss_cv);
#else // ^^^ _USE_VISTA_THREAD_SAFE_STATICS ^^^ // vvv !_USE_VISTA_THREAD_SAFE_STATICS vvv
    if (g_wake_all_condition_variable)
    {
        // Vista+ path, tested first because it is the most likely one.
        g_wake_all_condition_variable(&g_tss_cv);
    }
    else
    {
        SetEvent(g_tss_event);
        ResetEvent(g_tss_event);
    }
#endif // _USE_VISTA_THREAD_SAFE_STATICS
}

// Gate in front of the initialization expression.  Before the variable has
// finished initializing, only one thread leaves this function with the guard
// set to being_initialized; that thread performs the initialization.  Every
// other thread stays here until the initialization completes or is aborted
// because of an exception.
//
// pOnce points to the variable's guard.  On return it holds being_initialized
// when the calling thread claimed the initialization; otherwise the guard was
// left as found and _Init_thread_epoch was refreshed from _Init_global_epoch.
extern "C" void __cdecl _Init_thread_header(int* const pOnce) noexcept
{
    _Init_thread_lock();

    // True once this thread has observed the guard in the unclaimed state.
    bool claimed = (*pOnce == uninitialized);

    while (!claimed && *pOnce == being_initialized)
    {
        // The timeout can become an infinite wait once XP support is removed,
        // or once the XP-based condition variable is sophisticated enough to
        // guarantee that all waiting threads are woken when it is signalled.
        _Init_thread_wait(xp_timeout);

        // The initializing thread may have aborted, resetting the guard.
        claimed = (*pOnce == uninitialized);
    }

    if (claimed)
    {
        *pOnce = being_initialized;
    }
    else
    {
        _Init_thread_epoch = _Init_global_epoch;
    }

    _Init_thread_unlock();
}

// Abort processing of the initializer due to an exception.  Reset the state
// to uninitialized and release waiting threads (one of which will take over
// initialization, any remaining will again sleep).
//
// pOnce points to the guard of the variable whose initialization failed.  The
// guard is reset under the lock; the notification is sent after the lock has
// been released.
extern "C" void __cdecl _Init_thread_abort(int* const pOnce) noexcept
{
    _Init_thread_lock();
    *pOnce = uninitialized;
    _Init_thread_unlock();
    _Init_thread_notify();
}

// Called by the thread that has finished initializing a variable.  Under the
// lock it increments the global epoch and stores the new value both in the
// variable's guard (*pOnce) and in the calling thread's epoch.  After the lock
// is released, waiting threads are notified.
extern "C" void __cdecl _Init_thread_footer(int* const pOnce) noexcept
{
    _Init_thread_lock();

    int const completed_epoch = ++_Init_global_epoch;
    *pOnce = completed_epoch;
    _Init_thread_epoch = completed_epoch;

    _Init_thread_unlock();
    _Init_thread_notify();
}

需要锁的原因源于 ISO C++ 标准([stmt.dcl]/4),它要求块作用域的静态局部变量以线程安全的方式仅初始化一次(关于 /Zc:threadSafeInit 的 MSDN 文章中也解释了这一点,该选项可作为编译器扩展来放宽此要求)。

关于c++ - 为什么 MSVC 静态局部变量的线程安全初始化使用 TLS,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/56458581/

相关文章:

azure - Multi-Tenancy Web 应用程序的 SSL 证书

python - urllib 和服务器证书的验证

c++ - 为什么使用构造函数而不是函数?

php - 如何将现有套接字转换为使用 TLS/SSL?

c++ - 无法访问 header

c++ - 表面上的 SDL blitting 表面

c++ - GCC 和 VC++ 之间 std::vector::emplace_back 的区别

c++ - 创建派生类实例时提供基类构造函数参数

c++ - 计算机视觉系统中机械臂的 Controller

c++ - 删除对象时可能出现堆损坏问题