我正在使用 MS 编译器特有的关键字 __forceinline 来强制内联一个全局函数,但我注意到:如果该函数使用了带有显式定义的平凡(空)析构函数的对象,该函数就不会被内联。
引自MSDN
Even with __forceinline, the compiler cannot inline code in all circumstances. The compiler cannot inline a function if:
The function or its caller is compiled with /Ob0 (the default option for debug builds).
The function and the caller use different types of exception handling (C++ exception handling in one, structured exception handling in the other).
The function has a variable argument list.
The function uses inline assembly, unless compiled with /Og, /Ox, /O1, or /O2.
The function is recursive and not accompanied by #pragma inline_recursion(on). With the pragma, recursive functions are inlined to a default depth of 16 calls. To reduce the inlining depth, use inline_depth pragma.
The function is virtual and is called virtually. Direct calls to virtual functions can be inlined.
The program takes the address of the function and the call is made via the pointer to the function. Direct calls to functions that have had their address taken can be inlined.
The function is also marked with the naked __declspec modifier.
我正在尝试使用以下自包含程序来测试行为
#include <iostream>
#define INLINE __forceinline
// Thin wrapper that stores a reference to a T. In this sample it is the
// parameter type of Spam's rvalue-taking constructor and assignment operator.
template <class T>
struct rvalue
{
// Reference to the wrapped object; bound once in the constructor.
T& r_;
// Binds r_ to r. Marked explicit, so a T& is never converted to rvalue<T>
// implicitly through this constructor.
explicit INLINE rvalue(T& r) : r_(r) {}
};
// Returns a new T constructed from an rvalue<T> wrapper that refers to t.
// For T = Spam<int> this goes through Spam(rvalue<Spam>), which copies
// m_value out of t. This is the function that shows up as a real call
// (movz<Spam<int> >) in the disassembly taken with the destructor present.
template <class T>
INLINE
T movz(T& t)
{
return T(rvalue<T>(t));
}
// Value holder around a single T. Construction/assignment from another Spam
// is routed through rvalue<Spam>; the Spam& copy constructor and copy
// assignment are declared private and left undefined.
template <class T>
class Spam
{
public:
// Converts this object to an rvalue<Spam> wrapper referring to *this.
INLINE operator rvalue<Spam>() { return rvalue<Spam>(*this); }
// Default constructor: initializes m_value with 0.
INLINE Spam() : m_value(0) {}
// Constructs from a wrapper by copying the wrapped object's m_value.
INLINE Spam(rvalue<Spam> p) : m_value(p.r_.m_value) {}
// Assigns from a wrapper by copying the wrapped object's m_value.
INLINE Spam& operator= (rvalue<Spam> p)
{
m_value = p.r_.m_value;
return *this;
}
// Constructs directly from a T value.
INLINE explicit Spam(T value) : m_value(value) { }
// Implicit conversion back to T; returns a copy of m_value.
// (The trailing ';' after the body is an empty declaration.)
INLINE operator T() { return m_value; };
// Assignment from a Spam<U>; the body ignores u and only returns *this.
// NOTE(review): E is not used in any parameter, so it cannot be deduced
// from a call; this overload looks unusable with plain `a = b` -- confirm intent.
template <class U, class E> INLINE Spam& operator= (Spam<U> u) { return *this; }
// Empty user-declared destructor. The surrounding text compares the
// generated code with and without this line.
INLINE ~Spam() {}
private:
// Declared but never defined, and private.
Spam(Spam<T>&); // not defined
Spam& operator= (Spam&); // not defined
private:
// The wrapped value.
T m_value;
};
// Test driver: builds a Spam<int> holding 5, transfers it into a second
// Spam<int> through movz, and returns the resulting value as int.
INLINE int foo()
{
// p1 is built with the explicit Spam(T) constructor; p2 is default-constructed (m_value 0).
Spam<int> p1(int(5)), p2;
// movz returns a temporary Spam<int>. The Spam& assignment is private, so this
// presumably resolves to operator=(rvalue<Spam>) via the temporary's
// conversion operator -- confirm against overload resolution if it matters.
p2 = movz(p1);
// Converted to int through Spam::operator T().
return p2;
}
// Entry point: prints the value returned by foo() followed by std::endl.
int main()
{
std::cout << foo() << std::endl;
}
在定义了平凡析构函数 INLINE ~Spam() {} 的情况下,我们得到以下反汇编:
int main()
{
000000013F4B1010 sub rsp,28h
std::cout << foo() << std::endl;
000000013F4B1014 lea rdx,[rsp+30h]
000000013F4B1019 lea rcx,[rsp+38h]
000000013F4B101E mov dword ptr [rsp+30h],5
000000013F4B1026 call movz<Spam<int> > (013F4B1000h)
000000013F4B102B mov rcx,qword ptr [__imp_std::cout (013F4B2050h)]
000000013F4B1032 mov edx,dword ptr [rax]
000000013F4B1034 call qword ptr [__imp_std::basic_ostream<char,std::char_traits<char> >::operator<< (013F4B2040h)]
000000013F4B103A mov rdx,qword ptr [__imp_std::endl (013F4B2048h)]
000000013F4B1041 mov rcx,rax
000000013F4B1044 call qword ptr [__imp_std::basic_ostream<char,std::char_traits<char> >::operator<< (013F4B2058h)]
}
去掉析构函数 INLINE ~Spam() {} 之后,我们得到以下反汇编:
int main()
{
000000013FF01000 sub rsp,28h
std::cout << foo() << std::endl;
000000013FF01004 mov rcx,qword ptr [__imp_std::cout (013FF02050h)]
000000013FF0100B mov edx,5
000000013FF01010 call qword ptr [__imp_std::basic_ostream<char,std::char_traits<char> >::operator<< (013FF02040h)]
000000013FF01016 mov rdx,qword ptr [__imp_std::endl (013FF02048h)]
000000013FF0101D mov rcx,rax
000000013FF01020 call qword ptr [__imp_std::basic_ostream<char,std::char_traits<char> >::operator<< (013FF02058h)]
}
000000013FF01026 xor eax,eax
}
我不明白,为什么在存在析构函数的情况下,编译器无法内联函数 T movz(T& t)。
- 注意:从 2008 版到 2013 版,该行为是一致的。
- 注意:我用 cygwin-gcc 检查过,该编译器确实内联了代码。我目前无法验证其他编译器,但如果需要,会在接下来的 12 小时内更新。
最佳答案
是的,这是一个(编译器的)bug。我在 Qt 的 MinGW 编译器环境下测试了这段代码,它可以很好地把所有内容都优化掉。
首先,为了更容易查看汇编代码,我对您的代码进行了一些如下更改:
// Variant of main used in the answer: the result of foo() is stored in a
// local first, then printed, so the two steps appear separately in the
// disassembly.
int main()
{
int i = foo();
std::cout << i << std::endl;
}
以下是我在 Qt 调试器中得到的反汇编:
45 int main()
46 {
0x401600 lea 0x4(%esp),%ecx
0x401604 <+0x0004> and $0xfffffff0,%esp
0x401607 <+0x0007> pushl -0x4(%ecx)
0x40160a <+0x000a> push %ebp
0x40160b <+0x000b> mov %esp,%ebp
0x40160d <+0x000d> push %ecx
0x40160e <+0x000e> sub $0x54,%esp
0x401611 <+0x0011> call 0x402160 <__main>
0x401616 <+0x0016> movl $0x5,-0x10(%ebp)
47 int i = foo();
0x401683 <+0x0083> mov %eax,-0xc(%ebp)
48 std::cout << i << std::endl;
0x401686 <+0x0086> mov -0xc(%ebp),%eax
0x401689 <+0x0089> mov %eax,(%esp)
0x40168c <+0x008c> mov $0x6fcba2c0,%ecx
0x401691 <+0x0091> call 0x401714 <_ZNSolsEi>
0x401696 <+0x0096> sub $0x4,%esp
0x401699 <+0x0099> movl $0x40171c,(%esp)
0x4016a0 <+0x00a0> mov %eax,%ecx
0x4016a2 <+0x00a2> call 0x401724 <_ZNSolsEPFRSoS_E>
0x4016a7 <+0x00a7> sub $0x4,%esp
49 }
0x4016aa <+0x00aa> mov $0x0,%eax
0x4016af <+0x00af> mov -0x4(%ebp),%ecx
0x4016b2 <+0x00b2> leave
0x4016b3 <+0x00b3> lea -0x4(%ecx),%esp
0x4016b6 <+0x00b6> ret
您可以看到,连 foo() 也被优化掉了:变量 i 被直接赋值为 5 并打印出来。
关于c++ - 了解 C++ 函数内联,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/25796011/