在编写自己的小型发现程序以弄清楚 Visual C++ 如何为动态数组分配内存时,我有点困惑。我必须指出,我从未见过描述任何 C++ 实现的 new[]/delete[] 运算符的这个问题的技术文档。
一开始我以为new[]和delete[]如果解释成简单的C的话就是类似下面的东西:
void fake_int_ctor(int _this) {
printf("borns with 0x%08X in the heap\n", _this);
}
void fake_int_dtor(int _this) {
printf("dies with %d\n", _this);
}
void *new_array(unsigned int single_item_size, unsigned int count, void (*ctor)()) {
unsigned int i;
unsigned int *p = malloc(sizeof(single_item_size) + sizeof(count) + single_item_size * count);
p[0] = single_item_size; // keep single item size for delete_array
p[1] = count; // and then keep items count for delete_array
p += 2;
for ( i = 0; i < count; i++ ) {
ctor(p[i]); // simulate constructor calling
}
return p;
}
void delete_array(void *p, void (*dtor)()) {
unsigned int *casted_p = p;
unsigned int single_item_size = casted_p[-2];
unsigned int count = casted_p[-1];
unsigned int i;
for ( i = 0; i < count; i++ ) {
dtor(casted_p[i]); // simulate destructor
}
free(casted_p - 2);
}
void test_allocators(void) {
unsigned int count = 10;
unsigned int i;
int *p = new_array(sizeof(int), count, fake_int_ctor); // allocate 10 ints and simulate constructors
for ( i = 0; i < count; i++ ) {
p[i] = i + i; // do something
}
delete_array(p, fake_int_dtor); // deletes the array printing death-agony-values from 0 to 19 stepping 2
}
此代码隐含以下动态数组结构:
-2..-1..0.....|.....|.....|.....
^ ^ ^
| | +-- start of user data, slots may have variable size
| | depending on "single item size" slot
| +------ "items count" slot
+---------- "single item size" slot
我的 VC++ 编译器生成了产生以下输出的程序:
borns with 0xCDCDCDCD in the heap
borns with 0xCDCDCDCD in the heap
borns with 0xCDCDCDCD in the heap
borns with 0xCDCDCDCD in the heap
borns with 0xCDCDCDCD in the heap
borns with 0xCDCDCDCD in the heap
borns with 0xCDCDCDCD in the heap
borns with 0xCDCDCDCD in the heap
borns with 0xCDCDCDCD in the heap
borns with 0xCDCDCDCD in the heap
dies with 0
dies with 2
dies with 4
dies with 6
dies with 8
dies with 10
dies with 12
dies with 14
dies with 16
dies with 18
显然,在这种情况下一切都很好。但是现在,当我试图发现“ native ”VC++ 动态数组分配器的本质时,我明白我错了(至少对于 VC++ 而言)。所以我有几个问题。动态数组大小的值存储在哪里?动态数组分配器如何工作?他们对动态数组使用哪种逐字节结构?或者...或者您能否提供任何链接来为我澄清这一点(VC++ 具有最高优先级)?
最佳答案
它是实现定义的。 Scott meyer 还指出,实际上许多 C++ 编译器会保存返回地址之前的元素数。我已经检查过 VC2010 和 g++ 3.4.2(mingw)。两个编译器都以这种方式实现 operator new[]/delete[]:
+--------------------+-------------------------+
| Num of elems(4byte)| Your Object or Array |
+--------------------+-------------------------+
#include <stdio.h>
#include <stdlib.h>
struct X {
int i;
~X() {
puts("a");
}
};
int main()
{
volatile int s = 3;
printf("input a size: ");
fflush(stdout);
scanf("%d", &s);
X * px = reinterpret_cast<X *>(new X[s]);
printf("%d\n", *( (int*)px - 1));
delete[] px;
return 0;
}
我遵循了 VC2010 中的汇编指令,如果您使用调试符号编译代码,它并不难读:
cl /MTd /Zi array_test.cpp
请注意,fflush 的目的是确保在您实际输入尺寸并按回车键之前将“输入尺寸:”输出到屏幕。
使用scanf获取size有两个原因: 1.给你一个机会将进程附加到 VS 调试器 2. 确保大小不会被优化为即时值。
您最好输入一个小数字,例如 5,这样您在执行汇编指令时会更轻松,因为您可以验证某些指令的结果是否符合您的预期。
以下是对实际汇编指令的逐行注释:
X * px = reinterpret_cast<X *>(new X[s]); ; assume s = 5
00401070 mov ecx,dword ptr [s] ; the number of element is saved to ecx
00401073 mov dword ptr [ebp-0Ch],ecx ; save it to somewhere on the stack
00401076 xor ecx,ecx
00401078 mov eax,dword ptr [ebp-0Ch] ; trace it! now it's in eax
0040107B mov edx,4 ; because sizeof(X) == 4
00401080 mul eax,edx ; Get the total bytes needed for the whole array
00401082 seto cl ; handle the scenario: big size which overflow
00401085 neg ecx ; typically not, so cl = 0, and ecx = 0
00401087 or ecx,eax ; now ecx = eax = 4 * 5 = 20
00401089 xor eax,eax ; clear eax, now eax = 0
0040108B add ecx,4 ; add 4 to ecx, why 4? for save the overhead array size
0040108E setb al ; set al to 1 if carry flag is set, typically 0
00401091 neg eax ; eax = 0, neg eax also result 0
00401093 or eax,ecx ; eax = ecx = 24
00401095 push eax ;
00401096 call operator new (40122Ch) ; same as scalar new
0040109B add esp,4 ; balance the stack
0040109E mov dword ptr [ebp-10h],eax ; function's return value typically saved in EAX
; [ebp-10h] is somewhere on stack, used to save the
; returned raw memory pointer
004010A1 cmp dword ptr [ebp-10h],0 ; check whether returned NULL pointer
004010A5 je main+8Ah (4010BAh)
004010A7 mov ecx,dword ptr [ebp-10h] ; now ECX point to 24 bytes raw memory
004010AA mov edx,dword ptr [ebp-0Ch] ; Check address 00401073, edx is 5 now
004010AD mov dword ptr [ecx],edx ; !!!! 5 saved to the start of the 24 bytes raw memory
004010AF mov eax,dword ptr [ebp-10h] ; load start address of the 24 raw memory to EAX
004010B2 add eax,4 ; advance the EAX with 4 bytes, now EAX point to the
; start address of your first object in the array
004010B5 mov dword ptr [ebp-1Ch],eax ; Save this address to somewhere on the stack
004010B8 jmp main+91h (4010C1h) ; not NULL pointer, so skip it
004010BA mov dword ptr [ebp-1Ch],0 ; See address 004010A5
004010C1 mov ecx,dword ptr [ebp-1Ch] ; Load the address to ECX
004010C4 mov dword ptr [px],ecx ; Load the address in ECX to px. -The End-
删除[]部分:
delete[] px;
004010DC mov ecx,dword ptr [px] ; the address of the first object
004010DF mov dword ptr [ebp-18h],ecx ; save to somewhereon the stack
004010E2 mov edx,dword ptr [ebp-18h] ; save it again to edx
004010E5 mov dword ptr [ebp-14h],edx ; move around
004010E8 cmp dword ptr [ebp-14h],0 ; Check NULL pointer
004010EC je main+0CDh (4010FDh)
004010EE push 3 ; Looks silly, just because I init it to 3?
004010F0 mov ecx,dword ptr [ebp-14h] ; again, ECX is just the address of first object
; [px] -> ecx -> [ebp-18h] -> edx -> [ebp-14h] -> ecx
004010F3 call X::`vector deleting destructor' (401014h) ; A good function name, lets follow it!
X::`vector deleting destructor':
00401014 jmp X::`vector deleting destructor' (401140h)
X::`vector deleting destructor':
00401140 push ebp
00401141 mov ebp,esp
00401143 push ecx ; Address of the first object
00401144 mov dword ptr [ebp-4],ecx ; save it to somewhere on stack
00401147 mov eax,dword ptr [ebp+8] ; See address 004010EE, it's 3
0040114A and eax,2 ; ??
0040114D je X::`vector deleting destructor'+45h (401185h)
0040114F push offset X::~X (401005h) ; (S1) Put address of the descructor to stack
00401154 mov ecx,dword ptr [this] ; Address of first object
00401157 mov edx,dword ptr [ecx-4] ; !! important, ECX - 4 to get the
; address of the 24-bytes raw memory
; The value in it is the number of the elements
; Save it to EDX(=5, see 004010AD)
0040115A push edx ; (S2) Put it on stack
0040115B push 4 ; (S3) Put the sizeof(X) on stack
0040115D mov eax,dword ptr [this] ; save the address of the first object to EAX
00401160 push eax ; (S4) Put it on stack
00401161 call `vector destructor iterator' (40100Ah) ; Good function name, follow it
`vector destructor iterator':
0040100A jmp `vector destructor iterator' (4011F0h)
`vector destructor iterator':
004011F0 push ebp
004011F1 mov ebp,esp
004011F3 mov eax,dword ptr [__s] ; Some tricks here, by inspecting the value and
; some guess work, __s = 4(S3)
004011F6 imul eax,dword ptr [__n] ; __n = 5 (S2)
004011FA add eax,dword ptr [__t] ; __t = (S4), add it to EAX, now point to end
; of the array
004011FD mov dword ptr [__t],eax ; __t point to end of array
00401200 mov ecx,dword ptr [__n] ; loop counter
00401203 sub ecx,1
00401206 mov dword ptr [__n],ecx
00401209 js `vector destructor iterator'+2Ch (40121Ch) ; jump out of loop if value less than 0
0040120B mov edx,dword ptr [__t] ; Load addr: 1-byte passed the end of the array
0040120E sub edx,dword ptr [__s] ; Now point to the address of last element
00401211 mov dword ptr [__t],edx ; Update this address to __t
00401214 mov ecx,dword ptr [__t] ; save the address to ECX
00401217 call dword ptr [__f] ; __f is the address of destructor function
0040121A jmp `vector destructor iterator'+10h (401200h)
0040121C pop ebp
0040121D ret 10h ; Because we have S1, S2, S3, S4
; 4 pushes
struct X {
int i;
~X() {
004011D0 push ebp
004011D1 mov ebp,esp
004011D3 push ecx ; the address of current object: this in C++
004011D4 mov dword ptr [ebp-4],ecx ; save this to [ebp-4], although not used it
puts("a"); ;
004011D7 push offset string "a" (403758h)
004011DC call dword ptr [__imp__puts (406240h)]
004011E2 add esp,4
}
004011E5 mov esp,ebp
004011E7 pop ebp
004011E8 ret
关于c++ - VC++ 中的动态数组大小和动态数组分配器,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/1598673/