最近我在使用 copy.deepcopy 时遇到了奇怪的内存使用情况。
我有以下代码示例:
import copy
import gc
import os
import psutil
from pympler.asizeof import asizeof
from humanize import filesize
class Foo(object):
    """Named container holding child ``Foo`` objects and bar objects by name.

    ``__slots__`` is used, so instances have no per-instance ``__dict__``;
    the only attributes are ``name``, ``foos`` and ``bars``.
    """

    __slots__ = ["name", "foos", "bars"]

    def __init__(self, name):
        """Create an empty container called *name*."""
        self.name = name
        self.foos = {}  # maps child name -> child object
        self.bars = {}  # maps bar name -> bar object

    def add_foo(self, foo):
        """Register *foo* under ``foo.name``, replacing any same-named entry."""
        self.foos[foo.name] = foo

    def add_bar(self, bar):
        """Register *bar* under ``bar.name``, replacing any same-named entry."""
        self.bars[bar.name] = bar

    def __getstate__(self):
        """Return a dict of slot name -> value for every slot that is set.

        Unset slots are skipped so that a partially initialised instance
        (for example one created with ``__new__``) does not raise
        ``AttributeError``.
        """
        return {
            slot: getattr(self, slot)
            for slot in self.__slots__
            if hasattr(self, slot)
        }

    def __setstate__(self, state):
        """Restore attributes from a mapping produced by ``__getstate__``."""
        for slot, value in state.items():
            setattr(self, slot, value)
class Bar(object):
    """Leaf record with a ``name`` and a ``description``.

    ``__slots__`` is used, so instances have no per-instance ``__dict__``.
    """

    __slots__ = ["name", "description"]

    def __init__(self, name, description):
        """Store *name* and *description* on the instance."""
        self.name = name
        self.description = description

    def __getstate__(self):
        """Return a dict of slot name -> value for every slot that is set.

        Unset slots are skipped so that a partially initialised instance
        does not raise ``AttributeError``.
        """
        return {
            slot: getattr(self, slot)
            for slot in self.__slots__
            if hasattr(self, slot)
        }

    def __setstate__(self, state):
        """Restore attributes from a mapping produced by ``__getstate__``."""
        for slot, value in state.items():
            setattr(self, slot, value)
def get_ram():
    """Return the resident set size, in bytes, of the current process.

    ``rss`` is the first field of psutil's ``memory_info()`` result, so this
    is the same value as indexing with ``[0]``, spelled by name.
    """
    return psutil.Process(os.getpid()).memory_info().rss
def get_foo(bar_count=5000):
    """Build the object graph used for the measurement.

    The result is a ``Foo`` named ``"Foo"`` that contains one child ``Foo``
    named ``"SubFoo1"``; each of the two holds *bar_count* ``Bar`` objects.

    Args:
        bar_count: Number of ``Bar`` objects added to each ``Foo``.
            Defaults to 5000.

    Returns:
        The top-level ``Foo``.
    """
    sub_foo = Foo("SubFoo1")
    for i in range(bar_count):
        # format() is deliberately called twice so that name and description
        # are two separate string objects, as in the measured scenario.
        sub_foo.add_bar(
            Bar("BarInSubFoo{}".format(i), "BarInSubFoo{}".format(i))
        )

    foo = Foo("Foo")
    foo.add_foo(sub_foo)
    for i in range(bar_count):
        foo.add_bar(Bar("BarInFoo{}".format(i), "BarInFoo{}".format(i)))
    return foo
def main():
    """Deep-copy the sample graph and print object sizes and process memory.

    Memory is sampled with ``get_ram()`` right before and right after
    ``copy.deepcopy``, each time following a ``gc.collect()``.
    """
    foo = get_foo()
    foo_size = asizeof(foo)

    gc.collect()
    ram1 = get_ram()

    foo_copy = copy.deepcopy(foo)

    gc.collect()
    ram2 = get_ram()
    # NOTE(review): the difference is a process-wide figure, so it presumably
    # includes allocator overhead and any temporary memory not yet handed
    # back to the OS, not just the copied objects -- confirm before drawing
    # conclusions from it.

    foo_copy_size = asizeof(foo_copy)
    print("Original object size: {}, Ram before: {}\nCopied object size: {}, Ram after: {}\nDiff in ram: {}".format(
        filesize.naturalsize(foo_size), filesize.naturalsize(ram1), filesize.naturalsize(foo_copy_size),
        filesize.naturalsize(ram2), filesize.naturalsize(ram2-ram1)
    ))


if __name__ == "__main__":
    main()
我想做的是测量调用 copy.deepcopy 前后程序使用的内存量。为此,我创建了两个类。我预计在调用 deepcopy 之后,内存使用量的增加值会等于原始对象的大小。
奇怪的是我得到了这些结果:
Original object size: 2.1 MB, Ram before: 18.6 MB
Copied object size: 2.1 MB, Ram after: 24.7 MB
Diff in ram: 6.1 MB
正如您所看到的,内存使用量的差异大约是复制对象大小的 300%。
这些结果是在 Windows 10 64 位上使用 Python 3.8.5 获得的。
我尝试了什么?
Original object size: 2.3 MB, Ram before: 34.3 MB
Copied object size: 2.3 MB, Ram after: 46.2 MB
Diff in ram: 11.9 MB
在 __getstate__ 中返回元组列表而不是 dict,得到了更好的结果,但与我预期的相差甚远。
在 Foo 对象中使用列表而不是字典,也得到了更好的结果,但也远非我所期望的。
使用 pickle.dumps 和 pickle.loads 来复制对象,产生了相同的结果。
有什么技巧吗?
最佳答案
其中一部分可能是因为 deepcopy 会保留它访问过的所有对象的缓存,以避免陷入无限循环(我很确定是一个 set)。对于这种事情,您可能应该编写自己的高效复制函数。deepcopy 是为了能够处理任意输入而编写的,不一定高效。
如果你想要一个高效的复制函数,可以自己编写。下面的实现对于深拷贝来说已经足够了,代码如下:
import copy
import gc
import os
import psutil
from pympler.asizeof import asizeof
from humanize import filesize
class Foo(object):
    """Named container holding child ``Foo`` objects and ``Bar`` objects by name.

    ``__slots__`` is used, so instances have no per-instance ``__dict__``;
    the only attributes are ``name``, ``foos`` and ``bars``.
    """

    __slots__ = ["name", "foos", "bars"]

    def __init__(self, name):
        """Create an empty container called *name*."""
        self.name = name
        self.foos = {}  # maps child name -> child object
        self.bars = {}  # maps bar name -> bar object

    def add_foo(self, foo):
        """Register *foo* under ``foo.name``, replacing any same-named entry."""
        self.foos[foo.name] = foo

    def add_bar(self, bar):
        """Register *bar* under ``bar.name``, replacing any same-named entry."""
        self.bars[bar.name] = bar

    def copy(self):
        """Return a new ``Foo`` whose children are copied recursively.

        Every value in ``foos`` and ``bars`` is duplicated through its own
        ``copy()`` method; ``name`` and the dictionary keys are reused as-is.
        No record of already-copied objects is kept, so an object reachable
        through several paths is copied once per path, and a ``Foo`` that
        contains itself recurses without end.
        """
        new = Foo(self.name)
        new.foos = {key: child.copy() for key, child in self.foos.items()}
        new.bars = {key: item.copy() for key, item in self.bars.items()}
        return new
class Bar(object):
    """Leaf record with a ``name`` and a ``description``.

    ``__slots__`` is used, so instances have no per-instance ``__dict__``.
    """

    __slots__ = ["name", "description"]

    def __init__(self, name, description):
        """Store *name* and *description* on the instance."""
        self.name = name
        self.description = description

    def copy(self):
        """Return a new ``Bar`` built from this one's name and description."""
        return Bar(self.name, self.description)
def get_ram():
    """Return the resident set size, in bytes, of the current process.

    ``rss`` is the first field of psutil's ``memory_info()`` result, so this
    is the same value as indexing with ``[0]``, spelled by name.
    """
    return psutil.Process(os.getpid()).memory_info().rss
def get_foo(bar_count=5000):
    """Build the object graph used for the measurement.

    The result is a ``Foo`` named ``"Foo"`` that contains one child ``Foo``
    named ``"SubFoo1"``; each of the two holds *bar_count* ``Bar`` objects.

    Args:
        bar_count: Number of ``Bar`` objects added to each ``Foo``.
            Defaults to 5000.

    Returns:
        The top-level ``Foo``.
    """
    sub_foo = Foo("SubFoo1")
    for i in range(bar_count):
        # format() is deliberately called twice so that name and description
        # are two separate string objects, as in the measured scenario.
        sub_foo.add_bar(
            Bar("BarInSubFoo{}".format(i), "BarInSubFoo{}".format(i))
        )

    foo = Foo("Foo")
    foo.add_foo(sub_foo)
    for i in range(bar_count):
        foo.add_bar(Bar("BarInFoo{}".format(i), "BarInFoo{}".format(i)))
    return foo
def main():
    """Copy the sample graph with ``Foo.copy`` and print sizes and memory.

    Memory is sampled with ``get_ram()`` right before and right after the
    copy, each time following a ``gc.collect()``.
    """
    foo = get_foo()
    foo_size = asizeof(foo)

    gc.collect()
    ram1 = get_ram()

    foo_copy = foo.copy()

    gc.collect()
    ram2 = get_ram()
    # NOTE(review): the difference is a process-wide figure, so it presumably
    # includes allocator overhead as well as the copied objects -- confirm
    # before drawing conclusions from it.

    foo_copy_size = asizeof(foo_copy)
    print("Original object size: {}, Ram before: {}\nCopied object size: {}, Ram after: {}\nDiff in ram: {}".format(
        filesize.naturalsize(foo_size), filesize.naturalsize(ram1), filesize.naturalsize(foo_copy_size),
        filesize.naturalsize(ram2), filesize.naturalsize(ram2-ram1)
    ))


if __name__ == "__main__":
    main()
关于Python deepcopy 使用的内存比需要的多,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/64908080/