python - Simple DQN that trains slowly

Tags: python tensorflow keras deep-learning openai-gym

I have been trying to solve the OpenAI Lunar Lander environment with the DQN from this paper:

https://arxiv.org/pdf/2006.04938v2.pdf

The problem is that training 50 episodes takes about 12 hours, so something must be wrong.

import os
import random
import gym
import numpy as np
from collections import deque
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model

ENV_NAME = "LunarLander-v2"

DISCOUNT_FACTOR = 0.9
LEARNING_RATE = 0.001

MEMORY_SIZE = 2000
TRAIN_START = 1000
BATCH_SIZE = 24

EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01
EXPLORATION_DECAY = 0.99

class MyModel(Model):
    def __init__(self, input_size, output_size):
        super(MyModel, self).__init__()
        self.d1 = Dense(128, input_shape=(input_size,), activation="relu")
        self.d2 = Dense(128, activation="relu")
        self.d3 = Dense(output_size, activation="linear")

    def call(self, x):
        x = self.d1(x)
        x = self.d2(x)
        return self.d3(x)

class DQNSolver():

    def __init__(self, observation_space, action_space):
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)

        self.model = MyModel(observation_space,action_space)
        self.model.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
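        # epsilon-greedy: explore with probability exploration_rate, otherwise act greedily on predicted Q-values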
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])

    def experience_replay(self):
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)
        state_batch, q_values_batch = [], []
        for state, action, reward, state_next, terminal in batch:
            # q-value prediction for a given state
            q_values_cs = self.model.predict(state)
            # target q-value
            max_q_value_ns = np.amax(self.model.predict(state_next)[0])
            # correction on the Q value for the action used
            if terminal:
                q_values_cs[0][action] = reward
            else:
                q_values_cs[0][action] = reward + DISCOUNT_FACTOR * max_q_value_ns
            state_batch.append(state[0])
            q_values_batch.append(q_values_cs[0])
        # train the Q network
        self.model.fit(np.array(state_batch),
                        np.array(q_values_batch),
                        batch_size = BATCH_SIZE,
                        epochs = 1, verbose = 0)
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)

def lunar_lander():
    env = gym.make(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    episode = 0
    scores = []  # keep scores across episodes so the rolling 100-episode average is meaningful
    print("Running")
    while True:
        episode += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        score = 0
        while True:
            action = dqn_solver.act(state)
            state_next, reward, terminal, _ = env.step(action)
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            dqn_solver.experience_replay()
            state = state_next
            score += reward
            if terminal:
                print("Episode: " + str(episode) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(score))
                scores.append(score)
                break
        if np.mean(scores[-min(100, len(scores)):]) >= 195:
            print("Problem is solved in {} episodes.".format(episode))
            break
    env.close()

if __name__ == "__main__":
    lunar_lander()

Here is the log:

root@b11438e3d3e8:~# /usr/bin/python3 /root/test.py
2021-01-03 13:42:38.055593: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.338231: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2021-01-03 13:42:39.368192: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.368693: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.8095GHz coreCount: 20 deviceMemorySize: 7.92GiB deviceMemoryBandwidth: 298.32GiB/s
2021-01-03 13:42:39.368729: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.370269: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2021-01-03 13:42:39.371430: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2021-01-03 13:42:39.371704: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2021-01-03 13:42:39.373318: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2021-01-03 13:42:39.374243: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2021-01-03 13:42:39.377939: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2021-01-03 13:42:39.378118: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.378702: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.379127: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2021-01-03 13:42:39.386525: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 3411185000 Hz
2021-01-03 13:42:39.386867: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4fb44c0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-01-03 13:42:39.386891: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2021-01-03 13:42:39.498097: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.498786: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4fdf030 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2021-01-03 13:42:39.498814: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): GeForce GTX 1080, Compute Capability 6.1
2021-01-03 13:42:39.498987: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.499416: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.8095GHz coreCount: 20 deviceMemorySize: 7.92GiB deviceMemoryBandwidth: 298.32GiB/s
2021-01-03 13:42:39.499448: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.499483: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2021-01-03 13:42:39.499504: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2021-01-03 13:42:39.499523: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2021-01-03 13:42:39.499543: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2021-01-03 13:42:39.499562: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2021-01-03 13:42:39.499581: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2021-01-03 13:42:39.499643: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.500113: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.500730: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2021-01-03 13:42:39.500772: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.915228: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2021-01-03 13:42:39.915298: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263]      0 
2021-01-03 13:42:39.915322: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0:   N 
2021-01-03 13:42:39.915568: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.916104: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.916555: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6668 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080, pci bus id: 0000:01:00.0, compute capability: 6.1)
Running
2021-01-03 13:42:40.267699: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10

Here are the GPU stats (nvidia-smi):

+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.66       Driver Version: 450.66       CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  GeForce GTX 1080    Off  | 00000000:01:00.0  On |                  N/A |
|  0%   53C    P2    46W / 198W |   7718MiB /  8111MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

As you can see, TensorFlow is not actually computing on the GPU, it only reserves memory there, so I assume the neural network's input is so small that the CPU is used instead.
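
As far as I understand, TensorFlow reserves almost all GPU memory up front by default, so the ~7.7 GiB shown by nvidia-smi only reflects that reservation, not actual compute. A minimal sketch (TF 2.x) to make the allocation grow on demand instead:

import tensorflow as tf

# Must run before any GPU op executes; otherwise set_memory_growth raises a RuntimeError.
gpus = tf.config.list_physical_devices("GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
print("GPUs visible to TensorFlow:", gpus)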

To make sure the GPU is set up correctly, I ran an example from the TensorFlow documentation and it did use the GPU. You can also reproduce the issue on Google Colab here.
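
To double-check where the ops end up, TensorFlow's device-placement logging can be turned on before building the model. A quick sanity check (the shapes below are just a dummy 8-feature LunarLander observation, not code from the paper):

import tensorflow as tf

# Logs the device (CPU or GPU) every op is assigned to.
tf.debugging.set_log_device_placement(True)

x = tf.random.normal((1, 8))      # dummy observation batch (LunarLander-v2 has 8 features)
w = tf.random.normal((8, 128))
y = tf.matmul(x, w)
print(y.device)                   # e.g. /job:localhost/replica:0/task:0/device:GPU:0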

Is this an algorithm problem or a code problem?

Is there a way to make use of the GPU in this case?

Update

It turns out the agent was learning to fly rather than to land, so I added a maximum of 150 steps to limit the episode length, but it is still slow.
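
For reference, the cap looks roughly like this (a sketch of the change to the inner loop of lunar_lander(), with the 150-step limit mentioned above):

MAX_STEPS = 150  # hard cap so the agent cannot just hover forever

# replaces the inner `while True:` loop inside lunar_lander()
for step in range(MAX_STEPS):
    action = dqn_solver.act(state)
    state_next, reward, terminal, _ = env.step(action)
    state_next = np.reshape(state_next, [1, observation_space])
    dqn_solver.remember(state, action, reward, state_next, terminal)
    dqn_solver.experience_replay()
    state = state_next
    score += reward
    if terminal:
        break
# episode bookkeeping now runs whether the episode ended or was cut off
print("Episode: {}, exploration: {}, score: {}".format(episode, dqn_solver.exploration_rate, score))
scores.append(score)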

Accepted answer

Running this on Colab with the GPU enabled in the notebook settings, I was able to monitor GPU usage with wandb.ai: the code was trying to use the GPU, but utilization stayed at 0%. So I could reproduce the problem, since it also ran slowly for me. My best guess, based on this post and also this one, is that the env's step function is what holds the process back. I tried increasing the batch size by a factor of 10 to see whether it affected speed or GPU usage, without success. For a network of this size and structure, the CPU is probably more efficient, so that is the only thing being used. What I cannot explain is why the paper you linked got results within a few hours, far less time than we are seeing, unless the laptop they used has a significantly faster CPU.
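
One way to check this hypothesis is to time the three phases of a training step separately; a rough sketch below, assuming the env and dqn_solver objects from the question (to compare pure-CPU throughput, one can also set os.environ["CUDA_VISIBLE_DEVICES"] = "-1" before importing TensorFlow):

import time
import numpy as np

def profile_iterations(env, dqn_solver, n_steps=200):
    # Rough per-phase timing: action selection (model.predict), env.step,
    # and experience_replay (per-transition predict calls plus one model.fit).
    obs_dim = env.observation_space.shape[0]
    state = np.reshape(env.reset(), [1, obs_dim])
    t_act = t_env = t_replay = 0.0
    for _ in range(n_steps):
        t0 = time.perf_counter()
        action = dqn_solver.act(state)
        t1 = time.perf_counter()
        state_next, reward, terminal, _ = env.step(action)
        t2 = time.perf_counter()
        state_next = np.reshape(state_next, [1, obs_dim])
        dqn_solver.remember(state, action, reward, state_next, terminal)
        dqn_solver.experience_replay()
        t3 = time.perf_counter()
        t_act += t1 - t0
        t_env += t2 - t1
        t_replay += t3 - t2
        state = state_next if not terminal else np.reshape(env.reset(), [1, obs_dim])
    print("act: {:.2f}s, env.step: {:.2f}s, replay: {:.2f}s".format(t_act, t_env, t_replay))

If the replay phase dominates rather than env.step, the per-transition model.predict() calls inside experience_replay would be the first thing to try batching.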

For "python - Simple DQN that trains slowly", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/65555792/
