c++ - Tensorflow C++ 不使用 GPU

Windows 10，amd64
使用 CMAKE GUI + MSBUILD 构建支持 tensorflow GPU 的 C++ 静态库
构建成功。
LABEL_IMAGE 教程示例执行时间:
... Main.cc 执行:9.17 秒
... Label_image.py 执行(tensorflow):10.34 秒
... Label_image.py 执行(tensorflow-gpu):1.62 秒
知道为什么 C++ 版本(Main.cc)的执行时间与 CPU 版相近、似乎没有使用 GPU 吗？非常感谢。
Main.cc 只做了少量自定义修改:
    #define NOMINMAX

    #include <fstream>
    #include <utility>
    #include <vector>

    #include "tensorflow/cc/ops/const_op.h"
    #include "tensorflow/cc/ops/image_ops.h"
    #include "tensorflow/cc/ops/standard_ops.h"
    #include "tensorflow/core/framework/graph.pb.h"
    #include "tensorflow/core/framework/tensor.h"
    #include "tensorflow/core/graph/default_device.h"
    #include "tensorflow/core/graph/graph_def_builder.h"
    #include "tensorflow/core/lib/core/errors.h"
    #include "tensorflow/core/lib/core/stringpiece.h"
    #include "tensorflow/core/lib/core/threadpool.h"
    #include "tensorflow/core/lib/io/path.h"
    #include "tensorflow/core/lib/strings/stringprintf.h"
    #include "tensorflow/core/platform/env.h"
    #include "tensorflow/core/platform/init_main.h"
    #include "tensorflow/core/platform/logging.h"
    #include "tensorflow/core/platform/types.h"
    #include "tensorflow/core/public/session.h"
    #include "tensorflow/core/util/command_line_flags.h"

    // These are all common classes it's handy to reference with no namespace.
    using tensorflow::Flag;
    using tensorflow::Tensor;
    using tensorflow::Status;
    using tensorflow::string;
    using tensorflow::int32;


    // Loads the whole of `filename` through `env` and stores it in the string
    // scalar held by `output`.
    //
    // Any error from querying the size, opening the file or reading it is
    // returned unchanged. If the read yields a different number of bytes than the
    // size reported for the file, a DataLoss status is returned instead.
    static Status ReadEntireFile(tensorflow::Env* env, const string& filename, Tensor* output) {
        // Ask the environment how many bytes the file is expected to contain.
        tensorflow::uint64 expected_bytes = 0;
        TF_RETURN_IF_ERROR(env->GetFileSize(filename, &expected_bytes));

        std::unique_ptr<tensorflow::RandomAccessFile> reader;
        TF_RETURN_IF_ERROR(env->NewRandomAccessFile(filename, &reader));

        // Scratch storage passed to Read(); `view` is the result that gets
        // size-checked and copied into the output tensor.
        string buffer;
        buffer.resize(expected_bytes);
        char* const scratch = &buffer[0];
        tensorflow::StringPiece view;
        TF_RETURN_IF_ERROR(reader->Read(0, expected_bytes, &view, scratch));

        const bool read_everything = (view.size() == expected_bytes);
        if (!read_everything) {
            return tensorflow::errors::DataLoss("Truncated read of '", filename, "' expected ", expected_bytes, " got ", view.size());
        }

        output->scalar<string>()() = view.ToString();
        return Status::OK();
    }

    // Given an image file name, read in the data and try to decode it as an
    // image. The decoded pixels are cast to uint8 and given a leading batch
    // dimension of 1 before being returned.
    //
    // The decoder is picked from the file extension: ".png", ".gif" and ".bmp"
    // get their dedicated decoders, anything else is treated as JPEG.
    //
    // input_height, input_width, input_mean and input_std are kept so existing
    // callers continue to compile, but they are currently unused because the
    // resize/normalize step is disabled (see the commented-out ops below).
    //
    // On success the tensors produced by running the decode graph are written to
    // *out_tensors. Otherwise the first failing Status is returned.
    Status ReadTensorFromImageFile(const string file_name, const int input_height, const int input_width, const float input_mean, const float input_std, std::vector<Tensor>* out_tensors) {
        auto root = tensorflow::Scope::NewRootScope();
        using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)

        // Name of the final op in the decode graph; it is the tensor fetched by Run().
        const string output_name = "dim";

        // Read file_name into a string tensor that will be fed to the graph.
        Tensor input(tensorflow::DT_STRING, tensorflow::TensorShape());
        TF_RETURN_IF_ERROR(ReadEntireFile(tensorflow::Env::Default(), file_name, &input));
        // Use a placeholder named "input" to receive the raw file bytes.
        auto file_reader = Placeholder(root.WithOpName("input"), tensorflow::DataType::DT_STRING);
        std::vector<std::pair<string, tensorflow::Tensor>> inputs = { { "input", input }, };

        // Now try to figure out what kind of file it is and decode it.
        const int wanted_channels = 3;
        const tensorflow::StringPiece name_view(file_name);
        tensorflow::Output image_reader;
        if (name_view.ends_with(".png")) {
            image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, DecodePng::Channels(wanted_channels));
        }
        else if (name_view.ends_with(".gif")) {
            // gif decoder returns 4-D tensor, remove the first dim
            image_reader = Squeeze(root.WithOpName("squeeze_first_dim"), DecodeGif(root.WithOpName("gif_reader"), file_reader));
        }
        else if (name_view.ends_with(".bmp")) {
            image_reader = DecodeBmp(root.WithOpName("bmp_reader"), file_reader);
        }
        else {
            // Assume if it's none of PNG, GIF or BMP then it must be a JPEG.
            image_reader = DecodeJpeg(root.WithOpName("jpeg_reader"), file_reader, DecodeJpeg::Channels(wanted_channels));
        }

        // Cast the decoded image to uint8 (not float: no arithmetic is done on the
        // pixels here because the normalization below is disabled).
        auto uint8_caster = Cast(root.WithOpName("uint8_caster"), image_reader, tensorflow::DT_UINT8);
        // The convention for image ops in TensorFlow is that all images are expected
        // to be in batches, so that they're four-dimensional arrays with indices of
        // [batch, height, width, channel]. Because we only have a single image, we
        // have to add a batch dimension of 1 to the start with ExpandDims().
        auto dims_expander = ExpandDims(root.WithOpName(output_name), uint8_caster, 0);

        // Disabled: bilinear resize to input_height x input_width followed by
        // subtracting input_mean and dividing by input_std.
        //auto resized = ResizeBilinear(root, dims_expander,Const(root.WithOpName("size"), { input_height, input_width }));
        //Div(root.WithOpName(output_name), Sub(root, resized, { input_mean }),{ input_std });

        // This runs the GraphDef network definition that we've just constructed, and
        // returns the results in the output tensor.
        tensorflow::GraphDef graph;
        TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));
        tensorflow::SessionOptions options;
        std::unique_ptr<tensorflow::Session> session(tensorflow::NewSession(options));
        // Guard against a null session so a creation failure becomes a Status
        // instead of a null-pointer dereference.
        if (!session) {
            return tensorflow::errors::Internal("Could not create a session to decode '", file_name, "'");
        }
        TF_RETURN_IF_ERROR(session->Create(graph));
        TF_RETURN_IF_ERROR(session->Run(inputs, { output_name }, {}, out_tensors));
        return Status::OK();
    }

    // Builds a session from the binary GraphDef stored at graph_file_name.
    //
    // *session is reset to a newly created session and the graph is loaded into
    // it. A failure to read the file is reported as NotFound; a failure while
    // creating the graph in the session is returned as-is.
    Status LoadGraph(const string& graph_file_name, std::unique_ptr<tensorflow::Session>* session) {
        tensorflow::GraphDef model_def;
        const Status read_status = ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &model_def);
        if (!read_status.ok()) {
            return tensorflow::errors::NotFound("Failed to load compute graph at '",graph_file_name, "'");
        }

        tensorflow::SessionOptions session_options;
        session->reset(tensorflow::NewSession(session_options));

        // The status of Create() is the function's result, both on failure and
        // on success.
        return (*session)->Create(model_def);
    }


    // Entry point: loads a frozen detection graph, decodes one image, runs the
    // image through the graph and logs every detection (of the first 20) whose
    // score is above 0.5.
    //
    // Returns 0 on success and -1 on any flag, loading, decoding or inference
    // failure.
    int main(int argc, char* argv[]) {
        // These are the command-line flags the program can understand.
        // They define where the graph and input data is located, and what kind of
        // input the model expects. If you train your own model, or use something
        // other than the default graph, then you'll need to update these.
        string image = "tensorflow/examples/label_image/data/grace_hopper.jpg";
        string graph = "tensorflow/examples/label_image/data/faster_rcnn_resnet101_coco_11_06_2017/frozen_inference_graph.pb";
        string labels = "/tensorflow/tensorflow/examples/label_image/data/faster_rcnn_resnet101_coco_11_06_2017/graph.pbtxt";
        int32 input_width = 299;
        int32 input_height = 299;
        float input_mean = 0;
        float input_std = 255;
        string input_layer = "image_tensor:0";
        // Tensors fetched from the graph. The indices used when reading `outputs`
        // below follow this order.
        std::vector<string> output_layer = { "detection_boxes:0", "detection_scores:0", "detection_classes:0", "num_detections:0" };
        // NOTE: the --output_layer flag is parsed into o_layer, but the session is
        // always run with the fixed output_layer list above, so the flag has no
        // effect on which tensors are fetched.
        string o_layer = "detection_boxes:0, detection_scores : 0, detection_classes : 0, num_detections : 0"; //dummy for Flag structure 
        bool self_test = false;
        string root_dir = "/tensorflow/";
        std::vector<Flag> flag_list = {
            Flag("image", &image, "image to be processed"),
            Flag("graph", &graph, "graph to be executed"),
            Flag("labels", &labels, "name of file containing labels"),
            Flag("input_width", &input_width, "resize image to this width in pixels"),
            Flag("input_height", &input_height,
            "resize image to this height in pixels"),
            Flag("input_mean", &input_mean, "scale pixel values to this mean"),
            Flag("input_std", &input_std, "scale pixel values to this std deviation"),
            Flag("input_layer", &input_layer, "name of input layer"),
            Flag("output_layer", &o_layer, "name of output layer"),
            Flag("self_test", &self_test, "run a self test"),
            Flag("root_dir", &root_dir,
            "interpret image and graph file names relative to this directory"),
        };
        string usage = tensorflow::Flags::Usage(argv[0], flag_list);
        const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
        if (!parse_result) {
            LOG(ERROR) << usage;
            return -1;
        }
        // We need to call this to set up global state for TensorFlow.
        tensorflow::port::InitMain(argv[0], &argc, &argv);
        if (argc > 1) {
            LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
            return -1;
        }
        // First we load and initialize the model.
        std::unique_ptr<tensorflow::Session> session;
        string graph_path = tensorflow::io::JoinPath(root_dir, graph);
        Status load_graph_status = LoadGraph(graph_path, &session);
        if (!load_graph_status.ok()) {
            LOG(ERROR) << load_graph_status;
            return -1;
        }
        // Get the image from disk as a tensor in the form the main graph expects.
        std::vector<Tensor> resized_tensors;
        string image_path = tensorflow::io::JoinPath(root_dir, image);

        //-------------------------------------
        // The two "Detection ..." log lines bracket image decoding plus inference.
        LOG(ERROR) << "Detection Basla....";
        Status read_tensor_status = ReadTensorFromImageFile(image_path, input_height, input_width, input_mean, input_std, &resized_tensors);
        if (!read_tensor_status.ok()) {
            LOG(ERROR) << read_tensor_status;
            return -1;
        }
        // Do not index into an empty result vector.
        if (resized_tensors.empty()) {
            LOG(ERROR) << "Reading image produced no tensors: " << image_path;
            return -1;
        }
        const Tensor& resized_tensor = resized_tensors[0];
        // Actually run the image through the model.
        std::vector<Tensor> outputs;
        Status run_status = session->Run({ { input_layer, resized_tensor } }, output_layer, {}, &outputs);
        LOG(ERROR) << "Detection Bit......";
        //-----------------------------------------

        if (!run_status.ok()) {
            LOG(ERROR) << "Running model failed: " << run_status;
            return -1;
        }
        // One output tensor is expected per requested name; verify before indexing.
        if (outputs.size() < output_layer.size()) {
            LOG(ERROR) << "Running model returned " << outputs.size() << " tensors, expected " << output_layer.size();
            return -1;
        }

        // Same order as output_layer: boxes, scores, classes, num_detections.
        auto boxes = outputs[0].flat_outer_dims<float, 3>();
        tensorflow::TTypes<float>::Flat scores = outputs[1].flat<float>();
        tensorflow::TTypes<float>::Flat classes = outputs[2].flat<float>();
        tensorflow::TTypes<float>::Flat num_detections = outputs[3].flat<float>();

        LOG(ERROR) << "num_detections:" << num_detections(0) << "," << outputs[0].shape().DebugString();

        // Report at most this many detections, and only those above the threshold.
        const size_t max_reported = 20;
        const float score_threshold = 0.5;
        for (size_t i = 0; static_cast<float>(i) < num_detections(0) && i < max_reported; ++i)
        {
            if (scores(i) > score_threshold)
            {
                LOG(ERROR) << i << ",score:" << scores(i) << ",class:" << classes(i) << ",box:" << "," << boxes(0, i, 0) << "," << boxes(0, i, 1) << "," << boxes(0, i, 2) << "," << boxes(0, i, 3);
            }
        }

        return 0;
    }
最佳答案
成功构建后，我运行代码并收到“_pywrap_tensorflow_internal.pyd not found”消息。
我搜索了 PC，在 python/tensorflow 路径中找到了一个。
我将那个文件复制到执行路径，除了没有使用 GPU 之外一切正常。
突然有个声音对我耳语：“嘿，你这不朽的家伙!!你应该使用最近生成的 pywrap_tensorflow_internal.dll，将其重命名为 _pywrap_tensorflow_internal.pyd，并复制到执行路径。”
这样做之后，程序就开始使用 GPU 了。
关于c++ - Tensorflow C++ 不使用 GPU，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/47762137/
c++ - Tensorflow C++ 不使用 GPU

上一篇：c++ - 如何创建一个简单的 cpu 基准测试？

下一篇：c++ - 带 ACE react 器的 ZeroMQ