python - Tensorflow frozen graph protobuf不预测使用c api

我已经使用 this repo 训练了语义分割模型，得到了很好的结果，并尝试在用 tensorflow c API 编写的小型库中使用这个网络。我使用 this repo 将我的 keras 模型转换为 protobuf 文件并使用此代码运行 session :

// Bundle of TensorFlow C API handles that ModelCreate fills in and that
// ModelPredict / ModelDestroy read back.
struct model_t {
    TF_Graph* graph;       // graph the GraphDef is imported into
    TF_Session* session;   // session opened on `graph`
    TF_Status* status;     // status object passed to the C API calls

    // Graph endpoints looked up by name in ModelCreate.
    TF_Output input;
    TF_Output target;
    TF_Output output;

    // Operations looked up by fixed names ("init", "train", "save/...").
    TF_Operation* init_op;
    TF_Operation* train_op;
    TF_Operation* save_op;
    TF_Operation* restore_op;

    // Output 0 of the "save/Const" operation.
    TF_Output checkpoint_file;
};

// Picture geometry captured by ModelCreate and consumed by ModelPredict.
struct NetProperties {
    int width;      // picture width passed to ModelCreate
    int height;     // picture height passed to ModelCreate
    int border;     // border added on every side of the network input
    int classes;    // number of values per output pixel
    int inputSize;  // (width + 2*border) * (height + 2*border) * 3
};

// File-level state shared by ModelCreate / ModelPredict / ModelDestroy.
// Both pointers start out null; ModelCreate allocates them.
static model_t* model = nullptr;
static NetProperties* properties = nullptr;

/// Imports a serialized GraphDef, opens a session on it and stores the
/// resulting handles and picture geometry in the file-level `model` /
/// `properties` globals. Any previously loaded model is released first.
///
/// @param nnFilename    path of the GraphDef file handed to read_file()
/// @param inputName     name of the graph operation that ModelPredict feeds
/// @param outputName    name of the graph operation that ModelPredict fetches
/// @param pictureWidth  picture width, without border (must be > 0)
/// @param pictureHeight picture height, without border (must be > 0)
/// @param border        border added on each side of the network input
/// @param classes       number of values per output pixel (must be > 0)
/// @return 1 on success; 0 on failure, in which case no model stays loaded.
extern "C" EXPORT int ModelCreate(const char* nnFilename, const char* inputName, const char* outputName, int pictureWidth, int pictureHeight, int border, int classes) {
    // Release whatever an earlier call loaded. The globals are reset here too,
    // so this function does not depend on ModelDestroy clearing them.
    ModelDestroy();
    model = nullptr;
    free(properties);
    properties = nullptr;

    if (nnFilename == nullptr || inputName == nullptr || outputName == nullptr) return 0;

    const int paddedWidth = pictureWidth + border * 2;
    const int paddedHeight = pictureHeight + border * 2;
    if (pictureWidth <= 0 || pictureHeight <= 0 || classes <= 0 ||
        paddedWidth <= 0 || paddedHeight <= 0) {
        return 0;
    }

    // Single failure path: tear down the partially built model so that a later
    // ModelPredict/ModelCreate never sees half-initialised state.
    const auto fail = []() -> int {
        ModelDestroy();
        model = nullptr;
        free(properties);
        properties = nullptr;
        return 0;
    };

    model = static_cast<model_t*>(malloc(sizeof(model_t)));
    properties = static_cast<NetProperties*>(malloc(sizeof(NetProperties)));
    if (model == nullptr || properties == nullptr) {
        // Nothing TensorFlow-related exists yet, so plain free() is enough.
        free(model);
        model = nullptr;
        free(properties);
        properties = nullptr;
        return 0;
    }

    model->status = TF_NewStatus();
    model->graph = TF_NewGraph();
    model->session = nullptr;  // keeps the failure path well-defined until the session exists

    properties->width = pictureWidth;
    properties->height = pictureHeight;
    properties->border = border;
    properties->classes = classes;
    properties->inputSize = paddedWidth * paddedHeight * 3;

    {
        // Create the session.
        TF_SessionOptions* opts = TF_NewSessionOptions();
        model->session = TF_NewSession(model->graph, opts, model->status);
        TF_DeleteSessionOptions(opts);
        if (!Okay(model->status)) return fail();
    }

    TF_Graph* g = model->graph;

    {
        // Import the graph.
        TF_Buffer* graph_def = read_file(nnFilename);
        if (graph_def == nullptr) return fail();
        printf("Read GraphDef of %zu bytes\n", graph_def->length);
        TF_ImportGraphDefOptions* opts = TF_NewImportGraphDefOptions();
        TF_GraphImportGraphDef(g, graph_def, opts, model->status);
        TF_DeleteImportGraphDefOptions(opts);
        TF_DeleteBuffer(graph_def);
        if (!Okay(model->status)) return fail();
    }

    // Handles to the interesting operations in the graph.
    model->input.oper = TF_GraphOperationByName(g, inputName);
    model->input.index = 0;
    model->target.oper = TF_GraphOperationByName(g, "target");
    model->target.index = 0;
    model->output.oper = TF_GraphOperationByName(g, outputName);
    model->output.index = 0;

    // ModelPredict needs both endpoints; a misspelled name would otherwise
    // only show up later as a crash inside TF_SessionRun.
    if (model->input.oper == nullptr || model->output.oper == nullptr) {
        fprintf(stderr, "Operation '%s' or '%s' not found in graph\n", inputName, outputName);
        return fail();
    }

    // The remaining handles are looked up by fixed names and are not used by
    // ModelPredict; they are stored as returned, without checking.
    model->init_op = TF_GraphOperationByName(g, "init");
    model->train_op = TF_GraphOperationByName(g, "train");
    model->save_op = TF_GraphOperationByName(g, "save/control_dependency");
    model->restore_op = TF_GraphOperationByName(g, "save/restore_all");

    model->checkpoint_file.oper = TF_GraphOperationByName(g, "save/Const");
    model->checkpoint_file.index = 0;

    // The first prediction is slow, so run one on constant data right away.
    // ModelPredict allocates its result with new[]; release it here instead of
    // leaking it (delete[] on a null result is a no-op). As before, the outcome
    // of the warm-up does not change the return value.
    std::vector<unsigned char> warmup(static_cast<size_t>(properties->inputSize),
                                      static_cast<unsigned char>(100));
    delete[] ModelPredict(warmup.data());
    return 1;
}

/// Releases the currently loaded model, if any, together with the picture
/// geometry stored next to it. Safe to call repeatedly: both globals are reset
/// to null so later calls (and ModelPredict's null check) see "no model".
extern "C" EXPORT void ModelDestroy() {
    // properties is malloc'ed in ModelCreate; free(nullptr) is a no-op.
    free(properties);
    properties = nullptr;

    if (model == nullptr) return;
    TF_DeleteSession(model->session, model->status);
    Okay(model->status);
    TF_DeleteGraph(model->graph);
    TF_DeleteStatus(model->status);
    free(model);
    // Without this reset the next ModelCreate/ModelDestroy would free the same
    // block again and ModelPredict would dereference freed memory.
    model = nullptr;
}

/// Runs the loaded model on one picture.
///
/// @param batch1 properties->inputSize bytes; each byte is scaled by 1/255 and
///               fed as a float tensor of shape
///               {1, height + 2*border, width + 2*border, 3}.
/// @return a buffer of width * height * classes bytes, each the corresponding
///         output float multiplied by 255 and clamped to [0, 255]. The buffer
///         is allocated with new[] and must be released by the caller with
///         delete[]. Returns NULL when no model is loaded, an argument is
///         invalid, the session run fails, or the output tensor is not a float
///         tensor holding at least that many values.
extern "C" EXPORT unsigned char* ModelPredict(unsigned char * batch1) {
    if (model == NULL || properties == NULL || batch1 == NULL) return NULL;
    if (model->input.oper == NULL || model->output.oper == NULL) return NULL;

    const int outputSize = properties->width * properties->height * properties->classes;
    if (properties->inputSize <= 0 || outputSize <= 0) return NULL;

    const int64_t dims[4] = { 1, properties->height + properties->border * 2, properties->width + properties->border * 2, 3 };
    const size_t inputCount = static_cast<size_t>(properties->inputSize);

    // Let TensorFlow own the input buffer: no separate malloc and no reliance
    // on what the external Deallocator callback does with the memory.
    TF_Tensor* input = TF_AllocateTensor(TF_FLOAT, dims, 4, inputCount * sizeof(float));
    if (input == NULL) return NULL;

    float* inputData = static_cast<float*>(TF_TensorData(input));
    for (size_t i = 0; i < inputCount; i++) {
        inputData[i] = batch1[i] * (1.f / 255.f);
    }

    TF_Output feed = { model->input.oper, 0 };
    TF_Output fetch = { model->output.oper, 0 };

    // TF_SessionRun stores a tensor it allocated itself in output_value, so
    // nothing is pre-allocated here (a pre-allocated tensor would be
    // overwritten and leaked).
    TF_Tensor* output_value = NULL;

    TF_SessionRun(model->session, NULL,
        &feed, &input, 1,
        &fetch, &output_value, 1,
        /* No target operations to run */
        NULL, 0, NULL, model->status);

    // The input tensor is no longer needed, whether or not the run succeeded.
    TF_DeleteTensor(input);

    if (!Okay(model->status)) {
        if (output_value != NULL) TF_DeleteTensor(output_value);
        return NULL;
    }

    // Refuse to read past the end of (or misinterpret) an unexpected output.
    const size_t neededBytes = static_cast<size_t>(outputSize) * sizeof(float);
    if (output_value == NULL ||
        TF_TensorType(output_value) != TF_FLOAT ||
        TF_TensorByteSize(output_value) < neededBytes) {
        if (output_value != NULL) TF_DeleteTensor(output_value);
        return NULL;
    }

    const float* prediction = static_cast<const float*>(TF_TensorData(output_value));
    unsigned char* charPrediction = new unsigned char[outputSize];
    for (int i = 0; i < outputSize; i++) {
        const float scaled = prediction[i] * 255;
        // Converting an out-of-range (or NaN) float to unsigned char is
        // undefined, so clamp explicitly; in-range values truncate as before.
        if (!(scaled > 0.f)) {
            charPrediction[i] = 0;
        } else if (scaled >= 255.f) {
            charPrediction[i] = 255;
        } else {
            charPrediction[i] = static_cast<unsigned char>(scaled);
        }
    }

    TF_DeleteTensor(output_value);
    return charPrediction;
}

问题是预测结果总是一样的。我尝试传递随机数据和真实图像，但结果相同。不同的训练模型会给出不同的预测结果，但对于同一个模型，结果始终不变。正如您在代码片段中看到的，我检查过：每次传入的数据都不同，但得到的预测却完全相同：

// first is float sum of passed picture, second is the float sum of answer
724306
22982.6

692004
22982.6

718490
22982.6

692004
22982.6

720861
22982.6

692004
22982.6

我尝试编写自己的 keras 到 tensorflow .pb 转换器，但结果是一样的。

import os, argparse

import tensorflow as tf 
from tensorflow.keras.utils import get_custom_objects
from segmentation_models.losses import bce_dice_loss,dice_loss,cce_dice_loss
from segmentation_models.metrics import iou_score

# Register the custom losses/metric from segmentation_models under the names
# used above, so they are present in Keras' custom-object registry.
get_custom_objects().update(
    dice_loss=dice_loss,
    bce_dice_loss=bce_dice_loss,
    cce_dice_loss=cce_dice_loss,
    iou_score=iou_score,
)

def freeze_keras(model_name, output_dir='./', output_name='saved_model.pb'):
    """Load a Keras model and write it out as a frozen binary GraphDef.

    Args:
        model_name: path of the saved Keras model passed to ``load_model``.
        output_dir: directory the graph is written to (default ``'./'``).
        output_name: file name of the written graph (default
            ``'saved_model.pb'``).

    The graph's variables are converted to constants, keeping the ops that
    produce ``model.outputs`` as the output nodes.
    """
    # Learning phase 0 is set before the model is loaded so the graph is built
    # with that phase.
    tf.keras.backend.set_learning_phase(0)
    model = tf.keras.models.load_model(model_name)
    sess = tf.keras.backend.get_session()
    output_names = [out.op.name for out in model.outputs]
    constant_graph = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), output_names)
    tf.train.write_graph(constant_graph, output_dir, output_name, as_text=False)

freeze_keras('best-weights.hdf5')

帮我找出如何在 c api 中修复预测结果。

更新 1:按照 jdehesa 的建议修改了输入数组

更新 2:添加了 model 和 NetProperties 的定义

最佳答案

我认为您没有正确设置输入数据。让我们看看。

float * arrayOfFloats1 = (float*)malloc(nbytes * sizeof(float));
float sumUp = 0;

在这里，您创建了 arrayOfFloats1 来保存所有图像数据。

for (int i = 0; i < properties->inputSize; i++) {
    arrayOfFloats1[i] = batch1[i] * (1.f / 255.f);
    sumUp += arrayOfFloats1[i];
}
std::cout << sumUp << std::endl;

在这里，您将 arrayOfFloats1 设置为图像数据。这一切都很好。

然后:

float ** inputFloats = (float**)malloc(nbytes * sizeof(float*));

这里创建了 inputFloats，它拥有可容纳 nbytes 个浮点指针的空间。首先，您可能是想为浮点值分配空间，而不是为浮点指针分配空间(两者的大小可能不同)。然后:

inputFloats[0] = arrayOfFloats1;

在这里，您将这 nbytes 个指针中的第一个设置为指针 arrayOfFloats1。然后 inputFloats 被用作模型的输入。但其余的 nbytes - 1 个指针尚未被设置为任何值。它们可能全为零，但并不一定。

如果您只想用 arrayOfFloats1 创建一个“浮点数组的数组”，则不需要分配任何内存，只需执行以下操作:

float ** inputFloats = &arrayOfFloats1;

但是你实际上是这样使用 inputFloats 的:

TF_Tensor* input = TF_NewTensor(
    TF_FLOAT, dims, 4, (void*)inputFloats, nbytes * sizeof(float), &Deallocator, NULL);

所以这里您的意思是: input 由 inputFloats 中的数据组成，而那里面的数据是一个指向 arrayOfFloats1 的指针，后面跟着未初始化的内存。您真正想要的可能是这样:

TF_Tensor* input = TF_NewTensor(
    TF_FLOAT, dims, 4, (void*)arrayOfFloats1, nbytes * sizeof(float), &Deallocator, NULL);

这意味着 input 将是由您之前复制的 arrayOfFloats1 中的数据组成的张量。事实上，我认为您的代码根本不需要 inputFloats。

否则，据我所知，其余代码似乎是正确的。您应该确保在所有情况下都正确释放所有分配的内存(例如，当您执行 if (!Okay(model->status)) return NULL; 您可能应该在返回之前删除输入和输出张量)，但这是一个不同的问题。

关于python - Tensorflow frozen graph protobuf不预测使用c api，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/57109509/

python - Tensorflow frozen graph protobuf不预测使用c api

上一篇：c - C中具有不同数据类型的键值

下一篇：c - 如何使用 Termios 通过 socat 数据传输循环发送字节