所以我想看看是否可以通过 Google Cloud Vision API 响应在 NodeJS 中实现非最大值抑制,例如响应如下所示:
[
{
"mid": "/m/09728",
"languageCode": "",
"name": "Bread",
"score": 0.8558391332626343,
"boundingPoly": {
"vertices": [],
"normalizedVertices": [
{
"x": 0.010737711563706398,
"y": 0.26679491996765137
},
{
"x": 0.9930269718170166,
"y": 0.26679491996765137
},
{
"x": 0.9930269718170166,
"y": 0.7275580167770386
},
{
"x": 0.010737711563706398,
"y": 0.7275580167770386
}
]
}
},
{
"mid": "/m/052lwg6",
"languageCode": "",
"name": "Baked goods",
"score": 0.6180902123451233,
"boundingPoly": {
"vertices": [],
"normalizedVertices": [
{
"x": 0.010737711563706398,
"y": 0.26679491996765137
},
{
"x": 0.9930269718170166,
"y": 0.26679491996765137
},
{
"x": 0.9930269718170166,
"y": 0.7275580167770386
},
{
"x": 0.010737711563706398,
"y": 0.7275580167770386
}
]
}
},
{
"mid": "/m/02wbm",
"languageCode": "",
"name": "Food",
"score": 0.5861617922782898,
"boundingPoly": {
"vertices": [],
"normalizedVertices": [
{
"x": 0.321802020072937,
"y": 0.2874892055988312
},
{
"x": 0.999139130115509,
"y": 0.2874892055988312
},
{
"x": 0.999139130115509,
"y": 0.6866284608840942
},
{
"x": 0.321802020072937,
"y": 0.6866284608840942
}
]
}
}
]
所以实际上,经过非最大值抑制后应该保留下来的边界框是 Food 对应的那一个,如下所示:
我找到了一个 Python 的示例(原帖中的链接),但这意味着我需要在 Node 中使用子进程来执行 Python 脚本,然后再把响应取回来,这感觉有点脏。
显然,来自 google 的框值需要乘以图像的高度和宽度,因此,如果我们假设它是 288 X 512:
// Convert a box's normalized vertices into pixel-space values.
// vertices[0] is the top-left corner and vertices[2] the bottom-right
// corner of the bounding polygon; 288 x 512 is the image width x height.
const left = Math.round(vertices[0].x * 288);
const top = Math.round(vertices[0].y * 512);
// Width/height come from the bottom-right corner minus the top-left one.
const width = Math.round((vertices[2].x * 288)) - left;
const height = Math.round((vertices[2].y * 512)) - top;
我的改编脚本如下(只是对阈值进行硬编码并从命令行获取框数组):
# import the necessary packages
import numpy as np
import sys
import json
# Malisiewicz et al.
def non_max_suppression_fast(boxes=None, overlapThresh=0.3):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Parameters
    ----------
    boxes : array-like of shape (N, 4), optional
        Boxes as corner coordinates ``[x1, y1, x2, y2]``.  If omitted,
        a JSON-encoded list is read from ``sys.argv[1]`` (the original
        command-line behavior).
    overlapThresh : float, optional
        Boxes overlapping a picked box by more than this ratio are
        suppressed.  Defaults to 0.3.

    Returns
    -------
    numpy.ndarray or list
        The surviving boxes as an int array, or ``[]`` when the input
        is empty.
    """
    if boxes is None:
        boxes = json.loads(sys.argv[1])
    # json.loads yields a plain Python list, which has no .dtype and
    # cannot be sliced with boxes[:, 0]; convert to an ndarray first.
    boxes = np.asarray(boxes)
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []
    # if the bounding boxes are integers, convert them to floats --
    # this is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")
    # initialize the list of picked indexes
    pick = []
    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the
        # index value to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        # find the largest (x, y) coordinates for the start of
        # the bounding box and the smallest (x, y) coordinates
        # for the end of the bounding box (i.e. the intersection)
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        # compute the width and height of the intersection; clamp at 0
        # so disjoint boxes contribute no overlap
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        # compute the ratio of overlap (intersection over remaining area)
        overlap = (w * h) / area[idxs[:last]]
        # delete the picked index and all indexes whose overlap with it
        # exceeds the threshold
        idxs = np.delete(
            idxs,
            np.concatenate(([last], np.where(overlap > overlapThresh)[0])),
        )
    # return only the bounding boxes that were picked using the
    # integer data type
    return boxes[pick].astype("int")
请问有人可以给我指点吗?我很确定这只是计算每个盒子的总面积,但我无法完全理解它。
最佳答案
好吧,实际上,如果您使用 Tensorflow.js,这非常简单 - 使用以下函数从 google Vision 获取响应:
注意 288 和 512 是我的图像宽度和高度,您需要设置自己的图像。
/**
 * Runs TensorFlow.js non-max suppression over Google Cloud Vision
 * localized-object annotations and resolves with the index (into
 * `objects`) of the best surviving box.
 *
 * @param {Array<Object>} objects - Vision API objects, each carrying
 *   `boundingPoly.normalizedVertices` (4 corners) and `score`.
 * @param {number} [imageWidth=288] - pixel width used to de-normalize x.
 * @param {number} [imageHeight=512] - pixel height used to de-normalize y.
 * @returns {Promise<number>} zero-based index of the selected box.
 */
async function nonMaxSuppression(objects, imageWidth = 288, imageHeight = 512) {
  // Build parallel arrays of boxes and scores. Use locals, not `this`:
  // the original pushed onto this.boxes/this.scores, which are undefined
  // in a plain function call and would throw.
  const boxes = [];
  const scores = [];
  for (const obj of objects) {
    const verts = obj.boundingPoly.normalizedVertices;
    // verts[0] is the top-left corner, verts[2] the bottom-right.
    const left = Math.round(verts[0].x * imageWidth);
    const top = Math.round(verts[0].y * imageHeight);
    const right = Math.round(verts[2].x * imageWidth);
    const bottom = Math.round(verts[2].y * imageHeight);
    // NOTE(review): tf.image.nonMaxSuppression expects corner coordinates
    // in [y1, x1, y2, x2] order, not [left, top, width, height] — confirm
    // against the tfjs docs for your installed version.
    boxes.push([top, left, bottom, right]);
    scores.push(obj.score);
  }
  // Params are boxes, scores, max number of boxes to select.
  const selected = tf.image.nonMaxSuppression(boxes, scores, 2);
  // The result is a 1D int32 tensor of ZERO-based indices into `boxes`.
  // The original read `theBox.id`, which is the tensor's internal handle
  // id (hence the fragile `- 1` fudge) — read the tensor data instead.
  const indices = await selected.data();
  selected.dispose(); // release the tensor's backing memory
  return indices[0];
}
塔达香蕉!
关于python - 谷歌云视觉响应nodejs的非最大抑制,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57748071/