javascript - 聚合将文档键展开为新文档

我在更改使用 Mongo DB 构建的时间序列数据库所使用的模式时遇到一些问题。目前，我有如下所示的记录:

{
    "_id" : 20,
    "name" : "Bob",
    "location" : "London",
    "01/01/1993" : {
         "height" : "110cm",
         "weight" : "60kg",
    },
   "02/01/1993" : {
         "height" : "112cm",
         "weight" : "61kg",
    }

}

我希望使用聚合框架为每个“人”创建多条记录，为原始记录中的每个“时间值”子文档创建一条记录:

 {
    "_id" : 20,
    "name" : "Bob",
    "date" : "01/01/1993",
    "location" : "London",
    "height" : "110cm",
    "weight" : "60kg",
 },

 {
    "_id" : 20,
    "name" : "Bob",
    "date" : "02/01/1993",
    "location" : "London",
    "height" : "112cm",
    "weight" : "61kg",
 }

在向每条记录添加大量时间序列值时，新方案应该更加高效，并且我不应该遇到最大文档大小错误!

任何有关如何使用 Mongo DB 聚合管道执行此操作的帮助将不胜感激!

最佳答案

虽然较新版本的聚合框架提供了一些可以实现此类操作的功能，但它是否真的是最佳解决方案，则要视具体情况而定。

本质上，您可以把文档中除其他顶级键(_id、name、location)之外的键转换成一个条目数组，而这些顶级键随后会被包含到每个新文档中。然后可以使用 $unwind 处理该数组，并将整个结果重塑为新文档:

// Explode every date-keyed sub-document of each "input" document into its own
// document and write the results to the "output" collection.
db.getCollection('input').aggregate([
  // Stage 1: keep name/location (and the implicit _id) and gather every other
  // top-level field as a { k, v } entry in "data".
  {
    $project: {
      name: 1,
      location: 1,
      data: {
        $filter: {
          input: { $objectToArray: '$$ROOT' },
          as: 'd',
          cond: {
            $not: { $in: ['$$d.k', ['_id', 'name', 'location']] },
          },
        },
      },
    },
  },

  // Stage 2: one document per collected { k, v } entry.
  { $unwind: '$data' },

  // Stage 3: rebuild the root from the fixed fields, the entry key as "date",
  // and the fields of the entry value. The source _id is stored as "id", so
  // the new root has no _id of its own.
  {
    $replaceRoot: {
      newRoot: {
        $arrayToObject: {
          $concatArrays: [
            [
              { k: 'id', v: '$_id' },
              { k: 'name', v: '$name' },
              { k: 'location', v: '$location' },
              { k: 'date', v: '$data.k' },
            ],
            { $objectToArray: '$data.v' },
          ],
        },
      },
    },
  },

  // Stage 4: write the reshaped documents to the "output" collection.
  { $out: 'output' },
]);

或者，也可以在初始的 $project 阶段中，直接在生成的数组元素内完成所有的重塑:

// Variant that does all reshaping inside the initial $project: every
// date-keyed entry is mapped straight to its final document shape, then the
// array is unwound and each element promoted to a root document.
db.getCollection('input').aggregate([
  { "$project": {
    "_id": 0,
    "data": {
      "$map": {
        // Entries whose key is not one of the fixed top-level fields, i.e. the
        // date-keyed sub-documents.
        "input": {
          "$filter": {
            "input": { "$objectToArray": "$$ROOT" },
            "as": "d",
            "cond": {
              "$not": { "$in": [ "$$d.k", ["_id", "name", "location"] ] }
            }
          }
        },
        "as": "d",
        "in": {
          "$arrayToObject": {
            "$concatArrays": [
              // Store the source _id as "id" instead of copying "_id" itself:
              // every document produced from one input record would otherwise
              // share the same _id and $out would fail with a duplicate key
              // error on the _id index.
              [{ "k": "id", "v": "$_id" }],
              { "$filter": {
                "input": { "$objectToArray": "$$ROOT" },
                "as": "r",
                "cond": { "$in": [ "$$r.k", ["name", "location"] ] }
              }},
              [{ "k": "date", "v": "$$d.k" }],
              { "$objectToArray": "$$d.v" }
            ]
          }
        }
      }
    }
  }},
  { "$unwind": "$data" },
  { "$replaceRoot": { "newRoot": "$data" } },
  { "$out": "output" }
]);

因此，您使用 $objectToArray 和 $filter，从实际包含各个日期数据点的键创建一个数组。

在 $unwind 之后，我们基本上是对一组“数组格式”的命名键应用 $arrayToObject，以便为 $replaceRoot 构造 newRoot，然后使用 $out 写入新集合，每个数据键对应一个新文档。

但这可能只能帮助您完成部分任务，因为您确实应该将“date”数据更改为 BSON 日期。它占用的存储空间少得多，并且也更容易查询。

// Convert the "dd/mm/yyyy" string in each output document's "date" field to a
// BSON date, sending the updates to the server in batches of 500.
var updates = [];

// Only visit documents whose "date" is still a string, so the script can be
// re-run after a partial conversion without failing on already-converted dates.
db.getCollection('output').find({ "date": { "$type": "string" } }).forEach((d) => {
  // "dd/mm/yyyy" -> [yyyy, mm, dd]; explicit radix 10 so zero-padded parts such
  // as "08" are always read as decimal.
  const [year, month, day] = d.date
    .split('/')
    .reverse()
    .map((part) => parseInt(part, 10));

  updates.push({
    "updateOne": {
      "filter": { "_id": d._id },
      "update": {
        // Date.UTC takes a zero-based month.
        "$set": { "date": new Date(Date.UTC(year, month - 1, day)) }
      }
    }
  });

  // Flush a full batch and start a new one.
  if (updates.length >= 500) {
    db.getCollection('output').bulkWrite(updates);
    updates = [];
  }
});

// Flush whatever is left over from the final partial batch.
if (updates.length !== 0) {
  db.getCollection('output').bulkWrite(updates);
  updates = [];
}

当然，如果您的 MongoDB 服务器缺乏这些聚合功能，那么您最好首先通过迭代循环将输出写入新集合:

// Client-side fallback: explode every date-keyed sub-document of each "input"
// document into its own document, with "date" already converted to a BSON
// date, and insert the results into "output" in batches of about 500.
var output = [];

// Top-level fields that are copied as-is rather than treated as date keys.
var fixedKeys = ['_id', 'name', 'location'];

db.getCollection('input').find().forEach((d) => {
  Object.keys(d)
    .filter((k) => fixedKeys.indexOf(k) === -1)
    .forEach((k) => {
      // "dd/mm/yyyy" -> [yyyy, mm, dd]; explicit radix 10 so zero-padded parts
      // such as "08" are always read as decimal.
      const [year, month, day] = k
        .split('/')
        .reverse()
        .map((part) => parseInt(part, 10));

      // Append in place instead of rebuilding the whole buffer per document.
      output.push(Object.assign(
        {
          id: d._id,
          name: d.name,
          location: d.location,
          // Date.UTC takes a zero-based month.
          date: new Date(Date.UTC(year, month - 1, day))
        },
        d[k]
      ));
    });

  // Checked once per input document, so a batch may slightly exceed 500.
  if (output.length >= 500) {
    db.getCollection('output').insertMany(output);
    output = [];
  }
});

// Insert whatever is left over from the final partial batch.
if (output.length !== 0) {
  db.getCollection('output').insertMany(output);
  output = [];
}

在这两种情况下，我们都要把现有的“字符串”日期拆分成各个元素并将其反转，再传给 Date.UTC，从而得到一个可以转换为 BSON 日期的值。

聚合框架本身不允许类型转换，因此这一部分(而且是必要的部分)的唯一解决方案就是实际循环并更新，不过采用上面这些批量写入的形式，至少可以让循环和更新更加高效。

两种情况都会给你相同的最终输出:

/* 1 */
{
    "_id" : ObjectId("599275b1e38f41729f1d64fe"),
    "id" : 20.0,
    "name" : "Bob",
    "location" : "London",
    "date" : ISODate("1993-01-01T00:00:00.000Z"),
    "height" : "110cm",
    "weight" : "60kg"
}

/* 2 */
{
    "_id" : ObjectId("599275b1e38f41729f1d64ff"),
    "id" : 20.0,
    "name" : "Bob",
    "location" : "London",
    "date" : ISODate("1993-01-02T00:00:00.000Z"),
    "height" : "112cm",
    "weight" : "61kg"
}

关于javascript - 聚合将文档键展开为新文档，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/45682203/

javascript - 聚合将文档键展开为新文档

上一篇：database - 每行约 10,000 个 bool 状态的 PostgreSQL 数据库

下一篇：python - Pandas :创建一个在两列之间交替的新列