python - 加快 MySQL 客户数据的切片与切块(slice and dice)

标签 python mysql pandas elasticsearch

目前我们正在使用 AWS RDS(MySQL) + Pandas。我们有订单、客户、产品等表。要根据各种过滤器(总共 18 个)筛选出客户,并针对这些客户开展营销活动,需要花费太多时间。仅“订单”表本身就有数百万行。所以为了加快速度,我们开始使用 Elasticsearch 进行 POC(概念验证),因为我们的过滤器包含大量文本搜索,例如“产品名称”、“供应商名称”等。

我们面临的问题是:1) 按 AOV(平均订单金额)分桶过滤,并同时返回相关文档的详细信息;2) 按订单数量过滤;3) 按 first_order_date 和 last_order_date 过滤。

我们的文档结构是

{
    "order_id":"6",
    "customer_id":"1",
    "customer_name":"shailendra",
    "mailing_addres":"shailendra@gmail.com",
    "actual_order_date":"2000-04-30",
    "is_veg":"0",
    "total_amount":"2499",
    "store_id":"276",
    "city_id":"12",
    "payment_mode":"cod",
    "is_elite":"0",
    "product":["1","2"],
    "coupon_id":"",
    "client_source":"1",
    "vendor_id":"",
    "vendor_name":"",
    "brand_id":"",
    "third_party_source":""

}

这是查询

{
    "aggs": {
        "customer_ids":{
            "terms":{
                "field":"customer_id"
            }
        }
    }
}

它返回结果为

{
  "took": 13,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 8,
    "max_score": 1,
    "hits": [
      {
        "_index": "customers4",
        "_type": "details",
        "_id": "5",
        "_score": 1,
        "_source": {
          "order_id": "5",
          "customer_id": "5",
          "customer_name": "ashish",
          "mailing_addres": "ashish@gmail.com",
          "actual_order_date": "2016-05-30",
          "is_veg": "1",
          "total_amount": "300",
          "store_id": "2",
          "city_id": "",
          "payment_mode": "cod",
          "is_elite": "0",
          "product": [
            "1",
            "2"
          ],
          "coupon_id": "",
          "client_source": "1",
          "vendor_id": "",
          "brand_id": "",
          "third_party_source": ""
        }
      },
      {
        "_index": "customers4",
        "_type": "details",
        "_id": "8",
        "_score": 1,
        "_source": {
          "order_id": "8",
          "customer_id": "2",
          "customer_name": "nikhil",
          "mailing_addres": "nikhil@gmail.com",
          "actual_order_date": "2016-05-30",
          "is_veg": "0",
          "total_amount": "249",
          "store_id": "2",
          "city_id": "",
          "payment_mode": "cod",
          "is_elite": "0",
          "product": [
            "1",
            "2"
          ],
          "coupon_id": "",
          "client_source": "1",
          "vendor_id": "",
          "brand_id": "",
          "third_party_source": ""
        }
      },
      {
        "_index": "customers4",
        "_type": "details",
        "_id": "2",
        "_score": 1,
        "_source": {
          "order_id": "2",
          "customer_id": "2",
          "customer_name": "nikhil",
          "mailing_addres": "nikhil.01@gmail.com",
          "actual_order_date": "2016-01-30",
          "is_veg": "1",
          "total_amount": "255",
          "store_id": "1",
          "city_id": "",
          "payment_mode": "cod",
          "is_elite": "0",
          "product": [
            "1",
            "2",
            "3"
          ],
          "coupon_id": "",
          "client_source": "1",
          "vendor_id": "",
          "brand_id": "",
          "third_party_source": ""
        }
      },
      {
        "_index": "customers4",
        "_type": "details",
        "_id": "4",
        "_score": 1,
        "_source": {
          "order_id": "4",
          "customer_id": "4",
          "customer_name": "vivek",
          "mailing_addres": "vivek@gmail.com",
          "actual_order_date": "2016-04-30",
          "is_veg": "0",
          "total_amount": "249",
          "store_id": "2",
          "city_id": "",
          "payment_mode": "cod",
          "is_elite": "0",
          "product": [
            "1",
            "2"
          ],
          "coupon_id": "",
          "client_source": "1",
          "vendor_id": "",
          "brand_id": "",
          "third_party_source": ""
        }
      },
      {
        "_index": "customers4",
        "_type": "details",
        "_id": "6",
        "_score": 1,
        "_source": {
          "order_id": "7",
          "customer_id": "1",
          "customer_name": "shailendra",
          "mailing_addres": "shailendra07121@gmail.com",
          "actual_order_date": "2016-05-30",
          "is_veg": "0",
          "total_amount": "249",
          "store_id": "2",
          "city_id": "",
          "payment_mode": "cod",
          "is_elite": "0",
          "product": [
            "1",
            "2"
          ],
          "coupon_id": "",
          "client_source": "1",
          "vendor_id": "",
          "brand_id": "",
          "third_party_source": ""
        }
      },
      {
        "_index": "customers4",
        "_type": "details",
        "_id": "1",
        "_score": 1,
        "_source": {
          "order_id": "1",
          "customer_id": "1",
          "customer_name": "shailendra",
          "mailing_addres": "shailendra07121@gmail.com",
          "actual_order_date": "2016-01-30",
          "is_veg": "1",
          "total_amount": "251",
          "store_id": "1",
          "city_id": "",
          "payment_mode": "cod",
          "is_elite": "0",
          "product": [
            "1",
            "2",
            "3"
          ],
          "coupon_id": "",
          "client_source": "1",
          "vendor_id": "",
          "brand_id": "",
          "third_party_source": ""
        }
      },
      {
        "_index": "customers4",
        "_type": "details",
        "_id": "7",
        "_score": 1,
        "_source": {
          "order_id": "6",
          "customer_id": "4",
          "customer_name": "vivek",
          "mailing_addres": "vivek@gmail.com",
          "actual_order_date": "2016-05-30",
          "is_veg": "0",
          "total_amount": "249",
          "store_id": "2",
          "city_id": "",
          "payment_mode": "cod",
          "is_elite": "0",
          "product": [
            "1",
            "2"
          ],
          "coupon_id": "",
          "client_source": "1",
          "vendor_id": "",
          "brand_id": "",
          "third_party_source": ""
        }
      },
      {
        "_index": "customers4",
        "_type": "details",
        "_id": "3",
        "_score": 1,
        "_source": {
          "order_id": "3",
          "customer_id": "3",
          "customer_name": "manish",
          "mailing_addres": "manish@gmail.com",
          "actual_order_date": "2016-03-30",
          "is_veg": "0",
          "total_amount": "249",
          "store_id": "2",
          "city_id": "",
          "payment_mode": "cod",
          "is_elite": "0",
          "product": [
            "1",
            "2"
          ],
          "coupon_id": "",
          "client_source": "1",
          "vendor_id": "",
          "brand_id": "",
          "third_party_source": ""
        }
      }
    ]
  },
  "aggregations": {
    "customer_ids": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "1",
          "doc_count": 2
        },
        {
          "key": "2",
          "doc_count": 2
        },
        {
          "key": "4",
          "doc_count": 2
        },
        {
          "key": "3",
          "doc_count": 1
        },
        {
          "key": "5",
          "doc_count": 1
        }
      ]
    }
  }
}

如您所见,此处的聚合结果仅返回每个客户的文档计数。我们需要在得到文档计数的同时,也拿到这些文档的所有字段。

最佳答案

您可以使用 top_hits 聚合(aggregation)来检索每个客户桶中的文档。

{
    "aggs": {
        "customer_ids":{
            "terms":{
                "field":"customer_id"
            },
            "aggs": {
              "docs": {
                 "top_hits": {
                    "size": 3
                 }
              }
            }
        }
    }
}

关于“python - 加快 MySQL 客户数据的切片与切块(slice and dice)”,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/38969135/

相关文章:

python - 从 TD 中提取类别值 -

mysql - 是什么导致 MyISAM 损坏?

具有内连接的 MySql 存储过程

python - 如何跨子图添加层次轴以标记组?

python - Django 类似连接的查询集扩展

python - Pandas 相当于 dplyr dot

javascript - Node.JS Express 到 HTML 数据传输

Python:Pandas 数据帧总和

python - 循环遍历 DF 列以删除包含西班牙语文本的行

python - 如何在qlistview python中过滤文件