Elasticsearch 日期直方图聚合(最小/最大/平均值)

标签 elasticsearch

我使用的是 Elasticsearch 5.2。

我的数据格式如下:

{
    "_id": "635636", 
    "_index": "test", 
    "_source": {
        "ad_id": 9368, 
        "body": 1,
        "drivetrain": 1, 
        "engine_capacity": 1, 
        "fuel_type": 1, 
        "has_exchange": false, 
        "id": 635636, 
        "manufacturer_id": 12, 
        "model_id": 10, 
        "odometer_state": 110000, 
        "price_byn": 22802, 
        "price_usd": 12000, 
        "source": 2, 
        "source_date": "2016-10-06", 
        "source_id": "12194309", 
        "state": 2, 
        "state_date": "2017-03-07", 
        "transmission_type": 1, 
        "year": 2012
    }, 
    "_type": "ads", 
    "_version": 4, 
    "found": true
}

我正在尝试按日期获取最小值/最大值/平均值的聚合结果,使用的是以下查询:

{
    "size":0,
    "aggs":{
        "avg_price_per_day":{
            "date_histogram":{
                "field":"state_date",
                "interval":"day"
            },
            "aggs":{
                "prices":{
                    "avg":{
                        "field":"price_usd"
                    }
                }
            }
        },
        "max_price_per_day":{
            "date_histogram":{
                "field":"state_date",
                "interval":"day"
            },
            "aggs":{
                "prices":{
                    "max":{
                        "field":"price_usd"
                    }
                }
            }
        },
        "min_price_per_day":{
            "date_histogram":{
                "field":"state_date",
                "interval":"day"
            },
            "aggs":{
                "prices":{
                    "min":{
                        "field":"price_usd"
                    }
                }
            }
        },
        "max_daily_price":{
            "max_bucket":{
                "buckets_path":"max_price_per_day>prices"
            }
        },
        "min_daily_price":{
            "min_bucket":{
                "buckets_path":"min_price_per_day>prices"
            }
        },
        "avg_daily_price":{
            "max_bucket":{
                "buckets_path":"avg_price_per_day>prices"
            }
        }
    },
    "query":{
        "bool":{
            "filter":[
                {
                    "range":{
                        "price_usd":{
                            "gt":0
                        }
                    }
                },
                {
                    "term":{
                        "manufacturer_id":{
                            "value":11,
                            "boost":1
                        }
                    }
                },
                {
                    "term":{
                        "model_id":{
                            "value":7,
                            "boost":1
                        }
                    }
                }
            ]
        }
    }
}

但它只返回一个月的聚合:

{
"took":23,
"timed_out":false,
"_shards":{
    "total":1,
    "successful":1,
    "failed":0
},
"hits":{
    "total":6046,
    "max_score":0.0,
    "hits":[

    ]
},
"aggregations":{
    "avg_price_per_day":{
        "buckets":[
            {
                "key_as_string":"2017-01-02",
                "key":1483315200000,
                "doc_count":1494,
                "prices":{
                    "value":4431.045515394913
                }
            },
            {
                "key_as_string":"2017-01-09",
                "key":1483920000000,
                "doc_count":0,
                "prices":{
                    "value":null
                }
            },
            {
                "key_as_string":"2017-01-16",
                "key":1484524800000,
                "doc_count":840,
                "prices":{
                    "value":4299.322619047619
                }
            },
            {
                "key_as_string":"2017-01-23",
                "key":1485129600000,
                "doc_count":3712,
                "prices":{
                    "value":4383.441540948276
                }
            }
        ]
    },
    "max_price_per_day":{
        "buckets":[
            {
                "key_as_string":"2017-01-02",
                "key":1483315200000,
                "doc_count":1494,
                "prices":{
                    "value":45000.0
                }
            },
            {
                "key_as_string":"2017-01-09",
                "key":1483920000000,
                "doc_count":0,
                "prices":{
                    "value":null
                }
            },
            {
                "key_as_string":"2017-01-16",
                "key":1484524800000,
                "doc_count":840,
                "prices":{
                    "value":15500.0
                }
            },
            {
                "key_as_string":"2017-01-23",
                "key":1485129600000,
                "doc_count":3712,
                "prices":{
                    "value":45000.0
                }
            }
        ]
    },
    "min_price_per_day":{
        "buckets":[
            {
                "key_as_string":"2017-01-02",
                "key":1483315200000,
                "doc_count":1494,
                "prices":{
                    "value":110.0
                }
            },
            {
                "key_as_string":"2017-01-09",
                "key":1483920000000,
                "doc_count":0,
                "prices":{
                    "value":null
                }
            },
            {
                "key_as_string":"2017-01-16",
                "key":1484524800000,
                "doc_count":840,
                "prices":{
                    "value":200.0
                }
            },
            {
                "key_as_string":"2017-01-23",
                "key":1485129600000,
                "doc_count":3712,
                "prices":{
                    "value":200.0
                }
            }
        ]
    },
    "max_daily_price":{
        "value":45000.0,
        "keys":[
            "2017-01-02",
            "2017-01-23"
        ]
    },
    "min_daily_price":{
        "value":110.0,
        "keys":[
            "2017-01-02"
        ]
    },
    "avg_daily_price":{
        "value":4431.045515394913,
        "keys":[
            "2017-01-02"
        ]
    }
}
}

我还索引了二月和三月的数据,但它不包含在聚合中。如何将它们全部包含在内?

更新

curl -XPOST localhost:9200/avtostat/ads/_search -d '{"query":{"bool":{"filter":[{"range":{"state_date":{"gt":"2017-02-01"}}},{"range":{"price_usd":{"gt":0}}},{"term":{"manufacturer_id":{"value":11,"boost":1}}},{"term":{"model_id":{"value":7,"boost":1}}}]}}}'

{
"took":166,
"timed_out":false,
"_shards":{
    "total":1,
    "successful":1,
    "failed":0
},
"hits":{
    "total":6046,
    "max_score":0.0,
    "hits":[
        {
            "_index":"avtostat",
            "_type":"ads",
            "_id":"272894",
            "_score":0.0,
            "_source":{
                "id":272894,
                "ad_id":111602,
                "manufacturer_id":11,
                "model_id":7,
                "fuel_type":3,
                "engine_capacity":1.6,
                "transmission_type":2,
                "year":1999,
                "body":6,
                "drivetrain":1,
                "state":2,
                "odometer_state":303000,
                "has_exchange":true,
                "price_byn":4816,
                "price_usd":2500,
                "state_date":"2017-02-05",
                "source":1,
                "source_id":"3215650",
                "source_date":"2017-02-05"
            }
        },
        ...
    ]
}
}

最佳答案

您的日期格式不正确。请参阅日期格式文档(Elasticsearch 5.x 使用 Joda-Time 的 DateTimeFormat 模式),了解大写字母与小写字母 (yyyy-MM-dd) 在含义上的区别。因此,您需要的是 yyyy-MM-dd 而不是 YYYY-MM-DD。特别是 D,它在这里的含义明显不同:

Symbol    Meaning         Presentation    Examples
D         day of year     number          189
d         day of month    number          10

下面用一个示例来说明我的意思:

DELETE test
PUT test
{
  "mappings": {
    "test": {
      "properties": {
        "state_date": {
          "type": "date",
          "format": "YYYY-MM-DD"
        },
        "some_id": {
          "type": "long"
        }
      }
    }
  }
}

POST test/test/_bulk
{"index":{}}
{"some_id":272894,"state_date":"2017-08-05"}
{"index":{}}
{"some_id":272894,"state_date":"2017-08-04"}
{"index":{}}
{"some_id":272894,"state_date":"2017-08-03"}
{"index":{}}
{"some_id":272894,"state_date":"2017-08-09"}
{"index":{}}
{"some_id":272894,"state_date":"2017-10-12"}

GET /test/_search
{
  "size": 0,
  "aggs": {
    "avg_price_per_day": {
      "date_histogram": {
        "field": "state_date",
        "interval": "day"
      }
    }
  }
}

因此,在我的测试数据中,日期看起来是 2017 年 8 月和 10 月的日期。但是,根据日期格式文档,格式中的大写 D 表示一年中的第几天,而不是一个月中的第几天。这意味着 08-05 被解析为 2017 年的第 5 天,而不是 8 月 5 日;08-09 被解析为 2017 年的第 9 天,而不是 8 月 9 日,依此类推。

这意味着您的所有日期实际上都落在了一月份。从聚合结果可以看出:

  "aggregations": {
    "avg_price_per_day": {
      "buckets": [
        {
          "key_as_string": "2017-01-03",
          "key": 1483401600000,
          "doc_count": 1
        },
        {
          "key_as_string": "2017-01-04",
          "key": 1483488000000,
          "doc_count": 1
        },
        {
          "key_as_string": "2017-01-05",
          "key": 1483574400000,
          "doc_count": 1
        },
        {
          "key_as_string": "2017-01-06",
          "key": 1483660800000,
          "doc_count": 0
        },
        {
          "key_as_string": "2017-01-07",
          "key": 1483747200000,
          "doc_count": 0
        },
        {
          "key_as_string": "2017-01-08",
          "key": 1483833600000,
          "doc_count": 0
        },
        {
          "key_as_string": "2017-01-09",
          "key": 1483920000000,
          "doc_count": 1
        },
        {
          "key_as_string": "2017-01-10",
          "key": 1484006400000,
          "doc_count": 0
        },
        {
          "key_as_string": "2017-01-11",
          "key": 1484092800000,
          "doc_count": 0
        },
        {
          "key_as_string": "2017-01-12",
          "key": 1484179200000,
          "doc_count": 1
        }
      ]
    }
  }

关于Elasticsearch 日期直方图聚合(最小/最大/平均值),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43227101/

相关文章:

elasticsearch - 使用Solr空间搜索在近距离范围内找到彼此最近的点

elasticsearch - 根据许可证,我们可以在我们作为监视的一部分的产品中使用 Elasticsearch 基础吗?

java - Elasticsearch,嵌套 "ANDS"和 "ORS"

elasticsearch - 如何通过字段的子字符串或正则表达式制作 Kibana 图表?

java - 基于 JSON 文档为 Elasticsearch 创建 id(key)

elasticsearch - 我可以根据我在Elasticsearch中的自定义字段的最高得分和/或最大值重新评分吗?

amazon-web-services - ElasticSearch 的缩放

elasticsearch - 更新成本与在 elasticsearch 中插入

elasticsearch - Elasticsearch : Difference between include & filter in aggregation query

string - Elasticsearch一开始不能使用斜杠