我在 S3 中保存了这样的 JSON 数据。我正在使用 ATHENA 编写选择语句。
{
"sample_data":{
"people":[
{
"firstName":"Emily",
"address":{
"streetAddress":"101",
"city":"abc",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"3"
},
{
"type":"city",
"number":"4"
}
]
}
},
{
"firstName":"Smily",
"address":{
"streetAddress":"102",
"city":"def",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"1"
},
{
"type":"city",
"number":"1"
}
]
}
}
]
}
}
如何编写一条 SELECT 语句来选择街道地址(streetAddress)和城市(city),条件是 home 的号码 > 2 且 city 的号码 = 4?
我尝试了 UNNEST 但没有帮助。
预期输出:
streetAddress city
101 abc
我尝试了下面这个 UNNEST 查询,但它把电话号码展开成了多行,因此无法同时按 home 和 city 进行过滤,因为它们现在位于不同的行中。
-- Flattens the document twice: once per entry of $.people, then once per entry
-- of that person's phone list. The result therefore has one row per
-- (person, phone) pair, so a person's "home" and "city" numbers land on
-- different rows.
-- NOTE(review): the paths below use all-lowercase keys (streetaddress,
-- phonenumbers) while the sample document shown with the question uses
-- camelCase keys; confirm they match the keys in the stored data.
SELECT
    phone.idx,
    JSON_EXTRACT_SCALAR(person.n, '$.address.streetaddress') AS streetaddress,
    JSON_EXTRACT_SCALAR(person.n, '$.address.city') AS city,
    JSON_EXTRACT_SCALAR(phone.m, '$.type') AS type,
    JSON_EXTRACT_SCALAR(phone.m, '$.number') AS value
FROM sample_data1
-- one row per person
CROSS JOIN UNNEST(
    CAST(JSON_EXTRACT(sample_data, '$.people') AS ARRAY<JSON>)
) AS person (n)
-- one row per phone entry; idx is the 1-based position produced by WITH ORDINALITY
CROSS JOIN UNNEST(
    CAST(JSON_EXTRACT(person.n, '$.address.phonenumbers') AS ARRAY<JSON>)
) WITH ORDINALITY AS phone (m, idx);
最佳答案
unnest 会将数据展平为多行;因此,您可以改用数组函数直接处理数组,而无需取消嵌套。Athena 目前使用的 Presto 版本不支持 any_match,所以您需要使用 cardinality + filter 的组合(该版本也不支持通过 JSON 路径进行过滤):
-- Sample data: one JSON document per person, inlined so the query can be run
-- without an underlying table.
WITH dataset (json_str) AS (
    VALUES
        (
            JSON '{
"firstName":"Emily",
"address":{
"streetAddress":"101",
"city":"abc",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"11"
},
{
"type":"city",
"number":"4"
}
]
}
}'
        ),
        (
            JSON '{
"firstName":"Smily",
"address":{
"streetAddress":"102",
"city":"def",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"1"
},
{
"type":"city",
"number":"1"
}
]
}
}'
        )
),

-- One row per person: the two output fields plus the phone list kept as an
-- array, so both phone conditions can be tested on the same row (no UNNEST).
person AS (
    SELECT
        JSON_EXTRACT_SCALAR(json_str, '$.address.streetAddress') AS street_address,
        JSON_EXTRACT_SCALAR(json_str, '$.address.city') AS city,
        CAST(JSON_EXTRACT(json_str, '$.address.phoneNumbers') AS ARRAY(JSON)) AS phones
    FROM dataset
)

-- A condition holds when at least one phone entry survives the filter;
-- cardinality(filter(...)) > 0 stands in for any_match.
SELECT
    street_address,
    city
FROM person
WHERE
    -- "home" phone whose number is greater than 2; TRY_CAST yields NULL for a
    -- non-numeric number, so such an entry simply does not match
    CARDINALITY(
        FILTER(
            phones,
            phone -> JSON_EXTRACT_SCALAR(phone, '$.type') = 'home'
                AND TRY_CAST(JSON_EXTRACT_SCALAR(phone, '$.number') AS INTEGER) > 2
        )
    ) > 0
    -- "city" phone whose number is exactly the string '4'
    AND CARDINALITY(
        FILTER(
            phones,
            phone -> JSON_EXTRACT_SCALAR(phone, '$.type') = 'city'
                AND JSON_EXTRACT_SCALAR(phone, '$.number') = '4'
        )
    ) > 0
输出:
street_address city
101 abc
关于sql - AWS Athena 使用 AND 条件查询 JSON 数组,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/72152957/