sql - 计算列中不同值的出现次数

标签 sql postgresql jsonb

我有以下查询:

-- Count, for each score category, how many survey results carry each label.
-- Each output column is a jsonb object with the keys high / medium / low.
select 
    jsonb_build_object('high', count(*) filter (where total = 'High')) ||
    jsonb_build_object('medium', count(*) filter (where total = 'Medium')) ||
    jsonb_build_object('low', count(*) filter (where total = 'Low')) as total,
    jsonb_build_object('high', count(*) filter (where social = 'High')) ||
    jsonb_build_object('medium', count(*) filter (where social = 'Medium')) ||
    jsonb_build_object('low', count(*) filter (where social = 'Low')) as social
from (
    -- Extract the label text for each category from the jsonb column.
    -- No comma after the last select-list item: a trailing comma before
    -- "from" is a syntax error.
    select
        score_labels->>'total' as total,
        score_labels->>'social' as social
    from survey_results
    ) s;

请问有什么方法可以简化吗?比如使用迭代,而不是重复编写 jsonb_build_object 语句?

此查询返回以下结果:

total                                  social
-------------------------------------  -------------------------------
{"low": 80, "high": 282, "medium": 0}  {"low": 103, "high": 115, "medium": 0} 

最佳答案

这个特殊情况

你需要一个 plpgsql 函数:

-- Tally the labels 'high', 'medium' and 'low' (case-insensitive) found in a
-- text array and return the three counts as a jsonb object.
-- NULL elements and unrecognised labels are ignored; a NULL array yields
-- all-zero counts.
create or replace function my_arr_to_jsonb(text[])
returns jsonb language plpgsql as $$
declare
    -- agg[1] = high, agg[2] = medium, agg[3] = low
    agg int[] = array[0, 0, 0];
    s text;
begin
    -- foreach raises an error on a NULL array (which is what array_agg
    -- returns over zero rows), so fall back to an empty array.
    foreach s in array coalesce($1, '{}'::text[]) loop
        if lower(s) = 'high' then
            agg[1]:= agg[1]+ 1;
        elsif lower(s) = 'medium' then
            agg[2]:= agg[2]+ 1;
        elsif lower(s) = 'low' then
            -- Match 'low' explicitly: a catch-all else branch would also
            -- count NULLs and unknown labels as low.
            agg[3]:= agg[3]+ 1;
        end if;
    end loop;
    return jsonb_build_object(
        'high', agg[1],
        'medium', agg[2],
        'low', agg[3]);
end $$;

函数的实际使用:

-- Demo: collect each category's labels into an array and let
-- my_arr_to_jsonb tally them.
with my_table (id, score_labels) as (
    values
        (1, jsonb '{"total": "High", "risk": "High"}'),
        (2, jsonb '{"total": "High", "risk": "Low"}'),
        (3, jsonb '{"total": "Low", "risk": "Medium"}')
),
-- One row per record with the label text of each category.
labels as (
    select
        score_labels->>'total' as total_label,
        score_labels->>'risk' as risk_label
    from my_table
)
select
    my_arr_to_jsonb(array_agg(total_label)) as total,
    my_arr_to_jsonb(array_agg(risk_label)) as risk
from labels;

               total                |                risk                
------------------------------------+------------------------------------
 {"low": 1, "high": 2, "medium": 0} | {"low": 1, "high": 1, "medium": 1}
(1 row)

函数中的算法可以用来创建自定义聚合函数(见下文)。

通用解决方案

这个问题触及了一个有趣的话题,即使用单个聚合函数计算表列中不同值的出现次数。

-- State transition function for the count_labels aggregate.
-- The state is a flat text array of alternating entries:
--   [label_1, count_1, label_2, count_2, ...]
-- Labels are stored in their quote_ident() form; counts are stored as text.
-- A NULL input value leaves the state untouched.
create or replace function count_labels_state(text[], text)
returns text[] language plpgsql as $$
declare
    counts text[] := $1;
    key text := quote_ident($2);
    pos int;
begin
    -- NULL values are not counted.
    if $2 is null then
        return counts;
    end if;

    pos := array_position(counts, key);
    if pos is null then
        -- First occurrence: append the label with a zero counter and
        -- point pos at the newly added label.
        counts := counts || array[key, '0'];
        pos := cardinality(counts) - 1;
    end if;

    -- The counter sits directly after its label.
    counts[pos + 1] := (counts[pos + 1]::int + 1)::text;
    return counts;
end $$;

-- Final function for the count_labels aggregate.
-- Takes the flat state array [label_1, count_1, label_2, count_2, ...] and
-- returns a jsonb object of the form {label: count, ...}.
-- A NULL or empty state yields an empty object '{}'.
create or replace function count_labels_final(text[])
returns jsonb language plpgsql as $$
declare
    j jsonb = '{}';
    i int = 1;
    -- cardinality(NULL) is NULL, which would make the loop's exit test
    -- never fire; treat a NULL state as empty.
    n int = coalesce(cardinality($1), 0);
    k text;
begin
    while i <= n loop
        k := $1[i];
        -- Undo quote_ident(): a quoted label is wrapped in double quotes and
        -- has embedded quotes doubled. Unquoted labels never start with '"'.
        if length(k) >= 2 and left(k, 1) = '"' and right(k, 1) = '"' then
            k := replace(substr(k, 2, length(k) - 2), '""', '"');
        end if;
        j := j || jsonb_build_object(k, $1[i+1]::int);
        i := i + 2;
    end loop;
    return j;
end $$;

-- Aggregate that counts the occurrences of each distinct text value and
-- returns them as a jsonb object (the result of count_labels_final).
-- initcond starts the state as an empty array, so aggregating zero rows (or
-- only NULLs) reaches the final function with an empty state, not a NULL one.
create aggregate count_labels(text) (
    sfunc = count_labels_state,
    stype = text[],
    finalfunc = count_labels_final,
    initcond = '{}'
);

用法。与其这样写:

-- Baseline: one filtered count per label, so every label must be known
-- in advance and spelled out by hand.
with my_table (label) as (
    select unnest(array[
        'low', 'medium', 'high', 'low',
        'low', 'medium', 'high', 'low',
        'low', 'unknown'
    ])
)
select
    count(*) filter (where label = 'low')     as low,
    count(*) filter (where label = 'medium')  as medium,
    count(*) filter (where label = 'high')    as high,
    count(*) filter (where label = 'unknown') as unknown
from my_table;

 low | medium | high | unknown 
-----+--------+------+---------
   5 |      2 |    2 |       1
(1 row)

你可以这样写(不必事先知道有哪些标签):

-- Runs against the same my_table (label) data as the preceding example;
-- the with-clause defining it must precede this select for it to run.
select count_labels(label) as labels
from my_table;

                      labels                      
--------------------------------------------------
 {"low": 5, "high": 2, "medium": 2, "unknown": 1}
(1 row)

聚合在整数列上运行良好:

-- count_labels takes text, so integer values are cast to text before
-- being aggregated.
with my_table (n) as (
    select unnest(array[
        1, 2, 3, 4,
        1, 2, 1, 2
    ])
)
select count_labels(cast(n as text)) as integers
from my_table;

              integers              
----------------------------------
 {"1": 3, "2": 3, "3": 1, "4": 1}
(1 row) 

对于其他类型,应该记住该聚合作用于值的文本表示(例如,作为数值 1.10 = 1.1,但作为文本 '1.10' <> '1.1')。

关于sql - 计算列中不同值的出现次数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/45975636/

相关文章:

sql - 对于执行缓慢的查询,您最酷的 SQL 优化是什么?

postgresql - Sequelize – 按值存在排序

sql - 如何从父字符串中分离出结果?

postgresql - 在 Postgresql 中从 JSONB 中提取值

postgresql - 如何将 JSONB 值设置为 null

php - 使用 mysqli 查询失败

mysql - 如何删除mysql中唯一的记录?

python - 查询产生不正确的结果

postgresql - Ubuntu & PostgreSQL 第 24 行 :/usr/local/bin/psql. bin:没有那个文件或目录

postgresql - Postgres 在 map 中连接不同的键