×
Community Blog PostgreSQL group x, sort by y, and get (N dynamic) Records for Each Group-Subquery + Subquery Aggregation Use

PostgreSQL group x, sort by y, and get (N dynamic) Records for Each Group-Subquery + Subquery Aggregation Use

This short article explains PostgreSEL group filtering and aggregation usage.

By digoal

Background

We will use the window query to output to each group. This example only discusses the output groups provided by users rather than all the groups.

  • Group: Tag ID
  • Sorting: Tag weight
  • limit: The number of entries returned for each label

Custom grouping + limit number of records

create type sf as (  
  c int, -- 分组条件  
  k int, -- 分组条件  
  l int  -- limit多少条  
);  

SQL principle:

select   
  -- 子查询  
  (select string_agg(vid::text, ',')  
  from   
    (select vid  
    from   
      test_video_box_a ,   
      -- 其他过滤条件的来源  
      ( select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
               coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
        from video_user_read where user_id = 7619473860554752   
      ) as  t   
    where   
    -- 分组条件c,k  
    categray = sf.c and keyword = sf.k   
    -- 其他过滤条件   
    and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
    -- 分组内的排序条件   
    order by weight desc   
    -- 每个分组limit多少条 l   
    limit sf.l)   
  t) as x   
from   
-- 分组条件, 每个分组取多少条,   
-- 通过自定义类型的数组来构建, 从而可以很方便的得到分组以及每个分组需要limit多少条  
unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf   

Aggregation of multiple queries:

select string_agg(x, ',')  
from (  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_a , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 7619473860554752) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf   
union all  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_b , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 7619473860554752) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf   
union all  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_xxxx , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 7619473860554752) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest('{"(5,63,4)","(0,59,4)","(9,31,4)","(7,33,4)","(6,18,4)","(7,13,4)","(9,77,4)","(8,70,4)","(3,30,4)","(2,75,4)","(9,3,4)","(5,41,4)","(3,35,4)","(4,27,4)","(6,35,4)","(8,13,4)","(10,85,4)","(7,85,4)","(2,41,4)","(2,74,4)","(6,83,4)","(9,50,4)","(6,69,4)","(2,89,4)","(10,44,4)","(6,84,4)","(5,13,4)","(2,75,4)","(5,61,4)","(0,7,4)","(10,86,4)","(2,94,4)","(7,93,4)","(8,88,4)","(3,97,4)","(6,92,4)","(8,49,4)","(4,41,4)","(2,62,4)","(3,16,4)","(1,95,4)","(10,54,4)","(1,24,4)","(3,55,4)","(9,31,4)","(6,81,4)","(2,83,4)","(8,88,4)","(8,54,4)","(8,9,4)"}'::sf[]) sf   
) t;  

Performance stress testing randomly generates query groups. The total number returned of the sf array of the limit records is 4*50=200 entries.

CREATE OR REPLACE FUNCTION public.gen_array_sf()  
 RETURNS sf[]  
 LANGUAGE plpgsql  
 STRICT  
AS $function$  
declare  
  out_arr sf[];  
begin  
  for i in 1..50 loop  
    out_arr[i] := row (random() * 10, random() * 100,  4)::sf;   
  end loop;  
  return out_arr;  
end;  
$function$;  

Stress testing function:

create or replace function test_union() returns text as $$  
declare  
  sql text;   
  cond sf[] := gen_array_sf();  
  res text;  
begin  
  sql := format($_$  
select string_agg(x, ',')  
from (  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_a , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 26503284841971712) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest(%L::sf[]) sf   
union all  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_b , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 26503284841971712) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest(%L::sf[]) sf   
union all  
         select (select string_agg(vid::text, ',')  
                 from (select vid  
                       from test_video_box_xxxx , (select coalesce(w1,hll_empty()) w1, coalesce(w2,hll_empty()) w2, coalesce(w3,hll_empty()) w3, coalesce(w4,hll_empty()) w4,   
    coalesce(w5,hll_empty()) w5, coalesce(w6,hll_empty()) w6, coalesce(w7,hll_empty()) w7, coalesce(whole,hll_empty()) whole   
    from video_user_read where user_id = 26503284841971712) as  t   
                       where categray = sf.c and keyword = sf.k  
                       and  (t.w1 || hll_hash_bigint(vid) <> t.w1 and t.w2 || hll_hash_bigint(vid) <> t.w2)   
                       order by weight desc   
                       limit sf.l) t) as x   
         from unnest(%L::sf[]) sf   
) t  
$_$,  
cond,cond,cond);  
-- raise notice '%', sql;  
execute sql into res;  
return res;  
end;  
$$ language plpgsql strict;  

Stress testing method:

vi test.sql  
select test_union();  
  
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 16 -j 16 -T 1200  
0 0 0
Share on

digoal

276 posts | 24 followers

You may also like

Comments