當前AnalyticDB for PostgreSQL對Recursive CTE支援較為有限。在分布式情境下,為確保計劃及執行層結果的正確性,AnalyticDB for PostgreSQL對Recursive CTE執行過程中的WorkTableScan運算元施加了限制:禁止出現Motion,且該運算元必須位於JOIN的最左側。因此,當Recursive CTE中出現資料量較大的表且執行個體的計算節點數量較多時,其執行效能表現極為不佳。AnalyticDB for PostgreSQL對暫存資料表的執行計畫沒有任何限制,因此建議您通過PL/SQL函數加暫存資料表的方案改寫Recursive CTE。在每次迭代計算後,改寫後的方案都可給出較優的執行計畫,從而提升查詢效能,滿足業務需求。
改寫樣本
測試資料
準備測試表和測試資料如下。
CREATE TABLE city(id varchar(4), pid varchar(4), name varchar(10), gdp int);
INSERT INTO city VALUES('33', NULL, '浙江省', 20134);
INSERT INTO city VALUES('3301', '33', '杭州市', 5112);
INSERT INTO city VALUES('3302', '33', '寧波市', 3992);
INSERT INTO city VALUES('3303', '33', '溫州市', 2125);
INSERT INTO city VALUES('3304', '33', '嘉興市', 1688);
INSERT INTO city VALUES('3306', '33', '紹興市', 1852);
INSERT INTO city VALUES('3305', '33', '湖州市', 964);
INSERT INTO city VALUES('3307', '33', '金華市', 1445);
INSERT INTO city VALUES('3308', '33', '衢州市', 507);
INSERT INTO city VALUES('3309', '33', '舟山市', 491);
INSERT INTO city VALUES('3310', '33', '台州市', 1486);
INSERT INTO city VALUES('3311', '33', '麗水市', 472);
INSERT INTO city VALUES('32', NULL, '江蘇省', 30862);
INSERT INTO city VALUES('3201', '32', '南京市', 4359);
INSERT INTO city VALUES('3202', '32', '無錫市', 3584);
INSERT INTO city VALUES('3203', '32', '徐州市', 2118);
INSERT INTO city VALUES('3204', '32', '常州市', 2269);
INSERT INTO city VALUES('3205', '32', '蘇州市', 5548);
INSERT INTO city VALUES('3206', '32', '南通市', 2982);
INSERT INTO city VALUES('3207', '32', '連雲港市', 976);
INSERT INTO city VALUES('3208', '32', '淮安市', 1257);
INSERT INTO city VALUES('3209', '32', '鹽城市', 1796);
INSERT INTO city VALUES('3210', '32', '揚州市', 1868);
INSERT INTO city VALUES('3211', '32', '鎮江市', 1372);
INSERT INTO city VALUES('3212', '32', '泰州市', 1752);
INSERT INTO city VALUES('3213', '32', '宿遷市', 981);原始查詢
通過Recursive CTE找出目標省份及其下屬所有城市的GDP。
WITH RECURSIVE CTE AS
(
SELECT id, CAST(name AS varchar(100)), gdp FROM city WHERE name = '浙江省'
UNION ALL
SELECT son.id, CAST(parent.name || '>' || son.name AS varchar(100)), son.gdp AS name
FROM city son INNER JOIN CTE parent ON son.pid = parent.id
)
SELECT id, name, gdp FROM CTE ORDER BY gdp DESC;執行計畫
在以上原始查詢SQL前加上EXPLAIN語句即可查看執行計畫,執行計畫詳情如下。
從執行計畫可以看出,city表的資料被廣播到所有計算節點上。當city表中的資料量較大時,執行效能將顯著降低。此外,ORCA最佳化器不支援Recursive CTE,因此只能回退至planner最佳化器。
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=13568.80..13971.85 rows=28451 width=242)
Merge Key: city.gdp
-> Sort (cost=13568.80..13592.51 rows=9484 width=242)
Sort Key: city.gdp DESC
-> Recursive Union (cost=0.00..12942.36 rows=9484 width=242)
-> Seq Scan on city (cost=0.00..155.67 rows=10 width=242)
Filter: ((name)::text = '浙江省'::text)
-> Hash Join (cost=885.67..1259.70 rows=947 width=242)
Hash Cond: ((parent.id)::text = (son.pid)::text)
-> WorkTable Scan on CTE parent (cost=0.00..1.95 rows=97 width=238)
-> Hash (cost=520.67..520.67 rows=29200 width=82)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..520.67 rows=29200 width=82)
-> Seq Scan on city son (cost=0.00..131.33 rows=9733 width=82)
Optimizer: Postgres-based planner
(14 rows)查詢結果
查詢結果詳情如下。
id name gdp
33 浙江省 20134
3301 浙江省>杭州市 5112
3302 浙江省>寧波市 3992
3303 浙江省>溫州市 2125
3306 浙江省>紹興市 1852
3304 浙江省>嘉興市 1688
3310 浙江省>台州市 1486
3307 浙江省>金華市 1445
3305 浙江省>湖州市 964
3308 浙江省>衢州市 507
3309 浙江省>舟山市 491
3311 浙江省>麗水市 472改寫查詢
UNION ALL情境下的改寫
以下樣本將為您展示在UNION ALL情境下對Recursive CTE的改寫方法。通過PL/SQL函數改寫原始查詢,改寫詳情如下。
-- 函數的參數為可變參數的值
CREATE OR REPLACE FUNCTION city_gdp(
target_name varchar(10)
) RETURNS TABLE(
id varchar(4),
name varchar(100),
gdp int
) AS $$
-- 函數執行過程中用到的中間變數
DECLARE prev_count INT := 0;
curr_count INT := 0;
curr_level INT := 1;
BEGIN
-- 建立暫存資料表,表結構相較於Recursive CTE中的欄位多了level欄位
CREATE TEMP TABLE temp_result(
id varchar(4),
name varchar(100),
gdp int,
level int
) ON COMMIT DROP DISTRIBUTED BY(id);
-- 向暫存資料表中寫入Rescursive CTE的非recursive部分
INSERT INTO temp_result
SELECT
parent.id,
CAST(parent.name AS varchar(100)),
parent.gdp,
1 AS level
FROM city parent
WHERE parent.name = target_name;
-- 統計當前暫存資料表的行數,用於後續終止迴圈
prev_count := (
SELECT COUNT(*) FROM temp_result
);
LOOP
-- 可選,analyze暫存資料表temp_result是為了後續能出更優的執行計畫
ANALYZE temp_result;
-- 將Recursive CTE的Recursive中的部分寫入到暫存資料表中,注意level需要+1且WHERE條件中需要過濾屬於當前level的資料
INSERT INTO temp_result
SELECT
son.id,
CAST(parent.name || '>' || son.name AS varchar(100)),
son.gdp,
parent.level + 1 AS level
FROM city son
INNER JOIN temp_result parent ON parent.id = son.pid
WHERE parent.level = curr_level;
-- 再次統計當前暫存資料表的行數
curr_count := (
SELECT COUNT(*) FROM temp_result
);
-- 若暫存資料表無資料新增,則退出迴圈
IF curr_count = prev_count THEN EXIT;
END IF;
-- 在下一次迴圈前更新prev_count和curr_level
prev_count := curr_count;
curr_level := curr_level + 1;
END LOOP;
-- SQL的主查詢部分,將暫存資料表整體作為Recursive CTE引用即可
RETURN QUERY
SELECT
CTE.id,
CTE.name,
CTE.gdp
FROM temp_result CTE
ORDER BY gdp DESC;
END;
$$ LANGUAGE plpgsql;執行計畫
查看函數執行過程中的計劃可以發現,每次迴圈時,ORCA最佳化器會自動產生更優的執行計畫。
-- 第一次迴圈
LOG: Insert on temp_result (cost=0.00..862.04 rows=1 width=24)
-> Result (cost=0.00..0.00 rows=0 width=0)
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=28)
Hash Key: city.id
-> Hash Join (cost=0.00..862.00 rows=1 width=34)
Hash Cond: ((city.pid)::text = (temp_result_1.id)::text)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=28)
Hash Key: city.pid
-> Seq Scan on city (cost=0.00..431.00 rows=1 width=28)
-> Hash (cost=431.00..431.00 rows=1 width=17)
-> Seq Scan on temp_result temp_result_1 (cost=0.00..431.00 rows=1 width=17)
Filter: (level = 1)
Optimizer: GPORCA
-- 第二次迴圈
LOG: Insert on temp_result (cost=0.00..862.04 rows=1 width=24)
-> Result (cost=0.00..0.00 rows=0 width=0)
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=28)
Hash Key: city.id
-> Hash Join (cost=0.00..862.00 rows=1 width=43)
Hash Cond: ((temp_result_1.id)::text = (city.pid)::text)
-> Seq Scan on temp_result temp_result_1 (cost=0.00..431.00 rows=5 width=27)
Filter: (level = 2)
-> Hash (cost=431.00..431.00 rows=1 width=28)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=28)
Hash Key: city.pid
-> Seq Scan on city (cost=0.00..431.00 rows=1 width=28)
Optimizer: GPORCA由於未對city表執行ANALYZE(ANALYZE文法詳情請參見ANALYZE用法),上述展示的執行計畫並非最優,僅為表明每次迴圈可產生不同的執行計畫。
查詢結果
查詢時傳入合理的參數值,例如“浙江省”。
SELECT * FROM city_gdp('浙江省');結果如下。
id name gdp
33 浙江省 20134
3301 浙江省>杭州市 5112
3302 浙江省>寧波市 3992
3303 浙江省>溫州市 2125
3306 浙江省>紹興市 1852
3304 浙江省>嘉興市 1688
3310 浙江省>台州市 1486
3307 浙江省>金華市 1445
3305 浙江省>湖州市 964
3308 浙江省>衢州市 507
3309 浙江省>舟山市 491
3311 浙江省>麗水市 472UNION情境下的改寫
Recursive CTE使用UNION可以自動去重,避免無限遞迴。
基於上文的樣本,我們寫入兩條重複資料,類比一個需要去重的情境,以便在下文體驗UNION情境的改寫。
INSERT INTO city VALUES('1111', '1111', '虛擬市', 1000);
INSERT INTO city VALUES('1111', '1111', '虛擬市', 1000);在這個類比情境中,Recursive CTE使用UNION去除重複資料,樣本如下。
WITH RECURSIVE CTE AS
(
SELECT id, gdp FROM city WHERE name = '虛擬市'
UNION
SELECT son.id, son.gdp AS name FROM city son INNER JOIN CTE parent ON son.pid = parent.id
)
SELECT id, gdp FROM CTE ORDER BY gdp DESC; 改寫時,您可以對暫存資料表建立唯一索引,在去重後通過INSERT INTO ON CONFLICT方式寫入資料。具體改寫如下。
-- 函數的參數為可變參數的值
CREATE OR REPLACE FUNCTION city_gdp(
target_name varchar(10)
) RETURNS TABLE(
id varchar(4),
gdp int
) AS $$
-- 函數執行過程中用到的中間變數
DECLARE prev_count INT := 0;
curr_count INT := 0;
curr_level INT := 1;
BEGIN
-- 建立暫存資料表,表結構相較於Recursive CTE裡的欄位多了個level欄位
CREATE TEMP TABLE temp_result(
id varchar(4),
gdp int,
level int
) ON COMMIT DROP DISTRIBUTED BY(id);
-- UNION情境新增:建立唯一索引
CREATE UNIQUE INDEX ON temp_result(id, gdp);
-- 向暫存資料表中插入Rescursive CTE的非Recursive部分
INSERT INTO temp_result
SELECT
parent.id,
parent.gdp,
1 AS level
FROM city parent
WHERE parent.name = target_name
-- UNION情境新增
GROUP BY 1,2;
-- 統計當前暫存資料表的行數,用於後續終止迴圈
prev_count := (
SELECT COUNT(*) FROM temp_result
);
LOOP
-- 可選,ANALYZE暫存資料表目的是為了後續能出更優的執行計畫
ANALYZE temp_result;
-- 將Recursive CTE的Recursive中的部分寫入到暫存資料表中,注意level需要+1且WHERE條件中需要過濾屬於當前level的資料
INSERT INTO temp_result
SELECT
son.id,
son.gdp,
parent.level + 1 AS level
FROM city son
INNER JOIN temp_result parent ON parent.id = son.pid
WHERE parent.level = curr_level
-- UNION情境新增
GROUP BY 1,2,3
ON CONFLICT DO NOTHING;
-- 再次統計當前暫存資料表的行數
curr_count := (
SELECT COUNT(*) FROM temp_result
);
-- 若暫存資料表無資料新增,則退出迴圈
IF curr_count = prev_count THEN EXIT;
END IF;
-- 在下一次迴圈前更新prev_count和curr_level
prev_count := curr_count;
curr_level := curr_level + 1;
END LOOP;
-- SQL的主查詢部分,將暫存資料表整體作為Recursive CTE引用即可
RETURN QUERY
SELECT
CTE.id,
CTE.gdp
FROM temp_result CTE ORDER BY gdp DESC;
END;
$$ LANGUAGE plpgsql;改寫完成後,通過SELECT * FROM city_gdppp('虛擬市')查詢到與改寫前相同的結果。
效能對比
測試資料
準備測試表和測試資料如下。
CREATE TABLE test_table(
id varchar(100),
parent_id varchar(100),
float1 float,
float2 float,
varchar1 varchar(100),
varchar2 varchar(100)) DISTRIBUTED BY (id);
INSERT INTO test_table VALUES('1-CCCCCCCCCCCCCCCCCCCC', '', 1.01, 1.01, 'AAAAAAAAAAAAAAAAAAAA', 'BBBBBBBBBBBBBBBBBBBB');
INSERT INTO test_table SELECT i || '-CCCCCCCCCCCCCCCCCCCC', '1-CCCCCCCCCCCCCCCCCCCC', 1.01, 1.01, 'AAAAAAAAAAAAAAAAAAAA', 'BBBBBBBBBBBBBBBBBBBB' FROM generate_series(2, 10000) i;
INSERT INTO test_table SELECT i || '-CCCCCCCCCCCCCCCCCCCC', 'test2-CCCCCCCCCCCCCCCCCCCC', 1.01, 1.01, 'AAAAAAAAAAAAAAAAAAAA', 'BBBBBBBBBBBBBBBBBBBB' FROM generate_series(10001, 5000000) i;原始查詢
改寫前查詢如下。
WITH RECURSIVE CTE AS (
SELECT
parent.id,
parent.parent_id,
CAST(parent.id AS varchar(4000)) id_seq,
parent.float1,
parent.float2,
parent.varchar1,
parent.varchar2
FROM test_table parent
WHERE parent.id IN ('1-CCCCCCCCCCCCCCCCCCCC')
UNION ALL
SELECT
son.id,
son.parent_id,
CAST(CTE.id_seq || '>' || son.id AS varchar(4000)) id_seq,
son.float1,
son.float2,
son.varchar1,
son.varchar2
FROM test_table son
INNER JOIN CTE ON CTE.id = son.parent_id
) SELECT * FROM CTE;執行計畫如下。
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 2:1 (slice1; segments: 2) (cost=0.00..4008018.27 rows=50000002 width=1404) (actual time=1698.654..1733.896 rows=10000 loops=1)
-> Recursive Union (cost=0.00..3258018.24 rows=25000001 width=628) (actual time=0.062..1694.406 rows=10000 loops=1)
-> Seq Scan on test_table parent (cost=0.00..42563.00 rows=1 width=628) (actual time=0.058..80.130 rows=1 loops=1)
Filter: ((id)::text = '1-CCCCCCCCCCCCCCCCCCCC'::text)
Rows Removed by Filter: 2499158
-> Hash Join (cost=194565.00..271545.52 rows=2500000 width=628) (actual time=792.825..806.534 rows=5000 loops=2)
Hash Cond: ((cte.id)::text = (son.parent_id)::text)
Extra Text: (seg1) Hash chain length 1666666.7 avg, 4990000 max, using 3 of 8388608 buckets.
-> WorkTable Scan on cte (cost=0.00..0.20 rows=10 width=734) (actual time=0.002..0.415 rows=5000 loops=2)
-> Hash (cost=111313.00..111313.00 rows=5000000 width=112) (actual time=1591.270..1591.270 rows=5000000 loops=1)
Buckets: 8388608 Batches: 1 Memory Usage: 0kB
-> Broadcast Motion 2:2 (slice2; segments: 2) (cost=0.00..111313.00 rows=5000000 width=112) (actual time=0.138..359.646 rows=5000000 loops=1)
-> Seq Scan on test_table son (cost=0.00..36313.00 rows=2500000 width=112) (actual time=0.081..179.119 rows=2500841 loops=1)
Optimizer: Postgres-based planner
Planning Time: 0.523 ms
(slice0) Executor memory: 581K bytes.
(slice1) Executor memory: 1224280K bytes avg x 2 workers, 1225892K bytes max (seg1). Work_mem: 1223292K bytes max.
(slice2) Executor memory: 672K bytes avg x 2 workers, 672K bytes max (seg0).
Memory used: 8388608kB
Query Id: 5832811209844029710
Execution Time: 1773.102 ms
(21 rows)改寫查詢
改寫前需要將執行記憶體調整到8 GB。
SET statement_mem to '8GB';改寫詳情如下。
CREATE OR REPLACE FUNCTION rewrite_query(
parent_id_arr character varying []
) RETURNS TABLE(
id varchar(100),
parent_id varchar(100),
id_seq varchar(4000),
float1 float,
float2 float2,
varchar1 varchar(100),
varchar2 varchar(100)
) AS $$
DECLARE prev_count INT := 0;
curr_count INT := 0;
curr_level INT := 1;
BEGIN
CREATE TEMP TABLE temp_result(
id varchar(100),
parent_id varchar(100),
id_seq varchar(4000),
float1 float,
float2 float2,
varchar1 varchar(100),
varchar2 varchar(100),
level int
) ON COMMIT DROP DISTRIBUTED BY(id);
INSERT INTO temp_result
SELECT
parent.id,
parent.parent_id,
CAST(parent.id AS varchar(4000)) id_seq,
parent.float1,
parent.float2,
parent.varchar1,
parent.varchar2,
1 AS level
FROM test_table parent
WHERE parent.id = ANY(parent_id_arr);
prev_count := (
SELECT COUNT(*) FROM temp_result
);
LOOP
ANALYZE temp_result;
INSERT INTO temp_result
SELECT
son.id,
son.parent_id,
CAST(temp_result.id_seq || '>' || son.id AS varchar(4000)) id_seq,
son.float1,
son.float2,
son.varchar1,
son.varchar2,
temp_result.level + 1 AS level
FROM test_table son
INNER JOIN temp_result ON temp_result.id = son.parent_id
WHERE temp_result.level = curr_level;
curr_count := (
SELECT COUNT(*) FROM temp_result
);
IF curr_count = prev_count THEN EXIT;
END IF;
prev_count := curr_count;
curr_level := curr_level + 1;
END LOOP;
RETURN QUERY
SELECT
CTE.id,
CTE.parent_id,
CTE.id_seq,
CTE.float1,
CTE.float2,
CTE.varchar1,
CTE.varchar2
FROM temp_result CTE;
END;
$$ LANGUAGE plpgsql;從以下執行計畫中看出,改寫後執行時間約0.9s,執行效能提升約一倍。
explain analyze SELECT * FROM rewrite_query(array['1-CCCCCCCCCCCCCCCCCCCC']);
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
Function Scan on rewrite_query (cost=0.25..10.25 rows=1000 width=170) (actual time=875.001..875.652 rows=10000 loops=1)
Optimizer: Postgres-based planner
Planning Time: 0.040 ms
(slice0) Executor memory: 2644K bytes. Work_mem: 2785K bytes max.
Memory used: 1048576kB
Query Id: 338320616919474816
Execution Time: 875.896 ms
(7 rows)真實案例
以下案例來源於某真實客戶的生產情境。該情境相較於測試情境,計算節點數量更多且資料量更大,經改寫查詢後,執行時間由186s提升到2s內。
原始查詢
WITH RECURSIVE CTE AS (
SELECT
parent.id,
parent.parent_id,
CAST(parent.parent_id AS varchar(4000)) id_seq,
parent.float1,
parent.float2,
parent.varchar1,
parent.varchar2
FROM test_table parent
WHERE
parent.parent_id IN ('test_id1','test_id2')
UNION ALL
SELECT
son.id,
son.parent_id,
CAST(CTE.id_seq || '>' || son.parent_id AS varchar(4000)) id_seq,
son.float1,
son.float2,
son.varchar1,
son.varchar2
FROM test_table son
INNER JOIN CTE ON CTE.id = son.parent_id
)
SELECT
m.varchar1,
m.varchar3,
CTE.parent_id,
CTE.id,
split_part(CTE.id_seq, '>', 1) id_1,
CTE.float1,
SUM(CTE.float2) sum_float2
FROM CTE
INNER JOIN other_table m ON m.varchar1 = CTE.varchar1
GROUP BY
m.varchar1,
m.varchar3,
CTE.parent_id,
CTE.id,
id_1,
CTE.float1
ORDER BY 7 DESC;改寫查詢
CREATE OR REPLACE FUNCTION rewrite_query(
parent_id_arr character varying []
) RETURNS TABLE(
varchar1 varchar(100),
varchar3 varchar(100),
id varchar(100),
parent_id varchar(100),
id_1 text,
float1 float,
sum_float2 float
) AS $$
DECLARE prev_count INT := 0;
curr_count INT := 0;
curr_level INT := 1;
BEGIN
CREATE TEMP TABLE temp_result(
id varchar(100),
parent_id varchar(100),
id_seq varchar(4000),
float1 float,
float2 float2,
varchar1 varchar(100),
varchar2 varchar(100),
level int
) ON COMMIT DROP DISTRIBUTED BY(id);
INSERT INTO temp_result
SELECT
parent.id,
parent.parent_id,
CAST(parent.parent_id AS varchar(4000)) id_seq,
parent.float1,
parent.float2,
parent.varchar1,
parent.varchar2,
1 AS level
FROM test_table parent
WHERE parent.parent_id = ANY(parent_id_arr);
prev_count := (
SELECT COUNT(*) FROM temp_result
);
LOOP
ANALYZE temp_result;
INSERT INTO temp_result
SELECT
son.id,
son.parent_id,
CAST(temp_result.id_seq || '>' || son.parent_id as varchar(4000)) id_seq,
son.float1,
son.float2,
son.varchar1,
son.varchar2,
temp_result.level + 1 AS level
FROM test_table son
INNER JOIN temp_result ON temp_result.id = son.parent_id
WHERE temp_result.level = curr_level;
curr_count := (
SELECT COUNT(*) FROM temp_result
);
IF curr_count = prev_count THEN EXIT;
END IF;
prev_count := curr_count;
curr_level := curr_level + 1;
END LOOP;
RETURN QUERY
SELECT
m.varchar1,
m.varchar3,
CTE.parent_id,
CTE.id,
split_part(CTE.id_seq, '>', 1) id_1,
CTE.float1,
SUM(CTE.float2) sum_float2
FROM temp_result CTE
INNER JOIN other_table m ON m.varchar1 = CTE.varchar1
GROUP BY
m.varchar1,
m.varchar3,
CTE.parent_id,
CTE.id,
id_1,
CTE.float1
ORDER BY 7 DESC;
END;
$$ LANGUAGE plpgsql;SELECT * FROM rewrite_query(ARRAY['test_id1', 'test_id2']);