从一个游戏SQL的优化说起‐2 - l1t1/note GitHub Wiki

1秒钟算出11张牌的结果不够快,程宁同学把问题交给DeepSeek AI来优化,AI给出的结果如下(一个字母都没改):

WITH 
-- 将108张牌分配给4个人,用轮流、顺序分配的方案
tmp0 AS (
    SELECT 
        a.i AS category,
        b.i AS seq,
        SUBSTRING(c.j, (a.i * 27) + b.i, 1) AS chr,
        ASCII(chr) - 126982 AS n,
        (n - 1) // 9 AS hs,
        CASE n % 9 WHEN 0 THEN 9 ELSE n % 9 END AS pd
    FROM 
        range(4) a(i),
        range(1, 28) b(i),
        games c
),
-- 第一步,能凑成对子、2张顺子情况
tmp1 AS (
    SELECT 
        a.*,
        MIN(b.seq) FILTER (WHERE a.category = b.category AND a.hs = b.hs AND a.pd = b.pd AND b.seq > a.seq) AS dz,
        MIN(b.seq) FILTER (WHERE a.category = b.category AND a.hs = b.hs AND a.pd = b.pd + 1 AND b.seq > a.seq) AS sz1,
        MIN(b.seq) FILTER (WHERE a.category = b.category AND a.hs = b.hs AND a.pd = b.pd - 1 AND b.seq > a.seq) AS sz2
    FROM 
        tmp0 a
    LEFT JOIN 
        tmp0 b ON a.category = b.category AND a.hs = b.hs
    GROUP BY 
        a.category, a.seq, a.chr, a.n, a.hs, a.pd
    ORDER BY 
        a.category, a.seq
),
-- 第二步,加能凑成三张顺子的情况
tmp2 AS (
    SELECT 
        a.*,
        MIN(b.seq) FILTER (WHERE a.category = b.category AND a.hs = b.hs AND a.pd = b.pd + 2 AND b.seq > a.seq AND sz1 IS NOT NULL) AS sz11,
        MIN(b.seq) FILTER (WHERE a.category = b.category AND a.hs = b.hs AND a.pd = b.pd - 2 AND b.seq > a.seq AND sz2 IS NOT NULL) AS sz22,
        MIN(b.seq) FILTER (WHERE a.category = b.category AND a.hs = b.hs AND a.pd = b.pd AND b.seq > a.dz AND dz IS NOT NULL) AS sz
    FROM 
        tmp1 a
    LEFT JOIN 
        tmp0 b ON a.category = b.category AND a.hs = b.hs
    GROUP BY 
        a.category, a.seq, a.chr, a.n, a.hs, a.pd, a.dz, a.sz1, a.sz2
    ORDER BY 
        a.category, a.seq
),
-- 生成所有可以形成3张组合的牌
tmp_3z AS (
    SELECT 
        category, 
        LIST_SORT([seq, sz1, sz11]) AS n 
    FROM 
        tmp2 
    WHERE 
        sz11 IS NOT NULL
    UNION  
    SELECT 
        category, 
        LIST_SORT([seq, sz2, sz22]) AS n 
    FROM 
        tmp2 
    WHERE 
        sz22 IS NOT NULL
    UNION 
    SELECT 
        category, 
        LIST_SORT([seq, dz, sz]) AS n 
    FROM 
        tmp2 
    WHERE 
        sz IS NOT NULL
),
-- 生成所有对子组合
tmp_dz AS (
    SELECT 
        category, 
        LIST_SORT([seq, dz]) AS n 
    FROM 
        tmp1 
    WHERE 
        dz IS NOT NULL
),
-- 生成所有可能的组合
tmp_all AS (
    SELECT 
        a.category,
        a.n AS sz1,
        b.n AS sz2,
        c.n AS sz3,
        d.n AS dz,
        LIST_REVERSE(LIST_SORT(ARRAY_DISTINCT(
            ARRAY_CONCAT(a.n, ARRAY_CONCAT(b.n, ARRAY_CONCAT(c.n, d.n)))
        ))) AS n
    FROM 
        tmp_3z a 
    LEFT JOIN 
        tmp_3z b ON a.category = b.category
    LEFT JOIN 
        tmp_3z c ON a.category = c.category
    LEFT JOIN 
        tmp_dz d ON a.category = d.category
    WHERE 
        a.n < b.n AND b.n < c.n
        AND ARRAY_LENGTH(ARRAY_DISTINCT(ARRAY_CONCAT(a.n, ARRAY_CONCAT(b.n, ARRAY_CONCAT(c.n, d.n))))) = 11
),
-- 用最少摸牌次数能胡牌的组合
tmp_min AS (
    SELECT 
        MIN(n) AS n 
    FROM 
        tmp_all
),
-- 最终结果
final_result AS (
    SELECT 
        STRING_AGG(a.chr, '' ORDER BY a.chr) AS ret
    FROM 
        tmp1 a
    JOIN 
        tmp_min m ON a.seq IN (SELECT UNNEST(m.n))
    JOIN 
        tmp_all b ON b.n = m.n AND a.category = b.category
)
SELECT * FROM final_result;
运行结果如下:
│ 🀌🀍🀎🀔🀕🀖🀙🀙🀚🀛🀜 │

Run Time (s): real 0.046 user 0.108000 sys 0.020000
AI的优化卓有成效,比优化前提高了约30倍,主要是把相关标量子查询(select min(seq) from tmp0 b where a.category = b.category and a.hs = b.hs and a.pd = b.pd and b.seq > a.seq) dz改写成表连接和条件聚合MIN(b.seq) FILTER (WHERE a.category = b.category AND a.hs = b.hs AND a.pd = b.pd AND b.seq > a.seq) AS dz的功劳。 在极端情况下,上述代码的性能依然不够好。比如如下数据集:
WITH 
games as(with t as (select chr(126983+i::int) pic from generate_series(0,26)t(i))select 2, string_agg(pic,''order by pic)j from t,generate_series(1,4) )
玩家的牌完全按花色和点数排序,估计27张牌的3张组合达到27种,再组成4组就是大约power(27,4),再叠加对子的组合,能胡牌的组合比随机数多了很多倍。
⚠️ **GitHub.com Fallback** ⚠️