The following queries lead to the following explain:
EXPLAIN(
WITH salaries_count AS (
SELECT
CASE WHEN income < 20000 THEN 'Low Salary'
WHEN income <= 50000 THEN 'Average Salary'
ELSE 'High Salary'
END AS salary_category,
COUNT(*) AS sc_count
FROM accounts
GROUP BY salary_category
)
SELECT salary_levels.salary_category AS category,
COALESCE(salaries_count.sc_count, 0) AS accounts_count
FROM (VALUES ('Low Salary'), ('Average Salary'), ('High Salary'))
AS salary_levels(salary_category)
LEFT JOIN salaries_count
ON salary_levels.salary_category = salaries_count.salary_category
);
Hash Left Join (cost=1.23..1.29 rows=3 width=40) -- Left join between predefined VALUES table and aggregated salary categories.
Hash Cond: ("*VALUES*".column1 = salaries_count.salary_category) -- Matching the predefined salary categories to the aggregated results.
-> Values Scan on "*VALUES*" (cost=0.00..0.04 rows=3 width=32) -- Scanning ('Low Salary', 'Average Salary', 'High Salary').
-> Hash (cost=1.18..1.18 rows=4 width=40) -- Hashing the aggregated salary data before performing the join.
-> Subquery Scan on salaries_count (cost=1.08..1.18 rows=4 width=40) -- Wrapping the aggregated salary data in a subquery.
-> HashAggregate (cost=1.08..1.14 rows=4 width=40) -- Aggregating the salary categories after classification.
Group Key: CASE WHEN (accounts.income < 20000) THEN 'Low Salary'::text
WHEN (accounts.income <= 50000) THEN 'Average Salary'::text
ELSE 'High Salary'::text END -- Aggregating based on CASE WHEN logic.
-> Seq Scan on accounts (cost=0.00..1.06 rows=4 width=32) -- Scanning accounts **only once** for classification.
while the following query produce a different version plan
EXPLAIN(
WITH salaries_count AS (
SELECT salary_category, COUNT(*) AS sc_count
FROM (
SELECT 'Low Salary' AS salary_category FROM Accounts WHERE income < 20000
UNION ALL
SELECT 'Average Salary' FROM Accounts WHERE income <=50000 and income >= 20000
UNION ALL
SELECT 'High Salary' FROM Accounts WHERE income > 50000
) categorized
GROUP BY salary_category
)
SELECT salary_levels.salary_category AS category,
COALESCE(salaries_count.sc_count, 0) AS accounts_count
FROM (VALUES ('Low Salary'), ('Average Salary'), ('High Salary'))
AS salary_levels(salary_category)
LEFT JOIN salaries_count
ON salary_levels.salary_category = salaries_count.salary_category
);
Hash Right Join (cost=3.29..3.43 rows=3 width=40) -- Right join between the aggregated salary categories and predefined VALUES table.
Hash Cond: (('Low Salary'::text) = "*VALUES*".column1) -- Matching the 'Low Salary' category with the predefined values.
-> HashAggregate (cost=3.21..3.26 rows=5 width=40) -- Aggregating the salary categories after classification.
Group Key: ('Low Salary'::text) -- The aggregation groups by salary category.
-> Append (cost=0.00..3.19 rows=5 width=32) -- Combining three separate scans into one.
-> Seq Scan on accounts (cost=0.00..1.05 rows=1 width=32) -- Scanning accounts where income < 20,000.
Filter: (income < 20000) -- Filtering for 'Low Salary' category.
-> Seq Scan on accounts accounts_1 (cost=0.00..1.06 rows=1 width=32) -- Scanning accounts for 'Average Salary'.
Filter: ((income <= 50000) AND (income >= 20000)) -- Filtering for the 'Average Salary' category.
-> Seq Scan on accounts accounts_2 (cost=0.00..1.05 rows=3 width=32) -- Scanning accounts for 'High Salary'.
Filter: (income > 50000) -- Filtering for 'High Salary' category.
-> Hash (cost=0.04..0.04 rows=3 width=32) -- Hash table built for predefined salary categories.
-> Values Scan on "*VALUES*" (cost=0.00..0.04 rows=3 width=32) -- Scanning ('Low Salary', 'Average Salary', 'High Salary').