Common Table Expressions (CTEs): Writing Readable Complex Queries
Common Table Expressions (CTEs) are temporary named result sets that exist only within the execution of a single SQL statement. They make complex queries more readable and maintainable by breaking them into logical, named parts.
Basic CTE Syntax
-- Basic CTE structure
WITH cte_name AS (
SELECT column1, column2
FROM table_name
WHERE condition
)
SELECT * FROM cte_name;
Simple CTEs
Basic Example
-- Create a CTE for active users
WITH active_users AS (
SELECT id, name, email
FROM users
WHERE active = true
)
SELECT * FROM active_users;
-- Use the CTE in a more complex query
WITH active_users AS (
SELECT id, name, email
FROM users
WHERE active = true
)
SELECT au.name, COUNT(o.id) as order_count
FROM active_users au
LEFT JOIN orders o ON au.id = o.user_id
GROUP BY au.id, au.name;
Multiple CTEs
-- Define multiple CTEs
WITH
high_value_orders AS (
SELECT user_id, SUM(amount) as total_spent
FROM orders
WHERE amount > 100
GROUP BY user_id
),
user_stats AS (
SELECT
u.id,
u.name,
COUNT(o.id) as total_orders,
COALESCE(hvo.total_spent, 0) as high_value_total
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
LEFT JOIN high_value_orders hvo ON u.id = hvo.user_id
GROUP BY u.id, u.name, hvo.total_spent
)
SELECT
name,
total_orders,
high_value_total,
CASE
WHEN high_value_total > 1000 THEN 'VIP'
WHEN high_value_total > 500 THEN 'Premium'
ELSE 'Regular'
END as customer_tier
FROM user_stats
ORDER BY high_value_total DESC;
Complex Query Organization
Sales Analysis Example
-- Complex sales analysis using CTEs
WITH
daily_sales AS (
SELECT
DATE(sale_date) as sale_day,
SUM(amount) as daily_total,
COUNT(*) as transaction_count
FROM sales
WHERE sale_date >= '2023-01-01'
GROUP BY DATE(sale_date)
),
category_sales AS (
SELECT
category,
SUM(amount) as category_total,
AVG(amount) as category_avg
FROM sales
WHERE sale_date >= '2023-01-01'
GROUP BY category
),
top_products AS (
SELECT
product_name,
SUM(amount) as product_total,
ROW_NUMBER() OVER (ORDER BY SUM(amount) DESC) as rank
FROM sales
WHERE sale_date >= '2023-01-01'
GROUP BY product_name
)
SELECT
ds.sale_day,
ds.daily_total,
ds.transaction_count,
cs.category_total,
tp.product_name as top_product
FROM daily_sales ds
CROSS JOIN category_sales cs
LEFT JOIN top_products tp ON tp.rank = 1
ORDER BY ds.sale_day;
Recursive CTEs
Recursive CTEs are powerful for working with hierarchical data and generating sequences.
Basic Recursive CTE Structure
WITH RECURSIVE cte_name AS (
-- Base case (non-recursive term)
SELECT column1, column2, 1 as level
FROM table_name
WHERE condition
UNION ALL
-- Recursive case
SELECT t.column1, t.column2, c.level + 1
FROM table_name t
JOIN cte_name c ON t.parent_id = c.id
WHERE c.level < max_depth
)
SELECT * FROM cte_name;
Employee Hierarchy Example
-- Create employee hierarchy table
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
manager_id INTEGER REFERENCES employees(id),
title VARCHAR(100)
);
-- Insert sample data
INSERT INTO employees (name, manager_id, title) VALUES
('CEO', NULL, 'Chief Executive Officer'),
('CTO', 1, 'Chief Technology Officer'),
('CFO', 1, 'Chief Financial Officer'),
('Dev Manager', 2, 'Development Manager'),
('QA Manager', 2, 'QA Manager'),
('Senior Dev', 4, 'Senior Developer'),
('Junior Dev', 4, 'Junior Developer'),
('QA Engineer', 5, 'QA Engineer');
-- Recursive CTE to show hierarchy
WITH RECURSIVE employee_hierarchy AS (
-- Base case: top-level employees (no manager)
SELECT
id,
name,
manager_id,
title,
0 as level,
ARRAY[name] as path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: employees with managers
SELECT
e.id,
e.name,
e.manager_id,
e.title,
eh.level + 1,
eh.path || e.name
FROM employees e
JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT
level,
REPEAT(' ', level) || name as hierarchy_display,
title,
path
FROM employee_hierarchy
ORDER BY path;
Number Sequence Generation
-- Generate a sequence of numbers
WITH RECURSIVE number_sequence AS (
SELECT 1 as num
UNION ALL
SELECT num + 1
FROM number_sequence
WHERE num < 100
)
SELECT num FROM number_sequence;
-- Generate date sequence
WITH RECURSIVE date_sequence AS (
SELECT '2023-01-01'::DATE as date_val
UNION ALL
SELECT date_val + INTERVAL '1 day'
FROM date_sequence
WHERE date_val < '2023-01-31'::DATE
)
SELECT date_val FROM date_sequence;
CTEs for Data Transformation
Data Cleaning and Transformation
-- Clean and transform user data
WITH
raw_users AS (
SELECT
id,
TRIM(name) as name,
LOWER(TRIM(email)) as email,
created_at
FROM users
WHERE email IS NOT NULL
),
valid_users AS (
SELECT *
FROM raw_users
WHERE email LIKE '%@%.%'
AND LENGTH(name) > 0
),
user_stats AS (
SELECT
id,
name,
email,
created_at,
EXTRACT(YEAR FROM created_at) as join_year,
EXTRACT(MONTH FROM created_at) as join_month
FROM valid_users
)
SELECT
join_year,
join_month,
COUNT(*) as new_users
FROM user_stats
GROUP BY join_year, join_month
ORDER BY join_year, join_month;
Complex Aggregations
-- Multi-level aggregation using CTEs
WITH
order_details AS (
SELECT
o.id,
o.user_id,
o.amount,
o.order_date,
u.name as user_name,
u.category as user_category
FROM orders o
JOIN users u ON o.user_id = u.id
WHERE o.order_date >= '2023-01-01'
),
user_totals AS (
SELECT
user_id,
user_name,
user_category,
COUNT(*) as order_count,
SUM(amount) as total_spent,
AVG(amount) as avg_order_value
FROM order_details
GROUP BY user_id, user_name, user_category
),
category_summary AS (
SELECT
user_category,
COUNT(*) as user_count,
SUM(total_spent) as category_revenue,
AVG(avg_order_value) as category_avg_order
FROM user_totals
GROUP BY user_category
)
SELECT
cs.user_category,
cs.user_count,
cs.category_revenue,
cs.category_avg_order,
ROUND(cs.category_revenue / SUM(cs.category_revenue) OVER() * 100, 2) as revenue_percentage
FROM category_summary cs
ORDER BY cs.category_revenue DESC;
CTEs for Performance Optimization
Materialized CTEs
-- Use CTEs to avoid repeated subqueries
WITH
expensive_calculation AS (
SELECT
user_id,
SUM(amount) as total_spent,
COUNT(*) as order_count,
AVG(amount) as avg_order_value
FROM orders
WHERE order_date >= '2023-01-01'
GROUP BY user_id
)
SELECT
u.name,
u.email,
ec.total_spent,
ec.order_count,
ec.avg_order_value
FROM users u
JOIN expensive_calculation ec ON u.id = ec.user_id
WHERE ec.total_spent > 1000;
Partitioned Analysis
-- Analyze data by partitions using CTEs
WITH
monthly_sales AS (
SELECT
DATE_TRUNC('month', sale_date) as month,
category,
SUM(amount) as monthly_total
FROM sales
WHERE sale_date >= '2023-01-01'
GROUP BY DATE_TRUNC('month', sale_date), category
),
category_growth AS (
SELECT
category,
month,
monthly_total,
LAG(monthly_total) OVER (
PARTITION BY category
ORDER BY month
) as prev_month_total,
ROUND(
(monthly_total - LAG(monthly_total) OVER (
PARTITION BY category
ORDER BY month
)) / LAG(monthly_total) OVER (
PARTITION BY category
ORDER BY month
) * 100, 2
) as growth_percentage
FROM monthly_sales
)
SELECT
category,
month,
monthly_total,
growth_percentage
FROM category_growth
WHERE growth_percentage IS NOT NULL
ORDER BY category, month;
CTEs vs Subqueries
When to Use CTEs
-- Use CTEs for complex, multi-step queries
WITH
step1 AS (SELECT * FROM table1 WHERE condition1),
step2 AS (SELECT * FROM step1 WHERE condition2),
step3 AS (SELECT * FROM step2 WHERE condition3)
SELECT * FROM step3;
-- vs nested subqueries (harder to read)
SELECT * FROM (
SELECT * FROM (
SELECT * FROM table1 WHERE condition1
) WHERE condition2
) WHERE condition3;
CTEs for Reusability
-- CTE can be referenced multiple times
WITH user_stats AS (
SELECT
user_id,
COUNT(*) as order_count,
SUM(amount) as total_spent
FROM orders
GROUP BY user_id
)
SELECT
u.name,
us1.order_count as orders_2023,
us2.order_count as orders_2022,
us1.total_spent as spent_2023,
us2.total_spent as spent_2022
FROM users u
LEFT JOIN user_stats us1 ON u.id = us1.user_id
LEFT JOIN user_stats us2 ON u.id = us2.user_id;
Best Practices
1. Use Descriptive Names
-- Good: Descriptive CTE names
WITH
high_value_customers AS (
SELECT user_id, SUM(amount) as total_spent
FROM orders
GROUP BY user_id
HAVING SUM(amount) > 1000
),
customer_segments AS (
SELECT
user_id,
CASE
WHEN total_spent > 5000 THEN 'VIP'
WHEN total_spent > 1000 THEN 'Premium'
ELSE 'Regular'
END as segment
FROM high_value_customers
)
SELECT * FROM customer_segments;
-- Bad: Generic names
WITH
temp1 AS (SELECT ...),
temp2 AS (SELECT ...)
SELECT * FROM temp2;
2. Keep CTEs Focused
-- Each CTE should have a single responsibility
WITH
-- CTE 1: Filter active users
active_users AS (
SELECT id, name, email
FROM users
WHERE active = true
),
-- CTE 2: Calculate user statistics
user_statistics AS (
SELECT
au.id,
au.name,
COUNT(o.id) as order_count,
SUM(o.amount) as total_spent
FROM active_users au
LEFT JOIN orders o ON au.id = o.user_id
GROUP BY au.id, au.name
)
SELECT * FROM user_statistics;
3. Use CTEs for Complex Logic
-- Break complex business logic into CTEs
WITH
-- Step 1: Identify eligible customers
eligible_customers AS (
SELECT user_id
FROM orders
WHERE order_date >= '2023-01-01'
GROUP BY user_id
HAVING SUM(amount) > 500
),
-- Step 2: Calculate discount amounts
discount_calculation AS (
SELECT
ec.user_id,
o.amount,
CASE
WHEN o.amount > 200 THEN o.amount * 0.1
ELSE o.amount * 0.05
END as discount_amount
FROM eligible_customers ec
JOIN orders o ON ec.user_id = o.user_id
WHERE o.order_date >= '2023-02-01'
),
-- Step 3: Apply discount limits
final_discounts AS (
SELECT
user_id,
SUM(discount_amount) as total_discount,
LEAST(SUM(discount_amount), 100) as capped_discount
FROM discount_calculation
GROUP BY user_id
)
SELECT * FROM final_discounts;
CTEs are powerful tools for writing complex, readable SQL queries. They help organize logic, avoid repetition, and make queries more maintainable. Use them to break down complex problems into manageable, logical steps.