Common Table Expressions (CTEs): Writing Readable Complex Queries

Common Table Expressions (CTEs) are temporary named result sets that exist only within the execution of a single SQL statement. They make complex queries more readable and maintainable by breaking them into logical, named parts.

Basic CTE Syntax

-- Basic CTE structure
WITH cte_name AS (
    SELECT column1, column2
    FROM table_name
    WHERE condition
)
SELECT * FROM cte_name;

Simple CTEs

Basic Example

-- Create a CTE for active users
WITH active_users AS (
    SELECT id, name, email
    FROM users
    WHERE active = true
)
SELECT * FROM active_users;

-- Use the CTE in a more complex query
WITH active_users AS (
    SELECT id, name, email
    FROM users
    WHERE active = true
)
SELECT au.name, COUNT(o.id) as order_count
FROM active_users au
LEFT JOIN orders o ON au.id = o.user_id
GROUP BY au.id, au.name;

Multiple CTEs

-- Define multiple CTEs
WITH 
    high_value_orders AS (
        SELECT user_id, SUM(amount) as total_spent
        FROM orders
        WHERE amount > 100
        GROUP BY user_id
    ),
    user_stats AS (
        SELECT 
            u.id,
            u.name,
            COUNT(o.id) as total_orders,
            COALESCE(hvo.total_spent, 0) as high_value_total
        FROM users u
        LEFT JOIN orders o ON u.id = o.user_id
        LEFT JOIN high_value_orders hvo ON u.id = hvo.user_id
        GROUP BY u.id, u.name, hvo.total_spent
    )
SELECT 
    name,
    total_orders,
    high_value_total,
    CASE 
        WHEN high_value_total > 1000 THEN 'VIP'
        WHEN high_value_total > 500 THEN 'Premium'
        ELSE 'Regular'
    END as customer_tier
FROM user_stats
ORDER BY high_value_total DESC;

Complex Query Organization

Sales Analysis Example

-- Complex sales analysis using CTEs
WITH 
    daily_sales AS (
        SELECT 
            DATE(sale_date) as sale_day,
            SUM(amount) as daily_total,
            COUNT(*) as transaction_count
        FROM sales
        WHERE sale_date >= '2023-01-01'
        GROUP BY DATE(sale_date)
    ),
    category_sales AS (
        SELECT 
            category,
            SUM(amount) as category_total,
            AVG(amount) as category_avg
        FROM sales
        WHERE sale_date >= '2023-01-01'
        GROUP BY category
    ),
    top_products AS (
        SELECT 
            product_name,
            SUM(amount) as product_total,
            ROW_NUMBER() OVER (ORDER BY SUM(amount) DESC) as rank
        FROM sales
        WHERE sale_date >= '2023-01-01'
        GROUP BY product_name
    )
SELECT 
    ds.sale_day,
    ds.daily_total,
    ds.transaction_count,
    cs.category_total,
    tp.product_name as top_product
FROM daily_sales ds
CROSS JOIN category_sales cs
LEFT JOIN top_products tp ON tp.rank = 1
ORDER BY ds.sale_day;

Recursive CTEs

Recursive CTEs are powerful for working with hierarchical data and generating sequences.

Basic Recursive CTE Structure

WITH RECURSIVE cte_name AS (
    -- Base case (non-recursive term)
    SELECT column1, column2, 1 as level
    FROM table_name
    WHERE condition
    
    UNION ALL
    
    -- Recursive case
    SELECT t.column1, t.column2, c.level + 1
    FROM table_name t
    JOIN cte_name c ON t.parent_id = c.id
    WHERE c.level < max_depth
)
SELECT * FROM cte_name;

Employee Hierarchy Example

-- Create employee hierarchy table
CREATE TABLE employees (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    manager_id INTEGER REFERENCES employees(id),
    title VARCHAR(100)
);

-- Insert sample data
INSERT INTO employees (name, manager_id, title) VALUES
('CEO', NULL, 'Chief Executive Officer'),
('CTO', 1, 'Chief Technology Officer'),
('CFO', 1, 'Chief Financial Officer'),
('Dev Manager', 2, 'Development Manager'),
('QA Manager', 2, 'QA Manager'),
('Senior Dev', 4, 'Senior Developer'),
('Junior Dev', 4, 'Junior Developer'),
('QA Engineer', 5, 'QA Engineer');

-- Recursive CTE to show hierarchy
WITH RECURSIVE employee_hierarchy AS (
    -- Base case: top-level employees (no manager)
    SELECT 
        id, 
        name, 
        manager_id, 
        title, 
        0 as level,
        ARRAY[name] as path
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- Recursive case: employees with managers
    SELECT 
        e.id, 
        e.name, 
        e.manager_id, 
        e.title, 
        eh.level + 1,
        eh.path || e.name
    FROM employees e
    JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT 
    level,
    REPEAT('  ', level) || name as hierarchy_display,
    title,
    path
FROM employee_hierarchy
ORDER BY path;

Number Sequence Generation

-- Generate a sequence of numbers
WITH RECURSIVE number_sequence AS (
    SELECT 1 as num
    UNION ALL
    SELECT num + 1
    FROM number_sequence
    WHERE num < 100
)
SELECT num FROM number_sequence;

-- Generate date sequence
WITH RECURSIVE date_sequence AS (
    SELECT '2023-01-01'::DATE as date_val
    UNION ALL
    SELECT date_val + INTERVAL '1 day'
    FROM date_sequence
    WHERE date_val < '2023-01-31'::DATE
)
SELECT date_val FROM date_sequence;

CTEs for Data Transformation

Data Cleaning and Transformation

-- Clean and transform user data
WITH 
    raw_users AS (
        SELECT 
            id,
            TRIM(name) as name,
            LOWER(TRIM(email)) as email,
            created_at
        FROM users
        WHERE email IS NOT NULL
    ),
    valid_users AS (
        SELECT *
        FROM raw_users
        WHERE email LIKE '%@%.%'
          AND LENGTH(name) > 0
    ),
    user_stats AS (
        SELECT 
            id,
            name,
            email,
            created_at,
            EXTRACT(YEAR FROM created_at) as join_year,
            EXTRACT(MONTH FROM created_at) as join_month
        FROM valid_users
    )
SELECT 
    join_year,
    join_month,
    COUNT(*) as new_users
FROM user_stats
GROUP BY join_year, join_month
ORDER BY join_year, join_month;

Complex Aggregations

-- Multi-level aggregation using CTEs
WITH 
    order_details AS (
        SELECT 
            o.id,
            o.user_id,
            o.amount,
            o.order_date,
            u.name as user_name,
            u.category as user_category
        FROM orders o
        JOIN users u ON o.user_id = u.id
        WHERE o.order_date >= '2023-01-01'
    ),
    user_totals AS (
        SELECT 
            user_id,
            user_name,
            user_category,
            COUNT(*) as order_count,
            SUM(amount) as total_spent,
            AVG(amount) as avg_order_value
        FROM order_details
        GROUP BY user_id, user_name, user_category
    ),
    category_summary AS (
        SELECT 
            user_category,
            COUNT(*) as user_count,
            SUM(total_spent) as category_revenue,
            AVG(avg_order_value) as category_avg_order
        FROM user_totals
        GROUP BY user_category
    )
SELECT 
    cs.user_category,
    cs.user_count,
    cs.category_revenue,
    cs.category_avg_order,
    ROUND(cs.category_revenue / SUM(cs.category_revenue) OVER() * 100, 2) as revenue_percentage
FROM category_summary cs
ORDER BY cs.category_revenue DESC;

CTEs for Performance Optimization

Materialized CTEs

-- Use CTEs to avoid repeated subqueries
WITH 
    expensive_calculation AS (
        SELECT 
            user_id,
            SUM(amount) as total_spent,
            COUNT(*) as order_count,
            AVG(amount) as avg_order_value
        FROM orders
        WHERE order_date >= '2023-01-01'
        GROUP BY user_id
    )
SELECT 
    u.name,
    u.email,
    ec.total_spent,
    ec.order_count,
    ec.avg_order_value
FROM users u
JOIN expensive_calculation ec ON u.id = ec.user_id
WHERE ec.total_spent > 1000;

Partitioned Analysis

-- Analyze data by partitions using CTEs
WITH 
    monthly_sales AS (
        SELECT 
            DATE_TRUNC('month', sale_date) as month,
            category,
            SUM(amount) as monthly_total
        FROM sales
        WHERE sale_date >= '2023-01-01'
        GROUP BY DATE_TRUNC('month', sale_date), category
    ),
    category_growth AS (
        SELECT 
            category,
            month,
            monthly_total,
            LAG(monthly_total) OVER (
                PARTITION BY category 
                ORDER BY month
            ) as prev_month_total,
            ROUND(
                (monthly_total - LAG(monthly_total) OVER (
                    PARTITION BY category 
                    ORDER BY month
                )) / LAG(monthly_total) OVER (
                    PARTITION BY category 
                    ORDER BY month
                ) * 100, 2
            ) as growth_percentage
        FROM monthly_sales
    )
SELECT 
    category,
    month,
    monthly_total,
    growth_percentage
FROM category_growth
WHERE growth_percentage IS NOT NULL
ORDER BY category, month;

CTEs vs Subqueries

When to Use CTEs

-- Use CTEs for complex, multi-step queries
WITH 
    step1 AS (SELECT * FROM table1 WHERE condition1),
    step2 AS (SELECT * FROM step1 WHERE condition2),
    step3 AS (SELECT * FROM step2 WHERE condition3)
SELECT * FROM step3;

-- vs nested subqueries (harder to read)
SELECT * FROM (
    SELECT * FROM (
        SELECT * FROM table1 WHERE condition1
    ) WHERE condition2
) WHERE condition3;

CTEs for Reusability

-- CTE can be referenced multiple times
WITH user_stats AS (
    SELECT 
        user_id,
        COUNT(*) as order_count,
        SUM(amount) as total_spent
    FROM orders
    GROUP BY user_id
)
SELECT 
    u.name,
    us1.order_count as orders_2023,
    us2.order_count as orders_2022,
    us1.total_spent as spent_2023,
    us2.total_spent as spent_2022
FROM users u
LEFT JOIN user_stats us1 ON u.id = us1.user_id
LEFT JOIN user_stats us2 ON u.id = us2.user_id;

Best Practices

1. Use Descriptive Names

-- Good: Descriptive CTE names
WITH 
    high_value_customers AS (
        SELECT user_id, SUM(amount) as total_spent
        FROM orders
        GROUP BY user_id
        HAVING SUM(amount) > 1000
    ),
    customer_segments AS (
        SELECT 
            user_id,
            CASE 
                WHEN total_spent > 5000 THEN 'VIP'
                WHEN total_spent > 1000 THEN 'Premium'
                ELSE 'Regular'
            END as segment
        FROM high_value_customers
    )
SELECT * FROM customer_segments;

-- Bad: Generic names
WITH 
    temp1 AS (SELECT ...),
    temp2 AS (SELECT ...)
SELECT * FROM temp2;

2. Keep CTEs Focused

-- Each CTE should have a single responsibility
WITH 
    -- CTE 1: Filter active users
    active_users AS (
        SELECT id, name, email
        FROM users
        WHERE active = true
    ),
    -- CTE 2: Calculate user statistics
    user_statistics AS (
        SELECT 
            au.id,
            au.name,
            COUNT(o.id) as order_count,
            SUM(o.amount) as total_spent
        FROM active_users au
        LEFT JOIN orders o ON au.id = o.user_id
        GROUP BY au.id, au.name
    )
SELECT * FROM user_statistics;

3. Use CTEs for Complex Logic

-- Break complex business logic into CTEs
WITH 
    -- Step 1: Identify eligible customers
    eligible_customers AS (
        SELECT user_id
        FROM orders
        WHERE order_date >= '2023-01-01'
        GROUP BY user_id
        HAVING SUM(amount) > 500
    ),
    -- Step 2: Calculate discount amounts
    discount_calculation AS (
        SELECT 
            ec.user_id,
            o.amount,
            CASE 
                WHEN o.amount > 200 THEN o.amount * 0.1
                ELSE o.amount * 0.05
            END as discount_amount
        FROM eligible_customers ec
        JOIN orders o ON ec.user_id = o.user_id
        WHERE o.order_date >= '2023-02-01'
    ),
    -- Step 3: Apply discount limits
    final_discounts AS (
        SELECT 
            user_id,
            SUM(discount_amount) as total_discount,
            LEAST(SUM(discount_amount), 100) as capped_discount
        FROM discount_calculation
        GROUP BY user_id
    )
SELECT * FROM final_discounts;

CTEs are powerful tools for writing complex, readable SQL queries. They help organize logic, avoid repetition, and make queries more maintainable. Use them to break down complex problems into manageable, logical steps.