Window Functions: Advanced Analytical Queries

Window functions are powerful SQL features that allow you to perform calculations across a set of rows related to the current row. Unlike aggregate functions that group rows, window functions maintain individual rows while providing access to other rows in the result set.

Understanding Window Functions

Window functions operate on a "window" of rows defined by the OVER clause. They don't reduce the number of rows like GROUP BY does—instead, they add calculated columns to each row.

Basic Syntax

-- Basic window function syntax
SELECT 
    column1,
    column2,
    window_function(column3) OVER (
        PARTITION BY column4 
        ORDER BY column5
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) as calculated_column
FROM table_name;

Sample Data

Let's use this sample data for our examples:

-- Create sample tables
CREATE TABLE sales (
    id SERIAL PRIMARY KEY,
    product_name VARCHAR(100),
    category VARCHAR(50),
    amount DECIMAL(10,2),
    sale_date DATE
);

-- Insert sample data
INSERT INTO sales (product_name, category, amount, sale_date) VALUES
('Laptop', 'Electronics', 1200.00, '2023-01-15'),
('Phone', 'Electronics', 800.00, '2023-01-16'),
('Desk', 'Furniture', 300.00, '2023-01-17'),
('Chair', 'Furniture', 150.00, '2023-01-18'),
('Tablet', 'Electronics', 500.00, '2023-01-19'),
('Bookshelf', 'Furniture', 200.00, '2023-01-20'),
('Monitor', 'Electronics', 400.00, '2023-01-21'),
('Couch', 'Furniture', 800.00, '2023-01-22');

Ranking Functions

ROW_NUMBER()

Assigns a unique sequential number to each row within a partition.

-- Rank sales by amount within each category
SELECT 
    product_name,
    category,
    amount,
    ROW_NUMBER() OVER (
        PARTITION BY category 
        ORDER BY amount DESC
    ) as rank_in_category
FROM sales;

-- Result:
-- product_name | category   | amount  | rank_in_category
-- Laptop       | Electronics| 1200.00 | 1
-- Phone        | Electronics| 800.00  | 2
-- Monitor      | Electronics| 400.00  | 3
-- Tablet       | Electronics| 500.00  | 4
-- Couch        | Furniture  | 800.00  | 1
-- Desk         | Furniture  | 300.00  | 2
-- Chair        | Furniture  | 150.00  | 3
-- Bookshelf    | Furniture  | 200.00  | 4

RANK() and DENSE_RANK()

Handle ties differently than ROW_NUMBER().

-- Compare ranking functions
SELECT 
    product_name,
    amount,
    ROW_NUMBER() OVER (ORDER BY amount DESC) as row_num,
    RANK() OVER (ORDER BY amount DESC) as rank_val,
    DENSE_RANK() OVER (ORDER BY amount DESC) as dense_rank_val
FROM sales;

-- ROW_NUMBER(): Always unique, even for ties
-- RANK(): Same rank for ties, gaps in sequence
-- DENSE_RANK(): Same rank for ties, no gaps

NTILE()

Divides rows into a specified number of groups.

-- Divide products into 3 price tiers
SELECT 
    product_name,
    amount,
    NTILE(3) OVER (ORDER BY amount DESC) as price_tier
FROM sales;

-- Result: 3 tiers based on price
-- Tier 1: Most expensive products
-- Tier 2: Medium-priced products  
-- Tier 3: Least expensive products

Aggregate Window Functions

Running Totals

-- Calculate running total of sales by date
SELECT 
    sale_date,
    amount,
    SUM(amount) OVER (
        ORDER BY sale_date 
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) as running_total
FROM sales
ORDER BY sale_date;

-- Result:
-- sale_date  | amount  | running_total
-- 2023-01-15 | 1200.00 | 1200.00
-- 2023-01-16 | 800.00  | 2000.00
-- 2023-01-17 | 300.00  | 2300.00
-- ...

Moving Averages

-- Calculate 3-day moving average
SELECT 
    sale_date,
    amount,
    AVG(amount) OVER (
        ORDER BY sale_date 
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) as moving_avg_3day
FROM sales
ORDER BY sale_date;

Category Totals

-- Show individual sales with category totals
SELECT 
    product_name,
    category,
    amount,
    SUM(amount) OVER (PARTITION BY category) as category_total,
    amount / SUM(amount) OVER (PARTITION BY category) * 100 as category_percentage
FROM sales;

Value Functions

LAG() and LEAD()

Access previous and next rows.

-- Compare current sale with previous sale
SELECT 
    sale_date,
    amount,
    LAG(amount) OVER (ORDER BY sale_date) as previous_amount,
    amount - LAG(amount) OVER (ORDER BY sale_date) as difference
FROM sales
ORDER BY sale_date;

-- Compare with next sale
SELECT 
    sale_date,
    amount,
    LEAD(amount) OVER (ORDER BY sale_date) as next_amount,
    LEAD(amount) OVER (ORDER BY sale_date) - amount as difference
FROM sales
ORDER BY sale_date;

FIRST_VALUE() and LAST_VALUE()

Access first and last values in a window.

-- Show each sale with the highest and lowest amounts in its category
SELECT 
    product_name,
    category,
    amount,
    FIRST_VALUE(amount) OVER (
        PARTITION BY category 
        ORDER BY amount DESC
    ) as highest_in_category,
    LAST_VALUE(amount) OVER (
        PARTITION BY category 
        ORDER BY amount DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) as lowest_in_category
FROM sales;

Window Frame Specifications

ROWS vs RANGE

-- ROWS: Physical row count
SELECT 
    sale_date,
    amount,
    SUM(amount) OVER (
        ORDER BY sale_date 
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) as sum_3_rows
FROM sales;

-- RANGE: Logical value range
SELECT 
    sale_date,
    amount,
    SUM(amount) OVER (
        ORDER BY amount 
        RANGE BETWEEN 100 PRECEDING AND 100 FOLLOWING
    ) as sum_within_200_range
FROM sales;

Frame Boundaries

-- Different frame specifications
SELECT 
    sale_date,
    amount,
    -- All rows from start to current
    SUM(amount) OVER (
        ORDER BY sale_date 
        ROWS UNBOUNDED PRECEDING
    ) as cumulative_total,
    
    -- Current row only
    SUM(amount) OVER (
        ORDER BY sale_date 
        ROWS CURRENT ROW
    ) as current_only,
    
    -- 2 rows before to 1 row after
    SUM(amount) OVER (
        ORDER BY sale_date 
        ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING
    ) as sliding_window
FROM sales;

Practical Examples

Sales Analysis

-- Comprehensive sales analysis
SELECT 
    product_name,
    category,
    amount,
    sale_date,
    -- Ranking within category
    ROW_NUMBER() OVER (
        PARTITION BY category 
        ORDER BY amount DESC
    ) as category_rank,
    
    -- Running total by date
    SUM(amount) OVER (
        ORDER BY sale_date 
        ROWS UNBOUNDED PRECEDING
    ) as cumulative_sales,
    
    -- Category percentage
    ROUND(
        amount / SUM(amount) OVER (PARTITION BY category) * 100, 2
    ) as category_percentage,
    
    -- Moving average
    ROUND(
        AVG(amount) OVER (
            ORDER BY sale_date 
            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
        ), 2
    ) as moving_avg_3day
FROM sales
ORDER BY sale_date;

Employee Salary Analysis

-- Employee salary analysis (assuming we have employee data)
CREATE TABLE employees (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    department VARCHAR(50),
    salary DECIMAL(10,2),
    hire_date DATE
);

-- Sample query for employee analysis
SELECT 
    name,
    department,
    salary,
    hire_date,
    -- Salary rank in department
    RANK() OVER (
        PARTITION BY department 
        ORDER BY salary DESC
    ) as dept_salary_rank,
    
    -- Salary percentile
    NTILE(4) OVER (
        PARTITION BY department 
        ORDER BY salary DESC
    ) as salary_quartile,
    
    -- Salary difference from department average
    salary - AVG(salary) OVER (PARTITION BY department) as salary_diff_from_avg,
    
    -- Running total of salaries by hire date
    SUM(salary) OVER (
        ORDER BY hire_date 
        ROWS UNBOUNDED PRECEDING
    ) as cumulative_payroll
FROM employees;

Time Series Analysis

-- Daily sales with growth metrics
SELECT 
    sale_date,
    amount,
    -- Day-over-day growth
    ROUND(
        (amount - LAG(amount) OVER (ORDER BY sale_date)) / 
        LAG(amount) OVER (ORDER BY sale_date) * 100, 2
    ) as daily_growth_percent,
    
    -- 7-day moving average
    ROUND(
        AVG(amount) OVER (
            ORDER BY sale_date 
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ), 2
    ) as weekly_moving_avg,
    
    -- Running total
    SUM(amount) OVER (
        ORDER BY sale_date 
        ROWS UNBOUNDED PRECEDING
    ) as cumulative_amount
FROM sales
ORDER BY sale_date;

Performance Considerations

Indexing for Window Functions

-- Create indexes to optimize window function performance
CREATE INDEX idx_sales_category_amount ON sales(category, amount DESC);
CREATE INDEX idx_sales_date ON sales(sale_date);
CREATE INDEX idx_sales_category_date ON sales(category, sale_date);

Partitioning Strategy

-- Use appropriate partitioning for large datasets
-- Example: Partition by date for time-series data
CREATE TABLE sales_partitioned (
    id SERIAL,
    product_name VARCHAR(100),
    amount DECIMAL(10,2),
    sale_date DATE
) PARTITION BY RANGE (sale_date);

-- Create partitions
CREATE TABLE sales_2023_01 PARTITION OF sales_partitioned
FOR VALUES FROM ('2023-01-01') TO ('2023-02-01');

Best Practices

1. Use Appropriate Window Frames

-- For running totals
SUM(amount) OVER (ORDER BY date ROWS UNBOUNDED PRECEDING)

-- For moving averages
AVG(amount) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)

-- For ranking
RANK() OVER (PARTITION BY category ORDER BY amount DESC)

2. Combine with Other SQL Features

-- Use window functions with CTEs for complex analysis
WITH daily_totals AS (
    SELECT 
        sale_date,
        SUM(amount) as daily_amount
    FROM sales
    GROUP BY sale_date
),
running_analysis AS (
    SELECT 
        sale_date,
        daily_amount,
        SUM(daily_amount) OVER (
            ORDER BY sale_date 
            ROWS UNBOUNDED PRECEDING
        ) as cumulative_total,
        AVG(daily_amount) OVER (
            ORDER BY sale_date 
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ) as weekly_avg
    FROM daily_totals
)
SELECT * FROM running_analysis;

3. Avoid Overuse

-- Don't use window functions when simple aggregates suffice
-- Bad: Using window function for simple total
SELECT 
    category,
    SUM(amount) OVER (PARTITION BY category) as category_total
FROM sales;

-- Good: Use GROUP BY for simple aggregation
SELECT 
    category,
    SUM(amount) as category_total
FROM sales
GROUP BY category;

Window functions are incredibly powerful for analytical queries. They allow you to perform complex calculations while maintaining row-level detail, making them essential for data analysis and reporting in PostgreSQL.