Window Functions: Advanced Analytical Queries
Window functions are powerful SQL features that allow you to perform calculations across a set of rows related to the current row. Unlike aggregate functions that group rows, window functions maintain individual rows while providing access to other rows in the result set.
Understanding Window Functions
Window functions operate on a "window" of rows defined by the OVER
clause. They don't reduce the number of rows like GROUP BY does—instead, they add calculated columns to each row.
Basic Syntax
-- Basic window function syntax
SELECT
column1,
column2,
window_function(column3) OVER (
PARTITION BY column4
ORDER BY column5
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) as calculated_column
FROM table_name;
Sample Data
Let's use this sample data for our examples:
-- Create sample tables
CREATE TABLE sales (
id SERIAL PRIMARY KEY,
product_name VARCHAR(100),
category VARCHAR(50),
amount DECIMAL(10,2),
sale_date DATE
);
-- Insert sample data
INSERT INTO sales (product_name, category, amount, sale_date) VALUES
('Laptop', 'Electronics', 1200.00, '2023-01-15'),
('Phone', 'Electronics', 800.00, '2023-01-16'),
('Desk', 'Furniture', 300.00, '2023-01-17'),
('Chair', 'Furniture', 150.00, '2023-01-18'),
('Tablet', 'Electronics', 500.00, '2023-01-19'),
('Bookshelf', 'Furniture', 200.00, '2023-01-20'),
('Monitor', 'Electronics', 400.00, '2023-01-21'),
('Couch', 'Furniture', 800.00, '2023-01-22');
Ranking Functions
ROW_NUMBER()
Assigns a unique sequential number to each row within a partition.
-- Rank sales by amount within each category
SELECT
product_name,
category,
amount,
ROW_NUMBER() OVER (
PARTITION BY category
ORDER BY amount DESC
) as rank_in_category
FROM sales;
-- Result:
-- product_name | category | amount | rank_in_category
-- Laptop | Electronics| 1200.00 | 1
-- Phone | Electronics| 800.00 | 2
-- Monitor | Electronics| 400.00 | 3
-- Tablet | Electronics| 500.00 | 4
-- Couch | Furniture | 800.00 | 1
-- Desk | Furniture | 300.00 | 2
-- Chair | Furniture | 150.00 | 3
-- Bookshelf | Furniture | 200.00 | 4
RANK() and DENSE_RANK()
Handle ties differently than ROW_NUMBER().
-- Compare ranking functions
SELECT
product_name,
amount,
ROW_NUMBER() OVER (ORDER BY amount DESC) as row_num,
RANK() OVER (ORDER BY amount DESC) as rank_val,
DENSE_RANK() OVER (ORDER BY amount DESC) as dense_rank_val
FROM sales;
-- ROW_NUMBER(): Always unique, even for ties
-- RANK(): Same rank for ties, gaps in sequence
-- DENSE_RANK(): Same rank for ties, no gaps
NTILE()
Divides rows into a specified number of groups.
-- Divide products into 3 price tiers
SELECT
product_name,
amount,
NTILE(3) OVER (ORDER BY amount DESC) as price_tier
FROM sales;
-- Result: 3 tiers based on price
-- Tier 1: Most expensive products
-- Tier 2: Medium-priced products
-- Tier 3: Least expensive products
Aggregate Window Functions
Running Totals
-- Calculate running total of sales by date
SELECT
sale_date,
amount,
SUM(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) as running_total
FROM sales
ORDER BY sale_date;
-- Result:
-- sale_date | amount | running_total
-- 2023-01-15 | 1200.00 | 1200.00
-- 2023-01-16 | 800.00 | 2000.00
-- 2023-01-17 | 300.00 | 2300.00
-- ...
Moving Averages
-- Calculate 3-day moving average
SELECT
sale_date,
amount,
AVG(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) as moving_avg_3day
FROM sales
ORDER BY sale_date;
Category Totals
-- Show individual sales with category totals
SELECT
product_name,
category,
amount,
SUM(amount) OVER (PARTITION BY category) as category_total,
amount / SUM(amount) OVER (PARTITION BY category) * 100 as category_percentage
FROM sales;
Value Functions
LAG() and LEAD()
Access previous and next rows.
-- Compare current sale with previous sale
SELECT
sale_date,
amount,
LAG(amount) OVER (ORDER BY sale_date) as previous_amount,
amount - LAG(amount) OVER (ORDER BY sale_date) as difference
FROM sales
ORDER BY sale_date;
-- Compare with next sale
SELECT
sale_date,
amount,
LEAD(amount) OVER (ORDER BY sale_date) as next_amount,
LEAD(amount) OVER (ORDER BY sale_date) - amount as difference
FROM sales
ORDER BY sale_date;
FIRST_VALUE() and LAST_VALUE()
Access first and last values in a window.
-- Show each sale with the highest and lowest amounts in its category
SELECT
product_name,
category,
amount,
FIRST_VALUE(amount) OVER (
PARTITION BY category
ORDER BY amount DESC
) as highest_in_category,
LAST_VALUE(amount) OVER (
PARTITION BY category
ORDER BY amount DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) as lowest_in_category
FROM sales;
Window Frame Specifications
ROWS vs RANGE
-- ROWS: Physical row count
SELECT
sale_date,
amount,
SUM(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) as sum_3_rows
FROM sales;
-- RANGE: Logical value range
SELECT
sale_date,
amount,
SUM(amount) OVER (
ORDER BY amount
RANGE BETWEEN 100 PRECEDING AND 100 FOLLOWING
) as sum_within_200_range
FROM sales;
Frame Boundaries
-- Different frame specifications
SELECT
sale_date,
amount,
-- All rows from start to current
SUM(amount) OVER (
ORDER BY sale_date
ROWS UNBOUNDED PRECEDING
) as cumulative_total,
-- Current row only
SUM(amount) OVER (
ORDER BY sale_date
ROWS CURRENT ROW
) as current_only,
-- 2 rows before to 1 row after
SUM(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING
) as sliding_window
FROM sales;
Practical Examples
Sales Analysis
-- Comprehensive sales analysis
SELECT
product_name,
category,
amount,
sale_date,
-- Ranking within category
ROW_NUMBER() OVER (
PARTITION BY category
ORDER BY amount DESC
) as category_rank,
-- Running total by date
SUM(amount) OVER (
ORDER BY sale_date
ROWS UNBOUNDED PRECEDING
) as cumulative_sales,
-- Category percentage
ROUND(
amount / SUM(amount) OVER (PARTITION BY category) * 100, 2
) as category_percentage,
-- Moving average
ROUND(
AVG(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
), 2
) as moving_avg_3day
FROM sales
ORDER BY sale_date;
Employee Salary Analysis
-- Employee salary analysis (assuming we have employee data)
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
department VARCHAR(50),
salary DECIMAL(10,2),
hire_date DATE
);
-- Sample query for employee analysis
SELECT
name,
department,
salary,
hire_date,
-- Salary rank in department
RANK() OVER (
PARTITION BY department
ORDER BY salary DESC
) as dept_salary_rank,
-- Salary percentile
NTILE(4) OVER (
PARTITION BY department
ORDER BY salary DESC
) as salary_quartile,
-- Salary difference from department average
salary - AVG(salary) OVER (PARTITION BY department) as salary_diff_from_avg,
-- Running total of salaries by hire date
SUM(salary) OVER (
ORDER BY hire_date
ROWS UNBOUNDED PRECEDING
) as cumulative_payroll
FROM employees;
Time Series Analysis
-- Daily sales with growth metrics
SELECT
sale_date,
amount,
-- Day-over-day growth
ROUND(
(amount - LAG(amount) OVER (ORDER BY sale_date)) /
LAG(amount) OVER (ORDER BY sale_date) * 100, 2
) as daily_growth_percent,
-- 7-day moving average
ROUND(
AVG(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
), 2
) as weekly_moving_avg,
-- Running total
SUM(amount) OVER (
ORDER BY sale_date
ROWS UNBOUNDED PRECEDING
) as cumulative_amount
FROM sales
ORDER BY sale_date;
Performance Considerations
Indexing for Window Functions
-- Create indexes to optimize window function performance
CREATE INDEX idx_sales_category_amount ON sales(category, amount DESC);
CREATE INDEX idx_sales_date ON sales(sale_date);
CREATE INDEX idx_sales_category_date ON sales(category, sale_date);
Partitioning Strategy
-- Use appropriate partitioning for large datasets
-- Example: Partition by date for time-series data
CREATE TABLE sales_partitioned (
id SERIAL,
product_name VARCHAR(100),
amount DECIMAL(10,2),
sale_date DATE
) PARTITION BY RANGE (sale_date);
-- Create partitions
CREATE TABLE sales_2023_01 PARTITION OF sales_partitioned
FOR VALUES FROM ('2023-01-01') TO ('2023-02-01');
Best Practices
1. Use Appropriate Window Frames
-- For running totals
SUM(amount) OVER (ORDER BY date ROWS UNBOUNDED PRECEDING)
-- For moving averages
AVG(amount) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)
-- For ranking
RANK() OVER (PARTITION BY category ORDER BY amount DESC)
2. Combine with Other SQL Features
-- Use window functions with CTEs for complex analysis
WITH daily_totals AS (
SELECT
sale_date,
SUM(amount) as daily_amount
FROM sales
GROUP BY sale_date
),
running_analysis AS (
SELECT
sale_date,
daily_amount,
SUM(daily_amount) OVER (
ORDER BY sale_date
ROWS UNBOUNDED PRECEDING
) as cumulative_total,
AVG(daily_amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as weekly_avg
FROM daily_totals
)
SELECT * FROM running_analysis;
3. Avoid Overuse
-- Don't use window functions when simple aggregates suffice
-- Bad: Using window function for simple total
SELECT
category,
SUM(amount) OVER (PARTITION BY category) as category_total
FROM sales;
-- Good: Use GROUP BY for simple aggregation
SELECT
category,
SUM(amount) as category_total
FROM sales
GROUP BY category;
Window functions are incredibly powerful for analytical queries. They allow you to perform complex calculations while maintaining row-level detail, making them essential for data analysis and reporting in PostgreSQL.