Transactions and Concurrency: Managing Data Consistency

Transactions are fundamental to database systems. They ensure that your data remains consistent even when multiple users or processes are accessing it simultaneously. PostgreSQL provides robust transaction support with ACID properties and multiple isolation levels.

What Are Transactions?

A transaction is a logical unit of work that contains one or more SQL statements. It either completes entirely (COMMIT) or is completely undone (ROLLBACK), ensuring data consistency.

ACID Properties

PostgreSQL transactions follow the ACID properties:

Atomicity

All operations in a transaction succeed or fail together.

-- Example: Transfer money between accounts
BEGIN;
  UPDATE accounts SET balance = balance - 100 WHERE id = 1;
  UPDATE accounts SET balance = balance + 100 WHERE id = 2;
  -- If either UPDATE fails, both are rolled back
COMMIT;

Consistency

The database moves from one valid state to another.

-- Example: Maintaining referential integrity
BEGIN;
  INSERT INTO orders (user_id, amount) VALUES (999, 100.00);
  -- This will fail if user_id 999 doesn't exist in users table
  -- The entire transaction is rolled back
COMMIT;

Isolation

Concurrent transactions don't interfere with each other.

-- Transaction A
BEGIN;
SELECT balance FROM accounts WHERE id = 1; -- Reads 1000

-- Transaction B (concurrent)
BEGIN;
UPDATE accounts SET balance = balance - 200 WHERE id = 1;
COMMIT;

-- Transaction A continues with its own view of the data
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;

Durability

Committed transactions persist even after system failures.

-- Once committed, this data is permanently stored
BEGIN;
INSERT INTO audit_log (action, timestamp) VALUES ('user_login', NOW());
COMMIT;
-- Data survives system crashes and restarts

Transaction Control

Basic Transaction Commands

-- Start a transaction
BEGIN;
-- or
START TRANSACTION;

-- Commit a transaction
COMMIT;
-- or
END;

-- Rollback a transaction
ROLLBACK;

-- Savepoints for partial rollbacks
BEGIN;
INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com');
SAVEPOINT user_created;
INSERT INTO user_profiles (user_id, bio) VALUES (LASTVAL(), 'Hello world');
-- If profile creation fails, we can rollback to savepoint
ROLLBACK TO SAVEPOINT user_created;
-- User is still created, but profile creation is undone
COMMIT;

Auto-commit Mode

-- PostgreSQL runs in auto-commit mode by default
-- Each statement is automatically wrapped in a transaction
INSERT INTO users (name, email) VALUES ('Bob', 'bob@example.com');
-- This is automatically committed

-- Disable auto-commit for a session
SET autocommit = OFF;
-- Now you need explicit BEGIN/COMMIT

Isolation Levels

PostgreSQL supports four transaction isolation levels, each providing different guarantees about concurrent access.

Read Uncommitted

-- Not actually supported in PostgreSQL (same as Read Committed)
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
-- PostgreSQL will use Read Committed instead

Read Committed (Default)

-- Each statement sees a snapshot of data as of the start of the statement
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;

-- Example: Non-repeatable reads
-- Transaction A
BEGIN;
SELECT balance FROM accounts WHERE id = 1; -- Returns 1000

-- Transaction B
BEGIN;
UPDATE accounts SET balance = 1500 WHERE id = 1;
COMMIT;

-- Transaction A
SELECT balance FROM accounts WHERE id = 1; -- Returns 1500 (different!)
COMMIT;

Repeatable Read

-- Each transaction sees a snapshot as of the start of the transaction
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;

-- Example: Consistent reads within transaction
-- Transaction A
BEGIN;
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SELECT balance FROM accounts WHERE id = 1; -- Returns 1000

-- Transaction B
BEGIN;
UPDATE accounts SET balance = 1500 WHERE id = 1;
COMMIT;

-- Transaction A
SELECT balance FROM accounts WHERE id = 1; -- Still returns 1000
COMMIT;

Serializable

-- Highest isolation level, prevents all concurrency anomalies
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;

-- Example: Serializable transaction
BEGIN;
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;

-- This transaction will fail if another transaction modifies
-- the same data concurrently
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;

COMMIT;
-- If another transaction conflicts, you'll get a serialization error

Concurrency Control

Row-Level Locking

-- SELECT FOR UPDATE locks rows for update
BEGIN;
SELECT * FROM accounts WHERE id = 1 FOR UPDATE;
-- Other transactions must wait to update this row
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;

-- SELECT FOR SHARE allows concurrent reads but blocks writes
BEGIN;
SELECT * FROM accounts WHERE id = 1 FOR SHARE;
-- Other transactions can read but not modify
COMMIT;

Deadlock Prevention

-- Deadlock example
-- Transaction A
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
-- Waits for Transaction B to release lock on account 2

-- Transaction B
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 2;
-- Waits for Transaction A to release lock on account 1
-- DEADLOCK!

-- Solution: Always lock rows in the same order
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
COMMIT;

Practical Examples

Bank Transfer with Error Handling

-- Safe money transfer
DO $$
DECLARE
    from_account_id INTEGER := 1;
    to_account_id INTEGER := 2;
    transfer_amount DECIMAL(10,2) := 100.00;
    current_balance DECIMAL(10,2);
BEGIN
    -- Start transaction
    BEGIN
        -- Check if source account has sufficient funds
        SELECT balance INTO current_balance 
        FROM accounts 
        WHERE id = from_account_id 
        FOR UPDATE;
        
        IF current_balance < transfer_amount THEN
            RAISE EXCEPTION 'Insufficient funds: %', current_balance;
        END IF;
        
        -- Perform transfer
        UPDATE accounts SET balance = balance - transfer_amount 
        WHERE id = from_account_id;
        
        UPDATE accounts SET balance = balance + transfer_amount 
        WHERE id = to_account_id;
        
        -- Log the transaction
        INSERT INTO transfer_log (from_account, to_account, amount, timestamp)
        VALUES (from_account_id, to_account_id, transfer_amount, NOW());
        
        -- Commit transaction
        COMMIT;
        
        RAISE NOTICE 'Transfer completed successfully';
        
    EXCEPTION
        WHEN OTHERS THEN
            -- Rollback on any error
            ROLLBACK;
            RAISE NOTICE 'Transfer failed: %', SQLERRM;
    END;
END $$;

Inventory Management

-- Atomic inventory update
BEGIN;
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;

-- Check current inventory
SELECT quantity FROM inventory WHERE product_id = 123 FOR UPDATE;

-- Update inventory atomically
UPDATE inventory 
SET quantity = quantity - 1,
    last_updated = NOW()
WHERE product_id = 123 AND quantity > 0;

-- Check if update was successful
IF NOT FOUND THEN
    RAISE EXCEPTION 'Product out of stock';
END IF;

-- Create order
INSERT INTO orders (product_id, quantity, order_date)
VALUES (123, 1, NOW());

COMMIT;

Monitoring Transactions

View Active Transactions

-- Check current transactions
SELECT pid, usename, application_name, state, query_start, query
FROM pg_stat_activity
WHERE state = 'active';

-- Check for long-running transactions
SELECT pid, usename, application_name, 
       EXTRACT(EPOCH FROM (NOW() - query_start)) as duration_seconds,
       query
FROM pg_stat_activity
WHERE state = 'active' 
  AND query_start < NOW() - INTERVAL '5 minutes';

Check for Locks

-- View current locks
SELECT l.pid, l.mode, l.granted, 
       t.relname as table_name,
       a.usename, a.query
FROM pg_locks l
JOIN pg_class t ON l.relation = t.oid
JOIN pg_stat_activity a ON l.pid = a.pid
WHERE t.relname IS NOT NULL;

Best Practices

1. Keep Transactions Short

-- Bad: Long transaction
BEGIN;
-- Many operations that take time
-- ... (user interaction, API calls, etc.)
COMMIT;

-- Good: Short, focused transactions
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;

BEGIN;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
COMMIT;

2. Use Appropriate Isolation Levels

-- Use Read Committed for most operations
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;

-- Use Repeatable Read when you need consistent reads
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;

-- Use Serializable only when absolutely necessary
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;

3. Handle Deadlocks Gracefully

-- Retry logic for deadlocks
DO $$
DECLARE
    retry_count INTEGER := 0;
    max_retries INTEGER := 3;
BEGIN
    LOOP
        BEGIN
            -- Your transaction here
            UPDATE accounts SET balance = balance - 100 WHERE id = 1;
            UPDATE accounts SET balance = balance + 100 WHERE id = 2;
            EXIT; -- Success, exit loop
            
        EXCEPTION
            WHEN deadlock_detected THEN
                retry_count := retry_count + 1;
                IF retry_count >= max_retries THEN
                    RAISE EXCEPTION 'Max retries exceeded';
                END IF;
                -- Wait a bit before retrying
                PERFORM pg_sleep(0.1 * retry_count);
        END;
    END LOOP;
END $$;

4. Use Savepoints for Complex Operations

BEGIN;
-- Step 1: Create user
INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com');
SAVEPOINT user_created;

-- Step 2: Create profile
INSERT INTO user_profiles (user_id, bio) VALUES (LASTVAL(), 'Hello world');
SAVEPOINT profile_created;

-- Step 3: Send welcome email (might fail)
-- ... email sending logic ...

-- If email fails, rollback to profile creation
ROLLBACK TO SAVEPOINT profile_created;
-- User and profile remain, but email step is undone

COMMIT;

Understanding transactions and concurrency is crucial for building reliable database applications. PostgreSQL's robust transaction support helps ensure data consistency even in high-concurrency environments.