Advanced Data Types: Leveraging PostgreSQL's Rich Type System

PostgreSQL offers a rich set of data types beyond the standard SQL types. These advanced types can simplify your schema, improve performance, and provide powerful querying capabilities. Let's explore the most useful advanced data types and when to use them.

JSON and JSONB

PostgreSQL supports both JSON and JSONB types for storing JSON data. JSONB is generally preferred as it's more efficient and supports indexing.

JSON vs JSONB

-- JSON: Stores JSON as text, preserves formatting
CREATE TABLE products_json (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    metadata JSON
);

-- JSONB: Stores JSON in binary format, more efficient
CREATE TABLE products_jsonb (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    metadata JSONB
);

-- Insert data
INSERT INTO products_jsonb (name, metadata) VALUES
('Laptop', '{"brand": "Dell", "specs": {"cpu": "Intel i7", "ram": "16GB"}, "tags": ["electronics", "computer"]}'),
('Phone', '{"brand": "iPhone", "specs": {"storage": "128GB", "color": "black"}, "tags": ["electronics", "mobile"]}');

Querying JSON Data

-- Extract values using operators
SELECT name, metadata->>'brand' as brand
FROM products_jsonb;

-- Extract nested values
SELECT name, metadata->'specs'->>'cpu' as cpu
FROM products_jsonb;

-- Check if key exists
SELECT name FROM products_jsonb 
WHERE metadata ? 'brand';

-- Check if array contains value
SELECT name FROM products_jsonb 
WHERE metadata->'tags' ? 'electronics';

-- Complex queries
SELECT name, metadata->>'brand' as brand
FROM products_jsonb
WHERE metadata->'specs'->>'ram' = '16GB'
  AND metadata->'tags' ? 'electronics';

JSONB Indexing

-- GIN index for efficient JSONB queries
CREATE INDEX idx_products_metadata ON products_jsonb USING GIN (metadata);

-- Index specific keys
CREATE INDEX idx_products_brand ON products_jsonb USING GIN ((metadata->'brand'));

-- Index array elements
CREATE INDEX idx_products_tags ON products_jsonb USING GIN ((metadata->'tags'));

Arrays

PostgreSQL supports arrays of any data type, making it easy to store multiple values in a single column.

Basic Array Operations

-- Create table with array columns
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    email VARCHAR(255),
    phone_numbers TEXT[],
    preferences INTEGER[]
);

-- Insert data with arrays
INSERT INTO users (name, email, phone_numbers, preferences) VALUES
('Alice', 'alice@example.com', ARRAY['+1-555-1234', '+1-555-5678'], ARRAY[1, 2, 3]),
('Bob', 'bob@example.com', ARRAY['+1-555-9999'], ARRAY[2, 4]);

-- Query arrays
SELECT name, phone_numbers[1] as primary_phone
FROM users;

-- Check if array contains value
SELECT name FROM users 
WHERE 'electronics' = ANY(preferences);

-- Unnest arrays
SELECT name, unnest(phone_numbers) as phone
FROM users;

Array Functions

-- Array length
SELECT name, array_length(phone_numbers, 1) as phone_count
FROM users;

-- Append to array
UPDATE users 
SET phone_numbers = array_append(phone_numbers, '+1-555-0000')
WHERE id = 1;

-- Remove from array
UPDATE users 
SET preferences = array_remove(preferences, 2)
WHERE id = 1;

-- Concatenate arrays
SELECT name, phone_numbers || ARRAY['+1-555-emergency'] as all_phones
FROM users;

Geometric Types

PostgreSQL includes built-in support for geometric data types.

Point Type

-- Store coordinates
CREATE TABLE locations (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    coordinates POINT
);

-- Insert points
INSERT INTO locations (name, coordinates) VALUES
('Office', '(40.7128, -74.0060)'),
('Home', '(40.7589, -73.9851)');

-- Calculate distance
SELECT name, coordinates <-> '(40.7128, -74.0060)' as distance
FROM locations
ORDER BY distance;

Line and Polygon Types

-- Store routes and areas
CREATE TABLE routes (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    path LINE
);

CREATE TABLE areas (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    boundary POLYGON
);

-- Insert geometric data
INSERT INTO routes (name, path) VALUES
('Route 1', '[(0,0),(1,1),(2,0)]');

INSERT INTO areas (name, boundary) VALUES
('Downtown', '((0,0),(0,1),(1,1),(1,0),(0,0))');

UUID Type

UUIDs are useful for distributed systems and when you need globally unique identifiers.

-- Enable UUID extension
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- Create table with UUID primary key
CREATE TABLE users (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    name VARCHAR(100),
    email VARCHAR(255)
);

-- Generate UUIDs
SELECT uuid_generate_v4();

-- Insert with generated UUID
INSERT INTO users (name, email) VALUES
('Alice', 'alice@example.com');
-- id will be automatically generated

Network Address Types

PostgreSQL provides specialized types for IP addresses and networks.

-- Store IP addresses
CREATE TABLE servers (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    ip_address INET,
    network CIDR
);

-- Insert network data
INSERT INTO servers (name, ip_address, network) VALUES
('Web Server', '192.168.1.100', '192.168.1.0/24'),
('Database Server', '10.0.0.50', '10.0.0.0/8');

-- Query by network
SELECT name, ip_address 
FROM servers 
WHERE ip_address << '192.168.1.0/24';

Range Types

Range types represent ranges of values, useful for scheduling and time periods.

-- Date ranges for bookings
CREATE TABLE bookings (
    id SERIAL PRIMARY KEY,
    room_id INTEGER,
    guest_name VARCHAR(100),
    stay_period DATERANGE,
    price_per_night DECIMAL(10,2)
);

-- Insert bookings
INSERT INTO bookings (room_id, guest_name, stay_period, price_per_night) VALUES
(1, 'Alice', '[2023-08-01,2023-08-05)', 150.00),
(1, 'Bob', '[2023-08-10,2023-08-15)', 150.00);

-- Check for overlapping bookings
SELECT b1.guest_name, b2.guest_name
FROM bookings b1, bookings b2
WHERE b1.id < b2.id 
  AND b1.room_id = b2.room_id
  AND b1.stay_period && b2.stay_period;

Custom Types

You can create your own data types in PostgreSQL.

ENUM Types

-- Create enum type
CREATE TYPE order_status AS ENUM ('pending', 'processing', 'shipped', 'delivered', 'cancelled');

-- Use enum in table
CREATE TABLE orders (
    id SERIAL PRIMARY KEY,
    user_id INTEGER,
    amount DECIMAL(10,2),
    status order_status DEFAULT 'pending'
);

-- Insert with enum
INSERT INTO orders (user_id, amount, status) VALUES
(1, 100.00, 'pending'),
(2, 200.00, 'processing');

Composite Types

-- Create composite type
CREATE TYPE address AS (
    street VARCHAR(100),
    city VARCHAR(50),
    state VARCHAR(2),
    zip_code VARCHAR(10)
);

-- Use composite type
CREATE TABLE customers (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    billing_address address,
    shipping_address address
);

-- Insert with composite type
INSERT INTO customers (name, billing_address, shipping_address) VALUES
('Alice', 
 ROW('123 Main St', 'New York', 'NY', '10001'),
 ROW('456 Oak Ave', 'Los Angeles', 'CA', '90210')
);

-- Query composite type fields
SELECT name, (billing_address).city as billing_city
FROM customers;

Full-Text Search Types

PostgreSQL provides powerful full-text search capabilities.

-- Create table with full-text search
CREATE TABLE documents (
    id SERIAL PRIMARY KEY,
    title VARCHAR(200),
    content TEXT,
    search_vector TSVECTOR
);

-- Insert documents
INSERT INTO documents (title, content) VALUES
('PostgreSQL Guide', 'PostgreSQL is a powerful open source database'),
('SQL Tutorial', 'Learn SQL basics and advanced features');

-- Generate search vectors
UPDATE documents 
SET search_vector = to_tsvector('english', title || ' ' || content);

-- Create GIN index for fast search
CREATE INDEX idx_documents_search ON documents USING GIN (search_vector);

-- Full-text search
SELECT title, content
FROM documents
WHERE search_vector @@ to_tsquery('english', 'postgresql & database');

Best Practices

1. Choose the Right Type

-- Use JSONB for flexible schema
CREATE TABLE user_profiles (
    id SERIAL PRIMARY KEY,
    user_id INTEGER,
    profile_data JSONB  -- Flexible profile data
);

-- Use arrays for simple lists
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    tags TEXT[]  -- Simple tag list
);

-- Use custom types for domain-specific data
CREATE TYPE money AS (
    amount DECIMAL(10,2),
    currency VARCHAR(3)
);

2. Index Appropriately

-- Index JSONB for efficient queries
CREATE INDEX idx_user_profiles_data ON user_profiles USING GIN (profile_data);

-- Index array elements
CREATE INDEX idx_products_tags ON products USING GIN (tags);

-- Index full-text search
CREATE INDEX idx_documents_search ON documents USING GIN (search_vector);

3. Use Constraints

-- Add constraints to custom types
CREATE TABLE orders (
    id SERIAL PRIMARY KEY,
    status order_status DEFAULT 'pending',
    CONSTRAINT valid_status CHECK (status IN ('pending', 'processing', 'shipped', 'delivered', 'cancelled'))
);

PostgreSQL's advanced data types can significantly simplify your schema and improve performance. Choose the right type for your use case and take advantage of PostgreSQL's powerful indexing and querying capabilities.