
Mastering PostgreSQL: From Basics to Production-Grade Database Design

A comprehensive guide to PostgreSQL, covering schema design, query optimization, indexing strategies, and scaling for production workloads.


Why PostgreSQL won me over#

I used to think all SQL databases were basically the same. Then I discovered PostgreSQL's JSON support, full-text search, and window functions. Suddenly, I was solving problems that would have required multiple tools—all within a single database.

PostgreSQL isn't just a relational database. It's a data platform that grows with your application's complexity.

Let me share what I've learned building production systems on Postgres.

Part 1: Foundation - Schema Design#

Rule #1: Normalize, then denormalize strategically#

Start with normalization:

-- Users table
CREATE TABLE users (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  email VARCHAR(255) UNIQUE NOT NULL,
  username VARCHAR(50) UNIQUE NOT NULL,
  created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
  updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
 
-- Posts table
CREATE TABLE posts (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  author_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  title VARCHAR(255) NOT NULL,
  slug VARCHAR(255) UNIQUE NOT NULL,
  content TEXT NOT NULL,
  published_at TIMESTAMP WITH TIME ZONE,
  created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
  updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
 
-- Tags table (many-to-many)
CREATE TABLE tags (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  name VARCHAR(50) UNIQUE NOT NULL,
  slug VARCHAR(50) UNIQUE NOT NULL
);
 
CREATE TABLE post_tags (
  post_id UUID NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
  tag_id UUID NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
  PRIMARY KEY (post_id, tag_id)
);
 
-- Comments table
CREATE TABLE comments (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  post_id UUID NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
  author_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  content TEXT NOT NULL,
  parent_id UUID REFERENCES comments(id) ON DELETE CASCADE,
  created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
  updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

Add denormalized counter columns, kept in sync by triggers, when queries get slow:

-- Denormalize comment count
ALTER TABLE posts
ADD COLUMN comment_count INTEGER DEFAULT 0;
 
-- Keep it in sync with a trigger
CREATE OR REPLACE FUNCTION update_post_comment_count()
RETURNS TRIGGER AS $$
BEGIN
  IF TG_OP = 'INSERT' THEN
    UPDATE posts SET comment_count = comment_count + 1 WHERE id = NEW.post_id;
  ELSIF TG_OP = 'DELETE' THEN
    UPDATE posts SET comment_count = comment_count - 1 WHERE id = OLD.post_id;
  END IF;
  RETURN NULL;
END;
$$ LANGUAGE plpgsql;
 
CREATE TRIGGER trigger_update_comment_count
AFTER INSERT OR DELETE ON comments
FOR EACH ROW EXECUTE FUNCTION update_post_comment_count();

Normalized schema diagram

Rule #2: Use the right data types#

-- ❌ Bad: Using TEXT for everything
CREATE TABLE events (
  id TEXT,
  user_id TEXT,
  happened_at TEXT,
  data TEXT
);
 
-- ✅ Good: Appropriate types
CREATE TABLE events (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  user_id UUID NOT NULL REFERENCES users(id),
  happened_at TIMESTAMP WITH TIME ZONE NOT NULL,
  event_type VARCHAR(50) NOT NULL,
  data JSONB NOT NULL,  -- Structured data
  metadata JSONB DEFAULT '{}'::jsonb,
 
  -- Constraints
  CONSTRAINT valid_event_type CHECK (event_type IN ('user_action', 'system_event', 'error'))
);
 
-- Create indexes for JSONB queries
CREATE INDEX idx_events_data_gin ON events USING GIN (data);
CREATE INDEX idx_events_event_type ON events(event_type);
CREATE INDEX idx_events_happened_at ON events(happened_at);
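
One caveat worth spelling out: a plain GIN index on a JSONB column accelerates containment and key-existence operators (@>, ?), not ->> equality. A rough sketch, using a hypothetical "action" key inside data:

-- Containment query: can use idx_events_data_gin
SELECT * FROM events
WHERE data @> '{"action": "login"}'::jsonb;

-- Equality on an extracted key needs an expression index instead
CREATE INDEX idx_events_data_action ON events ((data->>'action'));

SELECT * FROM events WHERE data->>'action' = 'login';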

PostgreSQL's rich type system:

-- Arrays
CREATE TABLE products (
  id UUID PRIMARY KEY,
  name VARCHAR(255),
  tags TEXT[] DEFAULT '{}'
);
 
INSERT INTO products (id, name, tags)
VALUES (gen_random_uuid(), 'Laptop', ARRAY['electronics', 'computers']);
 
-- Query arrays
SELECT * FROM products WHERE 'electronics' = ANY(tags);
 
-- JSON/JSONB (JSONB is binary, faster)
CREATE TABLE user_preferences (
  user_id UUID PRIMARY KEY,
  settings JSONB DEFAULT '{}'::jsonb
);
 
INSERT INTO user_preferences (user_id, settings)
VALUES (gen_random_uuid(), '{"theme": "dark", "notifications": {"email": true}}');
 
-- Query JSON
SELECT * FROM user_preferences
WHERE settings->>'theme' = 'dark';
 
SELECT * FROM user_preferences
WHERE settings->'notifications'->>'email' = 'true';
 
-- Enums
CREATE TYPE user_role AS ENUM ('admin', 'editor', 'viewer');
 
ALTER TABLE users ADD COLUMN role user_role DEFAULT 'viewer';
 
-- Range types
CREATE EXTENSION IF NOT EXISTS btree_gist;  -- needed for "room_id WITH =" in the exclusion constraint below
CREATE TABLE hotel_bookings (
  id UUID PRIMARY KEY,
  room_id UUID,
  booking_period DATERANGE NOT NULL,
 
  -- Prevent overlapping bookings
  CONSTRAINT no_overlap EXCLUDE USING GIST (
    room_id WITH =,
    booking_period WITH &&
  )
);
 
-- Full-text search
CREATE TABLE articles (
  id UUID PRIMARY KEY,
  title TEXT,
  content TEXT,
  search_vector TSVECTOR
);
 
CREATE INDEX idx_articles_search ON articles USING GIN(search_vector);
 
-- Update search vector on insert/update
CREATE TRIGGER trigger_articles_search_update
BEFORE INSERT OR UPDATE ON articles
FOR EACH ROW EXECUTE FUNCTION
tsvector_update_trigger(search_vector, 'pg_catalog.english', title, content);
 
-- Full-text search query
SELECT * FROM articles
WHERE search_vector @@ to_tsquery('english', 'postgresql & performance');
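
On PostgreSQL 12 and newer, a stored generated column can replace the trigger above; a minimal sketch (the column and index names here are illustrative, not part of the schema above):

-- Alternative to the trigger: keep the tsvector as a generated column (PostgreSQL 12+)
ALTER TABLE articles
  ADD COLUMN search_vector_gen TSVECTOR
  GENERATED ALWAYS AS (
    to_tsvector('english', coalesce(title, '') || ' ' || coalesce(content, ''))
  ) STORED;

CREATE INDEX idx_articles_search_gen ON articles USING GIN(search_vector_gen);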

Part 2: Querying - From Simple to Advanced#

Window functions: Your secret weapon#

-- Ranking posts by views within each category
SELECT
  id,
  title,
  category,
  views,
  RANK() OVER (PARTITION BY category ORDER BY views DESC) as rank_in_category,
  PERCENT_RANK() OVER (PARTITION BY category ORDER BY views DESC) as percentile
FROM posts;
 
-- Running totals
SELECT
  created_at::DATE as date,
  COUNT(*) as new_users,
  SUM(COUNT(*)) OVER (ORDER BY created_at::DATE) as total_users
FROM users
GROUP BY created_at::DATE
ORDER BY date;
 
-- Moving average
SELECT
  date,
  revenue,
  AVG(revenue) OVER (
    ORDER BY date
    ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
  ) as seven_day_avg
FROM daily_revenue;
 
-- Lead/Lag for comparing with previous/next row
SELECT
  date,
  revenue,
  LAG(revenue, 1) OVER (ORDER BY date) as prev_day_revenue,
  revenue - LAG(revenue, 1) OVER (ORDER BY date) as day_over_day_change
FROM daily_revenue;

CTEs: Break complex queries into steps#

-- Common Table Expressions
WITH popular_posts AS (
  SELECT
    id,
    title,
    author_id,
    views
  FROM posts
  WHERE views > 1000
),
author_stats AS (
  SELECT
    author_id,
    COUNT(*) as popular_post_count,
    SUM(views) as total_views
  FROM popular_posts
  GROUP BY author_id
)
SELECT
  u.username,
  a.popular_post_count,
  a.total_views,
  ROUND(a.total_views::NUMERIC / a.popular_post_count, 2) as avg_views_per_post
FROM author_stats a
JOIN users u ON u.id = a.author_id
ORDER BY a.total_views DESC;
 
-- Recursive CTEs for hierarchical data (comments tree)
WITH RECURSIVE comment_tree AS (
  -- Base case: root comments
  SELECT
    id,
    content,
    parent_id,
    author_id,
    1 as depth,
    ARRAY[id] as path
  FROM comments
  WHERE parent_id IS NULL AND post_id = '...'
 
  UNION ALL
 
  -- Recursive case: replies
  SELECT
    c.id,
    c.content,
    c.parent_id,
    c.author_id,
    ct.depth + 1,
    ct.path || c.id
  FROM comments c
  JOIN comment_tree ct ON c.parent_id = ct.id
  WHERE ct.depth < 5  -- Limit depth
)
SELECT
  id,
  content,
  depth,
  path
FROM comment_tree
ORDER BY path;

Aggregations and grouping#

-- Basic aggregation
SELECT
  author_id,
  COUNT(*) as post_count,
  COUNT(DISTINCT DATE_TRUNC('month', created_at)) as active_months,
  MIN(created_at) as first_post,
  MAX(created_at) as latest_post
FROM posts
GROUP BY author_id
HAVING COUNT(*) > 10;
 
-- FILTER clause (better than CASE WHEN in aggregations)
SELECT
  DATE_TRUNC('month', created_at) as month,
  COUNT(*) as total_posts,
  COUNT(*) FILTER (WHERE published_at IS NOT NULL) as published_posts,
  COUNT(*) FILTER (WHERE published_at IS NULL) as draft_posts,
  AVG(views) FILTER (WHERE published_at IS NOT NULL) as avg_views_published
FROM posts
GROUP BY DATE_TRUNC('month', created_at)
ORDER BY month DESC;
 
-- GROUPING SETS for multiple aggregation levels
SELECT
  category,
  author_id,
  COUNT(*) as post_count
FROM posts
GROUP BY GROUPING SETS (
  (category, author_id),  -- Posts per author per category
  (category),             -- Posts per category
  (author_id),            -- Posts per author
  ()                      -- Total posts
);

Part 3: Performance - Indexes and Query Optimization#

Index strategy#

-- B-tree index (default, best for equality and range queries)
CREATE INDEX idx_posts_published_at ON posts(published_at);
CREATE INDEX idx_posts_author_id ON posts(author_id);
 
-- Composite index (order matters!)
CREATE INDEX idx_posts_author_published
ON posts(author_id, published_at DESC);
 
-- This index can serve:
-- WHERE author_id = '...'
-- WHERE author_id = '...' AND published_at > '...'
-- WHERE author_id = '...' ORDER BY published_at DESC
 
-- But NOT:
-- WHERE published_at > '...'  -- Needs separate index
 
-- Partial index (smaller, faster)
CREATE INDEX idx_posts_published
ON posts(published_at)
WHERE published_at IS NOT NULL;
 
-- Covering index (includes extra columns)
CREATE INDEX idx_posts_author_title
ON posts(author_id)
INCLUDE (title, views);
 
-- Query can be satisfied entirely from index
SELECT title, views FROM posts WHERE author_id = '...';
 
-- GIN index for full-text search, JSONB, arrays
CREATE INDEX idx_posts_tags_gin ON posts USING GIN(tags);
CREATE INDEX idx_events_data_gin ON events USING GIN(data);
 
-- GiST index for geometric data, ranges
CREATE INDEX idx_bookings_period_gist
ON bookings USING GIST(booking_period);
 
-- Hash index (only equality, rarely needed)
CREATE INDEX idx_users_email_hash ON users USING HASH(email);

Analyze your indexes:

-- Find unused indexes
SELECT
  schemaname,
  relname as tablename,
  indexrelname as indexname,
  idx_scan as scans,
  pg_size_pretty(pg_relation_size(indexrelid)) as size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
  AND indexrelname NOT LIKE '%_pkey'
ORDER BY pg_relation_size(indexrelid) DESC;
 
-- Find missing indexes (queries doing sequential scans)
SELECT
  schemaname,
  relname as tablename,
  seq_scan,
  seq_tup_read,
  idx_scan,
  seq_tup_read / seq_scan as avg_seq_read
FROM pg_stat_user_tables
WHERE seq_scan > 0
ORDER BY seq_tup_read DESC
LIMIT 10;

Query optimization techniques#

-- Use EXPLAIN ANALYZE to understand query plans
EXPLAIN ANALYZE
SELECT p.title, u.username, COUNT(c.id) as comment_count
FROM posts p
JOIN users u ON p.author_id = u.id
LEFT JOIN comments c ON c.post_id = p.id
WHERE p.published_at > NOW() - INTERVAL '30 days'
GROUP BY p.id, p.title, u.username
ORDER BY comment_count DESC
LIMIT 10;
 
-- Look for:
-- - Seq Scan (should be Index Scan for large tables)
-- - High "cost" estimates
-- - "rows removed by filter" (missing indexes)
 
-- Prefer EXISTS over IN for subqueries
-- ❌ Can be slower when the subquery returns many rows
SELECT * FROM posts
WHERE author_id IN (
  SELECT id FROM users WHERE role = 'admin'
);
 
-- ✅ Usually at least as fast, and scales better
SELECT * FROM posts
WHERE EXISTS (
  SELECT 1 FROM users
  WHERE users.id = posts.author_id AND users.role = 'admin'
);
 
-- Use LIMIT with indexes
SELECT * FROM posts
ORDER BY created_at DESC
LIMIT 10;
-- Requires index on created_at
 
-- Avoid SELECT *
-- ❌ Bad
SELECT * FROM posts;
 
-- ✅ Good
SELECT id, title, excerpt FROM posts;
 
-- Batch operations
-- ❌ Bad: N queries
-- for (const userId of userIds) {
--   await db.query('SELECT * FROM users WHERE id = $1', [userId]);
-- }
 
-- ✅ Good: 1 query
SELECT * FROM users WHERE id = ANY($1);
-- Pass array: [userId1, userId2, userId3]

Query execution plan visualization


Part 4: Advanced Features#

Transactions and concurrency#

-- ACID transactions
BEGIN;
  UPDATE accounts SET balance = balance - 100 WHERE id = 'sender';
  UPDATE accounts SET balance = balance + 100 WHERE id = 'receiver';
COMMIT;
 
-- Isolation levels
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
 
-- Optimistic locking with version column
CREATE TABLE documents (
  id UUID PRIMARY KEY,
  content TEXT,
  version INTEGER DEFAULT 1
);
 
-- In application:
-- 1. Read document and version
SELECT id, content, version FROM documents WHERE id = '...';
 
-- 2. Update with version check
UPDATE documents
SET content = $1, version = version + 1
WHERE id = $2 AND version = $3;
-- If no rows updated, someone else modified it
 
-- Row-level locks
BEGIN;
  SELECT * FROM posts WHERE id = '...' FOR UPDATE;
  -- Other transactions wait until this commits
  UPDATE posts SET views = views + 1 WHERE id = '...';
COMMIT;
 
-- Advisory locks (application-level coordination)
SELECT pg_advisory_lock(12345);
-- Do critical work
SELECT pg_advisory_unlock(12345);
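
Two variants of the same idea that I find handy: a non-blocking version, and a transaction-scoped version that releases itself:

-- Non-blocking: returns true if the lock was acquired, false if someone else holds it
SELECT pg_try_advisory_lock(12345);

-- Transaction-scoped: released automatically at COMMIT or ROLLBACK
BEGIN;
  SELECT pg_advisory_xact_lock(12345);
  -- Do critical work
COMMIT;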

Functions and triggers#

-- Function to update timestamps
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
  NEW.updated_at = NOW();
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;
 
-- Apply to all tables
CREATE TRIGGER trigger_posts_updated_at
BEFORE UPDATE ON posts
FOR EACH ROW EXECUTE FUNCTION update_updated_at();
 
-- Complex business logic in functions
CREATE OR REPLACE FUNCTION create_post_with_tags(
  p_author_id UUID,
  p_title VARCHAR,
  p_content TEXT,
  p_tag_names TEXT[]
)
RETURNS UUID AS $$
DECLARE
  v_post_id UUID;
  v_tag_name TEXT;
  v_tag_id UUID;
BEGIN
  -- Create post
  INSERT INTO posts (author_id, title, slug, content)  -- slug is NOT NULL in the schema above
  VALUES (p_author_id, p_title, lower(regexp_replace(p_title, '[^a-zA-Z0-9]+', '-', 'g')), p_content)
  RETURNING id INTO v_post_id;
 
  -- Link tags
  FOREACH v_tag_name IN ARRAY p_tag_names
  LOOP
    -- Get or create tag
    INSERT INTO tags (name, slug)
    VALUES (v_tag_name, lower(v_tag_name))
    ON CONFLICT (slug) DO NOTHING;
 
    SELECT id INTO v_tag_id FROM tags WHERE slug = lower(v_tag_name);
 
    -- Link to post
    INSERT INTO post_tags (post_id, tag_id)
    VALUES (v_post_id, v_tag_id)
    ON CONFLICT DO NOTHING;
  END LOOP;
 
  RETURN v_post_id;
END;
$$ LANGUAGE plpgsql;
 
-- Use it
SELECT create_post_with_tags(
  'user-uuid',
  'My Post',
  'Content here',
  ARRAY['postgresql', 'performance']
);

Views and materialized views#

-- View (virtual table, computed on each query)
CREATE VIEW post_stats AS
SELECT
  p.id,
  p.title,
  p.author_id,
  u.username as author_name,
  COUNT(DISTINCT c.id) as comment_count,
  COUNT(DISTINCT l.user_id) as like_count,
  p.views
FROM posts p
JOIN users u ON p.author_id = u.id
LEFT JOIN comments c ON c.post_id = p.id
LEFT JOIN likes l ON l.post_id = p.id
GROUP BY p.id, p.title, p.author_id, u.username, p.views;
 
-- Query the view
SELECT * FROM post_stats WHERE comment_count > 10;
 
-- Materialized view (cached, faster but needs refresh)
CREATE MATERIALIZED VIEW post_stats_cached AS
SELECT
  p.id,
  p.title,
  COUNT(DISTINCT c.id) as comment_count,
  COUNT(DISTINCT l.user_id) as like_count
FROM posts p
LEFT JOIN comments c ON c.post_id = p.id
LEFT JOIN likes l ON l.post_id = p.id
GROUP BY p.id, p.title;
 
-- Create index on materialized view
CREATE INDEX idx_post_stats_comment_count
ON post_stats_cached(comment_count);
 
-- Refresh periodically (via cron job)
REFRESH MATERIALIZED VIEW post_stats_cached;
 
-- Concurrent refresh (doesn't block reads; requires a UNIQUE index on the materialized view)
REFRESH MATERIALIZED VIEW CONCURRENTLY post_stats_cached;
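
For the periodic refresh, a host-level cron job works; if the pg_cron extension happens to be installed, scheduling it inside the database is another option. A sketch (job name and interval are made up):

-- Assumes the pg_cron extension is installed and configured
SELECT cron.schedule(
  'refresh-post-stats',   -- job name (illustrative)
  '*/15 * * * *',         -- every 15 minutes
  'REFRESH MATERIALIZED VIEW CONCURRENTLY post_stats_cached'
);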

Part 5: Scaling PostgreSQL#

Connection pooling#

// lib/db.ts
import { Pool } from "pg";
 
const pool = new Pool({
  host: process.env.DB_HOST,
  port: Number(process.env.DB_PORT),
  database: process.env.DB_NAME,
  user: process.env.DB_USER,
  password: process.env.DB_PASSWORD,
 
  // Connection pool config
  max: 20, // Max connections
  min: 5, // Min connections to keep open
  idleTimeoutMillis: 30000, // Close idle connections after 30s
  connectionTimeoutMillis: 2000, // Wait 2s for connection
});
 
export async function query(text: string, params?: any[]) {
  const start = Date.now();
  const result = await pool.query(text, params);
  const duration = Date.now() - start;
 
  console.log("Query executed", { text, duration, rows: result.rowCount });
 
  return result;
}
 
// Usage
const { rows } = await query("SELECT * FROM posts WHERE author_id = $1", [
  authorId,
]);

Read replicas#

// lib/db.ts
import { Pool } from 'pg';
 
// Primary (write) connection
const primaryPool = new Pool({
  host: process.env.DB_PRIMARY_HOST,
  // ... config
});
 
// Replica (read) connection
const replicaPool = new Pool({
  host: process.env.DB_REPLICA_HOST,
  // ... config
});
 
export async function queryPrimary(text: string, params?: any[]) {
  return primaryPool.query(text, params);
}
 
export async function queryReplica(text: string, params?: any[]) {
  return replicaPool.query(text, params);
}
 
// Usage
// Writes go to primary
await queryPrimary('INSERT INTO posts ...', [...]);
 
// Reads can go to replica
const posts = await queryReplica('SELECT * FROM posts', []);

Partitioning large tables#

-- Partition by date range
CREATE TABLE events (
  id UUID,
  user_id UUID,
  event_type VARCHAR(50),
  happened_at TIMESTAMP WITH TIME ZONE,
  data JSONB
) PARTITION BY RANGE (happened_at);
 
-- Create partitions
CREATE TABLE events_2026_01 PARTITION OF events
FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');
 
CREATE TABLE events_2026_02 PARTITION OF events
FOR VALUES FROM ('2026-02-01') TO ('2026-03-01');
 
-- Queries automatically use right partition
SELECT * FROM events
WHERE happened_at >= '2026-01-15'
  AND happened_at < '2026-01-20';
-- Only scans events_2026_01
 
-- Create index on all partitions
CREATE INDEX idx_events_user_id ON events(user_id);
 
-- Automate partition creation
CREATE OR REPLACE FUNCTION create_monthly_partition()
RETURNS void AS $$
DECLARE
  v_start DATE;
  v_end DATE;
  v_partition_name TEXT;
BEGIN
  v_start := DATE_TRUNC('month', CURRENT_DATE + INTERVAL '1 month');
  v_end := v_start + INTERVAL '1 month';
  v_partition_name := 'events_' || TO_CHAR(v_start, 'YYYY_MM');
 
  EXECUTE format(
    'CREATE TABLE IF NOT EXISTS %I PARTITION OF events FOR VALUES FROM (%L) TO (%L)',
    v_partition_name,
    v_start,
    v_end
  );
END;
$$ LANGUAGE plpgsql;
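
The function still has to run before each new month arrives. A DEFAULT partition catches rows that would otherwise have nowhere to go, and the call can be scheduled (sketched here with pg_cron again, assuming it is available):

-- Catch-all for rows whose month has no partition yet
CREATE TABLE events_default PARTITION OF events DEFAULT;

-- Run the helper ahead of each month (assumes pg_cron)
SELECT cron.schedule(
  'create-events-partition',   -- job name (illustrative)
  '0 0 25 * *',                -- 25th of every month at midnight
  'SELECT create_monthly_partition()'
);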

Monitoring and maintenance#

-- Table sizes
SELECT
  schemaname,
  tablename,
  pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size,
  pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) as table_size,
  pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename) - pg_relation_size(schemaname||'.'||tablename)) as indexes_size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
 
-- Slow queries (requires the pg_stat_statements extension)
SELECT
  query,
  calls,
  total_exec_time / 1000 as total_time_seconds,
  mean_exec_time / 1000 as mean_time_seconds,
  max_exec_time / 1000 as max_time_seconds
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;
 
-- Connection stats
SELECT
  state,
  COUNT(*)
FROM pg_stat_activity
WHERE datname = current_database()
GROUP BY state;
 
-- Vacuum and analyze (run periodically)
VACUUM ANALYZE posts;
 
-- Reindex (if index became bloated)
REINDEX TABLE posts;

Part 6: Backup and Recovery#

# Backup entire database
pg_dump -h localhost -U postgres -d mydb -F c -f backup.dump
 
# Backup specific tables
pg_dump -h localhost -U postgres -d mydb -t posts -t comments -F c -f tables.dump
 
# Restore
pg_restore -h localhost -U postgres -d mydb backup.dump
 
# Continuous archiving with WAL
# In postgresql.conf:
# wal_level = replica
# archive_mode = on
# archive_command = 'cp %p /path/to/archive/%f'
 
# Point-in-time recovery
# 1. Restore base backup
# 2. Apply WAL files up to specific time

Automated backups with cron:

#!/bin/bash
# backup.sh
set -euo pipefail  # stop on the first failing command
 
BACKUP_DIR="/backups"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
FILENAME="$BACKUP_DIR/backup_$TIMESTAMP.dump"
 
pg_dump -h localhost -U postgres -d mydb -F c -f "$FILENAME"
 
# Keep only last 7 days
find $BACKUP_DIR -name "backup_*.dump" -mtime +7 -delete
 
# Upload to S3
aws s3 cp "$FILENAME" "s3://my-backups/postgres/"

Part 7: Best Practices Checklist#

-- ✓ Use UUIDs for primary keys (better for distributed systems)
-- gen_random_uuid() is built in since PostgreSQL 13; older versions need an extension:
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";  -- provides uuid_generate_v4()
 
-- ✓ Always use TIMESTAMP WITH TIME ZONE
CREATE TABLE events (
  created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
 
-- ✓ Add constraints
ALTER TABLE posts
ADD CONSTRAINT posts_title_not_empty CHECK (title <> ''),
ADD CONSTRAINT posts_published_after_created CHECK (published_at >= created_at);
 
-- ✓ Use CASCADE carefully
CREATE TABLE comments (
  post_id UUID REFERENCES posts(id) ON DELETE CASCADE
);
 
-- ✓ Name your constraints
ALTER TABLE posts
ADD CONSTRAINT fk_posts_author FOREIGN KEY (author_id) REFERENCES users(id);
 
-- ✓ Index foreign key columns (Postgres doesn't create these indexes automatically)
CREATE INDEX idx_comments_post_id ON comments(post_id);
ALTER TABLE comments ADD FOREIGN KEY (post_id) REFERENCES posts(id);
 
-- ✓ Use transactions for multi-step operations
BEGIN;
  -- Multiple operations
COMMIT;
 
-- ✓ Set statement timeout to prevent runaway queries
SET statement_timeout = '30s';

PostgreSQL best practices visualization


Real-world patterns#

Pagination done right#

-- ❌ Bad: OFFSET gets slower as pages increase
SELECT * FROM posts
ORDER BY created_at DESC
LIMIT 20 OFFSET 10000;  -- Still reads and discards the first 10,000 rows!
 
-- ✅ Good: Cursor-based pagination
SELECT * FROM posts
WHERE created_at < '2026-01-15 10:30:00'
ORDER BY created_at DESC
LIMIT 20;
 
-- Return last item's created_at as cursor
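
If two posts can share the same created_at, the cursor needs a tie-breaker. A sketch using a (created_at, id) row comparison plus a matching composite index (neither is part of the schema above):

-- Composite index matching the sort order
CREATE INDEX idx_posts_created_at_id ON posts(created_at DESC, id DESC);

-- (created_at, id) of the last row on the previous page is the cursor
SELECT id, title, created_at
FROM posts
WHERE (created_at, id) < ('2026-01-15 10:30:00+00', '...')
ORDER BY created_at DESC, id DESC
LIMIT 20;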

Audit logging#

CREATE TABLE audit_log (
  id BIGSERIAL PRIMARY KEY,
  table_name VARCHAR(50),
  record_id UUID,
  action VARCHAR(10),  -- INSERT, UPDATE, DELETE
  old_values JSONB,
  new_values JSONB,
  changed_by UUID,
  changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
 
-- Trigger to log changes
CREATE OR REPLACE FUNCTION audit_trigger()
RETURNS TRIGGER AS $$
BEGIN
  INSERT INTO audit_log (table_name, record_id, action, old_values, new_values, changed_by)
  VALUES (
    TG_TABLE_NAME,
    COALESCE(NEW.id, OLD.id),
    TG_OP,
    CASE WHEN TG_OP = 'DELETE' THEN to_jsonb(OLD) ELSE NULL END,
    CASE WHEN TG_OP IN ('INSERT', 'UPDATE') THEN to_jsonb(NEW) ELSE NULL END,
    CURRENT_SETTING('app.current_user_id', TRUE)::UUID
  );
 
  RETURN COALESCE(NEW, OLD);
END;
$$ LANGUAGE plpgsql;
 
-- Apply to tables
CREATE TRIGGER trigger_posts_audit
AFTER INSERT OR UPDATE OR DELETE ON posts
FOR EACH ROW EXECUTE FUNCTION audit_trigger();

Soft deletes#

ALTER TABLE posts ADD COLUMN deleted_at TIMESTAMP WITH TIME ZONE;
 
-- Create index for faster queries
CREATE INDEX idx_posts_not_deleted ON posts(id) WHERE deleted_at IS NULL;
 
-- "Delete" (actually just mark)
UPDATE posts SET deleted_at = NOW() WHERE id = '...';
 
-- Query undeleted
SELECT * FROM posts WHERE deleted_at IS NULL;
 
-- Or use a view
CREATE VIEW active_posts AS
SELECT * FROM posts WHERE deleted_at IS NULL;
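
One wrinkle: UNIQUE constraints still count soft-deleted rows, so a deleted post keeps blocking its slug. A partial unique index scoped to live rows is one way around it (posts_slug_key is the default name Postgres gives the original constraint, assumed here):

-- Replace the table-wide unique constraint with one that ignores deleted rows
ALTER TABLE posts DROP CONSTRAINT posts_slug_key;  -- default constraint name assumed

CREATE UNIQUE INDEX idx_posts_slug_active
ON posts(slug)
WHERE deleted_at IS NULL;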

The PostgreSQL mindset#

After years with Postgres, here's what I've learned:

  1. Start simple: Don't over-engineer. Basic normalized schema first.
  2. Measure before optimizing: Use EXPLAIN ANALYZE. Don't guess.
  3. Indexes are not free: They speed up reads but slow down writes.
  4. Embrace SQL: Complex queries in database > multiple queries in app.
  5. Version your schema: Use migrations (Prisma, TypeORM, Flyway).
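
Even without a framework, a migration can be a numbered SQL file applied in order. A bare-bones sketch (the file name, the excerpt column, and the schema_migrations bookkeeping table are illustrative conventions, not built-ins):

-- One-time bookkeeping table (a common convention)
CREATE TABLE IF NOT EXISTS schema_migrations (
  version TEXT PRIMARY KEY,
  applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- migrations/0042_add_posts_excerpt.sql
BEGIN;
  ALTER TABLE posts ADD COLUMN excerpt TEXT;
  INSERT INTO schema_migrations (version) VALUES ('0042');
COMMIT;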

PostgreSQL rewards learning its features. JSON support, full-text search, window functions—they're not just nice-to-haves. They're what make Postgres a true data platform.


What PostgreSQL feature are you most excited to try? Let's build better databases together.