Database Schema - reza899/AutoSDLC GitHub Wiki
Database Schema Design
#AutoSDLC #Database #Schema #PostgreSQL
← Back to Index | ← WebSocket Events
Overview
The Database Schema Design document defines the data models, relationships, and storage strategies for the AutoSDLC system. The schema is designed to support high-performance agent operations, comprehensive audit trails, and scalable multi-tenant deployments.
Database Architecture
Technology Stack
databases:
primary:
type: PostgreSQL
version: "15.3"
purpose: "Transactional data, system state"
features:
- JSONB for flexible data
- Full-text search
- Row-level security
- Partitioning for large tables
cache:
type: Redis
version: "7.0"
purpose: "Session data, real-time state"
features:
- Pub/Sub for events
- Sorted sets for queues
- TTL for automatic cleanup
search:
type: Elasticsearch
version: "8.10"
purpose: "Logs, metrics, code search"
features:
- Time-series data
- Full-text search
- Aggregations
timeseries:
type: TimescaleDB
version: "2.11"
purpose: "Metrics, performance data"
features:
- Hypertables
- Continuous aggregates
- Data retention policies
Schema Design Principles
- Normalization: Properly normalized to 3NF where appropriate
- Performance: Denormalized for read-heavy operations
- Flexibility: JSONB columns for extensible data
- Audit Trail: Comprehensive change tracking
- Multi-tenancy: Row-level security for isolation
Core Schema
Organizations & Users
-- Organizations (multi-tenant support)
CREATE TABLE organizations (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL,
slug VARCHAR(255) UNIQUE NOT NULL,
settings JSONB DEFAULT '{}',
subscription_tier VARCHAR(50) DEFAULT 'free',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
deleted_at TIMESTAMP WITH TIME ZONE
);
CREATE INDEX idx_organizations_slug ON organizations(slug);
CREATE INDEX idx_organizations_deleted_at ON organizations(deleted_at) WHERE deleted_at IS NULL;
-- Users
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
email VARCHAR(255) UNIQUE NOT NULL,
name VARCHAR(255) NOT NULL,
avatar_url TEXT,
github_id VARCHAR(255) UNIQUE,
auth_provider VARCHAR(50) NOT NULL DEFAULT 'local',
password_hash TEXT, -- NULL for OAuth users
mfa_enabled BOOLEAN DEFAULT FALSE,
mfa_secret TEXT,
last_login_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
deleted_at TIMESTAMP WITH TIME ZONE
);
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_github_id ON users(github_id) WHERE github_id IS NOT NULL;
-- Organization membership
CREATE TABLE organization_members (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
organization_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE,
user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
role VARCHAR(50) NOT NULL DEFAULT 'member',
permissions JSONB DEFAULT '[]',
invited_by UUID REFERENCES users(id),
joined_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
UNIQUE(organization_id, user_id)
);
CREATE INDEX idx_org_members_org_id ON organization_members(organization_id);
CREATE INDEX idx_org_members_user_id ON organization_members(user_id);
Projects
-- Projects
CREATE TABLE projects (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
organization_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL,
slug VARCHAR(255) NOT NULL,
description TEXT,
repository_url TEXT,
github_repo_id VARCHAR(255),
settings JSONB DEFAULT '{}',
status VARCHAR(50) DEFAULT 'active',
created_by UUID REFERENCES users(id),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
archived_at TIMESTAMP WITH TIME ZONE,
UNIQUE(organization_id, slug)
);
CREATE INDEX idx_projects_org_id ON projects(organization_id);
CREATE INDEX idx_projects_status ON projects(status);
CREATE INDEX idx_projects_github_repo_id ON projects(github_repo_id) WHERE github_repo_id IS NOT NULL;
-- Project members
CREATE TABLE project_members (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
role VARCHAR(50) NOT NULL DEFAULT 'developer',
added_by UUID REFERENCES users(id),
added_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
UNIQUE(project_id, user_id)
);
CREATE INDEX idx_project_members_project_id ON project_members(project_id);
CREATE INDEX idx_project_members_user_id ON project_members(user_id);
Agents
-- Agent definitions
CREATE TABLE agents (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
agent_type VARCHAR(50) NOT NULL,
instance_id VARCHAR(255) UNIQUE NOT NULL,
name VARCHAR(255) NOT NULL,
status VARCHAR(50) DEFAULT 'inactive',
configuration JSONB DEFAULT '{}',
capabilities TEXT[] DEFAULT '{}',
version VARCHAR(50) NOT NULL,
-- Claude Code specific
working_directory TEXT,
claude_instructions TEXT, -- CLAUDE.md content
custom_commands JSONB DEFAULT '[]',
-- Performance metrics
performance_metrics JSONB DEFAULT '{}',
last_heartbeat_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
started_at TIMESTAMP WITH TIME ZONE,
stopped_at TIMESTAMP WITH TIME ZONE
);
CREATE INDEX idx_agents_project_id ON agents(project_id);
CREATE INDEX idx_agents_type ON agents(agent_type);
CREATE INDEX idx_agents_status ON agents(status);
CREATE INDEX idx_agents_instance_id ON agents(instance_id);
-- Agent status history
CREATE TABLE agent_status_history (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
agent_id UUID NOT NULL REFERENCES agents(id) ON DELETE CASCADE,
status VARCHAR(50) NOT NULL,
reason TEXT,
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_agent_status_history_agent_id ON agent_status_history(agent_id);
CREATE INDEX idx_agent_status_history_created_at ON agent_status_history(created_at);
-- Agent output tracking
CREATE TABLE agent_outputs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
agent_id UUID NOT NULL REFERENCES agents(id) ON DELETE CASCADE,
output_type VARCHAR(50) NOT NULL, -- 'status', 'log', 'metric'
content TEXT NOT NULL,
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_agent_outputs_agent_id ON agent_outputs(agent_id);
CREATE INDEX idx_agent_outputs_type ON agent_outputs(output_type);
CREATE INDEX idx_agent_outputs_created_at ON agent_outputs(created_at);
-- Partition by month for large-scale deployments
-- CREATE TABLE agent_outputs_2024_01 PARTITION OF agent_outputs
-- FOR VALUES FROM ('2025-01-01') TO ('2025-02-01');
Tasks & Workflows
-- Workflows
CREATE TABLE workflows (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL,
type VARCHAR(50) NOT NULL,
definition JSONB NOT NULL, -- Workflow DAG
status VARCHAR(50) DEFAULT 'draft',
version INTEGER DEFAULT 1,
created_by UUID REFERENCES users(id),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_workflows_project_id ON workflows(project_id);
CREATE INDEX idx_workflows_type ON workflows(type);
CREATE INDEX idx_workflows_status ON workflows(status);
-- Workflow executions
CREATE TABLE workflow_executions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
workflow_id UUID NOT NULL REFERENCES workflows(id) ON DELETE CASCADE,
execution_number SERIAL,
status VARCHAR(50) DEFAULT 'pending',
context JSONB DEFAULT '{}',
variables JSONB DEFAULT '{}',
result JSONB,
error TEXT,
started_by UUID REFERENCES users(id),
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP WITH TIME ZONE,
UNIQUE(workflow_id, execution_number)
);
CREATE INDEX idx_workflow_executions_workflow_id ON workflow_executions(workflow_id);
CREATE INDEX idx_workflow_executions_status ON workflow_executions(status);
CREATE INDEX idx_workflow_executions_started_at ON workflow_executions(started_at);
-- Tasks
CREATE TABLE tasks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
workflow_execution_id UUID REFERENCES workflow_executions(id) ON DELETE CASCADE,
assigned_agent_id UUID REFERENCES agents(id),
type VARCHAR(50) NOT NULL,
status VARCHAR(50) DEFAULT 'pending',
priority INTEGER DEFAULT 0,
-- Task data
specification JSONB NOT NULL,
input_data JSONB DEFAULT '{}',
output_data JSONB,
error_details JSONB,
-- GitHub integration
github_issue_number INTEGER,
github_pr_number INTEGER,
-- TDD specific
test_specification TEXT,
tdd_phase VARCHAR(20), -- 'red', 'green', 'refactor'
test_results JSONB,
-- Timing
scheduled_at TIMESTAMP WITH TIME ZONE,
started_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE,
deadline_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_tasks_project_id ON tasks(project_id);
CREATE INDEX idx_tasks_workflow_execution_id ON tasks(workflow_execution_id);
CREATE INDEX idx_tasks_assigned_agent_id ON tasks(assigned_agent_id);
CREATE INDEX idx_tasks_status ON tasks(status);
CREATE INDEX idx_tasks_type ON tasks(type);
CREATE INDEX idx_tasks_priority ON tasks(priority DESC);
CREATE INDEX idx_tasks_scheduled_at ON tasks(scheduled_at) WHERE scheduled_at IS NOT NULL;
-- Task dependencies
CREATE TABLE task_dependencies (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
task_id UUID NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
depends_on_task_id UUID NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
dependency_type VARCHAR(50) DEFAULT 'finish_to_start',
UNIQUE(task_id, depends_on_task_id)
);
CREATE INDEX idx_task_dependencies_task_id ON task_dependencies(task_id);
CREATE INDEX idx_task_dependencies_depends_on ON task_dependencies(depends_on_task_id);
Communication & Messages
-- Inter-agent messages
CREATE TABLE agent_messages (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
message_id VARCHAR(255) UNIQUE NOT NULL,
source_agent_id UUID REFERENCES agents(id),
destination_agent_id UUID REFERENCES agents(id),
destination_type VARCHAR(50), -- 'agent' or 'broadcast'
message_type VARCHAR(50) NOT NULL,
priority INTEGER DEFAULT 1,
payload JSONB NOT NULL,
correlation_id VARCHAR(255),
requires_ack BOOLEAN DEFAULT FALSE,
ack_received_at TIMESTAMP WITH TIME ZONE,
ttl INTEGER, -- seconds
retry_count INTEGER DEFAULT 0,
max_retries INTEGER DEFAULT 3,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
delivered_at TIMESTAMP WITH TIME ZONE,
processed_at TIMESTAMP WITH TIME ZONE
);
CREATE INDEX idx_agent_messages_source ON agent_messages(source_agent_id);
CREATE INDEX idx_agent_messages_destination ON agent_messages(destination_agent_id);
CREATE INDEX idx_agent_messages_type ON agent_messages(message_type);
CREATE INDEX idx_agent_messages_correlation_id ON agent_messages(correlation_id);
CREATE INDEX idx_agent_messages_created_at ON agent_messages(created_at);
-- Message queue for failed/delayed messages
CREATE TABLE message_queue (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
message_id UUID NOT NULL REFERENCES agent_messages(id) ON DELETE CASCADE,
status VARCHAR(50) DEFAULT 'pending',
scheduled_for TIMESTAMP WITH TIME ZONE NOT NULL,
attempts INTEGER DEFAULT 0,
last_error TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_message_queue_status ON message_queue(status);
CREATE INDEX idx_message_queue_scheduled ON message_queue(scheduled_for) WHERE status = 'pending';
GitHub Integration
-- GitHub repositories
CREATE TABLE github_repositories (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
github_id VARCHAR(255) UNIQUE NOT NULL,
owner VARCHAR(255) NOT NULL,
name VARCHAR(255) NOT NULL,
full_name VARCHAR(255) NOT NULL,
default_branch VARCHAR(255) DEFAULT 'main',
webhook_id VARCHAR(255),
webhook_secret TEXT,
permissions JSONB DEFAULT '{}',
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
synced_at TIMESTAMP WITH TIME ZONE
);
CREATE INDEX idx_github_repos_project_id ON github_repositories(project_id);
CREATE INDEX idx_github_repos_full_name ON github_repositories(full_name);
-- GitHub issues
CREATE TABLE github_issues (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
repository_id UUID NOT NULL REFERENCES github_repositories(id) ON DELETE CASCADE,
github_id VARCHAR(255) NOT NULL,
number INTEGER NOT NULL,
title TEXT NOT NULL,
body TEXT,
state VARCHAR(50) NOT NULL,
labels JSONB DEFAULT '[]',
assignees JSONB DEFAULT '[]',
created_by VARCHAR(255),
created_at TIMESTAMP WITH TIME ZONE,
updated_at TIMESTAMP WITH TIME ZONE,
closed_at TIMESTAMP WITH TIME ZONE,
synced_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
UNIQUE(repository_id, number)
);
CREATE INDEX idx_github_issues_repo_id ON github_issues(repository_id);
CREATE INDEX idx_github_issues_state ON github_issues(state);
CREATE INDEX idx_github_issues_number ON github_issues(number);
-- GitHub pull requests
CREATE TABLE github_pull_requests (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
repository_id UUID NOT NULL REFERENCES github_repositories(id) ON DELETE CASCADE,
github_id VARCHAR(255) NOT NULL,
number INTEGER NOT NULL,
title TEXT NOT NULL,
body TEXT,
state VARCHAR(50) NOT NULL,
head_ref VARCHAR(255),
base_ref VARCHAR(255),
draft BOOLEAN DEFAULT FALSE,
merged BOOLEAN DEFAULT FALSE,
mergeable BOOLEAN,
labels JSONB DEFAULT '[]',
reviewers JSONB DEFAULT '[]',
created_by VARCHAR(255),
created_at TIMESTAMP WITH TIME ZONE,
updated_at TIMESTAMP WITH TIME ZONE,
merged_at TIMESTAMP WITH TIME ZONE,
closed_at TIMESTAMP WITH TIME ZONE,
synced_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
UNIQUE(repository_id, number)
);
CREATE INDEX idx_github_prs_repo_id ON github_pull_requests(repository_id);
CREATE INDEX idx_github_prs_state ON github_pull_requests(state);
CREATE INDEX idx_github_prs_number ON github_pull_requests(number);
Test Results & Coverage
-- Test suites
CREATE TABLE test_suites (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
task_id UUID REFERENCES tasks(id),
name VARCHAR(255) NOT NULL,
test_framework VARCHAR(50),
total_tests INTEGER DEFAULT 0,
passed_tests INTEGER DEFAULT 0,
failed_tests INTEGER DEFAULT 0,
skipped_tests INTEGER DEFAULT 0,
duration_ms INTEGER,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
executed_at TIMESTAMP WITH TIME ZONE
);
CREATE INDEX idx_test_suites_project_id ON test_suites(project_id);
CREATE INDEX idx_test_suites_task_id ON test_suites(task_id);
CREATE INDEX idx_test_suites_executed_at ON test_suites(executed_at);
-- Individual test results
CREATE TABLE test_results (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
test_suite_id UUID NOT NULL REFERENCES test_suites(id) ON DELETE CASCADE,
test_name TEXT NOT NULL,
test_path TEXT,
status VARCHAR(20) NOT NULL, -- 'passed', 'failed', 'skipped'
duration_ms INTEGER,
error_message TEXT,
error_stack TEXT,
metadata JSONB DEFAULT '{}'
);
CREATE INDEX idx_test_results_suite_id ON test_results(test_suite_id);
CREATE INDEX idx_test_results_status ON test_results(status);
-- Code coverage
CREATE TABLE code_coverage (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
test_suite_id UUID REFERENCES test_suites(id),
commit_sha VARCHAR(255),
line_coverage DECIMAL(5,2),
branch_coverage DECIMAL(5,2),
function_coverage DECIMAL(5,2),
statement_coverage DECIMAL(5,2),
coverage_data JSONB, -- Detailed coverage per file
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_code_coverage_project_id ON code_coverage(project_id);
CREATE INDEX idx_code_coverage_suite_id ON code_coverage(test_suite_id);
CREATE INDEX idx_code_coverage_commit ON code_coverage(commit_sha);
Audit & Security
-- Audit log
CREATE TABLE audit_logs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID REFERENCES users(id),
organization_id UUID REFERENCES organizations(id),
action VARCHAR(255) NOT NULL,
resource_type VARCHAR(100) NOT NULL,
resource_id UUID,
old_values JSONB,
new_values JSONB,
ip_address INET,
user_agent TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_audit_logs_user_id ON audit_logs(user_id);
CREATE INDEX idx_audit_logs_org_id ON audit_logs(organization_id);
CREATE INDEX idx_audit_logs_resource ON audit_logs(resource_type, resource_id);
CREATE INDEX idx_audit_logs_created_at ON audit_logs(created_at);
-- API keys
CREATE TABLE api_keys (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID REFERENCES users(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL,
key_hash VARCHAR(255) UNIQUE NOT NULL,
permissions JSONB DEFAULT '[]',
expires_at TIMESTAMP WITH TIME ZONE,
last_used_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
revoked_at TIMESTAMP WITH TIME ZONE
);
CREATE INDEX idx_api_keys_user_id ON api_keys(user_id);
CREATE INDEX idx_api_keys_key_hash ON api_keys(key_hash);
CREATE INDEX idx_api_keys_expires_at ON api_keys(expires_at) WHERE expires_at IS NOT NULL;
-- Sessions
CREATE TABLE sessions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
token_hash VARCHAR(255) UNIQUE NOT NULL,
ip_address INET,
user_agent TEXT,
expires_at TIMESTAMP WITH TIME ZONE NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_activity_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_sessions_user_id ON sessions(user_id);
CREATE INDEX idx_sessions_token_hash ON sessions(token_hash);
CREATE INDEX idx_sessions_expires_at ON sessions(expires_at);
Performance Optimization
Indexes Strategy
-- Composite indexes for common queries
CREATE INDEX idx_tasks_project_status ON tasks(project_id, status);
CREATE INDEX idx_tasks_agent_status ON tasks(assigned_agent_id, status) WHERE assigned_agent_id IS NOT NULL;
CREATE INDEX idx_agent_messages_dest_unprocessed ON agent_messages(destination_agent_id, created_at)
WHERE processed_at IS NULL;
-- Partial indexes for specific conditions
CREATE INDEX idx_tasks_pending ON tasks(scheduled_at)
WHERE status = 'pending' AND scheduled_at IS NOT NULL;
CREATE INDEX idx_agents_active ON agents(project_id, agent_type)
WHERE status = 'active';
-- GIN indexes for JSONB queries
CREATE INDEX idx_agent_config_gin ON agents USING GIN (configuration);
CREATE INDEX idx_task_spec_gin ON tasks USING GIN (specification);
CREATE INDEX idx_workflow_def_gin ON workflows USING GIN (definition);
Partitioning Strategy
-- Partition large tables by time
-- Agent outputs partitioned by month
CREATE TABLE agent_outputs_partitioned (
LIKE agent_outputs INCLUDING ALL
) PARTITION BY RANGE (created_at);
CREATE TABLE agent_outputs_2024_01 PARTITION OF agent_outputs_partitioned
FOR VALUES FROM ('2025-01-01') TO ('2025-02-01');
-- Automated partition creation
CREATE OR REPLACE FUNCTION create_monthly_partition()
RETURNS void AS $$
DECLARE
start_date date;
end_date date;
partition_name text;
BEGIN
start_date := date_trunc('month', CURRENT_DATE);
end_date := start_date + interval '1 month';
partition_name := 'agent_outputs_' || to_char(start_date, 'YYYY_MM');
EXECUTE format('CREATE TABLE IF NOT EXISTS %I PARTITION OF agent_outputs_partitioned FOR VALUES FROM (%L) TO (%L)',
partition_name, start_date, end_date);
END;
$$ LANGUAGE plpgsql;
-- Schedule monthly partition creation
SELECT cron.schedule('create-partitions', '0 0 1 * *', 'SELECT create_monthly_partition()');
Materialized Views
-- Agent performance metrics
CREATE MATERIALIZED VIEW agent_performance_daily AS
SELECT
a.id as agent_id,
a.agent_type,
a.project_id,
DATE(t.completed_at) as date,
COUNT(t.id) as tasks_completed,
AVG(EXTRACT(EPOCH FROM (t.completed_at - t.started_at))) as avg_task_duration_seconds,
SUM(CASE WHEN t.status = 'completed' THEN 1 ELSE 0 END)::float / COUNT(t.id) as success_rate
FROM agents a
LEFT JOIN tasks t ON a.id = t.assigned_agent_id
WHERE t.completed_at IS NOT NULL
GROUP BY a.id, a.agent_type, a.project_id, DATE(t.completed_at);
CREATE INDEX idx_agent_perf_daily ON agent_performance_daily(agent_id, date);
-- Refresh schedule
SELECT cron.schedule('refresh-agent-performance', '0 1 * * *',
'REFRESH MATERIALIZED VIEW CONCURRENTLY agent_performance_daily');
-- Project task statistics
CREATE MATERIALIZED VIEW project_task_stats AS
SELECT
p.id as project_id,
COUNT(DISTINCT t.id) as total_tasks,
COUNT(DISTINCT CASE WHEN t.status = 'completed' THEN t.id END) as completed_tasks,
COUNT(DISTINCT CASE WHEN t.status = 'failed' THEN t.id END) as failed_tasks,
AVG(CASE WHEN t.completed_at IS NOT NULL
THEN EXTRACT(EPOCH FROM (t.completed_at - t.created_at))
END) as avg_completion_time_seconds,
MAX(t.updated_at) as last_activity
FROM projects p
LEFT JOIN tasks t ON p.id = t.project_id
GROUP BY p.id;
CREATE UNIQUE INDEX idx_project_task_stats ON project_task_stats(project_id);
Data Retention & Archival
Retention Policies
-- Create archive schema
CREATE SCHEMA IF NOT EXISTS archive;
-- Archive old agent outputs
CREATE OR REPLACE FUNCTION archive_old_agent_outputs()
RETURNS void AS $$
BEGIN
-- Move data older than 90 days to archive
INSERT INTO archive.agent_outputs
SELECT * FROM agent_outputs
WHERE created_at < CURRENT_DATE - INTERVAL '90 days';
DELETE FROM agent_outputs
WHERE created_at < CURRENT_DATE - INTERVAL '90 days';
END;
$$ LANGUAGE plpgsql;
-- Schedule archival
SELECT cron.schedule('archive-agent-outputs', '0 2 * * 0', 'SELECT archive_old_agent_outputs()');
-- Data retention policies
CREATE TABLE data_retention_policies (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
table_name VARCHAR(255) NOT NULL,
retention_days INTEGER NOT NULL,
archive_enabled BOOLEAN DEFAULT TRUE,
delete_enabled BOOLEAN DEFAULT FALSE,
last_run_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
INSERT INTO data_retention_policies (table_name, retention_days, archive_enabled, delete_enabled) VALUES
('agent_outputs', 90, true, false),
('agent_messages', 30, true, true),
('audit_logs', 365, true, false),
('test_results', 180, true, false),
('agent_status_history', 30, false, true);
Migrations
Migration Management
-- Migration tracking
CREATE TABLE IF NOT EXISTS schema_migrations (
version VARCHAR(255) PRIMARY KEY,
applied_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Example migration: Add TDD support
-- migrations/001_add_tdd_support.sql
BEGIN;
ALTER TABLE tasks
ADD COLUMN IF NOT EXISTS test_specification TEXT,
ADD COLUMN IF NOT EXISTS tdd_phase VARCHAR(20),
ADD COLUMN IF NOT EXISTS test_results JSONB;
ALTER TABLE agents
ADD COLUMN IF NOT EXISTS tdd_metrics JSONB DEFAULT '{}';
INSERT INTO schema_migrations (version) VALUES ('001_add_tdd_support');
COMMIT;
Rollback Procedures
-- Rollback script example
-- rollback/001_add_tdd_support.sql
BEGIN;
ALTER TABLE tasks
DROP COLUMN IF EXISTS test_specification,
DROP COLUMN IF EXISTS tdd_phase,
DROP COLUMN IF EXISTS test_results;
ALTER TABLE agents
DROP COLUMN IF EXISTS tdd_metrics;
DELETE FROM schema_migrations WHERE version = '001_add_tdd_support';
COMMIT;
Security & Access Control
Row Level Security
-- Enable RLS on sensitive tables
ALTER TABLE projects ENABLE ROW LEVEL SECURITY;
ALTER TABLE tasks ENABLE ROW LEVEL SECURITY;
ALTER TABLE agent_messages ENABLE ROW LEVEL SECURITY;
-- Project access policy
CREATE POLICY project_access ON projects
FOR ALL
TO application_user
USING (
EXISTS (
SELECT 1 FROM project_members pm
WHERE pm.project_id = projects.id
AND pm.user_id = current_setting('app.current_user_id')::UUID
)
OR
EXISTS (
SELECT 1 FROM organization_members om
WHERE om.organization_id = projects.organization_id
AND om.user_id = current_setting('app.current_user_id')::UUID
AND om.role IN ('admin', 'owner')
)
);
-- Task access policy
CREATE POLICY task_access ON tasks
FOR ALL
TO application_user
USING (
project_id IN (
SELECT p.id FROM projects p
JOIN project_members pm ON p.id = pm.project_id
WHERE pm.user_id = current_setting('app.current_user_id')::UUID
)
);
Encryption
-- Transparent Data Encryption setup
-- Enable pgcrypto extension
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- Encrypted columns for sensitive data
ALTER TABLE users
ADD COLUMN mfa_secret_encrypted BYTEA;
-- Encryption functions
CREATE OR REPLACE FUNCTION encrypt_sensitive(data TEXT)
RETURNS BYTEA AS $$
BEGIN
RETURN pgp_sym_encrypt(
data,
current_setting('app.encryption_key')
);
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
CREATE OR REPLACE FUNCTION decrypt_sensitive(data BYTEA)
RETURNS TEXT AS $$
BEGIN
RETURN pgp_sym_decrypt(
data,
current_setting('app.encryption_key')
);
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
Monitoring & Maintenance
Database Health Queries
-- Table sizes and bloat
CREATE VIEW table_sizes AS
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size,
pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) AS table_size,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename) - pg_relation_size(schemaname||'.'||tablename)) AS indexes_size
FROM pg_tables
WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
-- Slow queries
CREATE VIEW slow_queries AS
SELECT
query,
calls,
mean_exec_time,
total_exec_time,
min_exec_time,
max_exec_time,
stddev_exec_time
FROM pg_stat_statements
WHERE mean_exec_time > 100 -- queries slower than 100ms
ORDER BY mean_exec_time DESC;
-- Index usage
CREATE VIEW index_usage AS
SELECT
schemaname,
tablename,
indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan;
Maintenance Tasks
-- Vacuum and analyze schedule
CREATE OR REPLACE FUNCTION maintenance_vacuum_analyze()
RETURNS void AS $$
DECLARE
table_record RECORD;
BEGIN
FOR table_record IN
SELECT schemaname, tablename
FROM pg_tables
WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
LOOP
EXECUTE format('VACUUM ANALYZE %I.%I',
table_record.schemaname,
table_record.tablename);
END LOOP;
END;
$$ LANGUAGE plpgsql;
-- Schedule maintenance
SELECT cron.schedule('vacuum-analyze', '0 3 * * 0', 'SELECT maintenance_vacuum_analyze()');
-- Update table statistics
CREATE OR REPLACE FUNCTION update_table_statistics()
RETURNS void AS $$
BEGIN
ANALYZE;
END;
$$ LANGUAGE plpgsql;
SELECT cron.schedule('update-statistics', '0 */6 * * *', 'SELECT update_table_statistics()');
Backup & Recovery
Backup Strategy
backup_strategy:
full_backup:
schedule: "0 2 * * 0" # Weekly on Sunday
retention: 4 # Keep 4 weeks
incremental_backup:
schedule: "0 2 * * 1-6" # Daily except Sunday
retention: 7 # Keep 7 days
wal_archiving:
enabled: true
retention: "7 days"
point_in_time_recovery:
enabled: true
target_recovery_time: "5 minutes"
Backup Scripts
#!/bin/bash
# backup.sh
# Full backup
pg_dump \
--host=$DB_HOST \
--port=$DB_PORT \
--username=$DB_USER \
--dbname=$DB_NAME \
--format=custom \
--blobs \
--verbose \
--file="/backups/full_$(date +%Y%m%d_%H%M%S).dump"
# Backup specific schemas
pg_dump \
--host=$DB_HOST \
--port=$DB_PORT \
--username=$DB_USER \
--dbname=$DB_NAME \
--schema=public \
--schema=archive \
--format=custom \
--file="/backups/schema_$(date +%Y%m%d_%H%M%S).dump"
Best Practices
1. Schema Design
- Use UUIDs for primary keys
- Include created_at/updated_at timestamps
- Use JSONB for flexible, searchable data
- Implement soft deletes where appropriate
2. Performance
- Create indexes for foreign keys
- Use partial indexes for filtered queries
- Partition large tables by time
- Regular VACUUM and ANALYZE
3. Security
- Enable Row Level Security
- Encrypt sensitive data
- Use prepared statements
- Regular security audits
4. Maintenance
- Monitor query performance
- Track table growth
- Regular backups
- Test recovery procedures
5. Development
- Use migrations for schema changes
- Document complex queries
- Version control database scripts
- Test with production-like data
Related Documents
Tags: #AutoSDLC #Database #PostgreSQL #Schema #Design Last Updated: 2025-06-09 Next: UI Architecture →