SQL Reference - NYCPlanning/data-engineering GitHub Wiki

PostgresSQL

Handy code

-- Create a database cluster role
SELECT * FROM pg_roles;
CREATE ROLE readonly;
GRANT CONNECT ON DATABASE db-pluto TO readonly;
GRANT CONNECT ON DATABASE db-colp TO readonly;
GRANT CONNECT ON DATABASE db-devdb TO readonly;

-- Grant privileges to a role
-- Queries about schemas will be applied to the database of the current conenction
GRANT USAGE ON SCHEMA nightly_qa TO readonly;
GRANT SELECT ON ALL TABLES IN SCHEMA nightly_qa TO readonly;

-- Create a user assigned to a role
SELECT * FROM pg_users;
CREATE USER first_last WITH PASSWORD 'A_GOOD_PASSWORD';
GRANT readonly TO first_last;
-- See the size of all schemas in a database
SELECT schema_name,
       pg_size_pretty(sum(table_size)::bigint),
       (sum(table_size) / pg_database_size(current_database())) * 100 as percent_of_db
FROM (
  SELECT pg_catalog.pg_namespace.nspname as schema_name,
         pg_relation_size(pg_catalog.pg_class.oid) as table_size
  FROM   pg_catalog.pg_class
     JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid
) t
GROUP BY schema_name
ORDER BY schema_name;
-- Check if two tables are identical or different
WITH table1_check AS (
    SELECT COUNT(*) as row_count, 
           MD5(CAST((array_agg(t.*::text ORDER BY t.*::text)) AS text)) as checksum
    FROM shema_1.table_name t
),
table2_check AS (
    SELECT COUNT(*) as row_count,
           MD5(CAST((array_agg(t.*::text ORDER BY t.*::text)) AS text)) as checksum 
    FROM shema_2.table_name t
)
SELECT 
    CASE 
        WHEN t1.row_count = t2.row_count AND t1.checksum = t2.checksum
        THEN 'Tables are identical'
        ELSE 'Tables are different'
    END as comparison_result
FROM table1_check t1
CROSS JOIN table2_check t2;
-- Compare two tables in different schemas and return records
(SELECT *, 'Only in original' as difference
 FROM original_shema.table_name
 EXCEPT
 SELECT *, 'Only in original'
 FROM modified_shema.table_name)

UNION all

(SELECT *, 'Only in modified'
 FROM modified_shema.table_name
 EXCEPT
 SELECT *, 'Only in modified'
 FROM original_shema.table_name);

MSSQL

Restoring a backup using docker

To restore an mssql backup, the following steps can be taken.

First, pull/run the image

docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=[my_secure_pw]" -p 1433:1433 -v [path_to_folder_with_backup_file]:/tmp/db_backup -d mcr.microsoft.com/mssql/server
  • mcr.microsoft.com/mssql/server is the image we want
  • set your pw - mssql will error if it is less than 8 characters and doesn't contain at least 3 different types of characters (lowercase, uppercase, number, special)
  • you'll want to mount a volume if you are restoring a backup as give the database access to the file

Then, connect to your database with your method of choice. The default user is "sa".

Then, you can run the following commands. I've added some comments with what this looked like restoring AIMS data

-- First, inspect the backup file
RESTORE FILELISTONLY
FROM DISK = '/tmp/db_backup/AIMS_backup_2025_01_04_070429_9170129.bak' ;
-- AIMS	     F:\SQL Data\AIMS.mdf
-- AIMS_log  F:\SQL Logs\AIMS_log.ldf

-- Without telling it where to put data, it errors
-- because it uses the filepaths for storing data found in the backup file
RESTORE DATABASE AIMS
FROM DISK = '/tmp/db_backup/AIMS_backup_2025_01_04_070429_9170129.bak';
-- " Directory lookup for the file "F:\SQL Data\AIMS.mdf" failed with the operating system error 2(The system cannot find the file specified.)."

-- Run command telling it where to store data.
-- this docker image stores data in /var/opt/mssql/data/ and logs in /var/opt/mssql/log/ by default
RESTORE DATABASE AIMS
FROM DISK = '/tmp/db_backup/AIMS_backup_2025_01_04_070429_9170129.bak'
WITH MOVE 'AIMS' TO '/var/opt/mssql/data/AIMS.mdf', 
     MOVE 'AIMS_log' TO '/var/opt/mssql/log/AIMS_log.ldf'