MySQL ‐ Data Modeling for Practical Service Development - thought-corner/Backend-PlayGround GitHub Wiki

MySQL - Data Modeling for Practical Service Development

Partitioning Strategies and Index Optimization for Scalable Databases

mysql> SHOW COLUMNS FROM posts;
+---------------+-------------------------------------+------+-----+-------------------+-----------------------------------------------+
| Field         | Type                                | Null | Key | Default           | Extra                                         |
+---------------+-------------------------------------+------+-----+-------------------+-----------------------------------------------+
| post_id       | bigint                              | NO   | PRI | NULL              | auto_increment                                |
| user_id       | bigint                              | NO   | MUL | NULL              |                                               |
| category_id   | int                                 | NO   | MUL | NULL              |                                               |
| title         | varchar(200)                        | NO   | MUL | NULL              |                                               |
| content       | text                                | NO   |     | NULL              |                                               |
| comment_count | int                                 | YES  |     | 0                 |                                               |
| like_count    | int                                 | YES  |     | 0                 |                                               |
| view_count    | int                                 | YES  |     | 0                 |                                               |
| status        | enum('DRAFT','PUBLISHED','DELETED') | YES  | MUL | DRAFT             |                                               |
| is_featured   | tinyint(1)                          | YES  | MUL | 0                 |                                               |
| created_at    | timestamp                           | YES  |     | CURRENT_TIMESTAMP | DEFAULT_GENERATED                             |
| updated_at    | timestamp                           | YES  |     | CURRENT_TIMESTAMP | DEFAULT_GENERATED on update CURRENT_TIMESTAMP |
+---------------+-------------------------------------+------+-----+-------------------+-----------------------------------------------+
12 rows in set (0.01 sec)

comment_count, like_count, view_count와 같은 경우가 성능을 위해 비정규화를 적용한 대표적인 사례이다.

-- 현재 파티션 상태를 확인
mysql> select partition_name, partition_expression, partition_description, table_rows, round(data_length / 1024 / 1024, 2) as data_size_mb, round(index_length / 1024 / 1024, 2) as index_size_mb from information_schema.partitions where table_schema = 'financial_master_class' and table_name = 'stock_trades' and partition_name is null order by partition_ordinal_position;
Empty set (0.01 sec)

-- 파티션별 성능 분석
mysql> SELECT partition_name, ROUND(data_length/(1024*1024*1024),2) AS data_size_gb, table_rows, ROUND(table_rows/NULLIF(data_length/1024,0),2) AS rows_per_kb, CASE WHEN table_rows>50000000 THEN 'very large' WHEN table_rows>10000000 THEN 'large' WHEN table_rows>1000000 THEN 'medium' ELSE 'small' END AS partition_size_category FROM information_schema.partitions WHERE table_schema='financial_master_class' AND table_name='stock_trades' AND partition_name IS NOT NULL;
Empty set (0.01 sec)

-- 계좌별 거래 이력 테이블 (Hash 파티셔닝)
CREATE TABLE account_trade_history (
    history_id BIGINT UNSIGNED AUTO_INCREMENT,
    account_id BIGINT UNSIGNED NOT NULL,
    trade_id BIGINT UNSIGNED NOT NULL,
    trade_date DATE NOT NULL,
    symbol VARCHAR(10) NOT NULL,
    trade_type ENUM('BUY', 'SELL') NOT NULL,
    price DECIMAL(12,4) NOT NULL,
    volume INT UNSIGNED NOT NULL,
    trade_value DECIMAL(18,4) NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (history_id, account_id)
) ENGINE=InnoDB
PARTITION BY HASH(account_id)
PARTITIONS 16; -- 16개 파티션으로 분산

Hash Partitioning : 해시 파티셔닝은 특정 컬럼의 값을 해시 함수로 계산해서 자동으로 여러 파티션에 균등하게 분산시키는 방식이다.
- 데이터 균등 분산 : 특정 파티션에 데이터 몰림 현상 방지, 트래픽 분산 효과
- 쓰기 성능 향상 : INSERT가 여러 파티션에 나뉘어 들어간다.
- 샤딩 느낌 구조 : 논리적으로는 하나, 물리적으로는 분산
- 범위 조회에 약하다 : 모든 파티션을 다 조회해야 하기 때문에

-- 시장별 거래 데이터 (List 파티셔닝)
CREATE TABLE market_specific_trades (
    trade_id BIGINT UNSIGNED AUTO_INCREMENT,
    symbol VARCHAR(10) NOT NULL,
    market_code VARCHAR(10) NOT NULL,
    trade_date DATE NOT NULL,
    trade_timestamp TIMESTAMP(6) NOT NULL,
    price DECIMAL(12,4) NOT NULL,
    volume INT UNSIGNED NOT NULL,
    trade_type ENUM('BUY', 'SELL') NOT NULL,

    PRIMARY KEY (trade_id, market_code),
    INDEX idx_symbol_timestamp (symbol, trade_timestamp),
    INDEX idx_date_volume (trade_date, volume)

) ENGINE=InnoDB
    PARTITION BY LIST COLUMNS(market_code) (
        PARTITION p_kospi VALUES IN ('KOSPI'),
        PARTITION p_kosdaq VALUES IN ('KOSDAQ'),
        PARTITION p_nasdaq VALUES IN ('NASDAQ'),
        PARTITION p_nyse VALUES IN ('NYSE'),
        PARTITION p_other VALUES IN ('OTHER', 'CRYPTO', 'FOREX')
);

-- 선택도가 높은 컬럼이 앞쪽에 위치해야 한다.
CREATE INDEX idx_orders_user_status ON orders(user_id, status, created_at);

-- 카디널리티(현재 선택한 이 테이블에서 얼마나 고유한가) 확인
SELECT
    COUNT(DISTINCT email) / COUNT(*) as email_selectivity,
    COUNT(DISTINCT status) / COUNT(*) as status_selectivity
FROM users;

-- email(높은 카디널리티) > status(낮은 카디널리티)
CREATE INDEX idx_users_email_status ON users(email, status);