MySQL - Data Modeling for Practical Service Development
Partitioning Strategies and Index Optimization for Scalable Databases
mysql> SHOW COLUMNS FROM posts;
+---------------+-------------------------------------+------+-----+-------------------+-----------------------------------------------+
| Field | Type | Null | Key | Default | Extra |
+---------------+-------------------------------------+------+-----+-------------------+-----------------------------------------------+
| post_id | bigint | NO | PRI | NULL | auto_increment |
| user_id | bigint | NO | MUL | NULL | |
| category_id | int | NO | MUL | NULL | |
| title | varchar(200) | NO | MUL | NULL | |
| content | text | NO | | NULL | |
| comment_count | int | YES | | 0 | |
| like_count | int | YES | | 0 | |
| view_count | int | YES | | 0 | |
| status | enum('DRAFT','PUBLISHED','DELETED') | YES | MUL | DRAFT | |
| is_featured | tinyint(1) | YES | MUL | 0 | |
| created_at | timestamp | YES | | CURRENT_TIMESTAMP | DEFAULT_GENERATED |
| updated_at | timestamp | YES | | CURRENT_TIMESTAMP | DEFAULT_GENERATED on update CURRENT_TIMESTAMP |
+---------------+-------------------------------------+------+-----+-------------------+-----------------------------------------------+
12 rows in set (0.01 sec)
comment_count, like_count, view_count와 같은 집계 컬럼은 조회할 때마다 COUNT 쿼리로 다시 계산하지 않도록, 성능을 위해 비정규화를 적용한 대표적인 사례이다.
-- Check the current partition layout of stock_trades: one row per partition
-- with its expression, boundary description, estimated row count and sizes.
-- INFORMATION_SCHEMA.PARTITIONS reports partition_name as NULL for a
-- non-partitioned table, so the filter must be IS NOT NULL to list the
-- table's actual partitions (the previous IS NULL filter could never do so).
mysql> SELECT
    ->     partition_name,
    ->     partition_expression,
    ->     partition_description,
    ->     table_rows,
    ->     ROUND(data_length / 1024 / 1024, 2) AS data_size_mb,
    ->     ROUND(index_length / 1024 / 1024, 2) AS index_size_mb
    -> FROM information_schema.partitions
    -> WHERE table_schema = 'financial_master_class'
    ->   AND table_name = 'stock_trades'
    ->   AND partition_name IS NOT NULL
    -> ORDER BY partition_ordinal_position;
Empty set (0.01 sec)
-- Per-partition analysis of stock_trades: data size in GB, row count,
-- row density (rows per KB of data) and a coarse size bucket.
-- NULLIF guards the density division against partitions with no data.
mysql> SELECT
    ->     partition_name,
    ->     ROUND(data_length/(1024*1024*1024),2) AS data_size_gb,
    ->     table_rows,
    ->     ROUND(table_rows/NULLIF(data_length/1024,0),2) AS rows_per_kb,
    ->     CASE
    ->         WHEN table_rows>50000000 THEN 'very large'
    ->         WHEN table_rows>10000000 THEN 'large'
    ->         WHEN table_rows>1000000 THEN 'medium'
    ->         ELSE 'small'
    ->     END AS partition_size_category
    -> FROM information_schema.partitions
    -> WHERE table_schema='financial_master_class'
    ->   AND table_name='stock_trades'
    ->   AND partition_name IS NOT NULL;
Empty set (0.01 sec)
-- Per-account trade history table (HASH partitioning).
-- Rows are distributed over 16 partitions by hashing account_id.
CREATE TABLE account_trade_history (
    history_id  BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    account_id  BIGINT UNSIGNED NOT NULL,
    trade_id    BIGINT UNSIGNED NOT NULL,
    trade_date  DATE NOT NULL,
    symbol      VARCHAR(10) NOT NULL,
    trade_type  ENUM('BUY', 'SELL') NOT NULL,
    price       DECIMAL(12,4) NOT NULL,
    volume      INT UNSIGNED NOT NULL,
    trade_value DECIMAL(18,4) NOT NULL,
    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- The partitioning column (account_id) must be part of every unique key,
    -- so it is included in the primary key next to the surrogate history_id.
    PRIMARY KEY (history_id, account_id),
    -- The primary key leads with history_id and cannot serve lookups by
    -- account_id; without this index an account query prunes to a single
    -- partition but then scans that whole partition.
    INDEX idx_account_date (account_id, trade_date)
) ENGINE=InnoDB
PARTITION BY HASH(account_id)
PARTITIONS 16; -- spread rows across 16 partitions
- Hash Partitioning : 해시 파티셔닝은 특정 컬럼의 값을 해시 함수로 계산해서 자동으로 여러 파티션에 균등하게 분산시키는 방식이다.
- 데이터 균등 분산 : 특정 파티션에 데이터 몰림 현상 방지, 트래픽 분산 효과
- 쓰기 성능 향상 : INSERT가 여러 파티션에 나뉘어 들어간다.
- 샤딩과 유사한 구조 : 논리적으로는 하나의 테이블이지만, 물리적으로는 여러 파티션에 분산 저장된다.
- 범위 조회에 약하다 : 해시 값은 원래 값의 순서를 보존하지 않아 연속된 값들이 여러 파티션에 흩어지므로, 범위 조건에서는 대부분 모든 파티션을 다 조회해야 하기 때문이다.
-- Per-market trade data (LIST partitioning).
-- Each row is routed to a partition by the literal value of market_code.
CREATE TABLE market_specific_trades (
    trade_id        BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    symbol          VARCHAR(10) NOT NULL,
    market_code     VARCHAR(10) NOT NULL,
    trade_date      DATE NOT NULL,
    trade_timestamp TIMESTAMP(6) NOT NULL,
    price           DECIMAL(12,4) NOT NULL,
    volume          INT UNSIGNED NOT NULL,
    trade_type      ENUM('BUY', 'SELL') NOT NULL,
    -- The partitioning column (market_code) is part of the primary key.
    PRIMARY KEY (trade_id, market_code),
    INDEX idx_symbol_timestamp (symbol, trade_timestamp),
    INDEX idx_date_volume (trade_date, volume)
) ENGINE=InnoDB
PARTITION BY LIST COLUMNS(market_code) (
    PARTITION p_kospi  VALUES IN ('KOSPI'),
    PARTITION p_kosdaq VALUES IN ('KOSDAQ'),
    PARTITION p_nasdaq VALUES IN ('NASDAQ'),
    PARTITION p_nyse   VALUES IN ('NYSE'),
    -- NOTE(review): no catch-all partition is defined; confirm that only the
    -- market codes listed here are ever inserted.
    PARTITION p_other  VALUES IN (
        'OTHER',
        'CRYPTO',
        'FOREX'
    )
);
-- Columns with higher selectivity should be placed first in a composite index.
CREATE INDEX idx_orders_user_status
    ON orders (user_id, status, created_at);
-- Check cardinality (how unique each column's values are within this table):
-- the ratio of distinct values to total rows, where a value closer to 1
-- means the column is more unique.
SELECT
    COUNT(DISTINCT email) / COUNT(*) AS email_selectivity,
    COUNT(DISTINCT status) / COUNT(*) AS status_selectivity
FROM users;
-- email (high cardinality) is placed ahead of status (low cardinality).
CREATE INDEX idx_users_email_status
    ON users (email, status);