- 댓글을 2400만건 생성한다.
- 50%확률로 '2400만번' 게시글에 작성
- 나머지 50% 확률은 1..2399만 9999번 게시글까지 랜덤으로 댓글을 작성
- 각 게시글에는 댓글-대댓글/댓글-대댓글 순으로 댓글 작성 시도
CREATE TABLE IF NOT EXISTS article_comments(
article_comment_id BIGINT NOT NULL PRIMARY KEY,
content VARCHAR(3000) NOT NULL,
article_id BIGINT NOT NULL,
root_parent_comment_id BIGINT NOT NULL,
writer_id BIGINT NOT NULL,
writer_nickname VARCHAR(15) NOT NULL,
parent_comment_writer_id BIGINT,
parent_comment_writer_nickname VARCHAR(15),
delete_status VARCHAR(20) NOT NULL,
created_at DATETIME NOT NULL,
modified_at DATETIME NOT NULL
);
package com.ttasjwi.board.system.app.articlecomment.data
import org.junit.jupiter.api.DisplayName
import org.junit.jupiter.api.Test
import java.io.BufferedWriter
import java.io.File
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import java.util.Random
class ArticleCommentDataInitializer {
companion object {
private const val USER_COUNT = 12_000_000L
private const val ARTICLE_COUNT = 24_000_000L
private const val COMMENT_COUNT = 24_000_000L
private const val TARGET_ARTICLE_ID = 24_000_000L
private val baseTime = LocalDateTime.of(2025, 6, 10, 0, 0)
private val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
// 게시글 별로, 최근 0-1개의 댓글 보관
// 게시글 Id -> (댓글Id, 댓글 작성자 Id)
// 최근 10만건정도까지 유지(오래된 순으로 만료됨)
private val articleCommentHistories = object : LinkedHashMap<Long, Pair<Long, Long>>(10_000, 0.75f, true) {
override fun removeEldestEntry(eldest: Map.Entry<Long, Pair<Long, Long>>): Boolean {
return size > 100_000
}
}
}
@Test
@DisplayName("게시글 댓글 데이터 csv 파일 생성")
fun initialize() {
val csvFile = File("article-comments.csv")
val random = Random()
csvFile.bufferedWriter().use { writer ->
// 2400만 댓글 작성
for (articleCommentId in 1L..COMMENT_COUNT) {
// 50% 확률로 target 게시글에 댓글 작성
if (shouldWriteOnTargetArticle(random)) {
makeCommentOnArticle(articleCommentId, TARGET_ARTICLE_ID, random, writer)
} else {
// 그 외의 경우에는 나머지 게시글에 댓글 랜덤 작성
val randomArticleId = random.nextLong(ARTICLE_COUNT - 1) + 1L
makeCommentOnArticle(articleCommentId, randomArticleId, random, writer)
}
}
}
}
private fun shouldWriteOnTargetArticle(random: Random): Boolean {
val randomDouble = random.nextDouble()
// 50% 확률로 좋아요
return randomDouble < 0.5
}
private fun makeCommentOnArticle(articleCommentId: Long, articleId: Long, random: Random, writer: BufferedWriter) {
val writerId = random.nextLong(USER_COUNT) + 1L
// 해당 게시글에 댓글이 있는 경우, 대댓글 작성
if (hasParentComment(articleId)) {
val (parentCommentId, parentCommentWriterId) = getParentCommentInfo(articleId)
writeComment(
articleCommentId = articleCommentId,
articleId = articleId,
rootParentCommentId = parentCommentId,
writerId = writerId,
parentCommentWriterId = parentCommentWriterId,
writer = writer
)
removeCommentHistory(articleId)
} else {
// 댓글이 없다면, 루트댓글 작성
writeComment(
articleCommentId = articleCommentId,
articleId = articleId,
rootParentCommentId = articleCommentId,
writerId = writerId,
parentCommentWriterId = null,
writer = writer
)
saveCommentHistory(articleId, articleCommentId, writerId)
}
}
private fun hasParentComment(articleId: Long): Boolean {
return articleCommentHistories.containsKey(articleId)
}
private fun getParentCommentInfo(articleId: Long): Pair<Long, Long> {
return articleCommentHistories[articleId]!!
}
private fun removeCommentHistory(articleId: Long) {
articleCommentHistories.remove(articleId)
}
private fun saveCommentHistory(articleId: Long, articleCommentId: Long, commentWriterId : Long) {
articleCommentHistories[articleId] = Pair(articleCommentId, commentWriterId)
}
private fun writeComment(
articleCommentId: Long,
articleId: Long,
rootParentCommentId: Long,
writerId: Long,
parentCommentWriterId: Long?,
writer: BufferedWriter) {
val content = makeContent(articleCommentId)
val writerNickname = makeUserNickname(writerId)
val parentCommentWriterNickname = parentCommentWriterId?.let { makeUserNickname(it) }
val createdAt = baseTime.plusSeconds(articleCommentId).format(formatter)
val modifiedAt = createdAt
writer.write(
"$articleCommentId,$content,$articleId,$rootParentCommentId,$writerId,$writerNickname,${parentCommentWriterId ?: ""},${parentCommentWriterNickname ?: ""},NOT_DELETED,$createdAt,$modifiedAt\n"
)
if (articleCommentId % 100_000L == 0L) {
println("Written $articleCommentId lines...")
}
}
private fun makeContent(articleCommentId: Long): String {
return "content${makePaddedId(articleCommentId)}"
}
private fun makeUserNickname(userId: Long): String {
return "user${makePaddedId(userId)}"
}
private fun makePaddedId(id: Long): String {
return id.toString().padStart(8, '0')
}
}
docker cp data/article-comments.csv mysql:/tmp/article-comments.csv
LOAD DATA INFILE '/tmp/article-comments.csv'
INTO TABLE article_comments
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
(article_comment_id, content, article_id, root_parent_comment_id, writer_id, writer_nickname, @parent_comment_writer_id, @parent_comment_writer_nickname, delete_status, created_at, modified_at)
SET
parent_comment_writer_id = NULLIF(@parent_comment_writer_id, ''),
parent_comment_writer_nickname = NULLIF(@parent_comment_writer_nickname, '');
mysql> select count(*) from article_comments where article_id=24000000;
+----------+
| count(*) |
+----------+
| 12000214 |
+----------+
1 row in set (8.35 sec)
mysql> select * from article_comments where article_id = 24000000 order by root_parent_comment_id asc, article_comment_id asc limit 30;
+--------------------+-----------------+------------+------------------------+-----------+-----------------+--------------------------+--------------------------------+---------------+---------------------+---------------------+
| article_comment_id | content | article_id | root_parent_comment_id | writer_id | writer_nickname | parent_comment_writer_id | parent_comment_writer_nickname | delete_status | created_at | modified_at |
+--------------------+-----------------+------------+------------------------+-----------+-----------------+--------------------------+--------------------------------+---------------+---------------------+---------------------+
| 1 | content00000001 | 24000000 | 1 | 3086886 | user03086886 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:01 | 2025-06-10 00:00:01 |
| 2 | content00000002 | 24000000 | 1 | 8523146 | user08523146 | 3086886 | user03086886 | NOT_DELETED | 2025-06-10 00:00:02 | 2025-06-10 00:00:02 |
| 3 | content00000003 | 24000000 | 3 | 7765200 | user07765200 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:03 | 2025-06-10 00:00:03 |
| 8 | content00000008 | 24000000 | 3 | 781508 | user00781508 | 7765200 | user07765200 | NOT_DELETED | 2025-06-10 00:00:08 | 2025-06-10 00:00:08 |
| 9 | content00000009 | 24000000 | 9 | 5851782 | user05851782 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:09 | 2025-06-10 00:00:09 |
| 10 | content00000010 | 24000000 | 9 | 6274014 | user06274014 | 5851782 | user05851782 | NOT_DELETED | 2025-06-10 00:00:10 | 2025-06-10 00:00:10 |
| 11 | content00000011 | 24000000 | 11 | 1958281 | user01958281 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:11 | 2025-06-10 00:00:11 |
| 13 | content00000013 | 24000000 | 11 | 11032530 | user11032530 | 1958281 | user01958281 | NOT_DELETED | 2025-06-10 00:00:13 | 2025-06-10 00:00:13 |
| 14 | content00000014 | 24000000 | 14 | 1945604 | user01945604 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:14 | 2025-06-10 00:00:14 |
| 16 | content00000016 | 24000000 | 14 | 4042469 | user04042469 | 1945604 | user01945604 | NOT_DELETED | 2025-06-10 00:00:16 | 2025-06-10 00:00:16 |
| 17 | content00000017 | 24000000 | 17 | 6269382 | user06269382 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:17 | 2025-06-10 00:00:17 |
| 18 | content00000018 | 24000000 | 17 | 5304610 | user05304610 | 6269382 | user06269382 | NOT_DELETED | 2025-06-10 00:00:18 | 2025-06-10 00:00:18 |
| 19 | content00000019 | 24000000 | 19 | 1906123 | user01906123 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:19 | 2025-06-10 00:00:19 |
| 22 | content00000022 | 24000000 | 19 | 4931423 | user04931423 | 1906123 | user01906123 | NOT_DELETED | 2025-06-10 00:00:22 | 2025-06-10 00:00:22 |
| 25 | content00000025 | 24000000 | 25 | 136511 | user00136511 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:25 | 2025-06-10 00:00:25 |
| 27 | content00000027 | 24000000 | 25 | 10976066 | user10976066 | 136511 | user00136511 | NOT_DELETED | 2025-06-10 00:00:27 | 2025-06-10 00:00:27 |
| 28 | content00000028 | 24000000 | 28 | 6972860 | user06972860 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:28 | 2025-06-10 00:00:28 |
| 30 | content00000030 | 24000000 | 28 | 11423393 | user11423393 | 6972860 | user06972860 | NOT_DELETED | 2025-06-10 00:00:30 | 2025-06-10 00:00:30 |
| 31 | content00000031 | 24000000 | 31 | 5748247 | user05748247 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:31 | 2025-06-10 00:00:31 |
| 32 | content00000032 | 24000000 | 31 | 831868 | user00831868 | 5748247 | user05748247 | NOT_DELETED | 2025-06-10 00:00:32 | 2025-06-10 00:00:32 |
| 34 | content00000034 | 24000000 | 34 | 1561266 | user01561266 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:34 | 2025-06-10 00:00:34 |
| 41 | content00000041 | 24000000 | 34 | 9324342 | user09324342 | 1561266 | user01561266 | NOT_DELETED | 2025-06-10 00:00:41 | 2025-06-10 00:00:41 |
| 42 | content00000042 | 24000000 | 42 | 10464228 | user10464228 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:42 | 2025-06-10 00:00:42 |
| 45 | content00000045 | 24000000 | 42 | 958636 | user00958636 | 10464228 | user10464228 | NOT_DELETED | 2025-06-10 00:00:45 | 2025-06-10 00:00:45 |
| 46 | content00000046 | 24000000 | 46 | 10423704 | user10423704 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:46 | 2025-06-10 00:00:46 |
| 48 | content00000048 | 24000000 | 46 | 9652292 | user09652292 | 10423704 | user10423704 | NOT_DELETED | 2025-06-10 00:00:48 | 2025-06-10 00:00:48 |
| 54 | content00000054 | 24000000 | 54 | 2089023 | user02089023 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:54 | 2025-06-10 00:00:54 |
| 55 | content00000055 | 24000000 | 54 | 9418820 | user09418820 | 2089023 | user02089023 | NOT_DELETED | 2025-06-10 00:00:55 | 2025-06-10 00:00:55 |
| 57 | content00000057 | 24000000 | 57 | 6567780 | user06567780 | NULL | NULL | NOT_DELETED | 2025-06-10 00:00:57 | 2025-06-10 00:00:57 |
| 59 | content00000059 | 24000000 | 57 | 569252 | user00569252 | 6567780 | user06567780 | NOT_DELETED | 2025-06-10 00:00:59 | 2025-06-10 00:00:59 |
+--------------------+-----------------+------------+------------------------+-----------+-----------------+--------------------------+--------------------------------+---------------+---------------------+---------------------+
30 rows in set (22.82 sec)
CREATE TABLE IF NOT EXISTS article_comment_counts(
article_id BIGINT NOT NULL PRIMARY KEY,
comment_count BIGINT NOT NULL
);
INSERT INTO article_comment_counts (article_id, comment_count)
SELECT article_id, COUNT(*) AS comment_count
FROM article_comments
GROUP BY article_id;
mysql> select * from article_comment_counts where article_id = 24000000;
+------------+---------------+
| article_id | comment_count |
+------------+---------------+
| 24000000 | 12000214 |
+------------+---------------+
1 row in set (0.00 sec)