테스트 데이터 삽입 ‐ (7) 게시글 댓글 데이터 삽입 - ttasjwi/board-system GitHub Wiki

댓글 데이터 생성

  • 댓글을 2400만건 생성한다.
    • 50%확률로 '2400만번' 게시글에 작성
    • 나머지 50% 확률은 1..2399만 9999번 게시글까지 랜덤으로 댓글을 작성
    • 각 게시글에는 댓글-대댓글/댓글-대댓글 순으로 댓글 작성 시도

댓글 테이블 생성

CREATE TABLE IF NOT EXISTS article_comments(
    article_comment_id             BIGINT        NOT NULL PRIMARY KEY,
    content                        VARCHAR(3000) NOT NULL,
    article_id                     BIGINT        NOT NULL,
    root_parent_comment_id         BIGINT        NOT NULL,
    writer_id                      BIGINT        NOT NULL,
    writer_nickname                VARCHAR(15)   NOT NULL,
    parent_comment_writer_id       BIGINT,
    parent_comment_writer_nickname VARCHAR(15),
    delete_status                  VARCHAR(20)   NOT NULL,
    created_at                     DATETIME      NOT NULL,
    modified_at                    DATETIME      NOT NULL
);

댓글 데이터 생성

package com.ttasjwi.board.system.app.articlecomment.data

import org.junit.jupiter.api.DisplayName
import org.junit.jupiter.api.Test
import java.io.BufferedWriter
import java.io.File
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import java.util.Random

class ArticleCommentDataInitializer {

    companion object {
        private const val USER_COUNT = 12_000_000L
        private const val ARTICLE_COUNT = 24_000_000L
        private const val COMMENT_COUNT = 24_000_000L
        private const val TARGET_ARTICLE_ID = 24_000_000L
        private val baseTime = LocalDateTime.of(2025, 6, 10, 0, 0)
        private val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")

        // 게시글 별로, 최근 0-1개의 댓글 보관
        // 게시글 Id -> (댓글Id, 댓글 작성자 Id)
        // 최근 10만건정도까지 유지(오래된 순으로 만료됨)
        private val articleCommentHistories = object : LinkedHashMap<Long, Pair<Long, Long>>(10_000, 0.75f, true) {
            override fun removeEldestEntry(eldest: Map.Entry<Long, Pair<Long, Long>>): Boolean {
                return size > 100_000
            }
        }
    }

    @Test
    @DisplayName("게시글 댓글 데이터 csv 파일 생성")
    fun initialize() {
        val csvFile = File("article-comments.csv")
        val random = Random()

        csvFile.bufferedWriter().use { writer ->

            // 2400만 댓글 작성
            for (articleCommentId in 1L..COMMENT_COUNT) {
                // 50% 확률로 target 게시글에 댓글 작성
                if (shouldWriteOnTargetArticle(random)) {
                    makeCommentOnArticle(articleCommentId, TARGET_ARTICLE_ID, random, writer)
                } else {
                    // 그 외의 경우에는 나머지 게시글에 댓글 랜덤 작성
                    val randomArticleId = random.nextLong(ARTICLE_COUNT - 1) + 1L
                    makeCommentOnArticle(articleCommentId, randomArticleId, random, writer)
                }

            }
        }
    }

    private fun shouldWriteOnTargetArticle(random: Random): Boolean {
        val randomDouble = random.nextDouble()

        // 50% 확률로 좋아요
        return randomDouble < 0.5
    }

    private fun makeCommentOnArticle(articleCommentId: Long, articleId: Long, random: Random, writer: BufferedWriter) {
        val writerId = random.nextLong(USER_COUNT) + 1L

        // 해당 게시글에 댓글이 있는 경우, 대댓글 작성
        if (hasParentComment(articleId)) {
            val (parentCommentId, parentCommentWriterId) = getParentCommentInfo(articleId)

            writeComment(
                articleCommentId = articleCommentId,
                articleId = articleId,
                rootParentCommentId = parentCommentId,
                writerId = writerId,
                parentCommentWriterId = parentCommentWriterId,
                writer = writer
            )
            removeCommentHistory(articleId)
        } else {
            // 댓글이 없다면, 루트댓글 작성
            writeComment(
                articleCommentId = articleCommentId,
                articleId = articleId,
                rootParentCommentId = articleCommentId,
                writerId = writerId,
                parentCommentWriterId = null,
                writer = writer
            )
            saveCommentHistory(articleId, articleCommentId, writerId)
        }
    }

    private fun hasParentComment(articleId: Long): Boolean {
        return articleCommentHistories.containsKey(articleId)
    }

    private fun getParentCommentInfo(articleId: Long): Pair<Long, Long> {
        return articleCommentHistories[articleId]!!
    }

    private fun removeCommentHistory(articleId: Long) {
        articleCommentHistories.remove(articleId)
    }

    private fun saveCommentHistory(articleId: Long, articleCommentId: Long, commentWriterId : Long) {
        articleCommentHistories[articleId] = Pair(articleCommentId, commentWriterId)
    }


    private fun writeComment(
        articleCommentId: Long,
        articleId: Long,
        rootParentCommentId: Long,
        writerId: Long,
        parentCommentWriterId: Long?,
        writer: BufferedWriter) {

        val content = makeContent(articleCommentId)
        val writerNickname = makeUserNickname(writerId)
        val parentCommentWriterNickname = parentCommentWriterId?.let { makeUserNickname(it) }
        val createdAt = baseTime.plusSeconds(articleCommentId).format(formatter)
        val modifiedAt = createdAt
        writer.write(
            "$articleCommentId,$content,$articleId,$rootParentCommentId,$writerId,$writerNickname,${parentCommentWriterId ?: ""},${parentCommentWriterNickname ?: ""},NOT_DELETED,$createdAt,$modifiedAt\n"
        )

        if (articleCommentId % 100_000L == 0L) {
            println("Written $articleCommentId lines...")
        }
    }

    private fun makeContent(articleCommentId: Long): String {
        return "content${makePaddedId(articleCommentId)}"
    }

    private fun makeUserNickname(userId: Long): String {
        return "user${makePaddedId(userId)}"
    }

    private fun makePaddedId(id: Long): String {
        return id.toString().padStart(8, '0')
    }
}

댓글 데이터 삽입

docker cp data/article-comments.csv mysql:/tmp/article-comments.csv
LOAD DATA INFILE '/tmp/article-comments.csv' 
INTO TABLE article_comments 
FIELDS TERMINATED BY ',' 
LINES TERMINATED BY '\n'
(article_comment_id, content, article_id, root_parent_comment_id, writer_id, writer_nickname, @parent_comment_writer_id, @parent_comment_writer_nickname, delete_status, created_at, modified_at)
SET
  parent_comment_writer_id = NULLIF(@parent_comment_writer_id, ''),
  parent_comment_writer_nickname = NULLIF(@parent_comment_writer_nickname, '');

댓글 조회

mysql> select count(*) from article_comments where article_id=24000000;
+----------+
| count(*) |
+----------+
| 12000214 |
+----------+
1 row in set (8.35 sec)

mysql> select * from article_comments where article_id = 24000000 order by root_parent_comment_id asc, article_comment_id asc limit 30;
+--------------------+-----------------+------------+------------------------+-----------+-----------------+--------------------------+--------------------------------+---------------+---------------------+---------------------+
| article_comment_id | content         | article_id | root_parent_comment_id | writer_id | writer_nickname | parent_comment_writer_id | parent_comment_writer_nickname | delete_status | created_at          | modified_at         |
+--------------------+-----------------+------------+------------------------+-----------+-----------------+--------------------------+--------------------------------+---------------+---------------------+---------------------+
|                  1 | content00000001 |   24000000 |                      1 |   3086886 | user03086886    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:01 | 2025-06-10 00:00:01 |
|                  2 | content00000002 |   24000000 |                      1 |   8523146 | user08523146    |                  3086886 | user03086886                   | NOT_DELETED   | 2025-06-10 00:00:02 | 2025-06-10 00:00:02 |
|                  3 | content00000003 |   24000000 |                      3 |   7765200 | user07765200    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:03 | 2025-06-10 00:00:03 |
|                  8 | content00000008 |   24000000 |                      3 |    781508 | user00781508    |                  7765200 | user07765200                   | NOT_DELETED   | 2025-06-10 00:00:08 | 2025-06-10 00:00:08 |
|                  9 | content00000009 |   24000000 |                      9 |   5851782 | user05851782    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:09 | 2025-06-10 00:00:09 |
|                 10 | content00000010 |   24000000 |                      9 |   6274014 | user06274014    |                  5851782 | user05851782                   | NOT_DELETED   | 2025-06-10 00:00:10 | 2025-06-10 00:00:10 |
|                 11 | content00000011 |   24000000 |                     11 |   1958281 | user01958281    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:11 | 2025-06-10 00:00:11 |
|                 13 | content00000013 |   24000000 |                     11 |  11032530 | user11032530    |                  1958281 | user01958281                   | NOT_DELETED   | 2025-06-10 00:00:13 | 2025-06-10 00:00:13 |
|                 14 | content00000014 |   24000000 |                     14 |   1945604 | user01945604    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:14 | 2025-06-10 00:00:14 |
|                 16 | content00000016 |   24000000 |                     14 |   4042469 | user04042469    |                  1945604 | user01945604                   | NOT_DELETED   | 2025-06-10 00:00:16 | 2025-06-10 00:00:16 |
|                 17 | content00000017 |   24000000 |                     17 |   6269382 | user06269382    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:17 | 2025-06-10 00:00:17 |
|                 18 | content00000018 |   24000000 |                     17 |   5304610 | user05304610    |                  6269382 | user06269382                   | NOT_DELETED   | 2025-06-10 00:00:18 | 2025-06-10 00:00:18 |
|                 19 | content00000019 |   24000000 |                     19 |   1906123 | user01906123    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:19 | 2025-06-10 00:00:19 |
|                 22 | content00000022 |   24000000 |                     19 |   4931423 | user04931423    |                  1906123 | user01906123                   | NOT_DELETED   | 2025-06-10 00:00:22 | 2025-06-10 00:00:22 |
|                 25 | content00000025 |   24000000 |                     25 |    136511 | user00136511    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:25 | 2025-06-10 00:00:25 |
|                 27 | content00000027 |   24000000 |                     25 |  10976066 | user10976066    |                   136511 | user00136511                   | NOT_DELETED   | 2025-06-10 00:00:27 | 2025-06-10 00:00:27 |
|                 28 | content00000028 |   24000000 |                     28 |   6972860 | user06972860    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:28 | 2025-06-10 00:00:28 |
|                 30 | content00000030 |   24000000 |                     28 |  11423393 | user11423393    |                  6972860 | user06972860                   | NOT_DELETED   | 2025-06-10 00:00:30 | 2025-06-10 00:00:30 |
|                 31 | content00000031 |   24000000 |                     31 |   5748247 | user05748247    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:31 | 2025-06-10 00:00:31 |
|                 32 | content00000032 |   24000000 |                     31 |    831868 | user00831868    |                  5748247 | user05748247                   | NOT_DELETED   | 2025-06-10 00:00:32 | 2025-06-10 00:00:32 |
|                 34 | content00000034 |   24000000 |                     34 |   1561266 | user01561266    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:34 | 2025-06-10 00:00:34 |
|                 41 | content00000041 |   24000000 |                     34 |   9324342 | user09324342    |                  1561266 | user01561266                   | NOT_DELETED   | 2025-06-10 00:00:41 | 2025-06-10 00:00:41 |
|                 42 | content00000042 |   24000000 |                     42 |  10464228 | user10464228    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:42 | 2025-06-10 00:00:42 |
|                 45 | content00000045 |   24000000 |                     42 |    958636 | user00958636    |                 10464228 | user10464228                   | NOT_DELETED   | 2025-06-10 00:00:45 | 2025-06-10 00:00:45 |
|                 46 | content00000046 |   24000000 |                     46 |  10423704 | user10423704    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:46 | 2025-06-10 00:00:46 |
|                 48 | content00000048 |   24000000 |                     46 |   9652292 | user09652292    |                 10423704 | user10423704                   | NOT_DELETED   | 2025-06-10 00:00:48 | 2025-06-10 00:00:48 |
|                 54 | content00000054 |   24000000 |                     54 |   2089023 | user02089023    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:54 | 2025-06-10 00:00:54 |
|                 55 | content00000055 |   24000000 |                     54 |   9418820 | user09418820    |                  2089023 | user02089023                   | NOT_DELETED   | 2025-06-10 00:00:55 | 2025-06-10 00:00:55 |
|                 57 | content00000057 |   24000000 |                     57 |   6567780 | user06567780    |                     NULL | NULL                           | NOT_DELETED   | 2025-06-10 00:00:57 | 2025-06-10 00:00:57 |
|                 59 | content00000059 |   24000000 |                     57 |    569252 | user00569252    |                  6567780 | user06567780                   | NOT_DELETED   | 2025-06-10 00:00:59 | 2025-06-10 00:00:59 |
+--------------------+-----------------+------------+------------------------+-----------+-----------------+--------------------------+--------------------------------+---------------+---------------------+---------------------+
30 rows in set (22.82 sec)

댓글수 테이블 생성

CREATE TABLE IF NOT EXISTS article_comment_counts(
    article_id    BIGINT NOT NULL PRIMARY KEY,
    comment_count BIGINT NOT NULL
);

댓글수 데이터 삽입

INSERT INTO article_comment_counts (article_id, comment_count)
SELECT article_id, COUNT(*) AS comment_count
FROM article_comments
GROUP BY article_id;

댓글수 데이터 조회

mysql> select * from article_comment_counts where article_id = 24000000;
+------------+---------------+
| article_id | comment_count |
+------------+---------------+
|   24000000 |      12000214 |
+------------+---------------+
1 row in set (0.00 sec)
⚠️ **GitHub.com Fallback** ⚠️