KR_Kafka - somaz94/python-study GitHub Wiki

Python Kafka 개념 정리


1๏ธโƒฃ Kafka ๊ธฐ์ดˆ

Kafka๋Š” ๊ณ ์„ฑ๋Šฅ ๋ถ„์‚ฐ ๋ฉ”์‹œ์ง• ์‹œ์Šคํ…œ์ด๋‹ค.

from kafka import KafkaProducer, KafkaConsumer
from kafka.admin import KafkaAdminClient, NewTopic
from typing import List, Dict, Any
import json

class KafkaConfig:
    """Connection settings shared by the Kafka clients in this guide.

    Attributes:
        bootstrap_servers: Broker address string handed to each client.
        client_id: Identifier passed to the admin client.
    """

    def __init__(
        self,
        bootstrap_servers: str = 'localhost:9092',
        client_id: str = 'python-kafka'
    ):
        self.bootstrap_servers = bootstrap_servers
        self.client_id = client_id

    def create_admin_client(self) -> "KafkaAdminClient":
        """Create an admin client.

        Returns:
            A new ``KafkaAdminClient``. The caller owns it and is
            responsible for calling ``close()`` when done.
        """
        return KafkaAdminClient(
            bootstrap_servers=self.bootstrap_servers,
            client_id=self.client_id
        )

    def create_topic(
        self,
        topic_name: str,
        num_partitions: int = 1,
        replication_factor: int = 1
    ):
        """Create a topic.

        Args:
            topic_name: Name of the topic to create.
            num_partitions: Number of partitions for the new topic.
            replication_factor: Replication factor for the new topic.
                Defaults to 1, the value that used to be hard-coded.

        Raises:
            Whatever the admin client raises when the request fails; the
            exception is propagated after the client has been closed.
        """
        admin_client = self.create_admin_client()
        # The admin client is opened only for this call, so always release
        # it, even when building or submitting the request fails.
        try:
            topic = NewTopic(
                name=topic_name,
                num_partitions=num_partitions,
                replication_factor=replication_factor
            )
            admin_client.create_topics([topic])
        finally:
            admin_client.close()

✅ 특징:

  • 분산 시스템
  • 고성능 처리
  • 확장성


2๏ธโƒฃ ํ”„๋กœ๋“€์„œ ๊ตฌํ˜„

Kafka์— ๋ฉ”์‹œ์ง€๋ฅผ ๋ฐœํ–‰ํ•˜๋Š” ํ”„๋กœ๋“€์„œ ๊ตฌํ˜„ ๋ฐฉ๋ฒ•์ด๋‹ค.

class KafkaMessageProducer:
    """Producer wrapper that publishes JSON values with string keys."""

    def __init__(self, config: "KafkaConfig"):
        """Create the underlying ``KafkaProducer``.

        Args:
            config: Supplies ``bootstrap_servers`` for the producer.
        """
        self.producer = KafkaProducer(
            bootstrap_servers=config.bootstrap_servers,
            # Values are serialized as UTF-8 encoded JSON.
            value_serializer=lambda v: json.dumps(v).encode('utf-8'),
            # Falsy keys (None or '') are sent as "no key".
            key_serializer=lambda k: k.encode('utf-8') if k else None
        )

    def send_message(
        self,
        topic: str,
        value: Dict,
        key: str = None,
        partition: int = None,
        timeout: float = 10
    ):
        """Send one message and wait for the result.

        Args:
            topic: Destination topic.
            value: JSON-serializable payload.
            key: Optional message key.
            partition: Optional explicit partition.
            timeout: Seconds to wait for the send to complete. Defaults to
                10, the value that used to be hard-coded.

        Returns:
            Whatever the send future's ``get`` returns.

        Raises:
            Whatever the send future's ``get`` raises on failure or timeout.
        """
        future = self.producer.send(
            topic=topic,
            value=value,
            key=key,
            partition=partition
        )
        # Blocking on the future makes this call synchronous.
        return future.get(timeout=timeout)

    def close(self):
        """Close the underlying producer and release its resources."""
        self.producer.close()

✅ 특징:

  • 비동기 전송
  • 직렬화
  • 파티셔닝


3๏ธโƒฃ ์ปจ์Šˆ๋จธ ๊ตฌํ˜„

Kafka์—์„œ ๋ฉ”์‹œ์ง€๋ฅผ ์†Œ๋น„ํ•˜๋Š” ์ปจ์Šˆ๋จธ ๊ตฌํ˜„ ๋ฐฉ๋ฒ•์ด๋‹ค.

class KafkaMessageConsumer:
    """Consumer wrapper that decodes JSON values and dispatches to a handler."""

    def __init__(
        self,
        config: "KafkaConfig",
        topics: List[str],
        group_id: str
    ):
        """Create the underlying ``KafkaConsumer``.

        Args:
            config: Supplies ``bootstrap_servers`` for the consumer.
            topics: Topics to subscribe to.
            group_id: Consumer group id.
        """
        self.consumer = KafkaConsumer(
            *topics,
            bootstrap_servers=config.bootstrap_servers,
            group_id=group_id,
            auto_offset_reset='earliest',
            enable_auto_commit=True,
            # Values are decoded from UTF-8 JSON.
            value_deserializer=lambda x: json.loads(x.decode('utf-8'))
        )

    def consume_messages(self, handler):
        """Pass every consumed message to ``handler`` until iteration ends.

        Args:
            handler: Callable invoked with each message object.

        Raises:
            Whatever ``handler`` or the consumer raises; the consumer is
            closed before the exception propagates.
        """
        try:
            for message in self.consumer:
                handler(message)
        finally:
            # Always release the consumer, including when the handler fails.
            self.close()

    def close(self):
        """Close the underlying consumer."""
        self.consumer.close()

✅ 특징:

  • 그룹 관리
  • 오프셋 관리
  • 자동 커밋


4๏ธโƒฃ ๋ฐฐ์น˜ ์ฒ˜๋ฆฌ

Kafka ๋ฉ”์‹œ์ง€๋ฅผ ์ผ๊ด„์ ์œผ๋กœ ์ฒ˜๋ฆฌํ•˜๋Š” ๋ฐฐ์น˜ ์ฒ˜๋ฆฌ ๋ฐฉ๋ฒ•์ด๋‹ค.

from typing import List, Callable
import time

class KafkaBatchProcessor:
    """Buffer messages and send them through a producer in batches.

    Attributes:
        producer: Object exposing ``send_message(topic, value)``.
        batch_size: Number of buffered messages that triggers a flush.
        flush_interval: Seconds since the last flush after which the next
            ``add_message`` call triggers a flush.
        messages: Pending ``(topic, message)`` tuples, oldest first.
        last_flush: ``time.time()`` value of the last successful flush.
    """

    def __init__(
        self,
        producer: "KafkaMessageProducer",
        batch_size: int = 100,
        flush_interval: int = 30
    ):
        self.producer = producer
        self.batch_size = batch_size
        self.flush_interval = flush_interval
        self.messages = []
        self.last_flush = time.time()

    def add_message(self, topic: str, message: Dict):
        """Add a message to the batch, flushing when a threshold is reached.

        The batch is flushed when it holds ``batch_size`` messages or when
        ``flush_interval`` seconds have passed since the last flush. The
        interval is only checked here; there is no background timer.

        Args:
            topic: Destination topic for this message.
            message: Payload to send.
        """
        self.messages.append((topic, message))

        batch_full = len(self.messages) >= self.batch_size
        interval_elapsed = (
            time.time() - self.last_flush >= self.flush_interval
        )
        if batch_full or interval_elapsed:
            self.flush_batch()

    def flush_batch(self) -> int:
        """Send every buffered message in order.

        Returns:
            The number of messages sent.

        Raises:
            Whatever ``producer.send_message`` raises. Messages sent before
            the failure are dropped from the buffer; the failing message and
            everything after it stay buffered for a later retry.
        """
        sent = 0
        try:
            for topic, message in self.messages:
                self.producer.send_message(topic, message)
                sent += 1
        finally:
            # Drop only what was actually delivered so nothing is lost or
            # sent twice when a send fails part-way through.
            del self.messages[:sent]
        self.last_flush = time.time()
        return sent

✅ 특징:

  • 배치 처리
  • 성능 최적화
  • 메모리 관리


5๏ธโƒฃ ์ŠคํŠธ๋ฆผ ์ฒ˜๋ฆฌ

Kafka ๋ฉ”์‹œ์ง€ ์ŠคํŠธ๋ฆผ์„ ์ฒ˜๋ฆฌํ•˜๋Š” ๋ฐฉ๋ฒ•์ด๋‹ค.

class KafkaStreamProcessor:
    """Consume one topic, transform each value, and publish to another.

    Attributes:
        input_topic: Topic the consumer subscribes to.
        output_topic: Topic transformed values are published to.
        producer: ``KafkaMessageProducer`` used for output.
        consumer: ``KafkaMessageConsumer`` subscribed to ``input_topic``.
    """

    def __init__(
        self,
        input_topic: str,
        output_topic: str,
        config: KafkaConfig,
        group_id: str
    ):
        # process_stream reads self.output_topic, so both topic names must
        # be stored on the instance.
        self.input_topic = input_topic
        self.output_topic = output_topic
        self.producer = KafkaMessageProducer(config)
        self.consumer = KafkaMessageConsumer(
            config,
            [input_topic],
            group_id
        )

    def process_stream(self, transform_func):
        """Process the stream.

        Each consumed message's ``value`` is passed to ``transform_func``
        and the result is sent to ``output_topic``.

        Args:
            transform_func: Callable taking a message value and returning
                the value to publish.
        """
        def message_handler(message):
            transformed_value = transform_func(message.value)
            self.producer.send_message(
                self.output_topic,
                transformed_value
            )

        self.consumer.consume_messages(message_handler)

✅ 특징:

  • 실시간 처리
  • 변환 파이프라인
  • 스트림 연결


주요 팁

✅ 모범 사례:

  • 파티션 전략 수립
  • 메시지 순서 보장
  • 오프셋 관리
  • 재시도 처리
  • 배치 처리 최적화
  • 모니터링 구축
  • 에러 처리
  • 확장성 고려
  • 보안 설정
  • 성능 튜닝


โš ๏ธ **GitHub.com Fallback** โš ๏ธ