359. Logger Rate Limiter - cocoder39/coco39_LC GitHub Wiki

359. Logger Rate Limiter

class Logger:

    def __init__(self):
        self.msg_to_time = {}

    def shouldPrintMessage(self, timestamp: int, message: str) -> bool:
        if message not in self.msg_to_time or self.msg_to_time[message] + 10 <= timestamp:
            self.msg_to_time[message] = timestamp
            return True
        else:
            return False

follow up: (1) present all the messages need to be printed (2) If duplicate messages come within a 10secs window then discard previous and current message

  • use map + doubly linked list to achieve O(1) deletion by key
class ListNode:
    def __init__(self, message: str = None):
        self.message = message
        self.next = None
        self.prev = None

class Logger:
    def __init__(self):
        self.message_timestamp = {}
        self.head = ListNode()
        self.tail = ListNode()
        self.head.next = self.tail
        self.tail.prev = self.head
        self.message_node_map = {}

    def shouldPrintMessage(self, timestamp: int, message: str) -> bool:
        if message in self.message_timestamp:
            prev_timestamp = self.message_timestamp[message]
            if timestamp - prev_timestamp < 10:
                # Discard both the previous and the current message
                self._remove_message(message)
                del self.message_timestamp[message]
                return False
        
        # Either it's a new message or it's been more than 10 seconds since the last print
        self.message_timestamp[message] = timestamp
        self._append_message(message)
        return True

    def _append_message(self, message: str):
        new_node = ListNode(message)
        last = self.tail.prev
        last.next = new_node
        new_node.prev = last
        new_node.next = self.tail
        self.tail.prev = new_node
        self.message_node_map[message] = new_node

    def _remove_message(self, message: str):
        node = self.message_node_map.get(message)
        if not node:
            return
        node.prev.next = node.next
        node.next.prev = node.prev
        del self.message_node_map[message]

follow up: how to clean up unneeded logs

  • a lock-free approach to leverage time to coordinate operations across 3 sets: active set and last_active_set hold data in the past 10s, the remaining set hold data older than 10s
    • (int(time.time()) // 10) % 3 to rotate current active set every 10s
    • last active set is (cur_set - 1) % 3
    • every time check both cur active set and last active set. only those 2 could hold data from within the past 10s
    • the set to clean up is (cur_set + 1) % 3, which hold data older than 10s. cleaning up thread is invoked every 10s
    • can extend the cycle to 20s for safety then the set to clean up holding data older than 20s

side note: time.sleep(10) is not equivalent to a loop running for 10 seconds, burning CPU cycles while holding the GIL lock. The thread is switched away for 10 seconds, allowing other threads to run. https://stackoverflow.com/questions/61809897/why-does-time-sleep-not-get-affected-by-the-gil

import threading
import time

class Logger:
    def __init__(self):
        self.sets = [{}, {}, {}]
        self.cleaner_thread = threading.Thread(target=self.cleaner, daemon=True)
        self.cleaner_thread.start()

    def get_current_set(self):
        return (int(time.time()) // 10) % 3

    def cleaner(self):
        while True:
            time.sleep(10)  
            set_to_clean = (self.get_current_set() + 1) % 3
            self.sets[set_to_clean].clear()

    def should_print_message(self, timestamp, message):
        current_set = self.get_current_set()
        previous_set = (current_set - 1) % 3

        if (message in self.sets[current_set] and timestamp - self.sets[current_set][message] < 10) or \
           (message in self.sets[previous_set] and timestamp - self.sets[previous_set][message] < 10):
            return False

        self.sets[current_set][message] = timestamp
        return True

# Testing the Logger
logger = Logger()

# Simulating multiple threads logging messages
def log_messages(thread_id):
    messages = ["foo", "bar", "baz"]
    for i in range(3):
        for message in messages:
            timestamp = int(time.time())
            if logger.should_print_message(timestamp, message):
                print(f"Thread {thread_id} logged message: {message} at {timestamp}")
            time.sleep(1)

threads = [threading.Thread(target=log_messages, args=(i,)) for i in range(3)]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()
class Logger {
public:
    /** Initialize your data structure here. */
    Logger() {
        
    }
    
    /** Returns true if the message should be printed in the given timestamp, otherwise returns false.
        If this method returns false, the message will not be printed.
        The timestamp is in seconds granularity. */
    bool shouldPrintMessage(int timestamp, string message) {
        auto it = map.find(message);
        if (it == map.end() || it->second + 10 <= timestamp) {
            map[message] = timestamp;
            return true;
        }
        else {
            return false;
        }
    }
private:
    unordered_map<string, int> map;
};