LC 0480 [H] Sliding Window Median - ALawliet/algorithms GitHub Wiki

prereq: median data stream


heappushpop: push item on the heap, then pop and return the smallest item from the heap

The lazy deletion technique means that when items need to be removed, it just records the items but does not actually remove them. Actual deletions only happen when the top of the heap is queried and the real top of the heap is blocked by items that should have been removed. Only at that time are those removed items actually popped out of the heap. This technique ensures that items are always removed from the top of the heap, hence the O(log(size of heap)) time complexity for removing a item at any position. However, when using this technique, the overall time complexity for the whole problem is O(nlog(size of heap)) where size of heap is not always k because in the worst case scenario the size of heap can be n. An illustration of the time complexity is shown below following the coding part.

lo contains half elements
hi contains half (if even) or half + 1 (if odd) elements
class Solution:
    def medianSlidingWindow(self, nums: List[int], k: int) -> List[float]:
        if not nums or not k:
            return []
        lo = [] # max heap
        hi = [] # min heap
        for i in range(k):
            if len(lo) == len(hi): # hi is always either equal or 1 greater than lo
                heappush(hi, -heappushpop(lo, -nums[i]))
            else: # just push to lo 
                heappush(lo, -heappushpop(hi, nums[i]))

        ans = [float(hi[0])] if k & 1 else [(hi[0] + -lo[0]) / 2.0]
        to_remove = defaultdict(int)
        for i in range(k, len(nums)): # right bound of window
            heappush(lo, -heappushpop(hi, nums[i])) # always push to lo

            out_num = nums[i-k] # number at left of sliding window
            if out_num > -lo[0]: # is greater than top of max heap
               heappush(hi, -heappop(lo)) # rebalance: move it from lo to hi 
            to_remove[out_num] += 1

            # invalidate lo first, then high, and deal with duplicates, and invalidate elements that fell out of the window
            while lo and to_remove[-lo[0]]:
                to_remove[-lo[0]] -= 1 # top of max heap
                heappop(lo)
            while to_remove[hi[0]]: # hi always has at least 1, lo could be empty
                to_remove[hi[0]] -= 1 # bot of min heap
                heappop(hi)

            if k % 2:
                ans.append(float(hi[0]))
            else:
                ans.append((hi[0] + -lo[0]) / 2.0)
        return ans

https://thewayofnada.medium.com/a-very-thorough-solution-to-sliding-window-median-and-some-heap-magics-5091a3ed1cdc

from heapq import heappush, heappop

class Solution:
    def medianSlidingWindow(self, nums: List[int], k: int) -> List[float]:
        result = []
        max_heap, min_heap = [], []
        removed_set = set()
        self.max_heap_size, self.min_heap_size = 0, 0

        for i, num in enumerate(nums):
            self._add_number(max_heap, min_heap, num, i, removed_set)

            if i < k - 1:
                continue 
            
            if i > k - 1:
                self._delete_number(max_heap, min_heap, removed_set, nums[i - k], i - k)
            
            self._balance(max_heap, min_heap, removed_set)
            self._pop_removed_items(max_heap, min_heap, removed_set)
            if self.max_heap_size == self.min_heap_size + 1:
                median = -max_heap[0][0]
            else:
                median = (-max_heap[0][0] + min_heap[0][0]) / 2
            result.append(median)
        
        return result 
    

    def _add_number(self, max_heap, min_heap, number, index, removed_set):
        self._pop_removed_items(max_heap, min_heap, removed_set)
        if not max_heap or (number, index) <= (-max_heap[0][0], -max_heap[0][1]):
            heappush(max_heap, (-number, -index))
            self.max_heap_size += 1
        else:
            heappush(min_heap, (number, index))
            self.min_heap_size += 1 
    

    def _delete_number(self, max_heap, min_heap, removed_set, number, index):
        # it's guaranteed that max_heap is not empty
        self._pop_removed_items(max_heap, min_heap, removed_set)
        if (number, index) <= (-max_heap[0][0], -max_heap[0][1]):
            removed_set.add((-number, -index))
            self.max_heap_size -= 1 
        else:
            removed_set.add((number, index))
            self.min_heap_size -= 1 
    

    def _balance(self, max_heap, min_heap, removed_set):
        # at most one iteration in one of the while loops will be executed
        while self.max_heap_size > self.min_heap_size + 1:
            self._pop_removed_items(max_heap, min_heap, removed_set)

            negative_number, negative_index = heappop(max_heap)
            self.max_heap_size -= 1

            heappush(min_heap, (-negative_number, -negative_index))
            self.min_heap_size += 1 

        while self.max_heap_size < self.min_heap_size:
            self._pop_removed_items(max_heap, min_heap, removed_set)

            number, index = heappop(min_heap)
            self.min_heap_size -= 1 

            heappush(max_heap, (-number, -index))
            self.max_heap_size += 1 
    

    def _pop_removed_items(self, max_heap, min_heap, removed_set):
        while max_heap and max_heap[0] in removed_set:
            heappop(max_heap)
        
        while min_heap and min_heap[0] in removed_set:
            heappop(min_heap)

I was confused by the official solution and many of the top posts. This problem shouldn't be that hard! I ended up using the same template as in Question 295 and passed all tests on my first attempt. The key is using two heaps (just like 295) and keeping track of just two things - the element to include and the element to remove. Below is my Python solution:

A few important things to clarify:

  1. Two heaps (lo and hi). The size of hi, as measured by the number of valid elements it contains, is either equal to that of lo or one greater than that of lo, depending on the value of k. (This is an invariant we enforce when we add and remove elements from lo and hi). It's worth noting that by "valid" I mean elements within the current window.
  2. Lazy removal. I used a defaultdict to_remove to keep track of elements to be removed and their occurrances, and remove them if and only if they are at the top of either heaps.
  3. How to add and remove. The logic is extremely straightforward. When adding a new element, we always add to lo. If the element to be removed is in lo as well, great! We don't need to do anything because the heap sizes do not change. However, if the element to be removed happen to be in hi, we then pop an element from lo and add it to hi. Important: that element we pop is guaranteed be a valid element(!!) because otherwise it should have been removed during the previous iteration.
  4. Some may be worried that removing elements makes heaps imbalanced. That never happens! No matter how many elements are removed at the end of an iteration, they are invalid elements! The heap lo can contain all the invalid elements and much greater in size than hi, but still in perfect balance with hi. As long as lo and hi each contains half (or (half, half+1) when k is odd) of the elements in the current window, we say that they are balanced.
class Solution:
    def medianSlidingWindow(self, nums: List[int], k: int) -> List[float]:
        maxHeap = []
        minHeap = []
        result = [0.0 for x in range(len(nums) - k + 1)]
        
        def rebalance():
            if len(maxHeap) - len(minHeap) >= 2:
              heappush(minHeap, -heappop(maxHeap))
            elif len(maxHeap) < len(minHeap):
              heappush(maxHeap, -heappop(minHeap))

        def remove(heap, element):
            ind = heap.index(element)  # find the element
            # move the element to the end and delete it
            heap[ind] = heap[-1]
            del heap[-1]
            # we can use heapify to readjust the elements but that would be O(N),
            # instead, we will adjust only one element which will O(logN)
            if ind < len(heap):
              #heapq.heapify(heap)
              heapq._siftup(heap, ind)
              heapq._siftdown(heap, 0, ind)

        for r in range(len(nums)):
            if not maxHeap or nums[r] <= -maxHeap[0]:
                heappush(maxHeap, -nums[r])
            else:
                heappush(minHeap, nums[r])
                
            rebalance()
            
            if r - k + 1 >= 0:
                if len(maxHeap) == len(minHeap):
                    result[r - k + 1] = (-maxHeap[0] + minHeap[0]) / 2.0
                else:
                    result[r - k + 1] = float(-maxHeap[0])
                    
                elementToBeRemoved = nums[r - k + 1]
                if elementToBeRemoved <= -maxHeap[0]:
                  remove(maxHeap, -elementToBeRemoved)
                else:
                  remove(minHeap, elementToBeRemoved)

                rebalance()
        
        return result
time: O(n*k)
O(logk) - insert/remove from heap
O(k) - remove from sliding iwndow
space: O(k) store in sliding window