LC 0480 [H] Sliding Window Median - ALawliet/algorithms GitHub Wiki
prereq: median data stream
heappushpop: push item on the heap, then pop and return the smallest item from the heap
The lazy deletion technique means that when items need to be removed, it just records the items but does not actually remove them. Actual deletions only happen when the top of the heap is queried and the real top of the heap is blocked by items that should have been removed. Only at that time are those removed items actually popped out of the heap. This technique ensures that items are always removed from the top of the heap, hence the O(log(size of heap)) time complexity for removing a item at any position. However, when using this technique, the overall time complexity for the whole problem is O(nlog(size of heap)) where size of heap is not always k because in the worst case scenario the size of heap can be n. An illustration of the time complexity is shown below following the coding part.
lo contains half elements
hi contains half (if even) or half + 1 (if odd) elements
class Solution:
def medianSlidingWindow(self, nums: List[int], k: int) -> List[float]:
if not nums or not k:
return []
lo = [] # max heap
hi = [] # min heap
for i in range(k):
if len(lo) == len(hi): # hi is always either equal or 1 greater than lo
heappush(hi, -heappushpop(lo, -nums[i]))
else: # just push to lo
heappush(lo, -heappushpop(hi, nums[i]))
ans = [float(hi[0])] if k & 1 else [(hi[0] + -lo[0]) / 2.0]
to_remove = defaultdict(int)
for i in range(k, len(nums)): # right bound of window
heappush(lo, -heappushpop(hi, nums[i])) # always push to lo
out_num = nums[i-k] # number at left of sliding window
if out_num > -lo[0]: # is greater than top of max heap
heappush(hi, -heappop(lo)) # rebalance: move it from lo to hi
to_remove[out_num] += 1
# invalidate lo first, then high, and deal with duplicates, and invalidate elements that fell out of the window
while lo and to_remove[-lo[0]]:
to_remove[-lo[0]] -= 1 # top of max heap
heappop(lo)
while to_remove[hi[0]]: # hi always has at least 1, lo could be empty
to_remove[hi[0]] -= 1 # bot of min heap
heappop(hi)
if k % 2:
ans.append(float(hi[0]))
else:
ans.append((hi[0] + -lo[0]) / 2.0)
return ans
from heapq import heappush, heappop
class Solution:
def medianSlidingWindow(self, nums: List[int], k: int) -> List[float]:
result = []
max_heap, min_heap = [], []
removed_set = set()
self.max_heap_size, self.min_heap_size = 0, 0
for i, num in enumerate(nums):
self._add_number(max_heap, min_heap, num, i, removed_set)
if i < k - 1:
continue
if i > k - 1:
self._delete_number(max_heap, min_heap, removed_set, nums[i - k], i - k)
self._balance(max_heap, min_heap, removed_set)
self._pop_removed_items(max_heap, min_heap, removed_set)
if self.max_heap_size == self.min_heap_size + 1:
median = -max_heap[0][0]
else:
median = (-max_heap[0][0] + min_heap[0][0]) / 2
result.append(median)
return result
def _add_number(self, max_heap, min_heap, number, index, removed_set):
self._pop_removed_items(max_heap, min_heap, removed_set)
if not max_heap or (number, index) <= (-max_heap[0][0], -max_heap[0][1]):
heappush(max_heap, (-number, -index))
self.max_heap_size += 1
else:
heappush(min_heap, (number, index))
self.min_heap_size += 1
def _delete_number(self, max_heap, min_heap, removed_set, number, index):
# it's guaranteed that max_heap is not empty
self._pop_removed_items(max_heap, min_heap, removed_set)
if (number, index) <= (-max_heap[0][0], -max_heap[0][1]):
removed_set.add((-number, -index))
self.max_heap_size -= 1
else:
removed_set.add((number, index))
self.min_heap_size -= 1
def _balance(self, max_heap, min_heap, removed_set):
# at most one iteration in one of the while loops will be executed
while self.max_heap_size > self.min_heap_size + 1:
self._pop_removed_items(max_heap, min_heap, removed_set)
negative_number, negative_index = heappop(max_heap)
self.max_heap_size -= 1
heappush(min_heap, (-negative_number, -negative_index))
self.min_heap_size += 1
while self.max_heap_size < self.min_heap_size:
self._pop_removed_items(max_heap, min_heap, removed_set)
number, index = heappop(min_heap)
self.min_heap_size -= 1
heappush(max_heap, (-number, -index))
self.max_heap_size += 1
def _pop_removed_items(self, max_heap, min_heap, removed_set):
while max_heap and max_heap[0] in removed_set:
heappop(max_heap)
while min_heap and min_heap[0] in removed_set:
heappop(min_heap)
I was confused by the official solution and many of the top posts. This problem shouldn't be that hard! I ended up using the same template as in Question 295 and passed all tests on my first attempt. The key is using two heaps (just like 295) and keeping track of just two things - the element to include and the element to remove. Below is my Python solution:
A few important things to clarify:
- Two heaps (lo and hi). The size of hi, as measured by the number of valid elements it contains, is either equal to that of lo or one greater than that of lo, depending on the value of k. (This is an invariant we enforce when we add and remove elements from lo and hi). It's worth noting that by "valid" I mean elements within the current window.
- Lazy removal. I used a defaultdict to_remove to keep track of elements to be removed and their occurrances, and remove them if and only if they are at the top of either heaps.
- How to add and remove. The logic is extremely straightforward. When adding a new element, we always add to lo. If the element to be removed is in lo as well, great! We don't need to do anything because the heap sizes do not change. However, if the element to be removed happen to be in hi, we then pop an element from lo and add it to hi. Important: that element we pop is guaranteed be a valid element(!!) because otherwise it should have been removed during the previous iteration.
- Some may be worried that removing elements makes heaps imbalanced. That never happens! No matter how many elements are removed at the end of an iteration, they are invalid elements! The heap lo can contain all the invalid elements and much greater in size than hi, but still in perfect balance with hi. As long as lo and hi each contains half (or (half, half+1) when k is odd) of the elements in the current window, we say that they are balanced.
class Solution:
def medianSlidingWindow(self, nums: List[int], k: int) -> List[float]:
maxHeap = []
minHeap = []
result = [0.0 for x in range(len(nums) - k + 1)]
def rebalance():
if len(maxHeap) - len(minHeap) >= 2:
heappush(minHeap, -heappop(maxHeap))
elif len(maxHeap) < len(minHeap):
heappush(maxHeap, -heappop(minHeap))
def remove(heap, element):
ind = heap.index(element) # find the element
# move the element to the end and delete it
heap[ind] = heap[-1]
del heap[-1]
# we can use heapify to readjust the elements but that would be O(N),
# instead, we will adjust only one element which will O(logN)
if ind < len(heap):
#heapq.heapify(heap)
heapq._siftup(heap, ind)
heapq._siftdown(heap, 0, ind)
for r in range(len(nums)):
if not maxHeap or nums[r] <= -maxHeap[0]:
heappush(maxHeap, -nums[r])
else:
heappush(minHeap, nums[r])
rebalance()
if r - k + 1 >= 0:
if len(maxHeap) == len(minHeap):
result[r - k + 1] = (-maxHeap[0] + minHeap[0]) / 2.0
else:
result[r - k + 1] = float(-maxHeap[0])
elementToBeRemoved = nums[r - k + 1]
if elementToBeRemoved <= -maxHeap[0]:
remove(maxHeap, -elementToBeRemoved)
else:
remove(minHeap, elementToBeRemoved)
rebalance()
return result
time: O(n*k)
O(logk) - insert/remove from heap
O(k) - remove from sliding iwndow
space: O(k) store in sliding window