LC 0347 [M] Top K Frequent Elements - ALawliet/algorithms GitHub Wiki

hashmap + heap: O(nlogk)

from queue import PriorityQueue

class Solution:
    def topKFrequent(self, nums: List[int], k: int) -> List[int]:
        num_to_freq = Counter(nums)
        minH = PriorityQueue(k+1)
        
        for num, freq in num_to_freq.items():
            minH.put( (freq, num) )
            if minH.full():
                minH.get()
                
        return list(map(lambda freq_num: freq_num[1], minH.queue))

bucket sort: O(n)

There are solution, using quickselect with O(n) complexity in average, but I think they are overcomplicated: actually, there is O(n) solution, using bucket sort. The idea, is that frequency of any element can not be more than n. So, the plan is the following:

Create list of empty lists for bucktes: for frequencies 1, 2, ..., n.
Use Counter to count frequencies of elements in nums
Iterate over our Counter and add elements to corresponding buckets.
buckets is list of lists now, create one big list out of it.
Finally, take the k last elements from this list, these elements will be top K frequent elements.

Complexity: time complexity is O(n), because we first iterate over nums once and create buckets, then we flatten list of lists with total number of elements O(n) and finally we return last k elements. Space complexity is also O(n).

print(buckets) # returns a list of lists
[], [3], [2], [1], [], [], [](/ALawliet/algorithms/wiki/],-[3],-[2],-[1],-[],-[],-[)

print(*buckets) # returns an iterator of lists
[] [3] [2] [1] [] [] []

print(chain(*buckets))
<itertools.chain object at 0x7f5af1513550>

print(list(chain(*buckets))) # chaining an iterator gets rid of empty lists and extends the items into one list
[3, 2, 1]

filled_buckets = []
for l in buckets:
    if l: # is not empty
        filled_buckets += l # filled_buckets.extend(l), add each item without the list

class Solution:
    def topKFrequent(self, nums, k):
        buckets = [[] for _ in range(len(nums) + 1)] # n + 1 buckets

        for num, freq in Counter(nums).items():
            buckets[freq].append(num) 

        filled_buckets = list(chain(*buckets))
        return filled_buckets[::-1][:k] # reversed up to k

quickselect: O(n) average

class Solution:
    def topKFrequent(self, nums: List[int], k: int) -> List[int]:
        count = Counter(nums)
        unique = list(count.keys())
        
        def partition(left, right, pivot_index) -> int:
            pivot_frequency = count[unique[pivot_index]]
            # 1. move pivot to end
            unique[pivot_index], unique[right] = unique[right], unique[pivot_index]  
            
            # 2. move all less frequent elements to the left
            store_index = left
            for i in range(left, right):
                if count[unique[i]] < pivot_frequency:
                    unique[store_index], unique[i] = unique[i], unique[store_index]
                    store_index += 1

            # 3. move pivot to its final place
            unique[right], unique[store_index] = unique[store_index], unique[right]  
            
            return store_index
        
        def quickselect(left, right, k_smallest) -> None:
            """
            Sort a list within left..right till kth less frequent element
            takes its place. 
            """
            # base case: the list contains only one element
            if left == right: 
                return
            
            # select a random pivot_index
            pivot_index = random.randint(left, right)     
                            
            # find the pivot position in a sorted list   
            pivot_index = partition(left, right, pivot_index)
            
            # if the pivot is in its final sorted position
            if k_smallest == pivot_index:
                 return 
            # go left
            elif k_smallest < pivot_index:
                quickselect(left, pivot_index - 1, k_smallest)
            # go right
            else:
                quickselect(pivot_index + 1, right, k_smallest)
         
        n = len(unique) 
        # kth top frequent element is (n - k)th less frequent.
        # Do a partial sort: from less frequent to the most frequent, till
        # (n - k)th less frequent element takes its place (n - k) in a sorted array. 
        # All element on the left are less frequent.
        # All the elements on the right are more frequent.  
        quickselect(0, n - 1, n - k)
        # Return top k frequent elements
        return unique[n - k:]