347. Top K Frequent Elements - cocoder39/coco39_LC GitHub Wiki

347. Top K Frequent Elements

Notes 2022

Option 1: bucket sort

class Solution:
    def topKFrequent(self, nums: List[int], k: int) -> List[int]:
        counter = collections.Counter(nums)
        
        buckets = collections.defaultdict(list)
        for num, count in counter.items():
            buckets[count].append(num)
        
        output = []
        for count in range(len(nums), 0, -1):
            if count in buckets:
                for num in buckets[count]:
                    output.append(num)
                    if len(output) == k:
                        return output
        return output

Option 2: quick selection

class Solution:
    def topKFrequent(self, nums: List[int], k: int) -> List[int]:
        counter = collections.Counter(nums)
        unique_nums = list(counter.keys())

        low, high = 0, len(unique_nums)-1
        while low <= high:
            pivot_idx = random.randint(low, high)
            partition_idx = self.partition(unique_nums, counter, low, high, pivot_idx)
            if partition_idx == k - 1:
                return unique_nums[:k]
            elif partition_idx < k - 1:
                low = partition_idx + 1
            else: # pivot_idx > k - 1
                high = partition_idx - 1
        return unique_nums[:k]

    
    def partition(self, unique_nums, counter, low, high, pivot_idx):
        pivot = unique_nums[pivot_idx]
        unique_nums[pivot_idx], unique_nums[high] = unique_nums[high], unique_nums[pivot_idx]

        partition_idx = low # last index where frequency is larger or equal to pivot, exclusive
        for i in range(low, high):
            num = unique_nums[i]
            if counter[num] >= counter[pivot]:
                unique_nums[partition_idx], unique_nums[i] = unique_nums[i], unique_nums[partition_idx]
                partition_idx += 1
        # last index where frequency is larger or equal to pivot, inclusive
        unique_nums[partition_idx], unique_nums[high] = unique_nums[high], unique_nums[partition_idx]
        return partition_idx

Option 3: size-N max heap or size-k min heap

  • max heap: O(N) heapify + O(klgN)
  • min heap: O(k logk) build heap + O((N-K) * logk) ======================================== idea is straightforward, get frequency of each number first, then get top k through priority_queue. One tip is pq.push({item.second, item.first}) because pq sort according to first value in the pair by default.

time is O(n * log n), memory is O(n)

vector<int> topKFrequent(vector<int>& nums, int k) {
        unordered_map<int, int> map;
        for (auto num : nums) {
            map[num]++;
        }
        
        priority_queue<pair<int, int>> pq;
        for (auto &item : map) {
            pq.push({item.second, item.first}); //pq sort based on first val by default
        }
        
        vector<int> res;
        for (int i = 0; i < k; i++) {
            res.push_back(pq.top().second);
            pq.pop();
        }
        return res;
    }

maintain a k-element min heap, achieving O(n * log k) time complexity

vector<int> topKFrequent(vector<int>& nums, int k) {
        unordered_map<int, int> mp;
        for (auto num : nums) {
            mp[num]++;
        }
        
        priority_queue<pair<int, int>> pq;
        for (auto item : mp) {
            pq.push({-item.second, item.first});
            if (pq.size() > k) {
                pq.pop();
            }
        }
        
        vector<int> res;
        int sz = pq.size();
        for (int i = 0; i < k && i < sz; i++) {
            res.push_back(pq.top().second);
            pq.pop();
        }
        reverse(res.begin(), res.end());
        return res;
     }

an optimization is using bucket to achieve O(n) time. The logic behind this solution is that the max frequency is no more than nums.size()

time is O(n) and memory is O(n)

vector<int> topKFrequent(vector<int>& nums, int k) {
        unordered_map<int, int> map;
        for (auto num : nums) {
            map[num]++;
        }
        //list[i] contains number whose frequency is i, max frequency is nums.size()
        vector<vector<int>> list(nums.size() + 1, vector<int>()); 
        for (auto& item : map) {
            list[item.second].push_back(item.first);
        }
        
        vector<int> res;
        for (int i = list.size() - 1; i > 0 && res.size() < k; i--) {
            for (auto num : list[i]) {
                res.push_back(num);
            }
        } 
        res.resize(k);
        return res;
    }

Java implementation

  1. tree map
public List<Integer> topKFrequent(int[] nums, int k) {
        Map<Integer, Integer> mp = new HashMap<>();
        for (Integer num : nums) {
            mp.put(num, mp.getOrDefault(num, 0) + 1);
        }
        
        TreeMap<Integer, List<Integer>> treeMp = new TreeMap<>();
        for (Map.Entry<Integer, Integer> entry : mp.entrySet()) {
            if (!treeMp.containsKey(entry.getValue())) {
                treeMp.put(entry.getValue(), new LinkedList<>());
            }
            treeMp.get(entry.getValue()).add(entry.getKey());
        }
        
        List<Integer> res = new ArrayList<>();
        while (res.size() < k) {
            List<Integer> vals = treeMp.pollLastEntry().getValue();
            for (int i = 0; i < vals.size() && res.size() < k; i++) {
                res.add(vals.get(i));
            }
        }
        return res;
    }
  1. bucket sort
public List<Integer> topKFrequent(int[] nums, int k) {
        Map<Integer, Integer> mp = new HashMap<>();
        for (Integer num : nums) {
            mp.put(num, mp.getOrDefault(num, 0) + 1);
        }
        
        List<Integer>[] buckets = new List[nums.length + 1];
        for (Map.Entry<Integer, Integer> entry : mp.entrySet()) {
            if (buckets[entry.getValue()] == null) {
                buckets[entry.getValue()] = new ArrayList<>();
            }
            buckets[entry.getValue()].add(entry.getKey());
        }
        
        List<Integer> res = new ArrayList<>();
        for (int i = buckets.length - 1; i >= 0; i--) {
            if (buckets[i] == null) {
                continue;
            }
            for (Integer n : buckets[i]) {
                if (res.size() < k) {
                    res.add(n);
                }
            }
        }
        return res;
    }
  1. min heap
public List<Integer> topKFrequent(int[] nums, int k) {
        Map<Integer, Integer> mp = new HashMap<>();
        for (Integer num : nums) {
            mp.put(num, mp.getOrDefault(num, 0) + 1);
        }
        
        PriorityQueue<Map.Entry<Integer, Integer>> minHeap = 
            new PriorityQueue<>((entry1, entry2) -> (entry1.getValue() - entry2.getValue()));
        List<Integer> res = new ArrayList<>();
        
        for (Map.Entry<Integer, Integer> entry : mp.entrySet()) {
            minHeap.add(entry);
            if (minHeap.size() > k) {
                minHeap.poll();
            }
        }
        
        while (!minHeap.isEmpty()) {
            res.add(minHeap.poll().getKey());
        }
        return res;
    }
  1. max heap
public List<Integer> topKFrequent(int[] nums, int k) {
        Map<Integer, Integer> mp = new HashMap<>();
        for (Integer num : nums) {
            mp.put(num, mp.getOrDefault(num, 0) + 1);
        }
        
        PriorityQueue<Map.Entry<Integer, Integer>> maxHeap = 
            new PriorityQueue<>((entry1, entry2) -> (entry2.getValue() - entry1.getValue()));
        List<Integer> res = new ArrayList<>();
        
        for (Map.Entry<Integer, Integer> entry : mp.entrySet()) {
            maxHeap.add(entry);
            if (maxHeap.size() > mp.size() - k) {
                res.add(maxHeap.poll().getKey());
            }
        }
        return res;
    }
⚠️ **GitHub.com Fallback** ⚠️