347. Top K Frequent Elements - cocoder39/coco39_LC GitHub Wiki
Notes 2022
Option 1: bucket sort
class Solution:
def topKFrequent(self, nums: List[int], k: int) -> List[int]:
counter = collections.Counter(nums)
buckets = collections.defaultdict(list)
for num, count in counter.items():
buckets[count].append(num)
output = []
for count in range(len(nums), 0, -1):
if count in buckets:
for num in buckets[count]:
output.append(num)
if len(output) == k:
return output
return output
Option 2: quick selection
class Solution:
def topKFrequent(self, nums: List[int], k: int) -> List[int]:
counter = collections.Counter(nums)
unique_nums = list(counter.keys())
low, high = 0, len(unique_nums)-1
while low <= high:
pivot_idx = random.randint(low, high)
partition_idx = self.partition(unique_nums, counter, low, high, pivot_idx)
if partition_idx == k - 1:
return unique_nums[:k]
elif partition_idx < k - 1:
low = partition_idx + 1
else: # pivot_idx > k - 1
high = partition_idx - 1
return unique_nums[:k]
def partition(self, unique_nums, counter, low, high, pivot_idx):
pivot = unique_nums[pivot_idx]
unique_nums[pivot_idx], unique_nums[high] = unique_nums[high], unique_nums[pivot_idx]
partition_idx = low # last index where frequency is larger or equal to pivot, exclusive
for i in range(low, high):
num = unique_nums[i]
if counter[num] >= counter[pivot]:
unique_nums[partition_idx], unique_nums[i] = unique_nums[i], unique_nums[partition_idx]
partition_idx += 1
# last index where frequency is larger or equal to pivot, inclusive
unique_nums[partition_idx], unique_nums[high] = unique_nums[high], unique_nums[partition_idx]
return partition_idx
Option 3: size-N max heap or size-k min heap
- max heap: O(N) heapify + O(klgN)
- min heap: O(k logk) build heap + O((N-K) * logk)
========================================
idea is straightforward, get frequency of each number first, then get top k through priority_queue. One tip is
pq.push({item.second, item.first})
because pq sort according to first value in the pair by default.
time is O(n * log n), memory is O(n)
vector<int> topKFrequent(vector<int>& nums, int k) {
unordered_map<int, int> map;
for (auto num : nums) {
map[num]++;
}
priority_queue<pair<int, int>> pq;
for (auto &item : map) {
pq.push({item.second, item.first}); //pq sort based on first val by default
}
vector<int> res;
for (int i = 0; i < k; i++) {
res.push_back(pq.top().second);
pq.pop();
}
return res;
}
maintain a k-element min heap, achieving O(n * log k) time complexity
vector<int> topKFrequent(vector<int>& nums, int k) {
unordered_map<int, int> mp;
for (auto num : nums) {
mp[num]++;
}
priority_queue<pair<int, int>> pq;
for (auto item : mp) {
pq.push({-item.second, item.first});
if (pq.size() > k) {
pq.pop();
}
}
vector<int> res;
int sz = pq.size();
for (int i = 0; i < k && i < sz; i++) {
res.push_back(pq.top().second);
pq.pop();
}
reverse(res.begin(), res.end());
return res;
}
an optimization is using bucket to achieve O(n) time. The logic behind this solution is that the max frequency is no more than nums.size()
time is O(n) and memory is O(n)
vector<int> topKFrequent(vector<int>& nums, int k) {
unordered_map<int, int> map;
for (auto num : nums) {
map[num]++;
}
//list[i] contains number whose frequency is i, max frequency is nums.size()
vector<vector<int>> list(nums.size() + 1, vector<int>());
for (auto& item : map) {
list[item.second].push_back(item.first);
}
vector<int> res;
for (int i = list.size() - 1; i > 0 && res.size() < k; i--) {
for (auto num : list[i]) {
res.push_back(num);
}
}
res.resize(k);
return res;
}
Java implementation
- tree map
public List<Integer> topKFrequent(int[] nums, int k) {
Map<Integer, Integer> mp = new HashMap<>();
for (Integer num : nums) {
mp.put(num, mp.getOrDefault(num, 0) + 1);
}
TreeMap<Integer, List<Integer>> treeMp = new TreeMap<>();
for (Map.Entry<Integer, Integer> entry : mp.entrySet()) {
if (!treeMp.containsKey(entry.getValue())) {
treeMp.put(entry.getValue(), new LinkedList<>());
}
treeMp.get(entry.getValue()).add(entry.getKey());
}
List<Integer> res = new ArrayList<>();
while (res.size() < k) {
List<Integer> vals = treeMp.pollLastEntry().getValue();
for (int i = 0; i < vals.size() && res.size() < k; i++) {
res.add(vals.get(i));
}
}
return res;
}
- bucket sort
public List<Integer> topKFrequent(int[] nums, int k) {
Map<Integer, Integer> mp = new HashMap<>();
for (Integer num : nums) {
mp.put(num, mp.getOrDefault(num, 0) + 1);
}
List<Integer>[] buckets = new List[nums.length + 1];
for (Map.Entry<Integer, Integer> entry : mp.entrySet()) {
if (buckets[entry.getValue()] == null) {
buckets[entry.getValue()] = new ArrayList<>();
}
buckets[entry.getValue()].add(entry.getKey());
}
List<Integer> res = new ArrayList<>();
for (int i = buckets.length - 1; i >= 0; i--) {
if (buckets[i] == null) {
continue;
}
for (Integer n : buckets[i]) {
if (res.size() < k) {
res.add(n);
}
}
}
return res;
}
- min heap
public List<Integer> topKFrequent(int[] nums, int k) {
Map<Integer, Integer> mp = new HashMap<>();
for (Integer num : nums) {
mp.put(num, mp.getOrDefault(num, 0) + 1);
}
PriorityQueue<Map.Entry<Integer, Integer>> minHeap =
new PriorityQueue<>((entry1, entry2) -> (entry1.getValue() - entry2.getValue()));
List<Integer> res = new ArrayList<>();
for (Map.Entry<Integer, Integer> entry : mp.entrySet()) {
minHeap.add(entry);
if (minHeap.size() > k) {
minHeap.poll();
}
}
while (!minHeap.isEmpty()) {
res.add(minHeap.poll().getKey());
}
return res;
}
- max heap
public List<Integer> topKFrequent(int[] nums, int k) {
Map<Integer, Integer> mp = new HashMap<>();
for (Integer num : nums) {
mp.put(num, mp.getOrDefault(num, 0) + 1);
}
PriorityQueue<Map.Entry<Integer, Integer>> maxHeap =
new PriorityQueue<>((entry1, entry2) -> (entry2.getValue() - entry1.getValue()));
List<Integer> res = new ArrayList<>();
for (Map.Entry<Integer, Integer> entry : mp.entrySet()) {
maxHeap.add(entry);
if (maxHeap.size() > mp.size() - k) {
res.add(maxHeap.poll().getKey());
}
}
return res;
}