721. Accounts Merge - cocoder39/coco39_LC GitHub Wiki

721. Accounts Merge

option 1: dfs

  • one challenge is how to form the graph. we have accounts, which is connections from account_id to emails. Then we need connections from email to account_ids email_to_accounts
  • to dfs an account_id, we can get associated emails based on accounts, then we can get connected account_id based on email_to_accounts

Suppose there are m customers and each customer has n emails. We need to iterate all emails 2 times and then sort emails for each customers so T = O(mn) + O(mnlogn) = O(mn*logn)

class Solution:
    def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
        email_to_accounts = collections.defaultdict(list)
        for i, account in enumerate(accounts):
            for email in account[1:]:
                email_to_accounts[email].append(i)
        
        n = len(accounts)
        visited = [False] * n
        output = []
        for i in range(n):
            if not visited[i]:
                emails = set()
                self.dfs(i, visited, emails, email_to_accounts, accounts)
                output.append([accounts[i][0]] + sorted(list(emails)))
        return output
            
    
    def dfs(self, index: int, visited: list, emails: set, email_to_accounts: dict, accounts: list) -> None:
        if visited[index]:
            return
        
        visited[index] = True
        for email in accounts[index][1:]:
            emails.add(email)
            for next_index in email_to_accounts[email]:
                self.dfs(next_index, visited, emails, email_to_accounts, accounts)

Option 2: union find

class Solution:
    def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
        parent = {}
        size = {}
        email_to_name = {}
        
        for account in accounts:
            name = account[0]
            for i in range(1, len(account)):
                email = account[i]
                parent[email] = email
                size[email] = 1
                email_to_name[email] = name
        
        def find(email):
            while email != parent[email]:
                parent[email] = parent[parent[email]]
                email = parent[email]
            return email
        
        def union(email1, email2):
            root_1, root_2 = find(email1), find(email2)
            if root_1 != root_2:
                if size[root_1] >= size[root_2]:
                    parent[root_2] = root_1
                    size[root_1] += size[root_2]
                else:
                    parent[root_1] = root_2
                    size[root_2] += size[root_1]
        
        for account in accounts:
            for i in range(2, len(account)):
                union(account[1], account[i])
        
        root_to_emails = collections.defaultdict(set)
        for account in accounts:
            for i in range(1, len(account)):
                email = account[i]
                root = find(email)
                root_to_emails[root].add(email)
        
        res = []
        for root in root_to_emails:
            account = [email_to_name[root]]
            for email in sorted(list(root_to_emails[root])):
                account.append(email)
            res.append(account)
        return res

Suppose there are m customers and each customer has n emails. We need to iterate all emails 3 times and then sort emails for each customers so T = O(mn) + O(mnlogn) = O(mn*logn)