Find sets of disjoint sets from a list of tuples or sets in python

4.2k views Asked by At

here is the problem: I have a list of tuples (could be sets as well if needed). For instance:

a = [(1, 5), (4, 2), (4, 3), (5, 4), (6, 3), (7, 6)]

What I want to find is a list

r = [(1, 5, 4, 2, 3, 6, 7)]

because the intersection is not empty once all the sets are put together.

For the example

a = [(1, 5), (4, 2), (4, 3), (5, 4), (6, 3), (7, 6), (8, 9)]

the result should be

r = [(1, 5, 4, 2, 3, 6, 7), (8, 9)]

Hope the problem is clear. So what is the most elegant way to do this in python, if any?

Cheers

3

There are 3 answers

0
wim On BEST ANSWER

These are the connected components of a graph, and can be found using a graphing library such as networkx. For your second example:

>>> edges = [(1, 5), (4, 2), (4, 3), (5, 4), (6, 3), (7, 6), (8, 9)]
>>> graph = nx.Graph(edges) 
>>> [tuple(c) for c in nx.connected_components(graph)]
[(1, 2, 3, 4, 5, 6, 7), (8, 9)]
0
linello On

Take a look at this implementation, it's fast because it's using Disjoint set with path compression, both find and merge operations are log(n):

class DisjointSet(object):

    def __init__(self,size=None):
        if size is None:
            self.leader = {}  # maps a member to the group's leader
            self.group = {}  # maps a group leader to the group (which is a set)
            self.oldgroup = {}
            self.oldleader = {}
        else:
            self.group = { i:set([i]) for i in range(0,size) }
            self.leader = { i:i for i in range(0,size) }
            self.oldgroup = { i:set([i]) for i in range(0,size) }
            self.oldleader = { i:i for i in range(0,size) }                

    def add(self, a, b):
        self.oldgroup = self.group.copy()
        self.oldleader = self.leader.copy()
        leadera = self.leader.get(a)
        leaderb = self.leader.get(b)
        if leadera is not None:
            if leaderb is not None:
                if leadera == leaderb:
                    return  # nothing to do
                groupa = self.group[leadera]
                groupb = self.group[leaderb]
                if len(groupa) < len(groupb):
                    a, leadera, groupa, b, leaderb, groupb = b, leaderb, groupb, a, leadera, groupa
                groupa |= groupb
                del self.group[leaderb]
                for k in groupb:
                    self.leader[k] = leadera
            else:
                self.group[leadera].add(b)
                self.leader[b] = leadera
        else:
            if leaderb is not None:
                self.group[leaderb].add(a)
                self.leader[a] = leaderb
            else:
                self.leader[a] = self.leader[b] = a
                self.group[a] = set([a, b])

    def connected(self, a, b):
        leadera = self.leader.get(a)
        leaderb = self.leader.get(b)
        if leadera is not None:
            if leaderb is not None:
                return leadera == leaderb
            else:
                return False
        else:
            return False

    def undo(self):        
        self.group = self.oldgroup.copy()
        self.leader = self.oldleader.copy()


def test():
    x = DisjointSet()
    x.add(0,1)
    x.add(0,2)
    x.add(3,4)
    x.undo()
    print x.leader
    print x.group

if __name__ == "__main__":
    test()

You can also undo the last add. In your case you can do the following:

import DisjointSet
a = [(1, 5), (4, 2), (4, 3), (5, 4), (6, 3), (7, 6)]
d = DisjointSet()
for e in a:
    d.add(*e)
print d.group
print d.leader
0
Zek'i B. Ulu On
def pairs_to_whole(touching_pairs:list):
    out = []
    while len(touching_pairs)>0:
        first, *rest = touching_pairs
        first = set(first)

        lf = -1
        while len(first)>lf:
            lf = len(first)

            rest2 = []
            for r in rest:
                if len(first.intersection(set(r)))>0:
                    first |= set(r)
                else:
                    rest2.append(r)     
            rest = rest2

        out.append(first)
        touching_pairs = rest
    return out