You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			171 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			171 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
""" fontTools.misc.classifyTools.py -- tools for classifying things.
 | 
						|
"""
 | 
						|
 | 
						|
 | 
						|
class Classifier(object):
    """
    Main Classifier object, used to classify things into similar sets.

    Sets are fed in with add() / update().  The classifier keeps a list of
    class sets such that each class is either a subset of, or disjoint
    from, every set added so far.  Results are read back with
    getThings(), getMapping() and getClasses().
    """

    def __init__(self, sort=True):
        """
        Create an empty classifier.

        If sort is true, getClasses() returns the class sets ordered by
        decreasing size, ties broken by the sorted contents of each set.
        Otherwise they are returned in the order they were identified.
        """
        self._things = set()  # set of all things known so far
        self._sets = []  # list of class sets produced so far
        self._mapping = {}  # map from things to their class set
        self._dirty = False  # True when _sets needs cleanup before output
        self._sort = sort

    def add(self, set_of_things):
        """
        Add a set to the classifier.  Any iterable is accepted.

        Falsy input (e.g. an empty list or set) is ignored.
        """
        if not set_of_things:
            return

        self._dirty = True

        things, sets, mapping = self._things, self._sets, self._mapping

        incoming = set(set_of_things)
        known = incoming.intersection(things)  # existing things
        fresh = incoming.difference(known)  # new things
        del incoming

        # Add new class for new things
        if fresh:
            things.update(fresh)
            sets.append(fresh)
            for thing in fresh:
                mapping[thing] = fresh
        del fresh

        while known:
            # Take one item and process the old class it belongs to
            old_class = mapping[next(iter(known))]
            split = old_class.intersection(known)

            # Update old class to remove items from new set.  The old class
            # may end up empty; it is left in the list here and dropped
            # later by _process().
            old_class.difference_update(split)

            # Remove processed items from todo list
            known.difference_update(split)

            # Add new class for the intersection with old class
            sets.append(split)
            for thing in split:
                mapping[thing] = split

    def update(self, list_of_sets):
        """
        Add a list of sets to the classifier.  Any iterable of iterables
        is accepted.
        """
        for s in list_of_sets:
            self.add(s)

    def _process(self):
        """Do any deferred processing: drop empty classes and, if
        requested, sort the remaining ones."""
        if not self._dirty:
            return

        # Classes that were split down to nothing by add() are discarded.
        live = [s for s in self._sets if s]

        if self._sort:
            # Largest classes first; equal-sized classes are ordered by
            # their sorted contents.
            live.sort(key=lambda s: (-len(s), sorted(s)))

        self._sets = live
        self._dirty = False

    # Output methods

    def getThings(self):
        """Returns the set of all things known so far.

        The return value belongs to the Classifier object and should NOT
        be modified while the classifier is still in use.
        """
        self._process()
        return self._things

    def getMapping(self):
        """Returns the mapping from things to their class set.

        The return value belongs to the Classifier object and should NOT
        be modified while the classifier is still in use.
        """
        self._process()
        return self._mapping

    def getClasses(self):
        """Returns the list of class sets.

        The return value belongs to the Classifier object and should NOT
        be modified while the classifier is still in use.
        """
        self._process()
        return self._sets
 | 
						|
 | 
						|
 | 
						|
def classify(list_of_sets, sort=True):
    """
    Takes an iterable of iterables (list of sets from here on; but any
    iterable works), and returns the smallest list of sets such that
    each set is either a subset of, or is disjoint from, each of the
    input sets.

    In other words, this function classifies all the things present in
    any of the input sets, into similar classes, based on which sets
    things are a member of.

    If sort=True, return class sets are sorted by decreasing size and
    their natural sort order within each class size.  Otherwise, class
    sets are returned in the order that they were identified, which is
    generally not significant.

    Returns a (classes, mapping) tuple: the list of class sets, and a
    dict mapping each thing to the class set it belongs to.

    >>> classify([]) == ([], {})
    True
    >>> classify([[]]) == ([], {})
    True
    >>> classify([[], []]) == ([], {})
    True
    >>> classify([[1]]) == ([{1}], {1: {1}})
    True
    >>> classify([[1,2]]) == ([{1, 2}], {1: {1, 2}, 2: {1, 2}})
    True
    >>> classify([[1],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}})
    True
    >>> classify([[1,2],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}})
    True
    >>> classify([[1,2],[2,4]]) == ([{1}, {2}, {4}], {1: {1}, 2: {2}, 4: {4}})
    True
    >>> classify([[1,2],[2,4,5]]) == (
    ...     [{4, 5}, {1}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}})
    True
    >>> classify([[1,2],[2,4,5]], sort=False) == (
    ...     [{1}, {4, 5}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}})
    True
    >>> classify([[1,2,9],[2,4,5]], sort=False) == (
    ...     [{1, 9}, {4, 5}, {2}], {1: {1, 9}, 2: {2}, 4: {4, 5}, 5: {4, 5},
    ...     9: {1, 9}})
    True
    >>> classify([[1,2,9,15],[2,4,5]], sort=False) == (
    ...     [{1, 9, 15}, {4, 5}, {2}], {1: {1, 9, 15}, 2: {2}, 4: {4, 5},
    ...     5: {4, 5}, 9: {1, 9, 15}, 15: {1, 9, 15}})
    True
    >>> classes, mapping = classify([[1,2,9,15],[2,4,5],[15,5]], sort=False)
    >>> set([frozenset(c) for c in classes]) == set(
    ...     [frozenset(s) for s in ({1, 9}, {4}, {2}, {5}, {15})])
    True
    >>> mapping == {1: {1, 9}, 2: {2}, 4: {4}, 5: {5}, 9: {1, 9}, 15: {15}}
    True
    """
    classifier = Classifier(sort=sort)
    classifier.update(list_of_sets)
    return classifier.getClasses(), classifier.getMapping()
 | 
						|
 | 
						|
 | 
						|
# When run as a script, execute the module's doctests and exit with the
# number of failed examples as the process status.
if __name__ == "__main__":
    import doctest
    import sys

    sys.exit(doctest.testmod(optionflags=doctest.ELLIPSIS).failed)
 |