|
1 """Classes to represent arbitrary sets (including sets of sets). |
|
2 |
|
3 This module implements sets using dictionaries whose values are |
|
4 ignored. The usual operations (union, intersection, deletion, etc.) |
|
5 are provided as both methods and operators. |
|
6 |
|
7 Important: sets are not sequences! While they support 'x in s', |
|
8 'len(s)', and 'for x in s', none of those operations are unique for |
|
9 sequences; for example, mappings support all three as well. The |
|
10 characteristic operation for sequences is subscripting with small |
|
11 integers: s[i], for i in range(len(s)). Sets don't support |
|
12 subscripting at all. Also, sequences allow multiple occurrences and |
|
13 their elements have a definite order; sets on the other hand don't |
|
14 record multiple occurrences and don't remember the order of element |
|
15 insertion (which is why they don't support s[i]). |
|
16 |
|
17 The following classes are provided: |
|
18 |
|
19 BaseSet -- All the operations common to both mutable and immutable |
|
20 sets. This is an abstract class, not meant to be directly |
|
21 instantiated. |
|
22 |
|
23 Set -- Mutable sets, subclass of BaseSet; not hashable. |
|
24 |
|
25 ImmutableSet -- Immutable sets, subclass of BaseSet; hashable. |
|
26 An iterable argument is mandatory to create an ImmutableSet. |
|
27 |
|
28 _TemporarilyImmutableSet -- A wrapper around a Set, hashable, |
|
29 giving the same hash value as the immutable set equivalent |
|
30 would have. Do not use this class directly. |
|
31 |
|
32 Only hashable objects can be added to a Set. In particular, you cannot |
|
33 really add a Set as an element to another Set; if you try, what is |
|
34 actually added is an ImmutableSet built from it (it compares equal to |
|
35 the one you tried adding). |
|
36 |
|
37 When you ask if `x in y' where x is a Set and y is a Set or |
|
38 ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and |
|
39 what's tested is actually `z in y'. |
|
40 |
|
41 """ |
|
42 |
|
43 # Code history: |
|
44 # |
|
45 # - Greg V. Wilson wrote the first version, using a different approach |
|
46 # to the mutable/immutable problem, and inheriting from dict. |
|
47 # |
|
48 # - Alex Martelli modified Greg's version to implement the current |
|
49 # Set/ImmutableSet approach, and make the data an attribute. |
|
50 # |
|
51 # - Guido van Rossum rewrote much of the code, made some API changes, |
|
52 # and cleaned up the docstrings. |
|
53 # |
|
54 # - Raymond Hettinger added a number of speedups and other |
|
55 # improvements. |
|
56 |
|
57 from __future__ import generators |
|
58 try: |
|
59 from itertools import ifilter, ifilterfalse |
|
60 except ImportError: |
|
61 # Code to make the module run under Py2.2 |
|
62 def ifilter(predicate, iterable): |
|
63 if predicate is None: |
|
64 def predicate(x): |
|
65 return x |
|
66 for x in iterable: |
|
67 if predicate(x): |
|
68 yield x |
|
69 def ifilterfalse(predicate, iterable): |
|
70 if predicate is None: |
|
71 def predicate(x): |
|
72 return x |
|
73 for x in iterable: |
|
74 if not predicate(x): |
|
75 yield x |
|
76 try: |
|
77 True, False |
|
78 except NameError: |
|
79 True, False = (0==0, 0!=0) |
|
80 |
|
81 __all__ = ['BaseSet', 'Set', 'ImmutableSet'] |
|
82 |
|
83 class BaseSet(object): |
|
84 """Common base class for mutable and immutable sets.""" |
|
85 |
|
86 __slots__ = ['_data'] |
|
87 |
|
88 # Constructor |
|
89 |
|
90 def __init__(self): |
|
91 """This is an abstract class.""" |
|
92 # Don't call this from a concrete subclass! |
|
93 if self.__class__ is BaseSet: |
|
94 raise TypeError, ("BaseSet is an abstract class. " |
|
95 "Use Set or ImmutableSet.") |
|
96 |
|
97 # Standard protocols: __len__, __repr__, __str__, __iter__ |
|
98 |
|
99 def __len__(self): |
|
100 """Return the number of elements of a set.""" |
|
101 return len(self._data) |
|
102 |
|
103 def __repr__(self): |
|
104 """Return string representation of a set. |
|
105 |
|
106 This looks like 'Set([<list of elements>])'. |
|
107 """ |
|
108 return self._repr() |
|
109 |
|
110 # __str__ is the same as __repr__ |
|
111 __str__ = __repr__ |
|
112 |
|
113 def _repr(self, sorted=False): |
|
114 elements = self._data.keys() |
|
115 if sorted: |
|
116 elements.sort() |
|
117 return '%s(%r)' % (self.__class__.__name__, elements) |
|
118 |
|
119 def __iter__(self): |
|
120 """Return an iterator over the elements or a set. |
|
121 |
|
122 This is the keys iterator for the underlying dict. |
|
123 """ |
|
124 return self._data.iterkeys() |
|
125 |
|
126 # Three-way comparison is not supported. However, because __eq__ is |
|
127 # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and |
|
128 # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this |
|
129 # case). |
|
130 |
|
131 def __cmp__(self, other): |
|
132 raise TypeError, "can't compare sets using cmp()" |
|
133 |
|
134 # Equality comparisons using the underlying dicts. Mixed-type comparisons |
|
135 # are allowed here, where Set == z for non-Set z always returns False, |
|
136 # and Set != z always True. This allows expressions like "x in y" to |
|
137 # give the expected result when y is a sequence of mixed types, not |
|
138 # raising a pointless TypeError just because y contains a Set, or x is |
|
139 # a Set and y contain's a non-set ("in" invokes only __eq__). |
|
140 # Subtle: it would be nicer if __eq__ and __ne__ could return |
|
141 # NotImplemented instead of True or False. Then the other comparand |
|
142 # would get a chance to determine the result, and if the other comparand |
|
143 # also returned NotImplemented then it would fall back to object address |
|
144 # comparison (which would always return False for __eq__ and always |
|
145 # True for __ne__). However, that doesn't work, because this type |
|
146 # *also* implements __cmp__: if, e.g., __eq__ returns NotImplemented, |
|
147 # Python tries __cmp__ next, and the __cmp__ here then raises TypeError. |
|
148 |
|
149 def __eq__(self, other): |
|
150 if isinstance(other, BaseSet): |
|
151 return self._data == other._data |
|
152 else: |
|
153 return False |
|
154 |
|
155 def __ne__(self, other): |
|
156 if isinstance(other, BaseSet): |
|
157 return self._data != other._data |
|
158 else: |
|
159 return True |
|
160 |
|
161 # Copying operations |
|
162 |
|
163 def copy(self): |
|
164 """Return a shallow copy of a set.""" |
|
165 result = self.__class__() |
|
166 result._data.update(self._data) |
|
167 return result |
|
168 |
|
169 __copy__ = copy # For the copy module |
|
170 |
|
171 def __deepcopy__(self, memo): |
|
172 """Return a deep copy of a set; used by copy module.""" |
|
173 # This pre-creates the result and inserts it in the memo |
|
174 # early, in case the deep copy recurses into another reference |
|
175 # to this same set. A set can't be an element of itself, but |
|
176 # it can certainly contain an object that has a reference to |
|
177 # itself. |
|
178 from copy import deepcopy |
|
179 result = self.__class__() |
|
180 memo[id(self)] = result |
|
181 data = result._data |
|
182 value = True |
|
183 for elt in self: |
|
184 data[deepcopy(elt, memo)] = value |
|
185 return result |
|
186 |
|
187 # Standard set operations: union, intersection, both differences. |
|
188 # Each has an operator version (e.g. __or__, invoked with |) and a |
|
189 # method version (e.g. union). |
|
190 # Subtle: Each pair requires distinct code so that the outcome is |
|
191 # correct when the type of other isn't suitable. For example, if |
|
192 # we did "union = __or__" instead, then Set().union(3) would return |
|
193 # NotImplemented instead of raising TypeError (albeit that *why* it |
|
194 # raises TypeError as-is is also a bit subtle). |
|
195 |
|
196 def __or__(self, other): |
|
197 """Return the union of two sets as a new set. |
|
198 |
|
199 (I.e. all elements that are in either set.) |
|
200 """ |
|
201 if not isinstance(other, BaseSet): |
|
202 return NotImplemented |
|
203 return self.union(other) |
|
204 |
|
205 def union(self, other): |
|
206 """Return the union of two sets as a new set. |
|
207 |
|
208 (I.e. all elements that are in either set.) |
|
209 """ |
|
210 result = self.__class__(self) |
|
211 result._update(other) |
|
212 return result |
|
213 |
|
214 def __and__(self, other): |
|
215 """Return the intersection of two sets as a new set. |
|
216 |
|
217 (I.e. all elements that are in both sets.) |
|
218 """ |
|
219 if not isinstance(other, BaseSet): |
|
220 return NotImplemented |
|
221 return self.intersection(other) |
|
222 |
|
223 def intersection(self, other): |
|
224 """Return the intersection of two sets as a new set. |
|
225 |
|
226 (I.e. all elements that are in both sets.) |
|
227 """ |
|
228 if not isinstance(other, BaseSet): |
|
229 other = Set(other) |
|
230 if len(self) <= len(other): |
|
231 little, big = self, other |
|
232 else: |
|
233 little, big = other, self |
|
234 common = ifilter(big._data.has_key, little) |
|
235 return self.__class__(common) |
|
236 |
|
237 def __xor__(self, other): |
|
238 """Return the symmetric difference of two sets as a new set. |
|
239 |
|
240 (I.e. all elements that are in exactly one of the sets.) |
|
241 """ |
|
242 if not isinstance(other, BaseSet): |
|
243 return NotImplemented |
|
244 return self.symmetric_difference(other) |
|
245 |
|
246 def symmetric_difference(self, other): |
|
247 """Return the symmetric difference of two sets as a new set. |
|
248 |
|
249 (I.e. all elements that are in exactly one of the sets.) |
|
250 """ |
|
251 result = self.__class__() |
|
252 data = result._data |
|
253 value = True |
|
254 selfdata = self._data |
|
255 try: |
|
256 otherdata = other._data |
|
257 except AttributeError: |
|
258 otherdata = Set(other)._data |
|
259 for elt in ifilterfalse(otherdata.has_key, selfdata): |
|
260 data[elt] = value |
|
261 for elt in ifilterfalse(selfdata.has_key, otherdata): |
|
262 data[elt] = value |
|
263 return result |
|
264 |
|
265 def __sub__(self, other): |
|
266 """Return the difference of two sets as a new Set. |
|
267 |
|
268 (I.e. all elements that are in this set and not in the other.) |
|
269 """ |
|
270 if not isinstance(other, BaseSet): |
|
271 return NotImplemented |
|
272 return self.difference(other) |
|
273 |
|
274 def difference(self, other): |
|
275 """Return the difference of two sets as a new Set. |
|
276 |
|
277 (I.e. all elements that are in this set and not in the other.) |
|
278 """ |
|
279 result = self.__class__() |
|
280 data = result._data |
|
281 try: |
|
282 otherdata = other._data |
|
283 except AttributeError: |
|
284 otherdata = Set(other)._data |
|
285 value = True |
|
286 for elt in ifilterfalse(otherdata.has_key, self): |
|
287 data[elt] = value |
|
288 return result |
|
289 |
|
290 # Membership test |
|
291 |
|
292 def __contains__(self, element): |
|
293 """Report whether an element is a member of a set. |
|
294 |
|
295 (Called in response to the expression `element in self'.) |
|
296 """ |
|
297 try: |
|
298 return element in self._data |
|
299 except TypeError: |
|
300 transform = getattr(element, "__as_temporarily_immutable__", None) |
|
301 if transform is None: |
|
302 raise # re-raise the TypeError exception we caught |
|
303 return transform() in self._data |
|
304 |
|
305 # Subset and superset test |
|
306 |
|
307 def issubset(self, other): |
|
308 """Report whether another set contains this set.""" |
|
309 self._binary_sanity_check(other) |
|
310 if len(self) > len(other): # Fast check for obvious cases |
|
311 return False |
|
312 for elt in ifilterfalse(other._data.has_key, self): |
|
313 return False |
|
314 return True |
|
315 |
|
316 def issuperset(self, other): |
|
317 """Report whether this set contains another set.""" |
|
318 self._binary_sanity_check(other) |
|
319 if len(self) < len(other): # Fast check for obvious cases |
|
320 return False |
|
321 for elt in ifilterfalse(self._data.has_key, other): |
|
322 return False |
|
323 return True |
|
324 |
|
325 # Inequality comparisons using the is-subset relation. |
|
326 __le__ = issubset |
|
327 __ge__ = issuperset |
|
328 |
|
329 def __lt__(self, other): |
|
330 self._binary_sanity_check(other) |
|
331 return len(self) < len(other) and self.issubset(other) |
|
332 |
|
333 def __gt__(self, other): |
|
334 self._binary_sanity_check(other) |
|
335 return len(self) > len(other) and self.issuperset(other) |
|
336 |
|
337 # Assorted helpers |
|
338 |
|
339 def _binary_sanity_check(self, other): |
|
340 # Check that the other argument to a binary operation is also |
|
341 # a set, raising a TypeError otherwise. |
|
342 if not isinstance(other, BaseSet): |
|
343 raise TypeError, "Binary operation only permitted between sets" |
|
344 |
|
345 def _compute_hash(self): |
|
346 # Calculate hash code for a set by xor'ing the hash codes of |
|
347 # the elements. This ensures that the hash code does not depend |
|
348 # on the order in which elements are added to the set. This is |
|
349 # not called __hash__ because a BaseSet should not be hashable; |
|
350 # only an ImmutableSet is hashable. |
|
351 result = 0 |
|
352 for elt in self: |
|
353 result ^= hash(elt) |
|
354 return result |
|
355 |
|
356 def _update(self, iterable): |
|
357 # The main loop for update() and the subclass __init__() methods. |
|
358 data = self._data |
|
359 |
|
360 # Use the fast update() method when a dictionary is available. |
|
361 if isinstance(iterable, BaseSet): |
|
362 data.update(iterable._data) |
|
363 return |
|
364 |
|
365 value = True |
|
366 |
|
367 if type(iterable) in (list, tuple, xrange): |
|
368 # Optimized: we know that __iter__() and next() can't |
|
369 # raise TypeError, so we can move 'try:' out of the loop. |
|
370 it = iter(iterable) |
|
371 while True: |
|
372 try: |
|
373 for element in it: |
|
374 data[element] = value |
|
375 return |
|
376 except TypeError: |
|
377 transform = getattr(element, "__as_immutable__", None) |
|
378 if transform is None: |
|
379 raise # re-raise the TypeError exception we caught |
|
380 data[transform()] = value |
|
381 else: |
|
382 # Safe: only catch TypeError where intended |
|
383 for element in iterable: |
|
384 try: |
|
385 data[element] = value |
|
386 except TypeError: |
|
387 transform = getattr(element, "__as_immutable__", None) |
|
388 if transform is None: |
|
389 raise # re-raise the TypeError exception we caught |
|
390 data[transform()] = value |
|
391 |
|
392 |
|
393 class ImmutableSet(BaseSet): |
|
394 """Immutable set class.""" |
|
395 |
|
396 __slots__ = ['_hashcode'] |
|
397 |
|
398 # BaseSet + hashing |
|
399 |
|
400 def __init__(self, iterable=None): |
|
401 """Construct an immutable set from an optional iterable.""" |
|
402 self._hashcode = None |
|
403 self._data = {} |
|
404 if iterable is not None: |
|
405 self._update(iterable) |
|
406 |
|
407 def __hash__(self): |
|
408 if self._hashcode is None: |
|
409 self._hashcode = self._compute_hash() |
|
410 return self._hashcode |
|
411 |
|
412 def __getstate__(self): |
|
413 return self._data, self._hashcode |
|
414 |
|
415 def __setstate__(self, state): |
|
416 self._data, self._hashcode = state |
|
417 |
|
418 class Set(BaseSet): |
|
419 """ Mutable set class.""" |
|
420 |
|
421 __slots__ = [] |
|
422 |
|
423 # BaseSet + operations requiring mutability; no hashing |
|
424 |
|
425 def __init__(self, iterable=None): |
|
426 """Construct a set from an optional iterable.""" |
|
427 self._data = {} |
|
428 if iterable is not None: |
|
429 self._update(iterable) |
|
430 |
|
431 def __getstate__(self): |
|
432 # getstate's results are ignored if it is not |
|
433 return self._data, |
|
434 |
|
435 def __setstate__(self, data): |
|
436 self._data, = data |
|
437 |
|
438 def __hash__(self): |
|
439 """A Set cannot be hashed.""" |
|
440 # We inherit object.__hash__, so we must deny this explicitly |
|
441 raise TypeError, "Can't hash a Set, only an ImmutableSet." |
|
442 |
|
443 # In-place union, intersection, differences. |
|
444 # Subtle: The xyz_update() functions deliberately return None, |
|
445 # as do all mutating operations on built-in container types. |
|
446 # The __xyz__ spellings have to return self, though. |
|
447 |
|
448 def __ior__(self, other): |
|
449 """Update a set with the union of itself and another.""" |
|
450 self._binary_sanity_check(other) |
|
451 self._data.update(other._data) |
|
452 return self |
|
453 |
|
454 def union_update(self, other): |
|
455 """Update a set with the union of itself and another.""" |
|
456 self._update(other) |
|
457 |
|
458 def __iand__(self, other): |
|
459 """Update a set with the intersection of itself and another.""" |
|
460 self._binary_sanity_check(other) |
|
461 self._data = (self & other)._data |
|
462 return self |
|
463 |
|
464 def intersection_update(self, other): |
|
465 """Update a set with the intersection of itself and another.""" |
|
466 if isinstance(other, BaseSet): |
|
467 self &= other |
|
468 else: |
|
469 self._data = (self.intersection(other))._data |
|
470 |
|
471 def __ixor__(self, other): |
|
472 """Update a set with the symmetric difference of itself and another.""" |
|
473 self._binary_sanity_check(other) |
|
474 self.symmetric_difference_update(other) |
|
475 return self |
|
476 |
|
477 def symmetric_difference_update(self, other): |
|
478 """Update a set with the symmetric difference of itself and another.""" |
|
479 data = self._data |
|
480 value = True |
|
481 if not isinstance(other, BaseSet): |
|
482 other = Set(other) |
|
483 if self is other: |
|
484 self.clear() |
|
485 for elt in other: |
|
486 if elt in data: |
|
487 del data[elt] |
|
488 else: |
|
489 data[elt] = value |
|
490 |
|
491 def __isub__(self, other): |
|
492 """Remove all elements of another set from this set.""" |
|
493 self._binary_sanity_check(other) |
|
494 self.difference_update(other) |
|
495 return self |
|
496 |
|
497 def difference_update(self, other): |
|
498 """Remove all elements of another set from this set.""" |
|
499 data = self._data |
|
500 if not isinstance(other, BaseSet): |
|
501 other = Set(other) |
|
502 if self is other: |
|
503 self.clear() |
|
504 for elt in ifilter(data.has_key, other): |
|
505 del data[elt] |
|
506 |
|
507 # Python dict-like mass mutations: update, clear |
|
508 |
|
509 def update(self, iterable): |
|
510 """Add all values from an iterable (such as a list or file).""" |
|
511 self._update(iterable) |
|
512 |
|
513 def clear(self): |
|
514 """Remove all elements from this set.""" |
|
515 self._data.clear() |
|
516 |
|
517 # Single-element mutations: add, remove, discard |
|
518 |
|
519 def add(self, element): |
|
520 """Add an element to a set. |
|
521 |
|
522 This has no effect if the element is already present. |
|
523 """ |
|
524 try: |
|
525 self._data[element] = True |
|
526 except TypeError: |
|
527 transform = getattr(element, "__as_immutable__", None) |
|
528 if transform is None: |
|
529 raise # re-raise the TypeError exception we caught |
|
530 self._data[transform()] = True |
|
531 |
|
532 def remove(self, element): |
|
533 """Remove an element from a set; it must be a member. |
|
534 |
|
535 If the element is not a member, raise a KeyError. |
|
536 """ |
|
537 try: |
|
538 del self._data[element] |
|
539 except TypeError: |
|
540 transform = getattr(element, "__as_temporarily_immutable__", None) |
|
541 if transform is None: |
|
542 raise # re-raise the TypeError exception we caught |
|
543 del self._data[transform()] |
|
544 |
|
545 def discard(self, element): |
|
546 """Remove an element from a set if it is a member. |
|
547 |
|
548 If the element is not a member, do nothing. |
|
549 """ |
|
550 try: |
|
551 self.remove(element) |
|
552 except KeyError: |
|
553 pass |
|
554 |
|
555 def pop(self): |
|
556 """Remove and return an arbitrary set element.""" |
|
557 return self._data.popitem()[0] |
|
558 |
|
559 def __as_immutable__(self): |
|
560 # Return a copy of self as an immutable set |
|
561 return ImmutableSet(self) |
|
562 |
|
563 def __as_temporarily_immutable__(self): |
|
564 # Return self wrapped in a temporarily immutable set |
|
565 return _TemporarilyImmutableSet(self) |
|
566 |
|
567 |
|
568 class _TemporarilyImmutableSet(BaseSet): |
|
569 # Wrap a mutable set as if it was temporarily immutable. |
|
570 # This only supplies hashing and equality comparisons. |
|
571 |
|
572 def __init__(self, set): |
|
573 self._set = set |
|
574 self._data = set._data # Needed by ImmutableSet.__eq__() |
|
575 |
|
576 def __hash__(self): |
|
577 return self._set._compute_hash() |