|
1 """Classes to represent arbitrary sets (including sets of sets). |
|
2 |
|
3 This module implements sets using dictionaries whose values are |
|
4 ignored. The usual operations (union, intersection, deletion, etc.) |
|
5 are provided as both methods and operators. |
|
6 |
|
7 Important: sets are not sequences! While they support 'x in s', |
|
8 'len(s)', and 'for x in s', none of those operations are unique for |
|
9 sequences; for example, mappings support all three as well. The |
|
10 characteristic operation for sequences is subscripting with small |
|
11 integers: s[i], for i in range(len(s)). Sets don't support |
|
12 subscripting at all. Also, sequences allow multiple occurrences and |
|
13 their elements have a definite order; sets on the other hand don't |
|
14 record multiple occurrences and don't remember the order of element |
|
15 insertion (which is why they don't support s[i]). |
|
16 |
|
17 The following classes are provided: |
|
18 |
|
19 BaseSet -- All the operations common to both mutable and immutable |
|
20 sets. This is an abstract class, not meant to be directly |
|
21 instantiated. |
|
22 |
|
23 Set -- Mutable sets, subclass of BaseSet; not hashable. |
|
24 |
|
25 ImmutableSet -- Immutable sets, subclass of BaseSet; hashable. |
|
26 An iterable argument is mandatory to create an ImmutableSet. |
|
27 |
|
28 _TemporarilyImmutableSet -- A wrapper around a Set, hashable, |
|
29 giving the same hash value as the immutable set equivalent |
|
30 would have. Do not use this class directly. |
|
31 |
|
32 Only hashable objects can be added to a Set. In particular, you cannot |
|
33 really add a Set as an element to another Set; if you try, what is |
|
34 actually added is an ImmutableSet built from it (it compares equal to |
|
35 the one you tried adding). |
|
36 |
|
37 When you ask if `x in y' where x is a Set and y is a Set or |
|
38 ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and |
|
39 what's tested is actually `z in y'. |
|
40 |
|
41 """ |
|
42 |
|
43 # Code history: |
|
44 # |
|
45 # - Greg V. Wilson wrote the first version, using a different approach |
|
46 # to the mutable/immutable problem, and inheriting from dict. |
|
47 # |
|
48 # - Alex Martelli modified Greg's version to implement the current |
|
49 # Set/ImmutableSet approach, and make the data an attribute. |
|
50 # |
|
51 # - Guido van Rossum rewrote much of the code, made some API changes, |
|
52 # and cleaned up the docstrings. |
|
53 # |
|
54 # - Raymond Hettinger added a number of speedups and other |
|
55 # improvements. |
|
56 |
|
57 from __future__ import generators |
|
58 try: |
|
59 from itertools import ifilter, ifilterfalse |
|
60 except ImportError: |
|
61 # Code to make the module run under Py2.2 |
|
62 def ifilter(predicate, iterable): |
|
63 if predicate is None: |
|
64 def predicate(x): |
|
65 return x |
|
66 for x in iterable: |
|
67 if predicate(x): |
|
68 yield x |
|
69 def ifilterfalse(predicate, iterable): |
|
70 if predicate is None: |
|
71 def predicate(x): |
|
72 return x |
|
73 for x in iterable: |
|
74 if not predicate(x): |
|
75 yield x |
|
76 try: |
|
77 True, False |
|
78 except NameError: |
|
79 True, False = (0==0, 0!=0) |
|
80 |
|
81 __all__ = ['BaseSet', 'Set', 'ImmutableSet'] |
|
82 |
|
83 import warnings |
|
84 warnings.warn("the sets module is deprecated", DeprecationWarning, |
|
85 stacklevel=2) |
|
86 |
|
87 class BaseSet(object): |
|
88 """Common base class for mutable and immutable sets.""" |
|
89 |
|
90 __slots__ = ['_data'] |
|
91 |
|
92 # Constructor |
|
93 |
|
94 def __init__(self): |
|
95 """This is an abstract class.""" |
|
96 # Don't call this from a concrete subclass! |
|
97 if self.__class__ is BaseSet: |
|
98 raise TypeError, ("BaseSet is an abstract class. " |
|
99 "Use Set or ImmutableSet.") |
|
100 |
|
101 # Standard protocols: __len__, __repr__, __str__, __iter__ |
|
102 |
|
103 def __len__(self): |
|
104 """Return the number of elements of a set.""" |
|
105 return len(self._data) |
|
106 |
|
107 def __repr__(self): |
|
108 """Return string representation of a set. |
|
109 |
|
110 This looks like 'Set([<list of elements>])'. |
|
111 """ |
|
112 return self._repr() |
|
113 |
|
114 # __str__ is the same as __repr__ |
|
115 __str__ = __repr__ |
|
116 |
|
117 def _repr(self, sorted=False): |
|
118 elements = self._data.keys() |
|
119 if sorted: |
|
120 elements.sort() |
|
121 return '%s(%r)' % (self.__class__.__name__, elements) |
|
122 |
|
123 def __iter__(self): |
|
124 """Return an iterator over the elements or a set. |
|
125 |
|
126 This is the keys iterator for the underlying dict. |
|
127 """ |
|
128 return self._data.iterkeys() |
|
129 |
|
130 # Three-way comparison is not supported. However, because __eq__ is |
|
131 # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and |
|
132 # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this |
|
133 # case). |
|
134 |
|
135 def __cmp__(self, other): |
|
136 raise TypeError, "can't compare sets using cmp()" |
|
137 |
|
138 # Equality comparisons using the underlying dicts. Mixed-type comparisons |
|
139 # are allowed here, where Set == z for non-Set z always returns False, |
|
140 # and Set != z always True. This allows expressions like "x in y" to |
|
141 # give the expected result when y is a sequence of mixed types, not |
|
142 # raising a pointless TypeError just because y contains a Set, or x is |
|
143 # a Set and y contain's a non-set ("in" invokes only __eq__). |
|
144 # Subtle: it would be nicer if __eq__ and __ne__ could return |
|
145 # NotImplemented instead of True or False. Then the other comparand |
|
146 # would get a chance to determine the result, and if the other comparand |
|
147 # also returned NotImplemented then it would fall back to object address |
|
148 # comparison (which would always return False for __eq__ and always |
|
149 # True for __ne__). However, that doesn't work, because this type |
|
150 # *also* implements __cmp__: if, e.g., __eq__ returns NotImplemented, |
|
151 # Python tries __cmp__ next, and the __cmp__ here then raises TypeError. |
|
152 |
|
153 def __eq__(self, other): |
|
154 if isinstance(other, BaseSet): |
|
155 return self._data == other._data |
|
156 else: |
|
157 return False |
|
158 |
|
159 def __ne__(self, other): |
|
160 if isinstance(other, BaseSet): |
|
161 return self._data != other._data |
|
162 else: |
|
163 return True |
|
164 |
|
165 # Copying operations |
|
166 |
|
167 def copy(self): |
|
168 """Return a shallow copy of a set.""" |
|
169 result = self.__class__() |
|
170 result._data.update(self._data) |
|
171 return result |
|
172 |
|
173 __copy__ = copy # For the copy module |
|
174 |
|
175 def __deepcopy__(self, memo): |
|
176 """Return a deep copy of a set; used by copy module.""" |
|
177 # This pre-creates the result and inserts it in the memo |
|
178 # early, in case the deep copy recurses into another reference |
|
179 # to this same set. A set can't be an element of itself, but |
|
180 # it can certainly contain an object that has a reference to |
|
181 # itself. |
|
182 from copy import deepcopy |
|
183 result = self.__class__() |
|
184 memo[id(self)] = result |
|
185 data = result._data |
|
186 value = True |
|
187 for elt in self: |
|
188 data[deepcopy(elt, memo)] = value |
|
189 return result |
|
190 |
|
191 # Standard set operations: union, intersection, both differences. |
|
192 # Each has an operator version (e.g. __or__, invoked with |) and a |
|
193 # method version (e.g. union). |
|
194 # Subtle: Each pair requires distinct code so that the outcome is |
|
195 # correct when the type of other isn't suitable. For example, if |
|
196 # we did "union = __or__" instead, then Set().union(3) would return |
|
197 # NotImplemented instead of raising TypeError (albeit that *why* it |
|
198 # raises TypeError as-is is also a bit subtle). |
|
199 |
|
200 def __or__(self, other): |
|
201 """Return the union of two sets as a new set. |
|
202 |
|
203 (I.e. all elements that are in either set.) |
|
204 """ |
|
205 if not isinstance(other, BaseSet): |
|
206 return NotImplemented |
|
207 return self.union(other) |
|
208 |
|
209 def union(self, other): |
|
210 """Return the union of two sets as a new set. |
|
211 |
|
212 (I.e. all elements that are in either set.) |
|
213 """ |
|
214 result = self.__class__(self) |
|
215 result._update(other) |
|
216 return result |
|
217 |
|
218 def __and__(self, other): |
|
219 """Return the intersection of two sets as a new set. |
|
220 |
|
221 (I.e. all elements that are in both sets.) |
|
222 """ |
|
223 if not isinstance(other, BaseSet): |
|
224 return NotImplemented |
|
225 return self.intersection(other) |
|
226 |
|
227 def intersection(self, other): |
|
228 """Return the intersection of two sets as a new set. |
|
229 |
|
230 (I.e. all elements that are in both sets.) |
|
231 """ |
|
232 if not isinstance(other, BaseSet): |
|
233 other = Set(other) |
|
234 if len(self) <= len(other): |
|
235 little, big = self, other |
|
236 else: |
|
237 little, big = other, self |
|
238 common = ifilter(big._data.has_key, little) |
|
239 return self.__class__(common) |
|
240 |
|
241 def __xor__(self, other): |
|
242 """Return the symmetric difference of two sets as a new set. |
|
243 |
|
244 (I.e. all elements that are in exactly one of the sets.) |
|
245 """ |
|
246 if not isinstance(other, BaseSet): |
|
247 return NotImplemented |
|
248 return self.symmetric_difference(other) |
|
249 |
|
250 def symmetric_difference(self, other): |
|
251 """Return the symmetric difference of two sets as a new set. |
|
252 |
|
253 (I.e. all elements that are in exactly one of the sets.) |
|
254 """ |
|
255 result = self.__class__() |
|
256 data = result._data |
|
257 value = True |
|
258 selfdata = self._data |
|
259 try: |
|
260 otherdata = other._data |
|
261 except AttributeError: |
|
262 otherdata = Set(other)._data |
|
263 for elt in ifilterfalse(otherdata.has_key, selfdata): |
|
264 data[elt] = value |
|
265 for elt in ifilterfalse(selfdata.has_key, otherdata): |
|
266 data[elt] = value |
|
267 return result |
|
268 |
|
269 def __sub__(self, other): |
|
270 """Return the difference of two sets as a new Set. |
|
271 |
|
272 (I.e. all elements that are in this set and not in the other.) |
|
273 """ |
|
274 if not isinstance(other, BaseSet): |
|
275 return NotImplemented |
|
276 return self.difference(other) |
|
277 |
|
278 def difference(self, other): |
|
279 """Return the difference of two sets as a new Set. |
|
280 |
|
281 (I.e. all elements that are in this set and not in the other.) |
|
282 """ |
|
283 result = self.__class__() |
|
284 data = result._data |
|
285 try: |
|
286 otherdata = other._data |
|
287 except AttributeError: |
|
288 otherdata = Set(other)._data |
|
289 value = True |
|
290 for elt in ifilterfalse(otherdata.has_key, self): |
|
291 data[elt] = value |
|
292 return result |
|
293 |
|
294 # Membership test |
|
295 |
|
296 def __contains__(self, element): |
|
297 """Report whether an element is a member of a set. |
|
298 |
|
299 (Called in response to the expression `element in self'.) |
|
300 """ |
|
301 try: |
|
302 return element in self._data |
|
303 except TypeError: |
|
304 transform = getattr(element, "__as_temporarily_immutable__", None) |
|
305 if transform is None: |
|
306 raise # re-raise the TypeError exception we caught |
|
307 return transform() in self._data |
|
308 |
|
309 # Subset and superset test |
|
310 |
|
311 def issubset(self, other): |
|
312 """Report whether another set contains this set.""" |
|
313 self._binary_sanity_check(other) |
|
314 if len(self) > len(other): # Fast check for obvious cases |
|
315 return False |
|
316 for elt in ifilterfalse(other._data.has_key, self): |
|
317 return False |
|
318 return True |
|
319 |
|
320 def issuperset(self, other): |
|
321 """Report whether this set contains another set.""" |
|
322 self._binary_sanity_check(other) |
|
323 if len(self) < len(other): # Fast check for obvious cases |
|
324 return False |
|
325 for elt in ifilterfalse(self._data.has_key, other): |
|
326 return False |
|
327 return True |
|
328 |
|
329 # Inequality comparisons using the is-subset relation. |
|
330 __le__ = issubset |
|
331 __ge__ = issuperset |
|
332 |
|
333 def __lt__(self, other): |
|
334 self._binary_sanity_check(other) |
|
335 return len(self) < len(other) and self.issubset(other) |
|
336 |
|
337 def __gt__(self, other): |
|
338 self._binary_sanity_check(other) |
|
339 return len(self) > len(other) and self.issuperset(other) |
|
340 |
|
341 # Assorted helpers |
|
342 |
|
343 def _binary_sanity_check(self, other): |
|
344 # Check that the other argument to a binary operation is also |
|
345 # a set, raising a TypeError otherwise. |
|
346 if not isinstance(other, BaseSet): |
|
347 raise TypeError, "Binary operation only permitted between sets" |
|
348 |
|
349 def _compute_hash(self): |
|
350 # Calculate hash code for a set by xor'ing the hash codes of |
|
351 # the elements. This ensures that the hash code does not depend |
|
352 # on the order in which elements are added to the set. This is |
|
353 # not called __hash__ because a BaseSet should not be hashable; |
|
354 # only an ImmutableSet is hashable. |
|
355 result = 0 |
|
356 for elt in self: |
|
357 result ^= hash(elt) |
|
358 return result |
|
359 |
|
360 def _update(self, iterable): |
|
361 # The main loop for update() and the subclass __init__() methods. |
|
362 data = self._data |
|
363 |
|
364 # Use the fast update() method when a dictionary is available. |
|
365 if isinstance(iterable, BaseSet): |
|
366 data.update(iterable._data) |
|
367 return |
|
368 |
|
369 value = True |
|
370 |
|
371 if type(iterable) in (list, tuple, xrange): |
|
372 # Optimized: we know that __iter__() and next() can't |
|
373 # raise TypeError, so we can move 'try:' out of the loop. |
|
374 it = iter(iterable) |
|
375 while True: |
|
376 try: |
|
377 for element in it: |
|
378 data[element] = value |
|
379 return |
|
380 except TypeError: |
|
381 transform = getattr(element, "__as_immutable__", None) |
|
382 if transform is None: |
|
383 raise # re-raise the TypeError exception we caught |
|
384 data[transform()] = value |
|
385 else: |
|
386 # Safe: only catch TypeError where intended |
|
387 for element in iterable: |
|
388 try: |
|
389 data[element] = value |
|
390 except TypeError: |
|
391 transform = getattr(element, "__as_immutable__", None) |
|
392 if transform is None: |
|
393 raise # re-raise the TypeError exception we caught |
|
394 data[transform()] = value |
|
395 |
|
396 |
|
397 class ImmutableSet(BaseSet): |
|
398 """Immutable set class.""" |
|
399 |
|
400 __slots__ = ['_hashcode'] |
|
401 |
|
402 # BaseSet + hashing |
|
403 |
|
404 def __init__(self, iterable=None): |
|
405 """Construct an immutable set from an optional iterable.""" |
|
406 self._hashcode = None |
|
407 self._data = {} |
|
408 if iterable is not None: |
|
409 self._update(iterable) |
|
410 |
|
411 def __hash__(self): |
|
412 if self._hashcode is None: |
|
413 self._hashcode = self._compute_hash() |
|
414 return self._hashcode |
|
415 |
|
416 def __getstate__(self): |
|
417 return self._data, self._hashcode |
|
418 |
|
419 def __setstate__(self, state): |
|
420 self._data, self._hashcode = state |
|
421 |
|
422 class Set(BaseSet): |
|
423 """ Mutable set class.""" |
|
424 |
|
425 __slots__ = [] |
|
426 |
|
427 # BaseSet + operations requiring mutability; no hashing |
|
428 |
|
429 def __init__(self, iterable=None): |
|
430 """Construct a set from an optional iterable.""" |
|
431 self._data = {} |
|
432 if iterable is not None: |
|
433 self._update(iterable) |
|
434 |
|
435 def __getstate__(self): |
|
436 # getstate's results are ignored if it is not |
|
437 return self._data, |
|
438 |
|
439 def __setstate__(self, data): |
|
440 self._data, = data |
|
441 |
|
442 # We inherit object.__hash__, so we must deny this explicitly |
|
443 __hash__ = None |
|
444 |
|
445 # In-place union, intersection, differences. |
|
446 # Subtle: The xyz_update() functions deliberately return None, |
|
447 # as do all mutating operations on built-in container types. |
|
448 # The __xyz__ spellings have to return self, though. |
|
449 |
|
450 def __ior__(self, other): |
|
451 """Update a set with the union of itself and another.""" |
|
452 self._binary_sanity_check(other) |
|
453 self._data.update(other._data) |
|
454 return self |
|
455 |
|
456 def union_update(self, other): |
|
457 """Update a set with the union of itself and another.""" |
|
458 self._update(other) |
|
459 |
|
460 def __iand__(self, other): |
|
461 """Update a set with the intersection of itself and another.""" |
|
462 self._binary_sanity_check(other) |
|
463 self._data = (self & other)._data |
|
464 return self |
|
465 |
|
466 def intersection_update(self, other): |
|
467 """Update a set with the intersection of itself and another.""" |
|
468 if isinstance(other, BaseSet): |
|
469 self &= other |
|
470 else: |
|
471 self._data = (self.intersection(other))._data |
|
472 |
|
473 def __ixor__(self, other): |
|
474 """Update a set with the symmetric difference of itself and another.""" |
|
475 self._binary_sanity_check(other) |
|
476 self.symmetric_difference_update(other) |
|
477 return self |
|
478 |
|
479 def symmetric_difference_update(self, other): |
|
480 """Update a set with the symmetric difference of itself and another.""" |
|
481 data = self._data |
|
482 value = True |
|
483 if not isinstance(other, BaseSet): |
|
484 other = Set(other) |
|
485 if self is other: |
|
486 self.clear() |
|
487 for elt in other: |
|
488 if elt in data: |
|
489 del data[elt] |
|
490 else: |
|
491 data[elt] = value |
|
492 |
|
493 def __isub__(self, other): |
|
494 """Remove all elements of another set from this set.""" |
|
495 self._binary_sanity_check(other) |
|
496 self.difference_update(other) |
|
497 return self |
|
498 |
|
499 def difference_update(self, other): |
|
500 """Remove all elements of another set from this set.""" |
|
501 data = self._data |
|
502 if not isinstance(other, BaseSet): |
|
503 other = Set(other) |
|
504 if self is other: |
|
505 self.clear() |
|
506 for elt in ifilter(data.has_key, other): |
|
507 del data[elt] |
|
508 |
|
509 # Python dict-like mass mutations: update, clear |
|
510 |
|
511 def update(self, iterable): |
|
512 """Add all values from an iterable (such as a list or file).""" |
|
513 self._update(iterable) |
|
514 |
|
515 def clear(self): |
|
516 """Remove all elements from this set.""" |
|
517 self._data.clear() |
|
518 |
|
519 # Single-element mutations: add, remove, discard |
|
520 |
|
521 def add(self, element): |
|
522 """Add an element to a set. |
|
523 |
|
524 This has no effect if the element is already present. |
|
525 """ |
|
526 try: |
|
527 self._data[element] = True |
|
528 except TypeError: |
|
529 transform = getattr(element, "__as_immutable__", None) |
|
530 if transform is None: |
|
531 raise # re-raise the TypeError exception we caught |
|
532 self._data[transform()] = True |
|
533 |
|
534 def remove(self, element): |
|
535 """Remove an element from a set; it must be a member. |
|
536 |
|
537 If the element is not a member, raise a KeyError. |
|
538 """ |
|
539 try: |
|
540 del self._data[element] |
|
541 except TypeError: |
|
542 transform = getattr(element, "__as_temporarily_immutable__", None) |
|
543 if transform is None: |
|
544 raise # re-raise the TypeError exception we caught |
|
545 del self._data[transform()] |
|
546 |
|
547 def discard(self, element): |
|
548 """Remove an element from a set if it is a member. |
|
549 |
|
550 If the element is not a member, do nothing. |
|
551 """ |
|
552 try: |
|
553 self.remove(element) |
|
554 except KeyError: |
|
555 pass |
|
556 |
|
557 def pop(self): |
|
558 """Remove and return an arbitrary set element.""" |
|
559 return self._data.popitem()[0] |
|
560 |
|
561 def __as_immutable__(self): |
|
562 # Return a copy of self as an immutable set |
|
563 return ImmutableSet(self) |
|
564 |
|
565 def __as_temporarily_immutable__(self): |
|
566 # Return self wrapped in a temporarily immutable set |
|
567 return _TemporarilyImmutableSet(self) |
|
568 |
|
569 |
|
570 class _TemporarilyImmutableSet(BaseSet): |
|
571 # Wrap a mutable set as if it was temporarily immutable. |
|
572 # This only supplies hashing and equality comparisons. |
|
573 |
|
574 def __init__(self, set): |
|
575 self._set = set |
|
576 self._data = set._data # Needed by ImmutableSet.__eq__() |
|
577 |
|
578 def __hash__(self): |
|
579 return self._set._compute_hash() |