|
1 """Sort performance test. |
|
2 |
|
3 See main() for command line syntax. |
|
4 See tabulate() for output format. |
|
5 |
|
6 """ |
|
7 |
|
8 import sys |
|
9 import time |
|
10 import random |
|
11 import marshal |
|
12 import tempfile |
|
13 import os |
|
14 |
|
15 td = tempfile.gettempdir() |
|
16 |
|
17 def randfloats(n): |
|
18 """Return a list of n random floats in [0, 1).""" |
|
19 # Generating floats is expensive, so this writes them out to a file in |
|
20 # a temp directory. If the file already exists, it just reads them |
|
21 # back in and shuffles them a bit. |
|
22 fn = os.path.join(td, "rr%06d" % n) |
|
23 try: |
|
24 fp = open(fn, "rb") |
|
25 except IOError: |
|
26 r = random.random |
|
27 result = [r() for i in xrange(n)] |
|
28 try: |
|
29 try: |
|
30 fp = open(fn, "wb") |
|
31 marshal.dump(result, fp) |
|
32 fp.close() |
|
33 fp = None |
|
34 finally: |
|
35 if fp: |
|
36 try: |
|
37 os.unlink(fn) |
|
38 except os.error: |
|
39 pass |
|
40 except IOError, msg: |
|
41 print "can't write", fn, ":", msg |
|
42 else: |
|
43 result = marshal.load(fp) |
|
44 fp.close() |
|
45 # Shuffle it a bit... |
|
46 for i in range(10): |
|
47 i = random.randrange(n) |
|
48 temp = result[:i] |
|
49 del result[:i] |
|
50 temp.reverse() |
|
51 result.extend(temp) |
|
52 del temp |
|
53 assert len(result) == n |
|
54 return result |
|
55 |
|
56 def flush(): |
|
57 sys.stdout.flush() |
|
58 |
|
59 def doit(L): |
|
60 t0 = time.clock() |
|
61 L.sort() |
|
62 t1 = time.clock() |
|
63 print "%6.2f" % (t1-t0), |
|
64 flush() |
|
65 |
|
66 def tabulate(r): |
|
67 """Tabulate sort speed for lists of various sizes. |
|
68 |
|
69 The sizes are 2**i for i in r (the argument, a list). |
|
70 |
|
71 The output displays i, 2**i, and the time to sort arrays of 2**i |
|
72 floating point numbers with the following properties: |
|
73 |
|
74 *sort: random data |
|
75 \sort: descending data |
|
76 /sort: ascending data |
|
77 3sort: ascending, then 3 random exchanges |
|
78 +sort: ascending, then 10 random at the end |
|
79 %sort: ascending, then randomly replace 1% of the elements w/ random values |
|
80 ~sort: many duplicates |
|
81 =sort: all equal |
|
82 !sort: worst case scenario |
|
83 |
|
84 """ |
|
85 cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"]) |
|
86 fmt = ("%2s %7s" + " %6s"*len(cases)) |
|
87 print fmt % (("i", "2**i") + cases) |
|
88 for i in r: |
|
89 n = 1 << i |
|
90 L = randfloats(n) |
|
91 print "%2d %7d" % (i, n), |
|
92 flush() |
|
93 doit(L) # *sort |
|
94 L.reverse() |
|
95 doit(L) # \sort |
|
96 doit(L) # /sort |
|
97 |
|
98 # Do 3 random exchanges. |
|
99 for dummy in range(3): |
|
100 i1 = random.randrange(n) |
|
101 i2 = random.randrange(n) |
|
102 L[i1], L[i2] = L[i2], L[i1] |
|
103 doit(L) # 3sort |
|
104 |
|
105 # Replace the last 10 with random floats. |
|
106 if n >= 10: |
|
107 L[-10:] = [random.random() for dummy in range(10)] |
|
108 doit(L) # +sort |
|
109 |
|
110 # Replace 1% of the elements at random. |
|
111 for dummy in xrange(n // 100): |
|
112 L[random.randrange(n)] = random.random() |
|
113 doit(L) # %sort |
|
114 |
|
115 # Arrange for lots of duplicates. |
|
116 if n > 4: |
|
117 del L[4:] |
|
118 L = L * (n // 4) |
|
119 # Force the elements to be distinct objects, else timings can be |
|
120 # artificially low. |
|
121 L = map(lambda x: --x, L) |
|
122 doit(L) # ~sort |
|
123 del L |
|
124 |
|
125 # All equal. Again, force the elements to be distinct objects. |
|
126 L = map(abs, [-0.5] * n) |
|
127 doit(L) # =sort |
|
128 del L |
|
129 |
|
130 # This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case |
|
131 # for an older implementation of quicksort, which used the median |
|
132 # of the first, last and middle elements as the pivot. |
|
133 half = n // 2 |
|
134 L = range(half - 1, -1, -1) |
|
135 L.extend(range(half)) |
|
136 # Force to float, so that the timings are comparable. This is |
|
137 # significantly faster if we leave tham as ints. |
|
138 L = map(float, L) |
|
139 doit(L) # !sort |
|
140 print |
|
141 |
|
142 def main(): |
|
143 """Main program when invoked as a script. |
|
144 |
|
145 One argument: tabulate a single row. |
|
146 Two arguments: tabulate a range (inclusive). |
|
147 Extra arguments are used to seed the random generator. |
|
148 |
|
149 """ |
|
150 # default range (inclusive) |
|
151 k1 = 15 |
|
152 k2 = 20 |
|
153 if sys.argv[1:]: |
|
154 # one argument: single point |
|
155 k1 = k2 = int(sys.argv[1]) |
|
156 if sys.argv[2:]: |
|
157 # two arguments: specify range |
|
158 k2 = int(sys.argv[2]) |
|
159 if sys.argv[3:]: |
|
160 # derive random seed from remaining arguments |
|
161 x = 1 |
|
162 for a in sys.argv[3:]: |
|
163 x = 69069 * x + hash(a) |
|
164 random.seed(x) |
|
165 r = range(k1, k2+1) # include the end point |
|
166 tabulate(r) |
|
167 |
|
168 if __name__ == '__main__': |
|
169 main() |