|
1 #include "Python.h" |
|
2 |
|
3 #ifdef WITH_PYMALLOC |
|
4 |
|
5 /* An object allocator for Python. |
|
6 |
|
7 Here is an introduction to the layers of the Python memory architecture, |
|
8 showing where the object allocator is actually used (layer +2). It is |
|
9 called for every object allocation and deallocation (PyObject_New/Del), |
|
10 unless the object-specific allocators implement a proprietary allocation |
|
11 scheme (ex.: ints use a simple free list). This is also the place where |
|
12 the cyclic garbage collector operates selectively on container objects. |
|
13 |
|
14 |
|
15 Object-specific allocators |
|
16 _____ ______ ______ ________ |
|
17 [ int ] [ dict ] [ list ] ... [ string ] Python core | |
|
18 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> | |
|
19 _______________________________ | | |
|
20 [ Python's object allocator ] | | |
|
21 +2 | ####### Object memory ####### | <------ Internal buffers ------> | |
|
22 ______________________________________________________________ | |
|
23 [ Python's raw memory allocator (PyMem_ API) ] | |
|
24 +1 | <----- Python memory (under PyMem manager's control) ------> | | |
|
25 __________________________________________________________________ |
|
26 [ Underlying general-purpose allocator (ex: C library malloc) ] |
|
27 0 | <------ Virtual memory allocated for the python process -------> | |
|
28 |
|
29 ========================================================================= |
|
30 _______________________________________________________________________ |
|
31 [ OS-specific Virtual Memory Manager (VMM) ] |
|
32 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> | |
|
33 __________________________________ __________________________________ |
|
34 [ ] [ ] |
|
35 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> | |
|
36 |
|
37 */ |
|
38 /*==========================================================================*/ |
|
39 |
|
40 /* A fast, special-purpose memory allocator for small blocks, to be used |
|
41 on top of a general-purpose malloc -- heavily based on previous art. */ |
|
42 |
|
43 /* Vladimir Marangozov -- August 2000 */ |
|
44 |
|
45 /* |
|
46 * "Memory management is where the rubber meets the road -- if we do the wrong |
|
47 * thing at any level, the results will not be good. And if we don't make the |
|
48 * levels work well together, we are in serious trouble." (1) |
|
49 * |
|
50 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles, |
|
51 * "Dynamic Storage Allocation: A Survey and Critical Review", |
|
52 * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995. |
|
53 */ |
|
54 |
|
55 /* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */ |
|
56 |
|
57 /*==========================================================================*/ |
|
58 |
|
59 /* |
|
60 * Allocation strategy abstract: |
|
61 * |
|
62 * For small requests, the allocator sub-allocates <Big> blocks of memory. |
|
63 * Requests greater than 256 bytes are routed to the system's allocator. |
|
64 * |
|
65 * Small requests are grouped in size classes spaced 8 bytes apart, due |
|
66 * to the required valid alignment of the returned address. Requests of |
|
67 * a particular size are serviced from memory pools of 4K (one VMM page). |
|
68 * Pools are fragmented on demand and contain free lists of blocks of one |
|
69 * particular size class. In other words, there is a fixed-size allocator |
|
70 * for each size class. Free pools are shared by the different allocators |
|
71 * thus minimizing the space reserved for a particular size class. |
|
72 * |
|
73 * This allocation strategy is a variant of what is known as "simple |
|
74 * segregated storage based on array of free lists". The main drawback of |
|
75 * simple segregated storage is that we might end up with a lot of reserved |
|
76 * memory for the different free lists, which can degenerate over time. To avoid |
|
77 * this, we partition each free list in pools and we share dynamically the |
|
78 * reserved space between all free lists. This technique is quite efficient |
|
79 * for memory intensive programs which allocate mainly small-sized blocks. |
|
80 * |
|
81 * For small requests we have the following table: |
|
82 * |
|
83 * Request in bytes Size of allocated block Size class idx |
|
84 * ---------------------------------------------------------------- |
|
85 * 1-8 8 0 |
|
86 * 9-16 16 1 |
|
87 * 17-24 24 2 |
|
88 * 25-32 32 3 |
|
89 * 33-40 40 4 |
|
90 * 41-48 48 5 |
|
91 * 49-56 56 6 |
|
92 * 57-64 64 7 |
|
93 * 65-72 72 8 |
|
94 * ... ... ... |
|
95 * 241-248 248 30 |
|
96 * 249-256 256 31 |
|
97 * |
|
98 * 0, 257 and up: routed to the underlying allocator. |
|
99 */ |
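/*
 * Illustrative sketch (not compiled, hence the #if 0 guard): how a small
 * request maps to its size class index and back to the rounded block size,
 * mirroring the table above.  The shift by 3 is ALIGNMENT_SHIFT, and the
 * formulas match the index computation in PyObject_Malloc() and the
 * INDEX2SIZE() macro defined below.  The example_* helpers are hypothetical.
 */
#if 0
static unsigned int
example_size_to_index(size_t nbytes)    /* valid for 1 <= nbytes <= 256 */
{
    return (unsigned int)(nbytes - 1) >> 3;     /* == ALIGNMENT_SHIFT */
}

static size_t
example_index_to_size(unsigned int idx) /* block size for size class idx */
{
    return ((size_t)idx + 1) << 3;              /* == ALIGNMENT_SHIFT */
}

/* e.g. a 20-byte request -> index 2 -> served from 24-byte blocks */
#endif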
|
100 |
|
101 /*==========================================================================*/ |
|
102 |
|
103 /* |
|
104 * -- Main tunable settings section -- |
|
105 */ |
|
106 |
|
107 /* |
|
108 * Alignment of addresses returned to the user. 8-bytes alignment works |
|
109 * on most current architectures (with 32-bit or 64-bit address busses). |
|
110 * The alignment value is also used for grouping small requests in size |
|
111 * classes spaced ALIGNMENT bytes apart. |
|
112 * |
|
113 * You shouldn't change this unless you know what you are doing. |
|
114 */ |
|
115 #define ALIGNMENT 8 /* must be 2^N */ |
|
116 #define ALIGNMENT_SHIFT 3 |
|
117 #define ALIGNMENT_MASK (ALIGNMENT - 1) |
|
118 |
|
119 /* Return the number of bytes in size class I, as a uint. */ |
|
120 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT) |
|
121 |
|
122 /* |
|
123 * Max size threshold below which malloc requests are considered to be |
|
124 * small enough to use preallocated memory pools. You can tune |
|
125 * this value according to your application behaviour and memory needs. |
|
126 * |
|
127 * The following invariants must hold: |
|
128 * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 256 |
|
129 * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT |
|
130 * |
|
131 * Although not required, for better performance and space efficiency, |
|
132 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2. |
|
133 */ |
|
134 #define SMALL_REQUEST_THRESHOLD 256 |
|
135 #define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT) |
|
136 |
|
137 /* |
|
138 * The system's VMM page size can be obtained on most unices with a |
|
139 * getpagesize() call or deduced from various header files. To make |
|
140 * things simpler, we assume that it is 4K, which is OK for most systems. |
|
141 * It is probably better if this is the native page size, but it doesn't |
|
142 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page |
|
143 * size, then `POOL_ADDR(p)->arenaindex' could, in rare cases, cause a |
|
144 * segmentation fault. 4K is apparently OK for all the platforms that Python |
|
145 * currently targets. |
|
146 */ |
|
147 #define SYSTEM_PAGE_SIZE (4 * 1024) |
|
148 #define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1) |
|
149 |
|
150 /* |
|
151 * Maximum amount of memory managed by the allocator for small requests. |
|
152 */ |
|
153 #ifdef WITH_MEMORY_LIMITS |
|
154 #ifndef SMALL_MEMORY_LIMIT |
|
155 #define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */ |
|
156 #endif |
|
157 #endif |
|
158 |
|
159 /* |
|
160 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned |
|
161 * on a page boundary. This is a reserved virtual address space for the |
|
162 * current process (obtained through a malloc call). In no way does this mean |
|
163 * that the memory arenas will be used entirely. A malloc(<Big>) is usually |
|
164 * an address range reservation for <Big> bytes; physical pages are committed |
|
165 * only when they are subsequently referenced. So malloc'ing big blocks and not |
|
166 * using them does not waste physical memory, only addressable range. |
|
167 * |
|
168 * Therefore, allocating arenas with malloc is not optimal, because there is |
|
169 * some address space wastage, but this is the most portable way to request |
|
170 * memory from the system across various platforms. |
|
171 */ |
|
172 #define ARENA_SIZE (256 << 10) /* 256KB */ |
|
173 |
|
174 #ifdef WITH_MEMORY_LIMITS |
|
175 #define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE) |
|
176 #endif |
|
177 |
|
178 /* |
|
179 * Size of the pools used for small blocks. Should be a power of 2, |
|
180 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k. |
|
181 */ |
|
182 #define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */ |
|
183 #define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK |
|
184 |
|
185 /* |
|
186 * -- End of tunable settings section -- |
|
187 */ |
|
188 |
|
189 /*==========================================================================*/ |
|
190 |
|
191 /* |
|
192 * Locking |
|
193 * |
|
194 * To reduce lock contention, it would probably be better to refine the |
|
195 * crude function locking with per-size-class locking. I'm not positive, |
|
196 * however, whether it's worth switching to such a locking policy because |
|
197 * of the performance penalty it might introduce. |
|
198 * |
|
199 * The following macros describe the simplest (should also be the fastest) |
|
200 * lock object on a particular platform and the init/fini/lock/unlock |
|
201 * operations on it. The locks defined here are not expected to be recursive |
|
202 * because it is assumed that they will always be called in the order: |
|
203 * INIT, [LOCK, UNLOCK]*, FINI. |
|
204 */ |
|
205 |
|
206 /* |
|
207 * Python's threads are serialized, so object malloc locking is disabled. |
|
208 */ |
|
209 #define SIMPLELOCK_DECL(lock) /* simple lock declaration */ |
|
210 #define SIMPLELOCK_INIT(lock) /* allocate (if needed) and initialize */ |
|
211 #define SIMPLELOCK_FINI(lock) /* free/destroy an existing lock */ |
|
212 #define SIMPLELOCK_LOCK(lock) /* acquire released lock */ |
|
213 #define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */ |
|
214 |
|
215 /* |
|
216 * Basic types |
|
217 * I don't care if these are defined in <sys/types.h> or elsewhere. Axiom. |
|
218 */ |
|
219 #undef uchar |
|
220 #define uchar unsigned char /* assuming == 8 bits */ |
|
221 |
|
222 #undef uint |
|
223 #define uint unsigned int /* assuming >= 16 bits */ |
|
224 |
|
225 #undef ulong |
|
226 #define ulong unsigned long /* assuming >= 32 bits */ |
|
227 |
|
228 #undef uptr |
|
229 #define uptr Py_uintptr_t |
|
230 |
|
231 /* When you say memory, my mind reasons in terms of (pointers to) blocks */ |
|
232 typedef uchar block; |
|
233 |
|
234 /* Pool for small blocks. */ |
|
235 struct pool_header { |
|
236 union { block *_padding; |
|
237 uint count; } ref; /* number of allocated blocks */ |
|
238 block *freeblock; /* pool's free list head */ |
|
239 struct pool_header *nextpool; /* next pool of this size class */ |
|
240 struct pool_header *prevpool; /* previous pool "" */ |
|
241 uint arenaindex; /* index into arenas of base adr */ |
|
242 uint szidx; /* block size class index */ |
|
243 uint nextoffset; /* bytes to virgin block */ |
|
244 uint maxnextoffset; /* largest valid nextoffset */ |
|
245 }; |
|
246 |
|
247 typedef struct pool_header *poolp; |
|
248 |
|
249 /* Record keeping for arenas. */ |
|
250 struct arena_object { |
|
251 /* The address of the arena, as returned by malloc. Note that 0 |
|
252 * will never be returned by a successful malloc, and is used |
|
253 * here to mark an arena_object that doesn't correspond to an |
|
254 * allocated arena. |
|
255 */ |
|
256 uptr address; |
|
257 |
|
258 /* Pool-aligned pointer to the next pool to be carved off. */ |
|
259 block* pool_address; |
|
260 |
|
261 /* The number of available pools in the arena: free pools + never- |
|
262 * allocated pools. |
|
263 */ |
|
264 uint nfreepools; |
|
265 |
|
266 /* The total number of pools in the arena, whether or not available. */ |
|
267 uint ntotalpools; |
|
268 |
|
269 /* Singly-linked list of available pools. */ |
|
270 struct pool_header* freepools; |
|
271 |
|
272 /* Whenever this arena_object is not associated with an allocated |
|
273 * arena, the nextarena member is used to link all unassociated |
|
274 * arena_objects in the singly-linked `unused_arena_objects` list. |
|
275 * The prevarena member is unused in this case. |
|
276 * |
|
277 * When this arena_object is associated with an allocated arena |
|
278 * with at least one available pool, both members are used in the |
|
279 * doubly-linked `usable_arenas` list, which is maintained in |
|
280 * increasing order of `nfreepools` values. |
|
281 * |
|
282 * Else this arena_object is associated with an allocated arena |
|
283 * all of whose pools are in use. `nextarena` and `prevarena` |
|
284 * are both meaningless in this case. |
|
285 */ |
|
286 struct arena_object* nextarena; |
|
287 struct arena_object* prevarena; |
|
288 }; |
|
289 |
|
290 #undef ROUNDUP |
|
291 #define ROUNDUP(x) (((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK) |
|
292 #define POOL_OVERHEAD ROUNDUP(sizeof(struct pool_header)) |
|
293 |
|
294 #define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */ |
|
295 |
|
296 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */ |
|
297 #define POOL_ADDR(P) ((poolp)((uptr)(P) & ~(uptr)POOL_SIZE_MASK)) |
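/*
 * Worked sketch (not compiled): with 4K pools, POOL_ADDR() simply clears the
 * low 12 bits of a block's address, so a block at the hypothetical address
 * 0x12345a30 belongs to the pool whose header starts at 0x12345000.
 */
#if 0
static void
example_pool_addr(void)
{
    assert(POOL_ADDR((void *)0x12345a30UL) == (poolp)0x12345000UL);
}
#endif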
|
298 |
|
299 /* Return total number of blocks in pool of size index I, as a uint. */ |
|
300 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I)) |
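/*
 * Worked example (a sketch, not compiled): assuming a 64-bit build where the
 * pool_header rounds up to 48 bytes (an assumption -- the exact overhead
 * depends on pointer size and padding), a pool of the smallest size class
 * holds (4096 - 48) / 8 == 506 eight-byte blocks, and a pool of the largest
 * class holds (4096 - 48) / 256 == 15 blocks of 256 bytes.
 */
#if 0
static void
example_pool_capacity(void)
{
    assert(POOL_OVERHEAD == 48);    /* assumed 64-bit layout */
    assert(NUMBLOCKS(0) == 506);    /* 8-byte blocks */
    assert(NUMBLOCKS(31) == 15);    /* 256-byte blocks */
}
#endif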
|
301 |
|
302 /*==========================================================================*/ |
|
303 |
|
304 /* |
|
305 * This malloc lock |
|
306 */ |
|
307 SIMPLELOCK_DECL(_malloc_lock) |
|
308 #define LOCK() SIMPLELOCK_LOCK(_malloc_lock) |
|
309 #define UNLOCK() SIMPLELOCK_UNLOCK(_malloc_lock) |
|
310 #define LOCK_INIT() SIMPLELOCK_INIT(_malloc_lock) |
|
311 #define LOCK_FINI() SIMPLELOCK_FINI(_malloc_lock) |
|
312 |
|
313 /* |
|
314 * Pool table -- headed, circular, doubly-linked lists of partially used pools. |
|
315 |
|
316 This is involved. For an index i, usedpools[i+i] is the header for a list of |
|
317 all partially used pools holding small blocks with "size class idx" i. So |
|
318 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size |
|
319 16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT. |
|
320 |
|
321 Pools are carved off an arena's highwater mark (an arena_object's pool_address |
|
322 member) as needed. Once carved off, a pool is in one of three states forever |
|
323 after: |
|
324 |
|
325 used == partially used, neither empty nor full |
|
326 At least one block in the pool is currently allocated, and at least one |
|
327 block in the pool is not currently allocated (note this implies a pool |
|
328 has room for at least two blocks). |
|
329 This is a pool's initial state, as a pool is created only when malloc |
|
330 needs space. |
|
331 The pool holds blocks of a fixed size, and is in the circular list headed |
|
332 at usedpools[i] (see above). It's linked to the other used pools of the |
|
333 same size class via the pool_header's nextpool and prevpool members. |
|
334 If exactly one block is free (i.e., all but one is allocated), a malloc can |
|
335 cause a transition to the full state. If exactly one block is allocated |
|
336 (i.e., all but one is free), a free can cause a transition to the empty state. |
|
337 |
|
338 full == all the pool's blocks are currently allocated |
|
339 On transition to full, a pool is unlinked from its usedpools[] list. |
|
340 It's not linked to from anything then anymore, and its nextpool and |
|
341 prevpool members are meaningless until it transitions back to used. |
|
342 A free of a block in a full pool puts the pool back in the used state. |
|
343 Then it's linked in at the front of the appropriate usedpools[] list, so |
|
344 that the next allocation for its size class will reuse the freed block. |
|
345 |
|
346 empty == all the pool's blocks are currently available for allocation |
|
347 On transition to empty, a pool is unlinked from its usedpools[] list, |
|
348 and linked to the front of its arena_object's singly-linked freepools list, |
|
349 via its nextpool member. The prevpool member has no meaning in this case. |
|
350 Empty pools have no inherent size class: the next time a malloc finds |
|
351 an empty list in usedpools[], it takes the first pool off of freepools. |
|
352 If the size class needed happens to be the same as the size class the pool |
|
353 last had, some pool initialization can be skipped. |
|
354 |
|
355 |
|
356 Block Management |
|
357 |
|
358 Blocks within pools are again carved out as needed. pool->freeblock points to |
|
359 the start of a singly-linked list of free blocks within the pool. When a |
|
360 block is freed, it's inserted at the front of its pool's freeblock list. Note |
|
361 that the available blocks in a pool are *not* linked all together when a pool |
|
362 is initialized. Instead only "the first two" (lowest addresses) blocks are |
|
363 set up, returning the first such block, and setting pool->freeblock to a |
|
364 one-block list holding the second such block. This is consistent with |
|
365 pymalloc's strategy of never touching a piece of memory, at any level |
|
366 (arena, pool, or block), until it's actually needed. |
|
367 |
|
368 So long as a pool is in the used state, we're certain there *is* a block |
|
369 available for allocating, and pool->freeblock is not NULL. If pool->freeblock |
|
370 points to the end of the free list before we've carved the entire pool into |
|
371 blocks, that means we simply haven't yet gotten to one of the higher-address |
|
372 blocks. The offset from the pool_header to the start of "the next" virgin |
|
373 block is stored in the pool_header nextoffset member, and the largest value |
|
374 of nextoffset that makes sense is stored in the maxnextoffset member when a |
|
375 pool is initialized. All the blocks in a pool have been passed out at least |
|
376 once when and only when nextoffset > maxnextoffset. |
|
377 |
|
378 |
|
379 Major obscurity: While the usedpools vector is declared to have poolp |
|
380 entries, it doesn't really. It really contains two pointers per (conceptual) |
|
381 poolp entry, the nextpool and prevpool members of a pool_header. The |
|
382 excruciating initialization code below fools C so that |
|
383 |
|
384 usedpools[i+i] |
|
385 |
|
386 "acts like" a genuine poolp, but only so long as you only reference its |
|
387 nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is |
|
388 compensating for the fact that a pool_header's nextpool and prevpool members |
|
389 immediately follow a pool_header's first two members: |
|
390 |
|
391 union { block *_padding; |
|
392 uint count; } ref; |
|
393 block *freeblock; |
|
394 |
|
395 each of which consumes sizeof(block *) bytes. So what usedpools[i+i] really |
|
396 contains is a fudged-up pointer p such that *if* C believes it's a poolp |
|
397 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed |
|
398 circular list is empty). |
|
399 |
|
400 It's unclear why the usedpools setup is so convoluted. It could be to |
|
401 minimize the amount of cache required to hold this heavily-referenced table |
|
402 (which only *needs* the two interpool pointer members of a pool_header). OTOH, |
|
403 referencing code has to remember to "double the index" and doing so isn't |
|
404 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying |
|
405 on C not inserting any padding anywhere in a pool_header at or before |
|
406 the prevpool member. |
|
407 **************************************************************************** */ |
|
408 |
|
409 #define PTA(x) ((poolp )((uchar *)&(usedpools[2*(x)]) - 2*sizeof(block *))) |
|
410 #define PT(x) PTA(x), PTA(x) |
|
411 |
|
412 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = { |
|
413 PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7) |
|
414 #if NB_SMALL_SIZE_CLASSES > 8 |
|
415 , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15) |
|
416 #if NB_SMALL_SIZE_CLASSES > 16 |
|
417 , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23) |
|
418 #if NB_SMALL_SIZE_CLASSES > 24 |
|
419 , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31) |
|
420 #if NB_SMALL_SIZE_CLASSES > 32 |
|
421 , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39) |
|
422 #if NB_SMALL_SIZE_CLASSES > 40 |
|
423 , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47) |
|
424 #if NB_SMALL_SIZE_CLASSES > 48 |
|
425 , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55) |
|
426 #if NB_SMALL_SIZE_CLASSES > 56 |
|
427 , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63) |
|
428 #endif /* NB_SMALL_SIZE_CLASSES > 56 */ |
|
429 #endif /* NB_SMALL_SIZE_CLASSES > 48 */ |
|
430 #endif /* NB_SMALL_SIZE_CLASSES > 40 */ |
|
431 #endif /* NB_SMALL_SIZE_CLASSES > 32 */ |
|
432 #endif /* NB_SMALL_SIZE_CLASSES > 24 */ |
|
433 #endif /* NB_SMALL_SIZE_CLASSES > 16 */ |
|
434 #endif /* NB_SMALL_SIZE_CLASSES > 8 */ |
|
435 }; |
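/*
 * Sketch (not compiled) of why the PTA() trick works: nextpool sits
 * 2*sizeof(block *) bytes into a pool_header (right after the ref union and
 * freeblock, assuming no compiler padding there, as required above), so
 * backing the fudged pointer up by that amount makes its nextpool and
 * prevpool fields alias usedpools[2*i] and usedpools[2*i + 1].  Each header
 * list therefore starts out empty: the fake pool points at itself.
 * (offsetof() comes from <stddef.h>; example_usedpools_trick is hypothetical.)
 */
#if 0
static void
example_usedpools_trick(void)
{
    uint i;
    assert(offsetof(struct pool_header, nextpool) == 2 * sizeof(block *));
    for (i = 0; i < NB_SMALL_SIZE_CLASSES; ++i) {
        poolp p = usedpools[i + i];
        assert(p->nextpool == p && p->prevpool == p);
    }
}
#endif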
|
436 |
|
437 /*========================================================================== |
|
438 Arena management. |
|
439 |
|
440 `arenas` is a vector of arena_objects. It contains maxarenas entries, some of |
|
441 which may not be currently used (== they're arena_objects that aren't |
|
442 currently associated with an allocated arena). Note that arenas proper are |
|
443 separately malloc'ed. |
|
444 |
|
445 Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5, |
|
446 we do try to free() arenas, and use some mild heuristic strategies to increase |
|
447 the likelihood that arenas eventually can be freed. |
|
448 |
|
449 unused_arena_objects |
|
450 |
|
451 This is a singly-linked list of the arena_objects that are currently not |
|
452 being used (no arena is associated with them). Objects are taken off the |
|
453 head of the list in new_arena(), and are pushed on the head of the list in |
|
454 PyObject_Free() when the arena is empty. Key invariant: an arena_object |
|
455 is on this list if and only if its .address member is 0. |
|
456 |
|
457 usable_arenas |
|
458 |
|
459 This is a doubly-linked list of the arena_objects associated with arenas |
|
460 that have pools available. These pools are either waiting to be reused, |
|
461 or have not been used before. The list is sorted to have the most- |
|
462 allocated arenas first (ascending order based on the nfreepools member). |
|
463 This means that the next allocation will come from a heavily used arena, |
|
464 which gives the nearly empty arenas a chance to be returned to the system. |
|
465 In my unscientific tests this dramatically improved the number of arenas |
|
466 that could be freed. |
|
467 |
|
468 Note that an arena_object associated with an arena all of whose pools are |
|
469 currently in use isn't on either list. |
|
470 */ |
|
471 |
|
472 /* Array of objects used to track chunks of memory (arenas). */ |
|
473 static struct arena_object* arenas = NULL; |
|
474 /* Number of slots currently allocated in the `arenas` vector. */ |
|
475 static uint maxarenas = 0; |
|
476 |
|
477 /* The head of the singly-linked, NULL-terminated list of available |
|
478 * arena_objects. |
|
479 */ |
|
480 static struct arena_object* unused_arena_objects = NULL; |
|
481 |
|
482 /* The head of the doubly-linked, NULL-terminated at each end, list of |
|
483 * arena_objects associated with arenas that have pools available. |
|
484 */ |
|
485 static struct arena_object* usable_arenas = NULL; |
|
486 |
|
487 /* How many arena_objects do we initially allocate? |
|
488 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the |
|
489 * `arenas` vector. |
|
490 */ |
|
491 #define INITIAL_ARENA_OBJECTS 16 |
|
492 |
|
493 /* Number of arenas allocated that haven't been free()'d. */ |
|
494 static size_t narenas_currently_allocated = 0; |
|
495 |
|
496 #ifdef PYMALLOC_DEBUG |
|
497 /* Total number of times malloc() called to allocate an arena. */ |
|
498 static size_t ntimes_arena_allocated = 0; |
|
499 /* High water mark (max value ever seen) for narenas_currently_allocated. */ |
|
500 static size_t narenas_highwater = 0; |
|
501 #endif |
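/*
 * Sketch (not compiled) of the list invariants documented above: every
 * arena_object on unused_arena_objects has .address == 0, and usable_arenas
 * stays sorted by ascending nfreepools.  example_check_arena_lists() is
 * hypothetical.
 */
#if 0
static void
example_check_arena_lists(void)
{
    struct arena_object *ao;
    for (ao = unused_arena_objects; ao != NULL; ao = ao->nextarena)
        assert(ao->address == 0);
    for (ao = usable_arenas; ao != NULL; ao = ao->nextarena) {
        assert(ao->address != 0);
        if (ao->nextarena != NULL)
            assert(ao->nfreepools <= ao->nextarena->nfreepools);
    }
}
#endif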
|
502 |
|
503 /* Allocate a new arena. If we run out of memory, return NULL. Else |
|
504 * allocate a new arena, and return the address of an arena_object |
|
505 * describing the new arena. It's expected that the caller will set |
|
506 * `usable_arenas` to the return value. |
|
507 */ |
|
508 static struct arena_object* |
|
509 new_arena(void) |
|
510 { |
|
511 struct arena_object* arenaobj; |
|
512 uint excess; /* number of bytes above pool alignment */ |
|
513 |
|
514 #ifdef PYMALLOC_DEBUG |
|
515 if (Py_GETENV("PYTHONMALLOCSTATS")) |
|
516 _PyObject_DebugMallocStats(); |
|
517 #endif |
|
518 if (unused_arena_objects == NULL) { |
|
519 uint i; |
|
520 uint numarenas; |
|
521 size_t nbytes; |
|
522 |
|
523 /* Double the number of arena objects on each allocation. |
|
524 * Note that it's possible for `numarenas` to overflow. |
|
525 */ |
|
526 numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS; |
|
527 if (numarenas <= maxarenas) |
|
528 return NULL; /* overflow */ |
|
529 #if SIZEOF_SIZE_T <= SIZEOF_INT |
|
530 if (numarenas > PY_SIZE_MAX / sizeof(*arenas)) |
|
531 return NULL; /* overflow */ |
|
532 #endif |
|
533 nbytes = numarenas * sizeof(*arenas); |
|
534 arenaobj = (struct arena_object *)realloc(arenas, nbytes); |
|
535 if (arenaobj == NULL) |
|
536 return NULL; |
|
537 arenas = arenaobj; |
|
538 |
|
539 /* We might need to fix pointers that were copied. However, |
|
540 * new_arena only gets called when all the pages in the |
|
541 * previous arenas are full. Thus, there are *no* pointers |
|
542 * into the old array. Thus, we don't have to worry about |
|
543 * invalid pointers. Just to be sure, some asserts: |
|
544 */ |
|
545 assert(usable_arenas == NULL); |
|
546 assert(unused_arena_objects == NULL); |
|
547 |
|
548 /* Put the new arenas on the unused_arena_objects list. */ |
|
549 for (i = maxarenas; i < numarenas; ++i) { |
|
550 arenas[i].address = 0; /* mark as unassociated */ |
|
551 arenas[i].nextarena = i < numarenas - 1 ? |
|
552 &arenas[i+1] : NULL; |
|
553 } |
|
554 |
|
555 /* Update globals. */ |
|
556 unused_arena_objects = &arenas[maxarenas]; |
|
557 maxarenas = numarenas; |
|
558 } |
|
559 |
|
560 /* Take the next available arena object off the head of the list. */ |
|
561 assert(unused_arena_objects != NULL); |
|
562 arenaobj = unused_arena_objects; |
|
563 unused_arena_objects = arenaobj->nextarena; |
|
564 assert(arenaobj->address == 0); |
|
565 arenaobj->address = (uptr)malloc(ARENA_SIZE); |
|
566 if (arenaobj->address == 0) { |
|
567 /* The allocation failed: return NULL after putting the |
|
568 * arenaobj back. |
|
569 */ |
|
570 arenaobj->nextarena = unused_arena_objects; |
|
571 unused_arena_objects = arenaobj; |
|
572 return NULL; |
|
573 } |
|
574 |
|
575 ++narenas_currently_allocated; |
|
576 #ifdef PYMALLOC_DEBUG |
|
577 ++ntimes_arena_allocated; |
|
578 if (narenas_currently_allocated > narenas_highwater) |
|
579 narenas_highwater = narenas_currently_allocated; |
|
580 #endif |
|
581 arenaobj->freepools = NULL; |
|
582 /* pool_address <- first pool-aligned address in the arena |
|
583 nfreepools <- number of whole pools that fit after alignment */ |
|
584 arenaobj->pool_address = (block*)arenaobj->address; |
|
585 arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE; |
|
586 assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE); |
|
587 excess = (uint)(arenaobj->address & POOL_SIZE_MASK); |
|
588 if (excess != 0) { |
|
589 --arenaobj->nfreepools; |
|
590 arenaobj->pool_address += POOL_SIZE - excess; |
|
591 } |
|
592 arenaobj->ntotalpools = arenaobj->nfreepools; |
|
593 |
|
594 return arenaobj; |
|
595 } |
|
596 |
|
597 /* |
|
598 Py_ADDRESS_IN_RANGE(P, POOL) |
|
599 |
|
600 Return true if and only if P is an address that was allocated by pymalloc. |
|
601 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P) |
|
602 (the caller is asked to compute this because the macro expands POOL more than |
|
603 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a |
|
604 variable and pass the latter to the macro; because Py_ADDRESS_IN_RANGE is |
|
605 called on every alloc/realloc/free, micro-efficiency is important here). |
|
606 |
|
607 Tricky: Let B be the arena base address associated with the pool, B = |
|
608 arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if |
|
609 |
|
610 B <= P < B + ARENA_SIZE |
|
611 |
|
612 Subtracting B throughout, this is true iff |
|
613 |
|
614 0 <= P-B < ARENA_SIZE |
|
615 |
|
616 By using unsigned arithmetic, the "0 <=" half of the test can be skipped. |
|
617 |
|
618 Obscure: A PyMem "free memory" function can call the pymalloc free or realloc |
|
619 before the first arena has been allocated. `arenas` is still NULL in that |
|
620 case. We're relying on maxarenas also being 0 in that case, so that |
|
621 (POOL)->arenaindex < maxarenas must be false, saving us from trying to index |
|
622 into a NULL arenas. |
|
623 |
|
624 Details: given P and POOL, the arena_object corresponding to P is AO = |
|
625 arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild |
|
626 stores, etc), POOL is the correct address of P's pool, AO.address is the |
|
627 correct base address of the pool's arena, and P must be within ARENA_SIZE of |
|
628 AO.address. In addition, AO.address is not 0 (no arena can start at address 0 |
|
629 (NULL)). Therefore Py_ADDRESS_IN_RANGE correctly reports that obmalloc |
|
630 controls P. |
|
631 |
|
632 Now suppose obmalloc does not control P (e.g., P was obtained via a direct |
|
633 call to the system malloc() or realloc()). (POOL)->arenaindex may be anything |
|
634 in this case -- it may even be uninitialized trash. If the trash arenaindex |
|
635 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't |
|
636 control P. |
|
637 |
|
638 Else arenaindex is < maxarenas, and AO is read up. If AO corresponds to an |
|
639 allocated arena, obmalloc controls all the memory in slice AO.address : |
|
640 AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc, |
|
641 so P doesn't lie in that slice, so the macro correctly reports that P is not |
|
642 controlled by obmalloc. |
|
643 |
|
644 Finally, if P is not controlled by obmalloc and AO corresponds to an unused |
|
645 arena_object (one not currently associated with an allocated arena), |
|
646 AO.address is 0, and the second test in the macro reduces to: |
|
647 |
|
648 P < ARENA_SIZE |
|
649 |
|
650 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes |
|
651 that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part |
|
652 of the test still passes, and the third clause (AO.address != 0) is necessary |
|
653 to get the correct result: AO.address is 0 in this case, so the macro |
|
654 correctly reports that P is not controlled by obmalloc (even though P lies in |
|
655 slice AO.address : AO.address + ARENA_SIZE). |
|
656 |
|
657 Note: The third (AO.address != 0) clause was added in Python 2.5. Before |
|
658 2.5, arenas were never free()'ed, and an arenaindex < maxarenas always |
|
659 corresponded to a currently-allocated arena, so the "P is not controlled by |
|
660 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case |
|
661 was impossible. |
|
662 |
|
663 Note that the logic is excruciating, and reading up possibly uninitialized |
|
664 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex) |
|
665 creates problems for some memory debuggers. The overwhelming advantage is |
|
666 that this test determines whether an arbitrary address is controlled by |
|
667 obmalloc in a small constant time, independent of the number of arenas |
|
668 obmalloc controls. Since this test is needed at every entry point, it's |
|
669 extremely desirable that it be this fast. |
|
670 */ |
|
671 #define Py_ADDRESS_IN_RANGE(P, POOL) \ |
|
672 ((POOL)->arenaindex < maxarenas && \ |
|
673 (uptr)(P) - arenas[(POOL)->arenaindex].address < (uptr)ARENA_SIZE && \ |
|
674 arenas[(POOL)->arenaindex].address != 0) |
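/*
 * Sketch (not compiled) of the unsigned-arithmetic trick in the middle clause
 * above: computing P - B in an unsigned type lets one comparison cover both
 * ends of the range, because P < B wraps around to a huge value that can
 * never be smaller than ARENA_SIZE.  example_in_arena() is hypothetical.
 */
#if 0
static int
example_in_arena(uptr p, uptr base)
{
    /* Equivalent to: base <= p && p < base + ARENA_SIZE */
    return p - base < (uptr)ARENA_SIZE;
}
#endif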
|
675 |
|
676 |
|
677 /* This is only useful when running memory debuggers such as |
|
678 * Purify or Valgrind. Uncomment to use. |
|
679 * |
|
680 #define Py_USING_MEMORY_DEBUGGER |
|
681 */ |
|
682 |
|
683 #ifdef Py_USING_MEMORY_DEBUGGER |
|
684 |
|
685 /* Py_ADDRESS_IN_RANGE may access uninitialized memory by design. |
|
686 * This leads to thousands of spurious warnings when using |
|
687 * Purify or Valgrind. By making a function, we can easily |
|
688 * suppress the uninitialized memory reads in this one function, |
|
689 * so we won't ignore real errors elsewhere. |
|
690 * |
|
691 * Disable the macro and use a function. |
|
692 */ |
|
693 |
|
694 #undef Py_ADDRESS_IN_RANGE |
|
695 |
|
696 #if defined(__GNUC__) && ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) || \ |
|
697 (__GNUC__ >= 4)) |
|
698 #define Py_NO_INLINE __attribute__((__noinline__)) |
|
699 #else |
|
700 #define Py_NO_INLINE |
|
701 #endif |
|
702 |
|
703 /* Don't make static, to try to ensure this isn't inlined. */ |
|
704 int Py_ADDRESS_IN_RANGE(void *P, poolp pool) Py_NO_INLINE; |
|
705 #undef Py_NO_INLINE |
|
706 #endif |
|
707 |
|
708 /*==========================================================================*/ |
|
709 |
|
710 /* malloc. Note that nbytes==0 tries to return a non-NULL pointer, distinct |
|
711 * from all other currently live pointers. This may not be possible. |
|
712 */ |
|
713 |
|
714 /* |
|
715 * The basic blocks are ordered by decreasing execution frequency, |
|
716 * which minimizes the number of jumps in the most common cases and |
|
717 * improves branch prediction and instruction scheduling (small |
|
718 * block allocations typically result in a couple of instructions). |
|
719 * Unless the optimizer reorders everything, being too smart... |
|
720 */ |
|
721 |
|
722 #undef PyObject_Malloc |
|
723 void * |
|
724 PyObject_Malloc(size_t nbytes) |
|
725 { |
|
726 block *bp; |
|
727 poolp pool; |
|
728 poolp next; |
|
729 uint size; |
|
730 |
|
731 /* |
|
732 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. |
|
733 * Most python internals blindly use a signed Py_ssize_t to track |
|
734 * things without checking for overflows or negatives. |
|
735 * As size_t is unsigned, checking for nbytes < 0 is not required. |
|
736 */ |
|
737 if (nbytes > PY_SSIZE_T_MAX) |
|
738 return NULL; |
|
739 |
|
740 /* |
|
741 * This implicitly redirects malloc(0): the unsigned wraparound of |
 * (nbytes - 1) makes the test below fail, so a zero-byte request falls |
 * through to the redirect path at the end of this function. |
|
742 */ |
|
743 if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) { |
|
744 LOCK(); |
|
745 /* |
|
746 * Most frequent paths first |
|
747 */ |
|
748 size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT; |
|
749 pool = usedpools[size + size]; |
|
750 if (pool != pool->nextpool) { |
|
751 /* |
|
752 * There is a used pool for this size class. |
|
753 * Pick up the head block of its free list. |
|
754 */ |
|
755 ++pool->ref.count; |
|
756 bp = pool->freeblock; |
|
757 assert(bp != NULL); |
|
758 if ((pool->freeblock = *(block **)bp) != NULL) { |
|
759 UNLOCK(); |
|
760 return (void *)bp; |
|
761 } |
|
762 /* |
|
763 * Reached the end of the free list, try to extend it. |
|
764 */ |
|
765 if (pool->nextoffset <= pool->maxnextoffset) { |
|
766 /* There is room for another block. */ |
|
767 pool->freeblock = (block*)pool + |
|
768 pool->nextoffset; |
|
769 pool->nextoffset += INDEX2SIZE(size); |
|
770 *(block **)(pool->freeblock) = NULL; |
|
771 UNLOCK(); |
|
772 return (void *)bp; |
|
773 } |
|
774 /* Pool is full, unlink from used pools. */ |
|
775 next = pool->nextpool; |
|
776 pool = pool->prevpool; |
|
777 next->prevpool = pool; |
|
778 pool->nextpool = next; |
|
779 UNLOCK(); |
|
780 return (void *)bp; |
|
781 } |
|
782 |
|
783 /* There isn't a pool of the right size class immediately |
|
784 * available: use a free pool. |
|
785 */ |
|
786 if (usable_arenas == NULL) { |
|
787 /* No arena has a free pool: allocate a new arena. */ |
|
788 #ifdef WITH_MEMORY_LIMITS |
|
789 if (narenas_currently_allocated >= MAX_ARENAS) { |
|
790 UNLOCK(); |
|
791 goto redirect; |
|
792 } |
|
793 #endif |
|
794 usable_arenas = new_arena(); |
|
795 if (usable_arenas == NULL) { |
|
796 UNLOCK(); |
|
797 goto redirect; |
|
798 } |
|
799 usable_arenas->nextarena = |
|
800 usable_arenas->prevarena = NULL; |
|
801 } |
|
802 assert(usable_arenas->address != 0); |
|
803 |
|
804 /* Try to get a cached free pool. */ |
|
805 pool = usable_arenas->freepools; |
|
806 if (pool != NULL) { |
|
807 /* Unlink from cached pools. */ |
|
808 usable_arenas->freepools = pool->nextpool; |
|
809 |
|
810 /* This arena already had the smallest nfreepools |
|
811 * value, so decreasing nfreepools doesn't change |
|
812 * that, and we don't need to rearrange the |
|
813 * usable_arenas list. However, if the arena has |
|
814 * become wholly allocated, we need to remove its |
|
815 * arena_object from usable_arenas. |
|
816 */ |
|
817 --usable_arenas->nfreepools; |
|
818 if (usable_arenas->nfreepools == 0) { |
|
819 /* Wholly allocated: remove. */ |
|
820 assert(usable_arenas->freepools == NULL); |
|
821 assert(usable_arenas->nextarena == NULL || |
|
822 usable_arenas->nextarena->prevarena == |
|
823 usable_arenas); |
|
824 |
|
825 usable_arenas = usable_arenas->nextarena; |
|
826 if (usable_arenas != NULL) { |
|
827 usable_arenas->prevarena = NULL; |
|
828 assert(usable_arenas->address != 0); |
|
829 } |
|
830 } |
|
831 else { |
|
832 /* nfreepools > 0: it must be that freepools |
|
833 * isn't NULL, or that we haven't yet carved |
|
834 * off all the arena's pools for the first |
|
835 * time. |
|
836 */ |
|
837 assert(usable_arenas->freepools != NULL || |
|
838 usable_arenas->pool_address <= |
|
839 (block*)usable_arenas->address + |
|
840 ARENA_SIZE - POOL_SIZE); |
|
841 } |
|
842 init_pool: |
|
843 /* Frontlink to used pools. */ |
|
844 next = usedpools[size + size]; /* == prev */ |
|
845 pool->nextpool = next; |
|
846 pool->prevpool = next; |
|
847 next->nextpool = pool; |
|
848 next->prevpool = pool; |
|
849 pool->ref.count = 1; |
|
850 if (pool->szidx == size) { |
|
851 /* Luckily, this pool last contained blocks |
|
852 * of the same size class, so its header |
|
853 * and free list are already initialized. |
|
854 */ |
|
855 bp = pool->freeblock; |
|
856 pool->freeblock = *(block **)bp; |
|
857 UNLOCK(); |
|
858 return (void *)bp; |
|
859 } |
|
860 /* |
|
861 * Initialize the pool header, set up the free list to |
|
862 * contain just the second block, and return the first |
|
863 * block. |
|
864 */ |
|
865 pool->szidx = size; |
|
866 size = INDEX2SIZE(size); |
|
867 bp = (block *)pool + POOL_OVERHEAD; |
|
868 pool->nextoffset = POOL_OVERHEAD + (size << 1); |
|
869 pool->maxnextoffset = POOL_SIZE - size; |
|
870 pool->freeblock = bp + size; |
|
871 *(block **)(pool->freeblock) = NULL; |
|
872 UNLOCK(); |
|
873 return (void *)bp; |
|
874 } |
|
875 |
|
876 /* Carve off a new pool. */ |
|
877 assert(usable_arenas->nfreepools > 0); |
|
878 assert(usable_arenas->freepools == NULL); |
|
879 pool = (poolp)usable_arenas->pool_address; |
|
880 assert((block*)pool <= (block*)usable_arenas->address + |
|
881 ARENA_SIZE - POOL_SIZE); |
|
882 pool->arenaindex = usable_arenas - arenas; |
|
883 assert(&arenas[pool->arenaindex] == usable_arenas); |
|
884 pool->szidx = DUMMY_SIZE_IDX; |
|
885 usable_arenas->pool_address += POOL_SIZE; |
|
886 --usable_arenas->nfreepools; |
|
887 |
|
888 if (usable_arenas->nfreepools == 0) { |
|
889 assert(usable_arenas->nextarena == NULL || |
|
890 usable_arenas->nextarena->prevarena == |
|
891 usable_arenas); |
|
892 /* Unlink the arena: it is completely allocated. */ |
|
893 usable_arenas = usable_arenas->nextarena; |
|
894 if (usable_arenas != NULL) { |
|
895 usable_arenas->prevarena = NULL; |
|
896 assert(usable_arenas->address != 0); |
|
897 } |
|
898 } |
|
899 |
|
900 goto init_pool; |
|
901 } |
|
902 |
|
903 /* The small block allocator ends here. */ |
|
904 |
|
905 redirect: |
|
906 /* Redirect the original request to the underlying (libc) allocator. |
|
907 * We jump here on bigger requests, on error in the code above (as a |
|
908 * last chance to serve the request) or when the max memory limit |
|
909 * has been reached. |
|
910 */ |
|
911 if (nbytes == 0) |
|
912 nbytes = 1; |
|
913 return (void *)malloc(nbytes); |
|
914 } |
|
915 |
|
916 /* free */ |
|
917 |
|
918 #undef PyObject_Free |
|
919 void |
|
920 PyObject_Free(void *p) |
|
921 { |
|
922 poolp pool; |
|
923 block *lastfree; |
|
924 poolp next, prev; |
|
925 uint size; |
|
926 |
|
927 if (p == NULL) /* free(NULL) has no effect */ |
|
928 return; |
|
929 |
|
930 pool = POOL_ADDR(p); |
|
931 if (Py_ADDRESS_IN_RANGE(p, pool)) { |
|
932 /* We allocated this address. */ |
|
933 LOCK(); |
|
934 /* Link p to the start of the pool's freeblock list. Since |
|
935 * the pool had at least the p block outstanding, the pool |
|
936 * wasn't empty (so it's already in a usedpools[] list, or |
|
937 * was full and is in no list -- it's not in the freeblocks |
|
938 * list in any case). |
|
939 */ |
|
940 assert(pool->ref.count > 0); /* else it was empty */ |
|
941 *(block **)p = lastfree = pool->freeblock; |
|
942 pool->freeblock = (block *)p; |
|
943 if (lastfree) { |
|
944 struct arena_object* ao; |
|
945 uint nf; /* ao->nfreepools */ |
|
946 |
|
947 /* freeblock wasn't NULL, so the pool wasn't full, |
|
948 * and the pool is in a usedpools[] list. |
|
949 */ |
|
950 if (--pool->ref.count != 0) { |
|
951 /* pool isn't empty: leave it in usedpools */ |
|
952 UNLOCK(); |
|
953 return; |
|
954 } |
|
955 /* Pool is now empty: unlink from usedpools, and |
|
956 * link to the front of freepools. This ensures that |
|
957 * previously freed pools will be allocated later |
|
958 * (not being referenced, they may well have been paged out). |
|
959 */ |
|
960 next = pool->nextpool; |
|
961 prev = pool->prevpool; |
|
962 next->prevpool = prev; |
|
963 prev->nextpool = next; |
|
964 |
|
965 /* Link the pool to freepools. This is a singly-linked |
|
966 * list, and pool->prevpool isn't used there. |
|
967 */ |
|
968 ao = &arenas[pool->arenaindex]; |
|
969 pool->nextpool = ao->freepools; |
|
970 ao->freepools = pool; |
|
971 nf = ++ao->nfreepools; |
|
972 |
|
973 /* All the rest is arena management. We just freed |
|
974 * a pool, and there are 4 cases for arena mgmt: |
|
975 * 1. If all the pools are free, return the arena to |
|
976 * the system free(). |
|
977 * 2. If this is the only free pool in the arena, |
|
978 * add the arena back to the `usable_arenas` list. |
|
979 * 3. If the "next" arena has a smaller count of free |
|
980 * pools, we have to "slide this arena right" to |
|
981 * restore the invariant that usable_arenas is sorted in order of |
|
982 * nfreepools. |
|
983 * 4. Else there's nothing more to do. |
|
984 */ |
|
985 if (nf == ao->ntotalpools) { |
|
986 /* Case 1. First unlink ao from usable_arenas. |
|
987 */ |
|
988 assert(ao->prevarena == NULL || |
|
989 ao->prevarena->address != 0); |
|
990 assert(ao ->nextarena == NULL || |
|
991 ao->nextarena->address != 0); |
|
992 |
|
993 /* Fix the pointer in the prevarena, or the |
|
994 * usable_arenas pointer. |
|
995 */ |
|
996 if (ao->prevarena == NULL) { |
|
997 usable_arenas = ao->nextarena; |
|
998 assert(usable_arenas == NULL || |
|
999 usable_arenas->address != 0); |
|
1000 } |
|
1001 else { |
|
1002 assert(ao->prevarena->nextarena == ao); |
|
1003 ao->prevarena->nextarena = |
|
1004 ao->nextarena; |
|
1005 } |
|
1006 /* Fix the pointer in the nextarena. */ |
|
1007 if (ao->nextarena != NULL) { |
|
1008 assert(ao->nextarena->prevarena == ao); |
|
1009 ao->nextarena->prevarena = |
|
1010 ao->prevarena; |
|
1011 } |
|
1012 /* Record that this arena_object slot is |
|
1013 * available to be reused. |
|
1014 */ |
|
1015 ao->nextarena = unused_arena_objects; |
|
1016 unused_arena_objects = ao; |
|
1017 |
|
1018 /* Free the entire arena. */ |
|
1019 free((void *)ao->address); |
|
1020 ao->address = 0; /* mark unassociated */ |
|
1021 --narenas_currently_allocated; |
|
1022 |
|
1023 UNLOCK(); |
|
1024 return; |
|
1025 } |
|
1026 if (nf == 1) { |
|
1027 /* Case 2. Put ao at the head of |
|
1028 * usable_arenas. Note that because |
|
1029 * ao->nfreepools was 0 before, ao isn't |
|
1030 * currently on the usable_arenas list. |
|
1031 */ |
|
1032 ao->nextarena = usable_arenas; |
|
1033 ao->prevarena = NULL; |
|
1034 if (usable_arenas) |
|
1035 usable_arenas->prevarena = ao; |
|
1036 usable_arenas = ao; |
|
1037 assert(usable_arenas->address != 0); |
|
1038 |
|
1039 UNLOCK(); |
|
1040 return; |
|
1041 } |
|
1042 /* If this arena is now out of order, we need to keep |
|
1043 * the list sorted. The list is kept sorted so that |
|
1044 * the "most full" arenas are used first, which allows |
|
1045 * the nearly empty arenas to be completely freed. In |
|
1046 * a few un-scientific tests, it seems like this |
|
1047 * approach allowed a lot more memory to be freed. |
|
1048 */ |
|
1049 if (ao->nextarena == NULL || |
|
1050 nf <= ao->nextarena->nfreepools) { |
|
1051 /* Case 4. Nothing to do. */ |
|
1052 UNLOCK(); |
|
1053 return; |
|
1054 } |
|
1055 /* Case 3: We have to move the arena towards the end |
|
1056 * of the list, because it has more free pools than |
|
1057 * the arena to its right. |
|
1058 * First unlink ao from usable_arenas. |
|
1059 */ |
|
1060 if (ao->prevarena != NULL) { |
|
1061 /* ao isn't at the head of the list */ |
|
1062 assert(ao->prevarena->nextarena == ao); |
|
1063 ao->prevarena->nextarena = ao->nextarena; |
|
1064 } |
|
1065 else { |
|
1066 /* ao is at the head of the list */ |
|
1067 assert(usable_arenas == ao); |
|
1068 usable_arenas = ao->nextarena; |
|
1069 } |
|
1070 ao->nextarena->prevarena = ao->prevarena; |
|
1071 |
|
1072 /* Locate the new insertion point by iterating over |
|
1073 * the list, using our nextarena pointer. |
|
1074 */ |
|
1075 while (ao->nextarena != NULL && |
|
1076 nf > ao->nextarena->nfreepools) { |
|
1077 ao->prevarena = ao->nextarena; |
|
1078 ao->nextarena = ao->nextarena->nextarena; |
|
1079 } |
|
1080 |
|
1081 /* Insert ao at this point. */ |
|
1082 assert(ao->nextarena == NULL || |
|
1083 ao->prevarena == ao->nextarena->prevarena); |
|
1084 assert(ao->prevarena->nextarena == ao->nextarena); |
|
1085 |
|
1086 ao->prevarena->nextarena = ao; |
|
1087 if (ao->nextarena != NULL) |
|
1088 ao->nextarena->prevarena = ao; |
|
1089 |
|
1090 /* Verify that the swaps worked. */ |
|
1091 assert(ao->nextarena == NULL || |
|
1092 nf <= ao->nextarena->nfreepools); |
|
1093 assert(ao->prevarena == NULL || |
|
1094 nf > ao->prevarena->nfreepools); |
|
1095 assert(ao->nextarena == NULL || |
|
1096 ao->nextarena->prevarena == ao); |
|
1097 assert((usable_arenas == ao && |
|
1098 ao->prevarena == NULL) || |
|
1099 ao->prevarena->nextarena == ao); |
|
1100 |
|
1101 UNLOCK(); |
|
1102 return; |
|
1103 } |
|
1104 /* Pool was full, so doesn't currently live in any list: |
|
1105 * link it to the front of the appropriate usedpools[] list. |
|
1106 * This mimics LRU pool usage for new allocations and |
|
1107 * targets optimal filling when several pools contain |
|
1108 * blocks of the same size class. |
|
1109 */ |
|
1110 --pool->ref.count; |
|
1111 assert(pool->ref.count > 0); /* else the pool is empty */ |
|
1112 size = pool->szidx; |
|
1113 next = usedpools[size + size]; |
|
1114 prev = next->prevpool; |
|
1115 /* insert pool before next: prev <-> pool <-> next */ |
|
1116 pool->nextpool = next; |
|
1117 pool->prevpool = prev; |
|
1118 next->prevpool = pool; |
|
1119 prev->nextpool = pool; |
|
1120 UNLOCK(); |
|
1121 return; |
|
1122 } |
|
1123 |
|
1124 /* We didn't allocate this address. */ |
|
1125 free(p); |
|
1126 } |
|
1127 |
|
1128 /* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0, |
|
1129 * then as the Python docs promise, we do not treat this like free(p), and |
|
1130 * return a non-NULL result. |
|
1131 */ |
|
1132 |
|
1133 #undef PyObject_Realloc |
|
1134 void * |
|
1135 PyObject_Realloc(void *p, size_t nbytes) |
|
1136 { |
|
1137 void *bp; |
|
1138 poolp pool; |
|
1139 size_t size; |
|
1140 |
|
1141 if (p == NULL) |
|
1142 return PyObject_Malloc(nbytes); |
|
1143 |
|
1144 /* |
|
1145 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. |
|
1146 * Most python internals blindly use a signed Py_ssize_t to track |
|
1147 * things without checking for overflows or negatives. |
|
1148 * As size_t is unsigned, checking for nbytes < 0 is not required. |
|
1149 */ |
|
1150 if (nbytes > PY_SSIZE_T_MAX) |
|
1151 return NULL; |
|
1152 |
|
1153 pool = POOL_ADDR(p); |
|
1154 if (Py_ADDRESS_IN_RANGE(p, pool)) { |
|
1155 /* We're in charge of this block */ |
|
1156 size = INDEX2SIZE(pool->szidx); |
|
1157 if (nbytes <= size) { |
|
1158 /* The block is staying the same or shrinking. If |
|
1159 * it's shrinking, there's a tradeoff: it costs |
|
1160 * cycles to copy the block to a smaller size class, |
|
1161 * but it wastes memory not to copy it. The |
|
1162 * compromise here is to copy on shrink only if at |
|
1163 * least 25% of size can be shaved off. |
|
1164 */ |
|
1165 if (4 * nbytes > 3 * size) { |
|
1166 /* It's the same, |
|
1167 * or shrinking and new/old > 3/4. |
|
1168 */ |
|
1169 return p; |
|
1170 } |
|
1171 size = nbytes; |
|
1172 } |
|
1173 bp = PyObject_Malloc(nbytes); |
|
1174 if (bp != NULL) { |
|
1175 memcpy(bp, p, size); |
|
1176 PyObject_Free(p); |
|
1177 } |
|
1178 return bp; |
|
1179 } |
|
1180 /* We're not managing this block. If nbytes <= |
|
1181 * SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this |
|
1182 * block. However, if we do, we need to copy the valid data from |
|
1183 * the C-managed block to one of our blocks, and there's no portable |
|
1184 * way to know how much of the memory space starting at p is valid. |
|
1185 * As bug 1185883 pointed out the hard way, it's possible that the |
|
1186 * C-managed block is "at the end" of allocated VM space, so that |
|
1187 * a memory fault can occur if we try to copy nbytes bytes starting |
|
1188 * at p. Instead we punt: let C continue to manage this block. |
|
1189 */ |
|
1190 if (nbytes) |
|
1191 return realloc(p, nbytes); |
|
1192 /* C doesn't define the result of realloc(p, 0) (it may or may not |
|
1193 * return NULL then), but Python's docs promise that nbytes==0 never |
|
1194 * returns NULL. We don't pass 0 to realloc(), to avoid that endcase |
|
1195 * to begin with. Even then, we can't be sure that realloc() won't |
|
1196 * return NULL. |
|
1197 */ |
|
1198 bp = realloc(p, 1); |
|
1199 return bp ? bp : p; |
|
1200 } |
|
1201 |
|
1202 #else /* ! WITH_PYMALLOC */ |
|
1203 |
|
1204 /*==========================================================================*/ |
|
1205 /* pymalloc not enabled: Redirect the entry points to malloc. These will |
|
1206 * only be used by extensions that are compiled with pymalloc enabled. */ |
|
1207 |
|
1208 void * |
|
1209 PyObject_Malloc(size_t n) |
|
1210 { |
|
1211 return PyMem_MALLOC(n); |
|
1212 } |
|
1213 |
|
1214 void * |
|
1215 PyObject_Realloc(void *p, size_t n) |
|
1216 { |
|
1217 return PyMem_REALLOC(p, n); |
|
1218 } |
|
1219 |
|
1220 void |
|
1221 PyObject_Free(void *p) |
|
1222 { |
|
1223 PyMem_FREE(p); |
|
1224 } |
|
1225 #endif /* WITH_PYMALLOC */ |
|
1226 |
|
1227 #ifdef PYMALLOC_DEBUG |
|
1228 /*==========================================================================*/ |
|
1229 /* A cross-platform debugging allocator. This doesn't manage memory directly, |
|
1230 * it wraps a real allocator, adding extra debugging info to the memory blocks. |
|
1231 */ |
|
1232 |
|
1233 /* Special bytes broadcast into debug memory blocks at appropriate times. |
|
1234 * Strings of these are unlikely to be valid addresses, floats, ints or |
|
1235 * 7-bit ASCII. |
|
1236 */ |
|
1237 #undef CLEANBYTE |
|
1238 #undef DEADBYTE |
|
1239 #undef FORBIDDENBYTE |
|
1240 #define CLEANBYTE 0xCB /* clean (newly allocated) memory */ |
|
1241 #define DEADBYTE 0xDB /* dead (newly freed) memory */ |
|
1242 #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */ |
|
1243 |
|
1244 static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ |
|
1245 |
|
1246 /* serialno is always incremented via calling this routine. The point is |
|
1247 * to supply a single place to set a breakpoint. |
|
1248 */ |
|
1249 static void |
|
1250 bumpserialno(void) |
|
1251 { |
|
1252 ++serialno; |
|
1253 } |
|
1254 |
|
1255 #define SST SIZEOF_SIZE_T |
|
1256 |
|
1257 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */ |
|
1258 static size_t |
|
1259 read_size_t(const void *p) |
|
1260 { |
|
1261 const uchar *q = (const uchar *)p; |
|
1262 size_t result = *q++; |
|
1263 int i; |
|
1264 |
|
1265 for (i = SST; --i > 0; ++q) |
|
1266 result = (result << 8) | *q; |
|
1267 return result; |
|
1268 } |
|
1269 |
|
1270 /* Write n as a big-endian size_t, MSB at address p, LSB at |
|
1271 * p + sizeof(size_t) - 1. |
|
1272 */ |
|
1273 static void |
|
1274 write_size_t(void *p, size_t n) |
|
1275 { |
|
1276 uchar *q = (uchar *)p + SST - 1; |
|
1277 int i; |
|
1278 |
|
1279 for (i = SST; --i >= 0; --q) { |
|
1280 *q = (uchar)(n & 0xff); |
|
1281 n >>= 8; |
|
1282 } |
|
1283 } |
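/*
 * Usage sketch (not compiled): the two helpers above are inverses, so a
 * big-endian round trip through a small buffer recovers the original value.
 * example_size_t_roundtrip() is hypothetical.
 */
#if 0
static void
example_size_t_roundtrip(void)
{
    uchar buf[SST];
    write_size_t(buf, (size_t)0xABCD);
    assert(read_size_t(buf) == (size_t)0xABCD);
}
#endif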
|
1284 |
|
1285 #ifdef Py_DEBUG |
|
1286 /* Is target in the list? The list is traversed via the nextpool pointers. |
|
1287 * The list may be NULL-terminated, or circular. Return 1 if target is in |
|
1288 * list, else 0. |
|
1289 */ |
|
1290 static int |
|
1291 pool_is_in_list(const poolp target, poolp list) |
|
1292 { |
|
1293 poolp origlist = list; |
|
1294 assert(target != NULL); |
|
1295 if (list == NULL) |
|
1296 return 0; |
|
1297 do { |
|
1298 if (target == list) |
|
1299 return 1; |
|
1300 list = list->nextpool; |
|
1301 } while (list != NULL && list != origlist); |
|
1302 return 0; |
|
1303 } |
|
1304 |
|
1305 #else |
|
1306 #define pool_is_in_list(X, Y) 1 |
|
1307 |
|
1308 #endif /* Py_DEBUG */ |
|
1309 |
|
1310 /* Let S = sizeof(size_t). The debug malloc asks for 4*S extra bytes and |
|
1311 fills them with useful stuff, here calling the underlying malloc's result p: |
|
1312 |
|
1313 p[0: S] |
|
1314 Number of bytes originally asked for. This is a size_t, big-endian (easier |
|
1315 to read in a memory dump). |
|
1316 p[S: 2*S] |
|
1317 Copies of FORBIDDENBYTE. Used to catch under-writes and reads. |
|
1318 p[2*S: 2*S+n] |
|
1319 The requested memory, filled with copies of CLEANBYTE. |
|
1320 Used to catch reference to uninitialized memory. |
|
1321 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc |
|
1322 handled the request itself. |
|
1323 p[2*S+n: 2*S+n+S] |
|
1324 Copies of FORBIDDENBYTE. Used to catch over-writes and reads. |
|
1325 p[2*S+n+S: 2*S+n+2*S] |
|
1326 A serial number, incremented by 1 on each call to _PyObject_DebugMalloc |
|
1327 and _PyObject_DebugRealloc. |
|
1328 This is a big-endian size_t. |
|
1329 If "bad memory" is detected later, the serial number gives an |
|
1330 excellent way to set a breakpoint on the next run, to capture the |
|
1331 instant at which this block was passed out. |
|
1332 */ |
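/*
 * Sketch (not compiled) of navigating the layout above from the pointer the
 * caller actually sees (q == &p[2*S]): the original request size sits 2*S
 * bytes before q, and the serial number sits S bytes past the trailing
 * forbidden pad.  This mirrors the arithmetic in _PyObject_DebugFree() and
 * _PyObject_DebugRealloc() below; example_debug_serialno() is hypothetical.
 */
#if 0
static size_t
example_debug_serialno(const void *q)
{
    size_t nbytes = read_size_t((const uchar *)q - 2*SST);
    const uchar *tail = (const uchar *)q + nbytes;  /* trailing pad start */
    return read_size_t(tail + SST);                 /* serial number */
}
#endif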
|
1333 |
|
1334 void * |
|
1335 _PyObject_DebugMalloc(size_t nbytes) |
|
1336 { |
|
1337 uchar *p; /* base address of malloc'ed block */ |
|
1338 uchar *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */ |
|
1339 size_t total; /* nbytes + 4*SST */ |
|
1340 |
|
1341 bumpserialno(); |
|
1342 total = nbytes + 4*SST; |
|
1343 if (total < nbytes) |
|
1344 /* overflow: can't represent total as a size_t */ |
|
1345 return NULL; |
|
1346 |
|
1347 p = (uchar *)PyObject_Malloc(total); |
|
1348 if (p == NULL) |
|
1349 return NULL; |
|
1350 |
|
1351 write_size_t(p, nbytes); |
|
1352 memset(p + SST, FORBIDDENBYTE, SST); |
|
1353 |
|
1354 if (nbytes > 0) |
|
1355 memset(p + 2*SST, CLEANBYTE, nbytes); |
|
1356 |
|
1357 tail = p + 2*SST + nbytes; |
|
1358 memset(tail, FORBIDDENBYTE, SST); |
|
1359 write_size_t(tail + SST, serialno); |
|
1360 |
|
1361 return p + 2*SST; |
|
1362 } |
|
1363 |
|
1364 /* The debug free first checks the 2*SST bytes on each end for sanity (in |
|
1365 particular, that the FORBIDDENBYTEs are still intact). |
|
1366 Then fills the original bytes with DEADBYTE. |
|
1367 Then calls the underlying free. |
|
1368 */ |
|
1369 void |
|
1370 _PyObject_DebugFree(void *p) |
|
1371 { |
|
1372 uchar *q = (uchar *)p - 2*SST; /* address returned from malloc */ |
|
1373 size_t nbytes; |
|
1374 |
|
1375 if (p == NULL) |
|
1376 return; |
|
1377 _PyObject_DebugCheckAddress(p); |
|
1378 nbytes = read_size_t(q); |
|
1379 if (nbytes > 0) |
|
1380 memset(q, DEADBYTE, nbytes); |
|
1381 PyObject_Free(q); |
|
1382 } |
|
1383 |
void *
_PyObject_DebugRealloc(void *p, size_t nbytes)
{
    uchar *q = (uchar *)p;
    uchar *tail;
    size_t total;       /* nbytes + 4*SST */
    size_t original_nbytes;
    int i;

    if (p == NULL)
        return _PyObject_DebugMalloc(nbytes);

    _PyObject_DebugCheckAddress(p);
    bumpserialno();
    original_nbytes = read_size_t(q - 2*SST);
    total = nbytes + 4*SST;
    if (total < nbytes)
        /* overflow: can't represent total as a size_t */
        return NULL;

    if (nbytes < original_nbytes) {
        /* shrinking: mark old extra memory dead */
        memset(q + nbytes, DEADBYTE, original_nbytes - nbytes);
    }

    /* Resize and add decorations. */
    q = (uchar *)PyObject_Realloc(q - 2*SST, total);
    if (q == NULL)
        return NULL;

    write_size_t(q, nbytes);
    for (i = 0; i < SST; ++i)
        assert(q[SST + i] == FORBIDDENBYTE);
    q += 2*SST;
    tail = q + nbytes;
    memset(tail, FORBIDDENBYTE, SST);
    write_size_t(tail + SST, serialno);

    if (nbytes > original_nbytes) {
        /* growing: mark new extra memory clean */
        memset(q + original_nbytes, CLEANBYTE,
               nbytes - original_nbytes);
    }

    return q;
}

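/* For example (a sketch): shrinking a block that originally asked for 16
   bytes via

       q = _PyObject_DebugRealloc(q, 4);

   paints the 12 abandoned bytes with DEADBYTE before the resize, while
   growing it again with _PyObject_DebugRealloc(q, 16) paints the 12 new
   bytes with CLEANBYTE after it; either way the size field, trailing pad
   and serial number are rewritten for the new size, so the block keeps the
   layout documented before _PyObject_DebugMalloc.
*/
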
/* Check the forbidden bytes on both ends of the memory allocated for p.
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
 * and call Py_FatalError to kill the program.
 */
void
_PyObject_DebugCheckAddress(const void *p)
{
    const uchar *q = (const uchar *)p;
    char *msg;
    size_t nbytes;
    const uchar *tail;
    int i;

    if (p == NULL) {
        msg = "didn't expect a NULL pointer";
        goto error;
    }

    /* Check the stuff at the start of p first:  if there's underwrite
     * corruption, the number-of-bytes field may be garbage, and checking
     * the tail could then segfault.
     */
    for (i = SST; i >= 1; --i) {
        if (*(q-i) != FORBIDDENBYTE) {
            msg = "bad leading pad byte";
            goto error;
        }
    }

    nbytes = read_size_t(q - 2*SST);
    tail = q + nbytes;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != FORBIDDENBYTE) {
            msg = "bad trailing pad byte";
            goto error;
        }
    }

    return;

error:
    _PyObject_DebugDumpAddress(p);
    Py_FatalError(msg);
}

/* Display info to stderr about the memory block at p. */
void
_PyObject_DebugDumpAddress(const void *p)
{
    const uchar *q = (const uchar *)p;
    const uchar *tail;
    size_t nbytes, serial;
    int i;
    int ok;

    fprintf(stderr, "Debug memory block at address p=%p:\n", p);
    if (p == NULL)
        return;

    nbytes = read_size_t(q - 2*SST);
    fprintf(stderr, "    %" PY_FORMAT_SIZE_T "u bytes originally "
                    "requested\n", nbytes);

    /* In case the byte count is garbage, check the leading pad bytes first. */
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST, SST);
    ok = 1;
    for (i = 1; i <= SST; ++i) {
        if (*(q-i) != FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                FORBIDDENBYTE);
        for (i = SST; i >= 1; --i) {
            const uchar byte = *(q-i);
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
            if (byte != FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }

        fputs("    Because memory is corrupted at the start, the "
              "count of bytes requested\n"
              "    may be bogus, and checking the trailing pad "
              "bytes may segfault.\n", stderr);
    }

    tail = q + nbytes;
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, tail);
    ok = 1;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                FORBIDDENBYTE);
        for (i = 0; i < SST; ++i) {
            const uchar byte = tail[i];
            fprintf(stderr, "        at tail+%d: 0x%02x",
                    i, byte);
            if (byte != FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }
    }

    serial = read_size_t(tail + SST);
    fprintf(stderr, "    The block was made by call #%" PY_FORMAT_SIZE_T
                    "u to debug malloc/realloc.\n", serial);

    if (nbytes > 0) {
        i = 0;
        fputs("    Data at p:", stderr);
        /* print up to 8 bytes at the start */
        while (q < tail && i < 8) {
            fprintf(stderr, " %02x", *q);
            ++i;
            ++q;
        }
        /* and up to 8 at the end */
        if (q < tail) {
            if (tail - q > 8) {
                fputs(" ...", stderr);
                q = tail - 8;
            }
            while (q < tail) {
                fprintf(stderr, " %02x", *q);
                ++q;
            }
        }
        fputc('\n', stderr);
    }
}

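/* For an intact block the report has this general shape (a sketch; the
   address, serial number and data bytes of course vary from run to run):

       Debug memory block at address p=0x95d7d8:
           10 bytes originally requested
           The 8 pad bytes at p-8 are FORBIDDENBYTE, as expected.
           The 8 pad bytes at tail=0x95d7e2 are FORBIDDENBYTE, as expected.
           The block was made by call #34142931 to debug malloc/realloc.
           Data at p: cb cb cb cb cb cb cb cb cb cb

   The serial number is the useful part when chasing corruption: rerun the
   program and break once the global serialno reaches that value (e.g. with a
   conditional breakpoint on bumpserialno) to catch the moment the offending
   block was handed out.
*/
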
static size_t
printone(const char* msg, size_t value)
{
    int i, k;
    char buf[100];
    size_t origvalue = value;

    fputs(msg, stderr);
    for (i = (int)strlen(msg); i < 35; ++i)
        fputc(' ', stderr);
    fputc('=', stderr);

    /* Write the value with commas. */
    i = 22;
    buf[i--] = '\0';
    buf[i--] = '\n';
    k = 3;
    do {
        size_t nextvalue = value / 10;
        uint digit = (uint)(value - nextvalue * 10);
        value = nextvalue;
        buf[i--] = (char)(digit + '0');
        --k;
        if (k == 0 && value && i >= 0) {
            k = 3;
            buf[i--] = ',';
        }
    } while (value && i >= 0);

    while (i >= 0)
        buf[i--] = ' ';
    fputs(buf, stderr);

    return origvalue;
}

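/* For example (with an arbitrary value, purely to show the formatting),
   printone("# bytes in allocated blocks", 3442704) writes

       # bytes in allocated blocks        =            3,442,704

   i.e. the label padded out to 35 columns, an '=', and the value
   right-aligned with thousands separators in a 21-character field.
*/
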
/* Print summary info to stderr about the state of pymalloc's structures.
 * In Py_DEBUG mode, also perform some expensive internal consistency
 * checks.
 */
void
_PyObject_DebugMallocStats(void)
{
    uint i;
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
    /* # of pools, allocated blocks, and free blocks per class index */
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    /* total # of allocated bytes in used and full pools */
    size_t allocated_bytes = 0;
    /* total # of available bytes in used pools */
    size_t available_bytes = 0;
    /* # of free pools + pools not yet carved out of current arena */
    uint numfreepools = 0;
    /* # of bytes for arena alignment padding */
    size_t arena_alignment = 0;
    /* # of bytes in used and full pools used for pool_headers */
    size_t pool_header_bytes = 0;
    /* # of bytes in used and full pools wasted due to quantization,
     * i.e. the necessarily leftover space at the ends of used and
     * full pools.
     */
    size_t quantization = 0;
    /* # of arenas actually allocated. */
    size_t narenas = 0;
    /* running total -- should equal narenas * ARENA_SIZE */
    size_t total;
    char buf[128];

    fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
            SMALL_REQUEST_THRESHOLD, numclasses);

    for (i = 0; i < numclasses; ++i)
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

    /* Because full pools aren't linked to from anything, it's easiest
     * to march over all the arenas.  If we're lucky, most of the memory
     * will be living in full pools -- would be a shame to miss them.
     */
    for (i = 0; i < maxarenas; ++i) {
        uint poolsinarena;
        uint j;
        uptr base = arenas[i].address;

        /* Skip arenas which are not allocated. */
        if (arenas[i].address == (uptr)NULL)
            continue;
        narenas += 1;

        poolsinarena = arenas[i].ntotalpools;
        numfreepools += arenas[i].nfreepools;

        /* round up to pool alignment */
        if (base & (uptr)POOL_SIZE_MASK) {
            arena_alignment += POOL_SIZE;
            base &= ~(uptr)POOL_SIZE_MASK;
            base += POOL_SIZE;
        }

        /* visit every pool in the arena */
        assert(base <= (uptr) arenas[i].pool_address);
        for (j = 0;
             base < (uptr) arenas[i].pool_address;
             ++j, base += POOL_SIZE) {
            poolp p = (poolp)base;
            const uint sz = p->szidx;
            uint freeblocks;

            if (p->ref.count == 0) {
                /* currently unused */
                assert(pool_is_in_list(p, arenas[i].freepools));
                continue;
            }
            ++numpools[sz];
            numblocks[sz] += p->ref.count;
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
            numfreeblocks[sz] += freeblocks;
#ifdef Py_DEBUG
            if (freeblocks > 0)
                assert(pool_is_in_list(p, usedpools[sz + sz]));
#endif
        }
    }
    assert(narenas == narenas_currently_allocated);

    fputc('\n', stderr);
    fputs("class   size   num pools   blocks in use  avail blocks\n"
          "-----   ----   ---------   -------------  ------------\n",
          stderr);

    for (i = 0; i < numclasses; ++i) {
        size_t p = numpools[i];
        size_t b = numblocks[i];
        size_t f = numfreeblocks[i];
        uint size = INDEX2SIZE(i);
        if (p == 0) {
            assert(b == 0 && f == 0);
            continue;
        }
        fprintf(stderr, "%5u %6u "
                        "%11" PY_FORMAT_SIZE_T "u "
                        "%15" PY_FORMAT_SIZE_T "u "
                        "%13" PY_FORMAT_SIZE_T "u\n",
                i, size, p, b, f);
        allocated_bytes += b * size;
        available_bytes += f * size;
        pool_header_bytes += p * POOL_OVERHEAD;
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
    }
    fputc('\n', stderr);
    (void)printone("# times object malloc called", serialno);

    (void)printone("# arenas allocated total", ntimes_arena_allocated);
    (void)printone("# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone("# arenas highwater mark", narenas_highwater);
    (void)printone("# arenas allocated current", narenas);

    PyOS_snprintf(buf, sizeof(buf),
                  "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
                  narenas, ARENA_SIZE);
    (void)printone(buf, narenas * ARENA_SIZE);

    fputc('\n', stderr);

    total = printone("# bytes in allocated blocks", allocated_bytes);
    total += printone("# bytes in available blocks", available_bytes);

    PyOS_snprintf(buf, sizeof(buf),
                  "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
    total += printone(buf, (size_t)numfreepools * POOL_SIZE);

    total += printone("# bytes lost to pool headers", pool_header_bytes);
    total += printone("# bytes lost to quantization", quantization);
    total += printone("# bytes lost to arena alignment", arena_alignment);
    (void)printone("Total", total);
}
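
/* This report can also be requested directly from C while the interpreter is
   running (a sketch; any point where it is safe to touch the allocator's
   bookkeeping, e.g. with the GIL held, will do):

       _PyObject_DebugMallocStats();

   Some builds additionally consult a PYTHONMALLOCSTATS environment variable
   elsewhere in the tree to decide whether to emit this report automatically
   at interpreter shutdown; whether that hook exists depends on the Python
   version.
*/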

#endif /* PYMALLOC_DEBUG */

#ifdef Py_USING_MEMORY_DEBUGGER
/* Make this function last so gcc won't inline it since the definition is
 * after the reference.
 */
int
Py_ADDRESS_IN_RANGE(void *P, poolp pool)
{
    return pool->arenaindex < maxarenas &&
           (uptr)P - arenas[pool->arenaindex].address < (uptr)ARENA_SIZE &&
           arenas[pool->arenaindex].address != 0;
}
#endif