// Copyright (c) 2005, 2007, Google Inc.
// All rights reserved.
// Copyright (C) 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>
//
// A malloc that uses a per-thread cache to satisfy small malloc requests.
// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
//
// See doc/tcmalloc.html for a high-level
// description of how this malloc works.
//
// SYNCHRONIZATION
//  1. The thread-specific lists are accessed without acquiring any locks.
//     This is safe because each such list is only accessed by one thread.
//  2. We have a lock per central free-list, and hold it while manipulating
//     the central free list for a particular size.
//  3. The central page allocator is protected by "pageheap_lock".
//  4. The pagemap (which maps from page-number to descriptor)
//     can be read without holding any locks, and written while holding
//     the "pageheap_lock".
//  5. To improve performance, a subset of the information one can get
//     from the pagemap is cached in a data structure, pagemap_cache_,
//     that atomically reads and writes its entries. This cache can be
//     read and written without locking.
//
//     This multi-threaded access to the pagemap is safe for fairly
//     subtle reasons. We basically assume that when an object X is
//     allocated by thread A and deallocated by thread B, there must
//     have been appropriate synchronization in the handoff of object
//     X from thread A to thread B. The same logic applies to pagemap_cache_.
//
// THE PAGEID-TO-SIZECLASS CACHE
// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
// returns 0 for a particular PageID then that means "no information," not that
// the sizeclass is 0. The cache may have stale information for pages that do
// not hold the beginning of any free()'able object. Staleness is eliminated
// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
// do_memalign() for all other relevant pages.
//
// TODO: Bias reclamation to larger addresses
// TODO: implement mallinfo/mallopt
// TODO: Better testing
//
// 9/28/2003 (new page-level allocator replaces ptmalloc2):
//   * malloc/free of small objects goes from ~300 ns to ~50 ns.
//   * allocation of a reasonably complicated struct
//     goes from about 1100 ns to about 300 ns.

#include "config.h"
#include "FastMalloc.h"

#include "Assertions.h"
#include <limits>
#if ENABLE(JSC_MULTIPLE_THREADS)
#include <pthread.h>
#endif

#ifndef NO_TCMALLOC_SAMPLES
#ifdef WTF_CHANGES
#define NO_TCMALLOC_SAMPLES
#endif
#endif

#if !(defined(USE_SYSTEM_MALLOC) && USE_SYSTEM_MALLOC) && defined(NDEBUG)
#define FORCE_SYSTEM_MALLOC 0
#else
#define FORCE_SYSTEM_MALLOC 1
#endif

// Use a background thread to periodically scavenge memory to release back to the system
// https://bugs.webkit.org/show_bug.cgi?id=27900: don't turn this on for Tiger until we have figured out why it caused a crash.
#if defined(BUILDING_ON_TIGER)
#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 0
#else
#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1
#endif

#ifndef NDEBUG
namespace WTF {

#if ENABLE(JSC_MULTIPLE_THREADS)
static pthread_key_t isForbiddenKey;
static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
static void initializeIsForbiddenKey()
{
    pthread_key_create(&isForbiddenKey, 0);
}

#if !ASSERT_DISABLED
static bool isForbidden()
{
    pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
    return !!pthread_getspecific(isForbiddenKey);
}
#endif

void fastMallocForbid()
{
    pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
    pthread_setspecific(isForbiddenKey, &isForbiddenKey);
}

void fastMallocAllow()
{
    pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
    pthread_setspecific(isForbiddenKey, 0);
}

#else

static bool staticIsForbidden;
static bool isForbidden()
{
    return staticIsForbidden;
}

void fastMallocForbid()
{
    staticIsForbidden = true;
}

void fastMallocAllow()
{
    staticIsForbidden = false;
}
#endif // ENABLE(JSC_MULTIPLE_THREADS)

} // namespace WTF
#endif // NDEBUG

#include <string.h>

namespace WTF {

#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)

namespace Internal {

void fastMallocMatchFailed(void*)
{
    CRASH();
}

} // namespace Internal

#endif

void* fastZeroedMalloc(size_t n)
{
    void* result = fastMalloc(n);
    memset(result, 0, n);
    return result;
}

char* fastStrDup(const char* src)
{
    int len = strlen(src) + 1;
    char* dup = static_cast<char*>(fastMalloc(len));

    if (dup)
        memcpy(dup, src, len);

    return dup;
}

TryMallocReturnValue tryFastZeroedMalloc(size_t n)
{
    void* result;
    if (!tryFastMalloc(n).getValue(result))
        return 0;
    memset(result, 0, n);
    return result;
}

} // namespace WTF

#if FORCE_SYSTEM_MALLOC

#if PLATFORM(BREWMP)
#include "brew/SystemMallocBrew.h"
#endif

#if OS(DARWIN)
#include <malloc/malloc.h>
#elif COMPILER(MSVC)
#include <malloc.h>
#endif

namespace WTF {

TryMallocReturnValue tryFastMalloc(size_t n)
{
    ASSERT(!isForbidden());

#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
    if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
        return 0;

    void* result = malloc(n + sizeof(AllocAlignmentInteger));
    if (!result)
        return 0;

    *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
    result = static_cast<AllocAlignmentInteger*>(result) + 1;

    return result;
#else
    return malloc(n);
#endif
}
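
// Illustrative note (not in the original tcmalloc sources): with
// FAST_MALLOC_MATCH_VALIDATION enabled, every block handed out above is laid
// out as [AllocAlignmentInteger tag][user data...], and callers receive a
// pointer just past the tag. fastFree()/fastRealloc() step back by
// sizeof(AllocAlignmentInteger) to re-check that tag before touching the block.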

void* fastMalloc(size_t n)
{
    ASSERT(!isForbidden());

#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
    TryMallocReturnValue returnValue = tryFastMalloc(n);
    void* result;
    returnValue.getValue(result);
#else
    void* result = malloc(n);
#endif

    if (!result) {
#if PLATFORM(BREWMP)
        // The behavior of malloc(0) is implementation defined.
        // To make sure that fastMalloc never returns 0, retry with fastMalloc(1).
        if (!n)
            return fastMalloc(1);
#endif
        CRASH();
    }

    return result;
}

TryMallocReturnValue tryFastCalloc(size_t n_elements, size_t element_size)
{
    ASSERT(!isForbidden());

#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
    size_t totalBytes = n_elements * element_size;
    if ((n_elements > 1 && element_size && (totalBytes / element_size) != n_elements) || (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes))
        return 0;

    totalBytes += sizeof(AllocAlignmentInteger);
    void* result = malloc(totalBytes);
    if (!result)
        return 0;

    memset(result, 0, totalBytes);
    *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
    result = static_cast<AllocAlignmentInteger*>(result) + 1;
    return result;
#else
    return calloc(n_elements, element_size);
#endif
}

void* fastCalloc(size_t n_elements, size_t element_size)
{
    ASSERT(!isForbidden());

#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
    TryMallocReturnValue returnValue = tryFastCalloc(n_elements, element_size);
    void* result;
    returnValue.getValue(result);
#else
    void* result = calloc(n_elements, element_size);
#endif

    if (!result) {
#if PLATFORM(BREWMP)
        // If either n_elements or element_size is 0, the behavior of calloc is implementation defined.
        // To make sure that fastCalloc never returns 0, retry with fastCalloc(1, 1).
        if (!n_elements || !element_size)
            return fastCalloc(1, 1);
#endif
        CRASH();
    }

    return result;
}

void fastFree(void* p)
{
    ASSERT(!isForbidden());

#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
    if (!p)
        return;

    AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
    if (*header != Internal::AllocTypeMalloc)
        Internal::fastMallocMatchFailed(p);
    free(header);
#else
    free(p);
#endif
}

TryMallocReturnValue tryFastRealloc(void* p, size_t n)
{
    ASSERT(!isForbidden());

#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
    if (p) {
        if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
            return 0;
        AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
        if (*header != Internal::AllocTypeMalloc)
            Internal::fastMallocMatchFailed(p);
        void* result = realloc(header, n + sizeof(AllocAlignmentInteger));
        if (!result)
            return 0;

        // This should not be needed because the value is already there:
        // *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
        result = static_cast<AllocAlignmentInteger*>(result) + 1;
        return result;
    } else {
        return fastMalloc(n);
    }
#else
    return realloc(p, n);
#endif
}

void* fastRealloc(void* p, size_t n)
{
    ASSERT(!isForbidden());

#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
    TryMallocReturnValue returnValue = tryFastRealloc(p, n);
    void* result;
    returnValue.getValue(result);
#else
    void* result = realloc(p, n);
#endif

    if (!result)
        CRASH();
    return result;
}

void releaseFastMallocFreeMemory() { }

FastMallocStatistics fastMallocStatistics()
{
    FastMallocStatistics statistics = { 0, 0, 0 };
    return statistics;
}

size_t fastMallocSize(const void* p)
{
#if OS(DARWIN)
    return malloc_size(p);
#elif COMPILER(MSVC)
    return _msize(const_cast<void*>(p));
#else
    return 1;
#endif
}

} // namespace WTF

#if OS(DARWIN)
// This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
// It will never be used in this case, so its type and value are less interesting than its presence.
extern "C" const int jscore_fastmalloc_introspection = 0;
#endif

#else // FORCE_SYSTEM_MALLOC

#if HAVE(STDINT_H)
#include <stdint.h>
#elif HAVE(INTTYPES_H)
#include <inttypes.h>
#else
#include <sys/types.h>
#endif

#include "AlwaysInline.h"
#include "Assertions.h"
#include "TCPackedCache.h"
#include "TCPageMap.h"
#include "TCSpinLock.h"
#include "TCSystemAlloc.h"
#include <algorithm>
#include <errno.h>
#include <limits>
#include <pthread.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#if OS(UNIX)
#include <unistd.h>
#endif
#if COMPILER(MSVC)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#endif

#ifdef WTF_CHANGES

#if OS(DARWIN)
#include "MallocZoneSupport.h"
#include <wtf/HashSet.h>
#include <wtf/Vector.h>
#endif
#if HAVE(DISPATCH_H)
#include <dispatch/dispatch.h>
#endif

#ifndef PRIuS
#define PRIuS "zu"
#endif

// Calling pthread_getspecific through a global function pointer is faster than a normal
// call to the function on Mac OS X, and it's used in performance-critical code. So we
// use a function pointer. But that's not necessarily faster on other platforms, and we had
// problems with this technique on Windows, so we'll do this only on Mac OS X.
#if OS(DARWIN)
static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
#define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
#endif

#define DEFINE_VARIABLE(type, name, value, meaning) \
  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \
  type FLAGS_##name(value); \
  char FLAGS_no##name; \
  } \
  using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
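
// For instance (illustration only), DEFINE_int64(tcmalloc_sample_parameter, 262147, "...")
// below expands to an int64_t named FLAGS_tcmalloc_sample_parameter, defined inside the
// FLAG__namespace_... namespace and then pulled into scope with a using-declaration.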

#define DEFINE_int64(name, value, meaning) \
  DEFINE_VARIABLE(int64_t, name, value, meaning)

#define DEFINE_double(name, value, meaning) \
  DEFINE_VARIABLE(double, name, value, meaning)

namespace WTF {

#define malloc fastMalloc
#define calloc fastCalloc
#define free fastFree
#define realloc fastRealloc

#define MESSAGE LOG_ERROR
#define CHECK_CONDITION ASSERT

#if OS(DARWIN)
struct Span;
class TCMalloc_Central_FreeListPadded;
class TCMalloc_PageHeap;
class TCMalloc_ThreadCache;
template <typename T> class PageHeapAllocator;

class FastMallocZone {
public:
    static void init();

    static kern_return_t enumerate(task_t, void*, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
    static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
    static boolean_t check(malloc_zone_t*) { return true; }
    static void print(malloc_zone_t*, boolean_t) { }
    static void log(malloc_zone_t*, void*) { }
    static void forceLock(malloc_zone_t*) { }
    static void forceUnlock(malloc_zone_t*) { }
    static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }

private:
    FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*);
    static size_t size(malloc_zone_t*, const void*);
    static void* zoneMalloc(malloc_zone_t*, size_t);
    static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
    static void zoneFree(malloc_zone_t*, void*);
    static void* zoneRealloc(malloc_zone_t*, void*, size_t);
    static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
    static void zoneDestroy(malloc_zone_t*) { }

    malloc_zone_t m_zone;
    TCMalloc_PageHeap* m_pageHeap;
    TCMalloc_ThreadCache** m_threadHeaps;
    TCMalloc_Central_FreeListPadded* m_centralCaches;
    PageHeapAllocator<Span>* m_spanAllocator;
    PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator;
};

#endif

#endif

#ifndef WTF_CHANGES
// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if
// you're porting to a system where you really can't get a stacktrace.
#ifdef NO_TCMALLOC_SAMPLES
// We use #define so code compiles even if you #include stacktrace.h somehow.
# define GetStackTrace(stack, depth, skip) (0)
#else
# include <google/stacktrace.h>
#endif
#endif

// Even if we have support for thread-local storage in the compiler
// and linker, the OS may not support it. We need to check that at
// runtime. Right now, we have to keep a manual set of "bad" OSes.
#if defined(HAVE_TLS)
  static bool kernel_supports_tls = false;      // be conservative
  static inline bool KernelSupportsTLS() {
    return kernel_supports_tls;
  }
# if !HAVE_DECL_UNAME    // if too old for uname, probably too old for TLS
    static void CheckIfKernelSupportsTLS() {
      kernel_supports_tls = false;
    }
# else
#   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
    static void CheckIfKernelSupportsTLS() {
      struct utsname buf;
      if (uname(&buf) != 0) {   // should be impossible
        MESSAGE("uname failed, assuming no TLS support (errno=%d)\n", errno);
        kernel_supports_tls = false;
      } else if (strcasecmp(buf.sysname, "linux") == 0) {
        // The Linux case: the first kernel to support TLS was 2.6.0.
        if (buf.release[0] < '2' && buf.release[1] == '.')    // 0.x or 1.x
          kernel_supports_tls = false;
        else if (buf.release[0] == '2' && buf.release[1] == '.' &&
                 buf.release[2] >= '0' && buf.release[2] < '6' &&
                 buf.release[3] == '.')                       // 2.0 - 2.5
          kernel_supports_tls = false;
        else
          kernel_supports_tls = true;
      } else {        // some other kernel; we'll be optimistic
        kernel_supports_tls = true;
      }
      // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
    }
# endif  // HAVE_DECL_UNAME
#endif   // HAVE_TLS

// __THROW is defined in glibc systems. It means, counter-intuitively,
// "This function will never throw an exception." It's an optional
// optimization tool, but we may need to use it to match glibc prototypes.
#ifndef __THROW    // I guess we're not on a glibc system
# define __THROW   // __THROW is just an optimization, so ok to make it ""
#endif

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------

// Not all possible combinations of the following parameters make
// sense. In particular, if kMaxSize increases, you may have to
// increase kNumClasses as well.
static const size_t kPageShift  = 12;
static const size_t kPageSize   = 1 << kPageShift;
static const size_t kMaxSize    = 8u * kPageSize;
static const size_t kAlignShift = 3;
static const size_t kAlignment  = 1 << kAlignShift;
static const size_t kNumClasses = 68;

// Allocates a big block of memory for the pagemap once we reach more than
// 128MB.
static const size_t kPageMapBigAllocationThreshold = 128 << 20;

// Minimum number of pages to fetch from system at a time. Must be
// significantly bigger than kPageSize to amortize system-call
// overhead, and also to reduce external fragmentation. Also, we
// should keep this value big because various incarnations of Linux
// have small limits on the number of mmap() regions per
// address-space.
static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);
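// (With kPageShift == 12 this is 1 << 8 == 256 pages, i.e. 1MB per system allocation.)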

// Number of objects to move between a per-thread list and a central
// list in one shot. We want this to be not too small so we can
// amortize the lock overhead for accessing the central list. Making
// it too big may temporarily cause unnecessary memory wastage in the
// per-thread free list until the scavenger cleans up the list.
static int num_objects_to_move[kNumClasses];

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list. We
// want this big to avoid locking the central free-list too often. It
// should not hurt to make this list somewhat big because the
// scavenging code will shrink it down when its contents are not in use.
static const int kMaxFreeListLength = 256;

// Lower and upper bounds on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2;
static const size_t kMaxThreadCacheSize = 2 << 20;

// Default bound on the total amount of thread caches
static const size_t kDefaultOverallThreadCacheSize = 16 << 20;

// For all span-lengths < kMaxPages we keep an exact-size list.
// REQUIRED: kMaxPages >= kMinSystemAlloc;
static const size_t kMaxPages = kMinSystemAlloc;

/* The smallest prime > 2^n */
static int primes_list[] = {
    // Small values might cause high rates of sampling
    // and hence are commented out.
    // 2, 5, 11, 17, 37, 67, 131, 257,
    // 521, 1031, 2053, 4099, 8209, 16411,
    32771, 65537, 131101, 262147, 524309, 1048583,
    2097169, 4194319, 8388617, 16777259, 33554467 };

// Twice the approximate gap between sampling actions.
// I.e., we take one sample approximately once every
//      tcmalloc_sample_parameter/2
// bytes of allocation, i.e., ~ once every 128KB.
// Must be a prime number.
#ifdef NO_TCMALLOC_SAMPLES
DEFINE_int64(tcmalloc_sample_parameter, 0,
             "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
static size_t sample_period = 0;
#else
DEFINE_int64(tcmalloc_sample_parameter, 262147,
             "Twice the approximate gap between sampling actions."
             " Must be a prime number. Otherwise will be rounded up to a "
             " larger prime number");
static size_t sample_period = 262147;
#endif

// Protects sample_period above
static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;

// Parameters for controlling how fast memory is returned to the OS.

DEFINE_double(tcmalloc_release_rate, 1,
              "Rate at which we release unused memory to the system. "
              "Zero means we never release memory back to the system. "
              "Increase this flag to return memory faster; decrease it "
              "to return memory slower. Reasonable rates are in the "
              "range [0,10]");

//-------------------------------------------------------------------
// Mapping from size to size_class and vice versa
//-------------------------------------------------------------------

// Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
// array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
// So for these larger sizes we have an array indexed by ceil(size/128).
//
// We flatten both logical arrays into one physical array and use
// arithmetic to compute an appropriate index. The constants used by
// ClassIndex() were selected to make the flattening work.
//
// Examples:
//   Size       Expression                      Index
//   -------------------------------------------------------
//   0          (0 + 7) / 8                     0
//   1          (1 + 7) / 8                     1
//   ...
//   1024       (1024 + 7) / 8                  128
//   1025       (1025 + 127 + (120<<7)) / 128   129
//   ...
//   32768      (32768 + 127 + (120<<7)) / 128  376
static const size_t kMaxSmallSize = 1024;
static const int shift_amount[2] = { 3, 7 };  // For divides by 8 or 128
static const int add_amount[2] = { 7, 127 + (120 << 7) };
static unsigned char class_array[377];

// Compute index of the class_array[] entry for a given size
static inline int ClassIndex(size_t s) {
  const int i = (s > kMaxSmallSize);
  return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
}
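// (Worked example, matching the table above: ClassIndex(1024) = (1024 + 7) >> 3 = 128,
// and ClassIndex(1025) = (1025 + 127 + (120 << 7)) >> 7 = 16512 >> 7 = 129.)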

// Mapping from size class to max size storable in that class
static size_t class_to_size[kNumClasses];

// Mapping from size class to number of pages to allocate at a time
static size_t class_to_pages[kNumClasses];

// TransferCache is used to cache transfers of num_objects_to_move[size_class]
// back and forth between thread caches and the central cache for a given size
// class.
struct TCEntry {
  void *head;  // Head of chain of objects.
  void *tail;  // Tail of chain of objects.
};
// A central cache freelist can have anywhere from 0 to kNumTransferEntries
// slots to put linked-list chains into. To keep memory usage bounded the total
// number of TCEntries across size classes is fixed. Currently each size
// class is initially given one TCEntry which also means that the maximum any
// one class can have is kNumClasses.
static const int kNumTransferEntries = kNumClasses;

// Note: the following only works for "n"s that fit in 32-bits, but
// that is fine since we only use it for small sizes.
static inline int LgFloor(size_t n) {
  int log = 0;
  for (int i = 4; i >= 0; --i) {
    int shift = (1 << i);
    size_t x = n >> shift;
    if (x != 0) {
      n = x;
      log += shift;
    }
  }
  ASSERT(n == 1);
  return log;
}
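// (For example, LgFloor(1) == 0, LgFloor(1024) == 10, and LgFloor(1500) == 10,
// since 2^10 <= 1500 < 2^11.)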

// Some very basic linked list functions for dealing with using void * as
// storage.

static inline void *SLL_Next(void *t) {
  return *(reinterpret_cast<void**>(t));
}

static inline void SLL_SetNext(void *t, void *n) {
  *(reinterpret_cast<void**>(t)) = n;
}

static inline void SLL_Push(void **list, void *element) {
  SLL_SetNext(element, *list);
  *list = element;
}

static inline void *SLL_Pop(void **list) {
  void *result = *list;
  *list = SLL_Next(*list);
  return result;
}


// Remove N elements from a linked list to which head points. head will be
// modified to point to the new head. start and end will point to the first
// and last nodes of the range. Note that after this call the next pointer
// of *end will be NULL.
static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
  if (N == 0) {
    *start = NULL;
    *end = NULL;
    return;
  }

  void *tmp = *head;
  for (int i = 1; i < N; ++i) {
    tmp = SLL_Next(tmp);
  }

  *start = *head;
  *end = tmp;
  *head = SLL_Next(tmp);
  // Unlink range from list.
  SLL_SetNext(tmp, NULL);
}

static inline void SLL_PushRange(void **head, void *start, void *end) {
  if (!start) return;
  SLL_SetNext(end, *head);
  *head = start;
}

static inline size_t SLL_Size(void *head) {
  int count = 0;
  while (head) {
    count++;
    head = SLL_Next(head);
  }
  return count;
}
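
// Note (descriptive, not from the original sources): these lists thread the
// "next" pointer through the first word of each free object itself, so free-list
// bookkeeping costs no extra memory as long as every object is at least
// sizeof(void*) bytes, which the size classes above guarantee.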

// Setup helper functions.

static ALWAYS_INLINE size_t SizeClass(size_t size) {
  return class_array[ClassIndex(size)];
}

// Get the byte-size for a specified class
static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
  return class_to_size[cl];
}

static int NumMoveSize(size_t size) {
  if (size == 0) return 0;
  // Use approx 64k transfers between thread and central caches.
  int num = static_cast<int>(64.0 * 1024.0 / size);
  if (num < 2) num = 2;
  // Clamp well below kMaxFreeListLength to avoid ping pong between central
  // and thread caches.
  if (num > static_cast<int>(0.8 * kMaxFreeListLength))
    num = static_cast<int>(0.8 * kMaxFreeListLength);

  // Also, avoid bringing in too many objects into small object free
  // lists. There are lots of such lists, and if we allow each one to
  // fetch too many at a time, we end up having to scavenge too often
  // (especially when there are lots of threads and each thread gets a
  // small allowance for its thread cache).
  //
  // TODO: Make thread cache free list sizes dynamic so that we do not
  // have to equally divide a fixed resource amongst lots of threads.
  if (num > 32) num = 32;

  return num;
}
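// (For example, NumMoveSize(8192) is 64*1024/8192 = 8, while NumMoveSize(256)
// would be 256 but is clamped first to 0.8 * kMaxFreeListLength = 204 and then
// to the final cap of 32.)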

// Initialize the mapping arrays
static void InitSizeClasses() {
  // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
  if (ClassIndex(0) < 0) {
    MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
    CRASH();
  }
  if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
    MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
    CRASH();
  }

  // Compute the size classes we want to use
  size_t sc = 1;   // Next size class to assign
  unsigned char alignshift = kAlignShift;
  int last_lg = -1;
  for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
    int lg = LgFloor(size);
    if (lg > last_lg) {
      // Increase alignment every so often.
      //
      // Since we double the alignment every time size doubles and
      // size >= 128, this means that space wasted due to alignment is
      // at most 16/128 i.e., 12.5%.  Plus we cap the alignment at 256
      // bytes, so the space wasted as a percentage starts falling for
      // sizes > 2K.
      if ((lg >= 7) && (alignshift < 8)) {
        alignshift++;
      }
      last_lg = lg;
    }

    // Allocate enough pages so leftover is less than 1/8 of total.
    // This bounds wasted space to at most 12.5%.
    size_t psize = kPageSize;
    while ((psize % size) > (psize >> 3)) {
      psize += kPageSize;
    }
    const size_t my_pages = psize >> kPageShift;

    if (sc > 1 && my_pages == class_to_pages[sc-1]) {
      // See if we can merge this into the previous class without
      // increasing the fragmentation of the previous class.
      const size_t my_objects = (my_pages << kPageShift) / size;
      const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
                                  / class_to_size[sc-1];
      if (my_objects == prev_objects) {
        // Adjust last class to include this size
        class_to_size[sc-1] = size;
        continue;
      }
    }

    // Add new class
    class_to_pages[sc] = my_pages;
    class_to_size[sc] = size;
    sc++;
  }
  if (sc != kNumClasses) {
    MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
            sc, int(kNumClasses));
    CRASH();
  }

  // Initialize the mapping arrays
  int next_size = 0;
  for (unsigned char c = 1; c < kNumClasses; c++) {
    const size_t max_size_in_class = class_to_size[c];
    for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
      class_array[ClassIndex(s)] = c;
    }
    next_size = static_cast<int>(max_size_in_class + kAlignment);
  }

  // Double-check sizes just to be safe
  for (size_t size = 0; size <= kMaxSize; size++) {
    const size_t sc = SizeClass(size);
    if (sc == 0) {
      MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
      CRASH();
    }
    if (sc > 1 && size <= class_to_size[sc-1]) {
      MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
              "\n", sc, size);
      CRASH();
    }
    if (sc >= kNumClasses) {
      MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
      CRASH();
    }
    const size_t s = class_to_size[sc];
    if (size > s) {
      MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
      CRASH();
    }
    if (s == 0) {
      MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
      CRASH();
    }
  }

  // Initialize the num_objects_to_move array.
  for (size_t cl = 1; cl < kNumClasses; ++cl) {
    num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
  }

#ifndef WTF_CHANGES
  if (false) {
    // Dump class sizes and maximum external wastage per size class
    for (size_t cl = 1; cl < kNumClasses; ++cl) {
      const int alloc_size = class_to_pages[cl] << kPageShift;
      const int alloc_objs = alloc_size / class_to_size[cl];
      const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
      const int max_waste = alloc_size - min_used;
      MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
              int(cl),
              int(class_to_size[cl-1] + 1),
              int(class_to_size[cl]),
              int(class_to_pages[cl] << kPageShift),
              max_waste * 100.0 / alloc_size
              );
    }
  }
#endif
}

// -------------------------------------------------------------------------
// Simple allocator for objects of a specified type.  External locking
// is required before accessing one of these objects.
// -------------------------------------------------------------------------

// Metadata allocator -- keeps stats about how many bytes allocated
static uint64_t metadata_system_bytes = 0;
static void* MetaDataAlloc(size_t bytes) {
  void* result = TCMalloc_SystemAlloc(bytes, 0);
  if (result != NULL) {
    metadata_system_bytes += bytes;
  }
  return result;
}

template <class T>
class PageHeapAllocator {
 private:
  // How much to allocate from system at a time
  static const size_t kAllocIncrement = 32 << 10;

  // Aligned size of T
  static const size_t kAlignedSize
    = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);

  // Free area from which to carve new objects
  char* free_area_;
  size_t free_avail_;

  // Linked list of all regions allocated by this allocator
  void* allocated_regions_;

  // Free list of already carved objects
  void* free_list_;

  // Number of allocated but unfreed objects
  int inuse_;

 public:
  void Init() {
    ASSERT(kAlignedSize <= kAllocIncrement);
    inuse_ = 0;
    allocated_regions_ = 0;
    free_area_ = NULL;
    free_avail_ = 0;
    free_list_ = NULL;
  }

  T* New() {
    // Consult free list
    void* result;
    if (free_list_ != NULL) {
      result = free_list_;
      free_list_ = *(reinterpret_cast<void**>(result));
    } else {
      if (free_avail_ < kAlignedSize) {
        // Need more room
        char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
        if (!new_allocation)
          CRASH();

        *(void**)new_allocation = allocated_regions_;
        allocated_regions_ = new_allocation;
        free_area_ = new_allocation + kAlignedSize;
        free_avail_ = kAllocIncrement - kAlignedSize;
      }
      result = free_area_;
      free_area_ += kAlignedSize;
      free_avail_ -= kAlignedSize;
    }
    inuse_++;
    return reinterpret_cast<T*>(result);
  }

  void Delete(T* p) {
    *(reinterpret_cast<void**>(p)) = free_list_;
    free_list_ = p;
    inuse_--;
  }

  int inuse() const { return inuse_; }

#if defined(WTF_CHANGES) && OS(DARWIN)
  template <class Recorder>
  void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader)
  {
    vm_address_t adminAllocation = reinterpret_cast<vm_address_t>(allocated_regions_);
    while (adminAllocation) {
      recorder.recordRegion(adminAllocation, kAllocIncrement);
      adminAllocation = *reader(reinterpret_cast<vm_address_t*>(adminAllocation));
    }
  }
#endif
};

// -------------------------------------------------------------------------
// Span - a contiguous run of pages
// -------------------------------------------------------------------------

// Type that can hold a page number
typedef uintptr_t PageID;

// Type that can hold the length of a run of pages
typedef uintptr_t Length;

static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;

// Convert byte size into pages.  This won't overflow, but may return
// an unreasonably large value if bytes is huge enough.
static inline Length pages(size_t bytes) {
  return (bytes >> kPageShift) +
      ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
}
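// (With kPageShift == 12, pages(1) == 1, pages(4096) == 1, and pages(4097) == 2.)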

// Convert a user size into the number of bytes that will actually be
// allocated
static size_t AllocationSize(size_t bytes) {
  if (bytes > kMaxSize) {
    // Large object: we allocate an integral number of pages
    ASSERT(bytes <= (kMaxValidPages << kPageShift));
    return pages(bytes) << kPageShift;
  } else {
    // Small object: find the size class to which it belongs
    return ByteSizeForClass(SizeClass(bytes));
  }
}
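// (For instance, a 40000-byte request exceeds kMaxSize (32768), so it is rounded
// up to pages(40000) = 10 pages, i.e. 40960 bytes; a 100-byte request is rounded
// up to the byte size of its size class instead.)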

// Information kept for a span (a contiguous run of pages).
struct Span {
  PageID        start;          // Starting page number
  Length        length;         // Number of pages in span
  Span*         next;           // Used when in linked list
  Span*         prev;           // Used when in linked list
  void*         objects;        // Linked list of free objects
  unsigned int  free : 1;       // Is the span free
#ifndef NO_TCMALLOC_SAMPLES
  unsigned int  sample : 1;     // Sampled object?
#endif
  unsigned int  sizeclass : 8;  // Size-class for small objects (or 0)
  unsigned int  refcount : 11;  // Number of non-free objects
  bool decommitted : 1;

#undef SPAN_HISTORY
#ifdef SPAN_HISTORY
  // For debugging, we can keep a log of events per span
  int nexthistory;
  char history[64];
  int value[64];
#endif
};

#define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)

#ifdef SPAN_HISTORY
void Event(Span* span, char op, int v = 0) {
  span->history[span->nexthistory] = op;
  span->value[span->nexthistory] = v;
  span->nexthistory++;
  if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
}
#else
#define Event(s,o,v) ((void) 0)
#endif

// Allocator/deallocator for spans
static PageHeapAllocator<Span> span_allocator;
static Span* NewSpan(PageID p, Length len) {
  Span* result = span_allocator.New();
  memset(result, 0, sizeof(*result));
  result->start = p;
  result->length = len;
#ifdef SPAN_HISTORY
  result->nexthistory = 0;
#endif
  return result;
}

static inline void DeleteSpan(Span* span) {
#ifndef NDEBUG
  // In debug mode, trash the contents of deleted Spans
  memset(span, 0x3f, sizeof(*span));
#endif
  span_allocator.Delete(span);
}

// -------------------------------------------------------------------------
// Doubly linked list of spans.
// -------------------------------------------------------------------------

static inline void DLL_Init(Span* list) {
  list->next = list;
  list->prev = list;
}

static inline void DLL_Remove(Span* span) {
  span->prev->next = span->next;
  span->next->prev = span->prev;
  span->prev = NULL;
  span->next = NULL;
}

static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) {
  return list->next == list;
}

static int DLL_Length(const Span* list) {
  int result = 0;
  for (Span* s = list->next; s != list; s = s->next) {
    result++;
  }
  return result;
}

#if 0 /* Not needed at the moment -- causes compiler warnings if not used */
static void DLL_Print(const char* label, const Span* list) {
  MESSAGE("%-10s %p:", label, list);
  for (const Span* s = list->next; s != list; s = s->next) {
    MESSAGE(" <%p,%u,%u>", s, s->start, s->length);
  }
  MESSAGE("\n");
}
#endif

static inline void DLL_Prepend(Span* list, Span* span) {
  ASSERT(span->next == NULL);
  ASSERT(span->prev == NULL);
  span->next = list->next;
  span->prev = list;
  list->next->prev = span;
  list->next = span;
}

// -------------------------------------------------------------------------
// Stack traces kept for sampled allocations
//   The following state is protected by pageheap_lock_.
// -------------------------------------------------------------------------

// size/depth are made the same size as a pointer so that some generic
// code below can conveniently cast them back and forth to void*.
static const int kMaxStackDepth = 31;
struct StackTrace {
  uintptr_t size;          // Size of object
  uintptr_t depth;         // Number of PC values stored in array below
  void* stack[kMaxStackDepth];
};
static PageHeapAllocator<StackTrace> stacktrace_allocator;
static Span sampled_objects;

// -------------------------------------------------------------------------
// Map from page-id to per-page data
// -------------------------------------------------------------------------

// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
// We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
// because sometimes the sizeclass is all the information we need.

// Selector class -- general selector uses 3-level map
template <int BITS> class MapSelector {
 public:
  typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
  typedef PackedCache<BITS, uint64_t> CacheType;
};

#if defined(WTF_CHANGES)
#if CPU(X86_64)
// On all known X86-64 platforms, the upper 16 bits are always unused and therefore
// can be excluded from the PageMap key.
// See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details

static const size_t kBitsUnusedOn64Bit = 16;
#else
static const size_t kBitsUnusedOn64Bit = 0;
#endif

// A three-level map for 64-bit machines
template <> class MapSelector<64> {
 public:
  typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
  typedef PackedCache<64, uint64_t> CacheType;
};
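// (On x86-64 with kPageShift == 12 that leaves 64 - 12 - 16 = 36 significant
// PageID bits, so the three-level map has to cover 2^36 possible pages.)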
#endif

// A two-level map for 32-bit machines
template <> class MapSelector<32> {
 public:
  typedef TCMalloc_PageMap2<32 - kPageShift> Type;
  typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
};

// -------------------------------------------------------------------------
// Page-level allocator
//  * Eager coalescing
//
// Heap for page-level allocation.  We allow allocating and freeing a
// contiguous run of pages (called a "span").
// -------------------------------------------------------------------------

#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
// The page heap maintains a free list for spans that are no longer in use by
// the central cache or any thread caches. We use a background thread to
// periodically scan the free list and release a percentage of it back to the OS.

// If free_committed_pages_ exceeds kMinimumFreeCommittedPageCount, the
// background thread:
//     - wakes up
//     - pauses for kScavengeDelayInSeconds
//     - returns to the OS a percentage of the memory that remained unused during
//       that pause (kScavengePercentage * min_free_committed_pages_since_last_scavenge_)
// The goal of this strategy is to reduce memory pressure in a timely fashion
// while avoiding thrashing the OS allocator.
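//
// (Illustration: if min_free_committed_pages_since_last_scavenge_ is 1000 pages
// when the scavenger runs, it tries to release about kScavengePercentage * 1000
// = 500 of them, but never drops free_committed_pages_ below
// kMinimumFreeCommittedPageCount; see scavenge() below.)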

// Time delay before the page heap scavenger will consider returning pages to
// the OS.
static const int kScavengeDelayInSeconds = 2;

// Approximate percentage of free committed pages to return to the OS in one
// scavenge.
static const float kScavengePercentage = .5f;

// Number of span lists to keep spans in when memory is returned.
static const int kMinSpanListsWithSpans = 32;

// Number of free committed pages that we want to keep around. This is the minimum
// number of pages used when there is one span in each of the first
// kMinSpanListsWithSpans span lists. Currently 528 pages.
static const size_t kMinimumFreeCommittedPageCount = kMinSpanListsWithSpans * ((1.0f+kMinSpanListsWithSpans) / 2.0f);
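// (That is 32 * 33 / 2 = 528 pages, or roughly 2MB with 4KB pages.)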
|
1282 |
|
1283 #endif |
|
1284 |
|
1285 class TCMalloc_PageHeap { |
|
1286 public: |
|
1287 void init(); |
|
1288 |
|
1289 // Allocate a run of "n" pages. Returns zero if out of memory. |
|
1290 Span* New(Length n); |
|
1291 |
|
1292 // Delete the span "[p, p+n-1]". |
|
1293 // REQUIRES: span was returned by earlier call to New() and |
|
1294 // has not yet been deleted. |
|
1295 void Delete(Span* span); |
|
1296 |
|
1297 // Mark an allocated span as being used for small objects of the |
|
1298 // specified size-class. |
|
1299 // REQUIRES: span was returned by an earlier call to New() |
|
1300 // and has not yet been deleted. |
|
1301 void RegisterSizeClass(Span* span, size_t sc); |
|
1302 |
|
1303 // Split an allocated span into two spans: one of length "n" pages |
|
1304 // followed by another span of length "span->length - n" pages. |
|
1305 // Modifies "*span" to point to the first span of length "n" pages. |
|
1306 // Returns a pointer to the second span. |
|
1307 // |
|
1308 // REQUIRES: "0 < n < span->length" |
|
1309 // REQUIRES: !span->free |
|
1310 // REQUIRES: span->sizeclass == 0 |
|
1311 Span* Split(Span* span, Length n); |
|
1312 |
|
1313 // Return the descriptor for the specified page. |
|
1314 inline Span* GetDescriptor(PageID p) const { |
|
1315 return reinterpret_cast<Span*>(pagemap_.get(p)); |
|
1316 } |
|
1317 |
|
1318 #ifdef WTF_CHANGES |
|
1319 inline Span* GetDescriptorEnsureSafe(PageID p) |
|
1320 { |
|
1321 pagemap_.Ensure(p, 1); |
|
1322 return GetDescriptor(p); |
|
1323 } |
|
1324 |
|
1325 size_t ReturnedBytes() const; |
|
1326 #endif |
|
1327 |
|
1328 // Dump state to stderr |
|
1329 #ifndef WTF_CHANGES |
|
1330 void Dump(TCMalloc_Printer* out); |
|
1331 #endif |
|
1332 |
|
1333 // Return number of bytes allocated from system |
|
1334 inline uint64_t SystemBytes() const { return system_bytes_; } |
|
1335 |
|
1336 // Return number of free bytes in heap |
|
1337 uint64_t FreeBytes() const { |
|
1338 return (static_cast<uint64_t>(free_pages_) << kPageShift); |
|
1339 } |
|
1340 |
|
1341 bool Check(); |
|
1342 bool CheckList(Span* list, Length min_pages, Length max_pages); |
|
1343 |
|
1344 // Release all pages on the free list for reuse by the OS: |
|
1345 void ReleaseFreePages(); |
|
1346 |
|
1347 // Return 0 if we have no information, or else the correct sizeclass for p. |
|
1348 // Reads and writes to pagemap_cache_ do not require locking. |
|
1349 // The entries are 64 bits on 64-bit hardware and 16 bits on |
|
1350 // 32-bit hardware, and we don't mind raciness as long as each read of |
|
1351 // an entry yields a valid entry, not a partially updated entry. |
|
1352 size_t GetSizeClassIfCached(PageID p) const { |
|
1353 return pagemap_cache_.GetOrDefault(p, 0); |
|
1354 } |
|
1355 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); } |
|
1356 |
|
1357 private: |
|
1358 // Pick the appropriate map and cache types based on pointer size |
|
1359 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; |
|
1360 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; |
|
1361 PageMap pagemap_; |
|
1362 mutable PageMapCache pagemap_cache_; |
|
1363 |
|
1364 // We segregate spans of a given size into two circular linked |
|
1365 // lists: one for normal spans, and one for spans whose memory |
|
1366 // has been returned to the system. |
|
1367 struct SpanList { |
|
1368 Span normal; |
|
1369 Span returned; |
|
1370 }; |
|
1371 |
|
1372 // List of free spans of length >= kMaxPages |
|
1373 SpanList large_; |
|
1374 |
|
1375 // Array mapping from span length to a doubly linked list of free spans |
|
1376 SpanList free_[kMaxPages]; |
|
1377 |
|
1378 // Number of pages kept in free lists |
|
1379 uintptr_t free_pages_; |
|
1380 |
|
1381 // Bytes allocated from system |
|
1382 uint64_t system_bytes_; |
|
1383 |
|
1384 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1385 // Number of pages kept in free lists that are still committed. |
|
1386 Length free_committed_pages_; |
|
1387 |
|
1388 // Minimum number of free committed pages since last scavenge. (Can be 0 if |
|
1389 // we've committed new pages since the last scavenge.) |
|
1390 Length min_free_committed_pages_since_last_scavenge_; |
|
1391 #endif |
|
1392 |
|
1393 bool GrowHeap(Length n); |
|
1394 |
|
1395 // REQUIRES span->length >= n |
|
1396 // Remove span from its free list, and move any leftover part of |
|
1397 // span into appropriate free lists. Also update "span" to have |
|
1398 // length exactly "n" and mark it as non-free so it can be returned |
|
1399 // to the client. |
|
1400 // |
|
1401 // "released" is true iff "span" was found on a "returned" list. |
|
1402 void Carve(Span* span, Length n, bool released); |
|
1403 |
|
1404 void RecordSpan(Span* span) { |
|
1405 pagemap_.set(span->start, span); |
|
1406 if (span->length > 1) { |
|
1407 pagemap_.set(span->start + span->length - 1, span); |
|
1408 } |
|
1409 } |
|
1410 |
|
1411 // Allocate a large span of length == n. If successful, returns a |
|
1412 // span of exactly the specified length. Else, returns NULL. |
|
1413 Span* AllocLarge(Length n); |
|
1414 |
|
1415 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1416 // Incrementally release some memory to the system. |
|
1417 // IncrementalScavenge(n) is called whenever n pages are freed. |
|
1418 void IncrementalScavenge(Length n); |
|
1419 #endif |
|
1420 |
|
1421 // Number of pages to deallocate before doing more scavenging |
|
1422 int64_t scavenge_counter_; |
|
1423 |
|
1424 // Index of last free list we scavenged |
|
1425 size_t scavenge_index_; |
|
1426 |
|
1427 #if defined(WTF_CHANGES) && OS(DARWIN) |
|
1428 friend class FastMallocZone; |
|
1429 #endif |
|
1430 |
|
1431 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1432 void initializeScavenger(); |
|
1433 ALWAYS_INLINE void signalScavenger(); |
|
1434 void scavenge(); |
|
1435 ALWAYS_INLINE bool shouldScavenge() const; |
|
1436 |
|
1437 #if !HAVE(DISPATCH_H) |
|
1438 static NO_RETURN_WITH_VALUE void* runScavengerThread(void*); |
|
1439 NO_RETURN void scavengerThread(); |
|
1440 |
|
1441 // Keeps track of whether the background thread is actively scavenging memory every kScavengeDelayInSeconds, or |
|
1442 // it's blocked waiting for more pages to be deleted. |
|
1443 bool m_scavengeThreadActive; |
|
1444 |
|
1445 pthread_mutex_t m_scavengeMutex; |
|
1446 pthread_cond_t m_scavengeCondition; |
|
1447 #else // !HAVE(DISPATCH_H) |
|
1448 void periodicScavenge(); |
|
1449 |
|
1450 dispatch_queue_t m_scavengeQueue; |
|
1451 dispatch_source_t m_scavengeTimer; |
|
1452 bool m_scavengingScheduled; |
|
1453 #endif |
|
1454 |
|
1455 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1456 }; |
|
1457 |
|
1458 void TCMalloc_PageHeap::init() |
|
1459 { |
|
1460 pagemap_.init(MetaDataAlloc); |
|
1461 pagemap_cache_ = PageMapCache(0); |
|
1462 free_pages_ = 0; |
|
1463 system_bytes_ = 0; |
|
1464 |
|
1465 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1466 free_committed_pages_ = 0; |
|
1467 min_free_committed_pages_since_last_scavenge_ = 0; |
|
1468 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1469 |
|
1470 scavenge_counter_ = 0; |
|
1471 // Start scavenging at kMaxPages list |
|
1472 scavenge_index_ = kMaxPages-1; |
|
1473 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits); |
|
1474 DLL_Init(&large_.normal); |
|
1475 DLL_Init(&large_.returned); |
|
1476 for (size_t i = 0; i < kMaxPages; i++) { |
|
1477 DLL_Init(&free_[i].normal); |
|
1478 DLL_Init(&free_[i].returned); |
|
1479 } |
|
1480 |
|
1481 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1482 initializeScavenger(); |
|
1483 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1484 } |
|
1485 |
|
1486 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1487 |
|
1488 #if !HAVE(DISPATCH_H) |
|
1489 |
|
1490 void TCMalloc_PageHeap::initializeScavenger() |
|
1491 { |
|
1492 pthread_mutex_init(&m_scavengeMutex, 0); |
|
1493 pthread_cond_init(&m_scavengeCondition, 0); |
|
1494 m_scavengeThreadActive = true; |
|
1495 pthread_t thread; |
|
1496 pthread_create(&thread, 0, runScavengerThread, this); |
|
1497 } |
|
1498 |
|
1499 void* TCMalloc_PageHeap::runScavengerThread(void* context) |
|
1500 { |
|
1501 static_cast<TCMalloc_PageHeap*>(context)->scavengerThread(); |
|
1502 #if COMPILER(MSVC) |
|
1503 // Without this, Visual Studio will complain that this method does not return a value. |
|
1504 return 0; |
|
1505 #endif |
|
1506 } |
|
1507 |
|
1508 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() |
|
1509 { |
|
1510 if (!m_scavengeThreadActive && shouldScavenge()) |
|
1511 pthread_cond_signal(&m_scavengeCondition); |
|
1512 } |
|
1513 |
|
1514 #else // !HAVE(DISPATCH_H) |
|
1515 |
|
1516 void TCMalloc_PageHeap::initializeScavenger() |
|
1517 { |
|
1518 m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL); |
|
1519 m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue); |
|
1520 dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, kScavengeDelayInSeconds * NSEC_PER_SEC); |
|
1521 dispatch_source_set_timer(m_scavengeTimer, startTime, kScavengeDelayInSeconds * NSEC_PER_SEC, 1000 * NSEC_PER_USEC); |
|
1522 dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); }); |
|
1523 m_scavengingScheduled = false; |
|
1524 } |
|
1525 |
|
1526 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() |
|
1527 { |
|
1528 if (!m_scavengingScheduled && shouldScavenge()) { |
|
1529 m_scavengingScheduled = true; |
|
1530 dispatch_resume(m_scavengeTimer); |
|
1531 } |
|
1532 } |
|
1533 |
|
1534 #endif |
|
1535 |
|
1536 void TCMalloc_PageHeap::scavenge() |
|
1537 { |
|
1538 size_t pagesToRelease = min_free_committed_pages_since_last_scavenge_ * kScavengePercentage; |
|
1539 size_t targetPageCount = std::max<size_t>(kMinimumFreeCommittedPageCount, free_committed_pages_ - pagesToRelease); |
|
1540 |
|
1541 while (free_committed_pages_ > targetPageCount) { |
|
1542 for (int i = kMaxPages; i > 0 && free_committed_pages_ >= targetPageCount; i--) { |
|
1543 SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i]; |
|
1544 // If the span size is bigger than kMinSpanListsWithSpans pages, return all the spans in the list; |
|
1545 // otherwise return only 50% of the list at a time so that spans of size 1 are not the only ones left. |
|
1546 size_t numSpansToReturn = (i > kMinSpanListsWithSpans) ? DLL_Length(&slist->normal) : static_cast<size_t>(.5 * DLL_Length(&slist->normal)); |
|
1547 for (int j = 0; static_cast<size_t>(j) < numSpansToReturn && !DLL_IsEmpty(&slist->normal) && free_committed_pages_ > targetPageCount; j++) { |
|
1548 Span* s = slist->normal.prev; |
|
1549 DLL_Remove(s); |
|
1550 ASSERT(!s->decommitted); |
|
1551 if (!s->decommitted) { |
|
1552 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), |
|
1553 static_cast<size_t>(s->length << kPageShift)); |
|
1554 ASSERT(free_committed_pages_ >= s->length); |
|
1555 free_committed_pages_ -= s->length; |
|
1556 s->decommitted = true; |
|
1557 } |
|
1558 DLL_Prepend(&slist->returned, s); |
|
1559 } |
|
1560 } |
|
1561 } |
|
1562 |
|
1563 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; |
|
1564 } |
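// Worked example of the target computation above (illustration only; the
// concrete numbers are assumptions, not the constants defined elsewhere in
// this file). Suppose kScavengePercentage were 0.5 and
// kMinimumFreeCommittedPageCount were 512, with free_committed_pages_ == 2000
// and a minimum of 1200 observed since the last scavenge:
//
//   size_t pagesToRelease  = 1200 * 0.5;                        // 600
//   size_t targetPageCount = std::max<size_t>(512, 2000 - 600); // 1400
//
// so this pass walks the free lists, largest spans first, decommitting spans
// until free_committed_pages_ drops to 1400.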
|
1565 |
|
1566 ALWAYS_INLINE bool TCMalloc_PageHeap::shouldScavenge() const |
|
1567 { |
|
1568 return free_committed_pages_ > kMinimumFreeCommittedPageCount; |
|
1569 } |
|
1570 |
|
1571 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1572 |
|
1573 inline Span* TCMalloc_PageHeap::New(Length n) { |
|
1574 ASSERT(Check()); |
|
1575 ASSERT(n > 0); |
|
1576 |
|
1577 // Find first size >= n that has a non-empty list |
|
1578 for (Length s = n; s < kMaxPages; s++) { |
|
1579 Span* ll = NULL; |
|
1580 bool released = false; |
|
1581 if (!DLL_IsEmpty(&free_[s].normal)) { |
|
1582 // Found normal span |
|
1583 ll = &free_[s].normal; |
|
1584 } else if (!DLL_IsEmpty(&free_[s].returned)) { |
|
1585 // Found returned span; reallocate it |
|
1586 ll = &free_[s].returned; |
|
1587 released = true; |
|
1588 } else { |
|
1589 // Keep looking in larger classes |
|
1590 continue; |
|
1591 } |
|
1592 |
|
1593 Span* result = ll->next; |
|
1594 Carve(result, n, released); |
|
1595 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1596 // The memory being handed out is committed at this point (Carve() recommits spans taken from the |
|
1597 // returned list and adjusts the count), so update the free committed pages count. |
|
1598 ASSERT(free_committed_pages_ >= n); |
|
1599 free_committed_pages_ -= n; |
|
1600 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) |
|
1601 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; |
|
1602 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1603 ASSERT(Check()); |
|
1604 free_pages_ -= n; |
|
1605 return result; |
|
1606 } |
|
1607 |
|
1608 Span* result = AllocLarge(n); |
|
1609 if (result != NULL) { |
|
1610 ASSERT_SPAN_COMMITTED(result); |
|
1611 return result; |
|
1612 } |
|
1613 |
|
1614 // Grow the heap and try again |
|
1615 if (!GrowHeap(n)) { |
|
1616 ASSERT(Check()); |
|
1617 return NULL; |
|
1618 } |
|
1619 |
|
1620 return AllocLarge(n); |
|
1621 } |
|
1622 |
|
1623 Span* TCMalloc_PageHeap::AllocLarge(Length n) { |
|
1624 // Find the best span (closest to n in size). |
|
1625 // The following loops implement address-ordered best-fit. |
|
1626 bool from_released = false; |
|
1627 Span *best = NULL; |
|
1628 |
|
1629 // Search through normal list |
|
1630 for (Span* span = large_.normal.next; |
|
1631 span != &large_.normal; |
|
1632 span = span->next) { |
|
1633 if (span->length >= n) { |
|
1634 if ((best == NULL) |
|
1635 || (span->length < best->length) |
|
1636 || ((span->length == best->length) && (span->start < best->start))) { |
|
1637 best = span; |
|
1638 from_released = false; |
|
1639 } |
|
1640 } |
|
1641 } |
|
1642 |
|
1643 // Search through released list in case it has a better fit |
|
1644 for (Span* span = large_.returned.next; |
|
1645 span != &large_.returned; |
|
1646 span = span->next) { |
|
1647 if (span->length >= n) { |
|
1648 if ((best == NULL) |
|
1649 || (span->length < best->length) |
|
1650 || ((span->length == best->length) && (span->start < best->start))) { |
|
1651 best = span; |
|
1652 from_released = true; |
|
1653 } |
|
1654 } |
|
1655 } |
|
1656 |
|
1657 if (best != NULL) { |
|
1658 Carve(best, n, from_released); |
|
1659 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1660 // The memory being handed out is committed at this point (Carve() recommits spans taken from the |
|
1661 // returned list and adjusts the count), so update the free committed pages count. |
|
1662 ASSERT(free_committed_pages_ >= n); |
|
1663 free_committed_pages_ -= n; |
|
1664 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) |
|
1665 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; |
|
1666 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1667 ASSERT(Check()); |
|
1668 free_pages_ -= n; |
|
1669 return best; |
|
1670 } |
|
1671 return NULL; |
|
1672 } |
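// The two loops above share the same "better fit" test. The following sketch
// (illustrative only; isBetterFit is not a function in this file) shows that
// predicate in isolation: a candidate wins if it is the first span that fits,
// a strictly tighter fit, or an equally tight fit at a lower address --
// i.e. address-ordered best-fit.
//
//   static bool isBetterFit(const Span* candidate, const Span* best, Length n) {
//       if (candidate->length < n) return false;          // does not fit at all
//       if (!best) return true;                           // first span that fits
//       if (candidate->length != best->length)
//           return candidate->length < best->length;      // tighter fit wins
//       return candidate->start < best->start;            // tie broken by address
//   }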
|
1673 |
|
1674 Span* TCMalloc_PageHeap::Split(Span* span, Length n) { |
|
1675 ASSERT(0 < n); |
|
1676 ASSERT(n < span->length); |
|
1677 ASSERT(!span->free); |
|
1678 ASSERT(span->sizeclass == 0); |
|
1679 Event(span, 'T', n); |
|
1680 |
|
1681 const Length extra = span->length - n; |
|
1682 Span* leftover = NewSpan(span->start + n, extra); |
|
1683 Event(leftover, 'U', extra); |
|
1684 RecordSpan(leftover); |
|
1685 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span |
|
1686 span->length = n; |
|
1687 |
|
1688 return leftover; |
|
1689 } |
|
1690 |
|
1691 inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) { |
|
1692 ASSERT(n > 0); |
|
1693 DLL_Remove(span); |
|
1694 span->free = 0; |
|
1695 Event(span, 'A', n); |
|
1696 |
|
1697 if (released) { |
|
1698 // If the span chosen to carve from is decommitted, commit the entire span at once to avoid committing spans 1 page at a time. |
|
1699 ASSERT(span->decommitted); |
|
1700 TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift), static_cast<size_t>(span->length << kPageShift)); |
|
1701 span->decommitted = false; |
|
1702 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1703 free_committed_pages_ += span->length; |
|
1704 #endif |
|
1705 } |
|
1706 |
|
1707 const int extra = static_cast<int>(span->length - n); |
|
1708 ASSERT(extra >= 0); |
|
1709 if (extra > 0) { |
|
1710 Span* leftover = NewSpan(span->start + n, extra); |
|
1711 leftover->free = 1; |
|
1712 leftover->decommitted = false; |
|
1713 Event(leftover, 'S', extra); |
|
1714 RecordSpan(leftover); |
|
1715 |
|
1716 // Place leftover span on appropriate free list |
|
1717 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_; |
|
1718 Span* dst = &listpair->normal; |
|
1719 DLL_Prepend(dst, leftover); |
|
1720 |
|
1721 span->length = n; |
|
1722 pagemap_.set(span->start + n - 1, span); |
|
1723 } |
|
1724 } |
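// Illustrative example of Carve() (the numbers are hypothetical): carving
// n = 3 pages out of a released 10-page span first recommits all 10 pages,
// then splits off a 7-page leftover:
//
//   span:     [start, start+10) -> length becomes 3; the pagemap entry at
//                                  start+2 is updated to point back at span
//   leftover: [start+3, start+10), marked free and committed, prepended to
//             free_[7].normal (or to large_.normal if 7 >= kMaxPages)
//
// RecordSpan(leftover) sets the pagemap at both ends of the leftover so the
// coalescing code in Delete() can find it from either neighbor.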
|
1725 |
|
1726 static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other) |
|
1727 { |
|
1728 if (destination->decommitted && !other->decommitted) { |
|
1729 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift), |
|
1730 static_cast<size_t>(other->length << kPageShift)); |
|
1731 } else if (other->decommitted && !destination->decommitted) { |
|
1732 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift), |
|
1733 static_cast<size_t>(destination->length << kPageShift)); |
|
1734 destination->decommitted = true; |
|
1735 } |
|
1736 } |
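// mergeDecommittedStates() makes the two spans' commit states agree before
// they are merged, always converging on "decommitted" if either side already
// is. Summarized (illustration only):
//
//   destination    other          action
//   -----------    -----------    ---------------------------------------------
//   committed      committed      nothing to do
//   decommitted    decommitted    nothing to do
//   decommitted    committed      release "other"; merged span stays decommitted
//   committed      decommitted    release "destination"; mark it decommitted
//
// Note that only the destination's flag is ever written; the other span is
// about to be deleted by the caller, so its flag no longer matters.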
|
1737 |
|
1738 inline void TCMalloc_PageHeap::Delete(Span* span) { |
|
1739 ASSERT(Check()); |
|
1740 ASSERT(!span->free); |
|
1741 ASSERT(span->length > 0); |
|
1742 ASSERT(GetDescriptor(span->start) == span); |
|
1743 ASSERT(GetDescriptor(span->start + span->length - 1) == span); |
|
1744 span->sizeclass = 0; |
|
1745 #ifndef NO_TCMALLOC_SAMPLES |
|
1746 span->sample = 0; |
|
1747 #endif |
|
1748 |
|
1749 // Coalesce -- we guarantee that "p" != 0, so no bounds checking |
|
1750 // necessary. We do not bother resetting the stale pagemap |
|
1751 // entries for the pieces we are merging together because we only |
|
1752 // care about the pagemap entries for the boundaries. |
|
1753 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1754 // Track the total size of the neighboring free spans that are committed. |
|
1755 Length neighboringCommittedSpansLength = 0; |
|
1756 #endif |
|
1757 const PageID p = span->start; |
|
1758 const Length n = span->length; |
|
1759 Span* prev = GetDescriptor(p-1); |
|
1760 if (prev != NULL && prev->free) { |
|
1761 // Merge preceding span into this span |
|
1762 ASSERT(prev->start + prev->length == p); |
|
1763 const Length len = prev->length; |
|
1764 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1765 if (!prev->decommitted) |
|
1766 neighboringCommittedSpansLength += len; |
|
1767 #endif |
|
1768 mergeDecommittedStates(span, prev); |
|
1769 DLL_Remove(prev); |
|
1770 DeleteSpan(prev); |
|
1771 span->start -= len; |
|
1772 span->length += len; |
|
1773 pagemap_.set(span->start, span); |
|
1774 Event(span, 'L', len); |
|
1775 } |
|
1776 Span* next = GetDescriptor(p+n); |
|
1777 if (next != NULL && next->free) { |
|
1778 // Merge next span into this span |
|
1779 ASSERT(next->start == p+n); |
|
1780 const Length len = next->length; |
|
1781 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1782 if (!next->decommitted) |
|
1783 neighboringCommittedSpansLength += len; |
|
1784 #endif |
|
1785 mergeDecommittedStates(span, next); |
|
1786 DLL_Remove(next); |
|
1787 DeleteSpan(next); |
|
1788 span->length += len; |
|
1789 pagemap_.set(span->start + span->length - 1, span); |
|
1790 Event(span, 'R', len); |
|
1791 } |
|
1792 |
|
1793 Event(span, 'D', span->length); |
|
1794 span->free = 1; |
|
1795 if (span->decommitted) { |
|
1796 if (span->length < kMaxPages) |
|
1797 DLL_Prepend(&free_[span->length].returned, span); |
|
1798 else |
|
1799 DLL_Prepend(&large_.returned, span); |
|
1800 } else { |
|
1801 if (span->length < kMaxPages) |
|
1802 DLL_Prepend(&free_[span->length].normal, span); |
|
1803 else |
|
1804 DLL_Prepend(&large_.normal, span); |
|
1805 } |
|
1806 free_pages_ += n; |
|
1807 |
|
1808 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1809 if (span->decommitted) { |
|
1810 // If the merged span is decommitted, that means we decommitted any neighboring spans that were |
|
1811 // committed. Update the free committed pages count. |
|
1812 free_committed_pages_ -= neighboringCommittedSpansLength; |
|
1813 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) |
|
1814 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; |
|
1815 } else { |
|
1816 // If the merged span remains committed, add the deleted span's size to the free committed pages count. |
|
1817 free_committed_pages_ += n; |
|
1818 } |
|
1819 |
|
1820 // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system. |
|
1821 signalScavenger(); |
|
1822 #else |
|
1823 IncrementalScavenge(n); |
|
1824 #endif |
|
1825 |
|
1826 ASSERT(Check()); |
|
1827 } |
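// Illustrative walk-through of the coalescing above (the addresses are
// hypothetical): freeing a 2-page span covering pages [100,102) when
// [97,100) and [102,105) are both free and committed:
//
//   prev = GetDescriptor(99)  -> the [97,100) span; merged, span becomes [97,102)
//   next = GetDescriptor(102) -> the [102,105) span; merged, span becomes [97,105)
//
// Only the boundary pagemap entries (97 and 104) are rewritten; interior
// entries may go stale, which is fine because the lookups above only ever
// touch p-1 and p+n. The 8-page result lands on free_[8].normal, and
// free_committed_pages_ grows by n = 2 (the neighbors were already counted).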
|
1828 |
|
1829 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
1830 void TCMalloc_PageHeap::IncrementalScavenge(Length n) { |
|
1831 // Fast path; not yet time to release memory |
|
1832 scavenge_counter_ -= n; |
|
1833 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge |
|
1834 |
|
1835 // If there is nothing to release, wait for so many pages before |
|
1836 // scavenging again. With 4K pages, this comes to 1MB of memory. |
|
1837 static const size_t kDefaultReleaseDelay = 1 << 8; |
|
1838 |
|
1839 // Find index of free list to scavenge |
|
1840 size_t index = scavenge_index_ + 1; |
|
1841 for (size_t i = 0; i < kMaxPages+1; i++) { |
|
1842 if (index > kMaxPages) index = 0; |
|
1843 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index]; |
|
1844 if (!DLL_IsEmpty(&slist->normal)) { |
|
1845 // Release the last span on the normal portion of this list |
|
1846 Span* s = slist->normal.prev; |
|
1847 DLL_Remove(s); |
|
1848 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), |
|
1849 static_cast<size_t>(s->length << kPageShift)); |
|
1850 s->decommitted = true; |
|
1851 DLL_Prepend(&slist->returned, s); |
|
1852 |
|
1853 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay))); |
|
1854 |
|
1855 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal)) |
|
1856 scavenge_index_ = index - 1; |
|
1857 else |
|
1858 scavenge_index_ = index; |
|
1859 return; |
|
1860 } |
|
1861 index++; |
|
1862 } |
|
1863 |
|
1864 // Nothing to scavenge, delay for a while |
|
1865 scavenge_counter_ = kDefaultReleaseDelay; |
|
1866 } |
|
1867 #endif |
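// The counter reset in IncrementalScavenge() adapts the release rate to heap
// pressure (illustration, assuming kDefaultReleaseDelay == 256 as defined in
// that function):
//
//   free_pages_ ==  4096:  256 - 4096/256  == 240  -> wait ~240 freed pages
//   free_pages_ == 49152:  256 - 49152/256 ==  64  -> wait only 64 freed pages
//
// i.e. the more free pages are sitting in the heap, the sooner the next span
// is released, clamped to the range [64, kDefaultReleaseDelay].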
|
1868 |
|
1869 void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) { |
|
1870 // Associate span object with all interior pages as well |
|
1871 ASSERT(!span->free); |
|
1872 ASSERT(GetDescriptor(span->start) == span); |
|
1873 ASSERT(GetDescriptor(span->start+span->length-1) == span); |
|
1874 Event(span, 'C', sc); |
|
1875 span->sizeclass = static_cast<unsigned int>(sc); |
|
1876 for (Length i = 1; i < span->length-1; i++) { |
|
1877 pagemap_.set(span->start+i, span); |
|
1878 } |
|
1879 } |
|
1880 |
|
1881 #ifdef WTF_CHANGES |
|
1882 size_t TCMalloc_PageHeap::ReturnedBytes() const { |
|
1883 size_t result = 0; |
|
1884 for (unsigned s = 0; s < kMaxPages; s++) { |
|
1885 const int r_length = DLL_Length(&free_[s].returned); |
|
1886 unsigned r_pages = s * r_length; |
|
1887 result += r_pages << kPageShift; |
|
1888 } |
|
1889 |
|
1890 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) |
|
1891 result += s->length << kPageShift; |
|
1892 return result; |
|
1893 } |
|
1894 #endif |
|
1895 |
|
1896 #ifndef WTF_CHANGES |
|
1897 static double PagesToMB(uint64_t pages) { |
|
1898 return (pages << kPageShift) / 1048576.0; |
|
1899 } |
|
1900 |
|
1901 void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) { |
|
1902 int nonempty_sizes = 0; |
|
1903 for (int s = 0; s < kMaxPages; s++) { |
|
1904 if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) { |
|
1905 nonempty_sizes++; |
|
1906 } |
|
1907 } |
|
1908 out->printf("------------------------------------------------\n"); |
|
1909 out->printf("PageHeap: %d sizes; %6.1f MB free\n", |
|
1910 nonempty_sizes, PagesToMB(free_pages_)); |
|
1911 out->printf("------------------------------------------------\n"); |
|
1912 uint64_t total_normal = 0; |
|
1913 uint64_t total_returned = 0; |
|
1914 for (int s = 0; s < kMaxPages; s++) { |
|
1915 const int n_length = DLL_Length(&free_[s].normal); |
|
1916 const int r_length = DLL_Length(&free_[s].returned); |
|
1917 if (n_length + r_length > 0) { |
|
1918 uint64_t n_pages = s * n_length; |
|
1919 uint64_t r_pages = s * r_length; |
|
1920 total_normal += n_pages; |
|
1921 total_returned += r_pages; |
|
1922 out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum" |
|
1923 "; unmapped: %6.1f MB; %6.1f MB cum\n", |
|
1924 s, |
|
1925 (n_length + r_length), |
|
1926 PagesToMB(n_pages + r_pages), |
|
1927 PagesToMB(total_normal + total_returned), |
|
1928 PagesToMB(r_pages), |
|
1929 PagesToMB(total_returned)); |
|
1930 } |
|
1931 } |
|
1932 |
|
1933 uint64_t n_pages = 0; |
|
1934 uint64_t r_pages = 0; |
|
1935 int n_spans = 0; |
|
1936 int r_spans = 0; |
|
1937 out->printf("Normal large spans:\n"); |
|
1938 for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) { |
|
1939 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n", |
|
1940 s->length, PagesToMB(s->length)); |
|
1941 n_pages += s->length; |
|
1942 n_spans++; |
|
1943 } |
|
1944 out->printf("Unmapped large spans:\n"); |
|
1945 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) { |
|
1946 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n", |
|
1947 s->length, PagesToMB(s->length)); |
|
1948 r_pages += s->length; |
|
1949 r_spans++; |
|
1950 } |
|
1951 total_normal += n_pages; |
|
1952 total_returned += r_pages; |
|
1953 out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum" |
|
1954 "; unmapped: %6.1f MB; %6.1f MB cum\n", |
|
1955 (n_spans + r_spans), |
|
1956 PagesToMB(n_pages + r_pages), |
|
1957 PagesToMB(total_normal + total_returned), |
|
1958 PagesToMB(r_pages), |
|
1959 PagesToMB(total_returned)); |
|
1960 } |
|
1961 #endif |
|
1962 |
|
1963 bool TCMalloc_PageHeap::GrowHeap(Length n) { |
|
1964 ASSERT(kMaxPages >= kMinSystemAlloc); |
|
1965 if (n > kMaxValidPages) return false; |
|
1966 Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc); |
|
1967 size_t actual_size; |
|
1968 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); |
|
1969 if (ptr == NULL) { |
|
1970 if (n < ask) { |
|
1971 // Try growing just "n" pages |
|
1972 ask = n; |
|
1973 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); |
|
1974 } |
|
1975 if (ptr == NULL) return false; |
|
1976 } |
|
1977 ask = actual_size >> kPageShift; |
|
1978 |
|
1979 uint64_t old_system_bytes = system_bytes_; |
|
1980 system_bytes_ += (ask << kPageShift); |
|
1981 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; |
|
1982 ASSERT(p > 0); |
|
1983 |
|
1984 // If we already have a lot of pages allocated, just preallocate a bunch of |
|
1985 // memory for the page map. This prevents fragmentation by pagemap metadata |
|
1986 // when a program keeps allocating and freeing large blocks. |
|
1987 |
|
1988 if (old_system_bytes < kPageMapBigAllocationThreshold |
|
1989 && system_bytes_ >= kPageMapBigAllocationThreshold) { |
|
1990 pagemap_.PreallocateMoreMemory(); |
|
1991 } |
|
1992 |
|
1993 // Make sure pagemap_ has entries for all of the new pages. |
|
1994 // Plus ensure one before and one after so coalescing code |
|
1995 // does not need bounds-checking. |
|
1996 if (pagemap_.Ensure(p-1, ask+2)) { |
|
1997 // Pretend the new area is allocated and then Delete() it to |
|
1998 // cause any necessary coalescing to occur. |
|
1999 // |
|
2000 // We do not adjust free_pages_ here since Delete() will do it for us. |
|
2001 Span* span = NewSpan(p, ask); |
|
2002 RecordSpan(span); |
|
2003 Delete(span); |
|
2004 ASSERT(Check()); |
|
2005 return true; |
|
2006 } else { |
|
2007 // We could not allocate memory within "pagemap_" |
|
2008 // TODO: Once we can return memory to the system, return the new span |
|
2009 return false; |
|
2010 } |
|
2011 } |
|
2012 |
|
2013 bool TCMalloc_PageHeap::Check() { |
|
2014 ASSERT(free_[0].normal.next == &free_[0].normal); |
|
2015 ASSERT(free_[0].returned.next == &free_[0].returned); |
|
2016 CheckList(&large_.normal, kMaxPages, 1000000000); |
|
2017 CheckList(&large_.returned, kMaxPages, 1000000000); |
|
2018 for (Length s = 1; s < kMaxPages; s++) { |
|
2019 CheckList(&free_[s].normal, s, s); |
|
2020 CheckList(&free_[s].returned, s, s); |
|
2021 } |
|
2022 return true; |
|
2023 } |
|
2024 |
|
2025 #if ASSERT_DISABLED |
|
2026 bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) { |
|
2027 return true; |
|
2028 } |
|
2029 #else |
|
2030 bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) { |
|
2031 for (Span* s = list->next; s != list; s = s->next) { |
|
2032 CHECK_CONDITION(s->free); |
|
2033 CHECK_CONDITION(s->length >= min_pages); |
|
2034 CHECK_CONDITION(s->length <= max_pages); |
|
2035 CHECK_CONDITION(GetDescriptor(s->start) == s); |
|
2036 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); |
|
2037 } |
|
2038 return true; |
|
2039 } |
|
2040 #endif |
|
2041 |
|
2042 static void ReleaseFreeList(Span* list, Span* returned) { |
|
2043 // Walk backwards through list so that when we push these |
|
2044 // spans on the "returned" list, we preserve the order. |
|
2045 while (!DLL_IsEmpty(list)) { |
|
2046 Span* s = list->prev; |
|
2047 DLL_Remove(s); |
|
2048 DLL_Prepend(returned, s); |
|
2049 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), |
|
2050 static_cast<size_t>(s->length << kPageShift)); |
|
2051 } |
|
2052 } |
|
2053 |
|
2054 void TCMalloc_PageHeap::ReleaseFreePages() { |
|
2055 for (Length s = 0; s < kMaxPages; s++) { |
|
2056 ReleaseFreeList(&free_[s].normal, &free_[s].returned); |
|
2057 } |
|
2058 ReleaseFreeList(&large_.normal, &large_.returned); |
|
2059 ASSERT(Check()); |
|
2060 } |
|
2061 |
|
2062 //------------------------------------------------------------------- |
|
2063 // Free list |
|
2064 //------------------------------------------------------------------- |
|
2065 |
|
2066 class TCMalloc_ThreadCache_FreeList { |
|
2067 private: |
|
2068 void* list_; // Linked list of nodes |
|
2069 uint16_t length_; // Current length |
|
2070 uint16_t lowater_; // Low water mark for list length |
|
2071 |
|
2072 public: |
|
2073 void Init() { |
|
2074 list_ = NULL; |
|
2075 length_ = 0; |
|
2076 lowater_ = 0; |
|
2077 } |
|
2078 |
|
2079 // Return current length of list |
|
2080 int length() const { |
|
2081 return length_; |
|
2082 } |
|
2083 |
|
2084 // Is list empty? |
|
2085 bool empty() const { |
|
2086 return list_ == NULL; |
|
2087 } |
|
2088 |
|
2089 // Low-water mark management |
|
2090 int lowwatermark() const { return lowater_; } |
|
2091 void clear_lowwatermark() { lowater_ = length_; } |
|
2092 |
|
2093 ALWAYS_INLINE void Push(void* ptr) { |
|
2094 SLL_Push(&list_, ptr); |
|
2095 length_++; |
|
2096 } |
|
2097 |
|
2098 void PushRange(int N, void *start, void *end) { |
|
2099 SLL_PushRange(&list_, start, end); |
|
2100 length_ = length_ + static_cast<uint16_t>(N); |
|
2101 } |
|
2102 |
|
2103 void PopRange(int N, void **start, void **end) { |
|
2104 SLL_PopRange(&list_, N, start, end); |
|
2105 ASSERT(length_ >= N); |
|
2106 length_ = length_ - static_cast<uint16_t>(N); |
|
2107 if (length_ < lowater_) lowater_ = length_; |
|
2108 } |
|
2109 |
|
2110 ALWAYS_INLINE void* Pop() { |
|
2111 ASSERT(list_ != NULL); |
|
2112 length_--; |
|
2113 if (length_ < lowater_) lowater_ = length_; |
|
2114 return SLL_Pop(&list_); |
|
2115 } |
|
2116 |
|
2117 #ifdef WTF_CHANGES |
|
2118 template <class Finder, class Reader> |
|
2119 void enumerateFreeObjects(Finder& finder, const Reader& reader) |
|
2120 { |
|
2121 for (void* nextObject = list_; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject))) |
|
2122 finder.visit(nextObject); |
|
2123 } |
|
2124 #endif |
|
2125 }; |
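// Minimal usage sketch for the free list above (illustrative only; objectA and
// objectB are hypothetical pointers, and the real callers are
// TCMalloc_ThreadCache::Allocate/Deallocate further down). The list is
// intrusive: each free object stores the pointer to the next free object in
// its own first word, so Push/Pop never allocate.
//
//   TCMalloc_ThreadCache_FreeList fl;
//   fl.Init();
//   fl.Push(objectA);            // length() == 1
//   fl.Push(objectB);            // length() == 2
//   void* p = fl.Pop();          // returns objectB (LIFO); lowater_ records the
//                                // minimum length seen since the last clear
//   fl.clear_lowwatermark();     // start a new observation epoch
//
// Scavenge() later uses lowwatermark() to decide how many objects were never
// needed during the epoch and can be returned to the central cache.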
|
2126 |
|
2127 //------------------------------------------------------------------- |
|
2128 // Data kept per thread |
|
2129 //------------------------------------------------------------------- |
|
2130 |
|
2131 class TCMalloc_ThreadCache { |
|
2132 private: |
|
2133 typedef TCMalloc_ThreadCache_FreeList FreeList; |
|
2134 #if COMPILER(MSVC) |
|
2135 typedef DWORD ThreadIdentifier; |
|
2136 #else |
|
2137 typedef pthread_t ThreadIdentifier; |
|
2138 #endif |
|
2139 |
|
2140 size_t size_; // Combined size of data |
|
2141 ThreadIdentifier tid_; // Which thread owns it |
|
2142 bool in_setspecific_; // Called pthread_setspecific? |
|
2143 FreeList list_[kNumClasses]; // Array indexed by size-class |
|
2144 |
|
2145 // We sample allocations, biased by the size of the allocation |
|
2146 uint32_t rnd_; // Cheap random number generator |
|
2147 size_t bytes_until_sample_; // Bytes until we sample next |
|
2148 |
|
2149 // Allocate a new heap. REQUIRES: pageheap_lock is held. |
|
2150 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid); |
|
2151 |
|
2152 // Use only as pthread thread-specific destructor function. |
|
2153 static void DestroyThreadCache(void* ptr); |
|
2154 public: |
|
2155 // All ThreadCache objects are kept in a linked list (for stats collection) |
|
2156 TCMalloc_ThreadCache* next_; |
|
2157 TCMalloc_ThreadCache* prev_; |
|
2158 |
|
2159 void Init(ThreadIdentifier tid); |
|
2160 void Cleanup(); |
|
2161 |
|
2162 // Accessors (mostly just for printing stats) |
|
2163 int freelist_length(size_t cl) const { return list_[cl].length(); } |
|
2164 |
|
2165 // Total byte size in cache |
|
2166 size_t Size() const { return size_; } |
|
2167 |
|
2168 void* Allocate(size_t size); |
|
2169 void Deallocate(void* ptr, size_t size_class); |
|
2170 |
|
2171 void FetchFromCentralCache(size_t cl, size_t allocationSize); |
|
2172 void ReleaseToCentralCache(size_t cl, int N); |
|
2173 void Scavenge(); |
|
2174 void Print() const; |
|
2175 |
|
2176 // Record allocation of "k" bytes. Return true iff allocation |
|
2177 // should be sampled |
|
2178 bool SampleAllocation(size_t k); |
|
2179 |
|
2180 // Pick next sampling point |
|
2181 void PickNextSample(size_t k); |
|
2182 |
|
2183 static void InitModule(); |
|
2184 static void InitTSD(); |
|
2185 static TCMalloc_ThreadCache* GetThreadHeap(); |
|
2186 static TCMalloc_ThreadCache* GetCache(); |
|
2187 static TCMalloc_ThreadCache* GetCacheIfPresent(); |
|
2188 static TCMalloc_ThreadCache* CreateCacheIfNecessary(); |
|
2189 static void DeleteCache(TCMalloc_ThreadCache* heap); |
|
2190 static void BecomeIdle(); |
|
2191 static void RecomputeThreadCacheSize(); |
|
2192 |
|
2193 #ifdef WTF_CHANGES |
|
2194 template <class Finder, class Reader> |
|
2195 void enumerateFreeObjects(Finder& finder, const Reader& reader) |
|
2196 { |
|
2197 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++) |
|
2198 list_[sizeClass].enumerateFreeObjects(finder, reader); |
|
2199 } |
|
2200 #endif |
|
2201 }; |
|
2202 |
|
2203 //------------------------------------------------------------------- |
|
2204 // Data kept per size-class in central cache |
|
2205 //------------------------------------------------------------------- |
|
2206 |
|
2207 class TCMalloc_Central_FreeList { |
|
2208 public: |
|
2209 void Init(size_t cl); |
|
2210 |
|
2211 // These methods all do internal locking. |
|
2212 |
|
2213 // Insert the specified range into the central freelist. N is the number of |
|
2214 // elements in the range. |
|
2215 void InsertRange(void *start, void *end, int N); |
|
2216 |
|
2217 // Returns the actual number of fetched elements in *N. |
|
2218 void RemoveRange(void **start, void **end, int *N); |
|
2219 |
|
2220 // Returns the number of free objects in cache. |
|
2221 size_t length() { |
|
2222 SpinLockHolder h(&lock_); |
|
2223 return counter_; |
|
2224 } |
|
2225 |
|
2226 // Returns the number of free objects in the transfer cache. |
|
2227 int tc_length() { |
|
2228 SpinLockHolder h(&lock_); |
|
2229 return used_slots_ * num_objects_to_move[size_class_]; |
|
2230 } |
|
2231 |
|
2232 #ifdef WTF_CHANGES |
|
2233 template <class Finder, class Reader> |
|
2234 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList) |
|
2235 { |
|
2236 for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0)) |
|
2237 ASSERT(!span->objects); |
|
2238 |
|
2239 ASSERT(!nonempty_.objects); |
|
2240 static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this); |
|
2241 |
|
2242 Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset); |
|
2243 Span* remoteSpan = nonempty_.next; |
|
2244 |
|
2245 for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) { |
|
2246 for (void* nextObject = span->objects; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject))) |
|
2247 finder.visit(nextObject); |
|
2248 } |
|
2249 } |
|
2250 #endif |
|
2251 |
|
2252 private: |
|
2253 // REQUIRES: lock_ is held |
|
2254 // Remove object from cache and return. |
|
2255 // Return NULL if no free entries in cache. |
|
2256 void* FetchFromSpans(); |
|
2257 |
|
2258 // REQUIRES: lock_ is held |
|
2259 // Remove object from cache and return. Fetches |
|
2260 // from pageheap if cache is empty. Only returns |
|
2261 // NULL on allocation failure. |
|
2262 void* FetchFromSpansSafe(); |
|
2263 |
|
2264 // REQUIRES: lock_ is held |
|
2265 // Release a linked list of objects to spans. |
|
2266 // May temporarily release lock_. |
|
2267 void ReleaseListToSpans(void *start); |
|
2268 |
|
2269 // REQUIRES: lock_ is held |
|
2270 // Release an object to spans. |
|
2271 // May temporarily release lock_. |
|
2272 void ReleaseToSpans(void* object); |
|
2273 |
|
2274 // REQUIRES: lock_ is held |
|
2275 // Populate cache by fetching from the page heap. |
|
2276 // May temporarily release lock_. |
|
2277 void Populate(); |
|
2278 |
|
2279 // REQUIRES: lock is held. |
|
2280 // Tries to make room for a TCEntry. If the cache is full it will try to |
|
2281 // expand it at the cost of some other size class's cache. Returns false if there is |
|
2282 // no space. |
|
2283 bool MakeCacheSpace(); |
|
2284 |
|
2285 // REQUIRES: lock_ for locked_size_class is held. |
|
2286 // Picks a "random" size class to steal TCEntry slot from. In reality it |
|
2287 // just iterates over the sizeclasses but does so without taking a lock. |
|
2288 // Returns true on success. |
|
2289 // May temporarily lock a "random" size class. |
|
2290 static bool EvictRandomSizeClass(size_t locked_size_class, bool force); |
|
2291 |
|
2292 // REQUIRES: lock_ is *not* held. |
|
2293 // Tries to shrink the cache. If force is true it will release objects to |
|
2294 // spans if that allows it to shrink the cache. Returns false if it failed to |
|
2295 // shrink the cache. Decrements cache_size_ on success. |
|
2296 // May temporarily take lock_. If it takes lock_, the locked_size_class |
|
2297 // lock is released to keep the thread from holding two size class locks |
|
2298 // concurrently, which could lead to a deadlock. |
|
2299 bool ShrinkCache(int locked_size_class, bool force); |
|
2300 |
|
2301 // This lock protects all the data members. cached_entries and cache_size_ |
|
2302 // may be looked at without holding the lock. |
|
2303 SpinLock lock_; |
|
2304 |
|
2305 // We keep linked lists of empty and non-empty spans. |
|
2306 size_t size_class_; // My size class |
|
2307 Span empty_; // Dummy header for list of empty spans |
|
2308 Span nonempty_; // Dummy header for list of non-empty spans |
|
2309 size_t counter_; // Number of free objects in cache entry |
|
2310 |
|
2311 // Here we reserve space for TCEntry cache slots. Since one size class can |
|
2312 // end up getting all the TCEntries quota in the system we just preallocate |
|
2313 // sufficient number of entries here. |
|
2314 TCEntry tc_slots_[kNumTransferEntries]; |
|
2315 |
|
2316 // Number of currently used cached entries in tc_slots_. This variable is |
|
2317 // updated under a lock but can be read without one. |
|
2318 int32_t used_slots_; |
|
2319 // The current number of slots for this size class. This is an |
|
2320 // adaptive value that is increased if there is lots of traffic |
|
2321 // on a given size class. |
|
2322 int32_t cache_size_; |
|
2323 }; |
|
2324 |
|
2325 // Pad each CentralCache object to multiple of 64 bytes |
|
2326 class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList { |
|
2327 private: |
|
2328 char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64]; |
|
2329 }; |
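// The pad_ arithmetic above rounds each central free list up to a multiple of
// 64 bytes, presumably so that adjacent entries of the central_cache[] array
// start on separate 64-byte boundaries and their spin locks do not share a
// cache line (false sharing). Worked example with a hypothetical
// sizeof(TCMalloc_Central_FreeList) == 200:
//
//   200 % 64        == 8
//   (64 - 8) % 64   == 56    -> char pad_[56], total object size 256
//
// and if the size were already a multiple of 64, (64 - 0) % 64 == 0, so no
// extra padding would be added.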
|
2330 |
|
2331 //------------------------------------------------------------------- |
|
2332 // Global variables |
|
2333 //------------------------------------------------------------------- |
|
2334 |
|
2335 // Central cache -- a collection of free-lists, one per size-class. |
|
2336 // We have a separate lock per free-list to reduce contention. |
|
2337 static TCMalloc_Central_FreeListPadded central_cache[kNumClasses]; |
|
2338 |
|
2339 // Page-level allocator |
|
2340 static SpinLock pageheap_lock = SPINLOCK_INITIALIZER; |
|
2341 static AllocAlignmentInteger pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(AllocAlignmentInteger) - 1) / sizeof(AllocAlignmentInteger)]; |
|
2342 static bool phinited = false; |
|
2343 |
|
2344 // Avoid extra level of indirection by making "pageheap" be just an alias |
|
2345 // of pageheap_memory. |
|
2346 typedef union { |
|
2347 void* m_memory; |
|
2348 TCMalloc_PageHeap* m_pageHeap; |
|
2349 } PageHeapUnion; |
|
2350 |
|
2351 static inline TCMalloc_PageHeap* getPageHeap() |
|
2352 { |
|
2353 PageHeapUnion u = { &pageheap_memory[0] }; |
|
2354 return u.m_pageHeap; |
|
2355 } |
|
2356 |
|
2357 #define pageheap getPageHeap() |
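// Sketch of what the union above buys us (editorial commentary, not a comment
// from the original authors): pageheap_memory is just raw, suitably aligned
// storage, so no static constructor ever runs for the page heap -- it is
// brought to life explicitly by pageheap->init() in InitModule(), which keeps
// malloc usable before global constructors run. Reading the buffer back out
// through PageHeapUnion is morally equivalent to
//
//   reinterpret_cast<TCMalloc_PageHeap*>(&pageheap_memory[0])
//
// but routed through a union member, presumably to avoid cast/type-punning
// warnings; getPageHeap() then hides the dance behind the "pageheap" macro.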
|
2358 |
|
2359 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
|
2360 |
|
2361 #if !HAVE(DISPATCH_H) |
|
2362 #if OS(WINDOWS) |
|
2363 static void sleep(unsigned seconds) |
|
2364 { |
|
2365 ::Sleep(seconds * 1000); |
|
2366 } |
|
2367 #endif |
|
2368 |
|
2369 void TCMalloc_PageHeap::scavengerThread() |
|
2370 { |
|
2371 #if HAVE(PTHREAD_SETNAME_NP) |
|
2372 pthread_setname_np("JavaScriptCore: FastMalloc scavenger"); |
|
2373 #endif |
|
2374 |
|
2375 while (1) { |
|
2376 if (!shouldScavenge()) { |
|
2377 pthread_mutex_lock(&m_scavengeMutex); |
|
2378 m_scavengeThreadActive = false; |
|
2379 // Block until there are enough free committed pages to release back to the system. |
|
2380 pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex); |
|
2381 m_scavengeThreadActive = true; |
|
2382 pthread_mutex_unlock(&m_scavengeMutex); |
|
2383 } |
|
2384 sleep(kScavengeDelayInSeconds); |
|
2385 { |
|
2386 SpinLockHolder h(&pageheap_lock); |
|
2387 pageheap->scavenge(); |
|
2388 } |
|
2389 } |
|
2390 } |
|
2391 |
|
2392 #else |
|
2393 |
|
2394 void TCMalloc_PageHeap::periodicScavenge() |
|
2395 { |
|
2396 { |
|
2397 SpinLockHolder h(&pageheap_lock); |
|
2398 pageheap->scavenge(); |
|
2399 } |
|
2400 |
|
2401 if (!shouldScavenge()) { |
|
2402 m_scavengingScheduled = false; |
|
2403 dispatch_suspend(m_scavengeTimer); |
|
2404 } |
|
2405 } |
|
2406 #endif // HAVE(DISPATCH_H) |
|
2407 |
|
2408 #endif |
|
2409 |
|
2410 // If TLS is available, we also store a copy |
|
2411 // of the per-thread object in a __thread variable |
|
2412 // since __thread variables are faster to read |
|
2413 // than pthread_getspecific(). We still need |
|
2414 // pthread_setspecific() because __thread |
|
2415 // variables provide no way to run cleanup |
|
2416 // code when a thread is destroyed. |
|
2417 #ifdef HAVE_TLS |
|
2418 static __thread TCMalloc_ThreadCache *threadlocal_heap; |
|
2419 #endif |
|
2420 // Thread-specific key. Initialization here is somewhat tricky |
|
2421 // because some Linux startup code invokes malloc() before it |
|
2422 // is in a good enough state to handle pthread_key_create(). |
|
2423 // Therefore, we use TSD keys only after tsd_inited is set to true. |
|
2424 // Until then, we use a slow path to get the heap object. |
|
2425 static bool tsd_inited = false; |
|
2426 static pthread_key_t heap_key; |
|
2427 #if COMPILER(MSVC) |
|
2428 DWORD tlsIndex = TLS_OUT_OF_INDEXES; |
|
2429 #endif |
|
2430 |
|
2431 static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap) |
|
2432 { |
|
2433 // Still do pthread_setspecific() when using MSVC fast TLS, to |
|
2434 // benefit from the delete callback. |
|
2435 pthread_setspecific(heap_key, heap); |
|
2436 #if COMPILER(MSVC) |
|
2437 TlsSetValue(tlsIndex, heap); |
|
2438 #endif |
|
2439 } |
|
2440 |
|
2441 // Allocator for thread heaps |
|
2442 static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator; |
|
2443 |
|
2444 // Linked list of heap objects. Protected by pageheap_lock. |
|
2445 static TCMalloc_ThreadCache* thread_heaps = NULL; |
|
2446 static int thread_heap_count = 0; |
|
2447 |
|
2448 // Overall thread cache size. Protected by pageheap_lock. |
|
2449 static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize; |
|
2450 |
|
2451 // Global per-thread cache size. Writes are protected by |
|
2452 // pageheap_lock. Reads are done without any locking, which should be |
|
2453 // fine as long as size_t can be written atomically and we don't place |
|
2454 // invariants between this variable and other pieces of state. |
|
2455 static volatile size_t per_thread_cache_size = kMaxThreadCacheSize; |
|
2456 |
|
2457 //------------------------------------------------------------------- |
|
2458 // Central cache implementation |
|
2459 //------------------------------------------------------------------- |
|
2460 |
|
2461 void TCMalloc_Central_FreeList::Init(size_t cl) { |
|
2462 lock_.Init(); |
|
2463 size_class_ = cl; |
|
2464 DLL_Init(&empty_); |
|
2465 DLL_Init(&nonempty_); |
|
2466 counter_ = 0; |
|
2467 |
|
2468 cache_size_ = 1; |
|
2469 used_slots_ = 0; |
|
2470 ASSERT(cache_size_ <= kNumTransferEntries); |
|
2471 } |
|
2472 |
|
2473 void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) { |
|
2474 while (start) { |
|
2475 void *next = SLL_Next(start); |
|
2476 ReleaseToSpans(start); |
|
2477 start = next; |
|
2478 } |
|
2479 } |
|
2480 |
|
2481 ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) { |
|
2482 const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift; |
|
2483 Span* span = pageheap->GetDescriptor(p); |
|
2484 ASSERT(span != NULL); |
|
2485 ASSERT(span->refcount > 0); |
|
2486 |
|
2487 // If span is empty, move it to non-empty list |
|
2488 if (span->objects == NULL) { |
|
2489 DLL_Remove(span); |
|
2490 DLL_Prepend(&nonempty_, span); |
|
2491 Event(span, 'N', 0); |
|
2492 } |
|
2493 |
|
2494 // The following check is expensive, so it is disabled by default |
|
2495 if (false) { |
|
2496 // Check that object does not occur in list |
|
2497 unsigned got = 0; |
|
2498 for (void* p = span->objects; p != NULL; p = *((void**) p)) { |
|
2499 ASSERT(p != object); |
|
2500 got++; |
|
2501 } |
|
2502 ASSERT(got + span->refcount == |
|
2503 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass)); |
|
2504 } |
|
2505 |
|
2506 counter_++; |
|
2507 span->refcount--; |
|
2508 if (span->refcount == 0) { |
|
2509 Event(span, '#', 0); |
|
2510 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass); |
|
2511 DLL_Remove(span); |
|
2512 |
|
2513 // Release central list lock while operating on pageheap |
|
2514 lock_.Unlock(); |
|
2515 { |
|
2516 SpinLockHolder h(&pageheap_lock); |
|
2517 pageheap->Delete(span); |
|
2518 } |
|
2519 lock_.Lock(); |
|
2520 } else { |
|
2521 *(reinterpret_cast<void**>(object)) = span->objects; |
|
2522 span->objects = object; |
|
2523 } |
|
2524 } |
|
2525 |
|
2526 ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass( |
|
2527 size_t locked_size_class, bool force) { |
|
2528 static int race_counter = 0; |
|
2529 int t = race_counter++; // Updated without a lock, but who cares. |
|
2530 if (t >= static_cast<int>(kNumClasses)) { |
|
2531 while (t >= static_cast<int>(kNumClasses)) { |
|
2532 t -= kNumClasses; |
|
2533 } |
|
2534 race_counter = t; |
|
2535 } |
|
2536 ASSERT(t >= 0); |
|
2537 ASSERT(t < static_cast<int>(kNumClasses)); |
|
2538 if (t == static_cast<int>(locked_size_class)) return false; |
|
2539 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force); |
|
2540 } |
|
2541 |
|
2542 bool TCMalloc_Central_FreeList::MakeCacheSpace() { |
|
2543 // Is there room in the cache? |
|
2544 if (used_slots_ < cache_size_) return true; |
|
2545 // Check whether we can expand this cache. |
|
2546 if (cache_size_ == kNumTransferEntries) return false; |
|
2547 // Ok, we'll try to grab an entry from some other size class. |
|
2548 if (EvictRandomSizeClass(size_class_, false) || |
|
2549 EvictRandomSizeClass(size_class_, true)) { |
|
2550 // Succeeded in evicting, we're going to make our cache larger. |
|
2551 cache_size_++; |
|
2552 return true; |
|
2553 } |
|
2554 return false; |
|
2555 } |
|
2556 |
|
2557 |
|
2558 namespace { |
|
2559 class LockInverter { |
|
2560 private: |
|
2561 SpinLock *held_, *temp_; |
|
2562 public: |
|
2563 inline explicit LockInverter(SpinLock* held, SpinLock *temp) |
|
2564 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); } |
|
2565 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); } |
|
2566 }; |
|
2567 } |
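// Usage sketch for LockInverter (illustrative): ShrinkCache() below enters
// holding the lock of the size class that triggered the eviction and needs
// this free list's lock_, but there is no global ordering between size-class
// locks. The inverter swaps which lock is held for the duration of a scope:
//
//   // held on entry: central_cache[locked_size_class].lock_
//   {
//       LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
//       // now held: lock_ only -- never both at once
//       // ... shrink this cache ...
//   }   // destructor: releases lock_, re-acquires the caller's lock
//
// so at no point does the thread hold two size-class locks, which is what
// rules out the deadlock mentioned in the comment on ShrinkCache().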
|
2568 |
|
2569 bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) { |
|
2570 // Start with a quick check without taking a lock. |
|
2571 if (cache_size_ == 0) return false; |
|
2572 // We don't evict from a full cache unless we are 'forcing'. |
|
2573 if (force == false && used_slots_ == cache_size_) return false; |
|
2574 |
|
2575 // Grab lock, but first release the other lock held by this thread. We use |
|
2576 // the lock inverter to ensure that we never hold two size class locks |
|
2577 // concurrently. That can create a deadlock because there is no well |
|
2578 // defined nesting order. |
|
2579 LockInverter li(¢ral_cache[locked_size_class].lock_, &lock_); |
|
2580 ASSERT(used_slots_ <= cache_size_); |
|
2581 ASSERT(0 <= cache_size_); |
|
2582 if (cache_size_ == 0) return false; |
|
2583 if (used_slots_ == cache_size_) { |
|
2584 if (force == false) return false; |
|
2585 // ReleaseListToSpans releases the lock, so we have to make all the |
|
2586 // updates to the central list before calling it. |
|
2587 cache_size_--; |
|
2588 used_slots_--; |
|
2589 ReleaseListToSpans(tc_slots_[used_slots_].head); |
|
2590 return true; |
|
2591 } |
|
2592 cache_size_--; |
|
2593 return true; |
|
2594 } |
|
2595 |
|
2596 void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) { |
|
2597 SpinLockHolder h(&lock_); |
|
2598 if (N == num_objects_to_move[size_class_] && |
|
2599 MakeCacheSpace()) { |
|
2600 int slot = used_slots_++; |
|
2601 ASSERT(slot >=0); |
|
2602 ASSERT(slot < kNumTransferEntries); |
|
2603 TCEntry *entry = &tc_slots_[slot]; |
|
2604 entry->head = start; |
|
2605 entry->tail = end; |
|
2606 return; |
|
2607 } |
|
2608 ReleaseListToSpans(start); |
|
2609 } |
|
2610 |
|
2611 void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) { |
|
2612 int num = *N; |
|
2613 ASSERT(num > 0); |
|
2614 |
|
2615 SpinLockHolder h(&lock_); |
|
2616 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) { |
|
2617 int slot = --used_slots_; |
|
2618 ASSERT(slot >= 0); |
|
2619 TCEntry *entry = &tc_slots_[slot]; |
|
2620 *start = entry->head; |
|
2621 *end = entry->tail; |
|
2622 return; |
|
2623 } |
|
2624 |
|
2625 // TODO: Prefetch multiple TCEntries? |
|
2626 void *tail = FetchFromSpansSafe(); |
|
2627 if (!tail) { |
|
2628 // We are completely out of memory. |
|
2629 *start = *end = NULL; |
|
2630 *N = 0; |
|
2631 return; |
|
2632 } |
|
2633 |
|
2634 SLL_SetNext(tail, NULL); |
|
2635 void *head = tail; |
|
2636 int count = 1; |
|
2637 while (count < num) { |
|
2638 void *t = FetchFromSpans(); |
|
2639 if (!t) break; |
|
2640 SLL_Push(&head, t); |
|
2641 count++; |
|
2642 } |
|
2643 *start = head; |
|
2644 *end = tail; |
|
2645 *N = count; |
|
2646 } |
|
2647 |
|
2648 |
|
2649 void* TCMalloc_Central_FreeList::FetchFromSpansSafe() { |
|
2650 void *t = FetchFromSpans(); |
|
2651 if (!t) { |
|
2652 Populate(); |
|
2653 t = FetchFromSpans(); |
|
2654 } |
|
2655 return t; |
|
2656 } |
|
2657 |
|
2658 void* TCMalloc_Central_FreeList::FetchFromSpans() { |
|
2659 if (DLL_IsEmpty(&nonempty_)) return NULL; |
|
2660 Span* span = nonempty_.next; |
|
2661 |
|
2662 ASSERT(span->objects != NULL); |
|
2663 ASSERT_SPAN_COMMITTED(span); |
|
2664 span->refcount++; |
|
2665 void* result = span->objects; |
|
2666 span->objects = *(reinterpret_cast<void**>(result)); |
|
2667 if (span->objects == NULL) { |
|
2668 // Move to empty list |
|
2669 DLL_Remove(span); |
|
2670 DLL_Prepend(&empty_, span); |
|
2671 Event(span, 'E', 0); |
|
2672 } |
|
2673 counter_--; |
|
2674 return result; |
|
2675 } |
|
2676 |
|
2677 // Fetch memory from the system and add to the central cache freelist. |
|
2678 ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() { |
|
2679 // Release central list lock while operating on pageheap |
|
2680 lock_.Unlock(); |
|
2681 const size_t npages = class_to_pages[size_class_]; |
|
2682 |
|
2683 Span* span; |
|
2684 { |
|
2685 SpinLockHolder h(&pageheap_lock); |
|
2686 span = pageheap->New(npages); |
|
2687 if (span) pageheap->RegisterSizeClass(span, size_class_); |
|
2688 } |
|
2689 if (span == NULL) { |
|
2690 MESSAGE("allocation failed: %d\n", errno); |
|
2691 lock_.Lock(); |
|
2692 return; |
|
2693 } |
|
2694 ASSERT_SPAN_COMMITTED(span); |
|
2695 ASSERT(span->length == npages); |
|
2696 // Cache sizeclass info eagerly. Locking is not necessary. |
|
2697 // (Instead of being eager, we could just replace any stale info |
|
2698 // about this span, but that seems to be no better in practice.) |
|
2699 for (size_t i = 0; i < npages; i++) { |
|
2700 pageheap->CacheSizeClass(span->start + i, size_class_); |
|
2701 } |
|
2702 |
|
2703 // Split the block into pieces and add to the free-list |
|
2704 // TODO: coloring of objects to avoid cache conflicts? |
|
2705 void** tail = &span->objects; |
|
2706 char* ptr = reinterpret_cast<char*>(span->start << kPageShift); |
|
2707 char* limit = ptr + (npages << kPageShift); |
|
2708 const size_t size = ByteSizeForClass(size_class_); |
|
2709 int num = 0; |
|
2710 char* nptr; |
|
2711 while ((nptr = ptr + size) <= limit) { |
|
2712 *tail = ptr; |
|
2713 tail = reinterpret_cast<void**>(ptr); |
|
2714 ptr = nptr; |
|
2715 num++; |
|
2716 } |
|
2717 ASSERT(ptr <= limit); |
|
2718 *tail = NULL; |
|
2719 span->refcount = 0; // No sub-object in use yet |
|
2720 |
|
2721 // Add span to list of non-empty spans |
|
2722 lock_.Lock(); |
|
2723 DLL_Prepend(&nonempty_, span); |
|
2724 counter_ += num; |
|
2725 } |
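// The carving loop above threads an intrusive free list directly through the
// new span's memory: each object's first word holds the address of the next
// object. A stripped-down version of the same pattern (illustrative only;
// head, base, size and limit are hypothetical locals, and this is not called
// anywhere):
//
//   void** tail = &head;                        // head will point at object 0
//   for (char* p = base; p + size <= limit; p += size) {
//       *tail = p;                              // link previous node to this object
//       tail = reinterpret_cast<void**>(p);     // this object becomes the tail
//   }
//   *tail = NULL;                               // terminate the list
//
// For k objects the resulting list is object0 -> object1 -> ... -> object(k-1)
// -> NULL, matching the order in which Populate() laid them out.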
|
2726 |
|
2727 //------------------------------------------------------------------- |
|
2728 // TCMalloc_ThreadCache implementation |
|
2729 //------------------------------------------------------------------- |
|
2730 |
|
2731 inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) { |
|
2732 if (bytes_until_sample_ < k) { |
|
2733 PickNextSample(k); |
|
2734 return true; |
|
2735 } else { |
|
2736 bytes_until_sample_ -= k; |
|
2737 return false; |
|
2738 } |
|
2739 } |
|
2740 |
|
2741 void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) { |
|
2742 size_ = 0; |
|
2743 next_ = NULL; |
|
2744 prev_ = NULL; |
|
2745 tid_ = tid; |
|
2746 in_setspecific_ = false; |
|
2747 for (size_t cl = 0; cl < kNumClasses; ++cl) { |
|
2748 list_[cl].Init(); |
|
2749 } |
|
2750 |
|
2751 // Initialize RNG -- run it for a bit to get to good values |
|
2752 bytes_until_sample_ = 0; |
|
2753 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this)); |
|
2754 for (int i = 0; i < 100; i++) { |
|
2755 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2)); |
|
2756 } |
|
2757 } |
|
2758 |
|
2759 void TCMalloc_ThreadCache::Cleanup() { |
|
2760 // Put unused memory back into central cache |
|
2761 for (size_t cl = 0; cl < kNumClasses; ++cl) { |
|
2762 if (list_[cl].length() > 0) { |
|
2763 ReleaseToCentralCache(cl, list_[cl].length()); |
|
2764 } |
|
2765 } |
|
2766 } |
|
2767 |
|
2768 ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) { |
|
2769 ASSERT(size <= kMaxSize); |
|
2770 const size_t cl = SizeClass(size); |
|
2771 FreeList* list = &list_[cl]; |
|
2772 size_t allocationSize = ByteSizeForClass(cl); |
|
2773 if (list->empty()) { |
|
2774 FetchFromCentralCache(cl, allocationSize); |
|
2775 if (list->empty()) return NULL; |
|
2776 } |
|
2777 size_ -= allocationSize; |
|
2778 return list->Pop(); |
|
2779 } |
|
2780 |
|
2781 inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) { |
|
2782 size_ += ByteSizeForClass(cl); |
|
2783 FreeList* list = &list_[cl]; |
|
2784 list->Push(ptr); |
|
2785 // If enough data is free, put back into central cache |
|
2786 if (list->length() > kMaxFreeListLength) { |
|
2787 ReleaseToCentralCache(cl, num_objects_to_move[cl]); |
|
2788 } |
|
2789 if (size_ >= per_thread_cache_size) Scavenge(); |
|
2790 } |
|
2791 |
|
2792 // Remove some objects of class "cl" from central cache and add to thread heap |
|
2793 ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) { |
|
2794 int fetch_count = num_objects_to_move[cl]; |
|
2795 void *start, *end; |
|
2796 central_cache[cl].RemoveRange(&start, &end, &fetch_count); |
|
2797 list_[cl].PushRange(fetch_count, start, end); |
|
2798 size_ += allocationSize * fetch_count; |
|
2799 } |
|
2800 |
|
2801 // Remove some objects of class "cl" from thread heap and add to central cache |
|
2802 inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) { |
|
2803 ASSERT(N > 0); |
|
2804 FreeList* src = &list_[cl]; |
|
2805 if (N > src->length()) N = src->length(); |
|
2806 size_ -= N*ByteSizeForClass(cl); |
|
2807 |
|
2808 // We return prepackaged chains of the correct size to the central cache. |
|
2809 // TODO: Use the same format internally in the thread caches? |
|
2810 int batch_size = num_objects_to_move[cl]; |
|
2811 while (N > batch_size) { |
|
2812 void *tail, *head; |
|
2813 src->PopRange(batch_size, &head, &tail); |
|
2814 central_cache[cl].InsertRange(head, tail, batch_size); |
|
2815 N -= batch_size; |
|
2816 } |
|
2817 void *tail, *head; |
|
2818 src->PopRange(N, &head, &tail); |
|
2819 central_cache[cl].InsertRange(head, tail, N); |
|
2820 } |
|
2821 |
|
2822 // Release idle memory to the central cache |
|
2823 inline void TCMalloc_ThreadCache::Scavenge() { |
|
2824 // If the low-water mark for the free list is L, it means we would |
|
2825 // not have had to allocate anything from the central cache even if |
|
2826 // we had reduced the free list size by L. We aim to get closer to |
|
2827 // that situation by dropping L/2 nodes from the free list. This |
|
2828 // may not release much memory, but if so we will call scavenge again |
|
2829 // pretty soon and the low-water marks will be high on that call. |
|
2830 //int64 start = CycleClock::Now(); |
|
2831 |
|
2832 for (size_t cl = 0; cl < kNumClasses; cl++) { |
|
2833 FreeList* list = &list_[cl]; |
|
2834 const int lowmark = list->lowwatermark(); |
|
2835 if (lowmark > 0) { |
|
2836 const int drop = (lowmark > 1) ? lowmark/2 : 1; |
|
2837 ReleaseToCentralCache(cl, drop); |
|
2838 } |
|
2839 list->clear_lowwatermark(); |
|
2840 } |
|
2841 |
|
2842 //int64 finish = CycleClock::Now(); |
|
2843 //CycleTimer ct; |
|
2844 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0); |
|
2845 } |
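// Numeric illustration of the low-water-mark policy above (hypothetical
// values): suppose a free list ends the epoch with 40 objects and its length
// never dropped below 12 since the last clear_lowwatermark(). Those 12
// objects were pure surplus -- every allocation in the epoch could have been
// served without them -- so Scavenge() returns lowmark/2 == 6 of them to the
// central cache and resets the mark. If the surplus persists, the next pass
// sees a low-water mark of roughly 6 and trims again, decaying the excess
// geometrically instead of dumping it all at once.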
|
2846 |
|
2847 void TCMalloc_ThreadCache::PickNextSample(size_t k) { |
|
2848 // Make next "random" number |
|
2849 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers |
|
2850 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0); |
|
2851 uint32_t r = rnd_; |
|
2852 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly); |
|
2853 |
|
2854 // Next point is "rnd_ % (sample_period)". I.e., average |
|
2855 // increment is "sample_period/2". |
|
2856 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter); |
|
2857 static int last_flag_value = -1; |
|
2858 |
|
2859 if (flag_value != last_flag_value) { |
|
2860 SpinLockHolder h(&sample_period_lock); |
|
2861 int i; |
|
2862 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) { |
|
2863 if (primes_list[i] >= flag_value) { |
|
2864 break; |
|
2865 } |
|
2866 } |
|
2867 sample_period = primes_list[i]; |
|
2868 last_flag_value = flag_value; |
|
2869 } |
|
2870 |
|
2871 bytes_until_sample_ += rnd_ % sample_period; |
|
2872 |
|
2873 if (k > (static_cast<size_t>(-1) >> 2)) { |
|
2874 // If the user has asked for a huge allocation then it is possible |
|
2875 // for the code below to loop infinitely. Just return (note that |
|
2876 // this throws off the sampling accuracy somewhat, but a user who |
|
2877 // is allocating more than 1G of memory at a time can live with a |
|
2878 // minor inaccuracy in profiling of small allocations, and also |
|
2879 // would rather not wait for the loop below to terminate). |
|
2880 return; |
|
2881 } |
|
2882 |
|
2883 while (bytes_until_sample_ < k) { |
|
2884 // Increase bytes_until_sample_ by enough average sampling periods |
|
2885 // (sample_period >> 1) to allow us to sample past the current |
|
2886 // allocation. |
|
2887 bytes_until_sample_ += (sample_period >> 1); |
|
2888 } |
|
2889 |
|
2890 bytes_until_sample_ -= k; |
|
2891 } |
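// The "random" step above is a 32-bit Galois LFSR: shift left one bit and, if
// the bit shifted out was set, XOR in the taps of x^32+x^22+x^2+x^1+1.
// Self-contained sketch of just that step (illustrative; lfsr_next is not a
// function in this file):
//
//   static inline uint32_t lfsr_next(uint32_t r) {
//       static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
//       // (int32_t(r) >> 31) is all-ones when the top bit is set, 0 otherwise,
//       // so the mask applies kPoly only when a 1 bit is shifted out.
//       return (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
//   }
//
// Because the polynomial is primitive (per the comment above), the period is
// 2^32 - 1 for any non-zero seed, which is why Init() seeds rnd_ from the
// cache object's own address and spins the generator for a while.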
|
2892 |
|
2893 void TCMalloc_ThreadCache::InitModule() { |
|
2894 // There is a slight potential race here because of the double-checked |
|
2895 // locking idiom. However, as long as the program does a small |
|
2896 // allocation before switching to multi-threaded mode, we will be |
|
2897 // fine. We increase the chances of doing such a small allocation |
|
2898 // by doing one in the constructor of the module_enter_exit_hook |
|
2899 // object declared below. |
|
2900 SpinLockHolder h(&pageheap_lock); |
|
2901 if (!phinited) { |
|
2902 #ifdef WTF_CHANGES |
|
2903 InitTSD(); |
|
2904 #endif |
|
2905 InitSizeClasses(); |
|
2906 threadheap_allocator.Init(); |
|
2907 span_allocator.Init(); |
|
2908 span_allocator.New(); // Reduce cache conflicts |
|
2909 span_allocator.New(); // Reduce cache conflicts |
|
2910 stacktrace_allocator.Init(); |
|
2911 DLL_Init(&sampled_objects); |
|
2912 for (size_t i = 0; i < kNumClasses; ++i) { |
|
2913 central_cache[i].Init(i); |
|
2914 } |
|
2915 pageheap->init(); |
|
2916 phinited = 1; |
|
2917 #if defined(WTF_CHANGES) && OS(DARWIN) |
|
2918 FastMallocZone::init(); |
|
2919 #endif |
|
2920 } |
|
2921 } |
|
2922 |
|
2923 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) { |
|
2924 // Create the heap and add it to the linked list |
|
2925 TCMalloc_ThreadCache *heap = threadheap_allocator.New(); |
|
2926 heap->Init(tid); |
|
2927 heap->next_ = thread_heaps; |
|
2928 heap->prev_ = NULL; |
|
2929 if (thread_heaps != NULL) thread_heaps->prev_ = heap; |
|
2930 thread_heaps = heap; |
|
2931 thread_heap_count++; |
|
2932 RecomputeThreadCacheSize(); |
|
2933 return heap; |
|
2934 } |
|
2935 |
|
2936 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() { |
|
2937 #ifdef HAVE_TLS |
|
2938 // __thread is faster, but only when the kernel supports it |
|
2939 if (KernelSupportsTLS()) |
|
2940 return threadlocal_heap; |
|
2941 #elif COMPILER(MSVC) |
|
2942 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex)); |
|
2943 #else |
|
2944 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key)); |
|
2945 #endif |
|
2946 } |
|
2947 |
|
2948 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() { |
|
2949 TCMalloc_ThreadCache* ptr = NULL; |
|
2950 if (!tsd_inited) { |
|
2951 InitModule(); |
|
2952 } else { |
|
2953 ptr = GetThreadHeap(); |
|
2954 } |
|
2955 if (ptr == NULL) ptr = CreateCacheIfNecessary(); |
|
2956 return ptr; |
|
2957 } |
|
2958 |
|
2959 // In deletion paths, we do not try to create a thread-cache. This is |
|
2960 // because we may be in the thread destruction code and may have |
|
2961 // already cleaned up the cache for this thread. |
|
2962 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() { |
|
2963 if (!tsd_inited) return NULL; |
|
2964 void* const p = GetThreadHeap(); |
|
2965 return reinterpret_cast<TCMalloc_ThreadCache*>(p); |
|
2966 } |
|
2967 |
|
2968 void TCMalloc_ThreadCache::InitTSD() { |
|
2969 ASSERT(!tsd_inited); |
|
2970 pthread_key_create(&heap_key, DestroyThreadCache); |
|
2971 #if COMPILER(MSVC) |
|
2972 tlsIndex = TlsAlloc(); |
|
2973 #endif |
|
2974 tsd_inited = true; |
|
2975 |
|
2976 #if !COMPILER(MSVC) |
|
2977 // We may have used a fake pthread_t for the main thread. Fix it. |
|
2978 pthread_t zero; |
|
2979 memset(&zero, 0, sizeof(zero)); |
|
2980 #endif |
|
2981 #ifndef WTF_CHANGES |
|
2982 SpinLockHolder h(&pageheap_lock); |
|
2983 #else |
|
2984 ASSERT(pageheap_lock.IsHeld()); |
|
2985 #endif |
|
2986 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { |
|
2987 #if COMPILER(MSVC) |
|
2988 if (h->tid_ == 0) { |
|
2989 h->tid_ = GetCurrentThreadId(); |
|
2990 } |
|
2991 #else |
|
2992 if (pthread_equal(h->tid_, zero)) { |
|
2993 h->tid_ = pthread_self(); |
|
2994 } |
|
2995 #endif |
|
2996 } |
|
2997 } |
|
2998 |
|
2999 TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() { |
|
3000 // Initialize per-thread data if necessary |
|
3001 TCMalloc_ThreadCache* heap = NULL; |
|
3002 { |
|
3003 SpinLockHolder h(&pageheap_lock); |
|
3004 |
|
3005 #if COMPILER(MSVC) |
|
3006 DWORD me; |
|
3007 if (!tsd_inited) { |
|
3008 me = 0; |
|
3009 } else { |
|
3010 me = GetCurrentThreadId(); |
|
3011 } |
|
3012 #else |
|
3013 // Early on in glibc's life, we cannot even call pthread_self() |
|
3014 pthread_t me; |
|
3015 if (!tsd_inited) { |
|
3016 memset(&me, 0, sizeof(me)); |
|
3017 } else { |
|
3018 me = pthread_self(); |
|
3019 } |
|
3020 #endif |
|
3021 |
|
3022     // This may be a recursive malloc call from pthread_setspecific(). |
|
3023 // In that case, the heap for this thread has already been created |
|
3024 // and added to the linked list. So we search for that first. |
|
3025 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { |
|
3026 #if COMPILER(MSVC) |
|
3027 if (h->tid_ == me) { |
|
3028 #else |
|
3029 if (pthread_equal(h->tid_, me)) { |
|
3030 #endif |
|
3031 heap = h; |
|
3032 break; |
|
3033 } |
|
3034 } |
|
3035 |
|
3036 if (heap == NULL) heap = NewHeap(me); |
|
3037 } |
|
3038 |
|
3039 // We call pthread_setspecific() outside the lock because it may |
|
3040 // call malloc() recursively. The recursive call will never get |
|
3041 // here again because it will find the already allocated heap in the |
|
3042 // linked list of heaps. |
|
3043 if (!heap->in_setspecific_ && tsd_inited) { |
|
3044 heap->in_setspecific_ = true; |
|
3045 setThreadHeap(heap); |
|
3046 } |
|
3047 return heap; |
|
3048 } |
|
3049 |
|
3050 void TCMalloc_ThreadCache::BecomeIdle() { |
|
3051 if (!tsd_inited) return; // No caches yet |
|
3052 TCMalloc_ThreadCache* heap = GetThreadHeap(); |
|
3053 if (heap == NULL) return; // No thread cache to remove |
|
3054 if (heap->in_setspecific_) return; // Do not disturb the active caller |
|
3055 |
|
3056 heap->in_setspecific_ = true; |
|
3057 pthread_setspecific(heap_key, NULL); |
|
3058 #ifdef HAVE_TLS |
|
3059 // Also update the copy in __thread |
|
3060 threadlocal_heap = NULL; |
|
3061 #endif |
|
3062 heap->in_setspecific_ = false; |
|
3063 if (GetThreadHeap() == heap) { |
|
3064 // Somehow heap got reinstated by a recursive call to malloc |
|
3065 // from pthread_setspecific. We give up in this case. |
|
3066 return; |
|
3067 } |
|
3068 |
|
3069 // We can now get rid of the heap |
|
3070 DeleteCache(heap); |
|
3071 } |
|
3072 |
|
3073 void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) { |
|
3074 // Note that "ptr" cannot be NULL since pthread promises not |
|
3075 // to invoke the destructor on NULL values, but for safety, |
|
3076 // we check anyway. |
|
3077 if (ptr == NULL) return; |
|
3078 #ifdef HAVE_TLS |
|
3079 // Prevent fast path of GetThreadHeap() from returning heap. |
|
3080 threadlocal_heap = NULL; |
|
3081 #endif |
|
3082 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr)); |
|
3083 } |
|
3084 |
|
3085 void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) { |
|
3086 // Remove all memory from heap |
|
3087 heap->Cleanup(); |
|
3088 |
|
3089 // Remove from linked list |
|
3090 SpinLockHolder h(&pageheap_lock); |
|
3091 if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_; |
|
3092 if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_; |
|
3093 if (thread_heaps == heap) thread_heaps = heap->next_; |
|
3094 thread_heap_count--; |
|
3095 RecomputeThreadCacheSize(); |
|
3096 |
|
3097 threadheap_allocator.Delete(heap); |
|
3098 } |
|
3099 |
|
3100 void TCMalloc_ThreadCache::RecomputeThreadCacheSize() { |
|
3101 // Divide available space across threads |
|
3102 int n = thread_heap_count > 0 ? thread_heap_count : 1; |
|
3103 size_t space = overall_thread_cache_size / n; |
|
3104 |
|
3105 // Limit to allowed range |
|
3106 if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; |
|
3107 if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; |
|
3108 |
|
3109 per_thread_cache_size = space; |
|
3110 } |
|
3111 |
|
3112 void TCMalloc_ThreadCache::Print() const { |
|
3113 for (size_t cl = 0; cl < kNumClasses; ++cl) { |
|
3114 MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n", |
|
3115 ByteSizeForClass(cl), |
|
3116 list_[cl].length(), |
|
3117 list_[cl].lowwatermark()); |
|
3118 } |
|
3119 } |
|
3120 |
|
3121 // Extract interesting stats |
|
3122 struct TCMallocStats { |
|
3123 uint64_t system_bytes; // Bytes alloced from system |
|
3124 uint64_t thread_bytes; // Bytes in thread caches |
|
3125 uint64_t central_bytes; // Bytes in central cache |
|
3126 uint64_t transfer_bytes; // Bytes in central transfer cache |
|
3127 uint64_t pageheap_bytes; // Bytes in page heap |
|
3128 uint64_t metadata_bytes; // Bytes alloced for metadata |
|
3129 }; |
|
3130 |
|
3131 #ifndef WTF_CHANGES |
|
3132 // Get stats into "r". Also get per-size-class counts if class_count != NULL |
|
3133 static void ExtractStats(TCMallocStats* r, uint64_t* class_count) { |
|
3134 r->central_bytes = 0; |
|
3135 r->transfer_bytes = 0; |
|
3136 for (int cl = 0; cl < kNumClasses; ++cl) { |
|
3137 const int length = central_cache[cl].length(); |
|
3138 const int tc_length = central_cache[cl].tc_length(); |
|
3139 r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length; |
|
3140 r->transfer_bytes += |
|
3141 static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length; |
|
3142 if (class_count) class_count[cl] = length + tc_length; |
|
3143 } |
|
3144 |
|
3145 // Add stats from per-thread heaps |
|
3146 r->thread_bytes = 0; |
|
3147 { // scope |
|
3148 SpinLockHolder h(&pageheap_lock); |
|
3149 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { |
|
3150 r->thread_bytes += h->Size(); |
|
3151 if (class_count) { |
|
3152 for (size_t cl = 0; cl < kNumClasses; ++cl) { |
|
3153 class_count[cl] += h->freelist_length(cl); |
|
3154 } |
|
3155 } |
|
3156 } |
|
3157 } |
|
3158 |
|
3159   { // scope |
|
3160 SpinLockHolder h(&pageheap_lock); |
|
3161 r->system_bytes = pageheap->SystemBytes(); |
|
3162 r->metadata_bytes = metadata_system_bytes; |
|
3163 r->pageheap_bytes = pageheap->FreeBytes(); |
|
3164 } |
|
3165 } |
|
3166 #endif |
|
3167 |
|
3168 #ifndef WTF_CHANGES |
|
3169 // WRITE stats to "out" |
|
3170 static void DumpStats(TCMalloc_Printer* out, int level) { |
|
3171 TCMallocStats stats; |
|
3172 uint64_t class_count[kNumClasses]; |
|
3173 ExtractStats(&stats, (level >= 2 ? class_count : NULL)); |
|
3174 |
|
3175 if (level >= 2) { |
|
3176 out->printf("------------------------------------------------\n"); |
|
3177 uint64_t cumulative = 0; |
|
3178 for (int cl = 0; cl < kNumClasses; ++cl) { |
|
3179 if (class_count[cl] > 0) { |
|
3180 uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl); |
|
3181 cumulative += class_bytes; |
|
3182 out->printf("class %3d [ %8" PRIuS " bytes ] : " |
|
3183 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n", |
|
3184 cl, ByteSizeForClass(cl), |
|
3185 class_count[cl], |
|
3186 class_bytes / 1048576.0, |
|
3187 cumulative / 1048576.0); |
|
3188 } |
|
3189 } |
|
3190 |
|
3191 SpinLockHolder h(&pageheap_lock); |
|
3192 pageheap->Dump(out); |
|
3193 } |
|
3194 |
|
3195 const uint64_t bytes_in_use = stats.system_bytes |
|
3196 - stats.pageheap_bytes |
|
3197 - stats.central_bytes |
|
3198 - stats.transfer_bytes |
|
3199 - stats.thread_bytes; |
|
3200 |
|
3201 out->printf("------------------------------------------------\n" |
|
3202 "MALLOC: %12" PRIu64 " Heap size\n" |
|
3203 "MALLOC: %12" PRIu64 " Bytes in use by application\n" |
|
3204 "MALLOC: %12" PRIu64 " Bytes free in page heap\n" |
|
3205 "MALLOC: %12" PRIu64 " Bytes free in central cache\n" |
|
3206 "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n" |
|
3207 "MALLOC: %12" PRIu64 " Bytes free in thread caches\n" |
|
3208 "MALLOC: %12" PRIu64 " Spans in use\n" |
|
3209 "MALLOC: %12" PRIu64 " Thread heaps in use\n" |
|
3210 "MALLOC: %12" PRIu64 " Metadata allocated\n" |
|
3211 "------------------------------------------------\n", |
|
3212 stats.system_bytes, |
|
3213 bytes_in_use, |
|
3214 stats.pageheap_bytes, |
|
3215 stats.central_bytes, |
|
3216 stats.transfer_bytes, |
|
3217 stats.thread_bytes, |
|
3218 uint64_t(span_allocator.inuse()), |
|
3219 uint64_t(threadheap_allocator.inuse()), |
|
3220 stats.metadata_bytes); |
|
3221 } |
|
3222 |
|
3223 static void PrintStats(int level) { |
|
3224 const int kBufferSize = 16 << 10; |
|
3225 char* buffer = new char[kBufferSize]; |
|
3226 TCMalloc_Printer printer(buffer, kBufferSize); |
|
3227 DumpStats(&printer, level); |
|
3228 write(STDERR_FILENO, buffer, strlen(buffer)); |
|
3229 delete[] buffer; |
|
3230 } |
|
3231 |
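// Returns a heap-allocated array describing the currently sampled objects.
// Each record occupies 3 + depth slots: a count of 1, the sampled
// allocation size, the stack depth, and then the stack PCs; the list is
// terminated by a 0 slot.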
|
3232 static void** DumpStackTraces() { |
|
3233 // Count how much space we need |
|
3234 int needed_slots = 0; |
|
3235 { |
|
3236 SpinLockHolder h(&pageheap_lock); |
|
3237 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) { |
|
3238 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects); |
|
3239 needed_slots += 3 + stack->depth; |
|
3240 } |
|
3241 needed_slots += 100; // Slop in case sample grows |
|
3242 needed_slots += needed_slots/8; // An extra 12.5% slop |
|
3243 } |
|
3244 |
|
3245 void** result = new void*[needed_slots]; |
|
3246 if (result == NULL) { |
|
3247 MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n", |
|
3248 needed_slots); |
|
3249 return NULL; |
|
3250 } |
|
3251 |
|
3252 SpinLockHolder h(&pageheap_lock); |
|
3253 int used_slots = 0; |
|
3254 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) { |
|
3255 ASSERT(used_slots < needed_slots); // Need to leave room for terminator |
|
3256 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects); |
|
3257 if (used_slots + 3 + stack->depth >= needed_slots) { |
|
3258 // No more room |
|
3259 break; |
|
3260 } |
|
3261 |
|
3262 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1)); |
|
3263 result[used_slots+1] = reinterpret_cast<void*>(stack->size); |
|
3264 result[used_slots+2] = reinterpret_cast<void*>(stack->depth); |
|
3265 for (int d = 0; d < stack->depth; d++) { |
|
3266 result[used_slots+3+d] = stack->stack[d]; |
|
3267 } |
|
3268 used_slots += 3 + stack->depth; |
|
3269 } |
|
3270 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0)); |
|
3271 return result; |
|
3272 } |
|
3273 #endif |
|
3274 |
|
3275 #ifndef WTF_CHANGES |
|
3276 |
|
3277 // TCMalloc's support for extra malloc interfaces |
|
3278 class TCMallocImplementation : public MallocExtension { |
|
3279 public: |
|
3280 virtual void GetStats(char* buffer, int buffer_length) { |
|
3281 ASSERT(buffer_length > 0); |
|
3282 TCMalloc_Printer printer(buffer, buffer_length); |
|
3283 |
|
3284 // Print level one stats unless lots of space is available |
|
3285 if (buffer_length < 10000) { |
|
3286 DumpStats(&printer, 1); |
|
3287 } else { |
|
3288 DumpStats(&printer, 2); |
|
3289 } |
|
3290 } |
|
3291 |
|
3292 virtual void** ReadStackTraces() { |
|
3293 return DumpStackTraces(); |
|
3294 } |
|
3295 |
|
3296 virtual bool GetNumericProperty(const char* name, size_t* value) { |
|
3297 ASSERT(name != NULL); |
|
3298 |
|
3299 if (strcmp(name, "generic.current_allocated_bytes") == 0) { |
|
3300 TCMallocStats stats; |
|
3301 ExtractStats(&stats, NULL); |
|
3302 *value = stats.system_bytes |
|
3303 - stats.thread_bytes |
|
3304 - stats.central_bytes |
|
3305 - stats.pageheap_bytes; |
|
3306 return true; |
|
3307 } |
|
3308 |
|
3309 if (strcmp(name, "generic.heap_size") == 0) { |
|
3310 TCMallocStats stats; |
|
3311 ExtractStats(&stats, NULL); |
|
3312 *value = stats.system_bytes; |
|
3313 return true; |
|
3314 } |
|
3315 |
|
3316 if (strcmp(name, "tcmalloc.slack_bytes") == 0) { |
|
3317 // We assume that bytes in the page heap are not fragmented too |
|
3318 // badly, and are therefore available for allocation. |
|
3319 SpinLockHolder l(&pageheap_lock); |
|
3320 *value = pageheap->FreeBytes(); |
|
3321 return true; |
|
3322 } |
|
3323 |
|
3324 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { |
|
3325 SpinLockHolder l(&pageheap_lock); |
|
3326 *value = overall_thread_cache_size; |
|
3327 return true; |
|
3328 } |
|
3329 |
|
3330 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) { |
|
3331 TCMallocStats stats; |
|
3332 ExtractStats(&stats, NULL); |
|
3333 *value = stats.thread_bytes; |
|
3334 return true; |
|
3335 } |
|
3336 |
|
3337 return false; |
|
3338 } |
|
3339 |
|
3340 virtual bool SetNumericProperty(const char* name, size_t value) { |
|
3341 ASSERT(name != NULL); |
|
3342 |
|
3343 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { |
|
3344 // Clip the value to a reasonable range |
|
3345 if (value < kMinThreadCacheSize) value = kMinThreadCacheSize; |
|
3346 if (value > (1<<30)) value = (1<<30); // Limit to 1GB |
|
3347 |
|
3348 SpinLockHolder l(&pageheap_lock); |
|
3349 overall_thread_cache_size = static_cast<size_t>(value); |
|
3350 TCMalloc_ThreadCache::RecomputeThreadCacheSize(); |
|
3351 return true; |
|
3352 } |
|
3353 |
|
3354 return false; |
|
3355 } |
|
3356 |
|
3357 virtual void MarkThreadIdle() { |
|
3358 TCMalloc_ThreadCache::BecomeIdle(); |
|
3359 } |
|
3360 |
|
3361 virtual void ReleaseFreeMemory() { |
|
3362 SpinLockHolder h(&pageheap_lock); |
|
3363 pageheap->ReleaseFreePages(); |
|
3364 } |
|
3365 }; |
|
3366 #endif |
|
3367 |
|
3368 // The constructor allocates an object to ensure that initialization |
|
3369 // runs before main(), and therefore we do not have a chance to become |
|
3370 // multi-threaded before initialization. We also create the TSD key |
|
3371 // here. Presumably by the time this constructor runs, glibc is in |
|
3372 // good enough shape to handle pthread_key_create(). |
|
3373 // |
|
3374 // The constructor also takes the opportunity to tell STL to use |
|
3375 // tcmalloc. We want to do this early, before construct time, so |
|
3376 // all user STL allocations go through tcmalloc (which works really |
|
3377 // well for STL). |
|
3378 // |
|
3379 // The destructor prints stats when the program exits. |
|
3380 class TCMallocGuard { |
|
3381 public: |
|
3382 |
|
3383 TCMallocGuard() { |
|
3384 #ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS |
|
3385 // Check whether the kernel also supports TLS (needs to happen at runtime) |
|
3386 CheckIfKernelSupportsTLS(); |
|
3387 #endif |
|
3388 #ifndef WTF_CHANGES |
|
3389 #ifdef WIN32 // patch the windows VirtualAlloc, etc. |
|
3390 PatchWindowsFunctions(); // defined in windows/patch_functions.cc |
|
3391 #endif |
|
3392 #endif |
|
3393 free(malloc(1)); |
|
3394 TCMalloc_ThreadCache::InitTSD(); |
|
3395 free(malloc(1)); |
|
3396 #ifndef WTF_CHANGES |
|
3397 MallocExtension::Register(new TCMallocImplementation); |
|
3398 #endif |
|
3399 } |
|
3400 |
|
3401 #ifndef WTF_CHANGES |
|
3402 ~TCMallocGuard() { |
|
3403 const char* env = getenv("MALLOCSTATS"); |
|
3404 if (env != NULL) { |
|
3405 int level = atoi(env); |
|
3406 if (level < 1) level = 1; |
|
3407 PrintStats(level); |
|
3408 } |
|
3409 #ifdef WIN32 |
|
3410 UnpatchWindowsFunctions(); |
|
3411 #endif |
|
3412 } |
|
3413 #endif |
|
3414 }; |
|
3415 |
|
3416 #ifndef WTF_CHANGES |
|
3417 static TCMallocGuard module_enter_exit_hook; |
|
3418 #endif |
|
3419 |
|
3420 |
|
3421 //------------------------------------------------------------------- |
|
3422 // Helpers for the exported routines below |
|
3423 //------------------------------------------------------------------- |
|
3424 |
|
3425 #ifndef WTF_CHANGES |
|
3426 |
|
3427 static Span* DoSampledAllocation(size_t size) { |
|
3428 |
|
3429 // Grab the stack trace outside the heap lock |
|
3430 StackTrace tmp; |
|
3431 tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1); |
|
3432 tmp.size = size; |
|
3433 |
|
3434 SpinLockHolder h(&pageheap_lock); |
|
3435 // Allocate span |
|
3436 Span *span = pageheap->New(pages(size == 0 ? 1 : size)); |
|
3437 if (span == NULL) { |
|
3438 return NULL; |
|
3439 } |
|
3440 |
|
3441 // Allocate stack trace |
|
3442 StackTrace *stack = stacktrace_allocator.New(); |
|
3443 if (stack == NULL) { |
|
3444 // Sampling failed because of lack of memory |
|
3445 return span; |
|
3446 } |
|
3447 |
|
3448 *stack = tmp; |
|
3449 span->sample = 1; |
|
3450 span->objects = stack; |
|
3451 DLL_Prepend(&sampled_objects, span); |
|
3452 |
|
3453 return span; |
|
3454 } |
|
3455 #endif |
|
3456 |
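// Sanity-check helpers: CheckCachedSizeClass() verifies that any cached
// PageID-to-sizeclass entry agrees with the size class recorded in the
// span's descriptor, and CheckedMallocResult() asserts this for a freshly
// produced pointer before handing it out.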
|
3457 static inline bool CheckCachedSizeClass(void *ptr) { |
|
3458 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; |
|
3459 size_t cached_value = pageheap->GetSizeClassIfCached(p); |
|
3460 return cached_value == 0 || |
|
3461 cached_value == pageheap->GetDescriptor(p)->sizeclass; |
|
3462 } |
|
3463 |
|
3464 static inline void* CheckedMallocResult(void *result) |
|
3465 { |
|
3466 ASSERT(result == 0 || CheckCachedSizeClass(result)); |
|
3467 return result; |
|
3468 } |
|
3469 |
|
3470 static inline void* SpanToMallocResult(Span *span) { |
|
3471 ASSERT_SPAN_COMMITTED(span); |
|
3472 pageheap->CacheSizeClass(span->start, 0); |
|
3473 return |
|
3474 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); |
|
3475 } |
|
3476 |
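// do_malloc() picks one of three paths: a sampled allocation (non-WTF
// builds only), a direct page-heap allocation for requests above kMaxSize,
// or, in the common case, a pop from the thread cache's free list for the
// request's size class.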
|
3477 #ifdef WTF_CHANGES |
|
3478 template <bool crashOnFailure> |
|
3479 #endif |
|
3480 static ALWAYS_INLINE void* do_malloc(size_t size) { |
|
3481 void* ret = NULL; |
|
3482 |
|
3483 #ifdef WTF_CHANGES |
|
3484 ASSERT(!isForbidden()); |
|
3485 #endif |
|
3486 |
|
3487 // The following call forces module initialization |
|
3488 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); |
|
3489 #ifndef WTF_CHANGES |
|
3490 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { |
|
3491 Span* span = DoSampledAllocation(size); |
|
3492 if (span != NULL) { |
|
3493 ret = SpanToMallocResult(span); |
|
3494 } |
|
3495 } else |
|
3496 #endif |
|
3497 if (size > kMaxSize) { |
|
3498 // Use page-level allocator |
|
3499 SpinLockHolder h(&pageheap_lock); |
|
3500 Span* span = pageheap->New(pages(size)); |
|
3501 if (span != NULL) { |
|
3502 ret = SpanToMallocResult(span); |
|
3503 } |
|
3504 } else { |
|
3505 // The common case, and also the simplest. This just pops the |
|
3506     // size-appropriate freelist, after replenishing it if it's empty. |
|
3507 ret = CheckedMallocResult(heap->Allocate(size)); |
|
3508 } |
|
3509 if (!ret) { |
|
3510 #ifdef WTF_CHANGES |
|
3511 if (crashOnFailure) // This branch should be optimized out by the compiler. |
|
3512 CRASH(); |
|
3513 #else |
|
3514 errno = ENOMEM; |
|
3515 #endif |
|
3516 } |
|
3517 return ret; |
|
3518 } |
|
3519 |
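// do_free() reverses the paths above: small objects (nonzero size class)
// go back to the thread cache when one exists, or straight into the
// central free list otherwise; large objects are returned to the page
// heap, releasing any sampling record attached to the span.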
|
3520 static ALWAYS_INLINE void do_free(void* ptr) { |
|
3521 if (ptr == NULL) return; |
|
3522 ASSERT(pageheap != NULL); // Should not call free() before malloc() |
|
3523 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; |
|
3524 Span* span = NULL; |
|
3525 size_t cl = pageheap->GetSizeClassIfCached(p); |
|
3526 |
|
3527 if (cl == 0) { |
|
3528 span = pageheap->GetDescriptor(p); |
|
3529 cl = span->sizeclass; |
|
3530 pageheap->CacheSizeClass(p, cl); |
|
3531 } |
|
3532 if (cl != 0) { |
|
3533 #ifndef NO_TCMALLOC_SAMPLES |
|
3534 ASSERT(!pageheap->GetDescriptor(p)->sample); |
|
3535 #endif |
|
3536 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent(); |
|
3537 if (heap != NULL) { |
|
3538 heap->Deallocate(ptr, cl); |
|
3539 } else { |
|
3540 // Delete directly into central cache |
|
3541 SLL_SetNext(ptr, NULL); |
|
3542 central_cache[cl].InsertRange(ptr, ptr, 1); |
|
3543 } |
|
3544 } else { |
|
3545 SpinLockHolder h(&pageheap_lock); |
|
3546 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); |
|
3547 ASSERT(span != NULL && span->start == p); |
|
3548 #ifndef NO_TCMALLOC_SAMPLES |
|
3549 if (span->sample) { |
|
3550 DLL_Remove(span); |
|
3551 stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects)); |
|
3552 span->objects = NULL; |
|
3553 } |
|
3554 #endif |
|
3555 pageheap->Delete(span); |
|
3556 } |
|
3557 } |
|
3558 |
|
3559 #ifndef WTF_CHANGES |
|
3560 // For use by exported routines below that want specific alignments |
|
3561 // |
|
3562 // Note: this code can be slow, and can significantly fragment memory. |
|
3563 // The expectation is that memalign/posix_memalign/valloc/pvalloc will |
|
3564 // not be invoked very often. This requirement simplifies our |
|
3565 // implementation and allows us to tune for expected allocation |
|
3566 // patterns. |
|
3567 static void* do_memalign(size_t align, size_t size) { |
|
3568 ASSERT((align & (align - 1)) == 0); |
|
3569 ASSERT(align > 0); |
|
3570 if (pageheap == NULL) TCMalloc_ThreadCache::InitModule(); |
|
3571 |
|
3572 // Allocate at least one byte to avoid boundary conditions below |
|
3573 if (size == 0) size = 1; |
|
3574 |
|
3575 if (size <= kMaxSize && align < kPageSize) { |
|
3576 // Search through acceptable size classes looking for one with |
|
3577 // enough alignment. This depends on the fact that |
|
3578 // InitSizeClasses() currently produces several size classes that |
|
3579 // are aligned at powers of two. We will waste time and space if |
|
3580 // we miss in the size class array, but that is deemed acceptable |
|
3581 // since memalign() should be used rarely. |
|
3582 size_t cl = SizeClass(size); |
|
3583 while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) { |
|
3584 cl++; |
|
3585 } |
|
3586 if (cl < kNumClasses) { |
|
3587 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); |
|
3588 return CheckedMallocResult(heap->Allocate(class_to_size[cl])); |
|
3589 } |
|
3590 } |
|
3591 |
|
3592 // We will allocate directly from the page heap |
|
3593 SpinLockHolder h(&pageheap_lock); |
|
3594 |
|
3595 if (align <= kPageSize) { |
|
3596 // Any page-level allocation will be fine |
|
3597 // TODO: We could put the rest of this page in the appropriate |
|
3598 // TODO: cache but it does not seem worth it. |
|
3599 Span* span = pageheap->New(pages(size)); |
|
3600 return span == NULL ? NULL : SpanToMallocResult(span); |
|
3601 } |
|
3602 |
|
3603 // Allocate extra pages and carve off an aligned portion |
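  // For example, asking for 64KB alignment with 4KB pages over-allocates
  // by roughly 16 extra pages, skips leading pages until the start address
  // is a multiple of 64KB, and returns both the skipped prefix and any
  // trailing excess to the page heap.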
|
3604 const Length alloc = pages(size + align); |
|
3605 Span* span = pageheap->New(alloc); |
|
3606 if (span == NULL) return NULL; |
|
3607 |
|
3608 // Skip starting portion so that we end up aligned |
|
3609 Length skip = 0; |
|
3610 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { |
|
3611 skip++; |
|
3612 } |
|
3613 ASSERT(skip < alloc); |
|
3614 if (skip > 0) { |
|
3615 Span* rest = pageheap->Split(span, skip); |
|
3616 pageheap->Delete(span); |
|
3617 span = rest; |
|
3618 } |
|
3619 |
|
3620 // Skip trailing portion that we do not need to return |
|
3621 const Length needed = pages(size); |
|
3622 ASSERT(span->length >= needed); |
|
3623 if (span->length > needed) { |
|
3624 Span* trailer = pageheap->Split(span, needed); |
|
3625 pageheap->Delete(trailer); |
|
3626 } |
|
3627 return SpanToMallocResult(span); |
|
3628 } |
|
3629 #endif |
|
3630 |
|
3631 // Helpers for use by exported routines below: |
|
3632 |
|
3633 #ifndef WTF_CHANGES |
|
3634 static inline void do_malloc_stats() { |
|
3635 PrintStats(1); |
|
3636 } |
|
3637 #endif |
|
3638 |
|
3639 static inline int do_mallopt(int, int) { |
|
3640 return 1; // Indicates error |
|
3641 } |
|
3642 |
|
3643 #ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance |
|
3644 static inline struct mallinfo do_mallinfo() { |
|
3645 TCMallocStats stats; |
|
3646 ExtractStats(&stats, NULL); |
|
3647 |
|
3648 // Just some of the fields are filled in. |
|
3649 struct mallinfo info; |
|
3650 memset(&info, 0, sizeof(info)); |
|
3651 |
|
3652   // Unfortunately, the struct contains "int" fields, so some of the |
|
3653 // size values will be truncated. |
|
3654 info.arena = static_cast<int>(stats.system_bytes); |
|
3655 info.fsmblks = static_cast<int>(stats.thread_bytes |
|
3656 + stats.central_bytes |
|
3657 + stats.transfer_bytes); |
|
3658 info.fordblks = static_cast<int>(stats.pageheap_bytes); |
|
3659 info.uordblks = static_cast<int>(stats.system_bytes |
|
3660 - stats.thread_bytes |
|
3661 - stats.central_bytes |
|
3662 - stats.transfer_bytes |
|
3663 - stats.pageheap_bytes); |
|
3664 |
|
3665 return info; |
|
3666 } |
|
3667 #endif |
|
3668 |
|
3669 //------------------------------------------------------------------- |
|
3670 // Exported routines |
|
3671 //------------------------------------------------------------------- |
|
3672 |
|
3673 // CAVEAT: The code structure below ensures that MallocHook methods are always |
|
3674 // called from the stack frame of the invoked allocation function. |
|
3675 // heap-checker.cc depends on this to start a stack trace from |
|
3676 // the call to the (de)allocation function. |
|
3677 |
|
3678 #ifndef WTF_CHANGES |
|
3679 extern "C" |
|
3680 #else |
|
3681 #define do_malloc do_malloc<crashOnFailure> |
|
3682 |
|
3683 template <bool crashOnFailure> |
|
3684 void* malloc(size_t); |
|
3685 |
|
3686 void* fastMalloc(size_t size) |
|
3687 { |
|
3688 return malloc<true>(size); |
|
3689 } |
|
3690 |
|
3691 TryMallocReturnValue tryFastMalloc(size_t size) |
|
3692 { |
|
3693 return malloc<false>(size); |
|
3694 } |
|
3695 |
|
3696 template <bool crashOnFailure> |
|
3697 ALWAYS_INLINE |
|
3698 #endif |
|
3699 void* malloc(size_t size) { |
|
3700 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
|
3701 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= size) // If overflow would occur... |
|
3702 return 0; |
|
3703 size += sizeof(AllocAlignmentInteger); |
|
3704 void* result = do_malloc(size); |
|
3705 if (!result) |
|
3706 return 0; |
|
3707 |
|
3708 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; |
|
3709 result = static_cast<AllocAlignmentInteger*>(result) + 1; |
|
3710 #else |
|
3711 void* result = do_malloc(size); |
|
3712 #endif |
|
3713 |
|
3714 #ifndef WTF_CHANGES |
|
3715 MallocHook::InvokeNewHook(result, size); |
|
3716 #endif |
|
3717 return result; |
|
3718 } |
|
3719 |
|
3720 #ifndef WTF_CHANGES |
|
3721 extern "C" |
|
3722 #endif |
|
3723 void free(void* ptr) { |
|
3724 #ifndef WTF_CHANGES |
|
3725 MallocHook::InvokeDeleteHook(ptr); |
|
3726 #endif |
|
3727 |
|
3728 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
|
3729 if (!ptr) |
|
3730 return; |
|
3731 |
|
3732 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(ptr); |
|
3733 if (*header != Internal::AllocTypeMalloc) |
|
3734 Internal::fastMallocMatchFailed(ptr); |
|
3735 do_free(header); |
|
3736 #else |
|
3737 do_free(ptr); |
|
3738 #endif |
|
3739 } |
|
3740 |
|
3741 #ifndef WTF_CHANGES |
|
3742 extern "C" |
|
3743 #else |
|
3744 template <bool crashOnFailure> |
|
3745 void* calloc(size_t, size_t); |
|
3746 |
|
3747 void* fastCalloc(size_t n, size_t elem_size) |
|
3748 { |
|
3749 return calloc<true>(n, elem_size); |
|
3750 } |
|
3751 |
|
3752 TryMallocReturnValue tryFastCalloc(size_t n, size_t elem_size) |
|
3753 { |
|
3754 return calloc<false>(n, elem_size); |
|
3755 } |
|
3756 |
|
3757 template <bool crashOnFailure> |
|
3758 ALWAYS_INLINE |
|
3759 #endif |
|
3760 void* calloc(size_t n, size_t elem_size) { |
|
3761 size_t totalBytes = n * elem_size; |
|
3762 |
|
3763 // Protect against overflow |
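  // For example, with a 32-bit size_t, n = 0x10000 and elem_size = 0x10001
  // wrap totalBytes around to 0x10000; the division check below then
  // rejects the request because totalBytes / elem_size (0) no longer
  // equals n.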
|
3764 if (n > 1 && elem_size && (totalBytes / elem_size) != n) |
|
3765 return 0; |
|
3766 |
|
3767 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
|
3768 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes) // If overflow would occur... |
|
3769 return 0; |
|
3770 |
|
3771 totalBytes += sizeof(AllocAlignmentInteger); |
|
3772 void* result = do_malloc(totalBytes); |
|
3773 if (!result) |
|
3774 return 0; |
|
3775 |
|
3776 memset(result, 0, totalBytes); |
|
3777 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; |
|
3778 result = static_cast<AllocAlignmentInteger*>(result) + 1; |
|
3779 #else |
|
3780 void* result = do_malloc(totalBytes); |
|
3781 if (result != NULL) { |
|
3782 memset(result, 0, totalBytes); |
|
3783 } |
|
3784 #endif |
|
3785 |
|
3786 #ifndef WTF_CHANGES |
|
3787 MallocHook::InvokeNewHook(result, totalBytes); |
|
3788 #endif |
|
3789 return result; |
|
3790 } |
|
3791 |
|
3792 // Since cfree isn't used anywhere, we don't compile it in. |
|
3793 #ifndef WTF_CHANGES |
|
3794 #ifndef WTF_CHANGES |
|
3795 extern "C" |
|
3796 #endif |
|
3797 void cfree(void* ptr) { |
|
3798 #ifndef WTF_CHANGES |
|
3799 MallocHook::InvokeDeleteHook(ptr); |
|
3800 #endif |
|
3801 do_free(ptr); |
|
3802 } |
|
3803 #endif |
|
3804 |
|
3805 #ifndef WTF_CHANGES |
|
3806 extern "C" |
|
3807 #else |
|
3808 template <bool crashOnFailure> |
|
3809 void* realloc(void*, size_t); |
|
3810 |
|
3811 void* fastRealloc(void* old_ptr, size_t new_size) |
|
3812 { |
|
3813 return realloc<true>(old_ptr, new_size); |
|
3814 } |
|
3815 |
|
3816 TryMallocReturnValue tryFastRealloc(void* old_ptr, size_t new_size) |
|
3817 { |
|
3818 return realloc<false>(old_ptr, new_size); |
|
3819 } |
|
3820 |
|
3821 template <bool crashOnFailure> |
|
3822 ALWAYS_INLINE |
|
3823 #endif |
|
3824 void* realloc(void* old_ptr, size_t new_size) { |
|
3825 if (old_ptr == NULL) { |
|
3826 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
|
3827 void* result = malloc(new_size); |
|
3828 #else |
|
3829 void* result = do_malloc(new_size); |
|
3830 #ifndef WTF_CHANGES |
|
3831 MallocHook::InvokeNewHook(result, new_size); |
|
3832 #endif |
|
3833 #endif |
|
3834 return result; |
|
3835 } |
|
3836 if (new_size == 0) { |
|
3837 #ifndef WTF_CHANGES |
|
3838 MallocHook::InvokeDeleteHook(old_ptr); |
|
3839 #endif |
|
3840 free(old_ptr); |
|
3841 return NULL; |
|
3842 } |
|
3843 |
|
3844 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
|
3845 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= new_size) // If overflow would occur... |
|
3846 return 0; |
|
3847 new_size += sizeof(AllocAlignmentInteger); |
|
3848 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(old_ptr); |
|
3849 if (*header != Internal::AllocTypeMalloc) |
|
3850 Internal::fastMallocMatchFailed(old_ptr); |
|
3851 old_ptr = header; |
|
3852 #endif |
|
3853 |
|
3854 // Get the size of the old entry |
|
3855 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift; |
|
3856 size_t cl = pageheap->GetSizeClassIfCached(p); |
|
3857 Span *span = NULL; |
|
3858 size_t old_size; |
|
3859 if (cl == 0) { |
|
3860 span = pageheap->GetDescriptor(p); |
|
3861 cl = span->sizeclass; |
|
3862 pageheap->CacheSizeClass(p, cl); |
|
3863 } |
|
3864 if (cl != 0) { |
|
3865 old_size = ByteSizeForClass(cl); |
|
3866 } else { |
|
3867 ASSERT(span != NULL); |
|
3868 old_size = span->length << kPageShift; |
|
3869 } |
|
3870 |
|
3871 // Reallocate if the new size is larger than the old size, |
|
3872 // or if the new size is significantly smaller than the old size. |
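  // ("Significantly smaller" means the allocation that would back new_size
  // is smaller than the storage currently backing old_ptr, i.e.
  // AllocationSize(new_size) < old_size.)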
|
3873 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) { |
|
3874 // Need to reallocate |
|
3875 void* new_ptr = do_malloc(new_size); |
|
3876 if (new_ptr == NULL) { |
|
3877 return NULL; |
|
3878 } |
|
3879 #ifndef WTF_CHANGES |
|
3880 MallocHook::InvokeNewHook(new_ptr, new_size); |
|
3881 #endif |
|
3882 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); |
|
3883 #ifndef WTF_CHANGES |
|
3884 MallocHook::InvokeDeleteHook(old_ptr); |
|
3885 #endif |
|
3886 // We could use a variant of do_free() that leverages the fact |
|
3887 // that we already know the sizeclass of old_ptr. The benefit |
|
3888 // would be small, so don't bother. |
|
3889 do_free(old_ptr); |
|
3890 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
|
3891 new_ptr = static_cast<AllocAlignmentInteger*>(new_ptr) + 1; |
|
3892 #endif |
|
3893 return new_ptr; |
|
3894 } else { |
|
3895 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
|
3896 old_ptr = static_cast<AllocAlignmentInteger*>(old_ptr) + 1; // Set old_ptr back to the user pointer. |
|
3897 #endif |
|
3898 return old_ptr; |
|
3899 } |
|
3900 } |
|
3901 |
|
3902 #ifdef WTF_CHANGES |
|
3903 #undef do_malloc |
|
3904 #else |
|
3905 |
|
3906 static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER; |
|
3907 |
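// cpp_alloc() implements operator new semantics on top of do_malloc(): on
// failure it consults the installed std::new_handler and either retries
// the allocation, throws std::bad_alloc, or returns 0 when 'nothrow' is
// set.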
|
3908 static inline void* cpp_alloc(size_t size, bool nothrow) { |
|
3909 for (;;) { |
|
3910 void* p = do_malloc(size); |
|
3911 #ifdef PREANSINEW |
|
3912 return p; |
|
3913 #else |
|
3914 if (p == NULL) { // allocation failed |
|
3915 // Get the current new handler. NB: this function is not |
|
3916 // thread-safe. We make a feeble stab at making it so here, but |
|
3917 // this lock only protects against tcmalloc interfering with |
|
3918 // itself, not with other libraries calling set_new_handler. |
|
3919 std::new_handler nh; |
|
3920 { |
|
3921 SpinLockHolder h(&set_new_handler_lock); |
|
3922 nh = std::set_new_handler(0); |
|
3923 (void) std::set_new_handler(nh); |
|
3924 } |
|
3925 // If no new_handler is established, the allocation failed. |
|
3926 if (!nh) { |
|
3927 if (nothrow) return 0; |
|
3928 throw std::bad_alloc(); |
|
3929 } |
|
3930 // Otherwise, try the new_handler. If it returns, retry the |
|
3931 // allocation. If it throws std::bad_alloc, fail the allocation. |
|
3932       // If it throws something else, don't interfere. |
|
3933 try { |
|
3934 (*nh)(); |
|
3935 } catch (const std::bad_alloc&) { |
|
3936 if (!nothrow) throw; |
|
3937 return p; |
|
3938 } |
|
3939 } else { // allocation success |
|
3940 return p; |
|
3941 } |
|
3942 #endif |
|
3943 } |
|
3944 } |
|
3945 |
|
3946 #if ENABLE(GLOBAL_FASTMALLOC_NEW) |
|
3947 |
|
3948 void* operator new(size_t size) { |
|
3949 void* p = cpp_alloc(size, false); |
|
3950 // We keep this next instruction out of cpp_alloc for a reason: when |
|
3951 // it's in, and new just calls cpp_alloc, the optimizer may fold the |
|
3952 // new call into cpp_alloc, which messes up our whole section-based |
|
3953 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc |
|
3954 // isn't the last thing this fn calls, and prevents the folding. |
|
3955 MallocHook::InvokeNewHook(p, size); |
|
3956 return p; |
|
3957 } |
|
3958 |
|
3959 void* operator new(size_t size, const std::nothrow_t&) __THROW { |
|
3960 void* p = cpp_alloc(size, true); |
|
3961 MallocHook::InvokeNewHook(p, size); |
|
3962 return p; |
|
3963 } |
|
3964 |
|
3965 void operator delete(void* p) __THROW { |
|
3966 MallocHook::InvokeDeleteHook(p); |
|
3967 do_free(p); |
|
3968 } |
|
3969 |
|
3970 void operator delete(void* p, const std::nothrow_t&) __THROW { |
|
3971 MallocHook::InvokeDeleteHook(p); |
|
3972 do_free(p); |
|
3973 } |
|
3974 |
|
3975 void* operator new[](size_t size) { |
|
3976 void* p = cpp_alloc(size, false); |
|
3977 // We keep this next instruction out of cpp_alloc for a reason: when |
|
3978 // it's in, and new just calls cpp_alloc, the optimizer may fold the |
|
3979 // new call into cpp_alloc, which messes up our whole section-based |
|
3980 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc |
|
3981 // isn't the last thing this fn calls, and prevents the folding. |
|
3982 MallocHook::InvokeNewHook(p, size); |
|
3983 return p; |
|
3984 } |
|
3985 |
|
3986 void* operator new[](size_t size, const std::nothrow_t&) __THROW { |
|
3987 void* p = cpp_alloc(size, true); |
|
3988 MallocHook::InvokeNewHook(p, size); |
|
3989 return p; |
|
3990 } |
|
3991 |
|
3992 void operator delete[](void* p) __THROW { |
|
3993 MallocHook::InvokeDeleteHook(p); |
|
3994 do_free(p); |
|
3995 } |
|
3996 |
|
3997 void operator delete[](void* p, const std::nothrow_t&) __THROW { |
|
3998 MallocHook::InvokeDeleteHook(p); |
|
3999 do_free(p); |
|
4000 } |
|
4001 |
|
4002 #endif |
|
4003 |
|
4004 extern "C" void* memalign(size_t align, size_t size) __THROW { |
|
4005 void* result = do_memalign(align, size); |
|
4006 MallocHook::InvokeNewHook(result, size); |
|
4007 return result; |
|
4008 } |
|
4009 |
|
4010 extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size) |
|
4011 __THROW { |
|
4012 if (((align % sizeof(void*)) != 0) || |
|
4013 ((align & (align - 1)) != 0) || |
|
4014 (align == 0)) { |
|
4015 return EINVAL; |
|
4016 } |
|
4017 |
|
4018 void* result = do_memalign(align, size); |
|
4019 MallocHook::InvokeNewHook(result, size); |
|
4020 if (result == NULL) { |
|
4021 return ENOMEM; |
|
4022 } else { |
|
4023 *result_ptr = result; |
|
4024 return 0; |
|
4025 } |
|
4026 } |
|
4027 |
|
4028 static size_t pagesize = 0; |
|
4029 |
|
4030 extern "C" void* valloc(size_t size) __THROW { |
|
4031 // Allocate page-aligned object of length >= size bytes |
|
4032 if (pagesize == 0) pagesize = getpagesize(); |
|
4033 void* result = do_memalign(pagesize, size); |
|
4034 MallocHook::InvokeNewHook(result, size); |
|
4035 return result; |
|
4036 } |
|
4037 |
|
4038 extern "C" void* pvalloc(size_t size) __THROW { |
|
4039 // Round up size to a multiple of pagesize |
|
4040 if (pagesize == 0) pagesize = getpagesize(); |
|
4041 size = (size + pagesize - 1) & ~(pagesize - 1); |
|
4042 void* result = do_memalign(pagesize, size); |
|
4043 MallocHook::InvokeNewHook(result, size); |
|
4044 return result; |
|
4045 } |
|
4046 |
|
4047 extern "C" void malloc_stats(void) { |
|
4048 do_malloc_stats(); |
|
4049 } |
|
4050 |
|
4051 extern "C" int mallopt(int cmd, int value) { |
|
4052 return do_mallopt(cmd, value); |
|
4053 } |
|
4054 |
|
4055 #ifdef HAVE_STRUCT_MALLINFO |
|
4056 extern "C" struct mallinfo mallinfo(void) { |
|
4057 return do_mallinfo(); |
|
4058 } |
|
4059 #endif |
|
4060 |
|
4061 //------------------------------------------------------------------- |
|
4062 // Some library routines on RedHat 9 allocate memory using malloc() |
|
4063 // and free it using __libc_free() (or vice-versa). Since we provide |
|
4064 // our own implementations of malloc/free, we need to make sure that |
|
4065 // the __libc_XXX variants (defined as part of glibc) also point to |
|
4066 // the same implementations. |
|
4067 //------------------------------------------------------------------- |
|
4068 |
|
4069 #if defined(__GLIBC__) |
|
4070 extern "C" { |
|
4071 #if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__) |
|
4072 // Potentially faster variants that use the gcc alias extension. |
|
4073 // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check. |
|
4074 # define ALIAS(x) __attribute__ ((weak, alias (x))) |
|
4075 void* __libc_malloc(size_t size) ALIAS("malloc"); |
|
4076 void __libc_free(void* ptr) ALIAS("free"); |
|
4077 void* __libc_realloc(void* ptr, size_t size) ALIAS("realloc"); |
|
4078 void* __libc_calloc(size_t n, size_t size) ALIAS("calloc"); |
|
4079 void __libc_cfree(void* ptr) ALIAS("cfree"); |
|
4080 void* __libc_memalign(size_t align, size_t s) ALIAS("memalign"); |
|
4081 void* __libc_valloc(size_t size) ALIAS("valloc"); |
|
4082 void* __libc_pvalloc(size_t size) ALIAS("pvalloc"); |
|
4083 int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign"); |
|
4084 # undef ALIAS |
|
4085 # else /* not __GNUC__ */ |
|
4086 // Portable wrappers |
|
4087 void* __libc_malloc(size_t size) { return malloc(size); } |
|
4088 void __libc_free(void* ptr) { free(ptr); } |
|
4089 void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); } |
|
4090 void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); } |
|
4091 void __libc_cfree(void* ptr) { cfree(ptr); } |
|
4092 void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); } |
|
4093 void* __libc_valloc(size_t size) { return valloc(size); } |
|
4094 void* __libc_pvalloc(size_t size) { return pvalloc(size); } |
|
4095 int __posix_memalign(void** r, size_t a, size_t s) { |
|
4096 return posix_memalign(r, a, s); |
|
4097 } |
|
4098 # endif /* __GNUC__ */ |
|
4099 } |
|
4100 #endif /* __GLIBC__ */ |
|
4101 |
|
4102 // Override __libc_memalign in libc on linux boxes specially. |
|
4103 // They have a bug in libc that causes them to (very rarely) allocate |
|
4104 // with __libc_memalign() yet deallocate with free(), and the |
|
4105 // definitions above don't catch it. |
|
4106 // This function is an exception to the rule of calling MallocHook methods |
|
4107 // from the stack frame of the allocation function; |
|
4108 // heap-checker handles this special case explicitly. |
|
4109 static void *MemalignOverride(size_t align, size_t size, const void *caller) |
|
4110 __THROW { |
|
4111 void* result = do_memalign(align, size); |
|
4112 MallocHook::InvokeNewHook(result, size); |
|
4113 return result; |
|
4114 } |
|
4115 void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; |
|
4116 |
|
4117 #endif |
|
4118 |
|
4119 #ifdef WTF_CHANGES |
|
4120 void releaseFastMallocFreeMemory() |
|
4121 { |
|
4122 // Flush free pages in the current thread cache back to the page heap. |
|
4123 // Low watermark mechanism in Scavenge() prevents full return on the first pass. |
|
4124 // The second pass flushes everything. |
|
4125 if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) { |
|
4126 threadCache->Scavenge(); |
|
4127 threadCache->Scavenge(); |
|
4128 } |
|
4129 |
|
4130 SpinLockHolder h(&pageheap_lock); |
|
4131 pageheap->ReleaseFreePages(); |
|
4132 } |
|
4133 |
|
4134 FastMallocStatistics fastMallocStatistics() |
|
4135 { |
|
4136 FastMallocStatistics statistics; |
|
4137 |
|
4138 SpinLockHolder lockHolder(&pageheap_lock); |
|
4139 statistics.reservedVMBytes = static_cast<size_t>(pageheap->SystemBytes()); |
|
4140 statistics.committedVMBytes = statistics.reservedVMBytes - pageheap->ReturnedBytes(); |
|
4141 |
|
4142 statistics.freeListBytes = 0; |
|
4143 for (unsigned cl = 0; cl < kNumClasses; ++cl) { |
|
4144 const int length = central_cache[cl].length(); |
|
4145 const int tc_length = central_cache[cl].tc_length(); |
|
4146 |
|
4147 statistics.freeListBytes += ByteSizeForClass(cl) * (length + tc_length); |
|
4148 } |
|
4149 for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_) |
|
4150 statistics.freeListBytes += threadCache->Size(); |
|
4151 |
|
4152 return statistics; |
|
4153 } |
|
4154 |
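// Returns the usable size of a fastMalloc'd pointer: the size-class byte
// size for small objects, the span length in bytes for large ones, and 0
// for pointers that are free, still on a free list, or unknown to the
// page map.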
|
4155 size_t fastMallocSize(const void* ptr) |
|
4156 { |
|
4157 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; |
|
4158 Span* span = pageheap->GetDescriptorEnsureSafe(p); |
|
4159 |
|
4160 if (!span || span->free) |
|
4161 return 0; |
|
4162 |
|
4163 for (void* free = span->objects; free != NULL; free = *((void**) free)) { |
|
4164 if (ptr == free) |
|
4165 return 0; |
|
4166 } |
|
4167 |
|
4168 if (size_t cl = span->sizeclass) |
|
4169 return ByteSizeForClass(cl); |
|
4170 |
|
4171 return span->length << kPageShift; |
|
4172 } |
|
4173 |
|
4174 #if OS(DARWIN) |
|
4175 |
|
4176 class FreeObjectFinder { |
|
4177 const RemoteMemoryReader& m_reader; |
|
4178 HashSet<void*> m_freeObjects; |
|
4179 |
|
4180 public: |
|
4181 FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { } |
|
4182 |
|
4183 void visit(void* ptr) { m_freeObjects.add(ptr); } |
|
4184 bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); } |
|
4185 bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); } |
|
4186 size_t freeObjectCount() const { return m_freeObjects.size(); } |
|
4187 |
|
4188 void findFreeObjects(TCMalloc_ThreadCache* threadCache) |
|
4189 { |
|
4190 for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0)) |
|
4191 threadCache->enumerateFreeObjects(*this, m_reader); |
|
4192 } |
|
4193 |
|
4194 void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList) |
|
4195 { |
|
4196 for (unsigned i = 0; i < numSizes; i++) |
|
4197 centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i); |
|
4198 } |
|
4199 }; |
|
4200 |
|
4201 class PageMapFreeObjectFinder { |
|
4202 const RemoteMemoryReader& m_reader; |
|
4203 FreeObjectFinder& m_freeObjectFinder; |
|
4204 |
|
4205 public: |
|
4206 PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder) |
|
4207 : m_reader(reader) |
|
4208 , m_freeObjectFinder(freeObjectFinder) |
|
4209 { } |
|
4210 |
|
4211 int visit(void* ptr) const |
|
4212 { |
|
4213 if (!ptr) |
|
4214 return 1; |
|
4215 |
|
4216 Span* span = m_reader(reinterpret_cast<Span*>(ptr)); |
|
4217 if (span->free) { |
|
4218 void* ptr = reinterpret_cast<void*>(span->start << kPageShift); |
|
4219 m_freeObjectFinder.visit(ptr); |
|
4220 } else if (span->sizeclass) { |
|
4221 // Walk the free list of the small-object span, keeping track of each object seen |
|
4222 for (void* nextObject = span->objects; nextObject; nextObject = *m_reader(reinterpret_cast<void**>(nextObject))) |
|
4223 m_freeObjectFinder.visit(nextObject); |
|
4224 } |
|
4225 return span->length; |
|
4226 } |
|
4227 }; |
|
4228 |
|
4229 class PageMapMemoryUsageRecorder { |
|
4230 task_t m_task; |
|
4231 void* m_context; |
|
4232 unsigned m_typeMask; |
|
4233 vm_range_recorder_t* m_recorder; |
|
4234 const RemoteMemoryReader& m_reader; |
|
4235 const FreeObjectFinder& m_freeObjectFinder; |
|
4236 |
|
4237 HashSet<void*> m_seenPointers; |
|
4238 Vector<Span*> m_coalescedSpans; |
|
4239 |
|
4240 public: |
|
4241 PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder) |
|
4242 : m_task(task) |
|
4243 , m_context(context) |
|
4244 , m_typeMask(typeMask) |
|
4245 , m_recorder(recorder) |
|
4246 , m_reader(reader) |
|
4247 , m_freeObjectFinder(freeObjectFinder) |
|
4248 { } |
|
4249 |
|
4250 ~PageMapMemoryUsageRecorder() |
|
4251 { |
|
4252 ASSERT(!m_coalescedSpans.size()); |
|
4253 } |
|
4254 |
|
4255 void recordPendingRegions() |
|
4256 { |
|
4257 Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1]; |
|
4258 vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 }; |
|
4259 ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize); |
|
4260 |
|
4261 // Mark the memory region the spans represent as a candidate for containing pointers |
|
4262 if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE) |
|
4263 (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1); |
|
4264 |
|
4265 if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) { |
|
4266 m_coalescedSpans.clear(); |
|
4267 return; |
|
4268 } |
|
4269 |
|
4270 Vector<vm_range_t, 1024> allocatedPointers; |
|
4271 for (size_t i = 0; i < m_coalescedSpans.size(); ++i) { |
|
4272 Span *theSpan = m_coalescedSpans[i]; |
|
4273 if (theSpan->free) |
|
4274 continue; |
|
4275 |
|
4276 vm_address_t spanStartAddress = theSpan->start << kPageShift; |
|
4277 vm_size_t spanSizeInBytes = theSpan->length * kPageSize; |
|
4278 |
|
4279 if (!theSpan->sizeclass) { |
|
4280 // If it's an allocated large object span, mark it as in use |
|
4281 if (!m_freeObjectFinder.isFreeObject(spanStartAddress)) |
|
4282 allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes}); |
|
4283 } else { |
|
4284 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass); |
|
4285 |
|
4286 // Mark each allocated small object within the span as in use |
|
4287 const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes; |
|
4288 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) { |
|
4289 if (!m_freeObjectFinder.isFreeObject(object)) |
|
4290 allocatedPointers.append((vm_range_t){object, objectSize}); |
|
4291 } |
|
4292 } |
|
4293 } |
|
4294 |
|
4295 (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size()); |
|
4296 |
|
4297 m_coalescedSpans.clear(); |
|
4298 } |
|
4299 |
|
4300 int visit(void* ptr) |
|
4301 { |
|
4302 if (!ptr) |
|
4303 return 1; |
|
4304 |
|
4305 Span* span = m_reader(reinterpret_cast<Span*>(ptr)); |
|
4306 if (!span->start) |
|
4307 return 1; |
|
4308 |
|
4309 if (m_seenPointers.contains(ptr)) |
|
4310 return span->length; |
|
4311 m_seenPointers.add(ptr); |
|
4312 |
|
4313 if (!m_coalescedSpans.size()) { |
|
4314 m_coalescedSpans.append(span); |
|
4315 return span->length; |
|
4316 } |
|
4317 |
|
4318 Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1]; |
|
4319 vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift; |
|
4320 vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize; |
|
4321 |
|
4322 // If the new span is adjacent to the previous span, do nothing for now. |
|
4323 vm_address_t spanStartAddress = span->start << kPageShift; |
|
4324 if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) { |
|
4325 m_coalescedSpans.append(span); |
|
4326 return span->length; |
|
4327 } |
|
4328 |
|
4329 // New span is not adjacent to previous span, so record the spans coalesced so far. |
|
4330 recordPendingRegions(); |
|
4331 m_coalescedSpans.append(span); |
|
4332 |
|
4333 return span->length; |
|
4334 } |
|
4335 }; |
|
4336 |
|
4337 class AdminRegionRecorder { |
|
4338 task_t m_task; |
|
4339 void* m_context; |
|
4340 unsigned m_typeMask; |
|
4341 vm_range_recorder_t* m_recorder; |
|
4342 const RemoteMemoryReader& m_reader; |
|
4343 |
|
4344 Vector<vm_range_t, 1024> m_pendingRegions; |
|
4345 |
|
4346 public: |
|
4347 AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader) |
|
4348 : m_task(task) |
|
4349 , m_context(context) |
|
4350 , m_typeMask(typeMask) |
|
4351 , m_recorder(recorder) |
|
4352 , m_reader(reader) |
|
4353 { } |
|
4354 |
|
4355 void recordRegion(vm_address_t ptr, size_t size) |
|
4356 { |
|
4357 if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE) |
|
4358 m_pendingRegions.append((vm_range_t){ ptr, size }); |
|
4359 } |
|
4360 |
|
4361 void visit(void *ptr, size_t size) |
|
4362 { |
|
4363 recordRegion(reinterpret_cast<vm_address_t>(ptr), size); |
|
4364 } |
|
4365 |
|
4366 void recordPendingRegions() |
|
4367 { |
|
4368 if (m_pendingRegions.size()) { |
|
4369 (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size()); |
|
4370 m_pendingRegions.clear(); |
|
4371 } |
|
4372 } |
|
4373 |
|
4374 ~AdminRegionRecorder() |
|
4375 { |
|
4376 ASSERT(!m_pendingRegions.size()); |
|
4377 } |
|
4378 }; |
|
4379 |
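// Malloc zone introspection entry point. It reads the target task's heap
// structures through 'reader' and then, roughly, makes three passes:
// collect free objects from the thread caches and central caches, walk the
// page map to record spans and in-use object ranges, and finally report
// the allocator's own metadata regions.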
|
4380 kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder) |
|
4381 { |
|
4382 RemoteMemoryReader memoryReader(task, reader); |
|
4383 |
|
4384 InitSizeClasses(); |
|
4385 |
|
4386 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress)); |
|
4387 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap); |
|
4388 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps); |
|
4389 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer); |
|
4390 |
|
4391 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses); |
|
4392 |
|
4393 FreeObjectFinder finder(memoryReader); |
|
4394 finder.findFreeObjects(threadHeaps); |
|
4395 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches); |
|
4396 |
|
4397 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_; |
|
4398 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder); |
|
4399 pageMap->visitValues(pageMapFinder, memoryReader); |
|
4400 |
|
4401 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder); |
|
4402 pageMap->visitValues(usageRecorder, memoryReader); |
|
4403 usageRecorder.recordPendingRegions(); |
|
4404 |
|
4405 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader); |
|
4406 pageMap->visitAllocations(adminRegionRecorder, memoryReader); |
|
4407 |
|
4408 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator); |
|
4409 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator); |
|
4410 |
|
4411 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); |
|
4412 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); |
|
4413 |
|
4414 adminRegionRecorder.recordPendingRegions(); |
|
4415 |
|
4416 return 0; |
|
4417 } |
|
4418 |
|
4419 size_t FastMallocZone::size(malloc_zone_t*, const void*) |
|
4420 { |
|
4421 return 0; |
|
4422 } |
|
4423 |
|
4424 void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t) |
|
4425 { |
|
4426 return 0; |
|
4427 } |
|
4428 |
|
4429 void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t) |
|
4430 { |
|
4431 return 0; |
|
4432 } |
|
4433 |
|
4434 void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr) |
|
4435 { |
|
4436 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer |
|
4437 // is not in this zone. When this happens, the pointer being freed was not allocated by any |
|
4438     // zone, so we need to print a useful error for the application developer. |
|
4439 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr); |
|
4440 } |
|
4441 |
|
4442 void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t) |
|
4443 { |
|
4444 return 0; |
|
4445 } |
|
4446 |
|
4447 |
|
4448 #undef malloc |
|
4449 #undef free |
|
4450 #undef realloc |
|
4451 #undef calloc |
|
4452 |
|
4453 extern "C" { |
|
4454 malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print, |
|
4455 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics |
|
4456 |
|
4457 #if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !OS(IPHONE_OS) |
|
4458 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher. |
|
4459 #endif |
|
4460 #if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD) && !OS(IPHONE_OS) |
|
4461 , 0, 0, 0, 0 // These members will not be used unless the zone advertises itself as version seven or higher. |
|
4462 #endif |
|
4463 |
|
4464 }; |
|
4465 } |
|
4466 |
|
4467 FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator) |
|
4468 : m_pageHeap(pageHeap) |
|
4469 , m_threadHeaps(threadHeaps) |
|
4470 , m_centralCaches(centralCaches) |
|
4471 , m_spanAllocator(spanAllocator) |
|
4472 , m_pageHeapAllocator(pageHeapAllocator) |
|
4473 { |
|
4474 memset(&m_zone, 0, sizeof(m_zone)); |
|
4475 m_zone.version = 4; |
|
4476 m_zone.zone_name = "JavaScriptCore FastMalloc"; |
|
4477 m_zone.size = &FastMallocZone::size; |
|
4478 m_zone.malloc = &FastMallocZone::zoneMalloc; |
|
4479 m_zone.calloc = &FastMallocZone::zoneCalloc; |
|
4480 m_zone.realloc = &FastMallocZone::zoneRealloc; |
|
4481 m_zone.free = &FastMallocZone::zoneFree; |
|
4482 m_zone.valloc = &FastMallocZone::zoneValloc; |
|
4483 m_zone.destroy = &FastMallocZone::zoneDestroy; |
|
4484 m_zone.introspect = &jscore_fastmalloc_introspection; |
|
4485 malloc_zone_register(&m_zone); |
|
4486 } |
|
4487 |
|
4488 |
|
4489 void FastMallocZone::init() |
|
4490 { |
|
4491 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator); |
|
4492 } |
|
4493 |
|
4494 #endif // OS(DARWIN) |
|
4495 |
|
4496 } // namespace WTF |
|
4497 #endif // WTF_CHANGES |
|
4498 |
|
4499 #endif // FORCE_SYSTEM_MALLOC |