◐ Shell
clean mode source ↗

WIP: Cache for obmalloc arena_map_is_used() by nascheme · Pull Request #25130 · python/cpython

Expand Up @@ -1397,6 +1397,114 @@ static int arena_map_bot_count; static arena_map_bot_t arena_map_root; #endif
/* arena_map_cache[...] is a directly mapped cache for the result of * address_in_range(pool). The two low order bits correspond to the return * value of address_in_range(), 00 == no entry, 01 == small, 10 == large. For * the cache, small means it was allocated by obmalloc, large is allocated by * other malloc. The high order bits are the pool address. */ #define CACHE_BITS 7 #define CACHE_SIZE (1<<CACHE_BITS) #define CACHE_MASK (CACHE_SIZE - 1)
#define CACHE_VALUE_EMPTY 0 /* pool address contains large object (outside arena) */ #define CACHE_VALUE_LARGE 1 /* pool address contains small object (inside arena) */ #define CACHE_VALUE_SMALL 2 #define CACHE_VALUE_MASK (CACHE_VALUE_LARGE | CACHE_VALUE_SMALL)
static uintptr_t arena_map_cache[CACHE_SIZE];
/* turn off for benchmarks, some overhead */ #define CACHE_STATS
#ifdef CACHE_STATS static unsigned int cache_hits; static unsigned int cache_collisions; static unsigned int cache_lookups; #endif
static inline uintptr_t pool_number(poolp pool) { return AS_UINT(pool) >> POOL_BITS; }
static inline uintptr_t cache_index(poolp pool) { uintptr_t pool_number = AS_UINT(pool) >> POOL_BITS; return pool_number & CACHE_MASK; }
static inline void cache_put(poolp pool, int value) { int idx = cache_index(pool); uintptr_t entry = AS_UINT(pool) | value; assert((entry & ~CACHE_VALUE_MASK) == AS_UINT(pool)); #ifdef CACHE_STATS if (arena_map_cache[idx] != entry) { if (arena_map_cache[idx] != 0) { cache_collisions++; } arena_map_cache[idx] = entry; } #else arena_map_cache[idx] = entry; #endif }
static uintptr_t cache_get(poolp pool) { int idx = cache_index(pool); uintptr_t entry = arena_map_cache[idx]; #ifdef CACHE_STATS cache_lookups++; #endif if ((entry & ~CACHE_VALUE_MASK) != AS_UINT(pool)) { /* entry exists but pool addr doesn't match */ return CACHE_VALUE_EMPTY; } #ifdef CACHE_STATS cache_hits++; #endif return (entry & CACHE_VALUE_MASK); }
static inline void cache_clear(poolp pool) { int idx = cache_index(pool); arena_map_cache[idx] = 0; }
#if 0 /* setup cache for all pools in arena */ static void cache_mark_arena(struct arena_object *arenaobj) { uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenaobj->address, POOL_SIZE); for (uint i = 0; i < arenaobj->ntotalpools; i++) { cache_put((poolp)base, CACHE_VALUE_SMALL); base += POOL_SIZE; } } #endif
/* clear cache for all pools in arena */ static void cache_clear_arena(struct arena_object *arenaobj) { uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenaobj->address, POOL_SIZE); for (uint i = 0; i < arenaobj->ntotalpools; i++) { cache_clear((poolp)base); base += POOL_SIZE; } }
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or * it cannot be created */ static arena_map_bot_t * Expand Down Expand Up @@ -1627,6 +1735,8 @@ new_arena(void) } arenaobj->ntotalpools = arenaobj->nfreepools;
//cache_mark_arena(arenaobj);
return arenaobj; }
Expand All @@ -1639,7 +1749,19 @@ new_arena(void) static bool address_in_range(void *p, poolp pool) { return arena_map_is_used(p); bool in_arena; int cache_value = cache_get(pool); if (cache_value == CACHE_VALUE_EMPTY) { in_arena = arena_map_is_used(p); uintptr_t cache_value = in_arena ? CACHE_VALUE_SMALL : CACHE_VALUE_LARGE; cache_put(pool, cache_value); //assert(cache_get(pool) == cache_value); } else { in_arena = cache_value == CACHE_VALUE_SMALL; } assert(in_arena == arena_map_is_used(p)); return in_arena; } #else /* Expand Down Expand Up @@ -1889,6 +2011,7 @@ allocate_from_new_pool(uint size) return bp; }

/* pymalloc allocator
Return a pointer to newly allocated memory if pymalloc allocated memory. Expand Down Expand Up @@ -1940,7 +2063,7 @@ pymalloc_alloc(void *ctx, size_t nbytes) */ bp = allocate_from_new_pool(size); }
//cache_put(bp, CACHE_VALUE_SMALL); return (void *)bp; }
Expand All @@ -1955,6 +2078,7 @@ _PyObject_Malloc(void *ctx, size_t nbytes)
ptr = PyMem_RawMalloc(nbytes); if (ptr != NULL) { //cache_put(ptr, CACHE_VALUE_LARGE); raw_allocated_blocks++; } return ptr; Expand All @@ -1975,6 +2099,7 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
ptr = PyMem_RawCalloc(nelem, elsize); if (ptr != NULL) { //cache_put(ptr, CACHE_VALUE_LARGE); raw_allocated_blocks++; } return ptr; Expand Down Expand Up @@ -2082,6 +2207,7 @@ insert_to_freepool(poolp pool) #if WITH_PYMALLOC_RADIX_TREE /* mark arena region as not under control of obmalloc */ arena_map_mark_used(ao->address, 0); cache_clear_arena(ao); #endif
/* Free the entire arena. */ Expand Down Expand Up @@ -2236,6 +2362,7 @@ _PyObject_Free(void *ctx, void *p) if (UNLIKELY(!pymalloc_free(ctx, p))) { /* pymalloc didn't allocate this address */ PyMem_RawFree(p); cache_clear(POOL_ADDR(p)); raw_allocated_blocks--; } } Expand Down Expand Up @@ -3061,6 +3188,11 @@ _PyObject_DebugMallocStats(FILE *out) sizeof(arena_map_bot_t) * arena_map_bot_count); #endif (void)printone(out, "Total", total);
#ifdef CACHE_STATS fprintf(out, "cache hits %d lookups %d collisions %d %.3f\n", cache_hits, cache_lookups, cache_collisions, ((double)cache_hits)/(cache_lookups)); #endif return 1; }
Expand Down