2018-04-14 19:52:17 +07:00
|
|
|
|
# 4.14 glibc tcache 机制
|
|
|
|
|
|
2018-04-15 14:50:03 +07:00
|
|
|
|
- [tcache](#tcache)
|
|
|
|
|
- [安全性分析](#安全性分析)
|
|
|
|
|
- [CTF 实例](#ctf-实例)
|
2018-04-14 19:52:17 +07:00
|
|
|
|
- [参考资料](#参考资料)
|
|
|
|
|
|
|
|
|
|
|
2018-04-15 14:50:03 +07:00
|
|
|
|
## tcache
|
|
|
|
|
tcache 全名 thread local caching,它为每个线程创建一个缓存(cache),从而实现无锁的分配算法,有不错的性能提升。libc-2.26 正式提供了该机制,并默认开启,具体可以查看这次 [commit](https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=d5c3fafc4307c9b7a4c7d5cb381fcdbfad340bcc)。
|
|
|
|
|
|
|
|
|
|
#### 数据结构
|
|
|
|
|
glibc 在编译时使用 `USE_TCACHE` 条件来开启 tcache 机制,并定义了下面一些东西:
|
|
|
|
|
```c
|
|
|
|
|
#if USE_TCACHE
|
|
|
|
|
/* We want 64 entries. This is an arbitrary limit, which tunables can reduce. */
|
|
|
|
|
# define TCACHE_MAX_BINS 64
|
|
|
|
|
# define MAX_TCACHE_SIZE tidx2usize (TCACHE_MAX_BINS-1)
|
|
|
|
|
|
|
|
|
|
/* Only used to pre-fill the tunables. */
|
|
|
|
|
# define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
|
|
|
|
|
|
|
|
|
|
/* When "x" is from chunksize(). */
|
|
|
|
|
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
|
|
|
|
|
/* When "x" is a user-provided size. */
|
|
|
|
|
# define usize2tidx(x) csize2tidx (request2size (x))
|
|
|
|
|
|
|
|
|
|
/* With rounding and alignment, the bins are...
|
|
|
|
|
idx 0 bytes 0..24 (64-bit) or 0..12 (32-bit)
|
|
|
|
|
idx 1 bytes 25..40 or 13..20
|
|
|
|
|
idx 2 bytes 41..56 or 21..28
|
|
|
|
|
etc. */
|
|
|
|
|
|
|
|
|
|
/* This is another arbitrary limit, which tunables can change. Each
|
|
|
|
|
tcache bin will hold at most this number of chunks. */
|
|
|
|
|
# define TCACHE_FILL_COUNT 7
|
|
|
|
|
#endif
|
|
|
|
|
```
|
|
|
|
|
值得注意的是,每个线程默认使用 64 个单链表结构的 bins,每个 bins 最多存放 7 个 chunk。chunk 的大小在 64 位机器上以 16 字节递增,从 24 到 1032 字节。32 位机器上则是以 8 字节递增,从 12 到 516 字节(即 `tidx2usize (TCACHE_MAX_BINS-1)` = 63 × 8 + 12)。所以 tcache bin 只用于存放 non-large 的 chunk。
|
|
|
|
|
|
|
|
|
|
然后引入了两个新的数据结构,`tcache_entry` 和 `tcache_perthread_struct`:
|
|
|
|
|
```c
|
|
|
|
|
/* We overlay this structure on the user-data portion of a chunk when
|
|
|
|
|
the chunk is stored in the per-thread cache. */
|
|
|
|
|
typedef struct tcache_entry
|
|
|
|
|
{
|
|
|
|
|
struct tcache_entry *next;
|
|
|
|
|
} tcache_entry;
|
|
|
|
|
|
|
|
|
|
/* There is one of these for each thread, which contains the
|
|
|
|
|
per-thread cache (hence "tcache_perthread_struct"). Keeping
|
|
|
|
|
overall size low is mildly important. Note that COUNTS and ENTRIES
|
|
|
|
|
are redundant (we could have just counted the linked list each
|
|
|
|
|
time), this is for performance reasons. */
|
|
|
|
|
typedef struct tcache_perthread_struct
|
|
|
|
|
{
|
|
|
|
|
char counts[TCACHE_MAX_BINS];
|
|
|
|
|
tcache_entry *entries[TCACHE_MAX_BINS];
|
|
|
|
|
} tcache_perthread_struct;
|
|
|
|
|
|
|
|
|
|
static __thread tcache_perthread_struct *tcache = NULL;
|
|
|
|
|
```
|
|
|
|
|
tcache_perthread_struct 包含一个数组 entries,用于放置 64 个 bins,数组 counts 存放每个 bins 中的 chunk 数量。每个被放入相应 bins 中的 chunk 都会在其用户数据中包含一个 tcache_entry(FD指针),指向同 bins 中的下一个 chunk,构成单链表。
|
|
|
|
|
|
|
|
|
|
tcache 初始化操作如下:
|
|
|
|
|
```c
|
|
|
|
|
static void
|
|
|
|
|
tcache_init(void)
|
|
|
|
|
{
|
|
|
|
|
mstate ar_ptr;
|
|
|
|
|
void *victim = 0;
|
|
|
|
|
const size_t bytes = sizeof (tcache_perthread_struct);
|
|
|
|
|
|
|
|
|
|
if (tcache_shutting_down)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
arena_get (ar_ptr, bytes);
|
|
|
|
|
victim = _int_malloc (ar_ptr, bytes);
|
|
|
|
|
if (!victim && ar_ptr != NULL)
|
|
|
|
|
{
|
|
|
|
|
ar_ptr = arena_get_retry (ar_ptr, bytes);
|
|
|
|
|
victim = _int_malloc (ar_ptr, bytes);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (ar_ptr != NULL)
|
|
|
|
|
__libc_lock_unlock (ar_ptr->mutex);
|
|
|
|
|
|
|
|
|
|
/* In a low memory situation, we may not be able to allocate memory
|
|
|
|
|
- in which case, we just keep trying later. However, we
|
|
|
|
|
typically do this very early, so either there is sufficient
|
|
|
|
|
memory, or there isn't enough memory to do non-trivial
|
|
|
|
|
allocations anyway. */
|
|
|
|
|
if (victim)
|
|
|
|
|
{
|
|
|
|
|
tcache = (tcache_perthread_struct *) victim;
|
|
|
|
|
memset (tcache, 0, sizeof (tcache_perthread_struct));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
#### 使用
|
|
|
|
|
触发在 tcache 中放入 chunk 的操作:
|
|
|
|
|
- free 时:在 fastbin 的操作之前进行,如果 chunk size 符合要求,并且对应的 bins 还未装满,则将其放进去。
|
|
|
|
|
```c
|
|
|
|
|
#if USE_TCACHE
|
|
|
|
|
{
|
|
|
|
|
size_t tc_idx = csize2tidx (size);
|
|
|
|
|
|
|
|
|
|
if (tcache
|
|
|
|
|
&& tc_idx < mp_.tcache_bins
|
|
|
|
|
&& tcache->counts[tc_idx] < mp_.tcache_count)
|
|
|
|
|
{
|
|
|
|
|
tcache_put (p, tc_idx);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
```
|
|
|
|
|
- malloc 时:有三个地方会触发。
|
|
|
|
|
- 如果从 fastbin 中成功返回了一个需要的 chunk,那么对应 fastbin 中的其他 chunk 会被放进相应的 tcache bin 中,直到上限。需要注意的是 chunks 在 tcache bin 的顺序和在 fastbin 中的顺序是反过来的。
|
|
|
|
|
|
|
|
|
|
```c
|
|
|
|
|
#if USE_TCACHE
|
|
|
|
|
/* While we're here, if we see other chunks of the same size,
|
|
|
|
|
stash them in the tcache. */
|
|
|
|
|
size_t tc_idx = csize2tidx (nb);
|
|
|
|
|
if (tcache && tc_idx < mp_.tcache_bins)
|
|
|
|
|
{
|
|
|
|
|
mchunkptr tc_victim;
|
|
|
|
|
|
|
|
|
|
/* While bin not empty and tcache not full, copy chunks. */
|
|
|
|
|
while (tcache->counts[tc_idx] < mp_.tcache_count
|
|
|
|
|
&& (tc_victim = *fb) != NULL)
|
|
|
|
|
{
|
|
|
|
|
if (SINGLE_THREAD_P)
|
|
|
|
|
*fb = tc_victim->fd;
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
REMOVE_FB (fb, pp, tc_victim);
|
|
|
|
|
if (__glibc_unlikely (tc_victim == NULL))
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
tcache_put (tc_victim, tc_idx);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
```
|
|
|
|
|
- smallbin 中的情况与 fastbin 相似,双链表中的剩余 chunk 会被填充到 tcache bin 中,直到上限。
|
|
|
|
|
```c
|
|
|
|
|
#if USE_TCACHE
|
|
|
|
|
/* While we're here, if we see other chunks of the same size,
|
|
|
|
|
stash them in the tcache. */
|
|
|
|
|
size_t tc_idx = csize2tidx (nb);
|
|
|
|
|
if (tcache && tc_idx < mp_.tcache_bins)
|
|
|
|
|
{
|
|
|
|
|
mchunkptr tc_victim;
|
|
|
|
|
|
|
|
|
|
/* While bin not empty and tcache not full, copy chunks over. */
|
|
|
|
|
while (tcache->counts[tc_idx] < mp_.tcache_count
|
|
|
|
|
&& (tc_victim = last (bin)) != bin)
|
|
|
|
|
{
|
|
|
|
|
if (tc_victim != 0)
|
|
|
|
|
{
|
|
|
|
|
bck = tc_victim->bk;
|
|
|
|
|
set_inuse_bit_at_offset (tc_victim, nb);
|
|
|
|
|
if (av != &main_arena)
|
|
|
|
|
set_non_main_arena (tc_victim);
|
|
|
|
|
bin->bk = bck;
|
|
|
|
|
bck->fd = bin;
|
|
|
|
|
|
|
|
|
|
tcache_put (tc_victim, tc_idx);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
```
|
|
|
|
|
- binning code(chunk 合并等其他情况)中,每一个符合要求的 chunk 都会优先被放入 tcache,而不是直接返回(除非 tcache 被装满)。寻找结束后,tcache 会返回其中一个。
|
|
|
|
|
```c
|
|
|
|
|
#if USE_TCACHE
|
|
|
|
|
/* Fill cache first, return to user only if cache fills.
|
|
|
|
|
We may return one of these chunks later. */
|
|
|
|
|
if (tcache_nb
|
|
|
|
|
&& tcache->counts[tc_idx] < mp_.tcache_count)
|
|
|
|
|
{
|
|
|
|
|
tcache_put (victim, tc_idx);
|
|
|
|
|
return_cached = 1;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
#endif
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
触发从 tcache 中取出 chunk 的操作:
|
|
|
|
|
- 在 `__libc_malloc()` 调用 `_int_malloc()` 之前,如果 tcache bin 中有符合要求的 chunk,则直接将它返回。
|
|
|
|
|
```c
|
|
|
|
|
#if USE_TCACHE
|
|
|
|
|
/* int_free also calls request2size, be careful to not pad twice. */
|
|
|
|
|
size_t tbytes;
|
|
|
|
|
checked_request2size (bytes, tbytes);
|
|
|
|
|
size_t tc_idx = csize2tidx (tbytes);
|
|
|
|
|
|
|
|
|
|
MAYBE_INIT_TCACHE ();
|
|
|
|
|
|
|
|
|
|
DIAG_PUSH_NEEDS_COMMENT;
|
|
|
|
|
if (tc_idx < mp_.tcache_bins
|
|
|
|
|
/*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
|
|
|
|
|
&& tcache
|
|
|
|
|
&& tcache->entries[tc_idx] != NULL)
|
|
|
|
|
{
|
|
|
|
|
return tcache_get (tc_idx);
|
|
|
|
|
}
|
|
|
|
|
DIAG_POP_NEEDS_COMMENT;
|
|
|
|
|
#endif
|
|
|
|
|
```
|
|
|
|
|
- binning code 中,如果在 tcache 中放入 chunk 达到上限,则会直接返回最后一个 chunk。
|
|
|
|
|
```c
|
|
|
|
|
#if USE_TCACHE
|
|
|
|
|
/* If we've processed as many chunks as we're allowed while
|
|
|
|
|
filling the cache, return one of the cached ones. */
|
|
|
|
|
++tcache_unsorted_count;
|
|
|
|
|
if (return_cached
|
|
|
|
|
&& mp_.tcache_unsorted_limit > 0
|
|
|
|
|
&& tcache_unsorted_count > mp_.tcache_unsorted_limit)
|
|
|
|
|
{
|
|
|
|
|
return tcache_get (tc_idx);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
```
|
|
|
|
|
当然默认情况下没有限制,所以这段代码也不会执行:
|
|
|
|
|
```c
|
|
|
|
|
.tcache_unsorted_limit = 0 /* No limit. */
|
|
|
|
|
```
|
|
|
|
|
- binning code 结束后,如果没有直接返回(如上),那么如果有至少一个符合要求的 chunk 被找到,则返回最后一个。
|
|
|
|
|
```c
|
|
|
|
|
#if USE_TCACHE
|
|
|
|
|
/* If all the small chunks we found ended up cached, return one now. */
|
|
|
|
|
if (return_cached)
|
|
|
|
|
{
|
|
|
|
|
return tcache_get (tc_idx);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
另外还需要注意的是 tcache 中的 chunk 不会被合并,无论是相邻 chunk,还是 chunk 和 top chunk。因为这些 chunk 会被标记为 inuse。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## 安全性分析
|
|
|
|
|
`tcache_put()` 和 `tcache_get()` 分别用于向单链表中放入 chunk 和从单链表中取出 chunk:
|
|
|
|
|
```c
|
|
|
|
|
/* Caller must ensure that we know tc_idx is valid and there's room
|
|
|
|
|
for more chunks. */
|
|
|
|
|
static __always_inline void
|
|
|
|
|
tcache_put (mchunkptr chunk, size_t tc_idx)
|
|
|
|
|
{
|
|
|
|
|
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
|
|
|
|
|
assert (tc_idx < TCACHE_MAX_BINS);
|
|
|
|
|
e->next = tcache->entries[tc_idx];
|
|
|
|
|
tcache->entries[tc_idx] = e;
|
|
|
|
|
++(tcache->counts[tc_idx]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Caller must ensure that we know tc_idx is valid and there's
|
|
|
|
|
available chunks to remove. */
|
|
|
|
|
static __always_inline void *
|
|
|
|
|
tcache_get (size_t tc_idx)
|
|
|
|
|
{
|
|
|
|
|
tcache_entry *e = tcache->entries[tc_idx];
|
|
|
|
|
assert (tc_idx < TCACHE_MAX_BINS);
|
|
|
|
|
assert (tcache->entries[tc_idx] > 0);
|
|
|
|
|
tcache->entries[tc_idx] = e->next;
|
|
|
|
|
--(tcache->counts[tc_idx]);
|
|
|
|
|
return (void *) e;
|
|
|
|
|
}
|
|
|
|
|
```
|
|
|
|
|
可以看到注释部分,它假设调用者已经对参数进行了有效性检查,然而由于对 tcache 的操作在 free 和 malloc 中往往都处于很靠前的位置,导致原来的许多有效性检查都被无视了。这样做虽然有利于提升执行效率,但对安全性造成了负面影响。
|
|
|
|
|
|
|
|
|
|
#### tcache_house_of_spirit
|
|
|
|
|
|
|
|
|
|
#### tcache_overlapping_chunks
|
|
|
|
|
|
|
|
|
|
#### tcache_poisoning
|
|
|
|
|
|
|
|
|
|
#### tcache_fastbin_dup
|
|
|
|
|
|
|
|
|
|
这一节的代码可以在[这里](../src/Others/4.14_glibc_tcache)找到。其他的一些情况可以参考章节 3.3.6。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## CTF 实例
|
|
|
|
|
在最近的 CTF 中,已经开始尝试使用 libc-2.26,比如章节 6.1.15 中的例子。
|
|
|
|
|
|
|
|
|
|
|
2018-04-14 19:52:17 +07:00
|
|
|
|
## 参考资料
|
|
|
|
|
- [thread local caching in glibc malloc](http://tukan.farm/2017/07/08/tcache/)
|
2018-04-15 14:50:03 +07:00
|
|
|
|
- [MallocInternals](https://sourceware.org/glibc/wiki/MallocInternals)
|