- Published on
lua 的垃圾清理
- Authors

- Name
- Ushen
每一个lua对象都有一个公共头部CommonHeader
它包含了一个垃圾回收对象链表next, 标记tt和marked
/*
** Common Header for all collectable objects (in macro form, to be
** included in other objects)
*/
#define CommonHeader struct GCObject *next; lu_byte tt; lu_byte marked
/* Common type for all collectable objects */
typedef struct GCObject {
CommonHeader;
} GCObject;
全局状态global_state里有一个属性allgc,存储全局的可收集对象列表
每创建一个可收集对象就往链表插入一个对象GCObject,采用头插法
/*
** create a new collectable object (with given type and size) and link
** it to 'allgc' list.
*/
GCObject *luaC_newobj (lua_State *L, int tt, size_t sz) {
global_State *g = G(L);
GCObject *o = cast(GCObject *, luaM_newobject(L, novariant(tt), sz));
o->marked = luaC_white(g);
o->tt = tt;
o->next = g->allgc;
g->allgc = o;
return o;
}
垃圾收集状态有9种,由global_state里的gcstate标记,初始为GCSpause
/*
** Possible states of the Garbage Collector
*/
#define GCSpropagate 0
#define GCSenteratomic 1
#define GCSatomic 2
#define GCSswpallgc 3
#define GCSswpfinobj 4
#define GCSswptobefnz 5
#define GCSswpend 6
#define GCScallfin 7
#define GCSpause 8
在lgc里面通过checkGC来检查是否需要垃圾清理,
这里的gcrunning默认是0,没有打开,打开需要等待调用lua_gc函数
在lbaselib.c里面的luaB_collectgarbage函数,
可以从应用层调用这个接口开始GC,也可以等待虚拟机自动运行。
/*
** Does one step of collection when debt becomes positive. 'pre'/'pos'
** allows some adjustments to be done only when needed. macro
** 'condchangemem' is used only for heavy tests (forcing a full
** GC cycle on every opportunity)
*/
#define luaC_condGC(L,pre,pos) \
{ if (G(L)->GCdebt > 0) { pre; luaC_step(L); pos;}; \
condchangemem(L,pre,pos); }
/* more often than not, 'pre'/'pos' are empty */
#define luaC_checkGC(L) luaC_condGC(L,(void)0,(void)0)
/*
** performs a basic GC step if collector is running
*/
void luaC_step (lua_State *L) {
global_State *g = G(L);
lua_assert(!g->gcemergency);
if (g->gcrunning) { /* running? */
if(isdecGCmodegen(g))
genstep(L, g);
else
incstep(L, g);
}
}
lua使用的是追踪类GC,有两种选择,分别是
/* kinds of Garbage Collection */
#define KGC_INC 0 /* incremental gc */
#define KGC_GEN 1 /* generational gc */
默认使用的是incremental gc,增量式垃圾回收,使用三色标记算法。好处是可以减少STW的时间。
另外一个选择是generational gc,分代垃圾回收,将对象按年龄分代,好处是减少GC影响范围
先看默认的 方法,会走到incstep
/*
** Performs a basic incremental step. The debt and step size are
** converted from bytes to "units of work"; then the function loops
** running single steps until adding that many units of work or
** finishing a cycle (pause state). Finally, it sets the debt that
** controls when next step will be performed.
*/
static void incstep (lua_State *L, global_State *g) {
int stepmul = (getgcparam(g->gcstepmul) | 1); /* avoid division by 0 */
l_mem debt = (g->GCdebt / WORK2MEM) * stepmul;
l_mem stepsize = (g->gcstepsize <= log2maxs(l_mem))
? ((cast(l_mem, 1) << g->gcstepsize) / WORK2MEM) * stepmul
: MAX_LMEM; /* overflow; keep maximum value */
do { /* repeat until pause or enough "credit" (negative debt) */
lu_mem work = singlestep(L); /* perform one single step */
debt -= work;
} while (debt > -stepsize && g->gcstate != GCSpause);
if (g->gcstate == GCSpause)
setpause(g); /* pause until next cycle */
else {
debt = (debt / stepmul) * WORK2MEM; /* convert 'work units' to bytes */
luaE_setdebt(g, debt);
}
}
在这里里面,singlesetp最多会执行一轮,重新回到GCSpause的时候就会结束
static lu_mem singlestep (lua_State *L) {
global_State *g = G(L);
lu_mem work;
lua_assert(!g->gcstopem); /* collector is not reentrant */
g->gcstopem = 1; /* no emergency collections while collecting */
switch (g->gcstate) {
case GCSpause: {
restartcollection(g);
g->gcstate = GCSpropagate;
work = 1;
break;
}
case GCSpropagate: {
if (g->gray == NULL) { /* no more gray objects? */
g->gcstate = GCSenteratomic; /* finish propagate phase */
work = 0;
}
else
work = propagatemark(g); /* traverse one gray object */
break;
}
case GCSenteratomic: {
work = atomic(L); /* work is what was traversed by 'atomic' */
entersweep(L);
g->GCestimate = gettotalbytes(g); /* first estimate */;
break;
}
case GCSswpallgc: { /* sweep "regular" objects */
work = sweepstep(L, g, GCSswpfinobj, &g->finobj);
break;
}
case GCSswpfinobj: { /* sweep objects with finalizers */
work = sweepstep(L, g, GCSswptobefnz, &g->tobefnz);
break;
}
case GCSswptobefnz: { /* sweep objects to be finalized */
work = sweepstep(L, g, GCSswpend, NULL);
break;
}
case GCSswpend: { /* finish sweeps */
checkSizes(L, g);
g->gcstate = GCScallfin;
work = 0;
break;
}
case GCScallfin: { /* call remaining finalizers */
if (g->tobefnz && !g->gcemergency) {
g->gcstopem = 0; /* ok collections during finalizers */
work = runafewfinalizers(L, GCFINMAX) * GCFINALIZECOST;
}
else { /* emergency mode or no more finalizers */
g->gcstate = GCSpause; /* finish collection */
work = 0;
}
break;
}
default: lua_assert(0); return 0;
}
g->gcstopem = 0;
return work;
}
状态默认为GCSpause,会按顺序一步步执行。
首先是GCSpause时,执行restartcollection,并将状态设置为GCSpropagate
初始化某些值并开始做标记,例如主线程mainthread和注册表。
static void cleargraylists (global_State *g) {
g->gray = g->grayagain = NULL;
g->weak = g->allweak = g->ephemeron = NULL;
}
/*
** mark root set and reset all gray lists, to start a new collection
*/
static void restartcollection (global_State *g) {
cleargraylists(g);
markobject(g, g->mainthread);
markvalue(g, &g->l_registry);
markmt(g);
markbeingfnz(g); /* mark any finalizing object left from previous cycle */
}
主要的函数在于reallymarkobject
这个函数只有白色的点会被执行,灰色和黑色的会被跳过
str对象会直接被标记为黑色,
upval对象,闭包打开时标记为灰色,否则标记为黑色。再继续标记upvalu的值
userdata,用户值为0时标记为黑色,否则是白色
其他的结构都标记为灰色,丢进grap列表。
三个颜色的含义
- 白色:没有被搜索的对象
- 灰色:搜索过但是没搜索完的对象
- 黑色:已经搜索完的对象
初始对象都为白色,如果到最后还是白色说明没有被引用到,则会被当成垃圾清理
/*
** Mark an object. Userdata with no user values, strings, and closed
** upvalues are visited and turned black here. Open upvalues are
** already indirectly linked through their respective threads in the
** 'twups' list, so they don't go to the gray list; nevertheless, they
** are kept gray to avoid barriers, as their values will be revisited
** by the thread or by 'remarkupvals'. Other objects are added to the
** gray list to be visited (and turned black) later. Both userdata and
** upvalues can call this function recursively, but this recursion goes
** for at most two levels: An upvalue cannot refer to another upvalue
** (only closures can), and a userdata's metatable must be a table.
*/
static void reallymarkobject (global_State *g, GCObject *o) {
switch (o->tt) {
case LUA_VSHRSTR:
case LUA_VLNGSTR: {
set2black(o); /* nothing to visit */
break;
}
case LUA_VUPVAL: {
UpVal *uv = gco2upv(o);
if (upisopen(uv))
set2gray(uv); /* open upvalues are kept gray */
else
set2black(uv); /* closed upvalues are visited here */
markvalue(g, uv->v); /* mark its content */
break;
}
case LUA_VUSERDATA: {
Udata *u = gco2u(o);
if (u->nuvalue == 0) { /* no user values? */
markobjectN(g, u->metatable); /* mark its metatable */
set2black(u); /* nothing else to mark */
break;
}
/* else... */
} /* FALLTHROUGH */
case LUA_VLCL: case LUA_VCCL: case LUA_VTABLE:
case LUA_VTHREAD: case LUA_VPROTO: {
linkobjgclist(o, g->gray); /* to be visited later */
break;
}
default: lua_assert(0); break;
}
}
第二步是GCSpropagate
这一步会从grap链表里面继续搜索,直到为空, 或者限制的work用完
灰色节点里面的元素也会根据上面的规则去标记颜色,
对于table类型的搜索,涉及到弱引用,weakKey和weakValue。
由lua的原表metatable实现,在原表设置的__mode属性中,k表示weakKey,v表示weakValue。
弱引用情况下,如果键或者值在别的地方没有被引用,也会被当成垃圾处理。
/*
** traverse one gray object, turning it to black.
*/
static lu_mem propagatemark (global_State *g) {
GCObject *o = g->gray;
nw2black(o);
g->gray = *getgclist(o); /* remove from 'gray' list */
switch (o->tt) {
case LUA_VTABLE: return traversetable(g, gco2t(o));
case LUA_VUSERDATA: return traverseudata(g, gco2u(o));
case LUA_VLCL: return traverseLclosure(g, gco2lcl(o));
case LUA_VCCL: return traverseCclosure(g, gco2ccl(o));
case LUA_VPROTO: return traverseproto(g, gco2p(o));
case LUA_VTHREAD: return traversethread(g, gco2th(o));
default: lua_assert(0); return 0;
}
}
第三步是GCSenteratomic
由于上一步是陆陆续续搜索的,所以在标记颜色的过程中,会有新的白色节点新增,
这时就无法知道白色是垃圾还是新增的,这里有一步原子操作atomic,这次一次搜索完grayagain的元素。
grayagain的元素,是在新增白色节点时,引用新增节点的元素如果已经被标记成黑色,会根据情况新增元素到grayagain里,表示再次变成灰色。
再次标记涉及两个函数luaC_barrier和luaC_barrierback
当一个黑色节点引用了一个新的白色节点时:
如果这个黑色节点是一个table,则调用luaC_barrierback,将这个黑色节点变成灰色。
其他情况调用luaC_barrier,将这个白色节点变成灰色。
它们的区别是一个将后面的节点变成灰色,一个将前面的节点变成灰色。
#define luaC_barrier(L,p,v) ( \
(iscollectable(v) && isblack(p) && iswhite(gcvalue(v))) ? \
luaC_barrier_(L,obj2gco(p),gcvalue(v)) : cast_void(0))
#define luaC_barrierback(L,p,v) ( \
(iscollectable(v) && isblack(p) && iswhite(gcvalue(v))) ? \
luaC_barrierback_(L,p) : cast_void(0))
/*
** Barrier that moves collector forward, that is, marks the white object
** 'v' being pointed by the black object 'o'. In the generational
** mode, 'v' must also become old, if 'o' is old; however, it cannot
** be changed directly to OLD, because it may still point to non-old
** objects. So, it is marked as OLD0. In the next cycle it will become
** OLD1, and in the next it will finally become OLD (regular old). By
** then, any object it points to will also be old. If called in the
** incremental sweep phase, it clears the black object to white (sweep
** it) to avoid other barrier calls for this same object. (That cannot
** be done is generational mode, as its sweep does not distinguish
** whites from deads.)
*/
void luaC_barrier_ (lua_State *L, GCObject *o, GCObject *v) {
global_State *g = G(L);
lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
if (keepinvariant(g)) { /* must keep invariant? */
reallymarkobject(g, v); /* restore invariant */
if (isold(o)) {
lua_assert(!isold(v)); /* white object could not be old */
setage(v, G_OLD0); /* restore generational invariant */
}
}
else { /* sweep phase */
lua_assert(issweepphase(g));
if (g->gckind == KGC_INC) /* incremental mode? */
makewhite(g, o); /* mark 'o' as white to avoid other barriers */
}
}
/*
** barrier that moves collector backward, that is, mark the black object
** pointing to a white object as gray again.
*/
void luaC_barrierback_ (lua_State *L, GCObject *o) {
global_State *g = G(L);
lua_assert(isblack(o) && !isdead(g, o));
lua_assert((g->gckind == KGC_GEN) == (isold(o) && getage(o) != G_TOUCHED1));
if (getage(o) == G_TOUCHED2) /* already in gray list? */
set2gray(o); /* make it gray to become touched1 */
else /* link it in 'grayagain' and paint it gray */
linkobjgclist(o, g->grayagain);
if (isold(o)) /* generational mode? */
setage(o, G_TOUCHED1); /* touched in current cycle */
}
除了新增白色节点被黑色节点引用的情况,还有另一种情况,当原子操作结束之后,又会陆陆续续加入新的白色节点,这种情况下新增的白色节点也会被当垃圾清理掉。
这里的处理方式是,有两种白色节点WHITE0BIT,WHITE1BIT,白色节点0和白色节点1
#define WHITE0BIT 3 /* object is white (type 0) */
#define WHITE1BIT 4 /* object is white (type 1) */
#define BLACKBIT 5 /* object is black */
#define FINALIZEDBIT 6 /* object has been marked for finalization */
#define TESTBIT 7
#define WHITEBITS bit2mask(WHITE0BIT, WHITE1BIT)
#define iswhite(x) testbits((x)->marked, WHITEBITS)
#define isblack(x) testbit((x)->marked, BLACKBIT)
#define isgray(x) /* neither white nor black */ \
(!testbits((x)->marked, WHITEBITS | bitmask(BLACKBIT)))
#define tofinalize(x) testbit((x)->marked, FINALIZEDBIT)
#define otherwhite(g) ((g)->currentwhite ^ WHITEBITS)
原子操作之后会马上进行一次清理。
GCSswpallgc,GCSswpfinobj,GCSswptobefnz都是在陆陆续续清理,每次stw 会清理GCSWEEPMAX 个
/*
** Maximum number of elements to sweep in each single step.
** (Large enough to dissipate fixed overheads but small enough
** to allow small steps for the collector.)
*/
#define GCSWEEPMAX 100