Published on

lua 的垃圾清理

Authors
  • avatar
    Name
    Ushen
    Twitter

每一个lua对象都有一个公共头部CommonHeader

它包含了一个垃圾回收对象链表next, 标记tt和marked

/*
** Common Header for all collectable objects (in macro form, to be
** included in other objects)
*/
#define CommonHeader  struct GCObject *next; lu_byte tt; lu_byte marked
/* Common type for all collectable objects */
typedef struct GCObject {
 CommonHeader;
} GCObject;

全局状态global_state里有一个属性allgc,存储全局的可收集对象列表

每创建一个可收集对象就往链表插入一个对象GCObject,采用头插法

/*
** create a new collectable object (with given type and size) and link
** it to 'allgc' list.
*/
GCObject *luaC_newobj (lua_State *L, int tt, size_t sz) {
 global_State *g = G(L);
 GCObject *o = cast(GCObject *, luaM_newobject(L, novariant(tt), sz));
 o->marked = luaC_white(g);
 o->tt = tt;
 o->next = g->allgc;
 g->allgc = o;
 return o;
}

垃圾收集状态有9种,由global_state里的gcstate标记,初始为GCSpause

/*
** Possible states of the Garbage Collector
*/
#define GCSpropagate 0
#define GCSenteratomic 1
#define GCSatomic 2
#define GCSswpallgc 3
#define GCSswpfinobj 4
#define GCSswptobefnz 5
#define GCSswpend 6
#define GCScallfin 7
#define GCSpause 8

在lgc里面通过checkGC来检查是否需要垃圾清理,

这里的gcrunning默认是0,没有打开,打开需要等待调用lua_gc函数

在lbaselib.c里面的luaB_collectgarbage函数,

可以从应用层调用这个接口开始GC,也可以等待虚拟机自动运行。

/*
** Does one step of collection when debt becomes positive. 'pre'/'pos'
** allows some adjustments to be done only when needed. macro
** 'condchangemem' is used only for heavy tests (forcing a full
** GC cycle on every opportunity)
*/
#define luaC_condGC(L,pre,pos) \
    { if (G(L)->GCdebt > 0) { pre; luaC_step(L); pos;}; \
 condchangemem(L,pre,pos); }

/* more often than not, 'pre'/'pos' are empty */
#define luaC_checkGC(L)     luaC_condGC(L,(void)0,(void)0)

/*
** performs a basic GC step if collector is running
*/
void luaC_step (lua_State *L) {
 global_State *g = G(L);
 lua_assert(!g->gcemergency);
 if (g->gcrunning) {  /* running? */
 if(isdecGCmodegen(g))
 genstep(L, g);
 else
 incstep(L, g);
  }
}

lua使用的是追踪类GC,有两种选择,分别是

/* kinds of Garbage Collection */
#define KGC_INC 0 /* incremental gc */
#define KGC_GEN 1 /* generational gc */

默认使用的是incremental gc,增量式垃圾回收,使用三色标记算法。好处是可以减少STW的时间。

另外一个选择是generational gc,分代垃圾回收,将对象按年龄分代,好处是减少GC影响范围

先看默认的 方法,会走到incstep

/*
** Performs a basic incremental step. The debt and step size are
** converted from bytes to "units of work"; then the function loops
** running single steps until adding that many units of work or
** finishing a cycle (pause state). Finally, it sets the debt that
** controls when next step will be performed.
*/
static void incstep (lua_State *L, global_State *g) {
 int stepmul = (getgcparam(g->gcstepmul) | 1);  /* avoid division by 0 */
 l_mem debt = (g->GCdebt / WORK2MEM) * stepmul;
 l_mem stepsize = (g->gcstepsize <= log2maxs(l_mem))
                 ? ((cast(l_mem, 1) << g->gcstepsize) / WORK2MEM) * stepmul
                 : MAX_LMEM;  /* overflow; keep maximum value */
 do {  /* repeat until pause or enough "credit" (negative debt) */
 lu_mem work = singlestep(L);  /* perform one single step */
 debt -= work;
  } while (debt > -stepsize && g->gcstate != GCSpause);
 if (g->gcstate == GCSpause)
 setpause(g);  /* pause until next cycle */
 else {
 debt = (debt / stepmul) * WORK2MEM;  /* convert 'work units' to bytes */
 luaE_setdebt(g, debt);
  }
}

在这里里面,singlesetp最多会执行一轮,重新回到GCSpause的时候就会结束

static lu_mem singlestep (lua_State *L) {
 global_State *g = G(L);
 lu_mem work;
 lua_assert(!g->gcstopem);  /* collector is not reentrant */
 g->gcstopem = 1;  /* no emergency collections while collecting */
 switch (g->gcstate) {
 case GCSpause: {
 restartcollection(g);
 g->gcstate = GCSpropagate;
 work = 1;
 break;
    }
 case GCSpropagate: {
 if (g->gray == NULL) {  /* no more gray objects? */
 g->gcstate = GCSenteratomic;  /* finish propagate phase */
 work = 0;
      }
 else
 work = propagatemark(g);  /* traverse one gray object */
 break;
    }
 case GCSenteratomic: {
 work = atomic(L);  /* work is what was traversed by 'atomic' */
 entersweep(L);
 g->GCestimate = gettotalbytes(g);  /* first estimate */;
 break;
    }
 case GCSswpallgc: {  /* sweep "regular" objects */
 work = sweepstep(L, g, GCSswpfinobj, &g->finobj);
 break;
    }
 case GCSswpfinobj: {  /* sweep objects with finalizers */
 work = sweepstep(L, g, GCSswptobefnz, &g->tobefnz);
 break;
    }
 case GCSswptobefnz: {  /* sweep objects to be finalized */
 work = sweepstep(L, g, GCSswpend, NULL);
 break;
    }
 case GCSswpend: {  /* finish sweeps */
 checkSizes(L, g);
 g->gcstate = GCScallfin;
 work = 0;
 break;
    }
 case GCScallfin: {  /* call remaining finalizers */
 if (g->tobefnz && !g->gcemergency) {
 g->gcstopem = 0;  /* ok collections during finalizers */
 work = runafewfinalizers(L, GCFINMAX) * GCFINALIZECOST;
      }
 else {  /* emergency mode or no more finalizers */
 g->gcstate = GCSpause;  /* finish collection */
 work = 0;
      }
 break;
    }
 default: lua_assert(0); return 0;
  }
 g->gcstopem = 0;
 return work;
}

状态默认为GCSpause,会按顺序一步步执行。

首先是GCSpause时,执行restartcollection,并将状态设置为GCSpropagate

初始化某些值并开始做标记,例如主线程mainthread和注册表。

static void cleargraylists (global_State *g) {
 g->gray = g->grayagain = NULL;
 g->weak = g->allweak = g->ephemeron = NULL;
}

/*
** mark root set and reset all gray lists, to start a new collection
*/
static void restartcollection (global_State *g) {
 cleargraylists(g);
 markobject(g, g->mainthread);
 markvalue(g, &g->l_registry);
 markmt(g);
 markbeingfnz(g);  /* mark any finalizing object left from previous cycle */
}

主要的函数在于reallymarkobject

这个函数只有白色的点会被执行,灰色和黑色的会被跳过

str对象会直接被标记为黑色,

upval对象,闭包打开时标记为灰色,否则标记为黑色。再继续标记upvalu的值

userdata,用户值为0时标记为黑色,否则是白色

其他的结构都标记为灰色,丢进grap列表。

三个颜色的含义

  • 白色:没有被搜索的对象
  • 灰色:搜索过但是没搜索完的对象
  • 黑色:已经搜索完的对象

初始对象都为白色,如果到最后还是白色说明没有被引用到,则会被当成垃圾清理

/*
** Mark an object.  Userdata with no user values, strings, and closed
** upvalues are visited and turned black here.  Open upvalues are
** already indirectly linked through their respective threads in the
** 'twups' list, so they don't go to the gray list; nevertheless, they
** are kept gray to avoid barriers, as their values will be revisited
** by the thread or by 'remarkupvals'.  Other objects are added to the
** gray list to be visited (and turned black) later.  Both userdata and
** upvalues can call this function recursively, but this recursion goes
** for at most two levels: An upvalue cannot refer to another upvalue
** (only closures can), and a userdata's metatable must be a table.
*/
static void reallymarkobject (global_State *g, GCObject *o) {
 switch (o->tt) {
 case LUA_VSHRSTR:
 case LUA_VLNGSTR: {
 set2black(o);  /* nothing to visit */
 break;
    }
 case LUA_VUPVAL: {
 UpVal *uv = gco2upv(o);
 if (upisopen(uv))
 set2gray(uv);  /* open upvalues are kept gray */
 else
 set2black(uv);  /* closed upvalues are visited here */
 markvalue(g, uv->v);  /* mark its content */
 break;
    }
 case LUA_VUSERDATA: {
 Udata *u = gco2u(o);
 if (u->nuvalue == 0) {  /* no user values? */
 markobjectN(g, u->metatable);  /* mark its metatable */
 set2black(u);  /* nothing else to mark */
 break;
      }
      /* else... */
    }  /* FALLTHROUGH */
 case LUA_VLCL: case LUA_VCCL: case LUA_VTABLE:
 case LUA_VTHREAD: case LUA_VPROTO: {
 linkobjgclist(o, g->gray);  /* to be visited later */
 break;
    }
 default: lua_assert(0); break;
  }
}

第二步是GCSpropagate

这一步会从grap链表里面继续搜索,直到为空, 或者限制的work用完

灰色节点里面的元素也会根据上面的规则去标记颜色,

对于table类型的搜索,涉及到弱引用,weakKey和weakValue。

由lua的原表metatable实现,在原表设置的__mode属性中,k表示weakKey,v表示weakValue。

弱引用情况下,如果键或者值在别的地方没有被引用,也会被当成垃圾处理。

/*
** traverse one gray object, turning it to black.
*/
static lu_mem propagatemark (global_State *g) {
 GCObject *o = g->gray;
 nw2black(o);
 g->gray = *getgclist(o);  /* remove from 'gray' list */
 switch (o->tt) {
 case LUA_VTABLE: return traversetable(g, gco2t(o));
 case LUA_VUSERDATA: return traverseudata(g, gco2u(o));
 case LUA_VLCL: return traverseLclosure(g, gco2lcl(o));
 case LUA_VCCL: return traverseCclosure(g, gco2ccl(o));
 case LUA_VPROTO: return traverseproto(g, gco2p(o));
 case LUA_VTHREAD: return traversethread(g, gco2th(o));
 default: lua_assert(0); return 0;
  }
}

第三步是GCSenteratomic

由于上一步是陆陆续续搜索的,所以在标记颜色的过程中,会有新的白色节点新增,

这时就无法知道白色是垃圾还是新增的,这里有一步原子操作atomic,这次一次搜索完grayagain的元素。

grayagain的元素,是在新增白色节点时,引用新增节点的元素如果已经被标记成黑色,会根据情况新增元素到grayagain里,表示再次变成灰色。

再次标记涉及两个函数luaC_barrier和luaC_barrierback

当一个黑色节点引用了一个新的白色节点时:

如果这个黑色节点是一个table,则调用luaC_barrierback,将这个黑色节点变成灰色。

其他情况调用luaC_barrier,将这个白色节点变成灰色。

它们的区别是一个将后面的节点变成灰色,一个将前面的节点变成灰色。

#define luaC_barrier(L,p,v) (  \
    (iscollectable(v) && isblack(p) && iswhite(gcvalue(v))) ? \
 luaC_barrier_(L,obj2gco(p),gcvalue(v)) : cast_void(0))

#define luaC_barrierback(L,p,v) (  \
    (iscollectable(v) && isblack(p) && iswhite(gcvalue(v))) ? \
 luaC_barrierback_(L,p) : cast_void(0))

/*
** Barrier that moves collector forward, that is, marks the white object
** 'v' being pointed by the black object 'o'.  In the generational
** mode, 'v' must also become old, if 'o' is old; however, it cannot
** be changed directly to OLD, because it may still point to non-old
** objects. So, it is marked as OLD0. In the next cycle it will become
** OLD1, and in the next it will finally become OLD (regular old). By
** then, any object it points to will also be old.  If called in the
** incremental sweep phase, it clears the black object to white (sweep
** it) to avoid other barrier calls for this same object. (That cannot
** be done is generational mode, as its sweep does not distinguish
** whites from deads.)
*/
void luaC_barrier_ (lua_State *L, GCObject *o, GCObject *v) {
 global_State *g = G(L);
 lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
 if (keepinvariant(g)) {  /* must keep invariant? */
 reallymarkobject(g, v);  /* restore invariant */
 if (isold(o)) {
 lua_assert(!isold(v));  /* white object could not be old */
 setage(v, G_OLD0);  /* restore generational invariant */
    }
  }
 else {  /* sweep phase */
 lua_assert(issweepphase(g));
 if (g->gckind == KGC_INC)  /* incremental mode? */
 makewhite(g, o);  /* mark 'o' as white to avoid other barriers */
  }
}


/*
** barrier that moves collector backward, that is, mark the black object
** pointing to a white object as gray again.
*/
void luaC_barrierback_ (lua_State *L, GCObject *o) {
 global_State *g = G(L);
 lua_assert(isblack(o) && !isdead(g, o));
 lua_assert((g->gckind == KGC_GEN) == (isold(o) && getage(o) != G_TOUCHED1));
 if (getage(o) == G_TOUCHED2)  /* already in gray list? */
 set2gray(o);  /* make it gray to become touched1 */
 else  /* link it in 'grayagain' and paint it gray */
 linkobjgclist(o, g->grayagain);
 if (isold(o))  /* generational mode? */
 setage(o, G_TOUCHED1);  /* touched in current cycle */
}

除了新增白色节点被黑色节点引用的情况,还有另一种情况,当原子操作结束之后,又会陆陆续续加入新的白色节点,这种情况下新增的白色节点也会被当垃圾清理掉。

这里的处理方式是,有两种白色节点WHITE0BIT,WHITE1BIT,白色节点0和白色节点1

#define WHITE0BIT 3  /* object is white (type 0) */
#define WHITE1BIT 4  /* object is white (type 1) */
#define BLACKBIT 5  /* object is black */
#define FINALIZEDBIT 6  /* object has been marked for finalization */

#define TESTBIT 7



#define WHITEBITS bit2mask(WHITE0BIT, WHITE1BIT)


#define iswhite(x)      testbits((x)->marked, WHITEBITS)
#define isblack(x)      testbit((x)->marked, BLACKBIT)
#define isgray(x)  /* neither white nor black */ \
    (!testbits((x)->marked, WHITEBITS | bitmask(BLACKBIT)))

#define tofinalize(x)   testbit((x)->marked, FINALIZEDBIT)

#define otherwhite(g)   ((g)->currentwhite ^ WHITEBITS)

原子操作之后会马上进行一次清理。

GCSswpallgc,GCSswpfinobj,GCSswptobefnz都是在陆陆续续清理,每次stw 会清理GCSWEEPMAX 个

/*
** Maximum number of elements to sweep in each single step.
** (Large enough to dissipate fixed overheads but small enough
** to allow small steps for the collector.)
*/
#define GCSWEEPMAX 100