/* A basic mark-sweep GC

   As of now, the GC code is based on the implementation from chibi scheme.

   Goals of this project:
   - write algorithms
   - add test cases
   - integrate with types
   - integrate with cyclone
   - extend to tri-color marking and on-the-fly collection
   - etc...
*/

#include "cyclone/types.h"
gc_heap *gc_heap_create(size_t size, size_t max_size, size_t chunk_size)
{
  gc_free_list *free, *next;
  gc_heap *h;
  // TODO: mmap?
  h = malloc(gc_heap_pad_size(size));
  if (!h) return NULL;
  h->size = size;
  h->chunk_size = chunk_size;
  h->max_size = max_size;
  //printf("DEBUG h->data addr: %p\n", &(h->data));
  h->data = (char *) gc_heap_align(sizeof(h->data) + (uintptr_t)&(h->data));
  //printf("DEBUG h->data addr: %p\n", h->data);
  h->next = NULL;
  free = h->free_list = (gc_free_list *)h->data;
  next = (gc_free_list *)(((char *) free) + gc_heap_align(gc_free_chunk_size));
  free->size = 0; // First one is just a dummy record
  free->next = next;
  next->size = size - gc_heap_align(gc_free_chunk_size);
  next->next = NULL;
#if GC_DEBUG_PRINTFS
  fprintf(stderr, "heap: %p-%p data: %p-%p size: %zu\n",
          h, ((char *)h) + gc_heap_pad_size(size), h->data, h->data + size, size);
  fprintf(stderr, "first: %p end: %p\n",
          (object)gc_heap_first_block(h), (object)gc_heap_end(h));
  fprintf(stderr, "free1: %p-%p free2: %p-%p\n",
          free, ((char *)free) + free->size, next, ((char *)next) + next->size);
#endif
  return h;
}
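/* Right after gc_heap_create, the page looks like this (a descriptive note,
   widths not to scale):

     [gc_heap header][dummy free record, size 0][free record: rest of page]
      h->free_list ---^                          ^--- next

   The zero-size dummy at the head of the free list lets gc_try_alloc and
   gc_sweep always carry a trailing predecessor pointer without
   special-casing the first entry. */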

int gc_grow_heap(gc_heap *h, size_t size, size_t chunk_size)
{
  size_t cur_size, new_size;
  gc_heap *h_last = gc_heap_last(h);
  cur_size = h_last->size;
  // JAE - For now, just add a new page
  new_size = cur_size; //gc_heap_align(((cur_size > size) ? cur_size : size) * 2);
  h_last->next = gc_heap_create(new_size, h_last->max_size, chunk_size);
  return (h_last->next != NULL);
}

void *gc_try_alloc(gc_heap *h, size_t size)
{
  gc_free_list *f1, *f2, *f3;
  for (; h; h = h->next) { // All heaps
    // TODO: chunk size (ignoring for now)

    for (f1 = h->free_list, f2 = f1->next; f2; f1 = f2, f2 = f2->next) { // all free in this heap
      if (f2->size >= size) { // Big enough for request
        // TODO: take whole chunk or divide up f2 (using f3)?
        if (f2->size >= (size + gc_heap_align(1) /* min obj size */)) {
          f3 = (gc_free_list *) (((char *)f2) + size);
          f3->size = f2->size - size;
          f3->next = f2->next;
          f1->next = f3;
        } else { /* Take the whole chunk */
          f1->next = f2->next;
        }
        return f2;
      }
    }
  }
  return NULL;
}
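// gc_try_alloc is a simple first-fit search: it walks every heap page and
// every free block, splits the first block large enough to hold the request
// (leaving the remainder on the free list), and returns NULL only when no
// page has room - at which point gc_alloc grows the heap.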

void *gc_alloc(gc_heap *h, size_t size, int *heap_grown)
{
  void *result = NULL;
  size_t max_freed = 0, sum_freed = 0, total_size;
  // TODO: check the return value; if NULL (could not alloc) then run a
  // collection and check how much free space there is. If less than the
  // allowed ratio, try growing the heap, then try realloc. If we still
  // cannot alloc, throw an out-of-memory error.
  size = gc_heap_align(size);
  result = gc_try_alloc(h, size);
  if (!result) {
    // TODO: may want to consider not doing this now, and implementing gc_collect as
    // part of the runtime, since we would have all of the roots, stack args,
    // etc available there.
    // max_freed = gc_collect(h); TODO: this does not work yet!
    //
    // total_size = gc_heap_total_size(h);
    // if (((max_freed < size) ||
    //      ((total_size > sum_freed) &&
    //       (total_size - sum_freed) > (total_size * 0.75))) // Grow ratio
    //     && ((!h->max_size) || (total_size < h->max_size))) {
    gc_grow_heap(h, size, 0);
    *heap_grown = 1;
    // }
    result = gc_try_alloc(h, size);
    if (!result) {
      fprintf(stderr, "out of memory error allocating %zu bytes\n", size);
      exit(1); // TODO: throw error???
    }
  }
#if GC_DEBUG_PRINTFS
  fprintf(stdout, "alloc %p size = %zu\n", result, size);
#endif
  return result;
}
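// A minimal usage sketch (commented out; the 4096 page size and the other
// arguments are illustrative only, not required values):
//
//   int heap_grown = 0;
//   gc_heap *h = gc_heap_create(4096, 0 /* no max */, 0 /* no chunking */);
//   void *obj = gc_alloc(h, sizeof(cons_type), &heap_grown);
//   // heap_grown is now 1 if the request forced a gc_grow_heap call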

size_t gc_allocated_bytes(object obj)
{
  tag_type t;
  if (is_value_type(obj))
    return gc_heap_align(1);
  t = type_of(obj);
  if (t == cons_tag) return gc_heap_align(sizeof(cons_type));
  if (t == macro_tag) return gc_heap_align(sizeof(macro_type));
  if (t == closure0_tag) return gc_heap_align(sizeof(closure0_type));
  if (t == closure1_tag) return gc_heap_align(sizeof(closure1_type));
  if (t == closure2_tag) return gc_heap_align(sizeof(closure2_type));
  if (t == closure3_tag) return gc_heap_align(sizeof(closure3_type));
  if (t == closure4_tag) return gc_heap_align(sizeof(closure4_type));
  if (t == closureN_tag) {
    return gc_heap_align(sizeof(closureN_type) + sizeof(object) * ((closureN_type *)obj)->num_elt);
  }
  if (t == vector_tag) {
    return gc_heap_align(sizeof(vector_type) + sizeof(object) * ((vector_type *)obj)->num_elt);
  }
  if (t == string_tag) {
    return gc_heap_align(sizeof(string_type) + string_len(obj) + 1);
  }
  if (t == integer_tag) return gc_heap_align(sizeof(integer_type));
  if (t == double_tag) return gc_heap_align(sizeof(double_type));
  if (t == port_tag) return gc_heap_align(sizeof(port_type));
  if (t == cvar_tag) return gc_heap_align(sizeof(cvar_type));

  //#if GC_DEBUG_PRINTFS
  fprintf(stderr, "gc_allocated_bytes: unexpected object %p of type %ld\n", obj, (long)t);
  exit(1);
  //#endif
  return 0;
}

gc_heap *gc_heap_last(gc_heap *h)
{
  while (h->next)
    h = h->next;
  return h;
}

size_t gc_heap_total_size(gc_heap *h)
{
  size_t total_size = 0;
  while (h) {
    total_size += h->size;
    h = h->next;
  }
  return total_size;
}

void gc_mark(gc_heap *h, object obj)
{
  if (nullp(obj) || is_value_type(obj) || mark(obj))
    return;

#if GC_DEBUG_PRINTFS
  // fprintf(stdout, "gc_mark %p\n", obj);
#endif
  ((list)obj)->hdr.mark = 1;
  // TODO: mark heap saves (??)
  // could this be a write barrier?

  // Mark objects this one references
  if (type_of(obj) == cons_tag) {
    gc_mark(h, car(obj));
    gc_mark(h, cdr(obj));
  } else if (type_of(obj) == closure1_tag) {
    gc_mark(h, ((closure1) obj)->elt1);
  } else if (type_of(obj) == closure2_tag) {
    gc_mark(h, ((closure2) obj)->elt1);
    gc_mark(h, ((closure2) obj)->elt2);
  } else if (type_of(obj) == closure3_tag) {
    gc_mark(h, ((closure3) obj)->elt1);
    gc_mark(h, ((closure3) obj)->elt2);
    gc_mark(h, ((closure3) obj)->elt3);
  } else if (type_of(obj) == closure4_tag) {
    gc_mark(h, ((closure4) obj)->elt1);
    gc_mark(h, ((closure4) obj)->elt2);
    gc_mark(h, ((closure4) obj)->elt3);
    gc_mark(h, ((closure4) obj)->elt4);
  } else if (type_of(obj) == closureN_tag) {
    int i, n = ((closureN) obj)->num_elt;
    for (i = 0; i < n; i++) {
      gc_mark(h, ((closureN) obj)->elts[i]);
    }
  } else if (type_of(obj) == vector_tag) {
    int i, n = ((vector) obj)->num_elt;
    for (i = 0; i < n; i++) {
      gc_mark(h, ((vector) obj)->elts[i]);
    }
  }
}
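// Note: gc_mark recurses on both the car and the cdr, so marking consumes C
// stack proportional to the depth of the object graph (e.g. the length of a
// list). An explicit stack of pending objects - like the collector's
// mark_stack further below - is the usual way to bound this if it becomes a
// problem.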

size_t gc_sweep(gc_heap *h, size_t *sum_freed_ptr)
{
  size_t freed, max_freed = 0, sum_freed = 0, size;
  object p, end;
  gc_free_list *q, *r, *s;
  for (; h; h = h->next) { // All heaps
#if GC_DEBUG_CONCISE_PRINTFS
    fprintf(stdout, "sweep heap %p, size = %zu\n", h, h->size);
#endif
    p = gc_heap_first_block(h);
    q = h->free_list;
    end = gc_heap_end(h);
    while (p < end) {
      // find preceding/succeeding free list pointers for p
      for (r = q->next; r && ((char *)r < (char *)p); q = r, r = r->next);

      if ((char *)r == (char *)p) { // this is a free block, skip it
#if GC_DEBUG_PRINTFS
        fprintf(stdout, "skip free block %p size = %zu\n", p, r->size);
#endif
        p = (object) (((char *)p) + r->size);
        continue;
      }
      size = gc_heap_align(gc_allocated_bytes(p));
      //fprintf(stdout, "check object %p, size = %d\n", p, size);

#if GC_DEBUG_CONCISE_PRINTFS
      // DEBUG
      if (!is_object_type(p))
        fprintf(stderr, "sweep: invalid object at %p", p);
      if ((char *)q + q->size > (char *)p)
        fprintf(stderr, "sweep: bad size at %p < %p + %zu", p, q, q->size);
      if (r && ((char *)p) + size > (char *)r)
        fprintf(stderr, "sweep: bad size at %p + %zu > %p", p, size, r);
      // END DEBUG
#endif

      if (!mark(p)) {
#if GC_DEBUG_PRINTFS
        fprintf(stdout, "sweep: object is not marked %p\n", p);
#endif
        // free p
        sum_freed += size;
        if (((((char *)q) + q->size) == (char *)p) && (q != h->free_list)) {
          /* merge q with p */
          if (r && r->size && ((((char *)p) + size) == (char *)r)) {
            // ... and with r
            q->next = r->next;
            freed = q->size + size + r->size;
            p = (object) (((char *)p) + size + r->size);
          } else {
            freed = q->size + size;
            p = (object) (((char *)p) + size);
          }
          q->size = freed;
        } else {
          s = (gc_free_list *)p;
          if (r && r->size && ((((char *)p) + size) == (char *)r)) {
            // merge p with r
            s->size = size + r->size;
            s->next = r->next;
            q->next = s;
            freed = size + r->size;
          } else {
            s->size = size;
            s->next = r;
            q->next = s;
            freed = size;
          }
          p = (object) (((char *)p) + freed);
        }
        if (freed > max_freed)
          max_freed = freed;
      } else {
#if GC_DEBUG_PRINTFS
        // fprintf(stdout, "sweep: object is marked %p\n", p);
#endif
        //if (mark(p) != 1) {
        //  printf("unexpected mark value %d\n", mark(p));
        //  exit(1);
        //}

        ((list)p)->hdr.mark = 0;
        p = (object)(((char *)p) + size);
      }
    }
  }
  if (sum_freed_ptr) *sum_freed_ptr = sum_freed;
  return max_freed;
}
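// Sweep needs no separate list of allocated objects: it walks each page
// linearly, using the address-ordered free list to skip free blocks and
// gc_allocated_bytes to find each object's extent. Unmarked objects are
// returned to the free list, coalescing with the neighboring free block(s)
// whenever they are physically adjacent, which limits fragmentation.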

void gc_thr_grow_move_buffer(gc_thread_data *d)
{
  if (!d) return;

  if (d->moveBufLen == 0) { // Special case
    d->moveBufLen = 128;
    d->moveBuf = NULL;
  } else {
    d->moveBufLen *= 2;
  }

  d->moveBuf = realloc(d->moveBuf, d->moveBufLen * sizeof(void *));
  if (!d->moveBuf) {
    fprintf(stderr, "out of memory error growing move buffer\n");
    exit(1);
  }
#if GC_DEBUG_CONCISE_PRINTFS
  printf("grew moveBuffer, len = %d\n", d->moveBufLen);
#endif
}

void gc_thr_add_to_move_buffer(gc_thread_data *d, int *alloci, object obj)
{
  if (*alloci == d->moveBufLen) {
    gc_thr_grow_move_buffer(d);
  }

  d->moveBuf[*alloci] = obj;
  (*alloci)++;
}

// Generic buffer functions
void **vpbuffer_realloc(void **buf, int *len)
{
  return realloc(buf, (*len) * sizeof(void *));
}

void **vpbuffer_add(void **buf, int *len, int i, void *obj)
{
  if (i == *len) {
    *len *= 2;
    buf = vpbuffer_realloc(buf, len);
  }
  buf[i] = obj;
  return buf;
}
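// Note: vpbuffer_add returns a (possibly moved) buffer, so callers must
// always reassign: buf = vpbuffer_add(buf, &len, i, obj). See the
// commented-out unit test at the bottom of this file for example usage.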

void vpbuffer_free(void **buf)
{
  free(buf);
}

// void gc_init()
// {
// }

// END heap definitions

/*
 Rough plan for how to implement the new GC algorithm. We need to do this in
 phases in order to have any hope of getting everything working. Let's prove
 the algorithm out, then extend support to multiple mutators if everything
 looks good.

 PHASE 1 - separation of mutator and collector into separate threads

   Need to synchronize access (preferably via atomics) for anything shared
   between the collector and mutator threads.

   Can cooperate be part of a minor GC? In that case, the marking could be
   done as part of allocation.

   But then what exactly does it mean to mark gray? Objects moved to the heap
   will be set to the mark color at that point (until the collector thread
   finishes), but we would want objects on the heap referenced by them to be
   traced as well. So I suppose that is the purpose of gray: to indicate
   objects that still need to be traced. Need to think this through, though -
   do we need the mark buffer and the last read/write indices? Do those make
   sense with the MTA approach (assume so)?

   ONLY CONCERN - what happens if an object on the stack has a reference to
   an object on the heap that is collected? But how would this happen? The
   collector marks global roots before telling mutators to go async, and once
   mutators go async any allocations will not be collected. Also, once
   mutators go async they have a chance to mark gray, which will include the
   write barrier. So given that, is it still possible for an old heap ref to
   sneak into a stack object during the async phase?

   More questions on the above point:
   - figure out how/if, after cooperation/async, a stack object can pick up a
     reference to a heap object that will be collected during that GC cycle.
     Need to be able to prevent this somehow...
   - need to figure out real-world use case(s) where this could happen, to
     try and figure out how to address this problem.

   From my understanding of the paper, the write barrier prevents this.
   Consider: at the start of async, the mutator's roots, global roots, and
   anything on the write barrier have been marked. Any new objects will be
   allocated as marked. That way, anything the mutator could later access is
   either marked or will be after tracing. The only exception is if the
   mutator changes a reference such that tracing will no longer find an
   object. But the write barrier prevents this - during tracing, a heap
   update causes the old object to be marked as well. So it will eventually
   be traced, and there should be no dangling objects after GC completes.
   (See the barrier sketch at gc_mut_update below.)

 PHASE 2 - multi-threaded mutator (IE, more than one stack thread):

   - how does the collector handle stack objects that reference objects from
     another thread's stack?
     * minor GC will only relocate that thread's objects, so another thread's
       would not be moved. However, if another thread references one of the
       GC'd thread's stack objects, it will now get a forwarding pointer.
       Even worse, what if the other thread is blocked and the reference
       becomes corrupt due to the stack longjmp? There are major issues with
       one thread referencing another thread's objects.
     * had considered adding a stack bit to the object header. If we do this
       and initialize it during object creation, a thread could in theory
       detect if an object belongs to another thread. But it might be
       expensive, because a read barrier would have to be used to check the
       object's stack bit and address (to see if it is on this heap).
     * alternatively, how would one thread pick up a reference to another
       one's objects? Are there any ways to detect these events and deal with
       them? It might be possible to detect such a case and allocate the
       object on the heap, replacing it with a fwd pointer. Unfortunately
       that means we need a read barrier (ick) to handle forwarding pointers
       in arbitrary places.
     * but does that mean we need a fwd pointer to be live for a while? Do we
       need a read barrier to get this to work? Obviously we want to avoid a
       read barrier at all costs.
   - what are the real costs of allowing forwarding pointers to exist outside
     of just minor GC? Assume each runtime primitive would need to be updated
     to handle the case where the obj is a fwd pointer - is it just a matter
     of each function detecting this and (possibly) calling itself again with
     the 'real' address? Obviously that makes the runtime slower due to more
     checks, but maybe it is not *so* bad?
*/

// tri-color GC section, WIP
//
// Note: will need to use atomics and/or locking to access any
// variables shared between threads
typedef enum { STATUS_ASYNC
             , STATUS_SYNC1
             , STATUS_SYNC2
             } gc_status_type;

typedef enum { STAGE_CLEAR_OR_MARKING
             , STAGE_TRACING
             , STAGE_REF_PROCESSING
             , STAGE_SWEEPING
             , STAGE_RESTING
             } gc_stage_type;

static int gc_color_mark = 0; // Black
static const int gc_color_grey = 1;
static int gc_color_clear = 2; // White
static const int gc_color_blue = 3;
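
// The mark and clear colors are variables (while grey and blue are const),
// presumably so the collector can flip them at the end of a cycle: every
// surviving "black" object instantly becomes "white" for the next cycle
// without touching any object headers. A sketch of that flip (an assumption,
// not implemented anywhere in this file yet):
//
//   int tmp = gc_color_clear;
//   gc_color_clear = gc_color_mark;
//   gc_color_mark = tmp;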

static int gc_status_col;
static int gc_stage;

// Does not need sync, only used by collector thread
static void **mark_stack = NULL;
static int mark_stack_len = 128;
static int mark_stack_i = 0;

// GC functions called by the Mutator threads

void gc_mut_update()
{
  // TODO: how does this fit in with the write buffer?
  // this part is important, especially during tracing
}
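// One possible shape for this barrier (a sketch only - the thd/old_obj
// parameters are assumptions, not part of the current design): whenever a
// mutator overwrites a heap reference while the collector is tracing, gray
// the old target so the snapshot being traced cannot lose it:
//
//   void gc_mut_update(gc_thread_data *thd, object old_obj)
//   {
//     if (gc_stage == STAGE_TRACING) { // TODO: needs synchronized access
//       gc_mark_gray(thd, old_obj);   // Yuasa-style deletion barrier
//     }
//   }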

// Done as part of gc_move.
// Ideally we want to do this without needing sync, but we need to sync to
// get markColor in coop.
//void gc_mut_create()

// TODO: when is this called, is this good enough, etc??
void gc_mut_cooperate(gc_thread_data *thd)
{
  if (thd->gc_mut_status == gc_status_col) { // TODO: synchronization of var access
    if (thd->gc_mut_status == STATUS_SYNC2) { // TODO: more sync??
      // Since everything is on the stack, at this point we probably only need
      // to worry about anything on the stack that is referencing a heap object.
      // For each x in roots:
      //   MarkGray(x)
      thd->gc_alloc_color = gc_color_mark; // TODO: synchronization for global??
    }
    thd->gc_mut_status = gc_status_col; // TODO: synchronization??
  }
}

// Collector functions
void gc_mark_gray(gc_thread_data *thd, object obj)
{
  if (is_object_type(obj) && mark(obj) == gc_color_clear) { // TODO: sync??
    // TODO: lock mark buffer (not ideal, but a possible first step)?
    // pthread_mutex_lock
    thd->mark_buffer = vpbuffer_add(thd->mark_buffer,
                                    &(thd->mark_buffer_len),
                                    thd->last_write,
                                    obj);
    // pthread_mutex_unlock
    ATOMIC_INC(&(thd->last_write));
  }
}

void gc_col_mark_gray(object obj)
{
  if (is_object_type(obj) && mark(obj) == gc_color_clear) { // TODO: sync??
    mark_stack = vpbuffer_add(mark_stack, &mark_stack_len, mark_stack_i++, obj);
  }
}
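// Note the two gray paths above: mutators queue objects into their own
// per-thread mark_buffer at the last_write cursor (gc_mark_gray), while the
// collector pushes onto its private mark_stack (gc_col_mark_gray), which
// needs no synchronization.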

void gc_col_empty_collector_stack()
{
  // TODO, roughly (markBlack does not exist yet):
  // while (mark_stack_i > 0)
  //   markBlack(mark_stack[--mark_stack_i]);
}

// GC Collection cycle

// END tri-color marking section

//// Unit testing:
//int main(int argc, char **argv) {
//  int a = 1, b = 2, c = 3, i;
//  void **buf = NULL;
//  int size = 1;
//
//  buf = vpbuffer_realloc(buf, &size);
//  printf("buf = %p, size = %d\n", buf, size);
//  buf = vpbuffer_add(buf, &size, 0, &a);
//  printf("buf = %p, size = %d\n", buf, size);
//  buf = vpbuffer_add(buf, &size, 1, &b);
//  printf("buf = %p, size = %d\n", buf, size);
//  buf = vpbuffer_add(buf, &size, 2, &c);
//  printf("buf = %p, size = %d\n", buf, size);
//  buf = vpbuffer_add(buf, &size, 3, &a);
//  printf("buf = %p, size = %d\n", buf, size);
//  buf = vpbuffer_add(buf, &size, 4, &b);
//  printf("buf = %p, size = %d\n", buf, size);
//  for (i = 5; i < 20; i++) {
//    buf = vpbuffer_add(buf, &size, i, &c);
//  }
//
//  for (i = 0; i < 20; i++) {
//    printf("%d\n", *((int *) buf[i]));
//  }
//  vpbuffer_free(buf);
//  printf("buf = %p, size = %d\n", buf, size);
//  return 0;
//}
//