/***********************************************************************/
/*                                                                     */
/*                           Objective Caml                            */
/*                                                                     */
/*             Damien Doligez, projet Para, INRIA Rocquencourt         */
/*                                                                     */
/*  Copyright 1996 Institut National de Recherche en Informatique et   */
/*  Automatique.  Distributed only by permission.                      */
/*                                                                     */
/***********************************************************************/

/* $Id: major_gc.c,v 1.16 1997/09/02 12:54:03 xleroy Exp $ */
#include "multithreads.h"
#include "compact.h"
#include "config.h"
#include "fail.h"
#include "freelist.h"
#include "gc.h"
#include "gc_ctrl.h"
#include "major_gc.h"
#include "misc.h"
#include "mlvalues.h"
#include "roots.h"
#include "weak.h"

#ifdef __STDC__
#include <limits.h>
#else
#ifdef ARCH_SIXTYFOUR
#define LONG_MAX 0x7FFFFFFFFFFFFFFF
#else
#define LONG_MAX 0x7FFFFFFF
#endif
#endif

//_//TLS unsigned long percent_free;
//_//TLS long major_heap_increment;
//_//TLS char *heap_start, *heap_end;
//_//TLS page_table_entry *page_table;
//_//TLS asize_t page_table_size;
//_//TLS char *gc_sweep_hp;
//_//TLS int gc_phase;
//_//TLS value *gray_vals;
//TLS static value *gray_vals;
//_//TLS value *gray_vals_cur, *gray_vals_end;
//_//TLS asize_t gray_vals_size;
//_//TLS int heap_is_pure;   /* The heap is pure if the only gray objects
//_//                              below [markhp] are also in [gray_vals]. */
//TLS static asize_t gray_vals_size;
//TLS static int heap_is_pure;   /* The heap is pure if the only gray objects
//                             below [markhp] are also in [gray_vals]. */
//_//TLS unsigned long allocated_words;
//_//TLS unsigned long extra_heap_memory;
//_//extern TLS char *fl_merge;  /* Defined in freelist.c. */

//_//TLS char *markhp, *chunk, *limit;
//TLS static char *markhp, *chunk, *limit;

static void update_weak_pointers (void);

static void realloc_gray_vals (void)
{
  value *new;
int threadId = getThreadId(getPeThread());

  Assert (tls[threadId].gray_vals_cur == tls[threadId].gray_vals_end);
  if (tls[threadId].gray_vals_size < tls[threadId].stat_heap_size / 128){
    gc_message ("Growing gray_vals to %luk bytes\n",
                (long) tls[threadId].gray_vals_size * sizeof (value) / 512);
    new = (value *) realloc ((char *) tls[threadId].gray_vals,
                             2 * tls[threadId].gray_vals_size * sizeof (value));
    if (new == NULL){
      gc_message ("No room for growing gray_vals\n", 0);
      tls[threadId].gray_vals_cur = tls[threadId].gray_vals;
      tls[threadId].heap_is_pure = 0;
    }else{
      tls[threadId].gray_vals = new;
      tls[threadId].gray_vals_cur = tls[threadId].gray_vals + tls[threadId].gray_vals_size;
      tls[threadId].gray_vals_size *= 2;
      tls[threadId].gray_vals_end = tls[threadId].gray_vals + tls[threadId].gray_vals_size;
    }
  }else{
    tls[threadId].gray_vals_cur = tls[threadId].gray_vals + tls[threadId].gray_vals_size / 2;
    tls[threadId].heap_is_pure = 0;
  }
}

void darken (value v, value *p)

                /* not used */
{
int threadId = getThreadId(getPeThread());
  if (Is_block (v) && Is_in_heap (v)) {
    if (Tag_val(v) == Infix_tag) v -= Infix_offset_val(v);
    if (Is_white_val (v)){
      Hd_val (v) = Grayhd_hd (Hd_val (v));
      *tls[threadId].gray_vals_cur++ = v;
      if (tls[threadId].gray_vals_cur >= tls[threadId].gray_vals_end) realloc_gray_vals ();
    }
  }
}

static void start_cycle (void)
{
int threadId = getThreadId(getPeThread());
  Assert (tls[threadId].gc_phase == Phase_idle);
  Assert (tls[threadId].gray_vals_cur == tls[threadId].gray_vals);
  darken_all_roots();
  tls[threadId].gc_phase = Phase_mark;
  tls[threadId].markhp = NULL;
}

static void mark_slice (long int work)
{
  value *gray_vals_ptr;  /* Local copy of gray_vals_cur */
  value v, child;
  header_t hd;
  mlsize_t size, i;
int threadId = getThreadId(getPeThread());

  gray_vals_ptr = tls[threadId].gray_vals_cur;
  while (work > 0){
    if (gray_vals_ptr > tls[threadId].gray_vals){
      v = *--gray_vals_ptr;
      hd = Hd_val(v);
      Assert (Is_gray_hd (hd));
      Hd_val (v) = Blackhd_hd (hd);
      size = Wosize_hd(hd);
      if (Tag_hd (hd) < No_scan_tag){
        for (i = 0; i < size; i++){
          child = Field (v, i);
          if (Is_block (child) && Is_in_heap (child)) {
            hd = Hd_val(child);
            if (Tag_hd(hd) == Infix_tag) {
              child -= Infix_offset_val(child);
              hd = Hd_val(child);
            }
            if (Is_white_hd (hd)){
              Hd_val (child) = Grayhd_hd (hd);
              *gray_vals_ptr++ = child;
              if (gray_vals_ptr >= tls[threadId].gray_vals_end) {
                tls[threadId].gray_vals_cur = gray_vals_ptr;
                realloc_gray_vals ();
                gray_vals_ptr = tls[threadId].gray_vals_cur;
              }
            }
          }
        }
      }
      work -= Whsize_wosize(size);
    }else if (tls[threadId].markhp != NULL){
      if (tls[threadId].markhp == tls[threadId].limit){
        tls[threadId].chunk = Chunk_next (tls[threadId].chunk);
        if (tls[threadId].chunk == NULL){
          tls[threadId].markhp = NULL;
        }else{
          tls[threadId].markhp = tls[threadId].chunk;
          tls[threadId].limit = tls[threadId].chunk + Chunk_size (tls[threadId].chunk);
        }
      }else{
        if (Is_gray_val (Val_hp (tls[threadId].markhp))){
          Assert (gray_vals_ptr == tls[threadId].gray_vals);
          *gray_vals_ptr++ = Val_hp (tls[threadId].markhp);
        }
        tls[threadId].markhp += Bhsize_hp (tls[threadId].markhp);
      }
    }else if (!tls[threadId].heap_is_pure){
      tls[threadId].heap_is_pure = 1;
      tls[threadId].chunk = tls[threadId].heap_start;
      tls[threadId].markhp = tls[threadId].chunk;
      tls[threadId].limit = tls[threadId].chunk + Chunk_size (tls[threadId].chunk);
    }else{
      /* Marking is done. */

      update_weak_pointers ();

      /* Initialise the sweep phase. */
      tls[threadId].gray_vals_cur = gray_vals_ptr;
      tls[threadId].gc_sweep_hp = tls[threadId].heap_start;
      fl_init_merge ();
      tls[threadId].gc_phase = Phase_sweep;
      tls[threadId].chunk = tls[threadId].heap_start;
      tls[threadId].gc_sweep_hp = tls[threadId].chunk;
      tls[threadId].limit = tls[threadId].chunk + Chunk_size (tls[threadId].chunk);
      work = 0;
    }
  }
  tls[threadId].gray_vals_cur = gray_vals_ptr;
}

/* Walk through the linked list of weak arrays.
   Arrays that are white are removed from this list.
   For the other arrays, pointers to white objects are erased.
*/
static void update_weak_pointers (void)
{
int threadId = getThreadId(getPeThread());
  value *prev = &tls[threadId].weak_list_head;
  value *cur = (value *) *prev;
  mlsize_t sz, i;

  while (cur != NULL){
    if (Color_val (cur) == White){
      *prev = Field (cur, 0);
      cur = (value *) *prev;
    }else{
      value curfield;

      sz = Wosize_val (cur);
      for (i = 1; i < sz; i++){
        curfield = Field (cur, i);
        if (curfield != 0 && Is_block (curfield) && Is_white_val (curfield)){
          Field (cur, i) = 0;
        }
      }
      prev = &Field (cur, 0);
      cur = (value *) *prev;
    }
  }
}

static void sweep_slice (long int work)
{
  char *hp;
  header_t hd;
int threadId = getThreadId(getPeThread());

  while (work > 0){
    if (tls[threadId].gc_sweep_hp < tls[threadId].limit){
      hp = tls[threadId].gc_sweep_hp;
      hd = Hd_hp (hp);
      work -= Whsize_hd (hd);
      tls[threadId].gc_sweep_hp += Bhsize_hd (hd);
      switch (Color_hd (hd)){
      case White:
        if (Tag_hd (hd) == Final_tag){
          Final_fun (Val_hp (hp)) (Val_hp (hp));
        }
        tls[threadId].gc_sweep_hp = fl_merge_block (Bp_hp (hp));
        break;
      case Blue:
        /* Only the blocks of the free-list are blue.  See [freelist.c]. */
        tls[threadId].fl_merge = Bp_hp (hp);
        break;
      default:          /* Gray or Black */
        Assert(Color_hd(hd) == Black);
        Hd_hp (hp) = Whitehd_hd (hd);
        break;
      }
      Assert (tls[threadId].gc_sweep_hp <= tls[threadId].limit);
    }else{
      tls[threadId].chunk = Chunk_next (tls[threadId].chunk);
      if (tls[threadId].chunk == NULL){
        /* Sweeping is done. */
        ++tls[threadId].stat_major_collections;
        work = 0;
        tls[threadId].gc_phase = Phase_idle;
      }else{
        tls[threadId].gc_sweep_hp = tls[threadId].chunk;
        tls[threadId].limit = tls[threadId].chunk + Chunk_size (tls[threadId].chunk);
      }
    }
  }
}

/* The main entry point for the GC.  Called at each minor GC. */
void major_collection_slice (void)
{
  /*
     Free memory at the start of the GC cycle (garbage + free list) (assumed):
                 FM = stat_heap_size * percent_free / (100 + percent_free)
     Garbage at the start of the GC cycle:
                 G = FM * 2/3
     Proportion of free memory consumed since the previous slice:
                 PH = allocated_words / G
     Proportion of extra-heap memory consumed since the previous slice:
                 PE = extra_heap_memory / stat_heap_size
     Proportion of total work to do in this slice:
                 P  = PH + PE
     Amount of marking work for the GC cycle:
                 MW = stat_heap_size * 100 / (100 + percent_free)
     Amount of sweeping work for the GC cycle:
                 SW = stat_heap_size
     Amount of marking work for this slice:
                 MS = MW * P
                 MS = 3/2 * 100 * allocated_words / percent_free
                      + extra_heap_memory * 100 / (100 + percent_free)
     Amount of sweeping work for this slice:
                 SS = SW * P
                 SS = 3/2 * (100 + percent_free)/percent_free * allocated_words
                      + extra_heap_memory
     This slice will either mark 2*MS words or sweep 2*SS words.
  */
int threadId = getThreadId(getPeThread());

#define Margin 100  /* Make it a little faster to be on the safe side. */

  if (tls[threadId].gc_phase == Phase_idle) start_cycle ();

  if (tls[threadId].gc_phase == Phase_mark){
    mark_slice (300 * (tls[threadId].allocated_words / tls[threadId].percent_free + 1)
                + 200 * (tls[threadId].extra_heap_memory / (100 + tls[threadId].percent_free) + 1)
		+ Margin);
    gc_message ("!", 0);
  }else{
    Assert (tls[threadId].gc_phase == Phase_sweep);
    sweep_slice (3 * (100 + tls[threadId].percent_free) * (tls[threadId].allocated_words / tls[threadId].percent_free + 1)
                 + 2 * tls[threadId].extra_heap_memory
                 + Margin);
    gc_message ("$", 0);
  }

  if (tls[threadId].gc_phase == Phase_idle) compact_heap_maybe ();

  tls[threadId].stat_major_words += tls[threadId].allocated_words;
  tls[threadId].allocated_words = 0;
  tls[threadId].extra_heap_memory = 0;
}

/* The minor heap must be empty when this function is called. */
/* This does not call compact_heap_maybe because the estimations of
   free and live memory are only valid for a cycle done incrementally.
   Besides, this function is called by compact_heap_maybe.
*/
void finish_major_cycle (void)
{
int threadId = getThreadId(getPeThread());
  if (tls[threadId].gc_phase == Phase_idle) start_cycle ();
  if (tls[threadId].gc_phase == Phase_mark) mark_slice (LONG_MAX);
  Assert (tls[threadId].gc_phase == Phase_sweep);
  sweep_slice (LONG_MAX);
  Assert (tls[threadId].gc_phase == Phase_idle);
  tls[threadId].stat_major_words += tls[threadId].allocated_words;
  tls[threadId].allocated_words = 0;
}

asize_t round_heap_chunk_size (asize_t request)
{
int threadId = getThreadId(getPeThread());
						Assert (tls[threadId].major_heap_increment >= Heap_chunk_min);
  if (request < tls[threadId].major_heap_increment){
                              Assert (tls[threadId].major_heap_increment % Page_size == 0);
    return tls[threadId].major_heap_increment;
  }else if (request <= Heap_chunk_max){
    return ((request + Page_size - 1) >> Page_log) << Page_log;
  }else{
    raise_out_of_memory ();
    /* not reached */ return 0;
  }
}

void init_major_heap (asize_t heap_size)
{
  asize_t i;
  void *block;
int threadId = getThreadId(getPeThread());

  tls[threadId].stat_heap_size = round_heap_chunk_size (heap_size);
  Assert (tls[threadId].stat_heap_size % Page_size == 0);

  tls[threadId].heap_start = aligned_malloc (tls[threadId].stat_heap_size + sizeof (heap_chunk_head),
                               sizeof (heap_chunk_head), &block);
  if (tls[threadId].heap_start == NULL)
    fatal_error ("Fatal error: not enough memory for the initial heap.\n");
  tls[threadId].heap_start += sizeof (heap_chunk_head);
  Assert ((unsigned long) tls[threadId].heap_start % Page_size == 0);
  Chunk_size (tls[threadId].heap_start) = tls[threadId].stat_heap_size;
  Chunk_next (tls[threadId].heap_start) = NULL;
  Chunk_block (tls[threadId].heap_start) = block;
  tls[threadId].heap_end = tls[threadId].heap_start + tls[threadId].stat_heap_size;
  Assert ((unsigned long) tls[threadId].heap_end % Page_size == 0);
  tls[threadId].page_table_size = 4 * tls[threadId].stat_heap_size / Page_size;
  tls[threadId].page_table =
	  (page_table_entry *) malloc (tls[threadId].page_table_size * sizeof(page_table_entry));
  if (tls[threadId].page_table == NULL)
    fatal_error ("Fatal error: not enough memory for the initial heap.\n");
  for (i = 0; i < tls[threadId].page_table_size; i++){
    tls[threadId].page_table [i] = Not_in_heap;
  }
  for (i = Page (tls[threadId].heap_start); i < Page (tls[threadId].heap_end); i++){
    tls[threadId].page_table [i] = In_heap;
  }
  Hd_hp (tls[threadId].heap_start) = Make_header (Wosize_bhsize (tls[threadId].stat_heap_size), 0, Blue);
  fl_init_merge ();
  fl_merge_block (Bp_hp (tls[threadId].heap_start));
  tls[threadId].gc_phase = Phase_idle;
  tls[threadId].gray_vals_size = 2048;
  tls[threadId].gray_vals = (value *) malloc (tls[threadId].gray_vals_size * sizeof (value));
  if (tls[threadId].gray_vals == NULL)
    fatal_error ("Fatal error: not enough memory for the initial heap.\n");
  tls[threadId].gray_vals_cur = tls[threadId].gray_vals;
  tls[threadId].gray_vals_end = tls[threadId].gray_vals + tls[threadId].gray_vals_size;
  tls[threadId].heap_is_pure = 1;
  tls[threadId].allocated_words = 0;
  tls[threadId].extra_heap_memory = 0;
}
