shell bypass 403

GrazzMean Shell

Uname: Linux web3.us.cloudlogin.co 5.10.226-xeon-hst #2 SMP Fri Sep 13 12:28:44 UTC 2024 x86_64
Software: Apache
PHP version: 8.1.31 [ PHP INFO ] PHP os: Linux
Server Ip: 162.210.96.117
Your Ip: 3.129.23.30
User: edustar (269686) | Group: tty (888)
Safe Mode: OFF
Disable Function:
NONE

name : falcon.diff
diff --git a/common.mk b/common.mk
index eb89a2b..59cdfe4 100644
--- a/common.mk
+++ b/common.mk
@@ -630,7 +630,8 @@ file.$(OBJEXT): {$(VPATH)}file.c $(RUBY_H_INCLUDES) {$(VPATH)}io.h \
 gc.$(OBJEXT): {$(VPATH)}gc.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \
   {$(VPATH)}regex.h $(ENCODING_H_INCLUDES) $(VM_CORE_H_INCLUDES) \
   {$(VPATH)}gc.h {$(VPATH)}io.h {$(VPATH)}eval_intern.h {$(VPATH)}util.h \
-  {$(VPATH)}debug.h {$(VPATH)}internal.h {$(VPATH)}constant.h
+  {$(VPATH)}debug.h {$(VPATH)}internal.h {$(VPATH)}constant.h \
+  {$(VPATH)}pool_alloc.inc.h {$(VPATH)}pool_alloc.h
 hash.$(OBJEXT): {$(VPATH)}hash.c $(RUBY_H_INCLUDES) {$(VPATH)}util.h \
   $(ENCODING_H_INCLUDES)
 inits.$(OBJEXT): {$(VPATH)}inits.c $(RUBY_H_INCLUDES) \
@@ -693,7 +694,7 @@ signal.$(OBJEXT): {$(VPATH)}signal.c $(RUBY_H_INCLUDES) \
   $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h
 sprintf.$(OBJEXT): {$(VPATH)}sprintf.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \
   {$(VPATH)}regex.h {$(VPATH)}vsnprintf.c $(ENCODING_H_INCLUDES)
-st.$(OBJEXT): {$(VPATH)}st.c $(RUBY_H_INCLUDES)
+st.$(OBJEXT): {$(VPATH)}st.c $(RUBY_H_INCLUDES) {$(VPATH)}pool_alloc.h
 strftime.$(OBJEXT): {$(VPATH)}strftime.c $(RUBY_H_INCLUDES) \
   {$(VPATH)}timev.h
 string.$(OBJEXT): {$(VPATH)}string.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \
diff --git a/configure.in b/configure.in
index 6d24689..1e15b10 100644
--- a/configure.in
+++ b/configure.in
@@ -1315,6 +1315,29 @@ main() {
 CFLAGS="$save_CFLAGS"])
 AC_DEFINE_UNQUOTED(GC_MARK_STACKFRAME_WORD, $rb_cv_gc_mark_stackframe_word)
 
+AS_CASE(["$target_os"],
+[openbsd*], [
+  AC_CACHE_CHECK(for heap align log on openbsd, rb_cv_page_size_log,
+    [rb_cv_page_size_log=no
+     for page_log in 12 13; do
+       AC_TRY_RUN([
+#include <math.h>
+#include <unistd.h>
+
+int
+main() {
+  if ((int)log2((double)sysconf(_SC_PAGESIZE)) != $page_log) return 1;
+  return 0;
+}
+       ],
+       rb_cv_page_size_log="$page_log"; break)
+     done])
+  if test $rb_cv_page_size_log != no; then
+    AC_DEFINE_UNQUOTED(HEAP_ALIGN_LOG, $rb_cv_page_size_log)
+  else
+    AC_DEFINE_UNQUOTED(HEAP_ALIGN_LOG, 12)
+  fi
+])
 
 dnl Checks for library functions.
 AC_TYPE_GETGROUPS
@@ -1415,7 +1438,8 @@ AC_CHECK_FUNCS(fmod killpg wait4 waitpid fork spawnv syscall __syscall chroot ge
 	      setsid telldir seekdir fchmod cosh sinh tanh log2 round\
 	      setuid setgid daemon select_large_fdset setenv unsetenv\
               mktime timegm gmtime_r clock_gettime gettimeofday poll ppoll\
-              pread sendfile shutdown sigaltstack dl_iterate_phdr)
+              pread sendfile shutdown sigaltstack dl_iterate_phdr\
+              dup3 pipe2 posix_memalign memalign)
 
 AC_CACHE_CHECK(for unsetenv returns a value, rb_cv_unsetenv_return_value,
   [AC_TRY_COMPILE([
diff --git a/ext/-test-/st/numhash/numhash.c b/ext/-test-/st/numhash/numhash.c
index e186cd4..53d9e1b 100644
--- a/ext/-test-/st/numhash/numhash.c
+++ b/ext/-test-/st/numhash/numhash.c
@@ -54,7 +54,7 @@ numhash_i(st_data_t key, st_data_t value, st_data_t arg, int error)
 static VALUE
 numhash_each(VALUE self)
 {
-    return st_foreach((st_table *)DATA_PTR(self), numhash_i, self) ? Qtrue : Qfalse;
+    return st_foreach_check((st_table *)DATA_PTR(self), numhash_i, self, 0) ? Qtrue : Qfalse;
 }
 
 void
diff --git a/file.c b/file.c
index 67b281a..e89a559 100644
--- a/file.c
+++ b/file.c
@@ -164,6 +164,10 @@ rb_get_path_check(VALUE obj, int level)
     }
     StringValue(tmp);
 
+    if (RBASIC(obj)->klass == rb_cExpandedPath) {
+        return obj;
+    }
+
     tmp = file_path_convert(tmp);
     if (obj != tmp && insecure_obj_p(tmp, level)) {
 	rb_insecure_operation();
@@ -2864,6 +2868,16 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result)
     BUFINIT();
     tainted = OBJ_TAINTED(fname);
 
+    if (RBASIC(fname)->klass == rb_cExpandedPath) {
+        size_t dlen = RSTRING_LEN(fname);
+        BUFCHECK(dlen > buflen);
+        strncpy(buf, RSTRING_PTR(fname), dlen + 1);
+        rb_str_set_len(result, dlen);
+        rb_enc_associate(result, rb_enc_check(result, fname));
+        ENC_CODERANGE_CLEAR(result);
+        return result;
+    }
+
     if (s[0] == '~' && abs_mode == 0) {      /* execute only if NOT absolute_path() */
 	long userlen = 0;
 	tainted = 1;
diff --git a/gc.c b/gc.c
index e65d0ec..a72a855 100644
--- a/gc.c
+++ b/gc.c
@@ -20,10 +20,12 @@
 #include "vm_core.h"
 #include "internal.h"
 #include "gc.h"
+#include "pool_alloc.h"
 #include "constant.h"
 #include <stdio.h>
 #include <setjmp.h>
 #include <sys/types.h>
+#include <assert.h>
 
 #ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>
@@ -35,7 +37,12 @@
 
 #if defined _WIN32 || defined __CYGWIN__
 #include <windows.h>
+#elif defined(HAVE_POSIX_MEMALIGN)
+#elif defined(HAVE_MEMALIGN)
+#include <malloc.h>
 #endif
+static void aligned_free(void *);
+static void *aligned_malloc(size_t alignment, size_t size);
 
 #ifdef HAVE_VALGRIND_MEMCHECK_H
 # include <valgrind/memcheck.h>
@@ -84,10 +91,12 @@ typedef struct {
     unsigned int initial_malloc_limit;
     unsigned int initial_heap_min_slots;
     unsigned int initial_free_min;
+#if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
+#endif
 } ruby_gc_params_t;
 
-ruby_gc_params_t initial_params = {
+static ruby_gc_params_t initial_params = {
     GC_MALLOC_LIMIT,
     HEAP_MIN_SLOTS,
     FREE_MIN,
@@ -103,7 +112,10 @@ ruby_gc_params_t initial_params = {
 int ruby_gc_debug_indent = 0;
 
 /* for GC profile */
+#ifndef GC_PROFILE_MORE_DETAIL
 #define GC_PROFILE_MORE_DETAIL 0
+#endif
+
 typedef struct gc_profile_record {
     double gc_time;
     double gc_mark_time;
@@ -301,17 +313,20 @@ typedef struct RVALUE {
 #endif
 
 struct heaps_slot {
-    void *membase;
-    RVALUE *slot;
-    size_t limit;
+    struct heaps_header *membase;
+    RVALUE *freelist;
     struct heaps_slot *next;
     struct heaps_slot *prev;
+    struct heaps_slot *free_next;
+    uintptr_t bits[1];
 };
 
-struct sorted_heaps_slot {
+struct heaps_header {
+    struct heaps_slot *base;
+    uintptr_t *bits;
     RVALUE *start;
     RVALUE *end;
-    struct heaps_slot *slot;
+    size_t limit;
 };
 
 struct gc_list {
@@ -319,7 +334,27 @@ struct gc_list {
     struct gc_list *next;
 };
 
+#ifndef CALC_EXACT_MALLOC_SIZE
 #define CALC_EXACT_MALLOC_SIZE 0
+#endif
+
+#ifdef POOL_ALLOC_API
+/* POOL ALLOC API */
+#define POOL_ALLOC_PART 1
+#include "pool_alloc.inc.h"
+#undef POOL_ALLOC_PART
+
+typedef struct pool_layout_t pool_layout_t;
+struct pool_layout_t {
+    pool_header
+      p6,  /* st_table && st_table_entry */
+      p11;  /* st_table.bins init size */
+} pool_layout = {
+    INIT_POOL(void*[6]),
+    INIT_POOL(void*[11])
+};
+static void pool_finalize_header(pool_header *header);
+#endif
 
 typedef struct rb_objspace {
     struct {
@@ -330,16 +365,19 @@ typedef struct rb_objspace {
 	size_t allocations;
 #endif
     } malloc_params;
+#ifdef POOL_ALLOC_API
+    pool_layout_t *pool_headers;
+#endif
     struct {
 	size_t increment;
 	struct heaps_slot *ptr;
 	struct heaps_slot *sweep_slots;
-	struct sorted_heaps_slot *sorted;
+	struct heaps_slot *free_slots;
+	struct heaps_header **sorted;
 	size_t length;
 	size_t used;
-	RVALUE *freelist;
+        struct heaps_slot *reserve_slots;
 	RVALUE *range[2];
-	RVALUE *freed;
 	size_t live_num;
 	size_t free_num;
 	size_t free_min;
@@ -350,6 +388,7 @@ typedef struct rb_objspace {
 	int dont_gc;
 	int dont_lazy_sweep;
 	int during_gc;
+	rb_atomic_t finalizing;
     } flags;
     struct {
 	st_table *table;
@@ -377,7 +416,11 @@ typedef struct rb_objspace {
 #define ruby_initial_gc_stress	initial_params.gc_stress
 int *ruby_initial_gc_stress_ptr = &ruby_initial_gc_stress;
 #else
+#  ifdef POOL_ALLOC_API
+static rb_objspace_t rb_objspace = {{GC_MALLOC_LIMIT}, &pool_layout, {HEAP_MIN_SLOTS}};
+#  else
 static rb_objspace_t rb_objspace = {{GC_MALLOC_LIMIT}, {HEAP_MIN_SLOTS}};
+#  endif
 int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress;
 #endif
 #define malloc_limit		objspace->malloc_params.limit
@@ -385,13 +428,12 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress;
 #define heaps			objspace->heap.ptr
 #define heaps_length		objspace->heap.length
 #define heaps_used		objspace->heap.used
-#define freelist		objspace->heap.freelist
 #define lomem			objspace->heap.range[0]
 #define himem			objspace->heap.range[1]
 #define heaps_inc		objspace->heap.increment
-#define heaps_freed		objspace->heap.freed
 #define dont_gc 		objspace->flags.dont_gc
 #define during_gc		objspace->flags.during_gc
+#define finalizing		objspace->flags.finalizing
 #define finalizer_table 	objspace->final.table
 #define deferred_final_list	objspace->final.deferred
 #define mark_stack		objspace->markstack.buffer
@@ -403,7 +445,16 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress;
 #define initial_heap_min_slots	initial_params.initial_heap_min_slots
 #define initial_free_min	initial_params.initial_free_min
 
+#define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)
+
+#define nonspecial_obj_id(obj) (VALUE)((SIGNED_VALUE)(obj)|FIXNUM_FLAG)
+
+#define HEAP_HEADER(p) ((struct heaps_header *)(p))
+
 static void rb_objspace_call_finalizer(rb_objspace_t *objspace);
+static VALUE define_final0(VALUE obj, VALUE block);
+VALUE rb_define_final(VALUE obj, VALUE block);
+VALUE rb_undefine_final(VALUE obj);
 
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
 rb_objspace_t *
@@ -413,6 +464,10 @@ rb_objspace_alloc(void)
     memset(objspace, 0, sizeof(*objspace));
     malloc_limit = initial_malloc_limit;
     ruby_gc_stress = ruby_initial_gc_stress;
+#ifdef POOL_ALLOC_API
+    objspace->pool_headers = (pool_layout_t*) malloc(sizeof(pool_layout));
+    memcpy(objspace->pool_headers, &pool_layout, sizeof(pool_layout));
+#endif
 
     return objspace;
 }
@@ -478,40 +533,60 @@ rb_objspace_free(rb_objspace_t *objspace)
 	struct gc_list *list, *next;
 	for (list = global_List; list; list = next) {
 	    next = list->next;
-	    free(list);
+	    xfree(list);
 	}
     }
+    if (objspace->heap.reserve_slots) {
+        struct heaps_slot *list, *next;
+        for (list = objspace->heap.reserve_slots; list; list = next) {
+            next = list->free_next;
+            free(list);
+        }
+    }
     if (objspace->heap.sorted) {
 	size_t i;
 	for (i = 0; i < heaps_used; ++i) {
-	    free(objspace->heap.sorted[i].slot->membase);
-	    free(objspace->heap.sorted[i].slot);
+            free(objspace->heap.sorted[i]->base);
+	    aligned_free(objspace->heap.sorted[i]);
 	}
 	free(objspace->heap.sorted);
 	heaps_used = 0;
 	heaps = 0;
     }
+#ifdef POOL_ALLOC_API
+    if (objspace->pool_headers) {
+        pool_finalize_header(&objspace->pool_headers->p6);
+        pool_finalize_header(&objspace->pool_headers->p11);
+        free(objspace->pool_headers);
+    }
+#endif
     free(objspace);
 }
 #endif
 
-/* tiny heap size */
-/* 32KB */
-/*#define HEAP_SIZE 0x8000 */
-/* 128KB */
-/*#define HEAP_SIZE 0x20000 */
-/* 64KB */
-/*#define HEAP_SIZE 0x10000 */
-/* 16KB */
-#define HEAP_SIZE 0x4000
-/* 8KB */
-/*#define HEAP_SIZE 0x2000 */
-/* 4KB */
-/*#define HEAP_SIZE 0x1000 */
-/* 2KB */
-/*#define HEAP_SIZE 0x800 */
-
-#define HEAP_OBJ_LIMIT (unsigned int)(HEAP_SIZE / sizeof(struct RVALUE))
+#ifndef HEAP_ALIGN_LOG
+/* default tiny heap size: 16KB */
+#define HEAP_ALIGN_LOG 15
+#endif
+#define HEAP_ALIGN (1UL << HEAP_ALIGN_LOG)
+#define HEAP_ALIGN_MASK (~(~0UL << HEAP_ALIGN_LOG))
+#define REQUIRED_SIZE_BY_MALLOC (sizeof(size_t) * 5)
+#define HEAP_SIZE (HEAP_ALIGN - REQUIRED_SIZE_BY_MALLOC)
+#define CEILDIV(i, mod) (((i) + (mod) - 1)/(mod))
+
+#define HEAP_OBJ_LIMIT (unsigned int)((HEAP_SIZE - sizeof(struct heaps_header))/sizeof(struct RVALUE))
+#define HEAP_BITMAP_LIMIT CEILDIV(CEILDIV(HEAP_SIZE, sizeof(struct RVALUE)), sizeof(uintptr_t)*8)
+#define HEAP_SLOT_SIZE (sizeof(struct heaps_slot) + (HEAP_BITMAP_LIMIT-1) * sizeof(uintptr_t))
+
+#define GET_HEAP_HEADER(x) (HEAP_HEADER(((uintptr_t)x) & ~(HEAP_ALIGN_MASK)))
+#define GET_HEAP_SLOT(x) (GET_HEAP_HEADER(x)->base)
+#define GET_HEAP_BITMAP(x) (GET_HEAP_HEADER(x)->bits)
+#define NUM_IN_SLOT(p) (((uintptr_t)p & HEAP_ALIGN_MASK)/sizeof(RVALUE))
+#define BITMAP_INDEX(p) (NUM_IN_SLOT(p) / (sizeof(uintptr_t) * 8))
+#define BITMAP_OFFSET(p) (NUM_IN_SLOT(p) & ((sizeof(uintptr_t) * 8)-1))
+#define MARKED_IN_BITMAP(bits, p) (bits[BITMAP_INDEX(p)] & ((uintptr_t)1 << BITMAP_OFFSET(p)))
+#define MARK_IN_BITMAP(bits, p) (bits[BITMAP_INDEX(p)] = bits[BITMAP_INDEX(p)] | ((uintptr_t)1 << BITMAP_OFFSET(p)))
+#define CLEAR_IN_BITMAP(bits, p) (bits[BITMAP_INDEX(p)] &= ~((uintptr_t)1 << BITMAP_OFFSET(p)))
 
 extern st_table *rb_class_tbl;
 
@@ -823,8 +898,10 @@ vm_xfree(rb_objspace_t *objspace, void *ptr)
     size_t size;
     ptr = ((size_t *)ptr) - 1;
     size = ((size_t*)ptr)[0];
-    objspace->malloc_params.allocated_size -= size;
-    objspace->malloc_params.allocations--;
+    if (size) {
+	objspace->malloc_params.allocated_size -= size;
+	objspace->malloc_params.allocations--;
+    }
 #endif
 
     free(ptr);
@@ -894,6 +971,27 @@ ruby_xfree(void *x)
 	vm_xfree(&rb_objspace, x);
 }
 
+#ifdef POOL_ALLOC_API
+/* POOL ALLOC API */
+#define POOL_ALLOC_PART 2
+#include "pool_alloc.inc.h"
+#undef POOL_ALLOC_PART
+
+void
+ruby_xpool_free(void *ptr)
+{
+    pool_free_entry((void**)ptr);
+}
+
+#define CONCRET_POOL_MALLOC(pnts) \
+void * ruby_xpool_malloc_##pnts##p () { \
+    return pool_alloc_entry(&rb_objspace.pool_headers->p##pnts ); \
+}
+CONCRET_POOL_MALLOC(6)
+CONCRET_POOL_MALLOC(11)
+#undef CONCRET_POOL_MALLOC
+
+#endif
 
 /*
  *  call-seq:
@@ -984,70 +1082,140 @@ rb_gc_unregister_address(VALUE *addr)
     }
 }
 
-
 static void
 allocate_sorted_heaps(rb_objspace_t *objspace, size_t next_heaps_length)
 {
-    struct sorted_heaps_slot *p;
-    size_t size;
+    struct heaps_header **p;
+    struct heaps_slot *slot;
+    size_t size, add, i;
 
-    size = next_heaps_length*sizeof(struct sorted_heaps_slot);
+    size = next_heaps_length*sizeof(struct heaps_header *);
+    add = next_heaps_length - heaps_used;
 
     if (heaps_used > 0) {
-	p = (struct sorted_heaps_slot *)realloc(objspace->heap.sorted, size);
+	p = (struct heaps_header **)realloc(objspace->heap.sorted, size);
 	if (p) objspace->heap.sorted = p;
     }
     else {
-	p = objspace->heap.sorted = (struct sorted_heaps_slot *)malloc(size);
+	p = objspace->heap.sorted = (struct heaps_header **)malloc(size);
     }
 
     if (p == 0) {
 	during_gc = 0;
 	rb_memerror();
     }
-    heaps_length = next_heaps_length;
+
+    for (i = 0; i < add; i++) {
+        slot = (struct heaps_slot *)malloc(HEAP_SLOT_SIZE);
+        if (slot == 0) {
+            during_gc = 0;
+            rb_memerror();
+            return;
+        }
+        slot->free_next = objspace->heap.reserve_slots;
+        objspace->heap.reserve_slots = slot;
+    }
+}
+
+static void *
+aligned_malloc(size_t alignment, size_t size)
+{
+    void *res;
+
+#if defined __MINGW32__
+    res = __mingw_aligned_malloc(size, alignment);
+#elif defined _WIN32 && !defined __CYGWIN__
+    res = _aligned_malloc(size, alignment);
+#elif defined(HAVE_POSIX_MEMALIGN)
+    if (posix_memalign(&res, alignment, size) == 0) {
+        return res;
+    }
+    else {
+        return NULL;
+    }
+#elif defined(HAVE_MEMALIGN)
+    res = memalign(alignment, size);
+#else
+    char* aligned;
+    res = malloc(alignment + size + sizeof(void*));
+    aligned = (char*)res + alignment + sizeof(void*);
+    aligned -= ((VALUE)aligned & (alignment - 1));
+    ((void**)aligned)[-1] = res;
+    res = (void*)aligned;
+#endif
+
+#if defined(_DEBUG) || defined(GC_DEBUG)
+    /* alignment must be a power of 2 */
+    assert((alignment - 1) & alignment == 0);
+    assert(alignment % sizeof(void*) == 0);
+#endif
+    return res;
+}
+
+static void
+aligned_free(void *ptr)
+{
+#if defined __MINGW32__
+    __mingw_aligned_free(ptr);
+#elif defined _WIN32 && !defined __CYGWIN__
+    _aligned_free(ptr);
+#elif defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN)
+    free(ptr);
+#else
+    free(((void**)ptr)[-1]);
+#endif
+}
+
+static void
+link_free_heap_slot(rb_objspace_t *objspace, struct heaps_slot *slot)
+{
+    slot->free_next = objspace->heap.free_slots;
+    objspace->heap.free_slots = slot;
+}
+
+static void
+unlink_free_heap_slot(rb_objspace_t *objspace, struct heaps_slot *slot)
+{
+    objspace->heap.free_slots = slot->free_next;
+    slot->free_next = NULL;
 }
 
 static void
 assign_heap_slot(rb_objspace_t *objspace)
 {
-    RVALUE *p, *pend, *membase;
+    RVALUE *p, *pend;
+    struct heaps_header *membase;
     struct heaps_slot *slot;
     size_t hi, lo, mid;
     size_t objs;
 
     objs = HEAP_OBJ_LIMIT;
-    p = (RVALUE*)malloc(HEAP_SIZE);
-    if (p == 0) {
-	during_gc = 0;
-	rb_memerror();
-    }
-    slot = (struct heaps_slot *)malloc(sizeof(struct heaps_slot));
-    if (slot == 0) {
-	xfree(p);
+    membase = (struct heaps_header*)aligned_malloc(HEAP_ALIGN, HEAP_SIZE);
+    if (membase == 0) {
 	during_gc = 0;
 	rb_memerror();
     }
+    assert(objspace->heap.reserve_slots != NULL);
+    slot = objspace->heap.reserve_slots;
+    objspace->heap.reserve_slots = slot->free_next;
     MEMZERO((void*)slot, struct heaps_slot, 1);
 
     slot->next = heaps;
     if (heaps) heaps->prev = slot;
     heaps = slot;
 
-    membase = p;
+    p = (RVALUE*)((VALUE)membase + sizeof(struct heaps_header));
     if ((VALUE)p % sizeof(RVALUE) != 0) {
-	p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE)));
-	if ((HEAP_SIZE - HEAP_OBJ_LIMIT * sizeof(RVALUE)) < (size_t)((char*)p - (char*)membase)) {
-	    objs--;
-	}
+       p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE)));
+       objs = (HEAP_SIZE - (size_t)((VALUE)p - (VALUE)membase))/sizeof(RVALUE);
     }
 
     lo = 0;
     hi = heaps_used;
     while (lo < hi) {
-	register RVALUE *mid_membase;
+	register struct heaps_header *mid_membase;
 	mid = (lo + hi) / 2;
-	mid_membase = objspace->heap.sorted[mid].slot->membase;
+        mid_membase = objspace->heap.sorted[mid];
 	if (mid_membase < membase) {
 	    lo = mid + 1;
 	}
@@ -1059,14 +1227,16 @@ assign_heap_slot(rb_objspace_t *objspace)
 	}
     }
     if (hi < heaps_used) {
-	MEMMOVE(&objspace->heap.sorted[hi+1], &objspace->heap.sorted[hi], struct sorted_heaps_slot, heaps_used - hi);
-    }
-    objspace->heap.sorted[hi].slot = slot;
-    objspace->heap.sorted[hi].start = p;
-    objspace->heap.sorted[hi].end = (p + objs);
+	MEMMOVE(&objspace->heap.sorted[hi+1], &objspace->heap.sorted[hi], struct heaps_header*, heaps_used - hi);
+    }
+    objspace->heap.sorted[hi] = membase;
+    membase->start = p;
+    membase->end = (p + objs);
+    membase->base = heaps;
+    membase->bits = heaps->bits;
+    membase->limit = objs;
     heaps->membase = membase;
-    heaps->slot = p;
-    heaps->limit = objs;
+    memset(heaps->bits, 0, HEAP_BITMAP_LIMIT * sizeof(uintptr_t));
     objspace->heap.free_num += objs;
     pend = p + objs;
     if (lomem == 0 || lomem > p) lomem = p;
@@ -1075,19 +1245,24 @@ assign_heap_slot(rb_objspace_t *objspace)
 
     while (p < pend) {
 	p->as.free.flags = 0;
-	p->as.free.next = freelist;
-	freelist = p;
+	p->as.free.next = heaps->freelist;
+	heaps->freelist = p;
 	p++;
     }
+    link_free_heap_slot(objspace, heaps);
 }
 
 static void
 add_heap_slots(rb_objspace_t *objspace, size_t add)
 {
     size_t i;
+    size_t next_heaps_length;
 
-    if ((heaps_used + add) > heaps_length) {
-        allocate_sorted_heaps(objspace, heaps_used + add);
+    next_heaps_length = heaps_used + add;
+
+    if (next_heaps_length > heaps_length) {
+        allocate_sorted_heaps(objspace, next_heaps_length);
+        heaps_length = next_heaps_length;
     }
 
     for (i = 0; i < add; i++) {
@@ -1137,6 +1312,7 @@ set_heaps_increment(rb_objspace_t *objspace)
 
     if (next_heaps_length > heaps_length) {
 	allocate_sorted_heaps(objspace, next_heaps_length);
+        heaps_length = next_heaps_length;
     }
 }
 
@@ -1159,6 +1335,7 @@ rb_during_gc(void)
 }
 
 #define RANY(o) ((RVALUE*)(o))
+#define has_free_object (objspace->heap.free_slots && objspace->heap.free_slots->freelist)
 
 VALUE
 rb_newobj(void)
@@ -1179,15 +1356,18 @@ rb_newobj(void)
 	}
     }
 
-    if (UNLIKELY(!freelist)) {
+    if (UNLIKELY(!has_free_object)) {
 	if (!gc_lazy_sweep(objspace)) {
 	    during_gc = 0;
 	    rb_memerror();
 	}
     }
 
-    obj = (VALUE)freelist;
-    freelist = freelist->as.free.next;
+    obj = (VALUE)objspace->heap.free_slots->freelist;
+    objspace->heap.free_slots->freelist = RANY(obj)->as.free.next;
+    if (objspace->heap.free_slots->freelist == NULL) {
+        unlink_free_heap_slot(objspace, objspace->heap.free_slots);
+    }
 
     MEMZERO((void*)obj, RVALUE, 1);
 #ifdef GC_DEBUG
@@ -1356,10 +1536,10 @@ gc_mark_all(rb_objspace_t *objspace)
 
     init_mark_stack(objspace);
     for (i = 0; i < heaps_used; i++) {
-	p = objspace->heap.sorted[i].start; pend = objspace->heap.sorted[i].end;
+	p = objspace->heap.sorted[i]->start; pend = objspace->heap.sorted[i]->end;
 	while (p < pend) {
-	    if ((p->as.basic.flags & FL_MARK) &&
-		(p->as.basic.flags != FL_MARK)) {
+	    if (MARKED_IN_BITMAP(GET_HEAP_BITMAP(p), p) &&
+		p->as.basic.flags) {
 		gc_mark_children(objspace, (VALUE)p, 0);
 	    }
 	    p++;
@@ -1387,26 +1567,27 @@ static inline int
 is_pointer_to_heap(rb_objspace_t *objspace, void *ptr)
 {
     register RVALUE *p = RANY(ptr);
-    register struct sorted_heaps_slot *heap;
+    register struct heaps_header *heap;
     register size_t hi, lo, mid;
 
     if (p < lomem || p > himem) return FALSE;
     if ((VALUE)p % sizeof(RVALUE) != 0) return FALSE;
+    heap = GET_HEAP_HEADER(p);
 
     /* check if p looks like a pointer using bsearch*/
     lo = 0;
     hi = heaps_used;
     while (lo < hi) {
 	mid = (lo + hi) / 2;
-	heap = &objspace->heap.sorted[mid];
-	if (heap->start <= p) {
-	    if (p < heap->end)
-		return TRUE;
-	    lo = mid + 1;
-	}
-	else {
-	    hi = mid;
-	}
+        if (heap > objspace->heap.sorted[mid]) {
+            lo = mid + 1;
+        }
+        else if (heap < objspace->heap.sorted[mid]) {
+            hi = mid;
+        }
+        else {
+            return (p >= heap->start && p < heap->end) ? TRUE : FALSE;
+        }
     }
     return FALSE;
 }
@@ -1449,10 +1630,10 @@ struct mark_tbl_arg {
 };
 
 static int
-mark_entry(ID key, VALUE value, st_data_t data)
+mark_entry(st_data_t key, st_data_t value, st_data_t data)
 {
     struct mark_tbl_arg *arg = (void*)data;
-    gc_mark(arg->objspace, value, arg->lev);
+    gc_mark(arg->objspace, (VALUE)value, arg->lev);
     return ST_CONTINUE;
 }
 
@@ -1467,10 +1648,10 @@ mark_tbl(rb_objspace_t *objspace, st_table *tbl, int lev)
 }
 
 static int
-mark_key(VALUE key, VALUE value, st_data_t data)
+mark_key(st_data_t key, st_data_t value, st_data_t data)
 {
     struct mark_tbl_arg *arg = (void*)data;
-    gc_mark(arg->objspace, key, arg->lev);
+    gc_mark(arg->objspace, (VALUE)key, arg->lev);
     return ST_CONTINUE;
 }
 
@@ -1491,11 +1672,11 @@ rb_mark_set(st_table *tbl)
 }
 
 static int
-mark_keyvalue(VALUE key, VALUE value, st_data_t data)
+mark_keyvalue(st_data_t key, st_data_t value, st_data_t data)
 {
     struct mark_tbl_arg *arg = (void*)data;
-    gc_mark(arg->objspace, key, arg->lev);
-    gc_mark(arg->objspace, value, arg->lev);
+    gc_mark(arg->objspace, (VALUE)key, arg->lev);
+    gc_mark(arg->objspace, (VALUE)value, arg->lev);
     return ST_CONTINUE;
 }
 
@@ -1565,7 +1746,9 @@ mark_m_tbl(rb_objspace_t *objspace, st_table *tbl, int lev)
 static int
 free_method_entry_i(ID key, rb_method_entry_t *me, st_data_t data)
 {
-    rb_free_method_entry(me);
+    if (!me->mark) {
+	rb_free_method_entry(me);
+    }
     return ST_CONTINUE;
 }
 
@@ -1622,6 +1805,16 @@ rb_gc_mark_maybe(VALUE obj)
     }
 }
 
+static int
+gc_mark_ptr(rb_objspace_t *objspace, VALUE ptr)
+{
+    register uintptr_t *bits = GET_HEAP_BITMAP(ptr);
+    if (MARKED_IN_BITMAP(bits, ptr)) return 0;
+    MARK_IN_BITMAP(bits, ptr);
+    objspace->heap.live_num++;
+    return 1;
+}
+
 static void
 gc_mark(rb_objspace_t *objspace, VALUE ptr, int lev)
 {
@@ -1630,9 +1823,7 @@ gc_mark(rb_objspace_t *objspace, VALUE ptr, int lev)
     obj = RANY(ptr);
     if (rb_special_const_p(ptr)) return; /* special const not marked */
     if (obj->as.basic.flags == 0) return;       /* free cell */
-    if (obj->as.basic.flags & FL_MARK) return;  /* already marked */
-    obj->as.basic.flags |= FL_MARK;
-    objspace->heap.live_num++;
+    if (!gc_mark_ptr(objspace, ptr)) return;	/* already marked */
 
     if (lev > GC_LEVEL_MAX || (lev == 0 && stack_check(STACKFRAME_FOR_GC_MARK))) {
 	if (!mark_stack_overflow) {
@@ -1659,6 +1850,7 @@ static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
 {
     register RVALUE *obj = RANY(ptr);
+    register uintptr_t *bits;
 
     goto marking;		/* skip */
 
@@ -1666,8 +1858,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
     obj = RANY(ptr);
     if (rb_special_const_p(ptr)) return; /* special const not marked */
     if (obj->as.basic.flags == 0) return;       /* free cell */
-    if (obj->as.basic.flags & FL_MARK) return;  /* already marked */
-    obj->as.basic.flags |= FL_MARK;
+    bits = GET_HEAP_BITMAP(ptr);
+    if (MARKED_IN_BITMAP(bits, ptr)) return;  /* already marked */
+    MARK_IN_BITMAP(bits, ptr);
     objspace->heap.live_num++;
 
   marking:
@@ -1819,6 +2012,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
       case T_CLASS:
       case T_MODULE:
 	mark_m_tbl(objspace, RCLASS_M_TBL(obj), lev);
+	if (!RCLASS_EXT(obj)) break;
 	mark_tbl(objspace, RCLASS_IV_TBL(obj), lev);
 	mark_const_tbl(objspace, RCLASS_CONST_TBL(obj), lev);
 	ptr = RCLASS_SUPER(obj);
@@ -1929,15 +2123,22 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
 
 static int obj_free(rb_objspace_t *, VALUE);
 
-static inline void
-add_freelist(rb_objspace_t *objspace, RVALUE *p)
+static inline struct heaps_slot *
+add_slot_local_freelist(rb_objspace_t *objspace, RVALUE *p)
 {
+    struct heaps_slot *slot;
+
     VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
     p->as.free.flags = 0;
-    p->as.free.next = freelist;
-    freelist = p;
+    slot = GET_HEAP_SLOT(p);
+    p->as.free.next = slot->freelist;
+    slot->freelist = p;
+
+    return slot;
 }
 
+static void free_unused_heap(rb_objspace_t *objspace, struct heaps_header *heap);
+
 static void
 finalize_list(rb_objspace_t *objspace, RVALUE *p)
 {
@@ -1945,17 +2146,16 @@ finalize_list(rb_objspace_t *objspace, RVALUE *p)
 	RVALUE *tmp = p->as.free.next;
 	run_final(objspace, (VALUE)p);
 	if (!FL_TEST(p, FL_SINGLETON)) { /* not freeing page */
-            if (objspace->heap.sweep_slots) {
-                p->as.free.flags = 0;
-            }
-            else {
+            add_slot_local_freelist(objspace, p);
+            if (!is_lazy_sweeping(objspace)) {
                 GC_PROF_DEC_LIVE_NUM;
-                add_freelist(objspace, p);
             }
 	}
 	else {
-	    struct heaps_slot *slot = (struct heaps_slot *)(VALUE)RDATA(p)->dmark;
-	    slot->limit--;
+            struct heaps_header *heap = GET_HEAP_HEADER(p);
+            if (--heap->limit == 0) {
+                free_unused_heap(objspace, heap);
+            }
 	}
 	p = tmp;
     }
@@ -1976,97 +2176,106 @@ unlink_heap_slot(rb_objspace_t *objspace, struct heaps_slot *slot)
     slot->next = NULL;
 }
 
-
 static void
-free_unused_heaps(rb_objspace_t *objspace)
+free_unused_heap(rb_objspace_t *objspace, struct heaps_header *heap)
 {
-    size_t i, j;
-    RVALUE *last = 0;
-
-    for (i = j = 1; j < heaps_used; i++) {
-	if (objspace->heap.sorted[i].slot->limit == 0) {
-	    if (!last) {
-		last = objspace->heap.sorted[i].slot->membase;
-	    }
-	    else {
-		free(objspace->heap.sorted[i].slot->membase);
-	    }
-            free(objspace->heap.sorted[i].slot);
-	    heaps_used--;
-	}
-	else {
-	    if (i != j) {
-		objspace->heap.sorted[j] = objspace->heap.sorted[i];
-	    }
-	    j++;
-	}
-    }
-    if (last) {
-	if (last < heaps_freed) {
-	    free(heaps_freed);
-	    heaps_freed = last;
-	}
-	else {
-	    free(last);
-	}
+    register size_t hi, lo, mid;
+    lo = 0;
+    hi = heaps_used;
+    while (lo < hi) {
+        mid = (lo + hi) / 2;
+        if (heap > objspace->heap.sorted[mid]) {
+            lo = mid + 1;
+        }
+        else if (heap < objspace->heap.sorted[mid]) {
+            hi = mid;
+        }
+        else {
+            /* remove unused heap */
+            struct heaps_slot* h = objspace->heap.sorted[mid]->base;
+            h->free_next = objspace->heap.reserve_slots;
+            objspace->heap.reserve_slots = h;
+            aligned_free(objspace->heap.sorted[mid]);
+            heaps_used--;
+            MEMMOVE(objspace->heap.sorted + mid, objspace->heap.sorted + mid + 1,
+                    struct heaps_header *, heaps_used - mid);
+            return;
+        }
     }
 }
 
 static void
+gc_clear_slot_bits(struct heaps_slot *slot)
+{
+    memset(slot->bits, 0, HEAP_BITMAP_LIMIT * sizeof(uintptr_t));
+}
+
+static void
 slot_sweep(rb_objspace_t *objspace, struct heaps_slot *sweep_slot)
 {
     size_t free_num = 0, final_num = 0;
     RVALUE *p, *pend;
-    RVALUE *free = freelist, *final = deferred_final_list;
+    RVALUE *final = deferred_final_list;
     int deferred;
+    uintptr_t *bits;
 
-    p = sweep_slot->slot; pend = p + sweep_slot->limit;
+    p = sweep_slot->membase->start; pend = sweep_slot->membase->end;
+    bits = sweep_slot->bits;
     while (p < pend) {
-        if (!(p->as.basic.flags & FL_MARK)) {
-            if (p->as.basic.flags &&
-                ((deferred = obj_free(objspace, (VALUE)p)) ||
-		 (FL_TEST(p, FL_FINALIZE)))) {
-                if (!deferred) {
-                    p->as.free.flags = T_ZOMBIE;
-                    RDATA(p)->dfree = 0;
+        if ((!(MARKED_IN_BITMAP(bits, p))) && BUILTIN_TYPE(p) != T_ZOMBIE) {
+            if (p->as.basic.flags) {
+                if ((deferred = obj_free(objspace, (VALUE)p)) ||
+                    (FL_TEST(p, FL_FINALIZE))) {
+                    if (!deferred) {
+                        p->as.free.flags = T_ZOMBIE;
+                        RDATA(p)->dfree = 0;
+                    }
+                    p->as.free.next = deferred_final_list;
+                    deferred_final_list = p;
+                    assert(BUILTIN_TYPE(p) == T_ZOMBIE);
+                    final_num++;
+                }
+                else {
+                    VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
+                    p->as.free.flags = 0;
+                    p->as.free.next = sweep_slot->freelist;
+                    sweep_slot->freelist = p;
+                    free_num++;
                 }
-                p->as.free.flags |= FL_MARK;
-                p->as.free.next = deferred_final_list;
-                deferred_final_list = p;
-                final_num++;
             }
             else {
-                add_freelist(objspace, p);
                 free_num++;
             }
         }
-        else if (BUILTIN_TYPE(p) == T_ZOMBIE) {
-            /* objects to be finalized */
-            /* do nothing remain marked */
-        }
-        else {
-            RBASIC(p)->flags &= ~FL_MARK;
-        }
         p++;
     }
-    if (final_num + free_num == sweep_slot->limit &&
+    gc_clear_slot_bits(sweep_slot);
+    if (final_num + free_num == sweep_slot->membase->limit &&
         objspace->heap.free_num > objspace->heap.do_heap_free) {
         RVALUE *pp;
 
         for (pp = deferred_final_list; pp != final; pp = pp->as.free.next) {
-	    RDATA(pp)->dmark = (void (*)(void *))(VALUE)sweep_slot;
+	    RDATA(pp)->dmark = 0;
             pp->as.free.flags |= FL_SINGLETON; /* freeing page mark */
         }
-        sweep_slot->limit = final_num;
-        freelist = free;	/* cancel this page from freelist */
+        sweep_slot->membase->limit = final_num;
         unlink_heap_slot(objspace, sweep_slot);
+        if (final_num == 0) {
+            free_unused_heap(objspace, sweep_slot->membase);
+        }
     }
     else {
+        if (free_num > 0) {
+            link_free_heap_slot(objspace, sweep_slot);
+        }
+        else {
+            sweep_slot->free_next = NULL;
+        }
         objspace->heap.free_num += free_num;
     }
     objspace->heap.final_num += final_num;
 
-    if (deferred_final_list) {
+    if (deferred_final_list && !finalizing) {
         rb_thread_t *th = GET_THREAD();
         if (th) {
             RUBY_VM_SET_FINALIZER_INTERRUPT(th);
@@ -2078,7 +2287,7 @@ static int
 ready_to_gc(rb_objspace_t *objspace)
 {
     if (dont_gc || during_gc) {
-	if (!freelist) {
+	if (!has_free_object) {
             if (!heaps_increment(objspace)) {
                 set_heaps_increment(objspace);
                 heaps_increment(objspace);
@@ -2092,7 +2301,6 @@ ready_to_gc(rb_objspace_t *objspace)
 static void
 before_gc_sweep(rb_objspace_t *objspace)
 {
-    freelist = 0;
     objspace->heap.do_heap_free = (size_t)((heaps_used * HEAP_OBJ_LIMIT) * 0.65);
     objspace->heap.free_min = (size_t)((heaps_used * HEAP_OBJ_LIMIT)  * 0.2);
     if (objspace->heap.free_min < initial_free_min) {
@@ -2101,6 +2309,7 @@ before_gc_sweep(rb_objspace_t *objspace)
     }
     objspace->heap.sweep_slots = heaps;
     objspace->heap.free_num = 0;
+    objspace->heap.free_slots = NULL;
 
     /* sweep unlinked method entries */
     if (GET_VM()->unlinked_method_entry_list) {
@@ -2123,8 +2332,6 @@ after_gc_sweep(rb_objspace_t *objspace)
 	if (malloc_limit < initial_malloc_limit) malloc_limit = initial_malloc_limit;
     }
     malloc_increase = 0;
-
-    free_unused_heaps(objspace);
 }
 
 static int
@@ -2137,7 +2344,7 @@ lazy_sweep(rb_objspace_t *objspace)
         next = objspace->heap.sweep_slots->next;
 	slot_sweep(objspace, objspace->heap.sweep_slots);
         objspace->heap.sweep_slots = next;
-        if (freelist) {
+        if (has_free_object) {
             during_gc = 0;
             return TRUE;
         }
@@ -2149,10 +2356,10 @@ static void
 rest_sweep(rb_objspace_t *objspace)
 {
     if (objspace->heap.sweep_slots) {
-       while (objspace->heap.sweep_slots) {
-           lazy_sweep(objspace);
-       }
-       after_gc_sweep(objspace);
+	while (objspace->heap.sweep_slots) {
+	    lazy_sweep(objspace);
+	}
+	after_gc_sweep(objspace);
     }
 }
 
@@ -2199,9 +2406,9 @@ gc_lazy_sweep(rb_objspace_t *objspace)
     }
 
     GC_PROF_SWEEP_TIMER_START;
-    if(!(res = lazy_sweep(objspace))) {
+    if (!(res = lazy_sweep(objspace))) {
         after_gc_sweep(objspace);
-        if(freelist) {
+        if (has_free_object) {
             res = TRUE;
             during_gc = 0;
         }
@@ -2234,12 +2441,17 @@ void
 rb_gc_force_recycle(VALUE p)
 {
     rb_objspace_t *objspace = &rb_objspace;
-    GC_PROF_DEC_LIVE_NUM;
-    if (RBASIC(p)->flags & FL_MARK) {
-        RANY(p)->as.free.flags = 0;
+    struct heaps_slot *slot;
+
+    if (MARKED_IN_BITMAP(GET_HEAP_BITMAP(p), p)) {
+        add_slot_local_freelist(objspace, (RVALUE *)p);
     }
     else {
-        add_freelist(objspace, (RVALUE *)p);
+        GC_PROF_DEC_LIVE_NUM;
+        slot = add_slot_local_freelist(objspace, (RVALUE *)p);
+        if (slot->free_next == NULL) {
+            link_free_heap_slot(objspace, slot);
+        }
     }
 }
 
@@ -2611,7 +2823,7 @@ static VALUE
 objspace_each_objects(VALUE arg)
 {
     size_t i;
-    RVALUE *membase = 0;
+    struct heaps_header *membase = 0;
     RVALUE *pstart, *pend;
     rb_objspace_t *objspace = &rb_objspace;
     struct each_obj_args *args = (struct each_obj_args *)arg;
@@ -2619,16 +2831,16 @@ objspace_each_objects(VALUE arg)
 
     i = 0;
     while (i < heaps_used) {
-	while (0 < i && (uintptr_t)membase < (uintptr_t)objspace->heap.sorted[i-1].slot->membase)
+	while (0 < i && membase < objspace->heap.sorted[i-1])
 	    i--;
-	while (i < heaps_used && (uintptr_t)objspace->heap.sorted[i].slot->membase <= (uintptr_t)membase)
+	while (i < heaps_used && objspace->heap.sorted[i] <= membase)
 	    i++;
 	if (heaps_used <= i)
 	  break;
-	membase = objspace->heap.sorted[i].slot->membase;
+	membase = objspace->heap.sorted[i];
 
-	pstart = objspace->heap.sorted[i].slot->slot;
-	pend = pstart + objspace->heap.sorted[i].slot->limit;
+	pstart = membase->start;
+	pend = membase->end;
 
 	for (; pstart != pend; pstart++) {
 	    if (pstart->as.basic.flags) {
@@ -2642,6 +2854,7 @@ objspace_each_objects(VALUE arg)
 	    }
 	}
     }
+    RB_GC_GUARD(v);
 
     return Qnil;
 }
@@ -2885,11 +3098,12 @@ run_single_final(VALUE arg)
 }
 
 static void
-run_finalizer(rb_objspace_t *objspace, VALUE objid, VALUE table)
+run_finalizer(rb_objspace_t *objspace, VALUE obj, VALUE table)
 {
     long i;
     int status;
     VALUE args[3];
+    VALUE objid = nonspecial_obj_id(obj);
 
     if (RARRAY_LEN(table) > 0) {
 	args[1] = rb_obj_freeze(rb_ary_new3(1, objid));
@@ -2913,13 +3127,11 @@ run_finalizer(rb_objspace_t *objspace, VALUE objid, VALUE table)
 static void
 run_final(rb_objspace_t *objspace, VALUE obj)
 {
-    VALUE objid;
     RUBY_DATA_FUNC free_func = 0;
     st_data_t key, table;
 
     objspace->heap.final_num--;
 
-    objid = rb_obj_id(obj);	/* make obj into id */
     RBASIC(obj)->klass = 0;
 
     if (RTYPEDDATA_P(obj)) {
@@ -2934,7 +3146,7 @@ run_final(rb_objspace_t *objspace, VALUE obj)
 
     key = (st_data_t)obj;
     if (st_delete(finalizer_table, &key, &table)) {
-	run_finalizer(objspace, objid, (VALUE)table);
+	run_finalizer(objspace, obj, (VALUE)table);
     }
 }
 
@@ -2952,16 +3164,20 @@ finalize_deferred(rb_objspace_t *objspace)
 void
 rb_gc_finalize_deferred(void)
 {
-    finalize_deferred(&rb_objspace);
+    rb_objspace_t *objspace = &rb_objspace;
+    if (ATOMIC_EXCHANGE(finalizing, 1)) return;
+    finalize_deferred(objspace);
+    ATOMIC_SET(finalizing, 0);
 }
 
 static int
 chain_finalized_object(st_data_t key, st_data_t val, st_data_t arg)
 {
     RVALUE *p = (RVALUE *)key, **final_list = (RVALUE **)arg;
-    if ((p->as.basic.flags & (FL_FINALIZE|FL_MARK)) == FL_FINALIZE) {
+    if ((p->as.basic.flags & FL_FINALIZE) == FL_FINALIZE &&
+        !MARKED_IN_BITMAP(GET_HEAP_BITMAP(p), p)) {
 	if (BUILTIN_TYPE(p) != T_ZOMBIE) {
-	    p->as.free.flags = FL_MARK | T_ZOMBIE; /* remain marked */
+	    p->as.free.flags = T_ZOMBIE;
 	    RDATA(p)->dfree = 0;
 	}
 	p->as.free.next = *final_list;
@@ -3004,6 +3220,8 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace)
     /* run finalizers */
     rest_sweep(objspace);
 
+    if (ATOMIC_EXCHANGE(finalizing, 1)) return;
+
     do {
 	/* XXX: this loop will make no sense */
 	/* because mark will not be removed */
@@ -3018,8 +3236,9 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace)
 	st_foreach(finalizer_table, force_chain_object, (st_data_t)&list);
 	while (list) {
 	    struct force_finalize_list *curr = list;
-	    run_finalizer(objspace, rb_obj_id(curr->obj), curr->table);
-	    st_delete(finalizer_table, (st_data_t*)&curr->obj, 0);
+	    st_data_t obj = (st_data_t)curr->obj;
+	    run_finalizer(objspace, curr->obj, curr->table);
+	    st_delete(finalizer_table, &obj, 0);
 	    list = curr->next;
 	    xfree(curr);
 	}
@@ -3030,7 +3249,7 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace)
 
     /* run data object's finalizers */
     for (i = 0; i < heaps_used; i++) {
-	p = objspace->heap.sorted[i].start; pend = objspace->heap.sorted[i].end;
+	p = objspace->heap.sorted[i]->start; pend = objspace->heap.sorted[i]->end;
 	while (p < pend) {
 	    if (BUILTIN_TYPE(p) == T_DATA &&
 		DATA_PTR(p) && RANY(p)->as.data.dfree &&
@@ -3066,6 +3285,7 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace)
 
     st_free_table(finalizer_table);
     finalizer_table = 0;
+    ATOMIC_SET(finalizing, 0);
 }
 
 void
@@ -3073,8 +3293,39 @@ rb_gc(void)
 {
     rb_objspace_t *objspace = &rb_objspace;
     garbage_collect(objspace);
-    finalize_deferred(objspace);
-    free_unused_heaps(objspace);
+    if (!finalizing) finalize_deferred(objspace);
+}
+
+static inline int
+is_id_value(rb_objspace_t *objspace, VALUE ptr)
+{
+    if (!is_pointer_to_heap(objspace, (void *)ptr)) return FALSE;
+    if (BUILTIN_TYPE(ptr) > T_FIXNUM) return FALSE;
+    if (BUILTIN_TYPE(ptr) == T_ICLASS) return FALSE;
+    return TRUE;
+}
+
+static inline int
+is_dead_object(rb_objspace_t *objspace, VALUE ptr)
+{
+    struct heaps_slot *slot = objspace->heap.sweep_slots;
+    if (!is_lazy_sweeping(objspace) || MARKED_IN_BITMAP(GET_HEAP_BITMAP(ptr), ptr))
+	return FALSE;
+    while (slot) {
+	if ((VALUE)slot->membase->start <= ptr && ptr < (VALUE)(slot->membase->end))
+	    return TRUE;
+	slot = slot->next;
+    }
+    return FALSE;
+}
+
+static inline int
+is_live_object(rb_objspace_t *objspace, VALUE ptr)
+{
+    if (BUILTIN_TYPE(ptr) == 0) return FALSE;
+    if (RBASIC(ptr)->klass == 0) return FALSE;
+    if (is_dead_object(objspace, ptr)) return FALSE;
+    return TRUE;
 }
 
 /*
@@ -3119,11 +3370,10 @@ id2ref(VALUE obj, VALUE objid)
 	return ID2SYM(symid);
     }
 
-    if (!is_pointer_to_heap(objspace, (void *)ptr) ||
-	BUILTIN_TYPE(ptr) > T_FIXNUM || BUILTIN_TYPE(ptr) == T_ICLASS) {
+    if (!is_id_value(objspace, ptr)) {
 	rb_raise(rb_eRangeError, "%p is not id value", p0);
     }
-    if (BUILTIN_TYPE(ptr) == 0 || RBASIC(ptr)->klass == 0) {
+    if (!is_live_object(objspace, ptr)) {
 	rb_raise(rb_eRangeError, "%p is recycled object", p0);
     }
     return (VALUE)ptr;
@@ -3193,7 +3443,7 @@ rb_obj_id(VALUE obj)
     if (SPECIAL_CONST_P(obj)) {
         return LONG2NUM((SIGNED_VALUE)obj);
     }
-    return (VALUE)((SIGNED_VALUE)obj|FIXNUM_FLAG);
+    return nonspecial_obj_id(obj);
 }
 
 static int
@@ -3236,7 +3486,7 @@ count_objects(int argc, VALUE *argv, VALUE os)
     VALUE hash;
 
     if (rb_scan_args(argc, argv, "01", &hash) == 1) {
-        if (TYPE(hash) != T_HASH)
+        if (!RB_TYPE_P(hash, T_HASH))
             rb_raise(rb_eTypeError, "non-hash given");
     }
 
@@ -3247,7 +3497,7 @@ count_objects(int argc, VALUE *argv, VALUE os)
     for (i = 0; i < heaps_used; i++) {
         RVALUE *p, *pend;
 
-        p = objspace->heap.sorted[i].start; pend = objspace->heap.sorted[i].end;
+        p = objspace->heap.sorted[i]->start; pend = objspace->heap.sorted[i]->end;
         for (;p < pend; p++) {
             if (p->as.basic.flags) {
                 counts[BUILTIN_TYPE(p)]++;
@@ -3256,7 +3506,7 @@ count_objects(int argc, VALUE *argv, VALUE os)
                 freed++;
             }
         }
-        total += objspace->heap.sorted[i].slot->limit;
+        total += objspace->heap.sorted[i]->limit;
     }
 
     if (hash == Qnil) {
@@ -3355,7 +3605,7 @@ gc_stat(int argc, VALUE *argv, VALUE self)
     VALUE hash;
 
     if (rb_scan_args(argc, argv, "01", &hash) == 1) {
-        if (TYPE(hash) != T_HASH)
+        if (!RB_TYPE_P(hash, T_HASH))
             rb_raise(rb_eTypeError, "non-hash given");
     }
 
@@ -3410,6 +3660,33 @@ gc_malloc_allocations(VALUE self)
 }
 #endif
 
+/*
+ *  call-seq:
+ *     GC::Profiler.raw_data -> [Hash, ...]
+ *
+ *  Returns an Array of individual raw profile data Hashes ordered
+ *  from earliest to latest by <tt>:GC_INVOKE_TIME</tt>.  For example:
+ *
+ *    [{:GC_TIME=>1.3000000000000858e-05,
+ *      :GC_INVOKE_TIME=>0.010634999999999999,
+ *      :HEAP_USE_SIZE=>289640,
+ *      :HEAP_TOTAL_SIZE=>588960,
+ *      :HEAP_TOTAL_OBJECTS=>14724,
+ *      :GC_IS_MARKED=>false},
+ *      ...
+ *    ]
+ *
+ *  The keys mean:
+ *
+ *  +:GC_TIME+:: Time taken for this run in milliseconds
+ *  +:GC_INVOKE_TIME+:: Time the GC was invoked since startup in seconds
+ *  +:HEAP_USE_SIZE+:: Bytes of heap used
+ *  +:HEAP_TOTAL_SIZE+:: Size of heap in bytes
+ *  +:HEAP_TOTAL_OBJECTS+:: Number of objects
+ *  +:GC_IS_MARKED+:: Is the GC in the mark phase
+ *
+ */
+
 static VALUE
 gc_profile_record_get(void)
 {
@@ -3602,6 +3879,7 @@ Init_GC(void)
     rb_mProfiler = rb_define_module_under(rb_mGC, "Profiler");
     rb_define_singleton_method(rb_mProfiler, "enabled?", gc_profile_enable_get, 0);
     rb_define_singleton_method(rb_mProfiler, "enable", gc_profile_enable, 0);
+    rb_define_singleton_method(rb_mProfiler, "raw_data", gc_profile_record_get, 0);
     rb_define_singleton_method(rb_mProfiler, "disable", gc_profile_disable, 0);
     rb_define_singleton_method(rb_mProfiler, "clear", gc_profile_clear, 0);
     rb_define_singleton_method(rb_mProfiler, "result", gc_profile_result, 0);
diff --git a/hash.c b/hash.c
index fbd8237..32917c3 100644
--- a/hash.c
+++ b/hash.c
@@ -44,7 +44,7 @@ rb_any_cmp(VALUE a, VALUE b)
     if (FIXNUM_P(a) && FIXNUM_P(b)) {
 	return a != b;
     }
-    if (TYPE(a) == T_STRING && RBASIC(a)->klass == rb_cString &&
+    if (RB_TYPE_P(a, T_STRING) && RBASIC(a)->klass == rb_cString &&
 	TYPE(b) == T_STRING && RBASIC(b)->klass == rb_cString) {
 	return rb_str_hash_cmp(a, b);
     }
@@ -80,20 +80,14 @@ rb_any_hash(VALUE a)
     VALUE hval;
     st_index_t hnum;
 
-    switch (TYPE(a)) {
-      case T_FIXNUM:
-      case T_SYMBOL:
-      case T_NIL:
-      case T_FALSE:
-      case T_TRUE:
-	hnum = rb_hash_end(rb_hash_start((unsigned int)a));
-	break;
-
-      case T_STRING:
+    if (SPECIAL_CONST_P(a)) {
+	if (a == Qundef) return 0;
+	hnum = rb_hash_end(rb_hash_start((st_index_t)a));
+    }
+    else if (BUILTIN_TYPE(a) == T_STRING) {
 	hnum = rb_str_hash(a);
-	break;
-
-      default:
+    }
+    else {
         hval = rb_hash(a);
 	hnum = FIX2LONG(hval);
     }
@@ -106,10 +100,8 @@ static const struct st_hash_type objhash = {
     rb_any_hash,
 };
 
-static const struct st_hash_type identhash = {
-    st_numcmp,
-    st_numhash,
-};
+extern const struct st_hash_type st_hashtype_num;
+#define identhash st_hashtype_num
 
 typedef int st_foreach_func(st_data_t, st_data_t, st_data_t);
 
@@ -124,7 +116,6 @@ foreach_safe_i(st_data_t key, st_data_t value, struct foreach_safe_arg *arg)
 {
     int status;
 
-    if (key == Qundef) return ST_CONTINUE;
     status = (*arg->func)(key, value, arg->arg);
     if (status == ST_CONTINUE) {
 	return ST_CHECK;
@@ -140,7 +131,7 @@ st_foreach_safe(st_table *table, int (*func)(ANYARGS), st_data_t a)
     arg.tbl = table;
     arg.func = (st_foreach_func *)func;
     arg.arg = a;
-    if (st_foreach(table, foreach_safe_i, (st_data_t)&arg)) {
+    if (st_foreach_check(table, foreach_safe_i, (st_data_t)&arg, 0)) {
 	rb_raise(rb_eRuntimeError, "hash modified during iteration");
     }
 }
@@ -154,21 +145,21 @@ struct hash_foreach_arg {
 };
 
 static int
-hash_foreach_iter(st_data_t key, st_data_t value, struct hash_foreach_arg *arg)
+hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp)
 {
+    struct hash_foreach_arg *arg = (struct hash_foreach_arg *)argp;
     int status;
     st_table *tbl;
 
     tbl = RHASH(arg->hash)->ntbl;
-    if ((VALUE)key == Qundef) return ST_CONTINUE;
     status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg);
     if (RHASH(arg->hash)->ntbl != tbl) {
 	rb_raise(rb_eRuntimeError, "rehash occurred during iteration");
     }
     switch (status) {
       case ST_DELETE:
-	st_delete_safe(tbl, &key, 0, Qundef);
 	FL_SET(arg->hash, HASH_DELETED);
+	return ST_DELETE;
       case ST_CONTINUE:
 	break;
       case ST_STOP:
@@ -184,7 +175,7 @@ hash_foreach_ensure(VALUE hash)
 
     if (RHASH(hash)->iter_lev == 0) {
 	if (FL_TEST(hash, HASH_DELETED)) {
-	    st_cleanup_safe(RHASH(hash)->ntbl, Qundef);
+	    st_cleanup_safe(RHASH(hash)->ntbl, (st_data_t)Qundef);
 	    FL_UNSET(hash, HASH_DELETED);
 	}
     }
@@ -192,9 +183,10 @@ hash_foreach_ensure(VALUE hash)
 }
 
 static VALUE
-hash_foreach_call(struct hash_foreach_arg *arg)
+hash_foreach_call(VALUE arg)
 {
-    if (st_foreach(RHASH(arg->hash)->ntbl, hash_foreach_iter, (st_data_t)arg)) {
+    VALUE hash = ((struct hash_foreach_arg *)arg)->hash;
+    if (st_foreach_check(RHASH(hash)->ntbl, hash_foreach_iter, (st_data_t)arg, (st_data_t)Qundef)) {
 	rb_raise(rb_eRuntimeError, "hash modified during iteration");
     }
     return Qnil;
@@ -447,7 +439,7 @@ rb_hash_rehash_i(VALUE key, VALUE value, VALUE arg)
 {
     st_table *tbl = (st_table *)arg;
 
-    if (key != Qundef) st_insert(tbl, key, value);
+    st_insert(tbl, (st_data_t)key, (st_data_t)value);
     return ST_CONTINUE;
 }
 
@@ -490,6 +482,20 @@ rb_hash_rehash(VALUE hash)
     return hash;
 }
 
+static VALUE
+hash_default_value(VALUE hash, VALUE key)
+{
+    if (rb_method_basic_definition_p(CLASS_OF(hash), id_default)) {
+	VALUE ifnone = RHASH_IFNONE(hash);
+	if (!FL_TEST(hash, HASH_PROC_DEFAULT)) return ifnone;
+	if (key == Qundef) return Qnil;
+	return rb_funcall(ifnone, id_yield, 2, hash, key);
+    }
+    else {
+	return rb_funcall(hash, id_default, 1, key);
+    }
+}
+
 /*
  *  call-seq:
  *     hsh[key]    ->  value
@@ -510,13 +516,7 @@ rb_hash_aref(VALUE hash, VALUE key)
     st_data_t val;
 
     if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) {
-	if (!FL_TEST(hash, HASH_PROC_DEFAULT) &&
-	    rb_method_basic_definition_p(CLASS_OF(hash), id_default)) {
-	    return RHASH_IFNONE(hash);
-	}
-	else {
-	    return rb_funcall(hash, id_default, 1, key);
-	}
+	return hash_default_value(hash, key);
     }
     return (VALUE)val;
 }
@@ -659,7 +659,7 @@ rb_hash_default(int argc, VALUE *argv, VALUE hash)
 static VALUE
 rb_hash_set_default(VALUE hash, VALUE ifnone)
 {
-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     RHASH_IFNONE(hash) = ifnone;
     FL_UNSET(hash, HASH_PROC_DEFAULT);
     return ifnone;
@@ -707,7 +707,7 @@ rb_hash_set_default_proc(VALUE hash, VALUE proc)
 {
     VALUE b;
 
-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     b = rb_check_convert_type(proc, T_DATA, "Proc", "to_proc");
     if (NIL_P(b) || !rb_obj_is_proc(b)) {
 	rb_raise(rb_eTypeError,
@@ -776,7 +776,7 @@ rb_hash_delete_key(VALUE hash, VALUE key)
     if (!RHASH(hash)->ntbl)
         return Qundef;
     if (RHASH(hash)->iter_lev > 0) {
-	if (st_delete_safe(RHASH(hash)->ntbl, &ktmp, &val, Qundef)) {
+	if (st_delete_safe(RHASH(hash)->ntbl, &ktmp, &val, (st_data_t)Qundef)) {
 	    FL_SET(hash, HASH_DELETED);
 	    return (VALUE)val;
 	}
@@ -809,7 +809,7 @@ rb_hash_delete(VALUE hash, VALUE key)
 {
     VALUE val;
 
-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     val = rb_hash_delete_key(hash, key);
     if (val != Qundef) return val;
     if (rb_block_given_p()) {
@@ -828,7 +828,6 @@ shift_i(VALUE key, VALUE value, VALUE arg)
 {
     struct shift_var *var = (struct shift_var *)arg;
 
-    if (key == Qundef) return ST_CONTINUE;
     if (var->key != Qundef) return ST_STOP;
     var->key = key;
     var->val = value;
@@ -840,7 +839,6 @@ shift_i_safe(VALUE key, VALUE value, VALUE arg)
 {
     struct shift_var *var = (struct shift_var *)arg;
 
-    if (key == Qundef) return ST_CONTINUE;
     var->key = key;
     var->val = value;
     return ST_STOP;
@@ -864,29 +862,25 @@ rb_hash_shift(VALUE hash)
 {
     struct shift_var var;
 
-    rb_hash_modify(hash);
-    var.key = Qundef;
-    rb_hash_foreach(hash, RHASH(hash)->iter_lev > 0 ? shift_i_safe : shift_i,
-		    (VALUE)&var);
-
-    if (var.key != Qundef) {
-	if (RHASH(hash)->iter_lev > 0) {
-	    rb_hash_delete_key(hash, var.key);
+    rb_hash_modify_check(hash);
+    if (RHASH(hash)->ntbl) {
+	var.key = Qundef;
+	rb_hash_foreach(hash, RHASH(hash)->iter_lev > 0 ? shift_i_safe : shift_i,
+			(VALUE)&var);
+
+	if (var.key != Qundef) {
+	    if (RHASH(hash)->iter_lev > 0) {
+		rb_hash_delete_key(hash, var.key);
+	    }
+	    return rb_assoc_new(var.key, var.val);
 	}
-	return rb_assoc_new(var.key, var.val);
-    }
-    else if (FL_TEST(hash, HASH_PROC_DEFAULT)) {
-	return rb_funcall(RHASH_IFNONE(hash), id_yield, 2, hash, Qnil);
-    }
-    else {
-	return RHASH_IFNONE(hash);
     }
+    return hash_default_value(hash, Qnil);
 }
 
 static int
 delete_if_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_yield_values(2, key, value))) {
 	rb_hash_delete_key(hash, key);
     }
@@ -912,8 +906,9 @@ VALUE
 rb_hash_delete_if(VALUE hash)
 {
     RETURN_ENUMERATOR(hash, 0, 0);
-    rb_hash_modify(hash);
-    rb_hash_foreach(hash, delete_if_i, hash);
+    rb_hash_modify_check(hash);
+    if (RHASH(hash)->ntbl)
+	rb_hash_foreach(hash, delete_if_i, hash);
     return hash;
 }
 
@@ -984,7 +979,6 @@ rb_hash_values_at(int argc, VALUE *argv, VALUE hash)
 static int
 select_i(VALUE key, VALUE value, VALUE result)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_yield_values(2, key, value)))
 	rb_hash_aset(result, key, value);
     return ST_CONTINUE;
@@ -1018,7 +1012,6 @@ rb_hash_select(VALUE hash)
 static int
 keep_if_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (!RTEST(rb_yield_values(2, key, value))) {
 	return ST_DELETE;
     }
@@ -1040,7 +1033,7 @@ rb_hash_select_bang(VALUE hash)
     st_index_t n;
 
     RETURN_ENUMERATOR(hash, 0, 0);
-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     if (!RHASH(hash)->ntbl)
         return Qnil;
     n = RHASH(hash)->ntbl->num_entries;
@@ -1065,8 +1058,9 @@ VALUE
 rb_hash_keep_if(VALUE hash)
 {
     RETURN_ENUMERATOR(hash, 0, 0);
-    rb_hash_modify(hash);
-    rb_hash_foreach(hash, keep_if_i, hash);
+    rb_hash_modify_check(hash);
+    if (RHASH(hash)->ntbl)
+	rb_hash_foreach(hash, keep_if_i, hash);
     return hash;
 }
 
@@ -1144,9 +1138,7 @@ rb_hash_aset(VALUE hash, VALUE key, VALUE val)
 static int
 replace_i(VALUE key, VALUE val, VALUE hash)
 {
-    if (key != Qundef) {
-	rb_hash_aset(hash, key, val);
-    }
+    rb_hash_aset(hash, key, val);
 
     return ST_CONTINUE;
 }
@@ -1227,7 +1219,6 @@ rb_hash_empty_p(VALUE hash)
 static int
 each_value_i(VALUE key, VALUE value)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_yield(value);
     return ST_CONTINUE;
 }
@@ -1262,7 +1253,6 @@ rb_hash_each_value(VALUE hash)
 static int
 each_key_i(VALUE key, VALUE value)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_yield(key);
     return ST_CONTINUE;
 }
@@ -1296,7 +1286,6 @@ rb_hash_each_key(VALUE hash)
 static int
 each_pair_i(VALUE key, VALUE value)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_yield(rb_assoc_new(key, value));
     return ST_CONTINUE;
 }
@@ -1334,7 +1323,6 @@ rb_hash_each_pair(VALUE hash)
 static int
 to_a_i(VALUE key, VALUE value, VALUE ary)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_ary_push(ary, rb_assoc_new(key, value));
     return ST_CONTINUE;
 }
@@ -1367,7 +1355,6 @@ inspect_i(VALUE key, VALUE value, VALUE str)
 {
     VALUE str2;
 
-    if (key == Qundef) return ST_CONTINUE;
     str2 = rb_inspect(key);
     if (RSTRING_LEN(str) > 1) {
 	rb_str_cat2(str, ", ");
@@ -1434,7 +1421,6 @@ rb_hash_to_hash(VALUE hash)
 static int
 keys_i(VALUE key, VALUE value, VALUE ary)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_ary_push(ary, key);
     return ST_CONTINUE;
 }
@@ -1465,7 +1451,6 @@ rb_hash_keys(VALUE hash)
 static int
 values_i(VALUE key, VALUE value, VALUE ary)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_ary_push(ary, value);
     return ST_CONTINUE;
 }
@@ -1524,7 +1509,6 @@ rb_hash_search_value(VALUE key, VALUE value, VALUE arg)
 {
     VALUE *data = (VALUE *)arg;
 
-    if (key == Qundef) return ST_CONTINUE;
     if (rb_equal(value, data[1])) {
 	data[0] = Qtrue;
 	return ST_STOP;
@@ -1568,7 +1552,6 @@ eql_i(VALUE key, VALUE val1, VALUE arg)
     struct equal_data *data = (struct equal_data *)arg;
     st_data_t val2;
 
-    if (key == Qundef) return ST_CONTINUE;
     if (!st_lookup(data->tbl, key, &val2)) {
 	data->result = Qfalse;
 	return ST_STOP;
@@ -1599,7 +1582,7 @@ hash_equal(VALUE hash1, VALUE hash2, int eql)
     struct equal_data data;
 
     if (hash1 == hash2) return Qtrue;
-    if (TYPE(hash2) != T_HASH) {
+    if (!RB_TYPE_P(hash2, T_HASH)) {
 	if (!rb_respond_to(hash2, rb_intern("to_hash"))) {
 	    return Qfalse;
 	}
@@ -1670,7 +1653,6 @@ hash_i(VALUE key, VALUE val, VALUE arg)
     st_index_t *hval = (st_index_t *)arg;
     st_index_t hdata[2];
 
-    if (key == Qundef) return ST_CONTINUE;
     hdata[0] = rb_hash(key);
     hdata[1] = rb_hash(val);
     *hval ^= st_hash(hdata, sizeof(hdata), 0);
@@ -1711,7 +1693,6 @@ rb_hash_hash(VALUE hash)
 static int
 rb_hash_invert_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_hash_aset(hash, value, key);
     return ST_CONTINUE;
 }
@@ -1740,7 +1721,6 @@ rb_hash_invert(VALUE hash)
 static int
 rb_hash_update_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     hash_update(hash, key);
     st_insert(RHASH(hash)->ntbl, key, value);
     return ST_CONTINUE;
@@ -1749,7 +1729,6 @@ rb_hash_update_i(VALUE key, VALUE value, VALUE hash)
 static int
 rb_hash_update_block_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (rb_hash_has_key(hash, key)) {
 	value = rb_yield_values(3, key, rb_hash_aref(hash, key), value);
     }
@@ -1806,7 +1785,6 @@ rb_hash_update_func_i(VALUE key, VALUE value, VALUE arg0)
     struct update_arg *arg = (struct update_arg *)arg0;
     VALUE hash = arg->hash;
 
-    if (key == Qundef) return ST_CONTINUE;
     if (rb_hash_has_key(hash, key)) {
 	value = (*arg->func)(key, rb_hash_aref(hash, key), value);
     }
@@ -1863,7 +1841,6 @@ assoc_i(VALUE key, VALUE val, VALUE arg)
 {
     VALUE *args = (VALUE *)arg;
 
-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_equal(args[0], key))) {
 	args[1] = rb_assoc_new(key, val);
 	return ST_STOP;
@@ -1901,7 +1878,6 @@ rassoc_i(VALUE key, VALUE val, VALUE arg)
 {
     VALUE *args = (VALUE *)arg;
 
-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_equal(args[0], val))) {
 	args[1] = rb_assoc_new(key, val);
 	return ST_STOP;
@@ -2198,7 +2174,7 @@ rb_env_path_tainted(void)
 }
 
 #if defined(_WIN32) || (defined(HAVE_SETENV) && defined(HAVE_UNSETENV))
-#elif defined __sun__
+#elif defined __sun
 static int
 in_origenv(const char *str)
 {
@@ -2286,7 +2262,7 @@ ruby_setenv(const char *name, const char *value)
 	    rb_sys_fail("unsetenv");
 #endif
     }
-#elif defined __sun__
+#elif defined __sun
     size_t len;
     char **env_ptr, *str;
     if (strchr(name, '=')) {
@@ -3084,11 +3060,9 @@ env_invert(void)
 static int
 env_replace_i(VALUE key, VALUE val, VALUE keys)
 {
-    if (key != Qundef) {
-	env_aset(Qnil, key, val);
-	if (rb_ary_includes(keys, key)) {
-	    rb_ary_delete(keys, key);
-	}
+    env_aset(Qnil, key, val);
+    if (rb_ary_includes(keys, key)) {
+	rb_ary_delete(keys, key);
     }
     return ST_CONTINUE;
 }
@@ -3120,12 +3094,10 @@ env_replace(VALUE env, VALUE hash)
 static int
 env_update_i(VALUE key, VALUE val)
 {
-    if (key != Qundef) {
-	if (rb_block_given_p()) {
-	    val = rb_yield_values(3, key, rb_f_getenv(Qnil, key), val);
-	}
-	env_aset(Qnil, key, val);
+    if (rb_block_given_p()) {
+	val = rb_yield_values(3, key, rb_f_getenv(Qnil, key), val);
     }
+    env_aset(Qnil, key, val);
     return ST_CONTINUE;
 }
 
@@ -3150,15 +3122,116 @@ env_update(VALUE env, VALUE hash)
 }
 
 /*
- *  A <code>Hash</code> is a collection of key-value pairs. It is
- *  similar to an <code>Array</code>, except that indexing is done via
- *  arbitrary keys of any object type, not an integer index. Hashes enumerate
- *  their values in the order that the corresponding keys were inserted.
+ *  A Hash is a dictionary-like collection of unique keys and their values.
+ *  Also called associative arrays, they are similar to Arrays, but where an
+ *  Array uses integers as its index, a Hash allows you to use any object
+ *  type.
+ *
+ *  Hashes enumerate their values in the order that the corresponding keys
+ *  were inserted.
+ *
+ *  A Hash can be easily created by using its implicit form:
+ *
+ *    grades = { "Jane Doe" => 10, "Jim Doe" => 6 }
+ *
+ *  Hashes allow an alternate syntax form when your keys are always symbols.
+ *  Instead of
+ *
+ *    options = { :font_size => 10, :font_family => "Arial" }
+ *
+ *  You could write it as:
+ *
+ *    options = { font_size: 10, font_family: "Arial" }
+ *
+ *  Each named key is a symbol you can access in hash:
+ *
+ *    options[:font_size]  # => 10
+ *
+ *  A Hash can also be created through its ::new method:
+ *
+ *    grades = Hash.new
+ *    grades["Dorothy Doe"] = 9
  *
  *  Hashes have a <em>default value</em> that is returned when accessing
- *  keys that do not exist in the hash. By default, that value is
- *  <code>nil</code>.
+ *  keys that do not exist in the hash. If no default is set +nil+ is used.
+ *  You can set the default value by sending it as an argument to Hash.new:
+ *
+ *    grades = Hash.new(0)
+ *
+ *  Or by using the #default= method:
+ *
+ *    grades = {"Timmy Doe" => 8}
+ *    grades.default = 0
+ *
+ *  Accessing a value in a Hash requires using its key:
+ *
+ *    puts grades["Jane Doe"] # => 10
+ *
+ *  === Common Uses
+ *
+ *  Hashes are an easy way to represent data structures, such as
+ *
+ *    books         = {}
+ *    books[:matz]  = "The Ruby Language"
+ *    books[:black] = "The Well-Grounded Rubyist"
+ *
+ *  Hashes are also commonly used as a way to have named parameters in
+ *  functions. Note that no brackets are used below. If a hash is the last
+ *  argument on a method call, no braces are needed, thus creating a really
+ *  clean interface:
+ *
+ *    Person.create(name: "John Doe", age: 27)
+ *
+ *    def self.create(params)
+ *      @name = params[:name]
+ *      @age  = params[:age]
+ *    end
+ *
+ *  === Hash Keys
+ *
+ *  Two objects refer to the same hash key when their <code>hash</code> value
+ *  is identical and the two objects are <code>eql?</code> to each other.
+ *
+ *  A user-defined class may be used as a hash key if the <code>hash</code>
+ *  and <code>eql?</code> methods are overridden to provide meaningful
+ *  behavior.  By default, separate instances refer to separate hash keys.
+ *
+ *  A typical implementation of <code>hash</code> is based on the
+ *  object's data while <code>eql?</code> is usually aliased to the overridden
+ *  <code>==</code> method:
+ *
+ *    class Book
+ *      attr_reader :author, :title
+ *
+ *      def initialize(author, title)
+ *        @author = author
+ *        @title = title
+ *      end
+ *
+ *      def ==(other)
+ *        self.class === other and
+ *          other.author == @author and
+ *          other.title == @title
+ *      end
+ *
+ *      alias eql? ==
+ *
+ *      def hash
+ *        @author.hash ^ @title.hash # XOR
+ *      end
+ *    end
+ *
+ *    book1 = Book.new 'matz', 'Ruby in a Nutshell'
+ *    book2 = Book.new 'matz', 'Ruby in a Nutshell'
+ *
+ *    reviews = {}
+ *
+ *    reviews[book1] = 'Great reference!'
+ *    reviews[book2] = 'Nice and compact!'
+ *
+ *    reviews.length #=> 1
  *
+ *  See also Object#hash and Object#eql?
  */
 
 void
diff --git a/include/ruby/st.h b/include/ruby/st.h
index 50f2a75..119dfde 100644
--- a/include/ruby/st.h
+++ b/include/ruby/st.h
@@ -36,7 +36,7 @@ typedef unsigned long st_data_t;
 #elif SIZEOF_LONG_LONG == SIZEOF_VOIDP
 typedef unsigned LONG_LONG st_data_t;
 #else
-# error ---->> st.c requires sizeof(void*) == sizeof(long) to be compiled. <<----
+# error ---->> st.c requires sizeof(void*) == sizeof(long) or sizeof(LONG_LONG) to be compiled. <<----
 #endif
 #define ST_DATA_T_DEFINED
 
@@ -74,6 +74,11 @@ struct st_hash_type {
 
 #define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT)
 
+typedef struct st_packed_entry {
+    st_index_t hash;
+    st_data_t key, val;
+} st_packed_entry;
+
 struct st_table {
     const struct st_hash_type *type;
     st_index_t num_bins;
@@ -91,8 +96,17 @@ struct st_table {
     __extension__
 #endif
     st_index_t num_entries : ST_INDEX_BITS - 1;
-    struct st_table_entry **bins;
-    struct st_table_entry *head, *tail;
+    union {
+	struct {
+	    struct st_table_entry **bins;
+	    struct st_table_entry *head, *tail;
+	} big;
+	struct {
+	    struct st_packed_entry *entries;
+	    st_index_t real_entries;
+	} packed;
+	st_packed_entry upacked;
+    } as;
 };
 
 #define st_is_member(table,key) st_lookup((table),(key),(st_data_t *)0)
@@ -114,6 +128,7 @@ int st_insert2(st_table *, st_data_t, st_data_t, st_data_t (*)(st_data_t));
 int st_lookup(st_table *, st_data_t, st_data_t *);
 int st_get_key(st_table *, st_data_t, st_data_t *);
 int st_foreach(st_table *, int (*)(ANYARGS), st_data_t);
+int st_foreach_check(st_table *, int (*)(ANYARGS), st_data_t, st_data_t);
 int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t);
 void st_add_direct(st_table *, st_data_t, st_data_t);
 void st_free_table(st_table *);
diff --git a/internal.h b/internal.h
index 5d0cff0..99b4015 100644
--- a/internal.h
+++ b/internal.h
@@ -112,6 +112,9 @@ VALUE rb_iseq_clone(VALUE iseqval, VALUE newcbase);
 
 /* load.c */
 VALUE rb_get_load_path(void);
+void rb_reset_expanded_cache();
+void rb_load_path_ary_push(VALUE path);
+extern VALUE rb_cExpandedPath;
 
 /* math.c */
 VALUE rb_math_atan2(VALUE, VALUE);
diff --git a/load.c b/load.c
index 0ff4b60..80b2256 100644
--- a/load.c
+++ b/load.c
@@ -4,6 +4,7 @@
 
 #include "ruby/ruby.h"
 #include "ruby/util.h"
+#include "ruby/encoding.h"
 #include "internal.h"
 #include "dln.h"
 #include "eval_intern.h"
@@ -18,6 +19,7 @@ VALUE ruby_dln_librefs;
 #define IS_DLEXT(e) (strcmp((e), DLEXT) == 0)
 #endif
 
+static int sorted_loaded_features = 1;
 
 static const char *const loadable_ext[] = {
     ".rb", DLEXT,
@@ -27,28 +29,45 @@ static const char *const loadable_ext[] = {
     0
 };
 
-VALUE
-rb_get_load_path(void)
-{
-    VALUE load_path = GET_VM()->load_path;
-    return load_path;
-}
+static VALUE rb_checked_expanded_cache(int*);
+static void rb_set_expanded_cache(VALUE, int);
+static VALUE rb_expand_load_paths(long, VALUE*, int*);
+static int cached_expanded_load_path = 1;
+VALUE rb_cExpandedPath;
 
 VALUE
 rb_get_expanded_load_path(void)
 {
-    VALUE load_path = rb_get_load_path();
-    VALUE ary;
-    long i;
+    VALUE expanded = rb_checked_expanded_cache(NULL);
 
-    ary = rb_ary_new2(RARRAY_LEN(load_path));
-    for (i = 0; i < RARRAY_LEN(load_path); ++i) {
-	VALUE path = rb_file_expand_path(RARRAY_PTR(load_path)[i], Qnil);
-	rb_str_freeze(path);
-	rb_ary_push(ary, path);
+    if ( !RTEST(expanded) ) {
+	VALUE load_path = GET_VM()->load_path;
+	int has_relative = 0;
+
+	if (!load_path) return 0;
+
+	expanded = rb_expand_load_paths(
+			RARRAY_LEN(load_path), RARRAY_PTR(load_path),
+			&has_relative);
+	RB_GC_GUARD(load_path);
+
+	if (cached_expanded_load_path) {
+	    rb_set_expanded_cache(expanded, has_relative);
+	}
+    } else {
+	expanded = rb_ary_dup(expanded);
     }
-    rb_obj_freeze(ary);
-    return ary;
+    return expanded;
+}
+
+VALUE
+rb_get_load_path(void)
+{
+    VALUE load_path =
+	cached_expanded_load_path ?
+	    rb_get_expanded_load_path():
+	    GET_VM()->load_path;
+    return load_path;
 }
 
 static VALUE
@@ -129,6 +148,9 @@ loaded_feature_path_i(st_data_t v, st_data_t b, st_data_t f)
     return ST_STOP;
 }
 
+static long rb_feature_first_equal_or_greater(VALUE, const char *, long);
+static int  rb_stop_search_feature(VALUE, const char *, long);
+
 static int
 rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const char **fn)
 {
@@ -151,8 +173,10 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
 	type = 0;
     }
     features = get_loaded_features();
-    for (i = 0; i < RARRAY_LEN(features); ++i) {
+    i = rb_feature_first_equal_or_greater(features, feature, len);
+    for (; i < RARRAY_LEN(features); ++i) {
 	v = RARRAY_PTR(features)[i];
+	if (rb_stop_search_feature(v, feature, len)) break;
 	f = StringValuePtr(v);
 	if ((n = RSTRING_LEN(v)) < len) continue;
 	if (strncmp(f, feature, len) != 0) {
@@ -176,14 +200,14 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
 	}
     }
     loading_tbl = get_loading_table();
-    if (loading_tbl) {
+    if (loading_tbl && loading_tbl->num_entries > 0) {
 	f = 0;
 	if (!expanded) {
 	    struct loaded_feature_searching fs;
 	    fs.name = feature;
 	    fs.len = len;
 	    fs.type = type;
-	    fs.load_path = load_path ? load_path : rb_get_load_path();
+	    fs.load_path = load_path ? load_path : rb_get_expanded_load_path();
 	    fs.result = 0;
 	    st_foreach(loading_tbl, loaded_feature_path_i, (st_data_t)&fs);
 	    if ((f = fs.result) != 0) {
@@ -251,6 +275,170 @@ rb_feature_provided(const char *feature, const char **loading)
     return FALSE;
 }
 
+static long
+feature_basename_length(const char *feature, long flen)
+{
+    if (sorted_loaded_features) {
+	const char *ext = strrchr(feature, '.');
+	return ext && !strchr(ext, '/') ? ext - feature : flen;
+    } else {
+	return 0;
+    }
+}
+
+static int
+compare_feature_name(const char *left, long llen, const char *right, long rlen)
+{
+    int diff = 0;
+    while (llen-- && rlen--) {
+	diff = left[llen] - right[rlen];
+	if (diff) break;
+	if (left[llen] == '/') break;
+    }
+    return diff;
+}
+
+static int
+rb_compare_feature_name(VALUE loaded, const char *feature, long flen)
+{
+    const char *loaded_name = StringValuePtr(loaded);
+    long loaded_len = feature_basename_length(loaded_name, RSTRING_LEN(loaded));
+    return compare_feature_name(loaded_name, loaded_len, feature, flen);
+}
+
+/* used to find when equal features run out */
+static int
+rb_stop_search_feature(VALUE loaded, const char *feature, long flen)
+{
+    if (sorted_loaded_features)
+	return rb_compare_feature_name(loaded, feature, flen) > 0;
+    else
+	return FALSE;
+}
+
+/* returns first position to search feature from */
+static long
+rb_feature_first_equal_or_greater(VALUE features, const char *feature, long flen)
+{
+    if (sorted_loaded_features) {
+	long before = 0, first = RARRAY_LEN(features);
+	VALUE *values = RARRAY_PTR(features);
+	if (first == 0)
+	    return 0;
+	if (rb_compare_feature_name(values[0], feature, flen) >= 0)
+	    return 0;
+
+	while (first - before > 1) {
+	    long mid = (first + before) / 2;
+	    long cmp = rb_compare_feature_name(values[mid], feature, flen);
+	    if (cmp >= 0)
+		first = mid;
+	    else
+		before = mid;
+	}
+	return first;
+    } else {
+	return 0;
+    }
+}
+
+/* returns position to insert new feature in */
+static long
+rb_feature_first_greater(VALUE features, const char *feature, long flen)
+{
+    if (sorted_loaded_features) {
+	long before = 0, first = RARRAY_LEN(features);
+	VALUE *values = RARRAY_PTR(features);
+	if (first == 0)
+	    return 0;
+	if (rb_compare_feature_name(values[0], feature, flen) > 0)
+	    return 0;
+	if (rb_compare_feature_name(values[first-1], feature, flen) <= 0)
+	    return first;
+
+	while (first - before > 1) {
+	    long mid = (first + before) / 2;
+	    long cmp = rb_compare_feature_name(values[mid], feature, flen);
+	    if (cmp > 0)
+		first = mid;
+	    else
+		before = mid;
+	}
+	return first;
+    } else {
+	return RARRAY_LEN(features);
+    }
+}
+
+
+static VALUE
+rb_push_feature_1(VALUE features, VALUE feature)
+{
+    const char *fname = StringValuePtr(feature);
+    long flen = feature_basename_length(fname, RSTRING_LEN(feature));
+    long i = rb_feature_first_greater(features, fname, flen);
+    rb_ary_push(features, feature);
+    if ( i < RARRAY_LEN(features) - 1 ) {
+	MEMMOVE(RARRAY_PTR(features) + i + 1, RARRAY_PTR(features) + i,
+		VALUE, RARRAY_LEN(features) - i - 1);
+	RARRAY_PTR(features)[i] = feature;
+    }
+    return features;
+}
+
+static VALUE
+rb_push_feature_m(long argc, VALUE *argv, VALUE features)
+{
+    while (argc--) {
+	rb_push_feature_1(features, *argv++);
+    }
+    return features;
+}
+
+static VALUE
+rb_concat_features(VALUE features, VALUE add)
+{
+    add = rb_convert_type(add, T_ARRAY, "Array", "to_ary");
+    if (RARRAY_LEN(add)) {
+	rb_push_feature_m(RARRAY_LEN(add), RARRAY_PTR(add), features);
+    }
+    return features;
+}
+static const char *load_features_undefined_methods[] = {
+    "[]=", "reverse!", "rotate!", "sort!", "sort_by!",
+    "collect!", "map!", "shuffle!", "fill", "insert",
+    NULL
+};
+
+static VALUE
+rb_loaded_features_init(void)
+{
+    char *sorted_flag;
+    const char **name;
+    VALUE loaded_features = rb_ary_new();
+    VALUE loaded_features_c = rb_singleton_class(loaded_features);
+
+    sorted_flag = getenv("RUBY_LOADED_FEATURES_SORTED");
+    if (sorted_flag != NULL) {
+	int sorted_set = atoi(sorted_flag);
+	if (RTEST(ruby_verbose))
+	    fprintf(stderr, "sorted_loaded_features=%d (%d)\n", sorted_set, sorted_loaded_features);
+	sorted_loaded_features = sorted_set;
+    }
+
+    for(name = load_features_undefined_methods; *name; name++) {
+	rb_undef_method(loaded_features_c, *name);
+    }
+
+    if (sorted_loaded_features) {
+	rb_define_method(loaded_features_c, "<<", rb_push_feature_1, 1);
+	rb_define_method(loaded_features_c, "push", rb_push_feature_m, -1);
+	rb_define_method(loaded_features_c, "concat", rb_concat_features, 1);
+	rb_define_method(loaded_features_c, "unshift", rb_push_feature_m, -1);
+    }
+    return loaded_features;
+}
+
 static void
 rb_provide_feature(VALUE feature)
 {
@@ -258,7 +446,10 @@ rb_provide_feature(VALUE feature)
 	rb_raise(rb_eRuntimeError,
 		 "$LOADED_FEATURES is frozen; cannot append feature");
     }
-    rb_ary_push(get_loaded_features(), feature);
+    if (sorted_loaded_features)
+	rb_push_feature_1(get_loaded_features(), feature);
+    else
+	rb_ary_push(get_loaded_features(), feature);
 }
 
 void
@@ -760,6 +951,230 @@ rb_f_autoload_p(VALUE obj, VALUE sym)
     return rb_mod_autoload_p(klass, sym);
 }
 
+/* $LOAD_PATH methods which invalidates cache */
+static const char *load_path_reset_cache_methods[] = {
+    "[]=", "collect!", "compact!", "delete",
+    "delete_if", "fill", "flatten!", "insert", "keep_if",
+    "map!", "reject!", "replace", "select!", "shuffle!",
+    "sort!", "sort_by!", "uniq!", NULL
+};
+
+/* $LOAD_PATH methods which sends also to cache */
+static const char *load_path_apply_to_cache_methods[] = {
+    "clear", "delete_at", "pop", "reverse!", "rotate!",
+    "shift", "slice!", NULL
+};
+
+/* $LOAD_PATH methods which sends to cache whith expanded arguments */
+static const char *load_path_apply_expanded_methods[] = {
+    "<<", "push", "unshift", NULL
+};
+
+void
+rb_reset_expanded_cache()
+{
+    GET_VM()->load_path_expanded_cache = 0;
+}
+
+static VALUE
+rb_load_path_expanded_cache()
+{
+    VALUE cache = GET_VM()->load_path_expanded_cache;
+    VALUE expanded = Qnil;
+    if (RTEST(cache)) {
+	expanded = RARRAY_PTR(cache)[2];
+    }
+    return expanded;
+}
+
+/* Return cache only if we still in the same working directory
+ * and filesystem_encoding didn't change
+ * Invalidate cache otherwise
+ */
+static VALUE
+rb_checked_expanded_cache(int *has_relative)
+{
+    VALUE cache = GET_VM()->load_path_expanded_cache;
+    VALUE expanded = Qnil;
+    if (RTEST(cache)) {
+	VALUE curwd = RARRAY_PTR(cache)[0];
+	VALUE encindex = RARRAY_PTR(cache)[1];
+	int cache_valid = rb_filesystem_encindex() == FIX2INT(encindex);
+
+	if ( cache_valid ) {
+	    cache_valid = curwd == Qtrue;
+	    if (has_relative) {
+		*has_relative = cache_valid;
+	    }
+	    if (!cache_valid ) {
+		char *cwd = my_getcwd();
+		cache_valid = !strcmp(RSTRING_PTR(curwd), cwd);
+		xfree(cwd);
+	    }
+	}
+
+	if ( !cache_valid ) {
+	    rb_reset_expanded_cache();
+	} else {
+	    expanded = RARRAY_PTR(cache)[2];
+	}
+    }
+    RB_GC_GUARD(cache);
+    return expanded;
+}
+
+static void
+rb_set_expanded_cache(VALUE expanded, int has_relative)
+{
+    VALUE cache = rb_ary_new2(3);
+
+    if (has_relative) {
+	char *cwd = my_getcwd();
+	rb_ary_push(cache, rb_str_new_cstr(cwd));
+	xfree(cwd);
+    } else {
+	rb_ary_push(cache, Qtrue);
+    }
+
+    rb_ary_push(cache, INT2FIX(rb_filesystem_encindex()));
+    rb_ary_push(cache, rb_ary_dup(expanded));
+    GET_VM()->load_path_expanded_cache = cache;
+}
+
+static VALUE
+rb_expand_load_paths(long pathc, VALUE* paths, int *has_relative)
+{
+    long i;
+    const char *p;
+    VALUE path, expanded = rb_ary_new2(pathc);
+
+    for(i = 0; i < pathc; i++) {
+	path = rb_get_path(paths[i]);
+	p = RSTRING_PTR(path);
+	*has_relative = *has_relative || !rb_is_absolute_path(p);
+	path = rb_file_expand_path(path, Qnil);
+	RBASIC(path)->klass = rb_cExpandedPath;
+	rb_str_freeze(path);
+	rb_ary_push(expanded, path);
+    }
+
+    return expanded;
+}
+
+/* Invalidating $LOAD_PATH methods implementation */
+static VALUE
+rb_load_path_reset_cache_method(int argc, VALUE *argv, VALUE self)
+{
+    rb_reset_expanded_cache();
+    return rb_call_super(argc, argv);
+}
+
+/* Proxying $LOAD_PATH methods implementation */
+static VALUE
+rb_load_path_apply_to_cache_method(int argc, VALUE *argv, VALUE self)
+{
+    VALUE load_path_expanded = rb_load_path_expanded_cache();
+    if (RTEST(load_path_expanded)) {
+	ID func = rb_frame_this_func();
+	rb_funcall2(load_path_expanded, func, argc, argv);
+    }
+    return rb_call_super(argc, argv);
+}
+
+/* Proxying with expansion $LOAD_PATH methods implementation */
+static VALUE
+rb_load_path_apply_expanded_method(int argc, VALUE *argv, VALUE self)
+{
+    int old_has_relative = 0;
+    /* We call methods on cache only if we still in the same working directory */
+    VALUE load_path_expanded = rb_checked_expanded_cache(&old_has_relative);
+    if (RTEST(load_path_expanded)) {
+	int has_relative = 0;
+	ID func = rb_frame_this_func();
+	VALUE expanded = rb_expand_load_paths(argc, argv, &has_relative);
+
+	rb_funcall2(load_path_expanded, func, argc, RARRAY_PTR(expanded));
+
+	if (!old_has_relative && has_relative) {
+	    rb_set_expanded_cache(load_path_expanded, has_relative);
+	}
+	RB_GC_GUARD(expanded);
+    }
+    return rb_call_super(argc, argv);
+}
+/* $LOAD_PATH.concat(ary) - special, we call push(*ary) instead
+ * cause I'm lazy a bit and wish not to rewrite method above second time :)
+ */
+static VALUE
+rb_load_path_concat(VALUE self, VALUE ary)
+{
+    ID push;
+    CONST_ID(push, "push");
+    RB_GC_GUARD(ary);
+    return rb_funcall2(self, push, (int)RARRAY_LEN(ary), RARRAY_PTR(ary));
+}
+
+void
+rb_load_path_ary_push(VALUE path)
+{
+    int old_has_relative = 0;
+    VALUE load_path_expanded = rb_checked_expanded_cache(&old_has_relative);
+    if (RTEST(load_path_expanded)) {
+	int has_relative = 0;
+	VALUE expanded = rb_expand_load_paths(1, &path, &has_relative);
+
+	rb_ary_push(load_path_expanded, RARRAY_PTR(expanded)[0]);
+
+	if (!old_has_relative && has_relative) {
+	    rb_set_expanded_cache(load_path_expanded, has_relative);
+	}
+	RB_GC_GUARD(expanded);
+    }
+
+    rb_ary_push(GET_VM()->load_path, path);
+}
+
+static VALUE
+rb_load_path_init(void)
+{
+    const char **name;
+    VALUE load_path = rb_ary_new();
+    char *cached_flag;
+
+    cached_flag = getenv("RUBY_CACHED_LOAD_PATH");
+    if (cached_flag != NULL) {
+	cached_expanded_load_path = atoi(cached_flag);
+    }
+
+    rb_cExpandedPath = rb_class_new(rb_cString); /* XXX could GC collect it before next line is executed? */
+    rb_iv_set(rb_cFile, "expanded_path", rb_cExpandedPath); /* prevent from GC */
+
+    /* Do all the magick if user did not disable it
+     * with RUBY_CACHED_LOAD_PATH=0 environment variable
+     */
+    if (cached_expanded_load_path) {
+	VALUE load_path_c = rb_singleton_class(load_path);
+
+	for(name = load_path_reset_cache_methods; *name; name++ ) {
+	    rb_define_method(load_path_c, *name, rb_load_path_reset_cache_method, -1);
+	}
+
+	for(name = load_path_apply_to_cache_methods; *name; name++ ) {
+	    rb_define_method(load_path_c, *name, rb_load_path_apply_to_cache_method, -1);
+	}
+
+	for(name = load_path_apply_expanded_methods; *name; name++ ) {
+	    rb_define_method(load_path_c, *name, rb_load_path_apply_expanded_method, -1);
+	}
+
+	rb_define_method(load_path_c, "concat", rb_load_path_concat, 1);
+    }
+
+    rb_reset_expanded_cache();
+
+    return load_path;
+}
+
 void
 Init_load()
 {
@@ -772,11 +1187,11 @@ Init_load()
     rb_define_hooked_variable(var_load_path, (VALUE*)vm, load_path_getter, rb_gvar_readonly_setter);
     rb_alias_variable(rb_intern("$-I"), id_load_path);
     rb_alias_variable(rb_intern("$LOAD_PATH"), id_load_path);
-    vm->load_path = rb_ary_new();
+    vm->load_path = rb_load_path_init();
 
     rb_define_virtual_variable("$\"", get_loaded_features, 0);
     rb_define_virtual_variable("$LOADED_FEATURES", get_loaded_features, 0);
-    vm->loaded_features = rb_ary_new();
+    vm->loaded_features = rb_loaded_features_init();
 
     rb_define_global_function("load", rb_f_load, -1);
     rb_define_global_function("require", rb_f_require, 1);
diff --git a/pool_alloc.h b/pool_alloc.h
new file mode 100644
index 0000000..957708e
--- /dev/null
+++ b/pool_alloc.h
@@ -0,0 +1,11 @@
+#ifndef POOL_ALLOC_H
+#define POOL_ALLOC_H
+
+#define POOL_ALLOC_API
+#ifdef POOL_ALLOC_API
+void  ruby_xpool_free(void *ptr);
+void *ruby_xpool_malloc_6p();
+void *ruby_xpool_malloc_11p();
+#endif
+
+#endif
diff --git a/pool_alloc.inc.h b/pool_alloc.inc.h
new file mode 100644
index 0000000..026f40c
--- /dev/null
+++ b/pool_alloc.inc.h
@@ -0,0 +1,223 @@
+/*
+ * this is generic pool allocator
+ * you should define following macroses:
+ * ITEM_NAME - unique identifier, which allows to hold functions in a namespace
+ * ITEM_TYPEDEF(name) - passed to typedef to localize item type
+ * free_entry - desired name of function for free entry
+ * alloc_entry - defired name of function for allocate entry
+ */
+
+#if POOL_ALLOC_PART == 1
+#ifdef HEAP_ALIGN_LOG
+#define DEFAULT_POOL_SIZE (1 << HEAP_ALIGN_LOG)
+#else
+#define DEFAULT_POOL_SIZE (sizeof(void*) * 2048)
+#endif
+typedef unsigned int pool_holder_counter;
+
+typedef struct pool_entry_list pool_entry_list;
+typedef struct pool_holder pool_holder;
+
+typedef struct pool_header {
+    pool_holder         *first;
+    rb_atomic_t          lock;
+    pool_holder_counter  size; // size of entry in sizeof(void*) items
+    pool_holder_counter  total; // size of entry in sizeof(void*) items
+} pool_header;
+
+struct pool_holder {
+    pool_holder_counter free, total;
+    pool_header  *header;
+    void               *freep;
+    pool_holder        *fore, *back;
+    void *data[1];
+};
+#define POOL_DATA_SIZE(pool_size) (((pool_size) - sizeof(void*) * 6 - offsetof(pool_holder, data)) / sizeof(void*))
+#define POOL_ENTRY_SIZE(item_type) ((sizeof(item_type) - 1) / sizeof(void*) + 1)
+#define POOL_HOLDER_COUNT(pool_size, item_type) (POOL_DATA_SIZE(pool_size)/POOL_ENTRY_SIZE(item_type))
+#define INIT_POOL(item_type) {NULL, 0, POOL_ENTRY_SIZE(item_type), POOL_HOLDER_COUNT(DEFAULT_POOL_SIZE, item_type)}
+
+#elif POOL_ALLOC_PART == 2
+
+#if   defined(_WIN32)
+#define native_thread_yield() Sleep(0)
+#elif HAVE_SCHED_YIELD
+#define native_thread_yield() (void)sched_yield()
+#else
+#define native_thread_yield() ((void)0)
+#endif
+
+#define MAX_TRY_CICLES 5
+static inline int
+living_threads()
+{
+    rb_vm_t *vm = GET_VM();
+    st_table *living_threads;
+    return vm && (living_threads = vm->living_threads) ? living_threads->num_entries : 1;
+}
+
+static void
+lock_header(pool_header *header)
+{
+    int i;
+    if (living_threads() == 1) {
+	header->lock = 1;
+	return;
+    }
+    i = MAX_TRY_CICLES;
+    while(ATOMIC_EXCHANGE(header->lock, 1)) {
+	if (--i == 0) {
+	    native_thread_yield();
+	    i = MAX_TRY_CICLES;
+	}
+    }
+}
+
+static inline void
+unlock_header(pool_header *header)
+{
+    if (living_threads() == 1) {
+	header->lock = 0;
+	return;
+    }
+    ATOMIC_SET(header->lock, 0);
+}
+
+static pool_holder *
+pool_holder_alloc(pool_header *header)
+{
+    pool_holder *holder;
+    pool_holder_counter i, size, count;
+    register void **ptr;
+
+    size_t sz = offsetof(pool_holder, data) +
+	    header->size * header->total * sizeof(void*);
+#define objspace (&rb_objspace)
+    unlock_header(header);
+    vm_malloc_prepare(objspace, DEFAULT_POOL_SIZE);
+    lock_header(header);
+    if (header->first != NULL) {
+	return header->first;
+    }
+    holder = (pool_holder*) aligned_malloc(DEFAULT_POOL_SIZE, sz);
+    if (!holder) {
+	unlock_header(header);
+	if (!garbage_collect_with_gvl(objspace)) {
+	    ruby_memerror();
+	}
+	holder = (pool_holder*) aligned_malloc(DEFAULT_POOL_SIZE, sz);
+	if (!holder) {
+	    ruby_memerror();
+	}
+	lock_header(header);
+    }
+    malloc_increase += DEFAULT_POOL_SIZE;
+#if CALC_EXACT_MALLOC_SIZE
+    objspace->malloc_params.allocated_size += DEFAULT_POOL_SIZE;
+    objspace->malloc_params.allocations++;
+#endif
+#undef objspace
+
+    size = header->size;
+    count = header->total;
+    holder->free = count;
+    holder->total = count;
+    holder->header = header;
+    holder->fore = NULL;
+    holder->back = NULL;
+    holder->freep = &holder->data;
+    ptr = holder->data;
+    for(i = count - 1; i; i-- ) {
+	ptr = *ptr = ptr + size;
+    }
+    *ptr = NULL;
+    header->first = holder;
+    return holder;
+}
+
+static inline void
+pool_holder_unchaing(pool_header *header, pool_holder *holder)
+{
+    register pool_holder *fore = holder->fore, *back = holder->back;
+    holder->fore = NULL;
+    holder->back = NULL;
+    if (fore != NULL)  fore->back     = back;
+    if (back != NULL)  back->fore     = fore;
+    else               header->first = fore;
+}
+
+static inline pool_holder *
+entry_holder(void **entry)
+{
+    return (pool_holder*)(((uintptr_t)entry) & ~(DEFAULT_POOL_SIZE - 1));
+}
+
+static inline void
+pool_free_entry(void **entry)
+{
+    pool_holder *holder = entry_holder(entry);
+    pool_header *header = holder->header;
+
+    lock_header(header);
+
+    if (holder->free++ == 0) {
+	register pool_holder *first = header->first;
+	if (first == NULL) {
+	    header->first = holder;
+	} else {
+	    holder->back = first;
+	    holder->fore = first->fore;
+	    first->fore = holder;
+	    if (holder->fore)
+		holder->fore->back = holder;
+	}
+    } else if (holder->free == holder->total && header->first != holder ) {
+	pool_holder_unchaing(header, holder);
+	aligned_free(holder);
+#if CALC_EXACT_MALLOC_SIZE
+	rb_objspace.malloc_params.allocated_size -= DEFAULT_POOL_SIZE;
+	rb_objspace.malloc_params.allocations--;
+#endif
+        unlock_header(header);
+	return;
+    }
+
+    *entry = holder->freep;
+    holder->freep = entry;
+    unlock_header(header);
+}
+
+static inline void*
+pool_alloc_entry(pool_header *header)
+{
+    pool_holder *holder;
+    void **result;
+
+    lock_header(header);
+    holder = header->first;
+
+    if (holder == NULL) {
+	holder = pool_holder_alloc(header);
+    }
+
+    result = holder->freep;
+    holder->freep = *result;
+
+    if (--holder->free == 0) {
+	pool_holder_unchaing(header, holder);
+    }
+
+    unlock_header(header);
+
+    return result;
+}
+
+static void
+pool_finalize_header(pool_header *header)
+{
+    if (header->first) {
+        aligned_free(header->first);
+        header->first = NULL;
+    }
+}
+#endif
diff --git a/ruby.c b/ruby.c
index 3c97d01..b9b9fd5 100644
--- a/ruby.c
+++ b/ruby.c
@@ -209,7 +209,6 @@ push_include(const char *path, VALUE (*filter)(VALUE))
 {
     const char sep = PATH_SEP_CHAR;
     const char *p, *s;
-    VALUE load_path = GET_VM()->load_path;
 
     p = path;
     while (*p) {
@@ -217,7 +216,7 @@ push_include(const char *path, VALUE (*filter)(VALUE))
 	    p++;
 	if (!*p) break;
 	for (s = p; *s && *s != sep; s = CharNext(s));
-	rb_ary_push(load_path, (*filter)(rubylib_mangled_path(p, s - p)));
+	rb_load_path_ary_push((*filter)(rubylib_mangled_path(p, s - p)));
 	p = s;
     }
 }
@@ -338,7 +337,6 @@ ruby_init_loadpath(void)
 void
 ruby_init_loadpath_safe(int safe_level)
 {
-    VALUE load_path;
     ID id_initial_load_path_mark;
     extern const char ruby_initial_load_paths[];
     const char *paths = ruby_initial_load_paths;
@@ -438,7 +436,6 @@ ruby_init_loadpath_safe(int safe_level)
 #define RUBY_RELATIVE(path, len) rubylib_mangled_path((path), (len))
 #define PREFIX_PATH() RUBY_RELATIVE(exec_prefix, sizeof(exec_prefix)-1)
 #endif
-    load_path = GET_VM()->load_path;
 
     if (safe_level == 0) {
 #ifdef MANGLED_PATH
@@ -452,7 +449,7 @@ ruby_init_loadpath_safe(int safe_level)
 	size_t len = strlen(paths);
 	VALUE path = RUBY_RELATIVE(paths, len);
 	rb_ivar_set(path, id_initial_load_path_mark, path);
-	rb_ary_push(load_path, path);
+	rb_load_path_ary_push(path);
 	paths += len + 1;
     }
 
@@ -1349,6 +1346,7 @@ process_options(int argc, char **argv, struct cmdline_options *opt)
 	for (i = 0; i < RARRAY_LEN(load_path); ++i) {
 	    rb_enc_associate(RARRAY_PTR(load_path)[i], lenc);
 	}
+	rb_reset_expanded_cache();
     }
     if (!(opt->disable & DISABLE_BIT(gems))) {
 #if defined DISABLE_RUBYGEMS && DISABLE_RUBYGEMS
diff --git a/st.c b/st.c
index fda5784..890bdca 100644
--- a/st.c
+++ b/st.c
@@ -7,6 +7,7 @@
 #include "st.h"
 #else
 #include "ruby/ruby.h"
+#include "pool_alloc.h"
 #endif
 
 #include <stdio.h>
@@ -25,8 +26,17 @@ struct st_table_entry {
     st_table_entry *fore, *back;
 };
 
-#define ST_DEFAULT_MAX_DENSITY 5
+#define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[(expr) ? 1 : -1];
+
+#define ST_DEFAULT_MAX_DENSITY 2
 #define ST_DEFAULT_INIT_TABLE_SIZE 11
+#define ST_DEFAULT_SECOND_TABLE_SIZE 19
+#define ST_DEFAULT_PACKED_TABLE_SIZE 18
+#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
+#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))
+
+STATIC_ASSERT(st_packed_entry, sizeof(st_packed_entry) == sizeof(st_table_entry*[PACKED_UNIT]))
+STATIC_ASSERT(st_packed_bins, sizeof(st_packed_entry[MAX_PACKED_HASH]) <= sizeof(st_table_entry*[ST_DEFAULT_PACKED_TABLE_SIZE]))
 
     /*
      * DEFAULT_MAX_DENSITY is the default for the largest we allow the
@@ -38,7 +48,8 @@ struct st_table_entry {
      *
      */
 
-static const struct st_hash_type type_numhash = {
+#define type_numhash st_hashtype_num
+const struct st_hash_type st_hashtype_num = {
     st_numcmp,
     st_numhash,
 };
@@ -61,20 +72,127 @@ static void rehash(st_table *);
 #ifdef RUBY
 #define malloc xmalloc
 #define calloc xcalloc
+#define realloc xrealloc
 #define free(x) xfree(x)
 #endif
 
 #define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
 
-#define alloc(type) (type*)malloc((size_t)sizeof(type))
-#define Calloc(n,s) (char*)calloc((n),(s))
-
 #define EQUAL(table,x,y) ((x)==(y) || (*(table)->type->compare)((x),(y)) == 0)
 
-/* remove cast to unsigned int in the future */
-#define do_hash(key,table) (unsigned int)(st_index_t)(*(table)->type->hash)((key))
+#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))
 #define do_hash_bin(key,table) (do_hash((key), (table))%(table)->num_bins)
 
+/* preparation for possible allocation improvements */
+#ifdef POOL_ALLOC_API
+#define st_alloc_entry() (st_table_entry *)ruby_xpool_malloc_6p()
+#define st_free_entry(entry) ruby_xpool_free(entry)
+#define st_alloc_table() (st_table *)ruby_xpool_malloc_6p()
+#define st_dealloc_table(table) ruby_xpool_free(table)
+static inline st_table_entry **
+st_alloc_bins(st_index_t size)
+{
+    st_table_entry **result;
+    if (size == 11) {
+        result = (st_table_entry **) ruby_xpool_malloc_11p();
+        memset(result, 0, 11 * sizeof(st_table_entry *));
+    }
+    else
+        result = (st_table_entry **) ruby_xcalloc(size, sizeof(st_table_entry*));
+    return result;
+}
+static inline void
+st_free_bins(st_table_entry **bins, st_index_t size)
+{
+    if (size == 11)
+	ruby_xpool_free(bins);
+    else
+	ruby_xfree(bins);
+}
+static inline st_table_entry**
+st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
+{
+    st_free_bins(bins, oldsize);
+    return st_alloc_bins(newsize);
+}
+#else
+#define st_alloc_entry() (st_table_entry *)malloc(sizeof(st_table_entry))
+#define st_free_entry(entry) free(entry)
+#define st_alloc_table() (st_table *)malloc(sizeof(st_table))
+#define st_dealloc_table(table) free(table)
+#define st_alloc_bins(size) (st_table_entry **)calloc(size, sizeof(st_table_entry *))
+#define st_free_bins(bins, size) free(bins)
+static inline st_table_entry**
+st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
+{
+    bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
+    MEMZERO(bins, st_table_entry*, newsize);
+    return bins;
+}
+#endif
+
+/* Shortage */
+#define bins as.big.bins
+#define head as.big.head
+#define tail as.big.tail
+#define real_entries as.packed.real_entries
+
+/* preparation for possible packing improvements */
+#define PACKED_BINS(table) ((table)->as.packed.entries)
+#define PACKED_ENT(table, i) PACKED_BINS(table)[i]
+#define PKEY(table, i) PACKED_ENT((table), (i)).key
+#define PVAL(table, i) PACKED_ENT((table), (i)).val
+#define PHASH(table, i) PACKED_ENT((table), (i)).hash
+#define PKEY_SET(table, i, v) (PKEY((table), (i)) = (v))
+#define PVAL_SET(table, i, v) (PVAL((table), (i)) = (v))
+#define PHASH_SET(table, i, v) (PHASH((table), (i)) = (v))
+
+/* this function depends much on packed layout, so that it placed here */
+static inline void
+remove_packed_entry(st_table *table, st_index_t i)
+{
+    table->real_entries--;
+    table->num_entries--;
+    if (i < table->real_entries) {
+	MEMMOVE(&PACKED_ENT(table, i), &PACKED_ENT(table, i+1),
+		st_packed_entry, table->real_entries - i);
+    }
+}
+
+static inline void
+remove_safe_packed_entry(st_table *table, st_index_t i, st_data_t never)
+{
+    table->num_entries--;
+    PKEY_SET(table, i, never);
+    PVAL_SET(table, i, never);
+    PHASH_SET(table, i, 0);
+}
+
+/* ultrapacking */
+#define real_upacked num_bins
+#define MAX_UPACKED_HASH 1
+#define ULTRAPACKED(table) ((table)->real_upacked <= 1)
+#define UPACKED_ENT(table) ((table)->as.upacked)
+#define UPKEY(table) UPACKED_ENT(table).key
+#define UPVAL(table) UPACKED_ENT(table).val
+#define UPHASH(table) UPACKED_ENT(table).hash
+#define UPKEY_SET(table, v) (UPACKED_ENT(table).key = (v))
+#define UPVAL_SET(table, v) (UPACKED_ENT(table).val = (v))
+#define UPHASH_SET(table, v) (UPACKED_ENT(table).hash = (v))
+static inline void
+remove_upacked_entry(st_table *table)
+{
+    table->real_upacked = table->num_entries = 0;
+}
+
+static inline void
+remove_safe_upacked_entry(st_table *table, st_data_t never)
+{
+    table->num_entries = 0;
+    UPKEY_SET(table, never);
+    UPVAL_SET(table, never);
+    UPHASH_SET(table, 0);
+}
 /*
  * MINSIZE is the minimum size of a dictionary.
  */
@@ -85,8 +203,8 @@ static void rehash(st_table *);
 Table of prime numbers 2^n+a, 2<=n<=30.
 */
 static const unsigned int primes[] = {
-	8 + 3,
-	16 + 3,
+	ST_DEFAULT_INIT_TABLE_SIZE,
+	ST_DEFAULT_SECOND_TABLE_SIZE,
 	32 + 5,
 	64 + 3,
 	128 + 3,
@@ -161,8 +279,6 @@ stat_col(void)
 }
 #endif
 
-#define MAX_PACKED_NUMHASH (ST_DEFAULT_INIT_TABLE_SIZE/2)
-
 st_table*
 st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
 {
@@ -181,14 +297,19 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
     }
 #endif
 
-    size = new_size(size);	/* round up to prime number */
 
-    tbl = alloc(st_table);
+    tbl = st_alloc_table();
     tbl->type = type;
     tbl->num_entries = 0;
-    tbl->entries_packed = type == &type_numhash && size/2 <= MAX_PACKED_NUMHASH;
+    tbl->entries_packed = size <= MAX_PACKED_HASH;
+    if (tbl->entries_packed) {
+	size = size <= MAX_UPACKED_HASH ? 0 : ST_DEFAULT_PACKED_TABLE_SIZE;
+    }
+    else {
+	size = new_size(size);	/* round up to prime number */
+    }
     tbl->num_bins = size;
-    tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
+    tbl->bins = size ? st_alloc_bins(size) : 0;
     tbl->head = 0;
     tbl->tail = 0;
 
@@ -243,17 +364,23 @@ st_clear(st_table *table)
     register st_table_entry *ptr, *next;
     st_index_t i;
 
+    if (ULTRAPACKED(table)) {
+	remove_upacked_entry(table);
+	return;
+    }
+
     if (table->entries_packed) {
         table->num_entries = 0;
+        table->real_entries = 0;
         return;
     }
 
-    for(i = 0; i < table->num_bins; i++) {
+    for (i = 0; i < table->num_bins; i++) {
 	ptr = table->bins[i];
 	table->bins[i] = 0;
 	while (ptr != 0) {
 	    next = ptr->next;
-	    free(ptr);
+	    st_free_entry(ptr);
 	    ptr = next;
 	}
     }
@@ -266,14 +393,19 @@ void
 st_free_table(st_table *table)
 {
     st_clear(table);
-    free(table->bins);
-    free(table);
+    if (!ULTRAPACKED(table)) {
+	st_free_bins(table->bins, table->num_bins);
+    }
+    st_dealloc_table(table);
 }
 
 size_t
 st_memsize(const st_table *table)
 {
-    if (table->entries_packed) {
+    if (ULTRAPACKED(table)) {
+	return sizeof(st_table);
+    }
+    else if (table->entries_packed) {
 	return table->num_bins * sizeof (void *) + sizeof(st_table);
     }
     else {
@@ -306,46 +438,77 @@ count_collision(const struct st_hash_type *type)
 #define FOUND_ENTRY
 #endif
 
-#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
-    (bin_pos) = (hash_val)%(table)->num_bins;\
-    (ptr) = (table)->bins[(bin_pos)];\
-    FOUND_ENTRY;\
-    if (PTR_NOT_EQUAL((table), (ptr), (hash_val), key)) {\
-	COLLISION;\
-	while (PTR_NOT_EQUAL((table), (ptr)->next, (hash_val), key)) {\
-	    (ptr) = (ptr)->next;\
-	}\
-	(ptr) = (ptr)->next;\
-    }\
-} while (0)
+#define FIND_ENTRY(table, ptr, hash_val, bin_pos) \
+    ((ptr) = find_entry((table), key, (hash_val), ((bin_pos) = (hash_val)%(table)->num_bins)))
+
+static st_table_entry *
+find_entry(st_table *table, st_data_t key, st_index_t hash_val, st_index_t bin_pos)
+{
+    register st_table_entry *ptr = table->bins[bin_pos];
+    FOUND_ENTRY;
+    if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
+	COLLISION;
+	while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
+	    ptr = ptr->next;
+	}
+	ptr = ptr->next;
+    }
+    return ptr;
+}
+
+static inline st_index_t
+find_packed_index(st_table *table, st_index_t hash_val, st_data_t key)
+{
+    st_index_t i = 0;
+    while (i < table->real_entries &&
+	   (PHASH(table, i) != hash_val || !EQUAL(table, key, PKEY(table, i)))) {
+	i++;
+    }
+    return i;
+}
+
+static inline int
+check_upacked(st_table *table, st_index_t hash_val, st_data_t key)
+{
+    return table->num_entries &&
+	UPHASH(table) == hash_val &&
+	EQUAL(table, key, UPKEY(table));
+}
 
 #define collision_check 0
 
 int
 st_lookup(st_table *table, register st_data_t key, st_data_t *value)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
     register st_table_entry *ptr;
 
+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    if (value != 0) *value = UPVAL(table);
+	    return 1;
+	}
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                if (value !=0) *value = (st_data_t)table->bins[i*2+1];
-                return 1;
-            }
-        }
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    if (value != 0) *value = PVAL(table, i);
+	    return 1;
+	}
         return 0;
     }
 
-    hash_val = do_hash(key, table);
-    FIND_ENTRY(table, ptr, hash_val, bin_pos);
+    ptr = find_entry(table, key, hash_val, hash_val % table->num_bins);
 
     if (ptr == 0) {
 	return 0;
     }
     else {
-	if (value != 0)  *value = ptr->record;
+	if (value != 0) *value = ptr->record;
 	return 1;
     }
 }
@@ -353,22 +516,29 @@ st_lookup(st_table *table, register st_data_t key, st_data_t *value)
 int
 st_get_key(st_table *table, register st_data_t key, st_data_t *result)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
     register st_table_entry *ptr;
 
+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    if (result != 0) *result = UPKEY(table);
+	    return 1;
+	}
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                if (result !=0) *result = (st_data_t)table->bins[i*2];
-                return 1;
-            }
-        }
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    if (result != 0) *result = PKEY(table, i);
+	    return 1;
+	}
         return 0;
     }
 
-    hash_val = do_hash(key, table);
-    FIND_ENTRY(table, ptr, hash_val, bin_pos);
+    ptr = find_entry(table, key, hash_val, hash_val % table->num_bins);
 
     if (ptr == 0) {
 	return 0;
@@ -382,85 +552,151 @@ st_get_key(st_table *table, register st_data_t key, st_data_t *result)
 #undef collision_check
 #define collision_check 1
 
-#define MORE_PACKABLE_P(table) \
-    ((st_index_t)((table)->num_entries+1) * 2 <= (table)->num_bins && \
-     (table)->num_entries+1 <= MAX_PACKED_NUMHASH)
-
-#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
-do {\
-    st_table_entry *entry;\
-    if ((table)->num_entries > ST_DEFAULT_MAX_DENSITY * (table)->num_bins) {\
-	rehash(table);\
-        (bin_pos) = (hash_val) % (table)->num_bins;\
-    }\
-    \
-    entry = alloc(st_table_entry);\
-    \
-    entry->hash = (hash_val);\
-    entry->key = (key);\
-    entry->record = (value);\
-    entry->next = (table)->bins[(bin_pos)];\
-    if ((table)->head != 0) {\
-	entry->fore = 0;\
-	(entry->back = (table)->tail)->fore = entry;\
-	(table)->tail = entry;\
-    }\
-    else {\
-	(table)->head = (table)->tail = entry;\
-	entry->fore = entry->back = 0;\
-    }\
-    (table)->bins[(bin_pos)] = entry;\
-    (table)->num_entries++;\
-} while (0)
+static inline st_table_entry *
+new_entry(st_table * table, st_data_t key, st_data_t value,
+	st_index_t hash_val, register st_index_t bin_pos)
+{
+    register st_table_entry *entry = st_alloc_entry();
+
+    entry->next = table->bins[bin_pos];
+    table->bins[bin_pos] = entry;
+    entry->hash = hash_val;
+    entry->key = key;
+    entry->record = value;
+
+    return entry;
+}
+
+static inline void
+add_direct(st_table *table, st_data_t key, st_data_t value,
+	   st_index_t hash_val, register st_index_t bin_pos)
+{
+    register st_table_entry *entry;
+    if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) {
+	rehash(table);
+        bin_pos = hash_val % table->num_bins;
+    }
+
+    entry = new_entry(table, key, value, hash_val, bin_pos);
+
+    if (table->head != 0) {
+	entry->fore = 0;
+	(entry->back = table->tail)->fore = entry;
+	table->tail = entry;
+    }
+    else {
+	table->head = table->tail = entry;
+	entry->fore = entry->back = 0;
+    }
+    table->num_entries++;
+}
 
 static void
 unpack_entries(register st_table *table)
 {
     st_index_t i;
-    struct st_table_entry *packed_bins[MAX_PACKED_NUMHASH*2];
+    st_packed_entry packed_bins[MAX_PACKED_HASH];
+    register st_table_entry *entry, *preventry = 0, **chain;
     st_table tmp_table = *table;
 
-    memcpy(packed_bins, table->bins, sizeof(struct st_table_entry *) * table->num_entries*2);
-    table->bins = packed_bins;
+    MEMCPY(packed_bins, PACKED_BINS(table), st_packed_entry, MAX_PACKED_HASH);
+    table->as.packed.entries = packed_bins;
     tmp_table.entries_packed = 0;
-    tmp_table.num_entries = 0;
-    memset(tmp_table.bins, 0, sizeof(struct st_table_entry *) * tmp_table.num_bins);
-    for (i = 0; i < table->num_entries; i++) {
-        st_insert(&tmp_table, (st_data_t)packed_bins[i*2], (st_data_t)packed_bins[i*2+1]);
-    }
+#if ST_DEFAULT_INIT_TABLE_SIZE == ST_DEFAULT_PACKED_TABLE_SIZE
+    MEMZERO(tmp_table.bins, st_table_entry*, tmp_table.num_bins);
+#else
+    tmp_table.bins = st_realloc_bins(tmp_table.bins, ST_DEFAULT_INIT_TABLE_SIZE, tmp_table.num_bins);
+    tmp_table.num_bins = ST_DEFAULT_INIT_TABLE_SIZE;
+#endif
+    i = 0;
+    chain = &tmp_table.head;
+    do {
+	st_data_t key = packed_bins[i].key;
+	st_data_t val = packed_bins[i].val;
+	st_index_t hash = packed_bins[i].hash;
+	entry = new_entry(&tmp_table, key, val, hash,
+			  hash % ST_DEFAULT_INIT_TABLE_SIZE);
+	*chain = entry;
+	entry->back = preventry;
+	preventry = entry;
+	chain = &entry->fore;
+    } while (++i < MAX_PACKED_HASH);
+    *chain = NULL;
+    tmp_table.tail = entry;
     *table = tmp_table;
 }
 
+static void
+add_packed_direct(st_table *table, st_data_t key, st_data_t value, st_index_t hash_val)
+{
+    if (table->real_entries < MAX_PACKED_HASH) {
+	st_index_t i = table->real_entries++;
+	PKEY_SET(table, i, key);
+	PVAL_SET(table, i, value);
+	PHASH_SET(table, i, hash_val);
+	table->num_entries++;
+    }
+    else {
+	unpack_entries(table);
+	add_direct(table, key, value, hash_val, hash_val % table->num_bins);
+    }
+}
+
+static void
+add_upacked_direct(register st_table *table, register st_data_t key, st_data_t value, st_index_t hash_val)
+{
+    if (table->real_upacked) {
+	st_packed_entry *entries = (st_packed_entry *) st_alloc_bins(ST_DEFAULT_PACKED_TABLE_SIZE);
+	entries[0] = UPACKED_ENT(table);
+	entries[1].hash = hash_val;
+	entries[1].key = key;
+	entries[1].val = value;
+	table->num_bins = ST_DEFAULT_PACKED_TABLE_SIZE;
+	table->real_entries = 2;
+	table->num_entries++;
+	table->as.packed.entries = entries;
+    }
+    else {
+	table->real_upacked = 1;
+	table->num_entries = 1;
+	UPHASH_SET(table, hash_val);
+	UPKEY_SET(table, key);
+	UPVAL_SET(table, value);
+    }
+}
+
 int
 st_insert(register st_table *table, register st_data_t key, st_data_t value)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
+    register st_index_t bin_pos;
     register st_table_entry *ptr;
 
+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    UPVAL_SET(table, value);
+	    return 1;
+	}
+	add_upacked_direct(table, key, value, hash_val);
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                table->bins[i*2+1] = (struct st_table_entry*)value;
-                return 1;
-            }
-        }
-        if (MORE_PACKABLE_P(table)) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
-            return 0;
-        }
-        else {
-            unpack_entries(table);
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    PVAL_SET(table, i, value);
+	    return 1;
         }
+	add_packed_direct(table, key, value, hash_val);
+	return 0;
     }
 
-    hash_val = do_hash(key, table);
     FIND_ENTRY(table, ptr, hash_val, bin_pos);
 
     if (ptr == 0) {
-	ADD_DIRECT(table, key, value, hash_val, bin_pos);
+	add_direct(table, key, value, hash_val, bin_pos);
 	return 0;
     }
     else {
@@ -473,34 +709,38 @@ int
 st_insert2(register st_table *table, register st_data_t key, st_data_t value,
 	   st_data_t (*func)(st_data_t))
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
+    register st_index_t bin_pos;
     register st_table_entry *ptr;
 
+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    UPVAL_SET(table, value);
+	    return 1;
+	}
+	key = (*func)(key);
+	add_upacked_direct(table, key, value, hash_val);
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                table->bins[i*2+1] = (struct st_table_entry*)value;
-                return 1;
-            }
-        }
-        if (MORE_PACKABLE_P(table)) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
-            return 0;
-        }
-        else {
-            unpack_entries(table);
-        }
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    PVAL_SET(table, i, value);
+	    return 1;
+	}
+	key = (*func)(key);
+	add_packed_direct(table, key, value, hash_val);
+	return 0;
     }
 
-    hash_val = do_hash(key, table);
     FIND_ENTRY(table, ptr, hash_val, bin_pos);
 
     if (ptr == 0) {
 	key = (*func)(key);
-	ADD_DIRECT(table, key, value, hash_val, bin_pos);
+	add_direct(table, key, value, hash_val, bin_pos);
 	return 0;
     }
     else {
@@ -512,36 +752,30 @@ st_insert2(register st_table *table, register st_data_t key, st_data_t value,
 void
 st_add_direct(st_table *table, st_data_t key, st_data_t value)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
+
+    hash_val = do_hash(key, table);
+    if (ULTRAPACKED(table)) {
+	add_upacked_direct(table, key, value, hash_val);
+	return;
+    }
 
     if (table->entries_packed) {
-        int i;
-        if (MORE_PACKABLE_P(table)) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
-            return;
-        }
-        else {
-            unpack_entries(table);
-        }
+	add_packed_direct(table, key, value, hash_val);
+	return;
     }
 
-    hash_val = do_hash(key, table);
-    bin_pos = hash_val % table->num_bins;
-    ADD_DIRECT(table, key, value, hash_val, bin_pos);
+    add_direct(table, key, value, hash_val, hash_val % table->num_bins);
 }
 
 static void
 rehash(register st_table *table)
 {
     register st_table_entry *ptr, **new_bins;
-    st_index_t i, new_num_bins, hash_val;
+    st_index_t new_num_bins, hash_val;
 
     new_num_bins = new_size(table->num_bins+1);
-    new_bins = (st_table_entry**)
-	xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*));
-    for (i = 0; i < new_num_bins; ++i) new_bins[i] = 0;
+    new_bins = st_realloc_bins(table->bins, new_num_bins, table->num_bins);
     table->num_bins = new_num_bins;
     table->bins = new_bins;
 
@@ -558,34 +792,37 @@ st_table*
 st_copy(st_table *old_table)
 {
     st_table *new_table;
-    st_table_entry *ptr, *entry, *prev, **tail;
+    st_table_entry *ptr, *entry, *prev, **tailp;
     st_index_t num_bins = old_table->num_bins;
     st_index_t hash_val;
 
-    new_table = alloc(st_table);
+    new_table = st_alloc_table();
     if (new_table == 0) {
 	return 0;
     }
 
     *new_table = *old_table;
-    new_table->bins = (st_table_entry**)
-	Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+    if (ULTRAPACKED(old_table)) {
+	return new_table;
+    }
+
+    new_table->bins = st_alloc_bins(num_bins);
 
     if (new_table->bins == 0) {
-	free(new_table);
+	st_dealloc_table(new_table);
 	return 0;
     }
 
     if (old_table->entries_packed) {
-        memcpy(new_table->bins, old_table->bins, sizeof(struct st_table_entry *) * old_table->num_bins);
+        MEMCPY(new_table->bins, old_table->bins, st_table_entry*, old_table->num_bins);
         return new_table;
     }
 
     if ((ptr = old_table->head) != 0) {
 	prev = 0;
-	tail = &new_table->head;
+	tailp = &new_table->head;
 	do {
-	    entry = alloc(st_table_entry);
+	    entry = st_alloc_entry();
 	    if (entry == 0) {
 		st_free_table(new_table);
 		return 0;
@@ -595,8 +832,8 @@ st_copy(st_table *old_table)
 	    entry->next = new_table->bins[hash_val];
 	    new_table->bins[hash_val] = entry;
 	    entry->back = prev;
-	    *tail = prev = entry;
-	    tail = &entry->fore;
+	    *tailp = prev = entry;
+	    tailp = &entry->fore;
 	} while ((ptr = ptr->fore) != 0);
 	new_table->tail = prev;
     }
@@ -604,21 +841,22 @@ st_copy(st_table *old_table)
     return new_table;
 }
 
-#define REMOVE_ENTRY(table, ptr) do					\
-    {									\
-	if ((ptr)->fore == 0 && (ptr)->back == 0) {			\
-	    (table)->head = 0;						\
-	    (table)->tail = 0;						\
-	}								\
-	else {								\
-	    st_table_entry *fore = (ptr)->fore, *back = (ptr)->back;	\
-	    if (fore) fore->back = back;				\
-	    if (back) back->fore = fore;				\
-	    if ((ptr) == (table)->head) (table)->head = fore;		\
-	    if ((ptr) == (table)->tail) (table)->tail = back;		\
-	}								\
-	(table)->num_entries--;						\
-    } while (0)
+static inline void
+remove_entry(st_table *table, st_table_entry *ptr)
+{
+    if (ptr->fore == 0 && ptr->back == 0) {
+	table->head = 0;
+	table->tail = 0;
+    }
+    else {
+	st_table_entry *fore = ptr->fore, *back = ptr->back;
+	if (fore) fore->back = back;
+	if (back) back->fore = fore;
+	if (ptr == table->head) table->head = fore;
+	if (ptr == table->tail) table->tail = back;
+    }
+    table->num_entries--;
+}
 
 int
 st_delete(register st_table *table, register st_data_t *key, st_data_t *value)
@@ -627,30 +865,38 @@ st_delete(register st_table *table, register st_data_t *key, st_data_t *value)
     st_table_entry **prev;
     register st_table_entry *ptr;
 
+    hash_val = do_hash(*key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, *key)) {
+	    if (value != 0) *value = UPVAL(table);
+	    *key = UPKEY(table);
+	    remove_upacked_entry(table);
+	    return 1;
+	}
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == *key) {
-                if (value != 0) *value = (st_data_t)table->bins[i*2+1];
-                table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
-                return 1;
-            }
+	st_index_t i = find_packed_index(table, hash_val, *key);
+	if (i < table->real_entries) {
+	    if (value != 0) *value = PVAL(table, i);
+	    *key = PKEY(table, i);
+	    remove_packed_entry(table, i);
+	    return 1;
         }
         if (value != 0) *value = 0;
         return 0;
     }
 
-    hash_val = do_hash_bin(*key, table);
-
-    for (prev = &table->bins[hash_val]; (ptr = *prev) != 0; prev = &ptr->next) {
+    prev = &table->bins[hash_val % table->num_bins];
+    for (;(ptr = *prev) != 0; prev = &ptr->next) {
 	if (EQUAL(table, *key, ptr->key)) {
 	    *prev = ptr->next;
-	    REMOVE_ENTRY(table, ptr);
+	    remove_entry(table, ptr);
 	    if (value != 0) *value = ptr->record;
 	    *key = ptr->key;
-	    free(ptr);
+	    st_free_entry(ptr);
 	    return 1;
 	}
     }
@@ -665,25 +911,36 @@ st_delete_safe(register st_table *table, register st_data_t *key, st_data_t *val
     st_index_t hash_val;
     register st_table_entry *ptr;
 
+    hash_val = do_hash(*key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, *key)) {
+	    if (value != 0) *value = UPVAL(table);
+	    *key = UPKEY(table);
+	    remove_safe_upacked_entry(table, never);
+	    return 1;
+	}
+	if (value != 0) *value = 0;
+	return 0;
+    }
+
     if (table->entries_packed) {
-	st_index_t i;
-	for (i = 0; i < table->num_entries; i++) {
-	    if ((st_data_t)table->bins[i*2] == *key) {
-		if (value != 0) *value = (st_data_t)table->bins[i*2+1];
-		table->bins[i*2] = (void *)never;
-		return 1;
-	    }
+	st_index_t i = find_packed_index(table, hash_val, *key);
+	if (i < table->real_entries) {
+	    if (value != 0) *value = PVAL(table, i);
+	    *key = PKEY(table, i);
+	    remove_safe_packed_entry(table, i, never);
+	    return 1;
 	}
 	if (value != 0) *value = 0;
 	return 0;
     }
 
-    hash_val = do_hash_bin(*key, table);
-    ptr = table->bins[hash_val];
+    ptr = table->bins[hash_val % table->num_bins];
 
     for (; ptr != 0; ptr = ptr->next) {
 	if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
-	    REMOVE_ENTRY(table, ptr);
+	    remove_entry(table, ptr);
 	    *key = ptr->key;
 	    if (value != 0) *value = ptr->record;
 	    ptr->key = ptr->record = never;
@@ -701,17 +958,23 @@ st_cleanup_safe(st_table *table, st_data_t never)
     st_table_entry *ptr, **last, *tmp;
     st_index_t i;
 
+    if (ULTRAPACKED(table)) {
+	table->real_upacked = table->num_entries;
+	return;
+    }
+
     if (table->entries_packed) {
 	st_index_t i = 0, j = 0;
-	while ((st_data_t)table->bins[i*2] != never) {
-	    if (i++ == table->num_entries) return;
+	while (PKEY(table, i) != never) {
+	    if (i++ == table->real_entries) return;
 	}
-	for (j = i; ++i < table->num_entries;) {
-	    if ((st_data_t)table->bins[i*2] == never) continue;
-	    table->bins[j*2] = table->bins[i*2];
-	    table->bins[j*2+1] = table->bins[i*2+1];
+	for (j = i; ++i < table->real_entries;) {
+	    if (PKEY(table, i) == never) continue;
+	    PACKED_ENT(table, j) = PACKED_ENT(table, i);
 	    j++;
 	}
+	table->real_entries = j;
+	/* table->num_entries really should be equal j at this moment, but let set it anyway */
 	table->num_entries = j;
 	return;
     }
@@ -722,7 +985,7 @@ st_cleanup_safe(st_table *table, st_data_t never)
 	    if (ptr->key == never) {
 		tmp = ptr;
 		*last = ptr = ptr->next;
-		free(tmp);
+		st_free_entry(tmp);
 	    }
 	    else {
 		ptr = *(last = &ptr->next);
@@ -732,50 +995,78 @@ st_cleanup_safe(st_table *table, st_data_t never)
 }
 
 int
-st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
+st_foreach_check(st_table *table, int (*func)(ANYARGS), st_data_t arg, st_data_t never)
 {
     st_table_entry *ptr, **last, *tmp;
     enum st_retval retval;
-    st_index_t i;
+    st_data_t key, val;
+    st_index_t hash, i = 0;
+
+    if (table->num_entries == 0) {
+	return 0;
+    }
+
+    if (ULTRAPACKED(table)) {
+	key = UPKEY(table);
+	val = UPVAL(table);
+	hash = UPHASH(table);
+	if (key == never) return 0;
+	retval = (*func)(key, val, arg);
+	if (!ULTRAPACKED(table)) {
+	    goto packed;
+	}
+	switch(retval) {
+	  case ST_CHECK:
+	    if (UPHASH(table) == 0 && UPKEY(table) == never)
+		break;
+	    if (check_upacked(table, hash, key))
+		break;
+	    goto deleted;
+	  case ST_DELETE:
+	    remove_safe_upacked_entry(table, never);
+	  case ST_CONTINUE:
+	  case ST_STOP:
+	    break;
+	}
+	return 0;
+    }
 
     if (table->entries_packed) {
-        for (i = 0; i < table->num_entries; i++) {
-            st_index_t j;
-            st_data_t key, val;
-            key = (st_data_t)table->bins[i*2];
-            val = (st_data_t)table->bins[i*2+1];
-            retval = (*func)(key, val, arg);
+	for (i = 0; i < table->real_entries; i++) {
+	    key = PKEY(table, i);
+	    val = PVAL(table, i);
+	    hash = PHASH(table, i);
+	    if (key == never) continue;
+	    retval = (*func)(key, val, arg);
+	  packed:
 	    if (!table->entries_packed) {
-		FIND_ENTRY(table, ptr, key, i);
+		FIND_ENTRY(table, ptr, hash, i);
 		if (retval == ST_CHECK) {
 		    if (!ptr) goto deleted;
 		    goto unpacked_continue;
 		}
 		goto unpacked;
 	    }
-            switch (retval) {
+	    switch (retval) {
 	      case ST_CHECK:	/* check if hash is modified during iteration */
-                for (j = 0; j < table->num_entries; j++) {
-                    if ((st_data_t)table->bins[j*2] == key)
-                        break;
-                }
-                if (j == table->num_entries) {
+		if (PHASH(table, i) == 0 && PKEY(table, i) == never) {
+		    break;
+		}
+		i = find_packed_index(table, hash, key);
+		if (i == table->real_entries) {
 		    goto deleted;
-                }
+		}
 		/* fall through */
 	      case ST_CONTINUE:
 		break;
 	      case ST_STOP:
 		return 0;
 	      case ST_DELETE:
-                table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
-                i--;
-                break;
-            }
-        }
-        return 0;
+		remove_safe_packed_entry(table, i, never);
+		break;
+	    }
+	}
+	return 0;
     }
     else {
 	ptr = table->head;
@@ -783,6 +1074,8 @@ st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
 
     if (ptr != 0) {
 	do {
+	    if (ptr->key == never)
+		goto unpacked_continue;
 	    i = ptr->hash % table->num_bins;
 	    retval = (*func)(ptr->key, ptr->record, arg);
 	  unpacked:
@@ -808,10 +1101,100 @@ st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
 		for (; (tmp = *last) != 0; last = &tmp->next) {
 		    if (ptr == tmp) {
 			tmp = ptr->fore;
+			remove_entry(table, ptr);
+			ptr->key = ptr->record = never;
+			ptr->hash = 0;
+			ptr = tmp;
+			break;
+		    }
+		}
+	    }
+	} while (ptr && table->head);
+    }
+    return 0;
+}
+
+int
+st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
+{
+    st_table_entry *ptr, **last, *tmp;
+    enum st_retval retval;
+    st_data_t key, val;
+    st_index_t hash, i = 0;
+
+    if (table->num_entries == 0) {
+	return 0;
+    }
+
+    if (ULTRAPACKED(table)) {
+	key = UPKEY(table);
+	val = UPVAL(table);
+	hash = UPHASH(table);
+	retval = (*func)(key, val, arg);
+	if (!ULTRAPACKED(table)) {
+	    goto packed;
+	}
+	switch (retval) {
+	  case ST_DELETE:
+	    remove_upacked_entry(table);
+	  case ST_CONTINUE:
+	  case ST_CHECK:
+	  case ST_STOP:
+	    break;
+	}
+	return 0;
+    }
+
+    if (table->entries_packed) {
+	for (i = 0; i < table->real_entries; i++) {
+	    key = PKEY(table, i);
+	    val = PVAL(table, i);
+	    hash = PHASH(table, i);
+	    retval = (*func)(key, val, arg);
+	  packed:
+	    if (!table->entries_packed) {
+		FIND_ENTRY(table, ptr, hash, i);
+		if (!ptr) return 0;
+		goto unpacked;
+	    }
+	    switch (retval) {
+	      case ST_CONTINUE:
+		break;
+	      case ST_CHECK:
+	      case ST_STOP:
+		return 0;
+	      case ST_DELETE:
+		remove_packed_entry(table, i);
+		i--;
+		break;
+	    }
+	}
+	return 0;
+    }
+    else {
+	ptr = table->head;
+    }
+
+    if (ptr != 0) {
+	do {
+	    i = ptr->hash % table->num_bins;
+	    retval = (*func)(ptr->key, ptr->record, arg);
+	  unpacked:
+	    switch (retval) {
+	      case ST_CONTINUE:
+		ptr = ptr->fore;
+		break;
+	      case ST_CHECK:
+	      case ST_STOP:
+		return 0;
+	      case ST_DELETE:
+		last = &table->bins[ptr->hash % table->num_bins];
+		for (; (tmp = *last) != 0; last = &tmp->next) {
+		    if (ptr == tmp) {
+			tmp = ptr->fore;
 			*last = ptr->next;
-			REMOVE_ENTRY(table, ptr);
-			free(ptr);
-			if (ptr == tmp) return 0;
+			remove_entry(table, ptr);
+			st_free_entry(ptr);
 			ptr = tmp;
 			break;
 		    }
@@ -834,13 +1217,13 @@ st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
         for (i = table->num_entries-1; 0 <= i; i--) {
             int j;
             st_data_t key, val;
-            key = (st_data_t)table->bins[i*2];
-            val = (st_data_t)table->bins[i*2+1];
+            key = PKEY(table, i);
+            val = PVAL(table, i);
             retval = (*func)(key, val, arg);
             switch (retval) {
 	      case ST_CHECK:	/* check if hash is modified during iteration */
                 for (j = 0; j < table->num_entries; j++) {
-                    if ((st_data_t)table->bins[j*2] == key)
+                    if (PKEY(table, j) == key)
                         break;
                 }
                 if (j == table->num_entries) {
@@ -854,9 +1237,7 @@ st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
 	      case ST_STOP:
 		return 0;
 	      case ST_DELETE:
-                table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
+		remove_packed_entry(table, i);
                 break;
             }
         }
@@ -889,8 +1270,8 @@ st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
 		    if (ptr == tmp) {
 			tmp = ptr->back;
 			*last = ptr->next;
-			REMOVE_ENTRY(table, ptr);
-			free(ptr);
+			remove_entry(table, ptr);
+			st_free_entry(ptr);
 			ptr = tmp;
 			break;
 		    }
diff --git a/variable.c b/variable.c
index 3da500e..303bb27 100644
--- a/variable.c
+++ b/variable.c
@@ -473,7 +473,7 @@ void
 rb_gc_mark_global_tbl(void)
 {
     if (rb_global_tbl)
-        st_foreach_safe(rb_global_tbl, mark_global_entry, 0);
+        st_foreach(rb_global_tbl, mark_global_entry, 0);
 }
 
 static ID
@@ -765,7 +765,7 @@ rb_f_global_variables(void)
     char buf[2];
     int i;
 
-    st_foreach_safe(rb_global_tbl, gvar_i, ary);
+    st_foreach(rb_global_tbl, gvar_i, ary);
     buf[0] = '$';
     for (i = 1; i <= 9; ++i) {
 	buf[1] = (char)(i + '0');
@@ -923,7 +923,7 @@ static int
 givar_i(VALUE obj, st_table *tbl)
 {
     if (rb_special_const_p(obj)) {
-	st_foreach_safe(tbl, givar_mark_i, 0);
+	st_foreach(tbl, givar_mark_i, 0);
     }
     return ST_CONTINUE;
 }
@@ -933,7 +933,7 @@ rb_mark_generic_ivar_tbl(void)
 {
     if (!generic_iv_tbl) return;
     if (special_generic_ivar == 0) return;
-    st_foreach_safe(generic_iv_tbl, givar_i, 0);
+    st_foreach(generic_iv_tbl, givar_i, 0);
 }
 
 void
@@ -1170,7 +1170,7 @@ obj_ivar_each(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
     data.func = (int (*)(ID key, VALUE val, st_data_t arg))func;
     data.arg = arg;
 
-    st_foreach_safe(tbl, obj_ivar_i, (st_data_t)&data);
+    st_foreach(tbl, obj_ivar_i, (st_data_t)&data);
 }
 
 void
@@ -1730,7 +1730,7 @@ rb_mod_const_at(VALUE mod, void *data)
 	tbl = st_init_numtable();
     }
     if (RCLASS_CONST_TBL(mod)) {
-	st_foreach_safe(RCLASS_CONST_TBL(mod), sv_i, (st_data_t)tbl);
+	st_foreach(RCLASS_CONST_TBL(mod), sv_i, (st_data_t)tbl);
     }
     return tbl;
 }
@@ -1765,7 +1765,7 @@ rb_const_list(void *data)
 
     if (!tbl) return rb_ary_new2(0);
     ary = rb_ary_new2(tbl->num_entries);
-    st_foreach_safe(tbl, list_i, ary);
+    st_foreach(tbl, list_i, ary);
     st_free_table(tbl);
 
     return ary;
diff --git a/vm.c b/vm.c
index e997afa..064a4f8 100644
--- a/vm.c
+++ b/vm.c
@@ -1575,6 +1575,7 @@ rb_vm_mark(void *ptr)
 	RUBY_MARK_UNLESS_NULL(vm->thgroup_default);
 	RUBY_MARK_UNLESS_NULL(vm->mark_object_ary);
 	RUBY_MARK_UNLESS_NULL(vm->load_path);
+	RUBY_MARK_UNLESS_NULL(vm->load_path_expanded_cache);
 	RUBY_MARK_UNLESS_NULL(vm->loaded_features);
 	RUBY_MARK_UNLESS_NULL(vm->top_self);
 	RUBY_MARK_UNLESS_NULL(vm->coverages);
@@ -1606,16 +1607,17 @@ ruby_vm_destruct(rb_vm_t *vm)
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
 	struct rb_objspace *objspace = vm->objspace;
 #endif
+	if (vm->living_threads) {
+	    st_table *living_threads = vm->living_threads;
+	    vm->living_threads = 0;
+	    st_free_table(living_threads);
+	}
 	rb_gc_force_recycle(vm->self);
 	vm->main_thread = 0;
 	if (th) {
 	    rb_fiber_reset_root_local_storage(th->self);
 	    thread_free(th);
 	}
-	if (vm->living_threads) {
-	    st_free_table(vm->living_threads);
-	    vm->living_threads = 0;
-	}
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
 	if (objspace) {
 	    rb_objspace_free(objspace);
diff --git a/vm_core.h b/vm_core.h
index 7211005..e787d4b 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -298,6 +298,7 @@ typedef struct rb_vm_struct {
     /* load */
     VALUE top_self;
     VALUE load_path;
+    VALUE load_path_expanded_cache;
     VALUE loaded_features;
     struct st_table *loading_table;
 
© 2025 GrazzMean