NonMoving: Optimise allocator cache behavior
authorBen Gamari <ben@smart-cactus.org>
Sun, 12 May 2019 03:04:54 +0000 (23:04 -0400)
committerBen Gamari <ben@smart-cactus.org>
Sun, 19 May 2019 18:50:01 +0000 (14:50 -0400)
Previously we would look at the segment header to determine the block
size despite the fact that we already had the block size at hand.

rts/sm/NonMoving.c
rts/sm/NonMoving.h

index 4fbc5bf..7afc87e 100644 (file)
@@ -167,6 +167,20 @@ static struct NonmovingSegment *nonmovingPopFreeSegment(void)
     }
 }
 
+unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size)
+{
+  // We compute the overwhelmingly common size cases directly to avoid a very
+  // expensive integer division.
+  switch (log_block_size) {
+    case 3:  return nonmovingBlockCount(3);
+    case 4:  return nonmovingBlockCount(4);
+    case 5:  return nonmovingBlockCount(5);
+    case 6:  return nonmovingBlockCount(6);
+    case 7:  return nonmovingBlockCount(7);
+    default: return nonmovingBlockCount(log_block_size);
+  }
+}
+
 /*
  * Request a fresh segment from the free segment list or allocate one of the
  * given node.
@@ -215,10 +229,10 @@ static inline unsigned long log2_ceil(unsigned long x)
 }
 
 // Advance a segment's next_free pointer. Returns true if segment is full.
-static bool advance_next_free(struct NonmovingSegment *seg)
+static bool advance_next_free(struct NonmovingSegment *seg, const unsigned int blk_count)
 {
     const uint8_t *bitmap = seg->bitmap;
-    const unsigned int blk_count = nonmovingSegmentBlockCount(seg);
+    ASSERT(blk_count == nonmovingSegmentBlockCount(seg));
 #if defined(NAIVE_ADVANCE_FREE)
     // reference implementation
     for (unsigned int i = seg->next_free+1; i < blk_count; i++) {
@@ -260,22 +274,23 @@ static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *al
 GNUC_ATTR_HOT
 void *nonmovingAllocate(Capability *cap, StgWord sz)
 {
-    unsigned int allocator_idx = log2_ceil(sz * sizeof(StgWord)) - NONMOVING_ALLOCA0;
+    unsigned int log_block_size = log2_ceil(sz * sizeof(StgWord));
+    unsigned int block_count = nonmovingBlockCountFromSize(log_block_size);
 
     // The max we ever allocate is 3276 bytes (anything larger is a large
     // object and not moved) which is covered by allocator 9.
-    ASSERT(allocator_idx < NONMOVING_ALLOCA_CNT);
+    ASSERT(log_block_size < NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT);
 
-    struct NonmovingAllocator *alloca = nonmovingHeap.allocators[allocator_idx];
+    struct NonmovingAllocator *alloca = nonmovingHeap.allocators[log_block_size - NONMOVING_ALLOCA0];
 
     // Allocate into current segment
     struct NonmovingSegment *current = alloca->current[cap->no];
     ASSERT(current); // current is never NULL
-    void *ret = nonmovingSegmentGetBlock(current, current->next_free);
+    void *ret = nonmovingSegmentGetBlock_(current, log_block_size, current->next_free);
     ASSERT(GET_CLOSURE_TAG(ret) == 0); // check alignment
 
     // Advance the current segment's next_free or allocate a new segment if full
-    bool full = advance_next_free(current);
+    bool full = advance_next_free(current, block_count);
     if (full) {
         // Current segment is full: update live data estimate, link it to
         // filled, take an active segment if one exists, otherwise allocate a
@@ -283,8 +298,9 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
 
         // Update live data estimate.
         // See Note [Live data accounting in nonmoving collector].
-        unsigned int new_blocks =  nonmovingSegmentBlockCount(current) - current->next_free_snap;
-        atomic_inc(&oldest_gen->live_estimate, new_blocks * nonmovingSegmentBlockSize(current) / sizeof(W_));
+        unsigned int new_blocks = block_count - current->next_free_snap;
+        unsigned int block_size = 1 << log_block_size;
+        atomic_inc(&oldest_gen->live_estimate, new_blocks * block_size / sizeof(W_));
 
         // push the current segment to the filled list
         nonmovingPushFilledSegment(current);
@@ -295,7 +311,7 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
         // there are no active segments, allocate new segment
         if (new_current == NULL) {
             new_current = nonmovingAllocSegment(cap->node);
-            nonmovingInitSegment(new_current, NONMOVING_ALLOCA0 + allocator_idx);
+            nonmovingInitSegment(new_current, log_block_size);
         }
 
         // make it current
index 165c7b1..a26bc75 100644 (file)
@@ -170,28 +170,24 @@ INLINE_HEADER unsigned int nonmovingBlockCount(uint8_t log_block_size)
   return segment_data_size / (blk_size + 1);
 }
 
+unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size);
+
 // How many blocks does the given segment contain? Also the size of the bitmap.
 INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg)
 {
-  // We compute the overwhelmingly common size cases directly to avoid a very
-  // expensive integer division.
-  switch (seg->block_size) {
-    case 3:  return nonmovingBlockCount(3);
-    case 4:  return nonmovingBlockCount(4);
-    case 5:  return nonmovingBlockCount(5);
-    case 6:  return nonmovingBlockCount(6);
-    case 7:  return nonmovingBlockCount(7);
-    default: return nonmovingBlockCount(seg->block_size);
-  }
+  return nonmovingBlockCountFromSize(seg->block_size);
 }
 
-// Get a pointer to the given block index
-INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i)
+// Get a pointer to the given block index assuming that the block size is as
+// given (avoiding a potential cache miss when this information is already
+// available). The log_block_size argument must be equal to seg->block_size.
+INLINE_HEADER void *nonmovingSegmentGetBlock_(struct NonmovingSegment *seg, uint8_t log_block_size, nonmoving_block_idx i)
 {
+  ASSERT(log_block_size == seg->block_size);
   // Block size in bytes
-  unsigned int blk_size = nonmovingSegmentBlockSize(seg);
+  unsigned int blk_size = 1 << log_block_size;
   // Bitmap size in bytes
-  W_ bitmap_size = nonmovingSegmentBlockCount(seg) * sizeof(uint8_t);
+  W_ bitmap_size = nonmovingBlockCountFromSize(log_block_size) * sizeof(uint8_t);
   // Where the actual data starts (address of the first block).
   // Use ROUNDUP_BYTES_TO_WDS to align to word size. Note that
   // ROUNDUP_BYTES_TO_WDS returns in _words_, not in _bytes_, so convert it back
@@ -200,6 +196,12 @@ INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmo
   return (void*)(data + i*blk_size);
 }
 
+// Get a pointer to the given block index.
+INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i)
+{
+  return nonmovingSegmentGetBlock_(seg, seg->block_size, i);
+}
+
 // Get the segment which a closure resides in. Assumes that pointer points into
 // non-moving heap.
 INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment_unchecked(StgPtr p)