rts: enable parallel GC scan of large (32M+) allocation area
[ghc.git] / rts / RtsFlags.c
index af1b204..7e06d84 100644 (file)
@@ -14,6 +14,8 @@
 #include "Profiling.h"
 #include "RtsFlags.h"
 #include "sm/OSMem.h"
+#include "hooks/Hooks.h"
+#include "Capability.h"
 
 #ifdef HAVE_CTYPE_H
 #include <ctype.h>
@@ -52,6 +54,24 @@ int       win32_prog_argc = 0;
 wchar_t **win32_prog_argv = NULL;
 #endif
 
+// The global rtsConfig, set from the RtsConfig supplied by the call
+// to hs_init_ghc().
+RtsConfig rtsConfig;
+
+const RtsConfig defaultRtsConfig  = {
+    .rts_opts_enabled = RtsOptsSafeOnly, /* in this mode checkUnsafe() rejects OPTION_UNSAFE flags */
+    .rts_opts_suggestions = rtsTrue,
+    .rts_opts = NULL, /* NULL: setupRtsFlags() skips the built-in option string */
+    .rts_hs_main = rtsFalse, /* picks the advice printed by errorRtsOptsDisabled() */
+    .keep_cafs = rtsFalse,
+    .defaultsHook = FlagDefaultsHook, /* hooks: presumably declared in hooks/Hooks.h -- confirm */
+    .onExitHook = OnExitHook,
+    .stackOverflowHook = StackOverflowHook,
+    .outOfHeapHook = OutOfHeapHook,
+    .mallocFailHook = MallocFailHook,
+    .gcDoneHook = NULL /* no gcDoneHook by default */
+};
+
 /*
  * constants, used later
  */
@@ -62,31 +82,40 @@ wchar_t **win32_prog_argv = NULL;
    Static function decls
    -------------------------------------------------------------------------- */
 
-static void procRtsOpts      (HsBool is_hs_main, int rts_argc0, RtsOptsEnabledEnum enabled);
+static void procRtsOpts (int rts_argc0, RtsOptsEnabledEnum enabled);
 
 static void normaliseRtsOpts (void);
 
-static void initStatsFile    (FILE *f);
+static void initStatsFile (FILE *f);
+
+static int  openStatsFile (
+    char *filename, const char *FILENAME_FMT, FILE **file_ret);
+
+static StgWord64 decodeSize (
+    const char *flag, uint32_t offset, StgWord64 min, StgWord64 max);
 
-static int  openStatsFile    (char *filename, const char *FILENAME_FMT,
-                              FILE **file_ret);
+static void bad_option (const char *s);
 
-static StgWord64 decodeSize  (const char *flag, nat offset,
-                              StgWord64 min, StgWord64 max);
+#ifdef DEBUG
+static void read_debug_flags(const char *arg);
+#endif
 
-static void bad_option       (const char *s);
+#ifdef PROFILING
+static rtsBool read_heap_profiling_flag(const char *arg);
+#endif
 
 #ifdef TRACING
-static void read_trace_flags(char *arg);
+static void read_trace_flags(const char *arg);
 #endif
 
-static void errorUsage      (void) GNU_ATTRIBUTE(__noreturn__);
+static void errorUsage (void) GNU_ATTRIBUTE(__noreturn__);
 
-static char *  copyArg  (char *arg);
+static char *  copyArg (char *arg);
 static char ** copyArgv (int argc, char *argv[]);
 static void    freeArgv (int argc, char *argv[]);
+static void setProgName (char *argv[]);
 
-static void errorRtsOptsDisabled(HsBool is_hs_main, const char *s);
+static void errorRtsOptsDisabled (const char *s);
 
 /* -----------------------------------------------------------------------------
  * Command-line option parsing routines.
@@ -108,6 +137,8 @@ void initRtsFlagsDefaults(void)
     RtsFlags.GcFlags.stkChunkBufferSize = (1 * 1024) / sizeof(W_);
 
     RtsFlags.GcFlags.minAllocAreaSize   = (512 * 1024)        / BLOCK_SIZE;
+    RtsFlags.GcFlags.largeAllocLim      = 0; /* defaults to minAllocAreaSize */
+    RtsFlags.GcFlags.nurseryChunkSize   = 0;
     RtsFlags.GcFlags.minOldGenSize      = (1024 * 1024)       / BLOCK_SIZE;
     RtsFlags.GcFlags.maxHeapSize        = 0;    /* off by default */
     RtsFlags.GcFlags.heapSizeSuggestion = 0;    /* none */
@@ -125,20 +156,11 @@ void initRtsFlagsDefaults(void)
 #else
     RtsFlags.GcFlags.doIdleGC           = rtsFalse;
 #endif
-
-#if osf3_HOST_OS
-/* ToDo: Perhaps by adjusting this value we can make linking without
- * -static work (i.e., not generate a core-dumping executable)? */
-# if SIZEOF_VOID_P == 8
-    RtsFlags.GcFlags.heapBase           = 0x180000000L;
-# else
-#  error I have no idea where to begin the heap on a non-64-bit osf3 machine.
-# endif
-#else
     RtsFlags.GcFlags.heapBase           = 0;   /* means don't care */
-#endif
+    RtsFlags.GcFlags.allocLimitGrace    = (100*1024) / BLOCK_SIZE;
+    RtsFlags.GcFlags.numa               = rtsFalse;
+    RtsFlags.GcFlags.numaMask           = 1;
 
-#ifdef DEBUG
     RtsFlags.DebugFlags.scheduler       = rtsFalse;
     RtsFlags.DebugFlags.interpreter     = rtsFalse;
     RtsFlags.DebugFlags.weak            = rtsFalse;
@@ -154,7 +176,7 @@ void initRtsFlagsDefaults(void)
     RtsFlags.DebugFlags.squeeze         = rtsFalse;
     RtsFlags.DebugFlags.hpc             = rtsFalse;
     RtsFlags.DebugFlags.sparks          = rtsFalse;
-#endif
+    RtsFlags.DebugFlags.numa            = rtsFalse;
 
 #if defined(PROFILING)
     RtsFlags.CcFlags.doCostCentres      = 0;
@@ -200,13 +222,14 @@ void initRtsFlagsDefaults(void)
     RtsFlags.MiscFlags.linkerMemBase    = 0;
 
 #ifdef THREADED_RTS
-    RtsFlags.ParFlags.nNodes            = 1;
+    RtsFlags.ParFlags.nCapabilities     = 1;
     RtsFlags.ParFlags.migrate           = rtsTrue;
     RtsFlags.ParFlags.parGcEnabled      = 1;
     RtsFlags.ParFlags.parGcGen          = 0;
     RtsFlags.ParFlags.parGcLoadBalancingEnabled = rtsTrue;
-    RtsFlags.ParFlags.parGcLoadBalancingGen = 1;
+    RtsFlags.ParFlags.parGcLoadBalancingGen = ~0u; /* auto, based on -A */
     RtsFlags.ParFlags.parGcNoSyncWithIdle   = 0;
+    RtsFlags.ParFlags.parGcThreads      = 0; /* defaults to -N */
     RtsFlags.ParFlags.setAffinity       = 0;
 #endif
 
@@ -218,12 +241,6 @@ void initRtsFlagsDefaults(void)
     RtsFlags.TickyFlags.showTickyStats   = rtsFalse;
     RtsFlags.TickyFlags.tickyFile        = NULL;
 #endif
-
-#ifdef USE_PAPI
-    /* By default no special measurements taken */
-    RtsFlags.PapiFlags.eventType        = 0;
-    RtsFlags.PapiFlags.numUserEvents    = 0;
-#endif
 }
 
 static const char *
@@ -241,17 +258,22 @@ usage_text[] = {
 "  -?       Prints this message and exits; the program is not executed",
 "  --info   Print information about the RTS used by this program",
 "",
-"  -K<size> Sets the maximum stack size (default 8M)  Egs: -K32k   -K512k",
+"  -K<size>  Sets the maximum stack size (default: 80% of the heap)",
+"            Egs: -K32k -K512k -K8M",
 "  -ki<size> Sets the initial thread stack size (default 1k)  Egs: -ki4k -ki2m",
 "  -kc<size> Sets the stack chunk size (default 32k)",
 "  -kb<size> Sets the stack chunk buffer size (default 1k)",
 "",
-"  -A<size> Sets the minimum allocation area size (default 512k) Egs: -A1m -A10k",
-"  -M<size> Sets the maximum heap size (default unlimited)  Egs: -M256k -M1G",
-"  -H<size> Sets the minimum heap size (default 0M)   Egs: -H24m  -H1G",
-"  -m<n>    Minimum % of heap which must be available (default 3%)",
-"  -G<n>    Number of generations (default: 2)",
-"  -c<n>    Use in-place compaction instead of copying in the oldest generation",
+"  -A<size>  Sets the minimum allocation area size (default 512k) Egs: -A1m -A10k",
+"  -AL<size> Sets the amount of large-object memory that can be allocated",
+"            before a GC is triggered (default: the value of -A)",
+"  -n<size>  Allocation area chunk size (0 = disabled, default: 0)",
+"  -O<size>  Sets the minimum size of the old generation (default 1M)",
+"  -M<size>  Sets the maximum heap size (default unlimited)  Egs: -M256k -M1G",
+"  -H<size>  Sets the minimum heap size (default 0M)   Egs: -H24m  -H1G",
+"  -m<n>     Minimum % of heap which must be available (default 3%)",
+"  -G<n>     Number of generations (default: 2)",
+"  -c<n>     Use in-place compaction instead of copying in the oldest generation",
 "           when live data is at least <n>% of the maximum heap size set with",
 "           -M (default: 30%)",
 "  -c       Use in-place compaction for all oldest generation collections",
@@ -274,8 +296,6 @@ usage_text[] = {
 "  -p       Time/allocation profile        (output file <program>.prof)",
 "  -P       More detailed Time/Allocation profile",
 "  -Pa      Give information about *all* cost centres",
-
-# if defined(PROFILING)
 "",
 "  -h<break-down> Heap residency profile (hp2ps) (output file <program>.hp)",
 "     break-down: c = cost centre stack (default)",
@@ -301,8 +321,7 @@ usage_text[] = {
 "  -xt            Include threads (TSOs) in a heap profile",
 "",
 "  -xc      Show current cost centre stack on raising an exception",
-# endif
-#endif /* PROFILING or PAR */
+#endif /* PROFILING */
 
 #ifdef TRACING
 "",
@@ -368,17 +387,28 @@ usage_text[] = {
 "",
 #endif /* DEBUG */
 #if defined(THREADED_RTS) && !defined(NOSMP)
-"  -N[<n>]   Use <n> processors (default: 1, -N alone determines",
-"            the number of processors to use automatically)",
+"  -N[<n>]    Use <n> processors (default: 1, -N alone determines",
+"             the number of processors to use automatically)",
+"  -maxN[<n>] Use up to <n> processors automatically",
 "  -qg[<n>]  Use parallel GC only for generations >= <n>",
 "            (default: 0, -qg alone turns off parallel GC)",
 "  -qb[<n>]  Use load-balancing in the parallel GC only for generations >= <n>",
-"            (default: 1, -qb alone turns off load-balancing)",
+"            (default: 1 for -A < 32M, 0 otherwise;"
+"             -qb alone turns off load-balancing)",
+"  -qn<n>    Use <n> threads for parallel GC (defaults to value of -N)",
 "  -qa       Use the OS to set thread affinity (experimental)",
 "  -qm       Don't automatically migrate threads between CPUs",
 "  -qi<n>    If a processor has been idle for the last <n> GCs, do not",
 "            wake it up for a non-load-balancing parallel GC.",
 "            (0 disables,  default: 0)",
+"  --numa[=<node_mask>]",
+"            Use NUMA, nodes given by <node_mask> (default: off)",
+#if defined(DEBUG)
+"  --debug-numa[=<num_nodes>]",
+"            Pretend NUMA: like --numa, but without the system calls.",
+"            Can be used on non-NUMA systems for debugging.",
+"",
+#endif
 #endif
 "  --install-signal-handlers=<yes|no>",
 "            Install signal handlers (default: yes)",
@@ -389,19 +419,8 @@ usage_text[] = {
 "  -xm       Base address to mmap memory in the GHCi linker",
 "            (hex; must be <80000000)",
 #endif
-#if defined(USE_PAPI)
-"  -aX       CPU performance counter measurements using PAPI",
-"            (use with the -s<file> option).  X is one of:",
-"",
-/* "            y - cycles", */
-"            1 - level 1 cache misses",
-"            2 - level 2 cache misses",
-"            b - branch mispredictions",
-"            s - stalled cycles",
-"            e - cache miss and branch misprediction events",
-"            +PAPI_EVENT   - collect papi preset event PAPI_EVENT",
-"            #NATIVE_EVENT - collect native event NATIVE_EVENT (in hex)",
-#endif
+"  -xq       The allocation limit given to a thread after it receives",
+"            an AllocationLimitExceeded exception. (default: 100k)",
 "",
 "RTS options may also be specified using the GHCRTS environment variable.",
 "",
@@ -411,8 +430,7 @@ usage_text[] = {
 0
 };
 
-STATIC_INLINE rtsBool
-strequal(const char *a, const char * b)
+STATIC_INLINE rtsBool strequal(const char *a, const char * b)
 {
     return(strcmp(a, b) == 0);
 }
@@ -452,10 +470,10 @@ static void splitRtsFlags(const char *s)
     } while (*c1 != '\0');
 }
 
-static void
-errorRtsOptsDisabled(HsBool is_hs_main, const char *s) {
+static void errorRtsOptsDisabled(const char *s)
+{
     char *advice;
-    if (is_hs_main) {
+    if (rtsConfig.rts_hs_main) {
         advice = "Link with -rtsopts to enable them.";
     } else {
         advice = "Use hs_init_with_rtsopts() to enable them.";
@@ -478,16 +496,17 @@ errorRtsOptsDisabled(HsBool is_hs_main, const char *s) {
 
      - prog_name   (global) contains the basename of prog_argv[0]
 
+     - rtsConfig   (global) contains the supplied RtsConfig
+
   -------------------------------------------------------------------------- */
 
-void setupRtsFlags (int *argc, char *argv[],
-                    RtsOptsEnabledEnum rtsOptsEnabled,
-                    const char *ghc_rts_opts,
-                    HsBool is_hs_main)
+void setupRtsFlags (int *argc, char *argv[], RtsConfig rts_config)
 {
-    nat mode;
-    nat total_arg;
-    nat arg, rts_argc0;
+    uint32_t mode;
+    uint32_t total_arg;
+    uint32_t arg, rts_argc0;
+
+    rtsConfig = rts_config;
 
     setProgName (argv);
     total_arg = *argc;
@@ -505,10 +524,10 @@ void setupRtsFlags (int *argc, char *argv[],
     // (arguments from the GHCRTS environment variable and the command
     // line override these).
     {
-        if (ghc_rts_opts != NULL) {
-            splitRtsFlags(ghc_rts_opts);
-            // opts from ghc_rts_opts are always enabled:
-            procRtsOpts(is_hs_main, rts_argc0, RtsOptsAll);
+        if (rtsConfig.rts_opts != NULL) {
+            splitRtsFlags(rtsConfig.rts_opts);
+            // opts from rts_opts are always enabled:
+            procRtsOpts(rts_argc0, RtsOptsAll);
             rts_argc0 = rts_argc;
         }
     }
@@ -519,12 +538,13 @@ void setupRtsFlags (int *argc, char *argv[],
         char *ghc_rts = getenv("GHCRTS");
 
         if (ghc_rts != NULL) {
-            if (rtsOptsEnabled == RtsOptsNone) {
-                errorRtsOptsDisabled(is_hs_main, "Warning: Ignoring GHCRTS variable as RTS options are disabled.\n         %s");
+            if (rtsConfig.rts_opts_enabled == RtsOptsNone) {
+                errorRtsOptsDisabled(
+                    "Warning: Ignoring GHCRTS variable as RTS options are disabled.\n         %s");
                 // We don't actually exit, just warn
             } else {
                 splitRtsFlags(ghc_rts);
-                procRtsOpts(is_hs_main, rts_argc0, rtsOptsEnabled);
+                procRtsOpts(rts_argc0, rtsConfig.rts_opts_enabled);
                 rts_argc0 = rts_argc;
             }
         }
@@ -563,7 +583,7 @@ void setupRtsFlags (int *argc, char *argv[],
     }
     argv[*argc] = (char *) 0;
 
-    procRtsOpts(is_hs_main, rts_argc0, rtsOptsEnabled);
+    procRtsOpts(rts_argc0, rtsConfig.rts_opts_enabled);
 
     appendRtsArg((char *)0);
     rts_argc--; // appendRtsArg will have bumped it for the NULL (#7227)
@@ -575,9 +595,11 @@ void setupRtsFlags (int *argc, char *argv[],
     if (RtsFlags.GcFlags.statsFile != NULL) {
         initStatsFile (RtsFlags.GcFlags.statsFile);
     }
+#ifdef TICKY_TICKY
     if (RtsFlags.TickyFlags.tickyFile != NULL) {
-        initStatsFile (RtsFlags.GcFlags.statsFile);
+        initStatsFile (RtsFlags.TickyFlags.tickyFile);
     }
+#endif
 }
 
 /* -----------------------------------------------------------------------------
@@ -585,44 +607,47 @@ void setupRtsFlags (int *argc, char *argv[],
  * -------------------------------------------------------------------------- */
 
 #if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_TYPES_H) && !defined(mingw32_HOST_OS)
-static void checkSuid(HsBool is_hs_main, RtsOptsEnabledEnum enabled)
+static void checkSuid(RtsOptsEnabledEnum enabled)
 {
     if (enabled == RtsOptsSafeOnly) {
         /* This doesn't cover linux/posix capabilities like CAP_DAC_OVERRIDE,
            we'd have to link with -lcap for that. */
         if ((getuid() != geteuid()) || (getgid() != getegid())) {
-            errorRtsOptsDisabled(is_hs_main, "RTS options are disabled for setuid binaries. %s");
+            errorRtsOptsDisabled(
+                "RTS options are disabled for setuid binaries. %s");
             stg_exit(EXIT_FAILURE);
         }
     }
 }
 #else
-static void checkSuid(HsBool is_hs_main STG_UNUSED, RtsOptsEnabledEnum enabled STG_UNUSED)
+static void checkSuid (RtsOptsEnabledEnum enabled STG_UNUSED)
 {
 }
 #endif
 
-static void checkUnsafe(HsBool is_hs_main, RtsOptsEnabledEnum enabled)
+static void checkUnsafe(RtsOptsEnabledEnum enabled)
 {
     if (enabled == RtsOptsSafeOnly) {
-        errorRtsOptsDisabled(is_hs_main, "Most RTS options are disabled. %s");
+        errorRtsOptsDisabled("Most RTS options are disabled. %s");
         stg_exit(EXIT_FAILURE);
     }
 }
 
-static void procRtsOpts (HsBool is_hs_main, int rts_argc0, RtsOptsEnabledEnum rtsOptsEnabled)
+static void procRtsOpts (int rts_argc0,
+                         RtsOptsEnabledEnum rtsOptsEnabled)
 {
     rtsBool error = rtsFalse;
     int arg;
+    int unchecked_arg_start;
 
     if (!(rts_argc0 < rts_argc)) return;
 
     if (rtsOptsEnabled == RtsOptsNone) {
-        errorRtsOptsDisabled(is_hs_main, "RTS options are disabled. %s");
+        errorRtsOptsDisabled("RTS options are disabled. %s");
         stg_exit(EXIT_FAILURE);
     }
 
-    checkSuid(is_hs_main, rtsOptsEnabled);
+    checkSuid(rtsOptsEnabled);
 
     // Process RTS (rts_argv) part: mainly to determine statsfile
     for (arg = rts_argc0; arg < rts_argc; arg++) {
@@ -633,8 +658,9 @@ static void procRtsOpts (HsBool is_hs_main, int rts_argc0, RtsOptsEnabledEnum rt
            at the start each iteration and checked at the end. */
         rtsBool option_checked = rtsFalse;
 
+// See Note [OPTION_SAFE vs OPTION_UNSAFE].
 #define OPTION_SAFE option_checked = rtsTrue;
-#define OPTION_UNSAFE checkUnsafe(is_hs_main, rtsOptsEnabled); option_checked = rtsTrue;
+#define OPTION_UNSAFE checkUnsafe(rtsOptsEnabled); option_checked = rtsTrue;
 
         if (rts_argv[arg][0] != '-') {
             fflush(stdout);
@@ -642,7 +668,9 @@ static void procRtsOpts (HsBool is_hs_main, int rts_argc0, RtsOptsEnabledEnum rt
             error = rtsTrue;
 
         } else {
-
+            /* 0 is dash, 1 is first letter */
+            /* see Trac #9839 */
+            unchecked_arg_start = 1;
             switch(rts_argv[arg][1]) {
 
               /* process: general args, then PROFILING-only ones, then
@@ -656,7 +684,8 @@ static void procRtsOpts (HsBool is_hs_main, int rts_argc0, RtsOptsEnabledEnum rt
 # define TICKY_BUILD_ONLY(x) x
 #else
 # define TICKY_BUILD_ONLY(x) \
-errorBelch("the flag %s requires the program to be built with -ticky", rts_argv[arg]); \
+errorBelch("the flag %s requires the program to be built with -ticky", \
+           rts_argv[arg]);                                             \
 error = rtsTrue;
 #endif
 
@@ -664,7 +693,8 @@ error = rtsTrue;
 # define PROFILING_BUILD_ONLY(x)   x
 #else
 # define PROFILING_BUILD_ONLY(x) \
-errorBelch("the flag %s requires the program to be built with -prof", rts_argv[arg]); \
+errorBelch("the flag %s requires the program to be built with -prof", \
+           rts_argv[arg]);                                            \
 error = rtsTrue;
 #endif
 
@@ -672,7 +702,8 @@ error = rtsTrue;
 # define TRACING_BUILD_ONLY(x)   x
 #else
 # define TRACING_BUILD_ONLY(x) \
-errorBelch("the flag %s requires the program to be built with -eventlog or -debug", rts_argv[arg]); \
+errorBelch("the flag %s requires the program to be built with -eventlog or -debug", \
+           rts_argv[arg]);                                              \
 error = rtsTrue;
 #endif
 
@@ -680,7 +711,8 @@ error = rtsTrue;
 # define THREADED_BUILD_ONLY(x)      x
 #else
 # define THREADED_BUILD_ONLY(x) \
-errorBelch("the flag %s requires the program to be built with -threaded", rts_argv[arg]); \
+errorBelch("the flag %s requires the program to be built with -threaded", \
+           rts_argv[arg]);                                              \
 error = rtsTrue;
 #endif
 
@@ -688,7 +720,8 @@ error = rtsTrue;
 # define DEBUG_BUILD_ONLY(x) x
 #else
 # define DEBUG_BUILD_ONLY(x) \
-errorBelch("the flag %s requires the program to be built with -debug", rts_argv[arg]); \
+errorBelch("the flag %s requires the program to be built with -debug", \
+           rts_argv[arg]);                                             \
 error = rtsTrue;
 #endif
 
@@ -723,6 +756,52 @@ error = rtsTrue;
                       printRtsInfo();
                       stg_exit(0);
                   }
+#if defined(THREADED_RTS)
+                  else if (!strncmp("numa", &rts_argv[arg][2], 4)) {
+                      OPTION_SAFE;
+                      StgWord mask;
+                      if (rts_argv[arg][6] == '=') {
+                          mask = (StgWord)strtol(rts_argv[arg]+7,
+                                                 (char **) NULL, 10);
+                      } else {
+                          mask = (StgWord)~0;
+                      }
+                      if (!osNumaAvailable()) {
+                          errorBelch("%s: OS reports NUMA is not available",
+                                     rts_argv[arg]);
+                          error = rtsTrue;
+                          break;
+                      }
+
+                      RtsFlags.GcFlags.numa = rtsTrue;
+                      RtsFlags.GcFlags.numaMask = mask;
+                  }
+#endif
+#if defined(DEBUG) && defined(THREADED_RTS)
+                  else if (!strncmp("debug-numa", &rts_argv[arg][2], 10)) {
+                      OPTION_SAFE;
+                      size_t nNodes;
+                      if (rts_argv[arg][12] == '=' &&
+                          isdigit(rts_argv[arg][13])) {
+                          nNodes = (StgWord)strtol(rts_argv[arg]+13,
+                                                 (char **) NULL, 10);
+                      } else {
+                          errorBelch("%s: missing number of nodes",
+                                     rts_argv[arg]);
+                          error = rtsTrue;
+                          break;
+                      }
+                      if (nNodes > MAX_NUMA_NODES) {
+                          errorBelch("%s: Too many NUMA nodes (max %d)",
+                                     rts_argv[arg], MAX_NUMA_NODES);
+                          error = rtsTrue;
+                      } else {
+                          RtsFlags.GcFlags.numa = rtsTrue;
+                          RtsFlags.DebugFlags.numa = rtsTrue;
+                          RtsFlags.GcFlags.numaMask = (1<<nNodes) - 1;
+                      }
+                  }
+#endif
                   else {
                       OPTION_SAFE;
                       errorBelch("unknown RTS option: %s",rts_argv[arg]);
@@ -731,54 +810,30 @@ error = rtsTrue;
                   break;
               case 'A':
                   OPTION_UNSAFE;
-                  RtsFlags.GcFlags.minAllocAreaSize
-                      = decodeSize(rts_argv[arg], 2, BLOCK_SIZE, HS_INT_MAX)
-                           / BLOCK_SIZE;
-                  break;
-
-#ifdef USE_PAPI
-              case 'a':
-                OPTION_UNSAFE;
-                switch(rts_argv[arg][2]) {
-                case '1':
-                  RtsFlags.PapiFlags.eventType = PAPI_FLAG_CACHE_L1;
-                  break;
-                case '2':
-                  RtsFlags.PapiFlags.eventType = PAPI_FLAG_CACHE_L2;
-                  break;
-                case 'b':
-                  RtsFlags.PapiFlags.eventType = PAPI_FLAG_BRANCH;
-                  break;
-                case 's':
-                  RtsFlags.PapiFlags.eventType = PAPI_FLAG_STALLS;
-                  break;
-                case 'e':
-                  RtsFlags.PapiFlags.eventType = PAPI_FLAG_CB_EVENTS;
-                  break;
-                case '+':
-                case '#':
-                  if (RtsFlags.PapiFlags.numUserEvents >= MAX_PAPI_USER_EVENTS) {
-                      errorBelch("maximum number of PAPI events reached");
-                      stg_exit(EXIT_FAILURE);
+                  if (rts_argv[arg][2] == 'L') {
+                      RtsFlags.GcFlags.largeAllocLim
+                          = decodeSize(rts_argv[arg], 3, 2*BLOCK_SIZE,
+                                       HS_INT_MAX) / BLOCK_SIZE;
+                  } else {
+                      // minimum two blocks in the nursery, so that we have one
+                      // to grab for allocate().
+                      RtsFlags.GcFlags.minAllocAreaSize
+                          = decodeSize(rts_argv[arg], 2, 2*BLOCK_SIZE,
+                                       HS_INT_MAX) / BLOCK_SIZE;
                   }
-                  nat eventNum  = RtsFlags.PapiFlags.numUserEvents++;
-                  char kind     = rts_argv[arg][2];
-                  nat eventKind = kind == '+' ? PAPI_PRESET_EVENT_KIND : PAPI_NATIVE_EVENT_KIND;
-
-                  RtsFlags.PapiFlags.userEvents[eventNum] = rts_argv[arg] + 3;
-                  RtsFlags.PapiFlags.eventType = PAPI_USER_EVENTS;
-                  RtsFlags.PapiFlags.userEventsKind[eventNum] = eventKind;
                   break;
-                default:
-                  bad_option( rts_argv[arg] );
-                }
-                break;
-#endif
+              case 'n':
+                  OPTION_UNSAFE;
+                  RtsFlags.GcFlags.nurseryChunkSize
+                      = decodeSize(rts_argv[arg], 2, 2*BLOCK_SIZE, HS_INT_MAX)
+                           / BLOCK_SIZE;
+                  break;
 
               case 'B':
                 OPTION_UNSAFE;
                 RtsFlags.GcFlags.ringBell = rtsTrue;
-                break;
+                unchecked_arg_start++;
+                goto check_rest;
 
               case 'c':
                   OPTION_UNSAFE;
@@ -793,7 +848,8 @@ error = rtsTrue;
               case 'w':
                 OPTION_UNSAFE;
                 RtsFlags.GcFlags.sweep = rtsTrue;
-                break;
+                unchecked_arg_start++;
+                goto check_rest;
 
               case 'F':
                 OPTION_UNSAFE;
@@ -805,71 +861,14 @@ error = rtsTrue;
 
               case 'D':
               OPTION_SAFE;
-              DEBUG_BUILD_ONLY(
-              {
-                  char *c;
-
-                  for (c  = rts_argv[arg] + 2; *c != '\0'; c++) {
-                      switch (*c) {
-                      case 's':
-                          RtsFlags.DebugFlags.scheduler = rtsTrue;
-                          break;
-                      case 'i':
-                          RtsFlags.DebugFlags.interpreter = rtsTrue;
-                          break;
-                      case 'w':
-                          RtsFlags.DebugFlags.weak = rtsTrue;
-                          break;
-                      case 'G':
-                          RtsFlags.DebugFlags.gccafs = rtsTrue;
-                          break;
-                      case 'g':
-                          RtsFlags.DebugFlags.gc = rtsTrue;
-                          break;
-                      case 'b':
-                          RtsFlags.DebugFlags.block_alloc = rtsTrue;
-                          break;
-                      case 'S':
-                          RtsFlags.DebugFlags.sanity = rtsTrue;
-                          break;
-                      case 't':
-                          RtsFlags.DebugFlags.stable = rtsTrue;
-                          break;
-                      case 'p':
-                          RtsFlags.DebugFlags.prof = rtsTrue;
-                          break;
-                      case 'l':
-                          RtsFlags.DebugFlags.linker = rtsTrue;
-                          break;
-                      case 'a':
-                          RtsFlags.DebugFlags.apply = rtsTrue;
-                          break;
-                      case 'm':
-                          RtsFlags.DebugFlags.stm = rtsTrue;
-                          break;
-                      case 'z':
-                          RtsFlags.DebugFlags.squeeze = rtsTrue;
-                          break;
-                      case 'c':
-                          RtsFlags.DebugFlags.hpc = rtsTrue;
-                          break;
-                      case 'r':
-                          RtsFlags.DebugFlags.sparks = rtsTrue;
-                          break;
-                      default:
-                          bad_option( rts_argv[arg] );
-                      }
-                  }
-                  // -Dx also turns on -v.  Use -l to direct trace
-                  // events to the .eventlog file instead.
-                  RtsFlags.TraceFlags.tracing = TRACE_STDERR;
-              })
+              DEBUG_BUILD_ONLY(read_debug_flags(rts_argv[arg]);)
               break;
 
               case 'K':
                   OPTION_UNSAFE;
                   RtsFlags.GcFlags.maxStkSize =
-                      decodeSize(rts_argv[arg], 2, sizeof(W_), HS_WORD_MAX) / sizeof(W_);
+                      decodeSize(rts_argv[arg], 2, sizeof(W_), HS_WORD_MAX)
+                      / sizeof(W_);
                   break;
 
               case 'k':
@@ -877,19 +876,23 @@ error = rtsTrue;
                 switch(rts_argv[arg][2]) {
                 case 'c':
                   RtsFlags.GcFlags.stkChunkSize =
-                      decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX) / sizeof(W_);
+                      decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX)
+                      / sizeof(W_);
                   break;
                 case 'b':
                   RtsFlags.GcFlags.stkChunkBufferSize =
-                      decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX) / sizeof(W_);
+                      decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX)
+                      / sizeof(W_);
                   break;
                 case 'i':
                   RtsFlags.GcFlags.initialStkSize =
-                      decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX) / sizeof(W_);
+                      decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX)
+                      / sizeof(W_);
                   break;
                 default:
                   RtsFlags.GcFlags.initialStkSize =
-                      decodeSize(rts_argv[arg], 2, sizeof(W_), HS_WORD_MAX) / sizeof(W_);
+                      decodeSize(rts_argv[arg], 2, sizeof(W_), HS_WORD_MAX)
+                      / sizeof(W_);
                   break;
                 }
                 break;
@@ -897,19 +900,52 @@ error = rtsTrue;
               case 'M':
                   OPTION_UNSAFE;
                   RtsFlags.GcFlags.maxHeapSize =
-                      decodeSize(rts_argv[arg], 2, BLOCK_SIZE, HS_WORD_MAX) / BLOCK_SIZE;
-                  /* user give size in *bytes* but "maxHeapSize" is in *blocks* */
+                      decodeSize(rts_argv[arg], 2, BLOCK_SIZE, HS_WORD_MAX)
+                      / BLOCK_SIZE;
+                  /* user gives size in *bytes* but "maxHeapSize" is in
+                   * *blocks* */
                   break;
 
               case 'm':
-                  OPTION_UNSAFE;
-                  RtsFlags.GcFlags.pcFreeHeap = atof(rts_argv[arg]+2);
+                /* Case for maxN feature request ticket #10728, it's a little
+                   odd being so far from the N case. */
+#if !defined(NOSMP)
+                if (strncmp("maxN", &rts_argv[arg][1], 4) == 0) {
+                  OPTION_SAFE;
+                  THREADED_BUILD_ONLY(
+                    int nCapabilities;
+                    int proc = (int)getNumberOfProcessors();
+
+                    nCapabilities = strtol(rts_argv[arg]+5, (char **) NULL, 10);
+                    if (nCapabilities > proc) { nCapabilities = proc; }
 
-                  if (RtsFlags.GcFlags.pcFreeHeap < 0 ||
-                      RtsFlags.GcFlags.pcFreeHeap > 100)
+                    if (nCapabilities <= 0) {
+                      errorBelch("bad value for -maxN");
+                      error = rtsTrue;
+                    }
+#if defined(PROFILING)
+                    RtsFlags.ParFlags.nCapabilities = 1;
+#else
+                    RtsFlags.ParFlags.nCapabilities = (uint32_t)nCapabilities;
+#endif
+                  ) break;
+                } else {
+#endif
+                    OPTION_UNSAFE;
+                    RtsFlags.GcFlags.pcFreeHeap = atof(rts_argv[arg]+2);
+
+                    /* -m was allowing bad flags to go unreported */
+                    if (RtsFlags.GcFlags.pcFreeHeap == 0.0 &&
+                           rts_argv[arg][2] != '0')
                       bad_option( rts_argv[arg] );
-                  break;
 
+                    if (RtsFlags.GcFlags.pcFreeHeap < 0 ||
+                        RtsFlags.GcFlags.pcFreeHeap > 100)
+                        bad_option( rts_argv[arg] );
+                    break;
+#if !defined(NOSMP)
+                }
+#endif
               case 'G':
                   OPTION_UNSAFE;
                   RtsFlags.GcFlags.generations =
@@ -921,11 +957,20 @@ error = rtsTrue;
                   if (rts_argv[arg][2] == '\0') {
                       RtsFlags.GcFlags.heapSizeSuggestionAuto = rtsTrue;
                   } else {
-                      RtsFlags.GcFlags.heapSizeSuggestion =
-                          (nat)(decodeSize(rts_argv[arg], 2, BLOCK_SIZE, HS_WORD_MAX) / BLOCK_SIZE);
+                      RtsFlags.GcFlags.heapSizeSuggestion = (uint32_t)
+                          (decodeSize(rts_argv[arg], 2, BLOCK_SIZE, HS_WORD_MAX)
+                          / BLOCK_SIZE);
                   }
                   break;
 
+              case 'O':
+                  OPTION_UNSAFE;
+                  RtsFlags.GcFlags.minOldGenSize =
+                      (uint32_t)(decodeSize(rts_argv[arg], 2, BLOCK_SIZE,
+                                       HS_WORD_MAX)
+                            / BLOCK_SIZE);
+                  break;
+
               case 'I': /* idle GC delay */
                 OPTION_UNSAFE;
                 if (rts_argv[arg][2] == '\0') {
@@ -944,7 +989,8 @@ error = rtsTrue;
               case 'T':
                   OPTION_SAFE;
                   RtsFlags.GcFlags.giveStats = COLLECT_GC_STATS;
-                  break; /* Don't initialize statistics file. */
+                  unchecked_arg_start++;
+                  goto check_rest; /* Don't initialize statistics file. */
 
               case 'S':
                   OPTION_SAFE; /* but see below */
@@ -976,7 +1022,8 @@ error = rtsTrue;
               case 'Z':
                 OPTION_UNSAFE;
                 RtsFlags.GcFlags.squeezeUpdFrames = rtsFalse;
-                break;
+                unchecked_arg_start++;
+                goto check_rest;
 
               /* =========== PROFILING ========================== */
 
@@ -987,8 +1034,14 @@ error = rtsTrue;
                 switch (rts_argv[arg][2]) {
                   case 'a':
                     RtsFlags.CcFlags.doCostCentres = COST_CENTRES_ALL;
+                    if (rts_argv[arg][3] != '\0') {
+                      errorBelch("flag -Pa given an argument"
+                                 " when none was expected: %s"
+                                ,rts_argv[arg]);
+                      error = rtsTrue;
+                    }
                     break;
-                  default:
+                  case '\0':
                       if (rts_argv[arg][1] == 'P') {
                           RtsFlags.CcFlags.doCostCentres =
                               COST_CENTRES_VERBOSE;
@@ -997,13 +1050,17 @@ error = rtsTrue;
                               COST_CENTRES_SUMMARY;
                       }
                       break;
+                  default:
+                    unchecked_arg_start++;
+                    goto check_rest;
                 }
                 ) break;
 
               case 'R':
                   OPTION_SAFE;
                   PROFILING_BUILD_ONLY(
-                      RtsFlags.ProfFlags.maxRetainerSetSize = atof(rts_argv[arg]+2);
+                      RtsFlags.ProfFlags.maxRetainerSetSize =
+                        atof(rts_argv[arg]+2);
                   ) break;
               case 'L':
                   OPTION_SAFE;
@@ -1015,122 +1072,21 @@ error = rtsTrue;
                   ) break;
               case 'h': /* serial heap profile */
 #if !defined(PROFILING)
-                OPTION_UNSAFE;
                 switch (rts_argv[arg][2]) {
                   case '\0':
                   case 'T':
+                    OPTION_UNSAFE;
                     RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CLOSURE_TYPE;
                     break;
                   default:
-                    errorBelch("invalid heap profile option: %s",rts_argv[arg]);
-                    error = rtsTrue;
+                    OPTION_SAFE;
+                    PROFILING_BUILD_ONLY();
                 }
 #else
                 OPTION_SAFE;
                 PROFILING_BUILD_ONLY(
-                switch (rts_argv[arg][2]) {
-                case '\0':
-                case 'C':
-                case 'c':
-                case 'M':
-                case 'm':
-                case 'D':
-                case 'd':
-                case 'Y':
-                case 'y':
-                case 'R':
-                case 'r':
-                case 'B':
-                case 'b':
-                    if (rts_argv[arg][2] != '\0' && rts_argv[arg][3] != '\0') {
-                        {
-                            char *left  = strchr(rts_argv[arg], '{');
-                            char *right = strrchr(rts_argv[arg], '}');
-
-                            // curly braces are optional, for
-                            // backwards compat.
-                            if (left)
-                                left = left+1;
-                            else
-                                left = rts_argv[arg] + 3;
-
-                            if (!right)
-                                right = rts_argv[arg] + strlen(rts_argv[arg]);
-
-                            *right = '\0';
-
-                            switch (rts_argv[arg][2]) {
-                            case 'c': // cost centre label select
-                                RtsFlags.ProfFlags.ccSelector = left;
-                                break;
-                            case 'C':
-                                RtsFlags.ProfFlags.ccsSelector = left;
-                                break;
-                            case 'M':
-                            case 'm': // cost centre module select
-                                RtsFlags.ProfFlags.modSelector = left;
-                                break;
-                            case 'D':
-                            case 'd': // closure descr select
-                                RtsFlags.ProfFlags.descrSelector = left;
-                                break;
-                            case 'Y':
-                            case 'y': // closure type select
-                                RtsFlags.ProfFlags.typeSelector = left;
-                                break;
-                            case 'R':
-                            case 'r': // retainer select
-                                RtsFlags.ProfFlags.retainerSelector = left;
-                                break;
-                            case 'B':
-                            case 'b': // biography select
-                                RtsFlags.ProfFlags.bioSelector = left;
-                                break;
-                            }
-                        }
-                        break;
-                    }
-
-                    if (RtsFlags.ProfFlags.doHeapProfile != 0) {
-                        errorBelch("multiple heap profile options");
-                        error = rtsTrue;
-                        break;
-                    }
-
-                    switch (rts_argv[arg][2]) {
-                    case '\0':
-                    case 'C':
-                    case 'c':
-                        RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CCS;
-                        break;
-                    case 'M':
-                    case 'm':
-                          RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_MOD;
-                          break;
-                    case 'D':
-                    case 'd':
-                          RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_DESCR;
-                          break;
-                    case 'Y':
-                    case 'y':
-                          RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_TYPE;
-                          break;
-                    case 'R':
-                    case 'r':
-                          RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_RETAINER;
-                          break;
-                    case 'B':
-                    case 'b':
-                          RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_LDV;
-                          break;
-                    }
-                    break;
-
-                default:
-                    errorBelch("invalid heap profile option: %s",rts_argv[arg]);
-                    error = rtsTrue;
-                }
-                )
+                    error = read_heap_profiling_flag(rts_argv[arg]);
+                );
 #endif /* PROFILING */
                 break;
 
@@ -1172,24 +1128,26 @@ error = rtsTrue;
                 THREADED_BUILD_ONLY(
                 if (rts_argv[arg][2] == '\0') {
 #if defined(PROFILING)
-                    RtsFlags.ParFlags.nNodes = 1;
+                    RtsFlags.ParFlags.nCapabilities = 1;
 #else
-                    RtsFlags.ParFlags.nNodes = getNumberOfProcessors();
+                    RtsFlags.ParFlags.nCapabilities = getNumberOfProcessors();
 #endif
                 } else {
-                    int nNodes;
+                    int nCapabilities;
                     OPTION_SAFE; /* but see extra checks below... */
-                    nNodes = strtol(rts_argv[arg]+2, (char **) NULL, 10);
-                    if (nNodes <= 0) {
+
+                    nCapabilities = strtol(rts_argv[arg]+2, (char **) NULL, 10);
+
+                    if (nCapabilities <= 0) {
                       errorBelch("bad value for -N");
                       error = rtsTrue;
                     }
                     if (rtsOptsEnabled == RtsOptsSafeOnly &&
-                        nNodes > (int)getNumberOfProcessors()) {
-                      errorRtsOptsDisabled(is_hs_main, "Using large values for -N is not allowed by default. %s");
+                      nCapabilities > (int)getNumberOfProcessors()) {
+                      errorRtsOptsDisabled("Using large values for -N is not allowed by default. %s");
                       stg_exit(EXIT_FAILURE);
                     }
-                    RtsFlags.ParFlags.nNodes = (nat)nNodes;
+                    RtsFlags.ParFlags.nCapabilities = (uint32_t)nCapabilities;
                 }
                 ) break;
 
@@ -1227,10 +1185,12 @@ error = rtsTrue;
                         break;
                     case 'b':
                         if (rts_argv[arg][3] == '\0') {
-                            RtsFlags.ParFlags.parGcLoadBalancingEnabled = rtsFalse;
+                            RtsFlags.ParFlags.parGcLoadBalancingEnabled =
+                                rtsFalse;
                         }
                         else {
-                            RtsFlags.ParFlags.parGcLoadBalancingEnabled = rtsTrue;
+                            RtsFlags.ParFlags.parGcLoadBalancingEnabled =
+                                rtsTrue;
                             RtsFlags.ParFlags.parGcLoadBalancingGen
                                 = strtol(rts_argv[arg]+3, (char **) NULL, 10);
                         }
@@ -1239,6 +1199,17 @@ error = rtsTrue;
                         RtsFlags.ParFlags.parGcNoSyncWithIdle
                             = strtol(rts_argv[arg]+3, (char **) NULL, 10);
                         break;
+                    case 'n': {
+                        int threads;
+                        threads = strtol(rts_argv[arg]+3, (char **) NULL, 10);
+                        if (threads <= 0) {
+                            errorBelch("-qn must be 1 or greater");
+                            error = rtsTrue;
+                        } else {
+                            RtsFlags.ParFlags.parGcThreads = threads;
+                        }
+                        break;
+                    }
                     case 'a':
                         RtsFlags.ParFlags.setAffinity = rtsTrue;
                         break;
@@ -1310,6 +1281,7 @@ error = rtsTrue;
               /* =========== EXTENDED OPTIONS =================== */
 
               case 'x': /* Extend the argument space */
+                unchecked_arg_start++;
                 switch(rts_argv[arg][2]) {
                   case '\0':
                     OPTION_SAFE;
@@ -1344,21 +1316,34 @@ error = rtsTrue;
                     break;
 #endif
 
-                case 'c': /* Debugging tool: show current cost centre on an exception */
+                case 'c': /* Debugging tool: show current cost centre on
+                           an exception */
                     OPTION_SAFE;
                     PROFILING_BUILD_ONLY(
                         RtsFlags.ProfFlags.showCCSOnException = rtsTrue;
                         );
-                    break;
+                    unchecked_arg_start++;
+                    goto check_rest;
 
                 case 't':  /* Include memory used by TSOs in a heap profile */
                     OPTION_SAFE;
                     PROFILING_BUILD_ONLY(
                         RtsFlags.ProfFlags.includeTSOs = rtsTrue;
                         );
-                    break;
+                    unchecked_arg_start++;
+                    goto check_rest;
 
-                  /* The option prefix '-xx' is reserved for future extension.  KSW 1999-11. */
+                  /*
+                   * The option prefix '-xx' is reserved for future
+                   * extension.  KSW 1999-11.
+                   */
+
+                case 'q':
+                  OPTION_UNSAFE;
+                  RtsFlags.GcFlags.allocLimitGrace
+                      = decodeSize(rts_argv[arg], 3, BLOCK_SIZE, HS_INT_MAX)
+                          / BLOCK_SIZE;
+                  break;
 
                   default:
                     OPTION_SAFE;
@@ -1368,6 +1353,22 @@ error = rtsTrue;
                 }
                 break;  /* defensive programming */
 
+            /* check the rest to be sure there is nothing afterwards.*/
+            /* see Trac #9839 */
+            check_rest:
+                {
+                    /* start checking from the first unchecked position,
+                     * not from index 2*/
+                    /* see Trac #9839 */
+                    if (rts_argv[arg][unchecked_arg_start] != '\0') {
+                      errorBelch("flag -%c given an argument"
+                                 " when none was expected: %s",
+                                 rts_argv[arg][1],rts_argv[arg]);
+                      error = rtsTrue;
+                    }
+                    break;
+                }
+
               /* =========== OH DEAR ============================ */
               default:
                 OPTION_SAFE;
@@ -1445,9 +1446,33 @@ static void normaliseRtsOpts (void)
 
     if (RtsFlags.GcFlags.stkChunkBufferSize >
         RtsFlags.GcFlags.stkChunkSize / 2) {
-        errorBelch("stack chunk buffer size (-kb) must be less than 50%% of the stack chunk size (-kc)");
+        errorBelch("stack chunk buffer size (-kb) must be less than 50%%\n"
+                   "of the stack chunk size (-kc)");
         errorUsage();
     }
+
+    if (RtsFlags.ParFlags.parGcLoadBalancingGen == ~0u) {
+        StgWord alloc_area_bytes
+            = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE;
+
+        // If allocation area is larger than CPU cache
+        // we can finish scanning quicker doing work-stealing
+        // scan. Trac #9221
+        // 32M looks big enough not to fit into L2 cache
+        // of popular modern CPUs.
+        if (alloc_area_bytes >= 32 * 1024 * 1024) {
+            RtsFlags.ParFlags.parGcLoadBalancingGen = 0;
+        } else {
+            RtsFlags.ParFlags.parGcLoadBalancingGen = 1;
+        }
+    }
+
+#ifdef THREADED_RTS
+    if (RtsFlags.ParFlags.parGcThreads > RtsFlags.ParFlags.nCapabilities) {
+        errorBelch("GC threads (-qn) must be between 1 and the value of -N");
+        errorUsage();
+    }
+#endif
 }
 
 static void errorUsage (void)
@@ -1494,7 +1519,8 @@ openStatsFile (char *filename,           // filename, or NULL
         if (*filename != '\0') {  /* stats file specified */
             f = fopen(filename,"w");
         } else {
-            char stats_filename[STATS_FILENAME_MAXLEN]; /* default <program>.<ext> */
+            /* default <program>.<ext> */
+            char stats_filename[STATS_FILENAME_MAXLEN];
             sprintf(stats_filename, filename_fmt, prog_name);
             f = fopen(stats_filename,"w");
         }
@@ -1531,7 +1557,7 @@ static void initStatsFile (FILE *f)
 -------------------------------------------------------------------------- */
 
 static StgWord64
-decodeSize(const char *flag, nat offset, StgWord64 min, StgWord64 max)
+decodeSize(const char *flag, uint32_t offset, StgWord64 min, StgWord64 max)
 {
     char c;
     const char *s;
@@ -1571,10 +1597,188 @@ decodeSize(const char *flag, nat offset, StgWord64 min, StgWord64 max)
     return val;
 }
 
+#ifdef DEBUG
+static void read_debug_flags(const char* arg)
+{
+    // Already parsed "-D"; each later character enables one debug flag.
+    const char *c;
+    for (c  = arg + 2; *c != '\0'; c++) {
+        switch (*c) {
+        case 's':
+            RtsFlags.DebugFlags.scheduler = rtsTrue;
+            break;
+        case 'i':
+            RtsFlags.DebugFlags.interpreter = rtsTrue;
+            break;
+        case 'w':
+            RtsFlags.DebugFlags.weak = rtsTrue;
+            break;
+        case 'G':
+            RtsFlags.DebugFlags.gccafs = rtsTrue;
+            break;
+        case 'g':
+            RtsFlags.DebugFlags.gc = rtsTrue;
+            break;
+        case 'b':
+            RtsFlags.DebugFlags.block_alloc = rtsTrue;
+            break;
+        case 'S':
+            RtsFlags.DebugFlags.sanity = rtsTrue;
+            break;
+        case 't':
+            RtsFlags.DebugFlags.stable = rtsTrue;
+            break;
+        case 'p':
+            RtsFlags.DebugFlags.prof = rtsTrue;
+            break;
+        case 'l':
+            RtsFlags.DebugFlags.linker = rtsTrue;
+            break;
+        case 'a':
+            RtsFlags.DebugFlags.apply = rtsTrue;
+            break;
+        case 'm':
+            RtsFlags.DebugFlags.stm = rtsTrue;
+            break;
+        case 'z':
+            RtsFlags.DebugFlags.squeeze = rtsTrue;
+            break;
+        case 'c':
+            RtsFlags.DebugFlags.hpc = rtsTrue;
+            break;
+        case 'r':
+            RtsFlags.DebugFlags.sparks = rtsTrue;
+            break;
+        default:
+            bad_option( arg );  // letter matches none of the cases above
+        }
+    }
+    // -Dx also turns on -v.  Use -l to direct trace
+    // events to the .eventlog file instead.
+    RtsFlags.TraceFlags.tracing = TRACE_STDERR;
+}
+#endif /* DEBUG */
+
+#ifdef PROFILING
+// Parse a "-h" flag, returning whether the parse resulted in an error.
+static rtsBool read_heap_profiling_flag(const char *arg)
+{
+    // Already parsed "-h": arg[2] selects the profile kind and any
+    // text from arg[3] onwards is a selector for that kind.
+    rtsBool error = rtsFalse;
+    switch (arg[2]) {
+    case '\0':
+    case 'C':
+    case 'c':
+    case 'M':
+    case 'm':
+    case 'D':
+    case 'd':
+    case 'Y':
+    case 'y':
+    case 'R':
+    case 'r':
+    case 'B':
+    case 'b':
+        if (arg[2] != '\0' && arg[3] != '\0') {
+            {
+                const char *left  = strchr(arg, '{');
+                const char *right = strrchr(arg, '}');
+
+                // curly braces are optional, for
+                // backwards compat.
+                if (left)
+                    left = left+1;
+                else
+                    left = arg + 3;
+
+                if (!right)
+                    right = arg + strlen(arg);
+                // NOTE(review): confirm "+ 1" does not copy the closing '}'.
+                char *selector = stgStrndup(left, right - left + 1);
+                // Record the copy in the selector field matching arg[2].
+                switch (arg[2]) {
+                case 'c': // cost centre label select
+                    RtsFlags.ProfFlags.ccSelector = selector;
+                    break;
+                case 'C':
+                    RtsFlags.ProfFlags.ccsSelector = selector;
+                    break;
+                case 'M':
+                case 'm': // cost centre module select
+                    RtsFlags.ProfFlags.modSelector = selector;
+                    break;
+                case 'D':
+                case 'd': // closure descr select
+                    RtsFlags.ProfFlags.descrSelector = selector;
+                    break;
+                case 'Y':
+                case 'y': // closure type select
+                    RtsFlags.ProfFlags.typeSelector = selector;
+                    break;
+                case 'R':
+                case 'r': // retainer select
+                    RtsFlags.ProfFlags.retainerSelector = selector;
+                    break;
+                case 'B':
+                case 'b': // biography select
+                    RtsFlags.ProfFlags.bioSelector = selector;
+                    break;
+                default: // appears unreachable given the outer case labels
+                    free(selector);
+                }
+            }
+            break; // selector-only flag: doHeapProfile is left unchanged
+        }
+        // Bare kind letter: only one heap profile kind may be chosen.
+        if (RtsFlags.ProfFlags.doHeapProfile != 0) {
+            errorBelch("multiple heap profile options");
+            error = rtsTrue;
+            break;
+        }
+
+        switch (arg[2]) {
+        case '\0':
+        case 'C':
+        case 'c':
+            RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CCS;
+            break;
+        case 'M':
+        case 'm':
+            RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_MOD;
+            break;
+        case 'D':
+        case 'd':
+            RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_DESCR;
+            break;
+        case 'Y':
+        case 'y':
+            RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_TYPE;
+            break;
+        case 'R':
+        case 'r':
+            RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_RETAINER;
+            break;
+        case 'B':
+        case 'b':
+            RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_LDV;
+            break;
+        }
+        break;
+
+    default:
+        errorBelch("invalid heap profile option: %s", arg);
+        error = rtsTrue;
+    }
+
+    return error;
+}
+#endif /* PROFILING */
+
 #if defined(TRACING)
-static void read_trace_flags(char *arg)
+static void read_trace_flags(const char *arg)
 {
-    char *c;
+    const char *c;
     rtsBool enabled = rtsTrue;
     /* Syntax for tracing flags currently looks like:
      *
@@ -1858,3 +2062,41 @@ void freeRtsArgs(void)
     freeProgArgv();
     freeRtsArgv();
 }
+
+
+/*
+Note [OPTION_SAFE vs OPTION_UNSAFE]
+
+Ticket #3910 originally pointed out that the RTS options are a potential
+security problem. For example the -t -s or -S flags can be used to
+overwrite files. This would be bad in the context of CGI scripts or
+setuid binaries. So we introduced a system where +RTS processing is more
+or less disabled unless you pass the -rtsopts flag at link time.
+
+This scheme is safe enough but it also really annoys users. They have
+to use -rtsopts in many circumstances: with -threaded to use -N, with
+-eventlog to use -l, with -prof to use any of the profiling flags. Many
+users just set -rtsopts globally or in project .cabal files. Apart from
+annoying users it reduces security because it means that deployed
+binaries will have all RTS options enabled rather than just profiling
+ones.
+
+So now, we relax the set of RTS options that are available in the
+default -rtsopts=some case. For "deployment" ways like vanilla and
+-threaded we remain quite conservative. Only --info -? --help are
+allowed for vanilla. For -threaded, -N and -N<x> are allowed with a
+check that x <= num cpus.
+
+For "developer" ways like -debug, -eventlog, -prof, we allow all the
+options that are special to that way. Some of these allow writing files,
+but the file written is not directly under the control of the attacker.
+For the setuid case (where the attacker would have control over binary
+name, current dir, local symlinks etc) we check if the process is
+running setuid/setgid and refuse all RTS option processing. Users would
+need to use -rtsopts=all in this case.
+
+We are making the assumption that developers will not deploy binaries
+built in the -debug, -eventlog, -prof ways. And even if they do, the
+damage should be limited to DOS, information disclosure and writing
+files like <progname>.eventlog, not arbitrary files.
+*/