Reset FPU precision back to MSVCRT defaults
author Tamar Christina <tamar@zhox.com>
Wed, 14 Dec 2016 21:45:35 +0000 (16:45 -0500)
committer Ben Gamari <ben@smart-cactus.org>
Thu, 15 Dec 2016 15:42:25 +0000 (10:42 -0500)
Mingw-w64 does a stupid thing: it sets the FPU precision to extended
mode by default.  The reasoning is that this is for compatibility with
libraries ported from GNU/Linux. However, the problem is that this is
incompatible with the standard Windows double precision mode.  In fact,
if we create a new OS thread then Windows will reset the FPU to double
precision mode.  So we end up with a weird state where the main thread
by default has a different precision than any child threads.

Test Plan: ./validate new test T7289

Reviewers: simonmar, austin, bgamari, erikd

Reviewed By: simonmar

Subscribers: thomie, #ghc_windows_task_force

Differential Revision: https://phabricator.haskell.org/D2819

GHC Trac Issues: #7289

rts/RtsStartup.c
testsuite/tests/rts/T7289/Makefile [new file with mode: 0644]
testsuite/tests/rts/T7289/T7289.hs [new file with mode: 0644]
testsuite/tests/rts/T7289/T7289.stdout [new file with mode: 0644]
testsuite/tests/rts/T7289/all.T [new file with mode: 0644]
testsuite/tests/rts/T7289/fp.c [new file with mode: 0644]

index dd4efa6..955ad13 100644 (file)
@@ -46,7 +46,9 @@
 #include "win32/AsyncIO.h"
 #endif
 
 #include "win32/AsyncIO.h"
 #endif
 
-#if !defined(mingw32_HOST_OS)
+#if defined(mingw32_HOST_OS)
+#include <fenv.h>
+#else
 #include "posix/TTY.h"
 #endif
 
 #include "posix/TTY.h"
 #endif
 
@@ -69,10 +71,18 @@ static void flushStdHandles(void);
 
 #define X86_INIT_FPU 0
 
 
 #define X86_INIT_FPU 0
 
-#if X86_INIT_FPU
 static void
 x86_init_fpu ( void )
 {
 static void
 x86_init_fpu ( void )
 {
+#if defined(mingw32_HOST_OS) && !X86_INIT_FPU
+    /* Mingw-w64 does a stupid thing. They set the FPU precision to extended mode by default.
+    The reasoning is that it's for compatibility with GNU Linux ported libraries. However the
+    problem is this is incompatible with the standard Windows double precision mode.  In fact,
+    if we create a new OS thread then Windows will reset the FPU to double precision mode.
+    So we end up with a weird state where the main thread by default has a different precision
+    than any child threads. */
+    fesetenv(FE_PC53_ENV);
+#elif X86_INIT_FPU
   __volatile unsigned short int fpu_cw;
 
   // Grab the control word
   __volatile unsigned short int fpu_cw;
 
   // Grab the control word
@@ -87,8 +97,26 @@ x86_init_fpu ( void )
 
   // Store the new control word back
   __asm __volatile ("fldcw %0" : : "m" (fpu_cw));
 
   // Store the new control word back
   __asm __volatile ("fldcw %0" : : "m" (fpu_cw));
+#else
+    return;
+#endif
+}
+
+#if defined(mingw32_HOST_OS)
+/* And now we have to override the built-in ones in Mingw-W64's CRT, so that they call x86_init_fpu(). */
+void _fpreset(void)
+{
+    x86_init_fpu();
+}
+
+#ifdef __GNUC__
+void __attribute__((alias("_fpreset"))) fpreset(void);
+#else
+void fpreset(void) {
+    _fpreset();
 }
 #endif
 }
 #endif
+#endif
 
 /* -----------------------------------------------------------------------------
    Starting up the RTS
 
 /* -----------------------------------------------------------------------------
    Starting up the RTS
@@ -244,9 +272,7 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
     startupAsyncIO();
 #endif
 
     startupAsyncIO();
 #endif
 
-#if X86_INIT_FPU
     x86_init_fpu();
     x86_init_fpu();
-#endif
 
     startupHpc();
 
 
     startupHpc();
 
diff --git a/testsuite/tests/rts/T7289/Makefile b/testsuite/tests/rts/T7289/Makefile
new file mode 100644 (file)
index 0000000..9101fbd
--- /dev/null
@@ -0,0 +1,3 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
diff --git a/testsuite/tests/rts/T7289/T7289.hs b/testsuite/tests/rts/T7289/T7289.hs
new file mode 100644 (file)
index 0000000..1751333
--- /dev/null
@@ -0,0 +1,9 @@
+module Main where
+
+import Control.Concurrent
+
+foreign import ccall "showControlBits" checkfpu :: IO ()
+-- Report the FPU control bits from the main thread, then from a bound OS thread.
+main = do
+    done <- newEmptyMVar -- main must block: the program exits as soon as main returns
+    checkfpu >> forkOS (checkfpu >> putMVar done ()) >> takeMVar done
diff --git a/testsuite/tests/rts/T7289/T7289.stdout b/testsuite/tests/rts/T7289/T7289.stdout
new file mode 100644 (file)
index 0000000..7a74a81
--- /dev/null
@@ -0,0 +1,2 @@
+FPU: 0x027f
+FPU: 0x027f
diff --git a/testsuite/tests/rts/T7289/all.T b/testsuite/tests/rts/T7289/all.T
new file mode 100644 (file)
index 0000000..7ef59cc
--- /dev/null
@@ -0,0 +1,6 @@
+test('T7289', [ extra_clean(['fp.o', 'testfp.o', 'testfp.hi'])  # NOTE(review): 'testfp.*' match no file named in this test - confirm
+               , extra_files(['fp.c'])  # C helper passed to compile_and_run below
+               , unless(opsys('mingw32'), skip)  # skipped unless the OS is mingw32
+               , only_ways(['threaded1'])  # restricted to the 'threaded1' way
+               ],
+               compile_and_run, ['fp.c'])
diff --git a/testsuite/tests/rts/T7289/fp.c b/testsuite/tests/rts/T7289/fp.c
new file mode 100644 (file)
index 0000000..12f1b39
--- /dev/null
@@ -0,0 +1,26 @@
+#include <stdio.h>
+#include <xmmintrin.h>
+#include <float.h>
+
+static unsigned int
+getFPUStateX86 (void) /* Returns the x87 FPU control word, zero-extended to unsigned int. */
+{
+#if defined(_MSC_VER)
+    return _controlfp(0, 0);
+#else
+    unsigned short control = 0; /* 16-bit object: fnstcw stores exactly two bytes, so a wider "=m" operand would be left partly undefined */
+    __asm__ __volatile__("fnstcw %0" : "=m" (control));
+    return control;
+#endif
+}
+
+static unsigned int
+getSSEStateX86 (void) /* Returns the value reported by _mm_getcsr(); no caller is visible in this file. */
+{
+    return _mm_getcsr();
+}
+
+extern void showControlBits (void) /* Exported entry point; T7289.hs imports it via ccall as `checkfpu`. */
+{
+    printf("FPU: 0x%04x\n", getFPUStateX86()); /* one line per call, zero-padded to at least four hex digits */
+}