Add -XBinaryLiterals language extension (re #9224)
authorHerbert Valerio Riedel <hvr@gnu.org>
Sat, 21 Jun 2014 09:38:17 +0000 (11:38 +0200)
committerHerbert Valerio Riedel <hvr@gnu.org>
Fri, 27 Jun 2014 18:46:32 +0000 (20:46 +0200)
Haskell2010 supports

- base-10 (prefix-less),
- base-8 (via `0[oO]`-prefix), and
- base-16 (via `0[xX]`-prefix) integer literals.

This commit adds syntax support for base-2 integer literals via the new `0[bB]`
prefix. The use of a `0b` prefix for indicating binary literals is known
from popular programming languages such as C++14, Perl, Python, Ruby, and Java.

This syntax extension is disabled by default and can be enabled via the
new `{-# LANGUAGE BinaryLiterals #-}` pragma and/or the new `-XBinaryLiterals`

This new extensions requires to upgrade the `ExtsBitmap` type from
`Word` to `Word64` as this adds a 33th flag which is not guaranteed to
fit into a `Word`.

Signed-off-by: Herbert Valerio Riedel <hvr@gnu.org>
Differential Revision: https://phabricator.haskell.org/D22

22 files changed:
compiler/main/DynFlags.hs
compiler/parser/Ctype.lhs
compiler/parser/Lexer.x
docs/users_guide/7.10.1-notes.xml
docs/users_guide/flags.xml
docs/users_guide/glasgow_exts.xml
testsuite/.gitignore
testsuite/tests/driver/T4437.hs
testsuite/tests/parser/should_fail/ParserNoBinaryLiterals1.hs [new file with mode: 0644]
testsuite/tests/parser/should_fail/ParserNoBinaryLiterals1.stderr [new file with mode: 0644]
testsuite/tests/parser/should_fail/ParserNoBinaryLiterals2.hs [new file with mode: 0644]
testsuite/tests/parser/should_fail/ParserNoBinaryLiterals2.stderr [new file with mode: 0644]
testsuite/tests/parser/should_fail/ParserNoBinaryLiterals3.hs [new file with mode: 0644]
testsuite/tests/parser/should_fail/ParserNoBinaryLiterals3.stderr [new file with mode: 0644]
testsuite/tests/parser/should_fail/all.T
testsuite/tests/parser/should_run/BinaryLiterals0.hs [new file with mode: 0644]
testsuite/tests/parser/should_run/BinaryLiterals0.stdout [new file with mode: 0644]
testsuite/tests/parser/should_run/BinaryLiterals1.hs [new file with mode: 0644]
testsuite/tests/parser/should_run/BinaryLiterals1.stdout [new file with mode: 0644]
testsuite/tests/parser/should_run/BinaryLiterals2.hs [new file with mode: 0644]
testsuite/tests/parser/should_run/BinaryLiterals2.stdout [new file with mode: 0644]
testsuite/tests/parser/should_run/all.T

index 3494208..122eaff 100644 (file)
@@ -582,6 +582,7 @@ data ExtensionFlag
    | Opt_TraditionalRecordSyntax
    | Opt_LambdaCase
    | Opt_MultiWayIf
+   | Opt_BinaryLiterals
    | Opt_NegativeLiterals
    | Opt_EmptyCase
    | Opt_PatternSynonyms
@@ -2875,6 +2876,7 @@ xFlags = [
   ( "UndecidableInstances",             Opt_UndecidableInstances, nop ),
   ( "IncoherentInstances",              Opt_IncoherentInstances, nop ),
   ( "PackageImports",                   Opt_PackageImports, nop ),
+  ( "BinaryLiterals",                   Opt_BinaryLiterals, nop ),
   ( "NegativeLiterals",                 Opt_NegativeLiterals, nop ),
   ( "EmptyCase",                        Opt_EmptyCase, nop ),
   ( "PatternSynonyms",                  Opt_PatternSynonyms, nop )
index b8819d5..c024ebe 100644 (file)
@@ -19,7 +19,7 @@ module Ctype
        , is_digit      -- Char# -> Bool
        , is_alphanum   -- Char# -> Bool
 
-       , is_decdigit, is_hexdigit, is_octdigit
+       , is_decdigit, is_hexdigit, is_octdigit, is_bindigit
        , hexDigit, octDecDigit
        ) where
 
@@ -87,6 +87,9 @@ is_hexdigit c
 is_octdigit :: Char -> Bool
 is_octdigit c = c >= '0' && c <= '7'
 
+is_bindigit :: Char -> Bool
+is_bindigit c = c == '0' || c == '1'
+
 to_lower :: Char -> Char
 to_lower c
   | c >=  'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
index 8faa286..78c39c7 100644 (file)
@@ -117,6 +117,7 @@ $small     = [$ascsmall $unismall \_]
 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
 $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
 
+$binit     = 0-1
 $octit     = 0-7
 $hexit     = [$decdigit A-F a-f]
 $symchar   = [$symbol \:]
@@ -134,6 +135,7 @@ $docsym    = [\| \^ \* \$]
 @consym    = \: $symchar*
 
 @decimal     = $decdigit+
+@binary      = $binit+
 @octal       = $octit+
 @hexadecimal = $hexit+
 @exponent    = [eE] [\-\+]? @decimal
@@ -401,9 +403,12 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 <0> {
   -- Normal integral literals (:: Num a => a, from Integer)
   @decimal                                                               { tok_num positive 0 0 decimal }
+  0[bB] @binary                / { ifExtension binaryLiteralsEnabled }   { tok_num positive 2 2 binary }
   0[oO] @octal                                                           { tok_num positive 2 2 octal }
   0[xX] @hexadecimal                                                     { tok_num positive 2 2 hexadecimal }
   @negative @decimal           / { ifExtension negativeLiteralsEnabled } { tok_num negative 1 1 decimal }
+  @negative 0[bB] @binary      / { ifExtension negativeLiteralsEnabled `alexAndPred`
+                                   ifExtension binaryLiteralsEnabled }   { tok_num negative 3 3 binary }
   @negative 0[oO] @octal       / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 octal }
   @negative 0[xX] @hexadecimal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 hexadecimal }
 
@@ -417,13 +422,19 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
   -- It's simpler (and faster?) to give separate cases to the negatives,
   -- especially considering octal/hexadecimal prefixes.
   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
+  0[bB] @binary                \# / { ifExtension magicHashEnabled `alexAndPred`
+                                      ifExtension binaryLiteralsEnabled } { tok_primint positive 2 3 binary }
   0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
   0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
+  @negative 0[bB] @binary      \# / { ifExtension magicHashEnabled `alexAndPred`
+                                      ifExtension binaryLiteralsEnabled } { tok_primint negative 3 4 binary }
   @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 
   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
+  0[bB] @binary                \# \# / { ifExtension magicHashEnabled `alexAndPred`
+                                         ifExtension binaryLiteralsEnabled } { tok_primword 2 4 binary }
   0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
   0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 
@@ -1112,6 +1123,7 @@ positive = id
 negative = negate
 decimal, octal, hexadecimal :: (Integer, Char -> Int)
 decimal = (10,octDecDigit)
+binary = (2,octDecDigit)
 octal = (8,octDecDigit)
 hexadecimal = (16,hexDigit)
 
@@ -1410,6 +1422,7 @@ lex_escape = do
 
         'x'   -> readNum is_hexdigit 16 hexDigit
         'o'   -> readNum is_octdigit  8 octDecDigit
+        'b'   -> readNum is_bindigit  2 octDecDigit
         x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
 
         c1 ->  do
@@ -1855,8 +1868,8 @@ setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
 
 -- for reasons of efficiency, flags indicating language extensions (eg,
 -- -fglasgow-exts or -XParallelArrays) are represented by a bitmap
--- stored in an unboxed Int
-type ExtsBitmap = Word
+-- stored in an unboxed Word64
+type ExtsBitmap = Word64
 
 xbit :: ExtBits -> ExtsBitmap
 xbit = bit . fromEnum
@@ -1897,6 +1910,7 @@ data ExtBits
   | TypeLiteralsBit
   | ExplicitNamespacesBit
   | LambdaCaseBit
+  | BinaryLiteralsBit
   | NegativeLiteralsBit
   deriving Enum
 
@@ -1952,6 +1966,8 @@ explicitNamespacesEnabled :: ExtsBitmap -> Bool
 explicitNamespacesEnabled = xtest ExplicitNamespacesBit
 lambdaCaseEnabled :: ExtsBitmap -> Bool
 lambdaCaseEnabled = xtest LambdaCaseBit
+binaryLiteralsEnabled :: ExtsBitmap -> Bool
+binaryLiteralsEnabled = xtest BinaryLiteralsBit
 negativeLiteralsEnabled :: ExtsBitmap -> Bool
 negativeLiteralsEnabled = xtest NegativeLiteralsBit
 patternSynonymsEnabled :: ExtsBitmap -> Bool
@@ -2018,6 +2034,7 @@ mkPState flags buf loc =
                .|. TypeLiteralsBit             `setBitIf` xopt Opt_DataKinds flags
                .|. ExplicitNamespacesBit       `setBitIf` xopt Opt_ExplicitNamespaces flags
                .|. LambdaCaseBit               `setBitIf` xopt Opt_LambdaCase               flags
+               .|. BinaryLiteralsBit           `setBitIf` xopt Opt_BinaryLiterals           flags
                .|. NegativeLiteralsBit         `setBitIf` xopt Opt_NegativeLiterals         flags
                .|. PatternSynonymsBit          `setBitIf` xopt Opt_PatternSynonyms          flags
       --
index c462a4d..b45721c 100644 (file)
@@ -31,7 +31,7 @@
         <itemizedlist>
             <listitem>
                 <para>
-                    TODO FIXME
+                    Added support for <link linkend="binary-literals">binary integer literals</link>
                </para>
            </listitem>
        </itemizedlist>
index ad9c44c..1dd224a 100644 (file)
             <entry><option>-XNoBangPatterns</option></entry>
           </row>
           <row>
+            <entry><option>-XBinaryLiterals</option></entry>
+            <entry>Enable support for <link linkend="binary-literals">binary literals</link>.</entry>
+            <entry>dynamic</entry>
+            <entry><option>-XNoBinaryLiterals</option></entry>
+          </row>
+          <row>
             <entry><option>-XCApiFFI</option></entry>
             <entry>Enable <link linkend="ffi-capi">the CAPI calling convention</link>.</entry>
             <entry>dynamic</entry>
index e959a1f..e97d579 100644 (file)
@@ -480,6 +480,26 @@ Indeed, the bindings can even be recursive.
       </para>
    </sect2>
 
+    <sect2 id="binary-literals">
+      <title>Binary integer literals</title>
+      <para>
+          Haskell 2010 and Haskell 98 allows for integer literals to
+          be given in decimal, octal (prefixed by
+          <literal>0o</literal> or <literal>0O</literal>), or
+          hexadecimal notation (prefixed by <literal>0x</literal> or
+          <literal>0X</literal>).
+      </para>
+
+      <para>
+          The language extension <option>-XBinaryLiterals</option>
+          adds support for expressing integer literals in binary
+          notation with the prefix <literal>0b</literal> or
+          <literal>0B</literal>. For instance, the binary integer
+          literal <literal>0b11001001</literal> will be desugared into
+          <literal>fromInteger 201</literal> when
+          <option>-XBinaryLiterals</option> is enabled.
+      </para>
+   </sect2>
 
     <!-- ====================== HIERARCHICAL MODULES =======================  -->
 
index 519d432..f25ca25 100644 (file)
@@ -1016,6 +1016,9 @@ tests/overloadedlists/should_run/overloadedlistsrun05
 tests/parser/should_compile/T5243
 tests/parser/should_compile/T7476/Main.imports
 tests/parser/should_compile/T7476/T7476
+tests/parser/should_run/BinaryLiterals0
+tests/parser/should_run/BinaryLiterals1
+tests/parser/should_run/BinaryLiterals2
 tests/parser/should_run/ParserMultiWayIf
 tests/parser/should_run/T1344
 tests/parser/should_run/operator
index 40ddb4b..c6332ea 100644 (file)
@@ -32,6 +32,7 @@ check title expected got
 expectedGhcOnlyExtensions :: [String]
 expectedGhcOnlyExtensions = ["RelaxedLayout",
                              "AlternativeLayoutRule",
+                             "BinaryLiterals",
                              "AlternativeLayoutRuleTransitional",
                              "JavaScriptFFI",
                              "PatternSynonyms"]
diff --git a/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals1.hs b/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals1.hs
new file mode 100644 (file)
index 0000000..6b7de0f
--- /dev/null
@@ -0,0 +1,5 @@
+module ParserNoBinaryLiterals1 where
+
+f :: Int -> ()
+f 0b0 = ()
+f _   = ()
diff --git a/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals1.stderr b/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals1.stderr
new file mode 100644 (file)
index 0000000..3b57330
--- /dev/null
@@ -0,0 +1,5 @@
+
+ParserNoBinaryLiterals1.hs:4:1:
+    Equations for ‘f’ have different numbers of arguments
+      ParserNoBinaryLiterals1.hs:4:1-10
+      ParserNoBinaryLiterals1.hs:5:1-10
diff --git a/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals2.hs b/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals2.hs
new file mode 100644 (file)
index 0000000..e760bd8
--- /dev/null
@@ -0,0 +1,9 @@
+{-# LANGUAGE MagicHash #-}
+
+module ParserNoBinaryLiterals2 where
+
+import GHC.Types
+
+f :: Word -> ()
+f (W# 0b0##) = ()
+f _          = ()
diff --git a/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals2.stderr b/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals2.stderr
new file mode 100644 (file)
index 0000000..4a756d6
--- /dev/null
@@ -0,0 +1,5 @@
+
+ParserNoBinaryLiterals2.hs:8:4:
+    Constructor ‘W#’ should have 1 argument, but has been given 2
+    In the pattern: W# 0 b0##
+    In an equation for ‘f’: f (W# 0 b0##) = ()
diff --git a/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals3.hs b/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals3.hs
new file mode 100644 (file)
index 0000000..b6bc81b
--- /dev/null
@@ -0,0 +1,9 @@
+{-# LANGUAGE MagicHash #-}
+
+module ParserNoBinaryLiterals3 where
+
+import GHC.Types
+
+f :: Int -> ()
+f (I# 0b0#) = ()
+f _         = ()
diff --git a/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals3.stderr b/testsuite/tests/parser/should_fail/ParserNoBinaryLiterals3.stderr
new file mode 100644 (file)
index 0000000..32c27e6
--- /dev/null
@@ -0,0 +1,5 @@
+
+ParserNoBinaryLiterals3.hs:8:4:
+    Constructor ‘I#’ should have 1 argument, but has been given 2
+    In the pattern: I# 0 b0#
+    In an equation for ‘f’: f (I# 0 b0#) = ()
index 45c471e..7e286cf 100644 (file)
@@ -75,6 +75,9 @@ test('readFailTraditionalRecords3', normal, compile_fail, [''])
 test('ParserNoForallUnicode', normal, compile_fail, [''])
 test('ParserNoLambdaCase', when(compiler_lt('ghc', '7.5'), skip), compile_fail, [''])
 test('ParserNoMultiWayIf', when(compiler_lt('ghc', '7.5'), skip), compile_fail, [''])
+test('ParserNoBinaryLiterals1', normal, compile_fail, [''])
+test('ParserNoBinaryLiterals2', normal, compile_fail, [''])
+test('ParserNoBinaryLiterals3', normal, compile_fail, [''])
 
 test('T5425', normal, compile_fail, [''])
 test('T984', normal, compile_fail, [''])
diff --git a/testsuite/tests/parser/should_run/BinaryLiterals0.hs b/testsuite/tests/parser/should_run/BinaryLiterals0.hs
new file mode 100644 (file)
index 0000000..7257445
--- /dev/null
@@ -0,0 +1,19 @@
+-- | Anti-Test for GHC 7.10+'s @BinaryLiterals@ extensions (see GHC #9224)
+--
+-- NB: This code won't compile with -XBinaryLiterals enabled
+
+{-# LANGUAGE NegativeLiterals #-}
+
+module Main where
+
+main :: IO ()
+main = print lst
+  where
+    -- "0b0" is to be parsed as "0 b0"
+    lst = [ (,) 0b0, (,) 0b1, (,) 0b10, (,) 0b11
+          , (,) -0b0, (,) -0b1, (,) -0b10, (,) -0b11
+          ] :: [(Int,Int)]
+    b0 = 60
+    b1 = 61
+    b11 = 611
+    b10 = 610
diff --git a/testsuite/tests/parser/should_run/BinaryLiterals0.stdout b/testsuite/tests/parser/should_run/BinaryLiterals0.stdout
new file mode 100644 (file)
index 0000000..dacce88
--- /dev/null
@@ -0,0 +1 @@
+[(0,60),(0,61),(0,610),(0,611),(0,60),(0,61),(0,610),(0,611)]
diff --git a/testsuite/tests/parser/should_run/BinaryLiterals1.hs b/testsuite/tests/parser/should_run/BinaryLiterals1.hs
new file mode 100644 (file)
index 0000000..f9918fb
--- /dev/null
@@ -0,0 +1,25 @@
+-- | Test for GHC 7.10+'s @BinaryLiterals@ extensions (see GHC #9224)
+
+{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE MagicHash #-}
+
+module Main where
+
+import GHC.Types
+
+main = do
+    print [ I# 0b0#, I# -0b0#, I# 0b1#, I# -0b1#
+          , I# 0b00000000000000000000000000000000000000000000000000000000000000000000000000001#
+          , I# -0b00000000000000000000000000000000000000000000000000000000000000000000000000001#
+          , I# -0b11001001#, I# -0b11001001#
+          , I# -0b11111111#, I# -0b11111111#
+          ]
+    print [ W# 0b0##, W# 0b1##, W# 0b11001001##, W# 0b11##, W# 0b11111111##
+          , W# 0b00000000000000000000000000000000000000000000000000000000000000000000000000001##
+          ]
+
+    print [ 0b0, 0b1, 0b10, 0b11, 0b100, 0b101, 0b110, 0b111 :: Integer
+          , -0b0, -0b1, -0b10, -0b11, -0b100, -0b101, -0b110, -0b111
+          , 0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
+          , -0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
+          ]
diff --git a/testsuite/tests/parser/should_run/BinaryLiterals1.stdout b/testsuite/tests/parser/should_run/BinaryLiterals1.stdout
new file mode 100644 (file)
index 0000000..e1065be
--- /dev/null
@@ -0,0 +1,3 @@
+[0,0,1,-1,1,-1,-201,-201,-255,-255]
+[0,1,201,3,255,1]
+[0,1,2,3,4,5,6,7,0,-1,-2,-3,-4,-5,-6,-7,340282366920938463463374607431768211455,-340282366920938463463374607431768211455]
diff --git a/testsuite/tests/parser/should_run/BinaryLiterals2.hs b/testsuite/tests/parser/should_run/BinaryLiterals2.hs
new file mode 100644 (file)
index 0000000..3779d52
--- /dev/null
@@ -0,0 +1,29 @@
+-- | Test for GHC 7.10+'s @BinaryLiterals@ extensions (see GHC #9224)
+
+{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE NegativeLiterals #-}
+
+module Main where
+
+import GHC.Types
+import GHC.Int
+
+main = do
+    print [ I# 0B0#, I# -0B0#, I# 0B1#, I# -0B1#
+          , I# 0B00000000000000000000000000000000000000000000000000000000000000000000000000001#
+          , I# -0B00000000000000000000000000000000000000000000000000000000000000000000000000001#
+          , I# -0B11001001#, I# -0B11001001#
+          , I# -0B11111111#, I# -0B11111111#
+          ]
+    print [ W# 0B0##, W# 0B1##, W# 0B11001001##, W# 0B11##, W# 0B11111111##
+          , W# 0B00000000000000000000000000000000000000000000000000000000000000000000000000001##
+          ]
+
+    print [ 0B0, 0B1, 0B10, 0B11, 0B100, 0B101, 0B110, 0B111 :: Integer
+          , -0B0, -0B1, -0B10, -0B11, -0B100, -0B101, -0B110, -0B111
+          , 0B11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
+          , -0B11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
+          ]
+
+    print [ I8# -0B10000000#, I8# 0B1111111# ]
diff --git a/testsuite/tests/parser/should_run/BinaryLiterals2.stdout b/testsuite/tests/parser/should_run/BinaryLiterals2.stdout
new file mode 100644 (file)
index 0000000..76506e9
--- /dev/null
@@ -0,0 +1,4 @@
+[0,0,1,-1,1,-1,-201,-201,-255,-255]
+[0,1,201,3,255,1]
+[0,1,2,3,4,5,6,7,0,-1,-2,-3,-4,-5,-6,-7,340282366920938463463374607431768211455,-340282366920938463463374607431768211455]
+[-128,127]
index eee0330..cf7ee6f 100644 (file)
@@ -6,3 +6,6 @@ test('T1344', normal, compile_and_run, [''])
 test('operator', normal, compile_and_run, [''])
 test('operator2', normal, compile_and_run, [''])
 test('ParserMultiWayIf', when(compiler_lt('ghc', '7.5'), skip), compile_and_run, [''])
+test('BinaryLiterals0', normal, compile_and_run, [''])
+test('BinaryLiterals1', when(compiler_lt('ghc', '7.9'), skip), compile_and_run, [''])
+test('BinaryLiterals2', when(compiler_lt('ghc', '7.9'), skip), compile_and_run, [''])
\ No newline at end of file