Data.Binary.IEEE754

% Copyright (C) 2009 John Millikin <jmillikin@gmail.com> % % This program is free software: you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation, either version 3 of the License, or % any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program. If not, see <http://www.gnu.org/licenses/>.

{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Data.Binary.IEEE754 (
        -- * Parsing
          getFloat16be, getFloat16le
        , getFloat32be, getFloat32le
        , getFloat64be, getFloat64le

        -- * Serializing
        , putFloat32be, putFloat32le
        , putFloat64be, putFloat64le
) where

import Data.Bits ((.&.), (.|.), shiftL, shiftR, Bits)
import Data.Word (Word8)
import Data.List (foldl')

import qualified Data.ByteString as B
import Data.Binary.Get (Get, getByteString)
import Data.Binary.Put (Put, putByteString)
\end{code}

-- | Parse a 2-byte float, taking the bytes in the order they are read
-- (the first byte carries the sign bit).
getFloat16be :: Get Float
getFloat16be = getFloat byteWidth splitBytes
        where byteWidth = ByteCount 2
\end{code}

\begin{code}
-- | Parse a 2-byte float whose bytes arrive least-significant first; the
-- bytes are reversed before being split into fields.
getFloat16le :: Get Float
getFloat16le = getFloat byteWidth (\bytes -> splitBytes (reverse bytes))
        where byteWidth = ByteCount 2
\end{code}

\begin{code}
-- | Parse a 4-byte float, taking the bytes in the order they are read
-- (the first byte carries the sign bit).
getFloat32be :: Get Float
getFloat32be = getFloat byteWidth splitBytes
        where byteWidth = ByteCount 4
\end{code}

\begin{code}
-- | Parse a 4-byte float whose bytes arrive least-significant first; the
-- bytes are reversed before being split into fields.
getFloat32le :: Get Float
getFloat32le = getFloat byteWidth (\bytes -> splitBytes (reverse bytes))
        where byteWidth = ByteCount 4
\end{code}

\begin{code}
-- | Parse an 8-byte float, taking the bytes in the order they are read
-- (the first byte carries the sign bit).
getFloat64be :: Get Double
getFloat64be = getFloat byteWidth splitBytes
        where byteWidth = ByteCount 8
\end{code}

\begin{code}
-- | Parse an 8-byte float whose bytes arrive least-significant first; the
-- bytes are reversed before being split into fields.
getFloat64le :: Get Double
getFloat64le = getFloat byteWidth (\bytes -> splitBytes (reverse bytes))
        where byteWidth = ByteCount 8
\end{code}

Split the raw byte array into (sign, exponent, significand) components. The exponent and significand are drawn directly from the bits in the original float, and have not been unbiased or otherwise modified.

-- | Break a most-significant-byte-first byte list into its raw sign,
-- exponent and significand fields. The fields are copied out as stored:
-- the exponent is not unbiased here and no implied bit is added to the
-- significand.
--
-- The list must be non-empty, since the sign is read from its first byte.
splitBytes :: [Word8] -> RawFloat
splitBytes bytes = RawFloat byteWidth signField expField sigField expBits sigBits where
        byteWidth = ByteCount (length bytes)
        totalBits = bitsInWord8 bytes

        -- The sign is the top bit of the first byte.
        signField
                | head bytes .&. 0x80 == 0 = Positive
                | otherwise                = Negative

        -- Bit 0 is the sign, so the exponent field starts at bit 1 and
        -- the significand takes everything after it.
        expStart = 1
        expBits  = exponentWidth totalBits
        sigStart = expStart + expBits
        sigBits  = totalBits - sigStart

        expField = Exponent (fromIntegral (bitSlice bytes expStart sigStart))
        sigField = Significand (bitSlice bytes sigStart totalBits)
\end{code}

The next step depends on the value of the exponent e, size of the exponent field in bits w, and value of the significand.

\begin{table}[h]
\begin{center}
\begin{tabular}{lrl}
\toprule
Exponent & Significand & Format \\
\midrule
$0$ & $0$ & Zero \\
$0$ & $> 0$ & Denormalised \\
$1 \leq e \leq 2^w - 2$ & \textit{any} & Normalised \\
$2^w - 1$ & $0$ & Infinity \\
$2^w - 1$ & $> 0$ & NaN \\
\bottomrule
\end{tabular}
\end{center}
\end{table}

There are no built-in literals for Infinity or NaN, so they are constructed using the {\tt Read} instances for {\tt Double} and {\tt Float}.

-- | Turn raw bit fields into the absolute value they encode. The sign
-- field is ignored here.
--
-- An all-zero exponent selects zero or a denormalised value, an all-ones
-- exponent selects Infinity or NaN, and anything else is normalised.
merge :: (Read a, RealFloat a) => RawFloat -> a
merge raw
        | expIsZero && sigIsZero = 0.0
        | expIsZero              = denormalised raw
        | expIsMax && sigIsZero  = read "Infinity"
        | expIsMax               = read "NaN"
        | otherwise              = normalised raw
        where
        expIsZero = rawExponent raw == 0
        sigIsZero = rawSignificand raw == 0
        -- Largest value the exponent field can hold: 2^width - 1.
        expIsMax  = rawExponent raw == (2 `pow` rawExponentWidth raw) - 1
\end{code}

If a value is normalised, its significand has an implied {\tt 1} bit just above its most-significant stored bit. The significand must be adjusted by this value before being passed to {\tt encodeFloat}.

-- | Decode a normalised value: add the implied leading 1 bit to the
-- stored significand, then scale by the unbiased exponent less the
-- significand width.
normalised :: RealFloat a => RawFloat -> a
normalised raw = encodeFloat mantissa scale where
        fieldWidth = rawSignificandWidth raw
        Significand storedBits = rawSignificand raw

        -- The implied bit sits just above the stored significand bits.
        mantissa = (1 `bitShiftL` fieldWidth) + storedBits

        Exponent scale =
                unbias (rawExponent raw) (rawExponentWidth raw)
                - fromIntegral fieldWidth
\end{code}

Denormalised values have no implied {\tt 1} bit: the significand is used exactly as stored, and the exponent is increased by one to compensate.

-- | Decode a denormalised value: the stored significand is used as-is
-- (no implied bit is added) and the scale is one greater than the
-- normalised formula would give.
denormalised :: RealFloat a => RawFloat -> a
denormalised raw = encodeFloat storedBits scale where
        Significand storedBits = rawSignificand raw

        Exponent scale =
                unbias (rawExponent raw) (rawExponentWidth raw)
                - fromIntegral (rawSignificandWidth raw)
                + 1
\end{code}

By composing {\tt splitBytes} and {\tt merge}, the absolute value of the float is calculated. Before being returned to the calling function, it must be signed appropriately.

-- | Read the given number of bytes, split them into raw fields with the
-- supplied parser, and rebuild the signed floating-point value.
getFloat :: (Read a, RealFloat a) => ByteCount
            -> ([Word8] -> RawFloat) -> Get a
getFloat (ByteCount nBytes) parser =
        fmap (signed . parser . B.unpack) (getByteString nBytes)
        where
        -- 'merge' yields the magnitude; the sign field decides whether
        -- it is negated.
        signed raw = applySign (rawSign raw) (merge raw)
        applySign Positive = id
        applySign Negative = negate
\end{code}

-- | Serialise a 'Float' as 4 bytes, most-significant byte first.
putFloat32be :: Float -> Put
putFloat32be = putFloat byteWidth id
        where byteWidth = ByteCount 4
\end{code}

\begin{code}
-- | Serialise a 'Float' as 4 bytes, least-significant byte first (the
-- big-endian byte list is reversed before writing).
putFloat32le :: Float -> Put
putFloat32le = putFloat byteWidth reverse
        where byteWidth = ByteCount 4
\end{code}

\begin{code}
-- | Serialise a 'Double' as 8 bytes, most-significant byte first.
putFloat64be :: Double -> Put
putFloat64be = putFloat byteWidth id
        where byteWidth = ByteCount 8
\end{code}

\begin{code}
-- | Serialise a 'Double' as 8 bytes, least-significant byte first (the
-- big-endian byte list is reversed before writing).
putFloat64le :: Double -> Put
putFloat64le = putFloat byteWidth reverse
        where byteWidth = ByteCount 8
\end{code}

Serialisation is similar to parsing. First, the float is converted to a structure representing raw bitfields. The values returned from {\tt decodeFloat} are clamped to their expected lengths before being stored in the {\tt RawFloat}.

-- | Convert a floating-point value into the raw bit fields of an encoding
-- that is @width@ bytes wide. Both the exponent and the significand are
-- clamped to their field widths before being stored.
--
-- NOTE(review): the branch taken when the biased exponent is zero looks
-- intended for denormalised values, and there is no dedicated handling
-- of Infinity or NaN here -- confirm both against what 'decodeFloat'
-- actually returns for such inputs.
splitFloat :: RealFloat a => ByteCount -> a -> RawFloat
splitFloat width x = RawFloat width signField expField sigField expBits sigBits where
        totalBits = bitCount width
        expBits   = exponentWidth totalBits
        sigBits   = totalBits - expBits - 1 -- 1 for sign bit

        signField
                | isNegativeNaN x || isNegativeZero x || x < 0 = Negative
                | otherwise                                    = Positive

        (mantissa, scale) = decodeFloat x

        -- Re-bias the decoded exponent and drop the significand's
        -- leading bit, which is implied rather than stored.
        biased = bias (Exponent (scale + fromIntegral sigBits)) expBits
        stored = abs mantissa - (1 `bitShiftL` sigBits)

        (unclampedExp, unclampedSig)
                | mantissa == 0 && scale == 0 = (0, 0)
                | biased == 0                 = (0, Significand (stored + 1))
                | otherwise                   = (biased, Significand stored)

        expField = clamp expBits unclampedExp
        sigField = clamp sigBits unclampedSig
\end{code}

Then, the {\tt RawFloat} is converted to a list of bytes by mashing all the fields together into an {\tt Integer}, and chopping up that integer in 8-bit blocks.

-- | Pack the sign, exponent and significand fields (in that order, from
-- the most-significant end) into one 'Integer', then cut it into
-- 'rawWidth' bytes.
rawToBytes :: RawFloat -> [Word8]
rawToBytes (RawFloat width sign exp' sig expWidth sigWidth) =
        integerToBytes packed width
        where
        signBit :: Word8
        signBit = case sign of
                Negative -> 1
                Positive -> 0

        -- Each step shifts the accumulated bits left and ORs in the
        -- next field.
        withSign = mashBits signBit 1 0
        withExp  = mashBits exp' expWidth withSign
        packed   = mashBits sig sigWidth withExp
\end{code}

{\tt clamp}, given a maximum bit count and a value, will strip any 1-bits in positions above the count.

-- | @clamp n x@ keeps only the lowest @n@ bits of @x@, clearing any
-- 1-bits in positions at or above @n@.
--
-- The bit count must be at least 1; any other count is a programming
-- error and is reported with a descriptive message rather than a bare
-- 'undefined'.
clamp :: (Num a, Bits a) => BitCount -> a -> a
clamp = (.&.) . mask where
        -- An all-ones mask of the requested width, computed directly as
        -- 2^n - 1 instead of by shifting one bit at a time.
        mask (BitCount n)
                | n >= 1    = (1 `shiftL` n) - 1
                | otherwise = error
                        "Data.Binary.IEEE754.clamp: bit count must be positive"
\end{code}

For merging the fields, just shift the starting integer over a bit and then \textsc{or} it with the next value. The weird parameter order allows easy composition.

-- | Append a field to the low end of an accumulator: shift the
-- accumulator left by the field width and OR the field value in. A
-- zero-width field leaves the accumulator untouched. The accumulator
-- comes last so that calls can be chained with composition.
mashBits :: (Bits a, Integral a) => a -> BitCount -> Integer -> Integer
mashBits field width acc
        | width == 0 = acc
        | otherwise  = fromIntegral field .|. (acc `bitShiftL` width)
\end{code}

Given an integer, read it in 8-bit increments starting from the LSB. Each increment is converted to a byte and added to the final list, so that the most-significant byte comes first.

-- | Cut an integer into the requested number of bytes, returned with the
-- most-significant byte first. Bits above the requested width are
-- discarded.
integerToBytes :: Integer -> ByteCount -> [Word8]
integerToBytes value count = go [] value count where
        -- Peel bytes off the low end, prepending each one, so the final
        -- list has the highest byte at its head.
        go acc _ 0 = acc
        go acc v k = go (lowByte v : acc) (v `shiftR` 8) (k - 1)

        lowByte v = fromIntegral v .&. 0xFF
\end{code}

Finally, the raw serialisation is wrapped up in {\tt Put}. The second parameter allows the same code paths to be used for little- and big-endian serialisation.

-- | Serialise a float into the given number of bytes. The value is split
-- into raw fields, packed into a big-endian byte list, passed through the
-- supplied byte-list transformation (e.g. 'id' or 'reverse'), and
-- written out.
putFloat :: (RealFloat a) => ByteCount -> ([Word8] -> [Word8]) -> a -> Put
putFloat width f =
        putByteString . B.pack . f . rawToBytes . splitFloat width
\end{code}

Information about the raw bit patterns in the float is stored in {\tt RawFloat}, so they don't have to be passed around to the various format cases. The exponent should be biased, and the significand shouldn't have its implied MSB (if applicable).

-- | The raw bit fields of an encoded float, together with the widths
-- needed to interpret them.
data RawFloat = RawFloat
        { rawWidth            :: ByteCount
          -- ^ Total size of the encoded value, in bytes.
        , rawSign             :: Sign
          -- ^ Sign of the value.
        , rawExponent         :: Exponent
          -- ^ Exponent field as stored (still biased).
        , rawSignificand      :: Significand
          -- ^ Significand field as stored, without any implied leading bit.
        , rawExponentWidth    :: BitCount
          -- ^ Width of the exponent field, in bits.
        , rawSignificandWidth :: BitCount
          -- ^ Width of the significand field, in bits.
        }
        deriving (Show)
\end{code}

Calculate the proper size of the exponent field, in bits, given the size of the full structure.

-- | Width of the exponent field, in bits, for a float that is @k@ bits
-- wide in total.
--
-- 16- and 32-bit formats use fixed widths (5 and 8). Wider formats whose
-- size is a multiple of 32 use the IEEE 754-2008 interchange formula
-- @round (4 * log2 k) - 13@, which gives 11 for 64 bits and 15 for 128
-- bits. Any other size raises an error.
exponentWidth :: BitCount -> BitCount
exponentWidth k
        | k == 16                    = 5
        | k == 32                    = 8
        -- The standard specifies rounding to nearest. Using 'ceiling'
        -- gives the wrong width for sizes that are not powers of two
        -- (e.g. 160 bits) and is sensitive to upward floating-point
        -- error in 'logBase'. Requiring k >= 64 also keeps zero and
        -- negative sizes away from the logarithm.
        | k >= 64 && k `mod` 32 == 0 = round (4 * log2k) - 13
        | otherwise                  = error "Invalid length of floating-point value"
        where
        log2k :: Double
        log2k = logBase 2 (fromIntegral k)
\end{code}

\begin{code}
-- | Convert an unbiased exponent to its stored form by adding the bias
-- @2^(eWidth - 1) - 1@.
bias :: Exponent -> BitCount -> Exponent
bias e eWidth = e + offset
        where offset = (2 `pow` (eWidth - 1)) - 1
\end{code}

\begin{code}
-- | Convert a stored exponent to its unbiased form by subtracting the
-- bias @2^(eWidth - 1) - 1@.
unbias :: Exponent -> BitCount -> Exponent
unbias e eWidth = e - offset
        where offset = (2 `pow` (eWidth - 1)) - 1
\end{code}

-- | Sign of a floating-point value.
data Sign = Positive | Negative
        deriving (Show)

-- | An exponent value; a distinct type so it cannot be confused with
-- other 'Int' quantities.
newtype Exponent = Exponent Int
        deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits)

-- | A significand value, held as an arbitrary-precision integer.
newtype Significand = Significand Integer
        deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits)

-- | A size or position measured in bits.
newtype BitCount = BitCount Int
        deriving (Show, Eq, Num, Ord, Real, Enum, Integral)

-- | A size measured in bytes.
newtype ByteCount = ByteCount Int
        deriving (Show, Eq, Num, Ord, Real, Enum, Integral)

-- | Number of bits in the given number of bytes (8 bits per byte).
bitCount :: ByteCount -> BitCount
bitCount (ByteCount nBytes) = BitCount (8 * nBytes)

-- | Total number of bits held by a list of bytes.
bitsInWord8 :: [Word8] -> BitCount
bitsInWord8 bytes = bitCount (ByteCount (length bytes))

-- | 'shiftL' taking its shift distance as a 'BitCount'.
bitShiftL :: (Bits a) => a -> BitCount -> a
bitShiftL value (BitCount distance) = value `shiftL` distance

-- | 'shiftR' taking its shift distance as a 'BitCount'.
bitShiftR :: (Bits a) => a -> BitCount -> a
bitShiftR value (BitCount distance) = value `shiftR` distance
\end{code}

Considering a byte list as a sequence of bits, slice it from start inclusive to end exclusive, and return the resulting bit sequence as an integer.