{-# OPTIONS_GHC -cpp -fglasgow-exts #-}
--
-- Module      : Data.ByteString.Base
-- License     : BSD-style
-- Maintainer  : dons@cse.unsw.edu.au
-- Stability   : experimental
-- Portability : portable, requires ffi and cpp
-- Tested with : GHC 6.4.1 and Hugs March 2005
--

-- | A module containing semi-public ByteString internals. This exposes
-- the ByteString representation and low level construction functions.
-- Modules which extend the ByteString system will need to use this module
-- while ideally most users will be able to make do with the public interface
-- modules.
--
module Data.ByteString.Base (

        -- * The @ByteString@ type and representation
        ByteString(..),             -- instances: Eq, Ord, Show, Read, Data, Typeable
        LazyByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable

        -- * Unchecked access
        unsafeHead,             -- :: ByteString -> Word8
        unsafeTail,             -- :: ByteString -> ByteString
        unsafeIndex,            -- :: ByteString -> Int -> Word8
        unsafeTake,             -- :: Int -> ByteString -> ByteString
        unsafeDrop,             -- :: Int -> ByteString -> ByteString

        -- * Low level introduction and elimination
        empty,                  -- :: ByteString
        create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
        createAndTrim,          -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
        createAndTrim',         -- :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
        mallocByteString,       -- :: Int -> IO (ForeignPtr a)

        unsafeCreate,           -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
        unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
        unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a

        fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> ByteString
        toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)

        -- * Utilities
        inlinePerformIO,        -- :: IO a -> a
        nullForeignPtr,         -- :: ForeignPtr Word8

        countOccurrences,       -- :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO ()

        -- * Standard C Functions
        -- (signature comments below mirror the foreign import declarations)
        c_strlen,               -- :: CString -> IO CSize
        c_malloc,               -- :: CSize -> IO (Ptr Word8)
        c_free,                 -- :: Ptr Word8 -> IO ()
        c_free_finalizer,       -- :: FunPtr (Ptr Word8 -> IO ())

        memchr,                 -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
        memcmp,                 -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
        memcpy,                 -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
        memmove,                -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
        memset,                 -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)

        -- * cbits functions
        c_reverse,              -- :: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()
        c_intersperse,          -- :: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()
        c_maximum,              -- :: Ptr Word8 -> CULong -> IO Word8
        c_minimum,              -- :: Ptr Word8 -> CULong -> IO Word8
        c_count,                -- :: Ptr Word8 -> CULong -> Word8 -> IO CULong

        -- * Internal GHC magic

        -- * Chars
        w2c, c2w, isSpaceWord8

  ) where

import Foreign.ForeignPtr       (ForeignPtr, newForeignPtr_, withForeignPtr)
import Foreign.Ptr              (Ptr, FunPtr, plusPtr, castPtr)
import Foreign.Storable         (Storable(..))
import Foreign.C.Types          (CInt, CSize, CULong)
import Foreign.C.String         (CString, CStringLen)

-- NOTE(review): 'assertb' is used throughout this module with the shape
-- @Bool -> a -> a@; "Control.Exception" is not known to export that name
-- (it exports 'assert'). Presumably an artifact of preprocessing -- confirm
-- before building.
import Control.Exception        (assertb)

import Data.Char                (ord)
import Data.Word                (Word8)

import Data.Char                (chr)
import System.IO.Unsafe         (unsafePerformIO)
import Foreign.ForeignPtr       (mallocForeignPtrBytes)
import Foreign.Ptr              (nullPtr)

-- CFILES stuff is Hugs only
{-# CFILES cbits/fpstring.c #-}

-- -----------------------------------------------------------------------------
--
-- Useful macros, until we have bang patterns
--
-- NOTE(review): no macro definitions follow in this copy of the file;
-- presumably they were consumed by the preprocessor -- confirm.

-- -----------------------------------------------------------------------------

-- | A space-efficient representation of a Word8 vector, supporting many
-- efficient operations.  A 'ByteString' contains 8-bit characters only.
--
-- Instances of Eq, Ord, Read, Show, Data, Typeable
--
data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)  -- underlying buffer
                     {-# UNPACK #-} !Int                 -- offset
                     {-# UNPACK #-} !Int                 -- length

-- Shown exactly like the list of characters obtained via 'w2c'.
instance Show ByteString where
    showsPrec prec bytes rest = showsPrec prec (unpackWith w2c bytes) rest

-- Read as a list of characters, then packed via 'c2w'.
instance Read ByteString where
    readsPrec prec input = do
        (chars, leftover) <- readsPrec prec input
        return (packWith c2w chars, leftover)

-- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
--
-- The list is built back to front: the loop starts at the last byte and
-- conses each converted byte onto the accumulator, so no reversal is needed.
unpackWith :: (Word8 -> a) -> ByteString -> [a]
unpackWith _ (PS _ _ 0) = []
unpackWith conv (PS fptr off len) =
    inlinePerformIO (withForeignPtr fptr start)
  where
    start base = collect (base `plusPtr` off) (len - 1) []

    -- The guard is always False; it only forces all three arguments.
    collect a b c | a `seq` b `seq` c `seq` False = undefined
    collect ptr 0 acc = do
        byte <- peek ptr
        return (conv byte : acc)
    collect ptr ix acc = do
        byte <- peekByteOff ptr ix
        collect ptr (ix - 1) (conv byte : acc)
{-# INLINE unpackWith #-}
{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}

-- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
-- conversion function.
--
-- The buffer is sized with 'length', so the input list is traversed twice.
packWith :: (a -> Word8) -> [a] -> ByteString
packWith conv items = unsafeCreate (length items) (\buf -> fill buf items)
  where
    -- The guard is always False; it only forces both arguments.
    fill a b | a `seq` b `seq` False = undefined
    fill _ [] = return ()
    fill dst (c:cs) = do
        poke dst (conv c)
        fill (dst `plusPtr` 1) cs -- less space than pokeElemOff
{-# INLINE packWith #-}
{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}

------------------------------------------------------------------------

-- | A lazy packed string: a list of 'ByteString' chunks wrapped in the
-- @LPS@ constructor.
--
-- Instances of Eq, Ord, Read, Show, Data, Typeable
--
newtype LazyByteString = LPS [ByteString] -- LPS for lazy packed string
    deriving (Show, Read)

------------------------------------------------------------------------

-- | /O(1)/ The empty 'ByteString': zero offset and zero length over
-- 'nullForeignPtr'.
empty :: ByteString
empty = PS nullForeignPtr 0 0

-- | A 'ForeignPtr' built from 'nullPtr' with 'newForeignPtr_'. Marked
-- NOINLINE so the 'unsafePerformIO' call is not duplicated at use sites.
nullForeignPtr :: ForeignPtr Word8
nullForeignPtr = unsafePerformIO (newForeignPtr_ nullPtr)
{-# NOINLINE nullForeignPtr #-}

-- ---------------------------------------------------------------------
--
-- Extensions to the basic interface
--

-- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
-- check for the empty case, so there is an obligation on the programmer
-- to provide a proof that the ByteString is non-empty.
unsafeHead :: ByteString -> Word8
unsafeHead (PS fptr off len) =
    assertb (len > 0) (inlinePerformIO (withForeignPtr fptr readFirst))
  where
    -- The first byte lives at the string's offset into the buffer.
    readFirst base = peekByteOff base off
{-# INLINE unsafeHead #-}

-- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
-- check for the empty case. As with 'unsafeHead', the programmer must
-- provide a separate proof that the ByteString is non-empty.
unsafeTail :: ByteString -> ByteString
unsafeTail (PS fptr off len) =
    assertb (len > 0) (PS fptr (off + 1) (len - 1))
{-# INLINE unsafeTail #-}

-- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
-- This omits the bounds check, which means there is an accompanying
-- obligation on the programmer to ensure the bounds are checked in some
-- other way.
unsafeIndex :: ByteString -> Int -> Word8
unsafeIndex (PS fptr off len) ix =
    assertb (ix >= 0 && ix < len) (inlinePerformIO (withForeignPtr fptr readAt))
  where
    readAt base = peekByteOff base (off + ix)
{-# INLINE unsafeIndex #-}

-- | A variety of 'take' which omits the checks on @n@ so there is an
-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
unsafeTake :: Int -> ByteString -> ByteString
unsafeTake n (PS fptr off len) =
    assertb (0 <= n && n <= len) (PS fptr off n)
{-# INLINE unsafeTake #-}

-- | A variety of 'drop' which omits the checks on @n@ so there is an
-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
unsafeDrop :: Int -> ByteString -> ByteString
unsafeDrop n (PS fptr off len) =
    assertb (0 <= n && n <= len) (PS fptr (off + n) (len - n))
{-# INLINE unsafeDrop #-}

-- ---------------------------------------------------------------------
-- Low level constructors

-- | /O(1)/ Build a ByteString from a ForeignPtr. The resulting string
-- starts at offset 0 and has the given length.
fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
fromForeignPtr fptr len = PS fptr 0 len

-- | /O(1)/ Deconstruct a ByteString into its ForeignPtr, offset and length.
toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
toForeignPtr (PS fptr off len) = (fptr, off, len)

-- | A way of creating ByteStrings outside the IO monad. The @Int@
-- argument gives the final size of the ByteString. Unlike
-- 'createAndTrim' the ByteString is not reallocated if the final size
-- is less than the estimated size.
unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString
unsafeCreate size fill = unsafePerformIO (create size fill)
{-# INLINE unsafeCreate #-}

-- | Create a ByteString of size @l@ and use action @f@ to fill its contents.
create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
create size fill = do
    fptr <- mallocByteString size
    withForeignPtr fptr fill
    return $! PS fptr 0 size

-- | Given the maximum size needed and a function to make the contents
-- of a ByteString, createAndTrim makes the 'ByteString'. The generating
-- function is required to return the actual final size (<= the maximum
-- size); when that is smaller than the maximum, the bytes are copied into
-- a freshly created buffer of exactly that size.
--
-- createAndTrim is the main mechanism for creating custom, efficient
-- ByteString functions, using Haskell or C functions to fill the space.
--
createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
createAndTrim maxLen fill = do
    fptr <- mallocByteString maxLen
    withForeignPtr fptr $ \buf -> do
        used <- fill buf
        -- The assertion documents the contract (used <= maxLen); the test
        -- itself asks whether the whole buffer was filled.
        let filledAll = assertb (used <= maxLen) (used >= maxLen)
        if filledAll
            then return $! PS fptr 0 maxLen
            else create used (\dst -> memcpy dst buf (fromIntegral used))

-- | Like 'createAndTrim', but the generating function returns an offset,
-- a length and an extra result. When the length is smaller than the
-- maximum size, the bytes starting at that offset are copied into a fresh
-- buffer; the extra result is returned alongside the 'ByteString'.
createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
createAndTrim' maxLen fill = do
    fptr <- mallocByteString maxLen
    withForeignPtr fptr $ \buf -> do
        (off, used, extra) <- fill buf
        let filledAll = assertb (used <= maxLen) (used >= maxLen)
        if filledAll
            then return $! (PS fptr 0 maxLen, extra)
            else do
                trimmed <- create used
                    (\dst -> memcpy dst (buf `plusPtr` off) (fromIntegral used))
                return $! (trimmed, extra)

-- | Allocate a buffer of the given number of bytes. In this copy of the
-- module it simply delegates to 'mallocForeignPtrBytes' (the original note
-- mentions a faster implementation for GHC 6.5 builds newer than 06/06/06).
mallocByteString :: Int -> IO (ForeignPtr a)
mallocByteString size = mallocForeignPtrBytes size

------------------------------------------------------------------------

-- | Conversion between 'Word8' and 'Char'. Should compile to a no-op.
w2c :: Word8 -> Char
w2c = chr . fromIntegral
{-# INLINE w2c #-}

-- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and
-- silently truncates to 8 bits Chars > '\255'. It is provided as
-- convenience for ByteString construction.
c2w :: Char -> Word8
c2w = fromIntegral . ord
{-# INLINE c2w #-}

-- Selects white-space characters in the Latin-1 range
-- ordered by frequency
-- Idea from Ketil
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 0x20 = True  -- SPACE
isSpaceWord8 0x0A = True  -- LF, \n
isSpaceWord8 0x09 = True  -- HT, \t
isSpaceWord8 0x0C = True  -- FF, \f
isSpaceWord8 0x0D = True  -- CR, \r
isSpaceWord8 0x0B = True  -- VT, \v
isSpaceWord8 0xA0 = True  -- spotted by QC..
isSpaceWord8 _    = False
{-# INLINE isSpaceWord8 #-}

------------------------------------------------------------------------

-- | Just like unsafePerformIO, but we inline it. Big performance gains as
-- it exposes lots of things to further inlining. In this copy of the
-- module it is defined directly as 'unsafePerformIO' with an INLINE pragma.
--
{-# INLINE inlinePerformIO #-}
inlinePerformIO :: IO a -> a
inlinePerformIO = unsafePerformIO

-- | Count the number of occurrences of each byte.
--
-- @countOccurrences counts str l@ walks the first @l@ bytes of @str@ and,
-- for each byte value @k@ read, increments the element at index @k@ of
-- @counts@. The table is only incremented, never initialised, so the caller
-- must zero it first and it must be valid at every index a byte can take
-- (0 to 255). A length @l <= 0@ leaves the table untouched.
--
{-# SPECIALIZE countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO () #-}
countOccurrences :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO ()
-- The guard is always False; it only forces all three arguments.
countOccurrences a b c | a `seq` b `seq` c `seq` False = undefined
countOccurrences counts str l = go 0
  where
    go a | a `seq` False = undefined
    go i
        -- '>=' rather than '==': with a negative length an equality test
        -- would never succeed and the loop would run off the buffer.
        | i >= l    = return ()
        | otherwise = do
            k <- fromIntegral `fmap` peekElemOff str i
            x <- peekElemOff counts k
            pokeElemOff counts k (x + 1)
            go (i + 1)

-- | /O(1) construction/ Use a @ByteString@ with a function requiring a
-- @CString@.  Warning: modifying the @CString@ will affect the
-- @ByteString@.  Why is this function unsafe? It relies on the null
-- byte at the end of the ByteString to be there. Unless you can
-- guarantee the null byte, you should use the safe version, which will
-- copy the string first.
unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
unsafeUseAsCString (PS fptr off _) action =
    -- No copy is made: the action receives a pointer into the buffer itself.
    withForeignPtr fptr (\base -> action (castPtr base `plusPtr` off))

-- | /O(1) construction/ Use a @ByteString@ with a function requiring a
-- @CStringLen@.
unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
unsafeUseAsCStringLen (PS fptr off len) action =
    -- No copy is made: the pointer handed over points into the buffer itself.
    withForeignPtr fptr (\base -> action (castPtr base `plusPtr` off, len))

-- ---------------------------------------------------------------------
--
-- Standard C functions
--

foreign import ccall unsafe "string.h strlen"
    c_strlen :: CString -> IO CSize

foreign import ccall unsafe "stdlib.h malloc"
    c_malloc :: CSize -> IO (Ptr Word8)

foreign import ccall unsafe "static stdlib.h free"
    c_free :: Ptr Word8 -> IO ()

-- Address of @free@, imported as a function pointer.
foreign import ccall unsafe "static stdlib.h &free"
    c_free_finalizer :: FunPtr (Ptr Word8 -> IO ())

foreign import ccall unsafe "string.h memchr"
    memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)

foreign import ccall unsafe "string.h memcmp"
    memcmp :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt

-- The C functions @memcpy@ and @memmove@ return the destination pointer;
-- these imports discard it.
foreign import ccall unsafe "string.h memcpy"
    memcpy :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()

foreign import ccall unsafe "string.h memmove"
    memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()

foreign import ccall unsafe "string.h memset"
    memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)

-- ---------------------------------------------------------------------
--
-- Uses our C code
--

foreign import ccall unsafe "static fpstring.h fps_reverse"
    c_reverse :: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()

foreign import ccall unsafe "static fpstring.h fps_intersperse"
    c_intersperse :: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()

foreign import ccall unsafe "static fpstring.h fps_maximum"
    c_maximum :: Ptr Word8 -> CULong -> IO Word8

foreign import ccall unsafe "static fpstring.h fps_minimum"
    c_minimum :: Ptr Word8 -> CULong -> IO Word8

foreign import ccall unsafe "static fpstring.h fps_count"
    c_count :: Ptr Word8 -> CULong -> Word8 -> IO CULong

-- ---------------------------------------------------------------------
-- MMap

{-
foreign import ccall unsafe "static fpstring.h my_mmap"
    my_mmap :: Int -> Int -> IO (Ptr Word8)

foreign import ccall unsafe "static unistd.h close"
    c_close :: Int -> IO Int

foreign import ccall unsafe "static sys/mman.h munmap"
    c_munmap :: Ptr Word8 -> Int -> IO Int
-}

-- ---------------------------------------------------------------------
-- Internal GHC Haskell magic
-- (HTML for this module was generated on 2014-04-04. About the conversion tool.)