module GF.Infra.Ident (-- ** Identifiers
ModuleName(..), moduleNameS,
Ident, ident2utf8, showIdent, prefixIdent,
-- *** Normal identifiers (returned by the parser)
identS, identC, identW,
-- *** Special identifiers for internal use
identV, identA, identAV,
argIdent, isArgIdent, getArgIndex,
varStr, varX, isWildIdent, varIndex,
-- *** Raw identifiers
RawIdent, rawIdentS, rawIdentC, ident2raw, prefixRawIdent,
isPrefixOf, showRawIdent
) where
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.ByteString.Char8 as BS(append,isPrefixOf)
-- Limit use of BS functions to the ones that work correctly on
-- UTF-8-encoded bytestrings!
import Data.Char(isDigit)
import PGF.Internal(Binary(..))
import GF.Text.Pretty
-- | Module names
newtype ModuleName = MN Ident deriving (Eq,Ord)
moduleNameS = MN . identS
instance Show ModuleName where showsPrec d (MN m) = showsPrec d m
instance Pretty ModuleName where pp (MN m) = pp m
-- | the constructors labelled /INTERNAL/ are
-- internal representation never returned by the parser
data Ident =
IC {-# UNPACK #-} !RawIdent -- ^ raw identifier after parsing, resolved in Rename
| IW -- ^ wildcard
--
-- below this constructor: internal representation never returned by the parser
| IV {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int -- ^ /INTERNAL/ variable
| IA {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int -- ^ /INTERNAL/ argument of cat at position
| IAV {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int {-# UNPACK #-} !Int -- ^ /INTERNAL/ argument of cat with bindings at position
--
deriving (Eq, Ord, Show, Read)
-- | Identifiers are stored as UTF-8-encoded bytestrings.
-- (It is also possible to use regular Haskell 'String's, with somewhat
-- reduced performance and increased memory use.)
newtype RawIdent = Id { rawId2utf8 :: UTF8.ByteString }
deriving (Eq, Ord, Show, Read)
pack = UTF8.fromString
unpack = UTF8.toString
rawIdentS = Id . pack
rawIdentC = Id
showRawIdent = unpack . rawId2utf8
prefixRawIdent (Id x) (Id y) = Id (BS.append x y)
isPrefixOf (Id x) (Id y) = BS.isPrefixOf x y
instance Binary RawIdent where
put = put . rawId2utf8
get = fmap rawIdentC get
-- | This function should be used with care, since the returned ByteString is
-- UTF-8-encoded.
ident2utf8 :: Ident -> UTF8.ByteString
ident2utf8 i = case i of
IC (Id s) -> s
IV (Id s) n -> BS.append s (pack ('_':show n))
IA (Id s) j -> BS.append s (pack ('_':show j))
IAV (Id s) b j -> BS.append s (pack ('_':show b ++ '_':show j))
IW -> pack "_"
ident2raw :: Ident -> RawIdent
ident2raw = Id . ident2utf8
showIdent :: Ident -> String
showIdent i = unpack $! ident2utf8 i
instance Pretty Ident where pp = pp . showIdent
instance Pretty RawIdent where pp = pp . showRawIdent
identS :: String -> Ident
identS = identC . rawIdentS
identC :: RawIdent -> Ident
identW :: Ident
prefixIdent :: String -> Ident -> Ident
prefixIdent pref = identC . Id . BS.append (pack pref) . ident2utf8
-- normal identifier
-- ident s = IC s
identV :: RawIdent -> Int -> Ident
identA :: RawIdent -> Int -> Ident
identAV:: RawIdent -> Int -> Int -> Ident
(identC, identV, identA, identAV, identW) =
(IC, IV, IA, IAV, IW)
-- | to mark argument variables
argIdent :: Int -> Ident -> Int -> Ident
argIdent 0 (IC c) i = identA c i
argIdent b (IC c) i = identAV c b i
isArgIdent IA{} = True
isArgIdent IAV{} = True
isArgIdent _ = False
getArgIndex (IA _ i) = Just i
getArgIndex (IAV _ _ i) = Just i
getArgIndex (IC (Id bs))
| isDigit c =
-- (Just . read . unpack . snd . BS.spanEnd isDigit) bs -- not ok with UTF-8
(Just . read . reverse . takeWhile isDigit) s
where s@(c:_) = reverse (unpack bs)
getArgIndex x = Nothing
-- | used in lin defaults
varStr :: Ident
varStr = identA (rawIdentS "str") 0
-- | refreshing variables
varX :: Int -> Ident
varX = identV (rawIdentS "x")
isWildIdent :: Ident -> Bool
isWildIdent x = case x of
IW -> True
IC s | s == wild -> True
_ -> False
wild = Id (pack "_")
varIndex :: Ident -> Int
varIndex (IV _ n) = n
varIndex _ = -1 --- other than IV should not count