{-# LANGUAGE MagicHash, BangPatterns, FlexibleContexts #-} module PGF.Macros where import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import PGF.CId import PGF.Data import Control.Monad import qualified Data.Map as Map --import qualified Data.Set as Set --import qualified Data.IntMap as IntMap --import qualified Data.IntSet as IntSet import qualified Data.Array as Array --import Data.Maybe import Data.List import Data.Array.IArray import Text.PrettyPrint import GHC.Prim import GHC.Base(getTag) import Data.Char -- operations for manipulating PGF grammars and objects mapConcretes :: (Concr -> Concr) -> PGF -> PGF mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) } lookType :: Abstr -> CId -> Type lookType abs f = case lookMap (error $ "lookType " ++ show f) f (funs abs) of (ty,_,_,_) -> ty isData :: Abstr -> CId -> Bool isData abs f = case Map.lookup f (funs abs) of Just (_,_,Nothing,_) -> True -- the encoding of data constrs _ -> False lookValCat :: Abstr -> CId -> CId lookValCat abs = valCat . lookType abs lookStartCat :: PGF -> CId lookStartCat pgf = mkCId $ case msum $ Data.List.map (Map.lookup (mkCId "startcat")) [gflags pgf, aflags (abstract pgf)] of Just (LStr s) -> s _ -> "S" lookGlobalFlag :: PGF -> CId -> Maybe Literal lookGlobalFlag pgf f = Map.lookup f (gflags pgf) lookAbsFlag :: PGF -> CId -> Maybe Literal lookAbsFlag pgf f = Map.lookup f (aflags (abstract pgf)) lookConcr :: PGF -> Language -> Concr lookConcr pgf cnc = lookMap (error $ "Missing concrete syntax: " ++ showCId cnc) cnc $ concretes pgf -- use if name fails, use abstract + name; so e.g. "Eng" becomes "DemoEng" lookConcrComplete :: PGF -> CId -> Concr lookConcrComplete pgf cnc = case Map.lookup cnc (concretes pgf) of Just c -> c _ -> lookConcr pgf (mkCId (showCId (absname pgf) ++ showCId cnc)) lookConcrFlag :: PGF -> CId -> CId -> Maybe Literal lookConcrFlag pgf lang f = Map.lookup f $ cflags $ lookConcr pgf lang functionsToCat :: PGF -> CId -> [(CId,Type)] functionsToCat pgf cat = [(f,ty) | (_,f) <- fs, Just (ty,_,_,_) <- [Map.lookup f $ funs $ abstract pgf]] where (_,fs,_) = lookMap ([],[],0) cat $ cats $ abstract pgf -- | List of functions that lack linearizations in the given language. missingLins :: PGF -> Language -> [CId] missingLins pgf lang = [c | c <- fs, not (hasl c)] where fs = Map.keys $ funs $ abstract pgf hasl = hasLin pgf lang hasLin :: PGF -> Language -> CId -> Bool hasLin pgf lang f = Map.member f $ lproductions $ lookConcr pgf lang restrictPGF :: (CId -> Bool) -> PGF -> PGF restrictPGF cond pgf = pgf { abstract = abstr { funs = Map.filterWithKey (\c _ -> cond c) (funs abstr), cats = Map.map (\(hyps,fs,p) -> (hyps,filter (cond . snd) fs,p)) (cats abstr) } } ---- restrict concrs also, might be needed where abstr = abstract pgf depth :: Expr -> Int depth (EAbs _ _ t) = depth t depth (EApp e1 e2) = max (depth e1) (depth e2) + 1 depth _ = 1 cftype :: [CId] -> CId -> Type cftype args val = DTyp [(Explicit,wildCId,cftype [] arg) | arg <- args] val [] typeOfHypo :: Hypo -> Type typeOfHypo (_,_,ty) = ty catSkeleton :: Type -> ([CId],CId) catSkeleton ty = case ty of DTyp hyps val _ -> ([valCat (typeOfHypo h) | h <- hyps],val) typeSkeleton :: Type -> ([(Int,CId)],CId) typeSkeleton ty = case ty of DTyp hyps val _ -> ([(contextLength ty, valCat ty) | h <- hyps, let ty = typeOfHypo h],val) valCat :: Type -> CId valCat ty = case ty of DTyp _ val _ -> val contextLength :: Type -> Int contextLength ty = case ty of DTyp hyps _ _ -> length hyps -- | Show the printname of function or category showPrintName :: PGF -> Language -> CId -> String showPrintName pgf lang id = lookMap (showCId id) id $ printnames $ lookMap (error "no lang") lang $ concretes pgf -- lookup with default value lookMap :: Ord i => a -> i -> Map.Map i a -> a lookMap d c m = Map.findWithDefault d c m --- from Operations combinations :: [[a]] -> [[a]] combinations t = case t of [] -> [[]] aa:uu -> [a:u | a <- aa, u <- combinations uu] cidString = mkCId "String" cidInt = mkCId "Int" cidFloat = mkCId "Float" cidVar = mkCId "__gfVar" -- Utilities for doing linearization -- | BracketedString represents a sentence that is linearized -- as usual but we also want to retain the ''brackets'' that -- mark the beginning and the end of each constituent. data BracketedString = Leaf Token -- ^ this is the leaf i.e. a single token | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString] -- ^ this is a bracket. The 'CId' is the category of -- the phrase. The 'FId' is an unique identifier for -- every phrase in the sentence. For context-free grammars -- i.e. without discontinuous constituents this identifier -- is also unique for every bracket. When there are discontinuous -- phrases then the identifiers are unique for every phrase but -- not for every bracket since the bracket represents a constituent. -- The different constituents could still be distinguished by using -- the constituent index i.e. 'LIndex'. If the grammar is reduplicating -- then the constituent indices will be the same for all brackets -- that represents the same constituent. data BracketedTokn = Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty | LeafKS Token | LeafNE | LeafBIND | LeafSOFT_BIND | LeafCAPIT | LeafKP [BracketedTokn] [([BracketedTokn],[String])] deriving Eq type LinTable = ([CId],Array.Array LIndex [BracketedTokn]) -- | Renders the bracketed string as string where -- the brackets are shown as @(S ...)@ where -- @S@ is the category. showBracketedString :: BracketedString -> String showBracketedString = render . ppBracketedString ppBracketedString (Leaf t) = text t ppBracketedString (Bracket cat fid fid' index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss)) -- | The length of the bracketed string in number of tokens. lengthBracketedString :: BracketedString -> Int lengthBracketedString (Leaf _) = 1 lengthBracketedString (Bracket _ _ _ _ _ _ bss) = sum (map lengthBracketedString bss) untokn :: Maybe String -> [BracketedTokn] -> (Maybe String,[BracketedString]) untokn nw bss = let (nw',bss') = mapAccumR untokn nw bss in case sequence bss' of Just bss -> (nw,concat bss) Nothing -> (nw,[]) where untokn nw (Bracket_ cat fid fid' index fun es bss) = let (nw',bss') = mapAccumR untokn nw bss in case sequence bss' of Just bss -> (nw',Just [Bracket cat fid fid' index fun es (concat bss)]) Nothing -> (Nothing, Nothing) untokn nw (LeafKS t) | null t = (nw,Just []) | otherwise = (Just t,Just [Leaf t]) untokn nw LeafNE = (Nothing, Nothing) untokn nw (LeafKP d vs) = let (nw',bss') = mapAccumR untokn nw (sel d vs nw) in case sequence bss' of Just bss -> (nw',Just (concat bss)) Nothing -> (Nothing, Nothing) where sel d vs Nothing = d sel d vs (Just w) = case [v | (v,cs) <- vs, any (\c -> isPrefixOf c w) cs] of v:_ -> v _ -> d type CncType = (CId, FId) -- concrete type is the abstract type (the category) + the forest id mkLinTable :: Concr -> (CncType -> Bool) -> [CId] -> FunId -> [(CncType,FId,CId,[Expr],LinTable)] -> LinTable mkLinTable cnc filter xs funid args = (xs,listArray (bounds lins) [computeSeq filter (elems (sequences cnc ! seqid)) args | seqid <- elems lins]) where (CncFun _ lins) = cncfuns cnc ! funid computeSeq :: (CncType -> Bool) -> [Symbol] -> [(CncType,FId,CId,[Expr],LinTable)] -> [BracketedTokn] computeSeq filter seq args = concatMap compute seq where compute (SymCat d r) = getArg d r compute (SymLit d r) = getArg d r compute (SymVar d r) = getVar d r compute (SymKS t) = [LeafKS t] compute SymNE = [LeafNE] compute SymBIND = [LeafKS "&+"] compute SymSOFT_BIND = [] compute SymSOFT_SPACE = [] compute SymCAPIT = [LeafKS "&|"] compute SymALL_CAPIT = [LeafKS "&|"] compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]] getArg d r | not (null arg_lin) && filter ct = [Bracket_ cat fid fid' r fun es arg_lin] | otherwise = arg_lin where arg_lin = lin ! r (ct@(cat,fid),fid',fun,es,(_xs,lin)) = args !! d getVar d r = [LeafKS (showCId (xs !! r))] where (_ct,_,_fun,_es,(xs,_lin)) = args !! d flattenBracketedString :: BracketedString -> [String] flattenBracketedString (Leaf w) = [w] flattenBracketedString (Bracket _ _ _ _ _ _ bss) = concatMap flattenBracketedString bss -- The following is a version of Data.List.sortBy which together -- with the sorting also eliminates duplicate values sortNubBy cmp = mergeAll . sequences where sequences (a:b:xs) = case cmp a b of GT -> descending b [a] xs EQ -> sequences (b:xs) LT -> ascending b (a:) xs sequences xs = [xs] descending a as [] = [a:as] descending a as (b:bs) = case cmp a b of GT -> descending b (a:as) bs EQ -> descending a as bs LT -> (a:as) : sequences (b:bs) ascending a as [] = let !x = as [a] in [x] ascending a as (b:bs) = case cmp a b of GT -> let !x = as [a] in x : sequences (b:bs) EQ -> ascending a as bs LT -> ascending b (\ys -> as (a:ys)) bs mergeAll [x] = x mergeAll xs = mergeAll (mergePairs xs) mergePairs (a:b:xs) = let !x = merge a b in x : mergePairs xs mergePairs xs = xs merge as@(a:as') bs@(b:bs') = case cmp a b of GT -> b:merge as bs' EQ -> a:merge as' bs' LT -> a:merge as' bs merge [] bs = bs merge as [] = as -- The following function does case-insensitive comparison of sequences. -- This is used to allow case-insensitive parsing, while -- the linearizer still has access to the original cases. compareCaseInsensitve s1 s2 = case compareSeq (elems s1) (elems s2) of (EQ,c) -> c (c, _) -> c where compareSeq [] [] = dup EQ compareSeq [] _ = dup LT compareSeq _ [] = dup GT compareSeq (x:xs) (y:ys) = case compareSym x y of (EQ,EQ) -> compareSeq xs ys (EQ,c2) -> case compareSeq xs ys of (c1,_) -> (c1,c2) x -> x compareSym s1 s2 = case s1 of SymCat d1 r1 -> case s2 of SymCat d2 r2 -> case compare d1 d2 of EQ -> dup (r1 `compare` r2) x -> dup x _ -> dup LT SymLit d1 r1 -> case s2 of SymCat {} -> dup GT SymLit d2 r2 -> case compare d1 d2 of EQ -> dup (r1 `compare` r2) x -> dup x _ -> dup LT SymVar d1 r1 -> if tagToEnum# (getTag s2 ># 2#) then dup LT else case s2 of SymVar d2 r2 -> case compare d1 d2 of EQ -> dup (r1 `compare` r2) x -> dup x _ -> dup GT SymKS t1 -> if tagToEnum# (getTag s2 ># 3#) then dup LT else case s2 of SymKS t2 -> t1 `compareToken` t2 _ -> dup GT SymKP a1 b1 -> if tagToEnum# (getTag s2 ># 4#) then dup LT else case s2 of SymKP a2 b2 -> case compare a1 a2 of EQ -> dup (b1 `compare` b2) x -> dup x _ -> dup GT _ -> let t1 = getTag s1 t2 = getTag s2 in if tagToEnum# (t1 <# t2) then dup LT else if tagToEnum# (t1 ==# t2) then dup EQ else dup GT compareToken [] [] = dup EQ compareToken [] _ = dup LT compareToken _ [] = dup GT compareToken (x:xs) (y:ys) | x == y = compareToken xs ys | otherwise = case compare (toLower x) (toLower y) of EQ -> case compareToken xs ys of (c,_) -> (c,compare x y) c -> dup c dup x = (x,x)