Macros.hs

Plain text version of Macros.hs

{-# LANGUAGE MagicHash, BangPatterns, FlexibleContexts #-}
module PGF.Macros where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint

import PGF.CId
import PGF.Data
import Control.Monad
import qualified Data.Map    as Map
--import qualified Data.Set    as Set
--import qualified Data.IntMap as IntMap
--import qualified Data.IntSet as IntSet
import qualified Data.Array  as Array
--import Data.Maybe
import Data.List
import Data.Array.IArray
import Text.PrettyPrint
import GHC.Prim
import GHC.Base(getTag)
import Data.Char

-- operations for manipulating PGF grammars and objects

mapConcretes :: (Concr -> Concr) -> PGF -> PGF
mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) }

lookType :: Abstr -> CId -> Type
lookType abs f = 
  case lookMap (error $ "lookType " ++ show f) f (funs abs) of
    (ty,_,_,_) -> ty

isData :: Abstr -> CId -> Bool
isData abs f =
  case Map.lookup f (funs abs) of
    Just (_,_,Nothing,_) -> True       -- the encoding of data constrs
    _                    -> False

lookValCat :: Abstr -> CId -> CId
lookValCat abs = valCat . lookType abs

lookStartCat :: PGF -> CId
lookStartCat pgf = mkCId $
  case msum $ Data.List.map (Map.lookup (mkCId "startcat")) [gflags pgf, aflags (abstract pgf)] of
    Just (LStr s) -> s
    _             -> "S"

lookGlobalFlag :: PGF -> CId -> Maybe Literal
lookGlobalFlag pgf f = Map.lookup f (gflags pgf)

lookAbsFlag :: PGF -> CId -> Maybe Literal
lookAbsFlag pgf f = Map.lookup f (aflags (abstract pgf))

lookConcr :: PGF -> Language -> Concr
lookConcr pgf cnc = 
    lookMap (error $ "Missing concrete syntax: " ++ showCId cnc) cnc $ concretes pgf

-- use if name fails, use abstract + name; so e.g. "Eng" becomes "DemoEng" 
lookConcrComplete :: PGF -> CId -> Concr
lookConcrComplete pgf cnc = 
  case Map.lookup cnc (concretes pgf) of
    Just c -> c
    _ -> lookConcr pgf (mkCId (showCId (absname pgf) ++ showCId cnc))

lookConcrFlag :: PGF -> CId -> CId -> Maybe Literal
lookConcrFlag pgf lang f = Map.lookup f $ cflags $ lookConcr pgf lang

functionsToCat :: PGF -> CId -> [(CId,Type)]
functionsToCat pgf cat =
  [(f,ty) | (_,f) <- fs, Just (ty,_,_,_) <- [Map.lookup f $ funs $ abstract pgf]]
 where 
   (_,fs,_) = lookMap ([],[],0) cat $ cats $ abstract pgf

-- | List of functions that lack linearizations in the given language.
missingLins :: PGF -> Language -> [CId]
missingLins pgf lang = [c | c <- fs, not (hasl c)] where
  fs = Map.keys $ funs $ abstract pgf
  hasl = hasLin pgf lang

hasLin :: PGF -> Language -> CId -> Bool
hasLin pgf lang f = Map.member f $ lproductions $ lookConcr pgf lang

restrictPGF :: (CId -> Bool) -> PGF -> PGF
restrictPGF cond pgf = pgf {
  abstract = abstr {
    funs = Map.filterWithKey (\c _ -> cond c) (funs abstr),
    cats = Map.map (\(hyps,fs,p) -> (hyps,filter (cond . snd) fs,p)) (cats abstr)
    }
  }  ---- restrict concrs also, might be needed
 where
  abstr = abstract pgf

depth :: Expr -> Int
depth (EAbs _ _ t) = depth t
depth (EApp e1 e2) = max (depth e1) (depth e2) + 1
depth _            = 1

cftype :: [CId] -> CId -> Type
cftype args val = DTyp [(Explicit,wildCId,cftype [] arg) | arg <- args] val []

typeOfHypo :: Hypo -> Type
typeOfHypo (_,_,ty) = ty

catSkeleton :: Type -> ([CId],CId)
catSkeleton ty = case ty of
  DTyp hyps val _ -> ([valCat (typeOfHypo h) | h <- hyps],val)

typeSkeleton :: Type -> ([(Int,CId)],CId)
typeSkeleton ty = case ty of
  DTyp hyps val _ -> ([(contextLength ty, valCat ty) | h <- hyps, let ty = typeOfHypo h],val)

valCat :: Type -> CId
valCat ty = case ty of
  DTyp _ val _ -> val

contextLength :: Type -> Int
contextLength ty = case ty of
  DTyp hyps _ _ -> length hyps

-- | Show the printname of function or category
showPrintName :: PGF -> Language -> CId -> String
showPrintName pgf lang id = lookMap (showCId id) id $ printnames $ lookMap (error "no lang") lang $ concretes pgf

-- lookup with default value
lookMap :: Ord i => a -> i -> Map.Map i a -> a 
lookMap d c m = Map.findWithDefault d c m

--- from Operations
combinations :: [[a]] -> [[a]]
combinations t = case t of 
  []    -> [[]]
  aa:uu -> [a:u | a <- aa, u <- combinations uu]

cidString = mkCId "String"
cidInt    = mkCId "Int"
cidFloat  = mkCId "Float"
cidVar    = mkCId "__gfVar"


-- Utilities for doing linearization

-- | BracketedString represents a sentence that is linearized
-- as usual but we also want to retain the ''brackets'' that
-- mark the beginning and the end of each constituent.
data BracketedString
  = Leaf Token                                                                -- ^ this is the leaf i.e. a single token
  | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString]
                                                                               -- ^ this is a bracket. The 'CId' is the category of
                                                                               -- the phrase. The 'FId' is an unique identifier for
                                                                               -- every phrase in the sentence. For context-free grammars
                                                                               -- i.e. without discontinuous constituents this identifier
                                                                               -- is also unique for every bracket. When there are discontinuous 
                                                                               -- phrases then the identifiers are unique for every phrase but
                                                                               -- not for every bracket since the bracket represents a constituent.
                                                                               -- The different constituents could still be distinguished by using
                                                                               -- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
                                                                               -- then the constituent indices will be the same for all brackets
                                                                               -- that represents the same constituent.

data BracketedTokn
  = Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn]    -- Invariant: the list is not empty
  | LeafKS Token
  | LeafNE
  | LeafBIND
  | LeafSOFT_BIND
  | LeafCAPIT
  | LeafKP [BracketedTokn] [([BracketedTokn],[String])]
  deriving Eq

type LinTable = ([CId],Array.Array LIndex [BracketedTokn])

-- | Renders the bracketed string as string where 
-- the brackets are shown as @(S ...)@ where
-- @S@ is the category.
showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString

ppBracketedString (Leaf t) = text t
ppBracketedString (Bracket cat fid fid' index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))

-- | The length of the bracketed string in number of tokens.
lengthBracketedString :: BracketedString -> Int
lengthBracketedString (Leaf _)                  = 1
lengthBracketedString (Bracket _ _ _ _ _ _ bss) = sum (map lengthBracketedString bss)

untokn :: Maybe String -> [BracketedTokn] -> (Maybe String,[BracketedString])
untokn nw bss =
  let (nw',bss') = mapAccumR untokn nw bss
  in case sequence bss' of
       Just bss -> (nw,concat bss)
       Nothing  -> (nw,[])
  where
    untokn nw (Bracket_ cat fid fid' index fun es bss) =
      let (nw',bss') = mapAccumR untokn nw bss
      in case sequence bss' of
           Just bss -> (nw',Just [Bracket cat fid fid' index fun es (concat bss)])
           Nothing  -> (Nothing, Nothing)
    untokn nw (LeafKS t)
      | null t              = (nw,Just [])
      | otherwise           = (Just t,Just [Leaf t])
    untokn nw LeafNE        = (Nothing, Nothing)
    untokn nw (LeafKP d vs) = let (nw',bss') = mapAccumR untokn nw (sel d vs nw)
                              in case sequence bss' of
                                   Just bss -> (nw',Just (concat bss))
                                   Nothing  -> (Nothing, Nothing)
                              where
                                sel d vs Nothing  = d
                                sel d vs (Just w) =
                                  case [v | (v,cs) <- vs, any (\c -> isPrefixOf c w) cs] of
                                    v:_ -> v
                                    _   -> d

type CncType = (CId, FId)    -- concrete type is the abstract type (the category) + the forest id

mkLinTable :: Concr -> (CncType -> Bool) -> [CId] -> FunId -> [(CncType,FId,CId,[Expr],LinTable)] -> LinTable
mkLinTable cnc filter xs funid args = (xs,listArray (bounds lins) [computeSeq filter (elems (sequences cnc ! seqid)) args | seqid <- elems lins])
  where
    (CncFun _ lins) = cncfuns cnc ! funid

computeSeq :: (CncType -> Bool) -> [Symbol] -> [(CncType,FId,CId,[Expr],LinTable)] -> [BracketedTokn]
computeSeq filter seq args = concatMap compute seq
  where
    compute (SymCat d r)      = getArg d r
    compute (SymLit d r)      = getArg d r
    compute (SymVar d r)      = getVar d r
    compute (SymKS t)         = [LeafKS t]
    compute SymNE             = [LeafNE]
    compute SymBIND           = [LeafKS "&+"]
    compute SymSOFT_BIND      = []
    compute SymSOFT_SPACE     = []
    compute SymCAPIT          = [LeafKS "&|"]
    compute SymALL_CAPIT      = [LeafKS "&|"]
    compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]]

    getArg d r
      | not (null arg_lin) &&
        filter ct   = [Bracket_ cat fid fid' r fun es arg_lin]
      | otherwise   = arg_lin
      where
        arg_lin                              = lin ! r
        (ct@(cat,fid),fid',fun,es,(_xs,lin)) = args !! d

    getVar d r = [LeafKS (showCId (xs !! r))]
      where
        (_ct,_,_fun,_es,(xs,_lin)) = args !! d

flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w)                  = [w]
flattenBracketedString (Bracket _ _ _ _ _ _ bss) = concatMap flattenBracketedString bss


-- The following is a version of Data.List.sortBy which together
-- with the sorting also eliminates duplicate values 
sortNubBy cmp = mergeAll . sequences
  where
    sequences (a:b:xs) =
      case cmp a b of
        GT -> descending b [a]  xs
        EQ -> sequences (b:xs)
        LT -> ascending  b (a:) xs
    sequences xs = [xs]

    descending a as []     = [a:as]
    descending a as (b:bs) =
      case cmp a b of
        GT -> descending b (a:as) bs
        EQ -> descending a as bs
        LT -> (a:as) : sequences (b:bs)

    ascending a as []     = let !x = as [a]
                            in [x]
    ascending a as (b:bs) =
      case cmp a b of
        GT -> let !x = as [a]
              in x : sequences (b:bs)
        EQ -> ascending a as bs
        LT -> ascending b (\ys -> as (a:ys)) bs

    mergeAll [x] = x
    mergeAll xs  = mergeAll (mergePairs xs)

    mergePairs (a:b:xs) = let !x = merge a b
                          in x : mergePairs xs
    mergePairs xs       = xs

    merge as@(a:as') bs@(b:bs') =
      case cmp a b of
        GT -> b:merge as  bs'
        EQ -> a:merge as' bs'
        LT -> a:merge as' bs
    merge [] bs         = bs
    merge as []         = as


-- The following function does case-insensitive comparison of sequences.
-- This is used to allow case-insensitive parsing, while
-- the linearizer still has access to the original cases.
compareCaseInsensitve s1 s2 =
    case compareSeq (elems s1) (elems s2) of
      (EQ,c) -> c
      (c, _) -> c
    where
      compareSeq []     []     = dup EQ
      compareSeq []     _      = dup LT
      compareSeq _      []     = dup GT
      compareSeq (x:xs) (y:ys) =
        case compareSym x y of
          (EQ,EQ) -> compareSeq xs ys
          (EQ,c2) -> case compareSeq xs ys of
                       (c1,_) -> (c1,c2)
          x       -> x

      compareSym s1 s2 =
        case s1 of
          SymCat d1 r1
            -> case s2 of
                 SymCat d2 r2
                   -> case compare d1 d2 of
                        EQ -> dup (r1 `compare` r2)
                        x  -> dup x
                 _ -> dup LT
          SymLit d1 r1
            -> case s2 of
                 SymCat {} -> dup GT
                 SymLit d2 r2
                   -> case compare d1 d2 of
                        EQ -> dup (r1 `compare` r2)
                        x  -> dup x
                 _ -> dup LT
          SymVar d1 r1
            -> if tagToEnum# (getTag s2 ># 2#)
                 then dup LT
                 else case s2 of
                        SymVar d2 r2
                          -> case compare d1 d2 of
                               EQ -> dup (r1 `compare` r2)
                               x  -> dup x
                        _ -> dup GT
          SymKS t1
            -> if tagToEnum# (getTag s2 ># 3#)
                 then dup LT
                 else case s2 of
                        SymKS t2 -> t1 `compareToken` t2
                        _          -> dup GT
          SymKP a1 b1
            -> if tagToEnum# (getTag s2 ># 4#)
                 then dup LT
                 else case s2 of
                        SymKP a2 b2
                          -> case compare a1 a2 of
                               EQ -> dup (b1 `compare` b2)
                               x  -> dup x
                        _ -> dup GT
          _ -> let t1 = getTag s1
                   t2 = getTag s2
               in if tagToEnum# (t1 <# t2) 
                    then dup LT
                    else if tagToEnum# (t1 ==# t2)
                           then dup EQ
                           else dup GT
  
      compareToken []     []     = dup EQ
      compareToken []     _      = dup LT
      compareToken _      []     = dup GT
      compareToken (x:xs) (y:ys)
        | x == y    = compareToken xs ys
        | otherwise = case compare (toLower x) (toLower y) of
                        EQ -> case compareToken xs ys of
                                (c,_) -> (c,compare x y)
                        c  -> dup c

      dup x = (x,x)