GF.Speech.CFGToFA

Plain source file: src/compiler/GF/Speech/CFGToFA.hs (2015-03-03)

GF.Speech.CFGToFA is imported by: ...
----------------------------------------------------------------------
-- |
-- Module      : GF.Speech.CFGToFA
--
-- Approximates CFGs with finite state networks.
----------------------------------------------------------------------
module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular,
                          MFA(..), cfgToMFA, cfgToFA') where

import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set

import PGF.Internal
import GF.Data.Utilities
import GF.Grammar.CFG
--import GF.Speech.PGFToCFG
--import GF.Infra.Ident (Ident)

import GF.Data.Graph
--import GF.Data.Relation
import GF.Speech.FiniteState
--import GF.Speech.CFG

data Recursivity = RightR | LeftR | NotR

data MutRecSet = MutRecSet {
                            mrCats :: Set Cat,
                            mrNonRecRules :: [CFRule],
                            mrRecRules :: [CFRule],
                            mrRec :: Recursivity
                           }


type MutRecSets = Map Cat MutRecSet

--
-- * Multiple DFA type
--

data MFA = MFA Cat [(Cat,DFA CFSymbol)]



cfgToFA :: CFG -> DFA Token
cfgToFA = minimize . compileAutomaton . makeSimpleRegular


--
-- * Compile strongly regular grammars to NFAs
--

-- Convert a strongly regular grammar to a finite automaton.
compileAutomaton :: CFG -> NFA Token
compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa
  where 
  (fa,s,f) = newFA_
  ns = mutRecSets g $ mutRecCats False g

-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
--   Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State 
          -> NFA Token -> NFA Token
make_fa c@(g,ns) q0 alpha q1 fa = 
   case alpha of
        []              -> newTransition q0 q1 Nothing fa
        [Terminal t]    -> newTransition q0 q1 (Just t) fa
        [NonTerminal a] -> 
            case Map.lookup a ns of
              -- a is recursive
              Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) -> 
                  case mrRec n of
                    -- the set Ni is right-recursive or cyclic
                    RightR ->
                        let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
                                  ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs, 
                                       let (xs,NonTerminal d) = (init ss,last ss)]
                         in make_fas new $ newTransition q0 (getState a) Nothing fa'
                    -- the set Ni is left-recursive                         
                    LeftR ->
                        let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
                                  ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs]
                         in make_fas new $ newTransition (getState a) q1 Nothing fa'
                where
                  (fa',stateMap) = addStatesForCats ni fa
                  getState x = Map.findWithDefault 
                               (error $ "CFGToFiniteState: No state for " ++ x) 
                               x stateMap
              -- a is not recursive
              Nothing -> let rs = catRules g a
                          in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
        (x:beta) -> let (fa',q) = newState () fa
                     in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
  where
  make_fa_ = make_fa c
  make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs

--
-- * Compile a strongly regular grammar to a DFA with sub-automata
--

cfgToMFA :: CFG -> MFA
cfgToMFA  = buildMFA . makeSimpleRegular

-- | Build a DFA by building and expanding an MFA
cfgToFA' :: CFG -> DFA Token
cfgToFA' = mfaToDFA . cfgToMFA

buildMFA :: CFG -> MFA
buildMFA g = sortSubLats $ removeUnusedSubLats mfa
  where fas = compileAutomata g
        mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas]

mfaStartDFA :: MFA -> DFA CFSymbol
mfaStartDFA (MFA start subs) = 
    fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs

mfaToDFA :: MFA -> DFA Token
mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
  where
  subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
  getSub l = fromJust $ Map.lookup l subs'
  expand (FA (Graph c ns es) s f) 
      = foldl' expandEdge (FA (Graph c ns []) s f) es
  expandEdge fa (f,t,x) = 
      case x of
        Nothing              -> newTransition f t Nothing  fa
        Just (Terminal s)    -> newTransition f t (Just s) fa
        Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l)

removeUnusedSubLats :: MFA -> MFA
removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
  where
  usedMap = subLatUseMap mfa
  used = growUsedSet (Set.singleton start)
  isUsed c = c `Set.member` used
  growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)

subLatUseMap :: MFA -> Map Cat (Set Cat)
subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]

usedSubLats :: DFA CFSymbol -> Set Cat
usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa]

-- | Sort sub-networks topologically.
sortSubLats :: MFA -> MFA
sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
  where
  usedByMap = revMultiMap (subLatUseMap mfa)
  sortLats _ [] = []
  sortLats ub ls = xs ++ sortLats ub' ys
      where (xs,ys) = partition ((==0) . indeg) ls
            ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
            indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub

-- | Convert a strongly regular grammar to a number of finite automata,
--   one for each non-terminal.
--   The edges in the automata accept tokens, or name another automaton to use.
compileAutomata :: CFG
                 -> [(Cat,NFA CFSymbol)]
                    -- ^ A map of non-terminals and their automata.
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
  where
  mrs = mutRecSets g $ mutRecCats True g
  makeOneFA c = make_fa1 mr s [NonTerminal c] f fa 
    where (fa,s,f) = newFA_
          mr = fromJust (Map.lookup c mrs)


-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
--   Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
--   adapted to build a finite automaton for a single (mutually recursive) set only.
--   Categories not in the set will result in category-labelled edges.
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
                      --   we are building the automaton.
         -> State     -- ^ State to come from
         -> [CFSymbol] -- ^ Symbols to accept
         -> State     -- ^ State to end up in
         -> NFA CFSymbol -- ^ FA to add to.
         -> NFA CFSymbol
make_fa1 mr q0 alpha q1 fa = 
   case alpha of
        []        -> newTransition q0 q1 Nothing fa
        [t@(Terminal _)] -> newTransition q0 q1 (Just t) fa
        [c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
        [NonTerminal a] ->
            case mrRec mr of
                NotR -> -- the set is a non-recursive (always singleton) set of categories
                        -- so the set of category rules is the set of rules for the whole set
                    make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
                RightR -> -- the set is right-recursive or cyclic
                    let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
                              ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr, 
                                                                 let (xs,NonTerminal d) = (init ss,last ss)]
                     in make_fas new $ newTransition q0 (getState a) Nothing fa'
                LeftR -> -- the set is left-recursive
                    let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
                              ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr]
                     in make_fas new $ newTransition (getState a) q1 Nothing fa'
             where
             (fa',stateMap) = addStatesForCats (mrCats mr) fa
             getState x = Map.findWithDefault 
                          (error $ "CFGToFiniteState: No state for " ++ x) 
                          x stateMap
        (x:beta) -> let (fa',q) = newState () fa
                     in make_fas [(q0,[x],q),(q,beta,q1)] fa'
  where
  make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs

mutRecSets :: CFG -> [Set Cat] -> MutRecSets
mutRecSets g = Map.fromList . concatMap mkMutRecSet
  where 
  mkMutRecSet cs = [ (c,ms) | c <- csl ]
   where csl = Set.toList cs
         rs = catSetRules g cs
         (nrs,rrs) = partition (ruleIsNonRecursive cs) rs
         ms = MutRecSet {
                         mrCats = cs,
                         mrNonRecRules = nrs,
                         mrRecRules = rrs,
                         mrRec = rec
                        }
         rec | null rrs = NotR
             | all (isRightLinear cs) rrs = RightR
             | otherwise = LeftR

--
-- * Utilities
--

-- | Add a state for the given NFA for each of the categories
--   in the given set. Returns a map of categories to their
--   corresponding states.
addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
addStatesForCats cs fa = (fa', m)
  where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
        m = Map.fromList (zip (Set.toList cs) (map fst ns))

revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]

Index

(HTML for this module was generated on 2015-03-03. About the conversion tool.)