GF.Speech.RegExp

Plain source file: src/compiler/GF/Speech/RegExp.hs (2013-09-18)

GF.Speech.RegExp is imported by: ...
module GF.Speech.RegExp (RE(..), 
                         epsilonRE, nullRE, 
                         isEpsilon, isNull,
                         unionRE, concatRE, seqRE,
                         repeatRE, minimizeRE,
                         mapRE, mapRE', joinRE,
                         symbolsRE,
                         dfa2re, prRE) where

import Data.List

import GF.Data.Utilities
import GF.Speech.FiniteState

data RE a = 
      REUnion [RE a]  -- ^ REUnion [] is null
    | REConcat [RE a] -- ^ REConcat [] is epsilon
    | RERepeat (RE a)
    | RESymbol a
      deriving (Eq,Ord,Show)


dfa2re :: (Ord a) => DFA a -> RE a
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
             . oneFinalState () epsilonRE . mapTransitions RESymbol 
  where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
        merge es = [(f,t,unionRE ls) 
                        | ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]

elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
elimStates fa =
    case [s | (s,_) <- states fa, isInternal fa s] of
      [] -> fa
      sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
          where sAs = nonLoopTransitionsTo sE fa
                sBs = nonLoopTransitionsFrom sE fa
                r2 = unionRE $ loops sE fa
                ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
                r r1 r3 = concatRE [r1, repeatRE r2, r3]

epsilonRE :: RE a
epsilonRE = REConcat []

nullRE :: RE a
nullRE = REUnion []

isNull :: RE a -> Bool
isNull (REUnion []) = True
isNull _ = False

isEpsilon :: RE a -> Bool
isEpsilon (REConcat []) = True
isEpsilon _ = False

unionRE :: Ord a => [RE a] -> RE a
unionRE = unionOrId . nub' . concatMap toList 
  where 
    toList (REUnion xs) = xs
    toList x = [x]
    unionOrId [r] = r
    unionOrId rs = REUnion rs

concatRE :: [RE a] -> RE a
concatRE xs | any isNull xs = nullRE
            | otherwise = case concatMap toList xs of
                            [r] -> r
                            rs -> REConcat rs
  where
    toList (REConcat xs) = xs
    toList x = [x]

seqRE :: [a] -> RE a
seqRE = concatRE . map RESymbol

repeatRE :: RE a -> RE a
repeatRE x | isNull x || isEpsilon x = epsilonRE
           | otherwise = RERepeat x

finalRE :: Ord a => DFA (RE a) -> RE a
finalRE fa = concatRE [repeatRE r1, r2, 
                       repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
  where 
    s0 = startState fa
    [sF] = finalStates fa
    r1 = unionRE $ loops s0 fa
    r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
    r3 = unionRE $ loops sF fa
    r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa

reverseRE :: RE a -> RE a
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
reverseRE (RERepeat x) = RERepeat (reverseRE x)
reverseRE x = x

minimizeRE :: Ord a => RE a -> RE a
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward

mergeForward :: Ord a => RE a -> RE a
mergeForward (REUnion xs) = 
    unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
mergeForward (RERepeat r) = repeatRE (mergeForward r)
mergeForward r = r

firstRE :: RE a -> (RE a, RE a)
firstRE (REConcat (x:xs)) = (x, REConcat xs)
firstRE r = (r,epsilonRE)

mapRE :: (a -> b) -> RE a -> RE b
mapRE f = mapRE' (RESymbol . f)

mapRE' :: (a -> RE b) -> RE a -> RE b
mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
mapRE' f (RESymbol s) = f s

joinRE :: RE (RE a) -> RE a
joinRE (REConcat xs) = REConcat (map joinRE xs)
joinRE (REUnion xs) = REUnion (map joinRE xs)
joinRE (RERepeat xs) = RERepeat (joinRE xs)
joinRE (RESymbol ss) = ss

symbolsRE :: RE a -> [a]
symbolsRE (REConcat xs) = concatMap symbolsRE xs
symbolsRE (REUnion xs) = concatMap symbolsRE xs
symbolsRE (RERepeat x) = symbolsRE x
symbolsRE (RESymbol x) = [x]

-- Debugging

prRE :: (a -> String) -> RE a -> String
prRE = prRE' 0

prRE' :: Int -> (a -> String) -> RE a -> String
prRE' _ _ (REUnion []) = "<NULL>"
prRE' n f (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1 f) xs)))
prRE' n f (REConcat xs) = p n 2 (unwords (map (prRE' 2 f) xs))
prRE' n f (RERepeat x) = p n 3 (prRE' 3 f x) ++ "*"
prRE' _ f (RESymbol s) = f s

p n m s | n >= m = "(" ++ s ++ ")"
        | True = s

Index

(HTML for this module was generated on 2014-04-04. About the conversion tool.)