RegExp.hs

Plain text version of RegExp.hs

module GF.Speech.RegExp (RE(..), 
                         epsilonRE, nullRE, 
                         isEpsilon, isNull,
                         unionRE, concatRE, seqRE,
                         repeatRE, minimizeRE,
                         mapRE, mapRE', joinRE,
                         symbolsRE,
                         dfa2re, prRE) where

import Data.List

import GF.Data.Utilities
import GF.Speech.FiniteState

data RE a = 
      REUnion [RE a]  -- ^ REUnion [] is null
    | REConcat [RE a] -- ^ REConcat [] is epsilon
    | RERepeat (RE a)
    | RESymbol a
      deriving (Eq,Ord,Show)


dfa2re :: (Ord a) => DFA a -> RE a
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
             . oneFinalState () epsilonRE . mapTransitions RESymbol 
  where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
        merge es = [(f,t,unionRE ls) 
                        | ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]

elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
elimStates fa =
    case [s | (s,_) <- states fa, isInternal fa s] of
      [] -> fa
      sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
          where sAs = nonLoopTransitionsTo sE fa
                sBs = nonLoopTransitionsFrom sE fa
                r2 = unionRE $ loops sE fa
                ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
                r r1 r3 = concatRE [r1, repeatRE r2, r3]

epsilonRE :: RE a
epsilonRE = REConcat []

nullRE :: RE a
nullRE = REUnion []

isNull :: RE a -> Bool
isNull (REUnion []) = True
isNull _ = False

isEpsilon :: RE a -> Bool
isEpsilon (REConcat []) = True
isEpsilon _ = False

unionRE :: Ord a => [RE a] -> RE a
unionRE = unionOrId . nub' . concatMap toList 
  where 
    toList (REUnion xs) = xs
    toList x = [x]
    unionOrId [r] = r
    unionOrId rs = REUnion rs

concatRE :: [RE a] -> RE a
concatRE xs | any isNull xs = nullRE
            | otherwise = case concatMap toList xs of
                            [r] -> r
                            rs -> REConcat rs
  where
    toList (REConcat xs) = xs
    toList x = [x]

seqRE :: [a] -> RE a
seqRE = concatRE . map RESymbol

repeatRE :: RE a -> RE a
repeatRE x | isNull x || isEpsilon x = epsilonRE
           | otherwise = RERepeat x

finalRE :: Ord a => DFA (RE a) -> RE a
finalRE fa = concatRE [repeatRE r1, r2, 
                       repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
  where 
    s0 = startState fa
    [sF] = finalStates fa
    r1 = unionRE $ loops s0 fa
    r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
    r3 = unionRE $ loops sF fa
    r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa

reverseRE :: RE a -> RE a
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
reverseRE (RERepeat x) = RERepeat (reverseRE x)
reverseRE x = x

minimizeRE :: Ord a => RE a -> RE a
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward

mergeForward :: Ord a => RE a -> RE a
mergeForward (REUnion xs) = 
    unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
mergeForward (RERepeat r) = repeatRE (mergeForward r)
mergeForward r = r

firstRE :: RE a -> (RE a, RE a)
firstRE (REConcat (x:xs)) = (x, REConcat xs)
firstRE r = (r,epsilonRE)

mapRE :: (a -> b) -> RE a -> RE b
mapRE f = mapRE' (RESymbol . f)

mapRE' :: (a -> RE b) -> RE a -> RE b
mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
mapRE' f (RESymbol s) = f s

joinRE :: RE (RE a) -> RE a
joinRE (REConcat xs) = REConcat (map joinRE xs)
joinRE (REUnion xs) = REUnion (map joinRE xs)
joinRE (RERepeat xs) = RERepeat (joinRE xs)
joinRE (RESymbol ss) = ss

symbolsRE :: RE a -> [a]
symbolsRE (REConcat xs) = concatMap symbolsRE xs
symbolsRE (REUnion xs) = concatMap symbolsRE xs
symbolsRE (RERepeat x) = symbolsRE x
symbolsRE (RESymbol x) = [x]

-- Debugging

prRE :: (a -> String) -> RE a -> String
prRE = prRE' 0

prRE' :: Int -> (a -> String) -> RE a -> String
prRE' _ _ (REUnion []) = "<NULL>"
prRE' n f (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1 f) xs)))
prRE' n f (REConcat xs) = p n 2 (unwords (map (prRE' 2 f) xs))
prRE' n f (RERepeat x) = p n 3 (prRE' 3 f x) ++ "*"
prRE' _ f (RESymbol s) = f s

p n m s | n >= m = "(" ++ s ++ ")"
        | True = s