module GF.Text.Transliterations ( transliterate, transliterateWithFile, transliteration, characterTable, transliterationPrintNames ) where import Data.Char import Numeric import qualified Data.Map as Map -- transliterations between ASCII and a Unicode character set -- current transliterations: devanagari, thai -- to add a new one: define the Unicode range and the corresponding ASCII strings, -- which may be one or more characters long -- conventions to be followed: -- each character is either [letter] or [letter+nonletters] -- when using a sparse range of unicodes, mark missing codes as "-" in transliterations -- characters can be invisible: ignored in translation to unicode transliterate :: String -> Maybe (String -> String) transliterate s = case s of 'f':'r':'o':'m':'_':t -> fmap appTransFromUnicode $ transliteration t 't':'o':'_':t -> fmap appTransToUnicode $ transliteration t _ -> Nothing transliterateWithFile :: String -> String -> Bool -> (String -> String) transliterateWithFile name src isFrom = (if isFrom then appTransFromUnicode else appTransToUnicode) (getTransliterationFile name src) transliteration :: String -> Maybe Transliteration transliteration s = Map.lookup s allTransliterations allTransliterations = Map.fromList [ ("amharic",transAmharic), ("ancientgreek", transAncientGreek), ("arabic", transArabic), ("arabic_unvocalized", transArabicUnvoc), ("devanagari", transDevanagari), ("greek", transGreek), ("hebrew", transHebrew), ("persian", transPersian), ("sanskrit", transSanskrit), ("sindhi", transSindhi), ("nepali", transNepali), ("telugu", transTelugu), ("thai", transThai), ("urdu", transUrdu) ] -- used in command options and help transliterationPrintNames = [(t,printname p) | (t,p) <- Map.toList allTransliterations] characterTable :: Transliteration -> String characterTable = unlines . map prOne . Map.assocs . trans_from_unicode where prOne (i,s) = unwords ["|", showHex i "", "|", [toEnum i], "|", s, "|"] data Transliteration = Trans { trans_to_unicode :: Map.Map String Int, trans_from_unicode :: Map.Map Int String, invisible_chars :: [String], printname :: String } appTransToUnicode :: Transliteration -> String -> String appTransToUnicode trans = concat . map (\c -> maybe c (return . toEnum) $ Map.lookup c (trans_to_unicode trans) ) . filter (flip notElem (invisible_chars trans)) . unchar appTransFromUnicode :: Transliteration -> String -> String appTransFromUnicode trans = concat . map (\c -> maybe [toEnum c] id $ Map.lookup c (trans_from_unicode trans) ) . map fromEnum mkTransliteration :: String -> [String] -> [Int] -> Transliteration mkTransliteration name ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts)) [] name where tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"] uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"] getTransliterationFile :: String -> String -> Transliteration getTransliterationFile name = uncurry (mkTransliteration name) . codes where codes = unzip . map (mkOne . words) . filter (not . all isSpace) . lines mkOne ws = case ws of [c]:t:_ -> (t,fromEnum c) --