module PGF.LexingAGreek where -- HL 2a1.2.2016

import Data.Char(isSpace)

-- * Text lexing without capitalization of the first word of every sentence.
-- Greek sentences in (transliterated) texts don't start with a capital character.

-- | Lexer for ordinary Greek text, which has no vowel length indicators, so
-- that @'.'@ can be used as a sentence separator.
--
-- The input is split into tokens as follows:
--
-- * every character accepted by 'isAGreekPunct' becomes a one-character token;
-- * whitespace (per 'isSpace') is dropped;
-- * any other maximal run of characters, up to the next whitespace or
--   punctuation character, becomes one word token.
--
-- The empty string yields the empty token list.
lexTextAGreek :: String -> [String]
lexTextAGreek s = lext s where
  lext s = case s of
    -- A punctuation character is a token of its own.
    c:cs | isAGreekPunct c -> [c] : (lext cs)
    -- Whitespace only separates tokens; it is never emitted.
    c:cs | isSpace c       ->       lext cs
    -- Otherwise read a word up to the next whitespace or punctuation.
    _:_ -> let (w,cs) = break (\x -> isSpace x || isAGreekPunct x) s
           in w : lext cs
    [] -> []

-- | Lexer for philological Greek text, which may use vowel length indicators.
-- Then @'.'@ is not a sentence separator, nor is @"v. "@ for a vowel @v@.
-- A sentence ends at @"v.."@ or at @"c. "@ with a non-vowel @c@.
--
-- Punctuation and whitespace at the start of the remaining input are handled
-- exactly as in 'lexTextAGreek'. The difference is what happens when a word
-- @w@ is directly followed by @'.'@: the dot is attached to the word instead
-- of being emitted as a separate token (see the comments on the alternatives).
--
-- NOTE(review): the code below never tests whether a character is a vowel;
-- a dot directly after any word is attached to that word. Confirm that this
-- matches the intent described above.
lexTextAGreek2 :: String -> [String]
lexTextAGreek2 s = lext s where
  lext s = case s of
    -- A punctuation character not directly following a word is its own token.
    c:cs | isAGreekPunct c -> [c] : (lext cs)
    -- Whitespace only separates tokens; it is never emitted.
    c:cs | isSpace c       ->       lext cs
    -- Read a word w, then inspect what follows it. The order of the
    -- alternatives below matters: the first matching one wins.
    _:_ -> let (w,cs) = break (\x -> isSpace x || isAGreekPunct x) s
           in case cs of
                -- Two dots followed by whitespace: the first dot belongs to
                -- the word; lexing resumes at the second dot, which is then
                -- emitted as a separate punctuation token.
                '.':'.':d:ds | isSpace d
                   -> (w++['.']) : lext ('.':d:ds)
                -- One dot followed by punctuation or whitespace: the dot
                -- belongs to the word; lexing resumes at the following char.
                '.':d:ds | isAGreekPunct d || isSpace d
                   -> (w++['.']) : lext (d:ds)
                -- One dot followed by any other character (whitespace and
                -- punctuation were already handled by the alternative above):
                -- the dot is word-internal, so w, the dot and the first token
                -- of the remaining input are glued into a single token.
                '.':d:ds | not (isSpace d)
                   -> case lext (d:ds) of
                        e:es -> (w++['.']++e) : es
                        -- Fallback when the rest produces no first token.
                        es   -> (w++['.']) : es
                -- A dot at the very end of the input belongs to the word.
                '.':[] -> (w++['.']) : []
                -- No dot after the word: emit the word and continue.
                _ -> w : lext cs
    [] -> []

-- | Concatenate a token list back into a string.
--
-- Tokens are joined with single spaces, except that a one-character token
-- accepted by 'isAGreekPunct' which directly follows another token is attached
-- to that token without a space in between. A space is still inserted after
-- such a punctuation token when more tokens follow. The empty list yields the
-- empty string.
unlexTextAGreek :: [String] -> String
unlexTextAGreek = unlext where
  unlext s = case s of
    -- Last token: no trailing space.
    w:[] -> w
    -- Token followed by a final punctuation token: attach, no trailing space.
    w:[c]:[] | isAGreekPunct c -> w ++ [c]
    -- Token followed by punctuation and more tokens: attach, then a space.
    w:[c]:cs | isAGreekPunct c -> w ++ [c] ++ " " ++ unlext cs
    -- Ordinary case: separate consecutive tokens by one space.
    w:ws -> w ++ " " ++ unlext ws
    [] -> []

-- Punctuation test shared by the lexers and the unlexer above: a character
-- counts as punctuation when it occurs in the string literal below.
isAGreekPunct = flip elem ".,;