GF.Text.Clitics

Plain source file: src/compiler/GF/Text/Clitics.hs (2013-09-18)

GF.Text.Clitics is imported by: ...
module GF.Text.Clitics (getClitics,getCliticsText) where

import Data.List

-- AR 6/2/2011
-- Analyse word as stem+clitic whenever 
--   (1) clitic is in clitic list
--   (2) either 
--      (a) stem is in Lexicon
--      (b) stem can be analysed as stem0+clitic0
-- 
-- Examples: 
--   Italian amarmi = amar+mi
--   Finnish autossanikohan = autossa+ni+kohan
--
-- The analysis gives all results, including the case where the whole word is in Lexicon.
-- 
-- The clitics in the list are expected to be reversed.

-- | Analyse a word as a lexicon stem followed by zero or more clitics.
--
-- Arguments:
--
--   * @isLex@    -- membership test for the lexicon, applied to candidate stems
--                   in their normal (unreversed) spelling.
--   * @rclitics@ -- the clitics, each spelled in reverse; they are matched as
--                   prefixes of the reversed word. Empty entries are ignored.
--   * the word to analyse.
--
-- Every analysis is returned as @[stem, clitic1, ..., cliticN]@ in surface
-- order. The unsplit word, if @isLex@ accepts it, comes first; the remaining
-- analyses follow the order of @rclitics@. A word with no analysis yields @[]@.
getClitics :: (String -> Bool) -> [String] -> String -> [[String]]
getClitics isLex rclitics = map (reverse . map reverse) . clits . reverse where
  -- An empty clitic strips nothing, so 'clits' would be called again on the
  -- very same word and never finish; drop such entries up front.
  usable = filter (not . null) rclitics

  -- Works on the reversed word. Each analysis is built back to front:
  -- the last clitic of the word comes first and the (reversed) stem last.
  clits rword = ifLex rword [rclit:more | 
                  rclit <- usable, stem <- splits rclit rword, more <- clits stem]

  -- The remainder of the reversed word after removing clitic c, if it starts with c.
  splits c = maybe [] return . stripPrefix c

  -- Prepend the "no further split" analysis when w (unreversed) is in the lexicon.
  ifLex w ws = if isLex (reverse w) then [w] : ws else ws


-- | Run 'getClitics' on every word of a token list and render the results as text.
--
-- Within one word, the parts of an analysis are separated by the token @&+@
-- (with spaces around it). One result string is produced for every way of
-- choosing one analysis per word, the words being separated by spaces.
-- If some word has no analysis at all, the result is the empty list.
getCliticsText :: (String -> Bool) -> [String] -> [String] -> [String]
getCliticsText isLex rclitics tokens =
  [ unwords choice | choice <- mapM renderedAnalyses tokens ]
 where
  -- All analyses of a single word, each already rendered as one string.
  renderedAnalyses token =
    [ unwords (intersperse "&+" parts) | parts <- getClitics isLex rclitics token ]


-- example

-- | Example analyser: 'getClitics' applied to the sample lexicon 'exlex1'
-- and the sample clitic list 'exclits1'.
getClitics1 :: String -> [[String]]
getClitics1 word = getClitics exlex1 exclits1 word
-- | Sample lexicon for the example: accepts exactly the listed word forms.
exlex1 :: String -> Bool
exlex1 word = word `elem` ["auto", "naise", "rahan", "maa", "maahan", "maahankaan"]
-- | Sample clitic list for the example, stored reversed as 'getClitics' expects.
exclits1 :: [String]
exclits1 = [ reverse clitic | clitic <- ["ni", "ko", "han", "pas", "nsa", "kin", "kaan"] ]

Index

(HTML for this module was generated on 2013-11-05. About the conversion tool.)