---------------------------------------------------------------------- -- | -- Module : VisualizeTree -- Maintainer : KA -- Stability : (stable) -- Portability : (portable) -- -- Print a graph of an abstract syntax tree in Graphviz DOT format -- Based on BB's VisualizeGrammar ----------------------------------------------------------------------------- module PGF.VisualizeTree ( GraphvizOptions(..) , graphvizDefaults , graphvizAbstractTree , graphvizParseTree , graphvizParseTreeDep , graphvizDependencyTree , Labels, getDepLabels , CncLabels, getCncDepLabels , graphvizBracketedString , graphvizAlignment , gizaAlignment , conlls2latexDoc ) where import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint import PGF.CId (wildCId,showCId,ppCId,mkCId) --CId,pCId, import PGF.Data import PGF.Expr (Tree) -- showExpr import PGF.Linearize ----import PGF.LatexVisualize (conll2latex) ---- should be separate module? import PGF.Macros (lookValCat, BracketedString(..)) --lookMap, BracketedTokn(..), flattenBracketedString import qualified Data.Map as Map --import qualified Data.IntMap as IntMap import Data.List (intersperse,nub,mapAccumL,find,groupBy,sortBy,partition) import Data.Ord (comparing) import Data.Char (isDigit) import Data.Maybe (fromMaybe) import Text.PrettyPrint --import Data.Array.IArray --import Control.Monad --import qualified Data.Set as Set --import qualified Text.ParserCombinators.ReadP as RP data GraphvizOptions = GraphvizOptions {noLeaves :: Bool, noFun :: Bool, noCat :: Bool, noDep :: Bool, nodeFont :: String, leafFont :: String, nodeColor :: String, leafColor :: String, nodeEdgeStyle :: String, leafEdgeStyle :: String } graphvizDefaults = GraphvizOptions False False False True "" "" "" "" "" "" -- | Renders abstract syntax tree in Graphviz format. -- The pair of 'Bool' @(funs,cats)@ lets you control whether function names and -- category names are included in the rendered tree. graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String graphvizAbstractTree pgf (funs,cats) = render . tree2graph where tree2graph t = text "graph {" $$ ppGraph [] [] 0 t $$ text "}" getAbs xs (EAbs _ x e) = getAbs (x:xs) e getAbs xs (ETyped e _) = getAbs xs e getAbs xs e = (xs,e) getApp (EApp x (EImplArg y)) es = getApp x es getApp (EApp x y) es = getApp x (y:es) getApp (ETyped e _) es = getApp e es getApp e es = (e,es) getLbl scope (EFun f) = let fun = if funs then ppCId f else empty cat = if cats then ppCId (lookValCat (abstract pgf) f) else empty sep = if funs && cats then colon else empty in fun <+> sep <+> cat getLbl scope (ELit l) = text (escapeStr (render (ppLit l))) getLbl scope (EMeta i) = ppMeta i getLbl scope (EVar i) = ppCId (scope !! i) getLbl scope (ETyped e _) = getLbl scope e getLbl scope (EImplArg e) = getLbl scope e ppGraph scope ps i e0 = let (xs, e1) = getAbs [] e0 (e2,args) = getApp e1 [] binds = if null xs then empty else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->" (lbl,eargs) = case e2 of EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node _ -> (getLbl scope' e2, args) scope' = xs ++ scope in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$ (if null ps then empty else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$ vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs) ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps)) escapeStr [] = [] escapeStr ('\\':cs) = '\\':'\\':escapeStr cs escapeStr ('"' :cs) = '\\':'"' :escapeStr cs escapeStr (c :cs) = c :escapeStr cs type Labels = Map.Map CId [String] -- | Visualize word dependency tree. graphvizDependencyTree :: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@ -> Bool -- ^ Include extra information (debug) -> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels' -> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@)) -> PGF -> CId -- ^ The language of analysis -> Tree -> String -- ^ Rendered output in the specified format graphvizDependencyTree format debug mlab mclab pgf lang t = case format of "latex" -> render . ppLaTeX $ conll2latex' conll "svg" -> render . ppSVG . toSVG $ conll2latex' conll "conll" -> printCoNLL conll "conllu" -> printCoNLL ([["# text = " ++ linearize pgf lang t], ["# tree = " ++ showExpr [] t]] ++ conll) "malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes) "malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes) _ -> render $ text "digraph {" $$ space $$ nest 2 (text "rankdir=LR ;" $$ text "node [shape = plaintext] ;" $$ vcat nodes $$ vcat links) $$ text "}" where conll = fixCoNLL (maybe [] id mclab) conll0 conll0 = (map.map) render wnodes nodes = map mkNode leaves links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun,_),_,w) <- tail leaves] -- CoNLL format: ID FORM LEMMA PLEMMA POS PPOS FEAT PFEAT HEAD PHEAD DEPREL PDEPREL -- P variants are automatically predicted rather than gold standard wnodes = [[int i, maltws ws, ppCId fun, ppCId (posCat cat), ppCId cat, int lind, int parent, text lab, unspec, unspec] | ((cat,fid,fun,lind),i,ws) <- tail leaves, let (lab,parent) = fromMaybe (dep_lbl,0) (do (lbl,fid) <- lookup fid deps (_,i,_) <- find (\((_,fid1,_,_),i,_) -> fid == fid1) leaves return (lbl,i)) ] maltws = text . concat . intersperse "+" . words -- no spaces in column 2 nil = -1 bss = bracketedLinearize pgf lang t root = (wildCId,nil,wildCId,0) leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . concatMap (getLeaves root)) bss deps = let (_,(h,deps)) = getDeps 0 [] t [] in (h,(dep_lbl,nil)):deps groupAndIndexIt id [] = [] groupAndIndexIt id ((p,w):pws) = (p,id,w) : groupAndIndexIt (id+1) pws --- groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws --- in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 where collect pws@((p1,w):pws1) | p == p1 = let (ws,pws2) = collect pws1 in (w:ws,pws2) collect pws = ([],pws) getLeaves parent bs = case bs of Leaf w -> [(parent,w)] Bracket cat fid _ lind fun _ bss -> concatMap (getLeaves (cat,fid,fun,lind)) bss mkNode ((_,p,_,_),i,w) = tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;" labels = maybe Map.empty id mlab clabels = maybe [] id mclab posCat cat = case Map.lookup cat labels of Just [p] -> mkCId p _ -> cat getDeps n_fid xs (EAbs _ x e) es = getDeps n_fid (x:xs) e es getDeps n_fid xs (EApp e1 e2) es = getDeps n_fid xs e1 (e2:es) getDeps n_fid xs (EImplArg e) es = getDeps n_fid xs e es getDeps n_fid xs (ETyped e _) es = getDeps n_fid xs e es getDeps n_fid xs (EFun f) es = let (n_fid_1,ds) = descend n_fid xs es (mb_h, deps) = selectHead f ds in case mb_h of Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++ [(n_fid_1,(dep_lbl,fid))]++ concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps])) Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps])) getDeps n_fid xs (EMeta i) es = (n_fid+2,(n_fid,[])) getDeps n_fid xs (EVar i) _ = (n_fid+2,(n_fid,[])) getDeps n_fid xs (ELit l) [] = (n_fid+1,(n_fid,[])) descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e []) n_fid es selectHead f ds = case Map.lookup f labels of Just lbls -> extractHead (zip lbls ds) Nothing -> extractLast ds where extractHead [] = (Nothing, []) extractHead (ld@(l,d):lds) | l == head_lbl = (Just d,lds) | otherwise = let (mb_h,deps) = extractHead lds in (mb_h,ld:deps) extractLast [] = (Nothing, []) extractLast (d:ds) | null ds = (Just d,[]) | otherwise = let (mb_h,deps) = extractLast ds in (mb_h,(dep_lbl,d):deps) dep_lbl = "dep" head_lbl = "head" root_lbl = "root" unspec = text "_" -- auxiliaries for UD conversion PK 15/12/2018 rmcomments :: String -> String rmcomments s = case s of '-':'-':_ -> [] '#':'f':'u':'n':rest -> rmcomments rest -- the new gf-ud format '#':'c':'a':'t':rest -> rmcomments rest x:xs -> x : rmcomments xs _ -> [] -- | Prepare lines obtained from a configuration file for labels for -- use with 'graphvizDependencyTree'. Format per line /fun/ /label/@*@. --- ignore other gf-ud annotatations than #fun and #cat at this point getDepLabels :: String -> Labels getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map (words . rmcomments) (lines s), not (head f == '#')] -- the old function, without dependencies graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String graphvizParseTree = graphvizParseTreeDep Nothing graphvizParseTreeDep :: Maybe Labels -> PGF -> Language -> GraphvizOptions -> Tree -> String graphvizParseTreeDep mbl pgf lang opts tree = graphvizBracketedString opts mbl tree $ bracketedLinearize pgf lang tree graphvizBracketedString :: GraphvizOptions -> Maybe Labels -> Tree -> [BracketedString] -> String graphvizBracketedString opts mbl tree bss = render graphviz_code where graphviz_code = text "graph {" $$ text node_style $$ vcat internal_nodes $$ (if noLeaves opts then empty else text leaf_style $$ leaf_nodes ) $$ text "}" leaf_style = mkOption "edge" "style" (leafEdgeStyle opts) ++ mkOption "edge" "color" (leafColor opts) ++ mkOption "node" "fontcolor" (leafColor opts) ++ mkOption "node" "fontname" (leafFont opts) ++ mkOption "node" "shape" "plaintext" node_style = mkOption "edge" "style" (nodeEdgeStyle opts) ++ mkOption "edge" "color" (nodeColor opts) ++ mkOption "node" "fontcolor" (nodeColor opts) ++ mkOption "node" "fontname" (nodeFont opts) ++ mkOption "node" "shape" nodeshape where nodeshape | noFun opts && noCat opts = "point" | otherwise = "plaintext" mkOption object optname optvalue | null optvalue = "" | otherwise = object ++ "[" ++ optname ++ "=\"" ++ optvalue ++ "\"]; " mkNode fun cat | noFun opts = showCId cat | noCat opts = showCId fun | otherwise = showCId fun ++ " : " ++ showCId cat nil = -1 internal_nodes = [mkLevel internals | internals <- getInternals (map ((,) nil) bss), not (null internals)] leaf_nodes = mkLevel [(parent, id, mkLeafNode cat word) | (id, (parent, (cat,word))) <- zip [100000..] (concatMap (getLeaves (mkCId "?") nil) bss)] getInternals [] = [] getInternals nodes = nub [(parent, fid, mkNode fun cat) | (parent, Bracket cat fid _ _ fun _ _) <- nodes] : getInternals [(fid, child) | (_, Bracket _ fid _ _ _ _ children) <- nodes, child <- children] getLeaves cat parent (Leaf word) = [(parent, (cat, word))] -- the lowest cat before the word getLeaves _ parent (Bracket cat fid _ i _ _ children) = concatMap (getLeaves cat fid) children mkLevel nodes = text "subgraph {rank=same;" $$ nest 2 (-- the following gives the name of the node and its label: vcat [tag id <> text (mkOption "" "label" lbl) | (_, id, lbl) <- nodes] $$ -- the following is for fixing the order between the children: (if length nodes > 1 then text (mkOption "edge" "style" "invis") $$ hsep (intersperse (text " -- ") [tag id | (_, id, _) <- nodes]) <+> semi else empty) ) $$ text "}" $$ -- the following is for the edges between parent and children: vcat [tag pid <> text " -- " <> tag id <> text (depLabel node) | node@(pid, id, _) <- nodes, pid /= nil] $$ space depLabel node@(parent,id,lbl) | noDep opts = ";" | otherwise = case getArg id of Just (fun,arg) -> mkOption "" "label" (lookLabel fun arg) _ -> ";" getArg i = getArgumentPlace i (expr2numtree tree) Nothing labels = maybe Map.empty id mbl lookLabel fun arg = case Map.lookup fun labels of Just xx | length xx > arg -> case xx !! arg of "head" -> "" l -> l _ -> argLabel fun arg argLabel fun arg = if arg==0 then "" else "dep#" ++ show arg --showCId fun ++ "#" ++ show arg -- assuming the arg is head, if no configuration is given; always true for 1-arg funs mkLeafNode cat word | noDep opts = word --- || not (noCat opts) -- show POS only if intermediate nodes hidden | otherwise = posCat cat ++ "\n" ++ word -- show POS in dependency tree posCat cat = case Map.lookup cat labels of Just [p] -> p _ -> showCId cat ---- to restore the argument place from bracketed linearization data NumTree = NumTree Int CId [NumTree] getArgumentPlace :: Int -> NumTree -> Maybe (CId,Int) -> Maybe (CId,Int) getArgumentPlace i tree@(NumTree int fun ts) mfi | i == int = mfi | otherwise = case [fj | (t,x) <- zip ts [0..], Just fj <- [getArgumentPlace i t (Just (fun,x))]] of fj:_ -> Just fj _ -> Nothing expr2numtree :: Expr -> NumTree expr2numtree = fst . renumber 0 . flatten where flatten e = case e of EApp f a -> case flatten f of NumTree _ g ts -> NumTree 0 g (ts ++ [flatten a]) EFun f -> NumTree 0 f [] renumber i t@(NumTree _ f ts) = case renumbers i ts of (ts',j) -> (NumTree j f ts', j+1) renumbers i ts = case ts of t:tt -> case renumber i t of (t',j) -> case renumbers j tt of (tt',k) -> (t':tt',k) _ -> ([],i) ----- end this terrible stuff AR 4/11/2015 type Rel = (Int,[Int]) -- possibly needs changes after clearing about many-to-many on this level type IndexedSeq = (Int,[String]) type LangSeq = [IndexedSeq] data PreAlign = PreAlign [LangSeq] [[Rel]] deriving Show -- alignment structure for a phrase in 2 languages, along with the -- many-to-many relations genPreAlignment :: PGF -> [Language] -> Expr -> PreAlign genPreAlignment pgf langs = lin2align . linsBracketed where linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs] lin2align :: [[BracketedString]] -> PreAlign lin2align bsss = PreAlign langSeqs langRels where (langSeqs,langRels) = mkLayers leaves nil = -1 leaves = map (groupAndIndexIt 0 . concatMap (getLeaves nil)) bsss groupAndIndexIt id [] = [] groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 where collect pws@((p1,w):pws1) | p == p1 = let (ws,pws2) = collect pws1 in (w:ws,pws2) collect pws = ([],pws) getLeaves parent bs = case bs of Leaf w -> [(parent,w)] Bracket _ fid _ _ _ _ bss -> concatMap (getLeaves fid) bss mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest) in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest) mkLayers [cs] = ([fields cs], []) mkLayers _ = ([],[]) mkLinks cs (p0,id0,_) = (id0,indices) where indices = [id1 | (p1,id1,_) <- cs, p1 == p0] fields cs = [(id, [w]) | (_,id,w) <- cs] -- we assume we have 2 languages - source and target gizaAlignment :: PGF -> (Language,Language) -> Expr -> (String,String,String) gizaAlignment pgf (l1,l2) e = let PreAlign [rl1,rl2] rels = genPreAlignment pgf [l1,l2] e in (unwords (map showIndSeq rl1), unwords (concat $ map snd rl2), unwords $ words $ showRels rl2 (concat rels)) showIndSeq (_,l) = let ww = map words l w_ = map (intersperse "_") ww in concat $ concat w_ showRels inds2 [] = [] showRels inds2 ((ind,is):rest) = let lOffs = computeOffset inds2 0 ltemp = [(i,getOffsetIndex i lOffs) | i <- is] lcurr = concat $ map (\(offset,ncomp) -> [show ind ++ "-" ++ show (-1 + offset + ii) ++ " "| ii <- [1..ncomp]]) (map snd ltemp) lrest = showRels inds2 rest in (unwords lcurr) ++ lrest getOffsetIndex i lst = let ll = filter (\(x,_) -> x == i) lst in snd $ head ll computeOffset [] transp = [] computeOffset ((i,l):rest) transp = let nw = (length $ words $ concat l) in (i,(transp,nw)) : (computeOffset rest (transp + nw)) -- alignment in the Graphviz format from the intermediate structure -- same effect as the old direct function graphvizAlignment :: PGF -> [Language] -> Expr -> String graphvizAlignment pgf langs exp = render (text "digraph {" $$ space $$ nest 2 (text "rankdir=LR ;" $$ text "node [shape = record] ;" $$ space $$ renderList 0 lrels rrels) $$ text "}") where (PreAlign lrels rrels) = genPreAlignment pgf langs exp renderList ii (l:ls) (r:rs) = struct ii <> text "[label = \"" <> fields l <> text "\"] ;" $$ (case ls of [] -> empty _ -> vcat [struct ii <> colon <> tag id0 <> colon <> char 'e' <+> text "->" <+> struct (ii+1) <> colon <> tag id1 <> colon <> char 'w' <+> semi | (id0,ids) <- r, id1 <- ids] $$ renderList (ii + 1) ls rs) renderList ii [] _ = empty renderList ii [l] [] = struct ii <> text "[label = \"" <> fields l <> text "\"] ;" fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text (' ':w) | (id,ws) <- cs, w <- ws]) -- auxiliaries for graphviz syntax struct l = text ("struct" ++ show l) tbrackets d = char '<' <> d <> char '>' tag i | i < 0 = char 'r' <> int (negate i) | otherwise = char 'n' <> int i ---------------------- should be a separate module? -- visualization with latex output. AR Nov 2015 conlls2latexDoc :: [String] -> String conlls2latexDoc = render . latexDoc . vcat . intersperse (text "" $+$ app "vspace" (text "4mm")) . map conll2latex . filter (not . null) conll2latex :: String -> Doc conll2latex = ppLaTeX . conll2latex' . parseCoNLL conll2latex' :: CoNLL -> [LaTeX] conll2latex' = dep2latex . conll2dep' data Dep = Dep { wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0) , tokens :: [(String,(String,String))] -- word, (pos,features) (0..) , deps :: [((Int,Int),String)] -- from, to, label , root :: Int -- root word position } -- some general measures defaultWordLength = 20.0 -- the default fixed width word length, making word 100 units defaultUnit = 0.2 -- unit in latex pictures, 0.2 millimetres spaceLength = 10.0 charWidth = 1.8 wsize rwld w = 100 * rwld w + spaceLength -- word length, units wpos rwld i = sum [wsize rwld j | j <- [0..i-1]] -- start position of the i'th word wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]] -- distance between words x and y labelheight h = h + arcbase + 3 -- label just above arc; 25 would put it just below labelstart c = c - 15.0 -- label starts 15u left of arc centre arcbase = 30.0 -- arcs start and end 40u above the bottom arcfactor r = r * 600 -- reduction of arc size from word distance xyratio = 3 -- width/height ratio of arcs putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand] putArc frwld height x y label = [oval,arrowhead,labelling] where oval = Put (ctr,arcbase) (OvalTop (wdth,hght)) arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label) dxy = wdist frwld x y -- distance between words, >>= 20.0 ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length hdxy = dxy / 2 -- half the distance wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are wider in proportion hght = ndxy / (xyratio * rwld) -- arc height is independent of word length begp = min x y -- begin position of oval ctr = wpos frwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow rwld = 0.5 ---- dep2latex :: Dep -> [LaTeX] dep2latex d = [Comment (unwords (map fst (tokens d))), Picture defaultUnit (width,height) ( [Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words ++ [Put (wpos rwld i,15) (TinyText w) | (i,(w,_)) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom --- ++ [Put (wpos rwld i,-15) (TinyText w) | (i,(_,w)) <- zip [0..] (map snd (tokens d))] -- features 15u below bottom -> DON'T SHOW ++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels ++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))] ++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "root")] )] where wld i = wordLength d i -- >= 20.0 rwld i = (wld i) / defaultWordLength -- >= 1.0 aheight x y = depth (min x y) (max x y) + 1 ---- abs (x-y) arcs = [(min u v, max u v) | ((u,v),_) <- deps d] depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted [] -> 0 uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs]) width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1) height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d])) type CoNLL = [[String]] parseCoNLL :: String -> CoNLL parseCoNLL = map words . lines --conll2dep :: String -> Dep --conll2dep = conll2dep' . parseCoNLL conll2dep' :: CoNLL -> Dep conll2dep' ls = Dep { wordLength = wld , tokens = toks , deps = dps , root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1] } where wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,(pos,feat)) = toks !! i in [tok,pos {-,feat-}]]) --- feat not shown toks = [(w,(c,m)) | _:w:_:c:_:m:_ <- ls] dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"] --maxdist = maximum [abs (x-y) | ((x,y),_) <- dps] -- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture) -- We render both LaTeX and SVG from this intermediate representation of -- LaTeX pictures. data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand] data DrawingCommand = Put Position Object data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length type UnitLengthMM = Double type Size = (Double,Double) type Position = (Double,Double) type Length = Double -- * latex formatting ppLaTeX = vcat . map ppLaTeX1 where ppLaTeX1 el = case el of Comment s -> comment s Picture unit size cmds -> app "setlength{\\unitlength}" (text (show unit ++ "mm")) $$ hang (app "begin" (text "picture")<>text (show size)) 2 (vcat (map ppDrawingCommand cmds)) $$ app "end" (text "picture") $$ text "" ppDrawingCommand (Put pos obj) = put pos (ppObject obj) ppObject obj = case obj of Text s -> text s TinyText s -> small (text s) OvalTop size -> text "\\oval" <> text (show size) <> text "[t]" ArrowDown len -> app "vector(0,-1)" (text (show len)) put p@(_,_) = app ("put" ++ show p) small w = text "{\\tiny" <+> w <> text "}" comment s = text "%%" <+> text s -- line break show follow app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}" latexDoc :: Doc -> Doc latexDoc body = vcat [text "\\documentclass{article}", text "\\usepackage[a4paper,margin=0.5in,landscape]{geometry}", text "\\usepackage[utf8]{inputenc}", text "\\begin{document}", body, text "\\end{document}"] -- * SVG (see https://www.w3.org/Graphics/SVG/IG/resources/svgprimer.html) -- | Render LaTeX pictures as SVG toSVG = concatMap toSVG1 where toSVG1 el = case el of Comment s -> [] Picture unit size@(w,h) cmds -> [Elem "svg" ["width".=x1,"height".=y0+5, ("viewBox",unwords (map show [0,0,x1,y0+5])), ("version","1.1"), ("xmlns","http://www.w3.org/2000/svg")] (white_bg:concatMap draw cmds)] where white_bg = Elem "rect" ["x".=0,"y".=0,"width".=x1,"height".=y0+5, ("fill","white")] [] draw (Put pos obj) = objectSVG pos obj objectSVG pos obj = case obj of Text s -> [text 16 pos s] TinyText s -> [text 10 pos s] OvalTop size -> [ovalTop pos size] ArrowDown len -> arrowDown pos len text h (x,y) s = Elem "text" ["x".=xc x,"y".=yc y-2,"font-size".=h] [CharData s] ovalTop (x,y) (w,h) = Elem "path" [("d",path),("stroke","black"),("fill","none")] [] where x1 = x-w/2 x2 = min x (x1+r) x3 = max x (x4-r) x4 = x+w/2 y1 = y y2 = y+r r = h/2 sx = show . xc sy = show . yc path = unwords (["M",sx x1,sy y1,"Q",sx x1,sy y2,sx x2,sy y2, "L",sx x3,sy y2,"Q",sx x4,sy y2,sx x4,sy y1]) arrowDown (x,y) len = [Elem "line" ["x1".=xc x,"y1".=yc y,"x2".=xc x,"y2".=y2, ("stroke","black")] [], Elem "path" [("d",unwords arrowhead)] []] where x2 = xc x y2 = yc (y-len) arrowhead = "M":map show [x2,y2,x2-3,y2-6,x2+3,y2-6] xc x = num x+5 yc y = y0-num y x1 = num w+10 y0 = num h+20 num x = round (scale*x) scale = unit*5 infix 0 .= n.=v = (n,show v) -- * SVG is XML data SVG = CharData String | Elem TagName Attrs [SVG] type TagName = String type Attrs = [(String,String)] ppSVG svg = vcat [text "", text "", text "", vcat (map ppSVG1 svg)] -- It should be a single element... where ppSVG1 svg1 = case svg1 of CharData s -> text (encode s) Elem tag attrs [] -> text "<"<>text tag<>cat (map attr attrs) <> text "/>" Elem tag attrs svg -> cat [text "<"<>text tag<>cat (map attr attrs) <> text ">", nest 2 (cat (map ppSVG1 svg)), text "text tag<>text ">"] attr (n,v) = text " "<>text n<>text "=\""<>text (encode v)<>text "\"" encode s = foldr encodeEntity "" s encodeEntity = encodeEntity' (const False) encodeEntity' esc c r = case c of '&' -> "&"++r '<' -> "<"++r '>' -> ">"++r _ -> c:r ---------------------------------- -- concrete syntax annotations (local) on top of conll -- examples of annotations: -- UseComp {"not"} PART neg head -- UseComp {*} AUX cop head type CncLabels = [CncLabel] data CncLabel = CncSyncat (String, String -> Maybe (String -> String,String,String)) -- (fun, word/lemma -> (pos,label,target)) -- the pos can remain unchanged, as in the current notation in the article | CncMorpho (String,[String]) -- (category, features in ascending order) | CncForm (String,(String,String)) -- (wordform, (lemma,features)) fixCoNLL :: CncLabels -> CoNLL -> CoNLL fixCoNLL cncLabels conll = map (fixMorpho . fixDep) (markRoot conll) where labels = [l | CncSyncat l <- cncLabels] flabels = [r | CncMorpho r <- cncLabels] -- change the root label from dep to root --- doing this for the leftmost word of the root node markRoot rows = case rows of (i:word:fun:pos:cat:x_:"0":lab_:xs):rs -> (i:word:fun:pos:cat:x_:"0":"root":xs) : map (markNoRoot i) rs r:rs -> r : markRoot rs _ -> rows --- what about if there is no root? markNoRoot r row@(i:word:fun:pos:cat:x_:j:label:xs) = case j of "0" -> (i:word:fun:pos:cat:x_: r :label:xs) _ -> row fixDep row = case row of (i:word:fun:pos:cat:x_:j:label:xs) | label /= "root" -> case look (fun,word) of Just (pos',label',"head") -> (i:word:fun:pos' pos:cat: x_: j :label':xs) Just (pos',label',target) -> (i:word:fun:pos' pos:cat: x_: getDep j target:label':xs) _ -> row _ -> row fixMorpho (i:word:fun:pos:cat: mo :j:label:xs) = (i:word:fun:pos:cat: (feat cat word mo) :j:label:xs) look (fun,word) = case lookup fun labels of Just relabel -> case relabel word of Just row -> Just row _ -> case lookup "*" labels of Just starlabel -> starlabel word _ -> Nothing _ -> case lookup "*" labels of Just starlabel -> starlabel word _ -> Nothing getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll] feat cat word x = case lookup cat flabels of Just tags | all isDigit x && length tags > read x -> tags !! read x _ -> case lookup (show word) flabels of Just (t:_) -> t _ -> cat ++ "-" ++ x getCncDepLabels :: String -> CncLabels getCncDepLabels s = wlabels ws ++ flabels fs where wlabels = map CncSyncat . map merge . groupBy (\ (x,_) (a,_) -> x == a) . sortBy (comparing fst) . concatMap analyse . filter chooseW flabels = map CncMorpho . map collectTags . map words (fs,ws) = partition chooseF $ map uncomment $ lines s --- choose is for compatibility with the general notation chooseW line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules ---- TODO: this means that "(" cannot be a token chooseF line = take 1 line == "@" --- feature assignments have the form e.g. @N SgNom SgGen ; no spaces inside tags uncomment line = case line of '-':'-':_ -> "" c:cs -> c : uncomment cs _ -> line analyse line = case break (=='{') line of (beg,_:ws) -> case break (=='}') ws of (toks,_:target) -> case (getToks beg, words target) of (funs,[ label,j]) -> [(fun, (tok, (id, label,j))) | fun <- funs, tok <- getToks toks] (funs,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | fun <- funs, tok <- getToks toks] _ -> [] _ -> [] _ -> [] merge rules@((fun,_):_) = (fun, \tok -> case lookup tok (map snd rules) of Just new -> return new _ -> lookup "*" (map snd rules) ) getToks = map unquote . filter (/=",") . toks toks s = case lex s of [(t,"")] -> [t] ; [(t,cc)] -> t:toks cc ; _ -> [] unquote s = case s of '"':cc@(_:_) | last cc == '"' -> init cc ; _ -> s collectTags (w:ws) = (tail w,ws) -- added init to remove the last \n. otherwise, two empty lines are in between each sentence PK 17/12/2018 printCoNLL :: CoNLL -> String printCoNLL = init . unlines . map (concat . intersperse "\t")