VisualizeTree.hs

Plain text version of VisualizeTree.hs

----------------------------------------------------------------------
-- |
-- Module      : VisualizeTree
-- Maintainer  : KA
-- Stability   : (stable)
-- Portability : (portable)
--
-- Print a graph of an abstract syntax tree in Graphviz DOT format
-- Based on BB's VisualizeGrammar
-----------------------------------------------------------------------------

module PGF.VisualizeTree
             ( GraphvizOptions(..)
             , graphvizDefaults
             , graphvizAbstractTree
             , graphvizParseTree
             , graphvizParseTreeDep
             , graphvizDependencyTree
             , Labels, getDepLabels
             , CncLabels, getCncDepLabels
             , graphvizBracketedString
             , graphvizAlignment
             , gizaAlignment
             , conlls2latexDoc
             ) where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint

import PGF.CId (wildCId,showCId,ppCId,mkCId) --CId,pCId,
import PGF.Data
import PGF.Expr (Tree) -- showExpr
import PGF.Linearize
----import PGF.LatexVisualize (conll2latex) ---- should be separate module?
import PGF.Macros (lookValCat, BracketedString(..))
                   --lookMap, BracketedTokn(..), flattenBracketedString

import qualified Data.Map as Map
--import qualified Data.IntMap as IntMap
import Data.List (intersperse,nub,mapAccumL,find,groupBy,sortBy,partition)
import Data.Ord (comparing)
import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import Text.PrettyPrint

--import Data.Array.IArray
--import Control.Monad
--import qualified Data.Set as Set
--import qualified Text.ParserCombinators.ReadP as RP


data GraphvizOptions = GraphvizOptions {noLeaves :: Bool,
                                        noFun :: Bool,
                                        noCat :: Bool,
                                        noDep :: Bool,
                                        nodeFont :: String,
                                        leafFont :: String,
                                        nodeColor :: String,
                                        leafColor :: String,
                                        nodeEdgeStyle :: String,
                                        leafEdgeStyle :: String
                                       }

graphvizDefaults = GraphvizOptions False False False True "" "" "" "" "" ""


-- | Renders abstract syntax tree in Graphviz format.
-- The pair of 'Bool' @(funs,cats)@ lets you control whether function names and
-- category names are included in the rendered tree.
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String
graphvizAbstractTree pgf (funs,cats) = render . tree2graph
  where
    tree2graph t =
      text "graph {" $$
      ppGraph [] [] 0 t $$
      text "}"

    getAbs xs (EAbs _ x e) = getAbs (x:xs) e
    getAbs xs (ETyped e _) = getAbs xs e
    getAbs xs e            = (xs,e)

    getApp (EApp x (EImplArg y)) es = getApp x es
    getApp (EApp x y)            es = getApp x (y:es)
    getApp (ETyped e _)          es = getApp e es
    getApp e                     es = (e,es)

    getLbl scope (EFun f)     = let fun = if funs then ppCId f else empty
                                    cat = if cats then ppCId (lookValCat (abstract pgf) f) else empty
                                    sep = if funs && cats then colon else empty
                                in fun <+> sep <+> cat
    getLbl scope (ELit l)     = text (escapeStr (render (ppLit l)))
    getLbl scope (EMeta i)    = ppMeta i
    getLbl scope (EVar i)     = ppCId (scope !! i)
    getLbl scope (ETyped e _) = getLbl scope e
    getLbl scope (EImplArg e) = getLbl scope e

    ppGraph scope ps i e0 =
      let (xs,  e1) = getAbs [] e0
          (e2,args) = getApp e1 []
          binds     = if null xs
                        then empty
                        else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->"
          (lbl,eargs) = case e2 of
                          EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node
                          _          -> (getLbl scope' e2, args)
          scope'    = xs ++ scope
      in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$
         (if null ps
            then empty
            else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$
         vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs)

    ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps))

    escapeStr []        = []
    escapeStr ('\\':cs) = '\\':'\\':escapeStr cs
    escapeStr ('"' :cs) = '\\':'"' :escapeStr cs
    escapeStr (c   :cs) = c        :escapeStr cs


type Labels = Map.Map CId [String]

-- | Visualize word dependency tree.
graphvizDependencyTree
  :: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@
  -> Bool -- ^ Include extra information (debug)
  -> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels'
  -> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@))
  -> PGF
  -> CId -- ^ The language of analysis
  -> Tree
  -> String -- ^ Rendered output in the specified format
graphvizDependencyTree format debug mlab mclab pgf lang t = 
  case format of
    "latex"      -> render . ppLaTeX $ conll2latex' conll
    "svg"        -> render . ppSVG . toSVG $ conll2latex' conll
    "conll"      -> printCoNLL conll
    "conllu"     -> printCoNLL ([["# text = " ++ linearize pgf lang t], ["# tree = " ++ showExpr [] t]] ++ conll)
    "malt_tab"   -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
    "malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
    _            -> render $ text "digraph {" $$
                    space $$
                    nest 2 (text "rankdir=LR ;" $$
                            text "node [shape = plaintext] ;" $$
                            vcat nodes $$
                            vcat links) $$
                    text "}"
  where
    conll  = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab
    conll0 = (map.map) render wnodes
    nodes  = map mkNode leaves
    links  = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun,_),_,w) <- tail leaves]

-- CoNLL format: ID FORM LEMMA PLEMMA POS PPOS FEAT PFEAT HEAD PHEAD DEPREL PDEPREL
-- P variants are automatically predicted rather than gold standard

    wnodes = [[int i, maltws ws, ppCId fun, ppCId (posCat cat), ppCId cat, int lind, int parent, text lab, unspec, unspec] |
              ((cat,fid,fun,lind),i,ws) <- tail leaves,
              let (lab,parent) = fromMaybe (dep_lbl,0)
                                           (do (lbl,fid) <- lookup fid deps
                                               (_,i,_) <- find (\((_,fid1,_,_),i,_) -> fid == fid1) leaves
                                               return (lbl,i))
             ]
    maltws = text . concat . intersperse "+" . words  -- no spaces in column 2

    nil = -1

    bss = bracketedLinearize pgf lang t

    root = (wildCId,nil,wildCId,0)

    leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . concatMap (getLeaves root)) bss
    deps   = let (_,(h,deps)) = getDeps 0 [] t []
             in (h,(dep_lbl,nil)):deps

    groupAndIndexIt id []          = []
    groupAndIndexIt id ((p,w):pws) = (p,id,w) : groupAndIndexIt (id+1) pws
---    groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
---                                     in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
      where
        collect pws@((p1,w):pws1)
          | p == p1   = let (ws,pws2) = collect pws1
                        in (w:ws,pws2)
        collect pws   = ([],pws)

    getLeaves parent bs =
      case bs of
        Leaf w                           -> [(parent,w)]
        Bracket cat fid _ lind fun _ bss -> concatMap (getLeaves (cat,fid,fun,lind)) bss

    mkNode ((_,p,_,_),i,w) =
      tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi

    mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x  <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"

    labels  = maybe Map.empty id mlab
    clabels = maybe [] id mclab

    posCat cat = case Map.lookup cat labels of
        Just [p] -> mkCId p
        _ -> cat

    getDeps n_fid xs (EAbs _ x e) es = getDeps n_fid (x:xs) e      es
    getDeps n_fid xs (EApp e1 e2) es = getDeps n_fid xs     e1 (e2:es)
    getDeps n_fid xs (EImplArg e) es = getDeps n_fid xs     e      es
    getDeps n_fid xs (ETyped e _) es = getDeps n_fid xs     e      es
    getDeps n_fid xs (EFun f)     es = let (n_fid_1,ds) = descend n_fid xs es
                                           (mb_h, deps) = selectHead f ds
                                       in case mb_h of
                                            Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++
                                                                                [(n_fid_1,(dep_lbl,fid))]++
                                                                                concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps]))
                                            Nothing          -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps]))
    getDeps n_fid xs (EMeta i)    es = (n_fid+2,(n_fid,[]))
    getDeps n_fid xs (EVar  i)    _  = (n_fid+2,(n_fid,[]))
    getDeps n_fid xs (ELit l)     [] = (n_fid+1,(n_fid,[]))

    descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e []) n_fid es

    selectHead f ds =
      case Map.lookup f labels of
        Just lbls -> extractHead (zip lbls ds)
        Nothing   -> extractLast ds
      where
        extractHead []    = (Nothing, [])
        extractHead (ld@(l,d):lds)
          | l == head_lbl = (Just d,lds)
          | otherwise     = let (mb_h,deps) = extractHead lds
                            in (mb_h,ld:deps)

        extractLast []    = (Nothing, [])
        extractLast (d:ds)
          | null ds       = (Just d,[])
          | otherwise     = let (mb_h,deps) = extractLast ds
                            in (mb_h,(dep_lbl,d):deps)

    dep_lbl  = "dep"
    head_lbl = "head"
    root_lbl = "ROOT"
    unspec   = text "_"

-- auxiliaries for UD conversion  PK 15/12/2018 
rmcomments :: String -> String
rmcomments [] = []
rmcomments ('-':'-':xs) = []
rmcomments ('-':x  :xs) = '-':rmcomments (x:xs)
rmcomments (x:xs)       = x:rmcomments xs

-- | Prepare lines obtained from a configuration file for labels for
-- use with 'graphvizDependencyTree'. Format per line /fun/ /label/@*@.
getDepLabels :: String -> Labels
-- getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls &lt;- map words (lines s)]
getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map (words . rmcomments) (lines s)]

-- the old function, without dependencies
graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String
graphvizParseTree = graphvizParseTreeDep Nothing

graphvizParseTreeDep :: Maybe Labels -> PGF -> Language -> GraphvizOptions -> Tree -> String
graphvizParseTreeDep mbl pgf lang opts tree = graphvizBracketedString opts mbl tree $ bracketedLinearize pgf lang tree

graphvizBracketedString :: GraphvizOptions -> Maybe Labels -> Tree -> [BracketedString] -> String
graphvizBracketedString opts mbl tree bss = render graphviz_code
    where
      graphviz_code
          = text "graph {" $$
            text node_style $$
            vcat internal_nodes $$
            (if noLeaves opts then empty
             else text leaf_style $$
                  leaf_nodes
            ) $$ text "}"

      leaf_style = mkOption "edge" "style" (leafEdgeStyle opts) ++
                   mkOption "edge" "color" (leafColor opts) ++
                   mkOption "node" "fontcolor" (leafColor opts) ++
                   mkOption "node" "fontname" (leafFont opts) ++
                   mkOption "node" "shape" "plaintext"

      node_style = mkOption "edge" "style" (nodeEdgeStyle opts) ++
                   mkOption "edge" "color" (nodeColor opts) ++
                   mkOption "node" "fontcolor" (nodeColor opts) ++
                   mkOption "node" "fontname" (nodeFont opts) ++
                   mkOption "node" "shape" nodeshape
          where nodeshape | noFun opts && noCat opts = "point"
                          | otherwise = "plaintext"

      mkOption object optname optvalue
          | null optvalue  = ""
          | otherwise      = object ++ "[" ++ optname ++ "=\"" ++ optvalue ++ "\"]; "

      mkNode fun cat
          | noFun opts = showCId cat
          | noCat opts = showCId fun
          | otherwise  = showCId fun ++ " : " ++ showCId cat

      nil = -1
      internal_nodes = [mkLevel internals |
                        internals <- getInternals (map ((,) nil) bss),
                        not (null internals)]
      leaf_nodes = mkLevel [(parent, id, mkLeafNode cat word) |
                            (id, (parent, (cat,word))) <- zip [100000..] (concatMap (getLeaves (mkCId "?") nil) bss)]

      getInternals []    = []
      getInternals nodes
          = nub [(parent, fid, mkNode fun cat) |
                 (parent, Bracket cat fid _ _ fun _ _) <- nodes]
            : getInternals [(fid, child) |
                            (_, Bracket _ fid _ _ _ _ children) <- nodes,
                            child <- children]

      getLeaves cat parent (Leaf word) = [(parent, (cat, word))] -- the lowest cat before the word
      getLeaves _ parent (Bracket cat fid _ i _ _ children)
          = concatMap (getLeaves cat fid) children

      mkLevel nodes
          = text "subgraph {rank=same;" $$
            nest 2 (-- the following gives the name of the node and its label:
                    vcat [tag id <> text (mkOption "" "label" lbl) | (_, id, lbl) <- nodes] $$
                    -- the following is for fixing the order between the children:
                    (if length nodes > 1 then
                         text (mkOption "edge" "style" "invis") $$
                         hsep (intersperse (text " -- ") [tag id | (_, id, _) <- nodes]) <+> semi
                     else empty)
                   ) $$
            text "}" $$
            -- the following is for the edges between parent and children:
            vcat [tag pid <> text " -- " <> tag id <> text (depLabel node) | node@(pid, id, _) <- nodes, pid /= nil] $$
            space

      depLabel node@(parent,id,lbl) 
        | noDep opts = ";"
        | otherwise = case getArg id of
            Just (fun,arg) -> mkOption "" "label" (lookLabel fun arg) 
            _ -> ";"
      getArg i = getArgumentPlace i (expr2numtree tree) Nothing

      labels = maybe Map.empty id mbl

      lookLabel fun arg = case Map.lookup fun labels of
        Just xx | length xx > arg -> case xx !! arg of
          "head" -> ""
          l -> l
        _ -> argLabel fun arg
      argLabel fun arg = if arg==0 then "" else "dep#" ++ show arg --showCId fun ++ "#" ++ show arg
                         -- assuming the arg is head, if no configuration is given; always true for 1-arg funs
      mkLeafNode cat word
       | noDep opts = word        --- || not (noCat opts) -- show POS only if intermediate nodes hidden
       | otherwise = posCat cat ++ "\n" ++ word         -- show POS in dependency tree

      posCat cat = case Map.lookup cat labels of
        Just [p] -> p
        _ -> showCId cat

---- to restore the argument place from bracketed linearization
data NumTree = NumTree Int CId [NumTree]

getArgumentPlace :: Int -> NumTree -> Maybe (CId,Int) -> Maybe (CId,Int)
getArgumentPlace i tree@(NumTree int fun ts) mfi
 | i == int  = mfi
 | otherwise = case [fj | (t,x) <- zip ts [0..], Just fj <- [getArgumentPlace i t (Just (fun,x))]] of
     fj:_ -> Just fj
     _ -> Nothing

expr2numtree :: Expr -> NumTree
expr2numtree = fst . renumber 0 . flatten where
  flatten e = case e of
    EApp f a -> case flatten f of
      NumTree _ g ts -> NumTree 0 g (ts ++ [flatten a])
    EFun f -> NumTree 0 f []
  renumber i t@(NumTree _ f ts) = case renumbers i ts of
    (ts',j) -> (NumTree j f ts', j+1)
  renumbers i ts = case ts of
    t:tt -> case renumber i t of
      (t',j) -> case renumbers j tt of (tt',k) -> (t':tt',k)
    _ -> ([],i)
----- end this terrible stuff AR 4/11/2015
 



type Rel = (Int,[Int])
-- possibly needs changes after clearing about many-to-many on this level

type IndexedSeq = (Int,[String])
type LangSeq = [IndexedSeq]

data PreAlign = PreAlign [LangSeq] [[Rel]]
  deriving Show
-- alignment structure for a phrase in 2 languages, along with the
-- many-to-many relations


genPreAlignment :: PGF -> [Language] -> Expr -> PreAlign
genPreAlignment pgf langs = lin2align . linsBracketed
 where
      linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs]

      lin2align :: [[BracketedString]] -> PreAlign
      lin2align bsss =  PreAlign langSeqs langRels
          where
           (langSeqs,langRels) = mkLayers leaves
           nil = -1
           leaves = map (groupAndIndexIt 0 . concatMap (getLeaves nil)) bsss

           groupAndIndexIt id []          = []
           groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
                                             in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
            where
              collect pws@((p1,w):pws1)
                | p == p1   = let (ws,pws2) = collect pws1
                                in (w:ws,pws2)
              collect pws   = ([],pws)

           getLeaves parent bs =
                  case bs of
                    Leaf w                    -> [(parent,w)]
                    Bracket _ fid _ _ _ _ bss -> concatMap (getLeaves fid) bss

           mkLayers (cs:css:rest) = let (lrest, rrest) =  mkLayers (css:rest)
                                     in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest)
           mkLayers [cs] = ([fields cs], [])
           mkLayers _ = ([],[])

           mkLinks cs (p0,id0,_) = (id0,indices)
                    where
                     indices = [id1 | (p1,id1,_) <- cs, p1 == p0]

           fields cs = [(id, [w]) | (_,id,w) <- cs]


-- we assume we have 2 languages - source and target
gizaAlignment :: PGF -> (Language,Language) -> Expr -> (String,String,String)
gizaAlignment pgf (l1,l2) e = let PreAlign [rl1,rl2] rels = genPreAlignment pgf [l1,l2] e
                                   in
                                     (unwords (map showIndSeq rl1), unwords (concat $ map snd rl2),
                                        unwords $ words $ showRels rl2 (concat rels))


showIndSeq (_,l) = let ww = map words l
                       w_ = map (intersperse "_")  ww
                      in
                       concat $ concat w_

showRels inds2 [] = []
showRels inds2 ((ind,is):rest) =
                       let lOffs = computeOffset inds2 0
                           ltemp = [(i,getOffsetIndex i lOffs) | i <- is]
                           lcurr = concat $ map (\(offset,ncomp) -> [show ind ++ "-" ++ show (-1 + offset + ii)  ++ " "| ii <- [1..ncomp]]) (map snd ltemp)
                           lrest = showRels inds2 rest
                            in
                             (unwords lcurr) ++ lrest







getOffsetIndex i lst = let ll = filter (\(x,_) -> x == i) lst
                           in
                           snd $ head ll

computeOffset [] transp = []
computeOffset ((i,l):rest) transp = let nw = (length $ words $ concat l)
                                     in (i,(transp,nw)) : (computeOffset rest (transp + nw))



-- alignment in the Graphviz format from the intermediate structure
-- same effect as the old direct function
graphvizAlignment :: PGF -> [Language] -> Expr -> String
graphvizAlignment pgf langs exp =
    render (text "digraph {" $$
      space $$
      nest 2 (text "rankdir=LR ;" $$
              text "node [shape = record] ;" $$
              space $$
              renderList 0 lrels rrels) $$
      text "}")
  where
     (PreAlign lrels rrels) = genPreAlignment pgf langs exp


     renderList ii (l:ls) (r:rs) = struct ii <> text "[label = \"" <> fields l <> text "\"] ;" $$
                                 (case ls of
                                       [] -> empty
                                       _  -> vcat [struct ii  <> colon <> tag id0
                                               <> colon <> char 'e' <+> text "->" <+> struct (ii+1)
                                               <> colon <> tag id1 <> colon <> char 'w' <+> semi
                                                   | (id0,ids) <- r, id1 <- ids] $$ renderList (ii + 1) ls rs)
     renderList ii [] _ = empty
     renderList ii [l] [] = struct ii <> text "[label = \"" <> fields l <> text "\"] ;"

     fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text (' ':w) | (id,ws) <- cs, w <- ws])



-- auxiliaries for graphviz syntax
struct l = text ("struct" ++ show l)
tbrackets d = char '<' <> d  <> char '>'
tag i
  | i < 0     = char 'r' <> int (negate i)
  | otherwise = char 'n' <> int i


---------------------- should be a separate module?

-- visualization with latex output. AR Nov 2015

conlls2latexDoc :: [String] -> String
conlls2latexDoc =
  render .
  latexDoc .
  vcat .
  intersperse (text "" $+$ app "vspace" (text "4mm")) .
  map conll2latex .
  filter (not . null)

conll2latex :: String -> Doc
conll2latex = ppLaTeX . conll2latex' . parseCoNLL

conll2latex' :: CoNLL -> [LaTeX]
conll2latex' = dep2latex . conll2dep'

data Dep = Dep {
    wordLength  :: Int -> Double        -- length of word at position int       -- was: fixed width, millimetres (&gt;= 20.0)
  , tokens      :: [(String,(String,String))]    -- word, (pos,features) (0..)
  , deps        :: [((Int,Int),String)] -- from, to, label
  , root        :: Int                  -- root word position
  }

-- some general measures
defaultWordLength = 20.0  -- the default fixed width word length, making word 100 units
defaultUnit       = 0.2   -- unit in latex pictures, 0.2 millimetres
spaceLength       = 10.0
charWidth = 1.8

wsize rwld  w  = 100 * rwld w + spaceLength                   -- word length, units
wpos rwld i    = sum [wsize rwld j | j <- [0..i-1]]           -- start position of the i'th word
wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]]    -- distance between words x and y
labelheight h  = h + arcbase + 3    -- label just above arc; 25 would put it just below
labelstart c   = c - 15.0           -- label starts 15u left of arc centre
arcbase        = 30.0               -- arcs start and end 40u above the bottom
arcfactor r    = r * 600            -- reduction of arc size from word distance
xyratio        = 3                  -- width/height ratio of arcs

putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand]
putArc frwld height x y label = [oval,arrowhead,labelling] where
  oval = Put (ctr,arcbase) (OvalTop (wdth,hght))
  arrowhead = Put (endp,arcbase + 5) (ArrowDown 5)   -- downgoing arrow 5u above the arc base
  labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label)
  dxy  = wdist frwld x y             -- distance between words, &gt;&gt;= 20.0
  ndxy = 100 * rwld * fromIntegral height  -- distance that is indep of word length
  hdxy = dxy / 2                     -- half the distance
  wdth = dxy - (arcfactor rwld)/dxy  -- longer arcs are wider in proportion
  hght = ndxy / (xyratio * rwld)      -- arc height is independent of word length
  begp = min x y                     -- begin position of oval
  ctr  = wpos frwld begp + hdxy + (if x < y then 20 else  10)  -- LR arcs are farther right from center of oval
  endp = (if x < y then (+) else (-)) ctr (wdth/2)            -- the point of the arrow
  rwld = 0.5 ----

dep2latex :: Dep -> [LaTeX]
dep2latex d =
  [Comment (unwords (map fst (tokens d))),
   Picture defaultUnit (width,height) (
     [Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))]   -- words
  ++ [Put (wpos rwld i,15) (TinyText w) | (i,(w,_)) <- zip [0..] (map snd (tokens d))]   -- pos tags 15u above bottom
  ++ [Put (wpos rwld i,-15) (TinyText w) | (i,(_,w)) <- zip [0..] (map snd (tokens d))]   -- features 15u below bottom
  ++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d]    -- arcs and labels
  ++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
  ++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
  )]
 where
   wld i  = wordLength d i  -- &gt;= 20.0
   rwld i = (wld i) / defaultWordLength       -- &gt;= 1.0
   aheight x y = depth (min x y) (max x y) + 1    ---- abs (x-y)
   arcs = [(min u v, max u v) | ((u,v),_) <- deps d]
   depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
     [] -> 0
     uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs])
   width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1)
   height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d]))

type CoNLL = [[String]]
parseCoNLL :: String -> CoNLL
parseCoNLL = map words . lines

--conll2dep :: String -&gt; Dep
--conll2dep = conll2dep' . parseCoNLL

conll2dep' :: CoNLL -> Dep
conll2dep' ls = Dep {
    wordLength = wld 
  , tokens = toks
  , deps = dps
  , root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
  }
 where
   wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,(pos,feat)) = toks !! i in [tok,pos,feat]])
   toks = [(w,(c,m)) | _:w:_:c:_:m:_ <- ls]
   dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
   --maxdist = maximum [abs (x-y) | ((x,y),_) &lt;- dps]


-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture)

-- We render both LaTeX and SVG from this intermediate representation of
-- LaTeX pictures.

data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand]
data DrawingCommand = Put Position Object
data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length

type UnitLengthMM = Double
type Size = (Double,Double)
type Position = (Double,Double)
type Length = Double


-- * latex formatting
ppLaTeX = vcat . map ppLaTeX1
  where
    ppLaTeX1 el =
      case el of
        Comment s -> comment s
        Picture unit size cmds ->
          app "setlength{\\unitlength}" (text (show unit ++ "mm"))
          $$ hang (app "begin" (text "picture")<>text (show size)) 2
                  (vcat (map ppDrawingCommand cmds))
          $$ app "end" (text "picture")
          $$ text ""

    ppDrawingCommand (Put pos obj) = put pos (ppObject obj)

    ppObject obj =
      case obj of
        Text s -> text s
        TinyText s -> small (text s)
        OvalTop size -> text "\\oval" <> text (show size) <> text "[t]"
        ArrowDown len -> app "vector(0,-1)" (text (show len))

    put p@(_,_) = app ("put" ++ show p)
    small w = text "{\\tiny" <+> w <> text "}"
    comment s = text "%%" <+> text s -- line break show follow
    
app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}"


latexDoc :: Doc -> Doc
latexDoc body =
  vcat [text "\\documentclass{article}",
        text "\\usepackage[utf8]{inputenc}",
        text "\\begin{document}",
        body,
        text "\\end{document}"]

-- * SVG (see https://www.w3.org/Graphics/SVG/IG/resources/svgprimer.html)

-- | Render LaTeX pictures as SVG
toSVG = concatMap toSVG1
  where
    toSVG1 el =
      case el of
        Comment s -> []
        Picture unit size@(w,h) cmds ->
          [Elem "svg" ["width".=x1,"height".=y0+5,
                       ("viewBox",unwords (map show [0,0,x1,y0+5])),
                       ("version","1.1"),
                       ("xmlns","http://www.w3.org/2000/svg")]
                       (white_bg:concatMap draw cmds)]
          where
            white_bg =
              Elem "rect" ["x".=0,"y".=0,"width".=x1,"height".=y0+5,
                           ("fill","white")] []

            draw (Put pos obj) = objectSVG pos obj

            objectSVG pos obj =
              case obj of
                Text s -> [text 16 pos s]
                TinyText s -> [text 10 pos s]
                OvalTop size -> [ovalTop pos size]
                ArrowDown len -> arrowDown pos len

            text h (x,y) s =
              Elem "text" ["x".=xc x,"y".=yc y-2,"font-size".=h]
                          [CharData s]

            ovalTop (x,y) (w,h) =
              Elem "path" [("d",path),("stroke","black"),("fill","none")] []
              where
                x1 = x-w/2
                x2 = min x (x1+r)
                x3 = max x (x4-r)
                x4 = x+w/2
                y1 = y
                y2 = y+r
                r = h/2
                sx = show . xc
                sy = show . yc
                path = unwords (["M",sx x1,sy y1,"Q",sx x1,sy y2,sx x2,sy y2,
                                 "L",sx x3,sy y2,"Q",sx x4,sy y2,sx x4,sy y1])

            arrowDown (x,y) len =
                [Elem "line" ["x1".=xc x,"y1".=yc y,"x2".=xc x,"y2".=y2,
                              ("stroke","black")] [],
                 Elem "path" [("d",unwords arrowhead)] []]
               where
                 x2 = xc x
                 y2 = yc (y-len)
                 arrowhead = "M":map show [x2,y2,x2-3,y2-6,x2+3,y2-6]

            xc x = num x+5
            yc y = y0-num y
            x1 = num w+10
            y0 = num h+20
            num x = round (scale*x)
            scale = unit*5

            infix 0 .=
            n.=v = (n,show v)

-- * SVG is XML

data SVG = CharData String | Elem TagName Attrs [SVG]
type TagName = String
type Attrs = [(String,String)]

ppSVG svg =
  vcat [text "<?xml version=\"1.0\" standalone=\"no\"?>",
        text "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"",
        text "\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">",
        text "",
        vcat (map ppSVG1 svg)] -- It should be a single &lt;svg&gt; element...
  where
    ppSVG1 svg1 =
      case svg1 of
        CharData s -> text (encode s)
        Elem tag attrs [] ->
            text "<"<>text tag<>cat (map attr attrs) <> text "/>"
        Elem tag attrs svg ->
            cat [text "<"<>text tag<>cat (map attr attrs) <> text ">",
                 nest 2 (cat (map ppSVG1 svg)),
                 text "</"<>text tag<>text ">"]

    attr (n,v) = text " "<>text n<>text "=\""<>text (encode v)<>text "\""

    encode s = foldr encodeEntity "" s

    encodeEntity = encodeEntity' (const False)
    encodeEntity' esc c r =
      case c of
        '&' -> "&amp;"++r
        '<' -> "&lt;"++r
        '>' -> "&gt;"++r
        _ -> c:r


----------------------------------
-- concrete syntax annotations (local) on top of conll
-- examples of annotations:
-- UseComp {"not"} PART neg head 
-- UseComp {*} AUX cop head

type CncLabels = [
  Either
    (String, String -> Maybe (String -> String,String,String))
    -- (fun, word -&gt; (pos,label,target))
    -- the pos can remain unchanged, as in the current notation in the article
    (String,[String])
    -- (category, morphological forms)
  ]

fixCoNLL :: CncLabels -> CoNLL -> CoNLL
fixCoNLL cncLabels conll = map fixc conll where
  labels  = [l | Left l <- cncLabels]
  flabels = [r | Right r <- cncLabels]

  fixc row = case row of
    (i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:(feat cat word x_):"0":"root":xs) --- change the root label from dep to root 
    (i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
      Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:(feat cat word x_):j :label':xs)
      Just (pos',label',target) -> (i:word:fun:pos' pos:cat:(feat cat word x_): getDep j target:label':xs)
      _ -> (i:word:fun:pos:cat:(feat cat word x_):j:label:xs)
    _ -> row
    
  look (fun,word) = case lookup fun labels of
    Just relabel -> case relabel word of
      Just row -> Just row
      _ -> case lookup "*" labels of
        Just starlabel -> starlabel word
        _ -> Nothing
    _ -> case lookup "*" labels of
        Just starlabel -> starlabel word
        _ -> Nothing
  
  getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]

  feat cat word x = case lookup cat flabels of
    Just tags | all isDigit x && length tags > read x -> tags !! read x
    _ -> case lookup (show word) flabels of
      Just (t:_) -> t
      _ -> cat ++ "-" ++ x

getCncDepLabels :: String -> CncLabels
getCncDepLabels s = wlabels ws ++ flabels fs
 where
  wlabels =
    map Left .
    map merge .
    groupBy (\ (x,_) (a,_) -> x == a) .
    sortBy (comparing fst) .
    concatMap analyse .
    filter chooseW
    
  flabels =
    map Right .
    map collectTags .
    map words

  (fs,ws) = partition chooseF $ map uncomment $ lines s

  --- choose is for compatibility with the general notation
  chooseW line = notElem '(' line &&
                 elem '{' line
                   --- ignoring non-local (with "(") and abstract (without "{") rules
                   ---- TODO: this means that "(" cannot be a token

  chooseF line = take 1 line == "@"  --- feature assignments have the form e.g. @N SgNom SgGen ; no spaces inside tags

  uncomment line = case line of
    '-':'-':_ -> ""
    c:cs -> c : uncomment cs
    _ -> line

  analyse line = case break (=='{') line of
    (beg,_:ws) -> case break (=='}') ws of
      (toks,_:target) -> case (getToks beg, words target) of
        (funs,[    label,j]) -> [(fun, (tok, (id,       label,j))) | fun <- funs, tok <- getToks toks]
        (funs,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | fun <- funs, tok <- getToks toks]
        _ -> []
      _ -> []
    _ -> []
  merge rules@((fun,_):_) = (fun, \tok ->
    case lookup tok (map snd rules) of
      Just new -> return new
      _ -> lookup "*"  (map snd rules)
    )
  getToks = map unquote . filter (/=",") . toks
  toks s = case lex s of [(t,"")] -> [t] ; [(t,cc)] -> t:toks cc ; _ -> []
  unquote s = case s of '"':cc@(_:_) | last cc == '"' -> init cc ; _ -> s

  collectTags (w:ws) = (tail w,ws)

-- added init to remove the last \n. otherwise, two empty lines are in between each sentence PK 17/12/2018
printCoNLL :: CoNLL -> String
printCoNLL = init . unlines . map (concat . intersperse "\t")