module Haddock.Parser (parseString, parseParas, parseStringMaybe, parseParasMaybe) where
import Prelude hiding (takeWhile)
import Control.Arrow (first)
import Control.Monad (void, mfilter)
import Control.Applicative
import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
import qualified Data.ByteString.Char8 as BS
import Data.Char (chr, isAsciiUpper)
import Data.List (stripPrefix, intercalate, unfoldr)
import Data.Maybe (fromMaybe)
import Data.Monoid
import DynFlags
import FastString (mkFastString)
import Haddock.Doc
import Haddock.Types
import Lexer (mkPState, unP, ParseResult(POk))
import Parser (parseIdentifier)
import RdrName
import SrcLoc (mkRealSrcLoc, unLoc)
import StringBuffer (stringToStringBuffer)
import Haddock.Utf8
import Haddock.Parser.Util
parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
parseParasMaybe d = Just . parseParas d
parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
parseStringMaybe d = Just . parseString d
parse :: Parser a -> BS.ByteString -> a
parse p = either err id . parseOnly (p <* endOfInput)
where
err = error . ("Haddock.Parser.parse: " ++)
parseParas :: DynFlags
-> String
-> Doc RdrName
parseParas d = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
where
p :: Parser (Doc RdrName)
p = mconcat <$> paragraph d `sepBy` many (skipHorizontalSpace *> "\n")
parseString :: DynFlags -> String -> Doc RdrName
parseString d = parseStringBS d . encodeUtf8 . dropWhile isSpace
parseStringBS :: DynFlags -> BS.ByteString -> Doc RdrName
parseStringBS d = parse p
where
p :: Parser (Doc RdrName)
p = mconcat <$> many (monospace d <|> anchor <|> identifier d
<|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold d
<|> emphasis d <|> encodedChar <|> string' <|> skipSpecialChar)
encodedChar :: Parser (Doc a)
encodedChar = "&#" *> c <* ";"
where
c = DocString . return . chr <$> num
num = hex <|> decimal
hex = ("x" <|> "X") *> hexadecimal
specialChar :: [Char]
specialChar = "_/<@\"&'`#"
string' :: Parser (Doc a)
string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
where
unescape "" = ""
unescape ('\\':x:xs) = x : unescape xs
unescape (x:xs) = x : unescape xs
skipSpecialChar :: Parser (Doc a)
skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
emphasis :: DynFlags -> Parser (Doc RdrName)
emphasis d = DocEmphasis . parseStringBS d <$>
mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
bold :: DynFlags -> Parser (Doc RdrName)
bold d = DocBold . parseStringBS d <$> disallowNewline ("__" *> takeUntil "__")
disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
disallowNewline = mfilter ('\n' `BS.notElem`)
takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
takeWhile_ p = scan False p_
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
takeWhile1_ = mfilter (not . BS.null) . takeWhile_
anchor :: Parser (Doc a)
anchor = DocAName . decodeUtf8 <$>
disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#")
monospace :: DynFlags -> Parser (Doc RdrName)
monospace d = DocMonospaced . parseStringBS d <$> ("@" *> takeWhile1_ (/= '@') <* "@")
moduleName :: Parser (Doc a)
moduleName = DocModule <$> (char '"' *> modid <* char '"')
where
modid = intercalate "." <$> conid `sepBy1` "."
conid = (:)
<$> satisfy isAsciiUpper
<*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!|@/;,^?\"\n"))
picture :: Parser (Doc a)
picture = DocPic . makeLabeled Picture . decodeUtf8
<$> disallowNewline ("<<" *> takeUntil ">>")
paragraph :: DynFlags -> Parser (Doc RdrName)
paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d
<|> property <|> header d
<|> textParagraph d)
header :: DynFlags -> Parser (Doc RdrName)
header d = do
let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
pser = foldl1 (<|>) psers
delim <- decodeUtf8 <$> pser
line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString d
rest <- paragraph d <|> return mempty
return $ docAppend (DocParagraph (DocHeader (Header (length delim) line))) rest
textParagraph :: DynFlags -> Parser (Doc RdrName)
textParagraph d = docParagraph . parseString d . intercalate "\n" <$> many1 nonEmptyLine
list :: DynFlags -> Parser (Doc RdrName)
list d = DocUnorderedList <$> unorderedList d
<|> DocOrderedList <$> orderedList d
<|> DocDefList <$> definitionList d
unorderedList :: DynFlags -> Parser [Doc RdrName]
unorderedList d = ("*" <|> "-") *> innerList (unorderedList d) d
orderedList :: DynFlags -> Parser [Doc RdrName]
orderedList d = (paren <|> dot) *> innerList (orderedList d) d
where
dot = (decimal :: Parser Int) <* "."
paren = "(" *> decimal <* ")"
innerList :: Parser [Doc RdrName] -> DynFlags -> Parser [Doc RdrName]
innerList item d = do
c <- takeLine
(cs, items) <- more item d
let contents = docParagraph . parseString d . dropNLs . unlines $ c : cs
return $ case items of
Left p -> [contents `joinPara` p]
Right i -> contents : i
definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)]
definitionList d = do
label <- "[" *> (parseStringBS d <$> takeWhile1 (`notElem` "]\n")) <* "]"
c <- takeLine
(cs, items) <- more (definitionList d) d
let contents = parseString d . dropNLs . unlines $ c : cs
return $ case items of
Left p -> [(label, contents `joinPara` p)]
Right i -> (label, contents) : i
joinPara :: Doc id -> Doc id -> Doc id
joinPara (DocParagraph p) c = docParagraph $ docAppend p c
joinPara d p = docAppend d p
dropNLs :: String -> String
dropNLs = reverse . dropWhile (== '\n') . reverse
more :: Monoid a => Parser a -> DynFlags
-> Parser ([String], Either (Doc RdrName) a)
more item d = innerParagraphs d <|> moreListItems item
<|> moreContent item d <|> pure ([], Right mempty)
innerParagraphs :: DynFlags
-> Parser ([String], Either (Doc RdrName) a)
innerParagraphs d = (,) [] . Left <$> ("\n" *> indentedParagraphs d)
moreListItems :: Parser a
-> Parser ([String], Either (Doc RdrName) a)
moreListItems item = (,) [] . Right <$> (skipSpace *> item)
moreContent :: Monoid a => Parser a -> DynFlags
-> Parser ([String], Either (Doc RdrName) a)
moreContent item d = first . (:) <$> nonEmptyLine <*> more item d
indentedParagraphs :: DynFlags -> Parser (Doc RdrName)
indentedParagraphs d = parseParas d . concat <$> dropFrontOfPara " "
dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
dropFrontOfPara sp = do
currentParagraph <- some (sp *> takeNonEmptyLine)
followingParagraphs <-
skipHorizontalSpace *> nextPar
<|> skipHorizontalSpace *> nlList
<|> endOfInput *> return []
return (currentParagraph ++ followingParagraphs)
where
nextPar = (++) <$> nlList <*> dropFrontOfPara sp
nlList = "\n" *> return ["\n"]
nonSpace :: BS.ByteString -> Parser BS.ByteString
nonSpace xs
| not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
| otherwise = return xs
takeNonEmptyLine :: Parser String
takeNonEmptyLine = do
(++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
birdtracks :: Parser (Doc a)
birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
where
line = skipHorizontalSpace *> ">" *> takeLine
stripSpace :: [String] -> [String]
stripSpace = fromMaybe <*> mapM strip'
where
strip' (' ':xs') = Just xs'
strip' "" = Just ""
strip' _ = Nothing
examples :: Parser (Doc a)
examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
where
go :: Parser [Example]
go = do
prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
expr <- takeLine
(rs, es) <- resultAndMoreExamples
return (makeExample prefix expr rs : es)
where
resultAndMoreExamples :: Parser ([String], [Example])
resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
where
moreExamples :: Parser ([String], [Example])
moreExamples = (,) [] <$> go
result :: Parser ([String], [Example])
result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
makeExample :: String -> String -> [String] -> Example
makeExample prefix expression res =
Example (strip expression) result
where
result = map (substituteBlankLine . tryStripPrefix) res
tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
substituteBlankLine "<BLANKLINE>" = ""
substituteBlankLine xs = xs
nonEmptyLine :: Parser String
nonEmptyLine = mfilter (any (not . isSpace)) takeLine
takeLine :: Parser String
takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
endOfLine :: Parser ()
endOfLine = void "\n" <|> endOfInput
property :: Parser (Doc a)
property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
codeblock :: DynFlags -> Parser (Doc RdrName)
codeblock d =
DocCodeBlock . parseStringBS d . dropSpaces
<$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
where
dropSpaces xs =
let rs = decodeUtf8 xs
in case splitByNl rs of
[] -> xs
ys -> case last ys of
' ':_ -> case mapM dropSpace ys of
Nothing -> xs
Just zs -> encodeUtf8 $ intercalate "\n" zs
_ -> xs
splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s)
_ -> Nothing)
. ('\n' :)
dropSpace "" = Just ""
dropSpace (' ':xs) = Just xs
dropSpace _ = Nothing
block' = scan False p
where
p isNewline c
| isNewline && c == '@' = Nothing
| isNewline && isSpace c = Just isNewline
| otherwise = Just $ c == '\n'
hyperlink :: Parser (Doc a)
hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
<$> disallowNewline ("<" *> takeUntil ">")
<|> autoUrl
autoUrl :: Parser (Doc a)
autoUrl = mkLink <$> url
where
url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
mkLink :: BS.ByteString -> Doc a
mkLink s = case BS.unsnoc s of
Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
_ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
parseValid :: Parser String
parseValid = do
vs' <- many' $ utf8String "⋆" <|> return <$> idChar
let vs = concat vs'
c <- peekChar
case c of
Just '`' -> return vs
Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
<|> return vs
_ -> fail "outofvalid"
where
idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^")
<|> digit <|> letter_ascii
utf8String :: String -> Parser String
utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
identifier :: DynFlags -> Parser (Doc RdrName)
identifier dflags = do
o <- idDelim
vid <- parseValid
e <- idDelim
return $ validIdentifier o vid e
where
idDelim = char '\'' <|> char '`'
validIdentifier o ident e = case parseIdent ident of
Just identName -> DocIdentifier identName
Nothing -> DocString $ o : ident ++ [e]
parseIdent :: String -> Maybe RdrName
parseIdent str0 =
let buffer = stringToStringBuffer str0
realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
pstate = mkPState dflags buffer realSrcLc
in case unP parseIdentifier pstate of
POk _ name -> Just (unLoc name)
_ -> Nothing
strip :: String -> String
strip = (\f -> f . f) $ dropWhile isSpace . reverse
skipHorizontalSpace :: Parser ()
skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
takeHorizontalSpace :: Parser BS.ByteString
takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
makeLabeled :: (String -> Maybe String -> a) -> String -> a
makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
(uri, "") -> f uri Nothing
(uri, label) -> f uri (Just $ dropWhile isSpace label)
where
removeEscapes "" = ""
removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
removeEscapes ('\\':xs) = removeEscapes xs
removeEscapes (x:xs) = x : removeEscapes xs