{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Text.Seonbi.Html.Scanner
    ( Result (..)
    , scanHtml
    ) where

import Data.Char
import Prelude hiding (takeWhile)

import Data.Attoparsec.Text.Lazy
import Data.Map.Strict
import qualified Data.Text
import qualified Data.Text.Lazy

import Text.Seonbi.Html.Entity
import Text.Seonbi.Html.Tag
import Text.Seonbi.Html.TagStack

htmlFragments :: Parser [HtmlEntity]
htmlFragments :: Parser [HtmlEntity]
htmlFragments = do
    [HtmlEntity]
result <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option [] forall a b. (a -> b) -> a -> b
$ HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
Text.Seonbi.Html.TagStack.empty
    HtmlEntity
txt <- HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
Text.Seonbi.Html.TagStack.empty
    forall t. Chunk t => Parser t ()
endOfInput
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case HtmlEntity
txt of
        HtmlText { rawText :: HtmlEntity -> Text
rawText = Text
"" } -> [HtmlEntity]
result
        HtmlEntity
_ -> [HtmlEntity]
result forall a. [a] -> [a] -> [a]
++ [HtmlEntity
txt]

fragments :: HtmlTagStack -> Parser [HtmlEntity]
fragments :: HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
tagStack' = do
    HtmlEntity
txt <- HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
tagStack'
    ([HtmlEntity]
entities, HtmlTagStack
nextStack) <- HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity HtmlTagStack
tagStack'
    [HtmlEntity]
nextChunk <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option [] forall a b. (a -> b) -> a -> b
$ HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
nextStack
    let chunks :: [HtmlEntity]
chunks = [HtmlEntity]
entities forall a. [a] -> [a] -> [a]
++ [HtmlEntity]
nextChunk
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case HtmlEntity
txt of
        HtmlText { rawText :: HtmlEntity -> Text
rawText = Text
"" } -> [HtmlEntity]
chunks
        HtmlEntity
txt' -> HtmlEntity
txt' forall a. a -> [a] -> [a]
: [HtmlEntity]
chunks

htmlText :: HtmlTagStack -> Parser HtmlEntity
htmlText :: HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
tagStack' = do
    [Text]
texts <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' Parser Text
textFragment
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> HtmlEntity
mkText forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
texts
  where
      mkText :: Data.Text.Text -> HtmlEntity
      mkText :: Text -> HtmlEntity
mkText Text
txt = HtmlText { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', rawText :: Text
rawText = Text
txt }

textFragment :: Parser Data.Text.Text
textFragment :: Parser Text
textFragment = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
    [ (Char -> Bool) -> Parser Text
takeWhile1 (forall a. Eq a => a -> a -> Bool
/= Char
'<')
    , do
        Char
a <- Char -> Parser Char
char Char
'<'
        Char
b <- (Char -> Bool) -> Parser Char
satisfy forall a b. (a -> b) -> a -> b
$ \ Char
c ->
            Bool -> Bool
not (Char
c forall a. Eq a => a -> a -> Bool
== Char
'!' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
|| Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c)
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char] -> Text
Data.Text.pack [Char
a, Char
b]
    ]

htmlEntity :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity HtmlTagStack
tagStack' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
    [ HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlComment HtmlTagStack
tagStack'
    , HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata HtmlTagStack
tagStack'
    , HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag HtmlTagStack
tagStack'
    , HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag HtmlTagStack
tagStack'
    -- fallback:
    , (, HtmlTagStack
tagStack') forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. a -> [a] -> [a]
: []) forall b c a. (b -> c) -> (a -> b) -> a -> c
. HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
tagStack' forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
Data.Text.singleton
        forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
    ]

-- https://www.w3.org/TR/html5/syntax.html#comments
htmlComment :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlComment :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlComment HtmlTagStack
tagStack' = do
    Text
_ <- Text -> Parser Text
string Text
"<!--"
    [Text]
contents <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ (Char -> Bool) -> Parser Text
takeWhile1 (forall a. Eq a => a -> a -> Bool
/= Char
'-')
        , do
              Char
a <- Char -> Parser Char
char Char
'-'
              Char
b <- Char -> Parser Char
notChar Char
'-'
              forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char] -> Text
Data.Text.pack [Char
a, Char
b]
        , do
            Text
a <- Text -> Parser Text
string Text
"--"
            Char
b <- Char -> Parser Char
notChar Char
'>'
            forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
a Char
b
        ]
    Text
_ <- Text -> Parser Text
string Text
"-->"
    forall (m :: * -> *) a. Monad m => a -> m a
return
        ( [ HtmlComment
                { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack'
                , comment :: Text
comment = [Text] -> Text
Data.Text.concat [Text]
contents
                }
          ]
        , HtmlTagStack
tagStack'
        )

-- https://www.w3.org/TR/html5/syntax.html#cdata-sections
cdata :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata HtmlTagStack
tagStack' = do
    Text
_ <- Text -> Parser Text
string Text
"<![CDATA["
    [Text]
contents <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ (Char -> Bool) -> Parser Text
takeWhile1 (forall a. Eq a => a -> a -> Bool
/= Char
']')
        , do
            Char
a <- Char -> Parser Char
char Char
']'
            Char
b <- Char -> Parser Char
notChar Char
']'
            forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char] -> Text
Data.Text.pack [Char
a, Char
b]
        , do
            Text
a <- Text -> Parser Text
string Text
"]]"
            Char
b <- Char -> Parser Char
notChar Char
'>'
            forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
a Char
b
        ]
    Text
_ <- Text -> Parser Text
string Text
"]]>"
    forall (m :: * -> *) a. Monad m => a -> m a
return
        ( [HtmlCdata { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', text :: Text
text = [Text] -> Text
Data.Text.concat [Text]
contents }]
        , HtmlTagStack
tagStack'
        )

-- https://www.w3.org/TR/html5/syntax.html#start-tags
startTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag HtmlTagStack
tagStack' = do
    Char
_ <- Char -> Parser Char
char Char
'<'
    HtmlTag
tag' <- Parser HtmlTag
htmlTag
    [Text]
attributes <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ do
            Char
s <- Char -> Parser Char
char Char
'"'
            Text
c <- (Char -> Bool) -> Parser Text
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
'"')
            Char
e <- Char -> Parser Char
char Char
'"'
            forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
Data.Text.cons Char
s forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
c Char
e)
        , do
            Char
s <- Char -> Parser Char
char Char
'\''
            Text
c <- (Char -> Bool) -> Parser Text
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
'\'')
            Char
e <- Char -> Parser Char
char Char
'\''
            forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
Data.Text.cons Char
s forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
c Char
e)
        , (Char -> Bool) -> Parser Text
takeWhile1 forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char
c forall a. Eq a => a -> a -> Bool
/= Char
'"' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'\'' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'/' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'>'
        ]
    Char
selfClosing <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Char
' ' forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
'/'
    Char
_ <- Char -> Parser Char
char Char
'>'
    let ([HtmlEntity]
trailingEntities, HtmlTagStack
nextTagStack) =
            if Char
selfClosing forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
|| HtmlTag -> HtmlTagKind
htmlTagKind HtmlTag
tag' forall a. Eq a => a -> a -> Bool
== HtmlTagKind
Void
            then ([HtmlEndTag { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', tag :: HtmlTag
tag = HtmlTag
tag' }], HtmlTagStack
tagStack')
            else ([], HtmlTag -> HtmlTagStack -> HtmlTagStack
push HtmlTag
tag' HtmlTagStack
tagStack')
    forall (m :: * -> *) a. Monad m => a -> m a
return
        ( HtmlStartTag
            { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack'
            , tag :: HtmlTag
tag = HtmlTag
tag'
            , rawAttributes :: Text
rawAttributes = [Text] -> Text
Data.Text.concat [Text]
attributes
            } forall a. a -> [a] -> [a]
: [HtmlEntity]
trailingEntities
        , HtmlTagStack
nextTagStack
        )

-- https://www.w3.org/TR/html5/syntax.html#end-tags
endTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag HtmlTagStack
tagStack' = do
    Text
_ <- Text -> Parser Text
string Text
"</"
    HtmlTag
tag' <- Parser HtmlTag
htmlTag
    Char
_ <- Char -> Parser Char
char Char
'>'
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case HtmlTag -> HtmlTagKind
htmlTagKind HtmlTag
tag' of
        HtmlTagKind
Void -> ([], HtmlTagStack
tagStack')
        HtmlTagKind
_ ->
            let
                nextTagStack :: HtmlTagStack
nextTagStack = HtmlTag -> HtmlTagStack -> HtmlTagStack
pop HtmlTag
tag' HtmlTagStack
tagStack'
            in
                ( [HtmlEndTag { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
nextTagStack, tag :: HtmlTag
tag = HtmlTag
tag' }]
                , HtmlTagStack
nextTagStack
                )

htmlTag :: Parser HtmlTag
htmlTag :: Parser HtmlTag
htmlTag = do
    Text
name <- Parser Text
tagName
    case forall k a. Ord k => k -> Map k a -> Maybe a
Data.Map.Strict.lookup (Text -> Text
Data.Text.toLower Text
name) Map Text HtmlTag
htmlTagNames of
        Just HtmlTag
t -> forall (m :: * -> *) a. Monad m => a -> m a
return HtmlTag
t
        Maybe HtmlTag
_ -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
"failed to parse; invalid tag: " forall a. [a] -> [a] -> [a]
++ Text -> [Char]
Data.Text.unpack Text
name)

tagName :: Parser Data.Text.Text
tagName :: Parser Text
tagName = do
    Char
first <- (Char -> Bool) -> Parser Char
satisfy forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c
    Text
rest <- (Char -> Bool) -> Parser Text
takeWhile forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
c
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Text -> Text
Data.Text.cons Char
first Text
rest

scanHtml :: Data.Text.Lazy.Text -> Result [HtmlEntity]
scanHtml :: Text -> Result [HtmlEntity]
scanHtml = forall a. Parser a -> Text -> Result a
parse Parser [HtmlEntity]
htmlFragments