{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Text.Seonbi.Html.Scanner
( Result (..)
, scanHtml
) where
import Data.Char
import Prelude hiding (takeWhile)
import Data.Attoparsec.Text.Lazy
import Data.Map.Strict
import qualified Data.Text
import qualified Data.Text.Lazy
import Text.Seonbi.Html.Entity
import Text.Seonbi.Html.Tag
import Text.Seonbi.Html.TagStack
htmlFragments :: Parser [HtmlEntity]
htmlFragments :: Parser [HtmlEntity]
htmlFragments = do
[HtmlEntity]
result <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option [] forall a b. (a -> b) -> a -> b
$ HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
Text.Seonbi.Html.TagStack.empty
HtmlEntity
txt <- HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
Text.Seonbi.Html.TagStack.empty
forall t. Chunk t => Parser t ()
endOfInput
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case HtmlEntity
txt of
HtmlText { rawText :: HtmlEntity -> Text
rawText = Text
"" } -> [HtmlEntity]
result
HtmlEntity
_ -> [HtmlEntity]
result forall a. [a] -> [a] -> [a]
++ [HtmlEntity
txt]
fragments :: HtmlTagStack -> Parser [HtmlEntity]
fragments :: HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
tagStack' = do
HtmlEntity
txt <- HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
tagStack'
([HtmlEntity]
entities, HtmlTagStack
nextStack) <- HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity HtmlTagStack
tagStack'
[HtmlEntity]
nextChunk <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option [] forall a b. (a -> b) -> a -> b
$ HtmlTagStack -> Parser [HtmlEntity]
fragments HtmlTagStack
nextStack
let chunks :: [HtmlEntity]
chunks = [HtmlEntity]
entities forall a. [a] -> [a] -> [a]
++ [HtmlEntity]
nextChunk
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case HtmlEntity
txt of
HtmlText { rawText :: HtmlEntity -> Text
rawText = Text
"" } -> [HtmlEntity]
chunks
HtmlEntity
txt' -> HtmlEntity
txt' forall a. a -> [a] -> [a]
: [HtmlEntity]
chunks
htmlText :: HtmlTagStack -> Parser HtmlEntity
htmlText :: HtmlTagStack -> Parser HtmlEntity
htmlText HtmlTagStack
tagStack' = do
[Text]
texts <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' Parser Text
textFragment
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> HtmlEntity
mkText forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
texts
where
mkText :: Data.Text.Text -> HtmlEntity
mkText :: Text -> HtmlEntity
mkText Text
txt = HtmlText { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', rawText :: Text
rawText = Text
txt }
textFragment :: Parser Data.Text.Text
textFragment :: Parser Text
textFragment = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ (Char -> Bool) -> Parser Text
takeWhile1 (forall a. Eq a => a -> a -> Bool
/= Char
'<')
, do
Char
a <- Char -> Parser Char
char Char
'<'
Char
b <- (Char -> Bool) -> Parser Char
satisfy forall a b. (a -> b) -> a -> b
$ \ Char
c ->
Bool -> Bool
not (Char
c forall a. Eq a => a -> a -> Bool
== Char
'!' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
|| Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c)
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char] -> Text
Data.Text.pack [Char
a, Char
b]
]
htmlEntity :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlEntity HtmlTagStack
tagStack' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
htmlComment HtmlTagStack
tagStack'
, HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata HtmlTagStack
tagStack'
, HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag HtmlTagStack
tagStack'
, HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag HtmlTagStack
tagStack'
, (, HtmlTagStack
tagStack') forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. a -> [a] -> [a]
: []) forall b c a. (b -> c) -> (a -> b) -> a -> c
. HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
tagStack' forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
Data.Text.singleton
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
]
htmlComment :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
HtmlTagStack
tagStack' = do
Text
_ <- Text -> Parser Text
string Text
"<!--"
[Text]
contents <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ (Char -> Bool) -> Parser Text
takeWhile1 (forall a. Eq a => a -> a -> Bool
/= Char
'-')
, do
Char
a <- Char -> Parser Char
char Char
'-'
Char
b <- Char -> Parser Char
notChar Char
'-'
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char] -> Text
Data.Text.pack [Char
a, Char
b]
, do
Text
a <- Text -> Parser Text
string Text
"--"
Char
b <- Char -> Parser Char
notChar Char
'>'
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
a Char
b
]
Text
_ <- Text -> Parser Text
string Text
"-->"
forall (m :: * -> *) a. Monad m => a -> m a
return
( [ HtmlComment
{ tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack'
, comment :: Text
comment = [Text] -> Text
Data.Text.concat [Text]
contents
}
]
, HtmlTagStack
tagStack'
)
cdata :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
cdata HtmlTagStack
tagStack' = do
Text
_ <- Text -> Parser Text
string Text
"<![CDATA["
[Text]
contents <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ (Char -> Bool) -> Parser Text
takeWhile1 (forall a. Eq a => a -> a -> Bool
/= Char
']')
, do
Char
a <- Char -> Parser Char
char Char
']'
Char
b <- Char -> Parser Char
notChar Char
']'
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char] -> Text
Data.Text.pack [Char
a, Char
b]
, do
Text
a <- Text -> Parser Text
string Text
"]]"
Char
b <- Char -> Parser Char
notChar Char
'>'
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
a Char
b
]
Text
_ <- Text -> Parser Text
string Text
"]]>"
forall (m :: * -> *) a. Monad m => a -> m a
return
( [HtmlCdata { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', text :: Text
text = [Text] -> Text
Data.Text.concat [Text]
contents }]
, HtmlTagStack
tagStack'
)
startTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
startTag HtmlTagStack
tagStack' = do
Char
_ <- Char -> Parser Char
char Char
'<'
HtmlTag
tag' <- Parser HtmlTag
htmlTag
[Text]
attributes <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ do
Char
s <- Char -> Parser Char
char Char
'"'
Text
c <- (Char -> Bool) -> Parser Text
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
'"')
Char
e <- Char -> Parser Char
char Char
'"'
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
Data.Text.cons Char
s forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
c Char
e)
, do
Char
s <- Char -> Parser Char
char Char
'\''
Text
c <- (Char -> Bool) -> Parser Text
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
'\'')
Char
e <- Char -> Parser Char
char Char
'\''
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
Data.Text.cons Char
s forall a b. (a -> b) -> a -> b
$ Text -> Char -> Text
Data.Text.snoc Text
c Char
e)
, (Char -> Bool) -> Parser Text
takeWhile1 forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char
c forall a. Eq a => a -> a -> Bool
/= Char
'"' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'\'' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'/' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'>'
]
Char
selfClosing <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Char
' ' forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
'/'
Char
_ <- Char -> Parser Char
char Char
'>'
let ([HtmlEntity]
trailingEntities, HtmlTagStack
nextTagStack) =
if Char
selfClosing forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
|| HtmlTag -> HtmlTagKind
htmlTagKind HtmlTag
tag' forall a. Eq a => a -> a -> Bool
== HtmlTagKind
Void
then ([HtmlEndTag { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack', tag :: HtmlTag
tag = HtmlTag
tag' }], HtmlTagStack
tagStack')
else ([], HtmlTag -> HtmlTagStack -> HtmlTagStack
push HtmlTag
tag' HtmlTagStack
tagStack')
forall (m :: * -> *) a. Monad m => a -> m a
return
( HtmlStartTag
{ tagStack :: HtmlTagStack
tagStack = HtmlTagStack
tagStack'
, tag :: HtmlTag
tag = HtmlTag
tag'
, rawAttributes :: Text
rawAttributes = [Text] -> Text
Data.Text.concat [Text]
attributes
} forall a. a -> [a] -> [a]
: [HtmlEntity]
trailingEntities
, HtmlTagStack
nextTagStack
)
endTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag :: HtmlTagStack -> Parser ([HtmlEntity], HtmlTagStack)
endTag HtmlTagStack
tagStack' = do
Text
_ <- Text -> Parser Text
string Text
"</"
HtmlTag
tag' <- Parser HtmlTag
htmlTag
Char
_ <- Char -> Parser Char
char Char
'>'
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case HtmlTag -> HtmlTagKind
htmlTagKind HtmlTag
tag' of
HtmlTagKind
Void -> ([], HtmlTagStack
tagStack')
HtmlTagKind
_ ->
let
nextTagStack :: HtmlTagStack
nextTagStack = HtmlTag -> HtmlTagStack -> HtmlTagStack
pop HtmlTag
tag' HtmlTagStack
tagStack'
in
( [HtmlEndTag { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
nextTagStack, tag :: HtmlTag
tag = HtmlTag
tag' }]
, HtmlTagStack
nextTagStack
)
htmlTag :: Parser HtmlTag
htmlTag :: Parser HtmlTag
htmlTag = do
Text
name <- Parser Text
tagName
case forall k a. Ord k => k -> Map k a -> Maybe a
Data.Map.Strict.lookup (Text -> Text
Data.Text.toLower Text
name) Map Text HtmlTag
htmlTagNames of
Just HtmlTag
t -> forall (m :: * -> *) a. Monad m => a -> m a
return HtmlTag
t
Maybe HtmlTag
_ -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
"failed to parse; invalid tag: " forall a. [a] -> [a] -> [a]
++ Text -> [Char]
Data.Text.unpack Text
name)
tagName :: Parser Data.Text.Text
tagName :: Parser Text
tagName = do
Char
first <- (Char -> Bool) -> Parser Char
satisfy forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c
Text
rest <- (Char -> Bool) -> Parser Text
takeWhile forall a b. (a -> b) -> a -> b
$ \ Char
c -> Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
c
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Text -> Text
Data.Text.cons Char
first Text
rest
scanHtml :: Data.Text.Lazy.Text -> Result [HtmlEntity]
scanHtml :: Text -> Result [HtmlEntity]
scanHtml = forall a. Parser a -> Text -> Result a
parse Parser [HtmlEntity]
htmlFragments