{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Seonbi.Html.TextNormalizer
( escapeHtmlEntities
, normalizeCdata
, normalizeText
) where
import Control.Exception
import Data.List
import Data.Text hiding (groupBy, map)
import Text.Seonbi.Html.Entity
-- | Merge runs of adjacent textual entities into single 'HtmlText' entities.
--
-- Consecutive 'HtmlText' and 'HtmlCdata' entities whose 'tagStack's are equal
-- are grouped together.  Every member of a group is first passed through
-- 'normalizeCdata', and the resulting 'rawText's are concatenated into one
-- 'HtmlText' that carries the 'tagStack' of the group's first member.
-- Any other kind of entity forms a group of its own and is returned as is.
--
-- Throws 'AssertionFailed' if a group is neither a single entity nor a run
-- that starts with an 'HtmlText' after normalization.  Since @isSibling@ only
-- ever groups 'HtmlText' and 'HtmlCdata' entities, that branch is not
-- expected to be reached.
normalizeText :: [HtmlEntity] -> [HtmlEntity]
normalizeText fragments =
    [ case map normalizeCdata frags of
        -- A lone entity (textual or not) needs no merging.
        [f] ->
            f
        -- Two or more textual siblings: collapse them into one text node.
        frags'@(HtmlText { tagStack = s } : _) ->
            HtmlText
                { tagStack = s
                , rawText = Data.Text.concat $ map rawText frags'
                }
        frags' ->
            throw $ AssertionFailed
                ( "Unexpected error occured; grouping does not work well: "
                ++ show frags'
                )
    | frags <- groupBy isSibling fragments
    ]
  where
    -- Two entities are siblings when both are textual (text or CDATA) and
    -- their tag stacks are equal.
    isSibling :: HtmlEntity -> HtmlEntity -> Bool
    isSibling a b =
        case (textualStack a, textualStack b) of
            (Just stackA, Just stackB) -> stackA == stackB
            _ -> False
    -- The tag stack of a textual entity; Nothing for every other entity.
    textualStack HtmlText { tagStack = s } = Just s
    textualStack HtmlCdata { tagStack = s } = Just s
    textualStack _ = Nothing
-- | Turn an 'HtmlCdata' entity into the equivalent 'HtmlText' entity.
--
-- The 'tagStack' is kept and the CDATA 'text' becomes the 'rawText' after
-- being passed through 'escapeHtmlEntities'.  Every other kind of entity is
-- returned unchanged.
normalizeCdata :: HtmlEntity -> HtmlEntity
normalizeCdata HtmlCdata { tagStack = s, text = t } =
    HtmlText { tagStack = s, rawText = escapeHtmlEntities t }
normalizeCdata entity = entity
-- | Escape the characters @<@, @>@, @&@ and @"@ as the HTML entities
-- @&lt;@, @&gt;@, @&amp;@ and @&quot;@ respectively.  All other characters
-- (including the single quote) are kept as they are.
--
-- >>> escapeHtmlEntities "<foo & \"bar\">"
-- "&lt;foo &amp; &quot;bar&quot;&gt;"
escapeHtmlEntities :: Text -> Text
escapeHtmlEntities =
    Data.Text.concatMap $ \case
        '<' -> "&lt;"
        '>' -> "&gt;"
        '&' -> "&amp;"
        '"' -> "&quot;"
        c -> Data.Text.singleton c