{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Seonbi.Html.TextNormalizer
    ( escapeHtmlEntities
    , normalizeCdata
    , normalizeText
    ) where

import Control.Exception
import Data.List

import Data.Text hiding (groupBy, map)

import Text.Seonbi.Html.Entity

-- | As 'scanHtml' may emit two or more continuous 'HtmlText' fragments even
-- if these can be represented as only one 'HtmlText' fragment, it makes
-- postprocessing hard.
--
-- The 'normalizeText' function concatenates such continuous 'HtmlText'
-- fragments into one if possible so that postprocessing can be easy:
--
-- >>> :set -XOverloadedStrings -XOverloadedLists
-- >>> normalizeText [HtmlText [] "Hello, ", HtmlText [] "world!"]
-- [HtmlText {tagStack = fromList [], rawText = "Hello, world!"}]
--
-- It also transforms all 'HtmlCdata' fragments into an 'HtmlText' together.
--
-- >>> :{
-- normalizeText [ HtmlText [] "foo "
--               , HtmlCdata [] "<bar>", HtmlText [] " baz!"
--               ]
-- :}
-- [HtmlText {tagStack = fromList [], rawText = "foo &lt;bar&gt; baz!"}]
normalizeText :: [HtmlEntity] -> [HtmlEntity]
normalizeText :: [HtmlEntity] -> [HtmlEntity]
normalizeText [HtmlEntity]
fragments =
    [ case forall a b. (a -> b) -> [a] -> [b]
map HtmlEntity -> HtmlEntity
normalizeCdata [HtmlEntity]
frags of
        [HtmlEntity
f] ->
            HtmlEntity
f
        frags' :: [HtmlEntity]
frags'@(HtmlText { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
s }:[HtmlEntity]
_) ->
            HtmlText
                { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
s
                , rawText :: Text
rawText = [Text] -> Text
Data.Text.concat forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map HtmlEntity -> Text
rawText [HtmlEntity]
frags'
                }
        [HtmlEntity]
frags' ->
            forall a e. Exception e => e -> a
throw forall a b. (a -> b) -> a -> b
$ [Char] -> AssertionFailed
AssertionFailed
                ([Char]
"Unexpected error occured; grouping does not work well: " forall a. [a] -> [a] -> [a]
++
                    forall a. Show a => a -> [Char]
show [HtmlEntity]
frags')
    | [HtmlEntity]
frags <- forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy HtmlEntity -> HtmlEntity -> Bool
isSibling [HtmlEntity]
fragments
    ]
  where
    isSibling :: HtmlEntity -> HtmlEntity -> Bool
    isSibling :: HtmlEntity -> HtmlEntity -> Bool
isSibling HtmlText  { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
a } HtmlText  { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
b } = HtmlTagStack
a forall a. Eq a => a -> a -> Bool
== HtmlTagStack
b
    isSibling HtmlText  { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
a } HtmlCdata { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
b } = HtmlTagStack
a forall a. Eq a => a -> a -> Bool
== HtmlTagStack
b
    isSibling HtmlCdata { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
a } HtmlText  { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
b } = HtmlTagStack
a forall a. Eq a => a -> a -> Bool
== HtmlTagStack
b
    isSibling HtmlCdata { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
a } HtmlCdata { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
b } = HtmlTagStack
a forall a. Eq a => a -> a -> Bool
== HtmlTagStack
b
    isSibling HtmlEntity
_ HtmlEntity
_ = Bool
False

-- | Transform a given 'HtmlCdata' node into an equivalent 'HtmlText' node.
--
-- >>> import Text.Seonbi.Html.Tag
-- >>> normalizeCdata HtmlCdata { tagStack = [P], text = "<p id=\"foo\">" }
-- HtmlText {tagStack = fromList [P], rawText = "&lt;p id=&quot;foo&quot;&gt;"}
normalizeCdata :: HtmlEntity -> HtmlEntity
normalizeCdata :: HtmlEntity -> HtmlEntity
normalizeCdata HtmlCdata { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
s, text :: HtmlEntity -> Text
text = Text
t } =
    HtmlText { tagStack :: HtmlTagStack
tagStack = HtmlTagStack
s, rawText :: Text
rawText = Text -> Text
escapeHtmlEntities Text
t }
normalizeCdata HtmlEntity
entity = HtmlEntity
entity

-- | Escape special (control) characters into corresponding character entities
-- in the given HTML text.
--
-- >>> escapeHtmlEntities "<foo & \"bar\">"
-- "&lt;foo &amp; &quot;bar&quot;&gt;"
escapeHtmlEntities :: Text -> Text
escapeHtmlEntities :: Text -> Text
escapeHtmlEntities =
    (Char -> Text) -> Text -> Text
Data.Text.concatMap forall a b. (a -> b) -> a -> b
$ \ case
        Char
'<' -> Text
"&lt;"
        Char
'>' -> Text
"&gt;"
        Char
'&' -> Text
"&amp;"
        Char
'"' -> Text
"&quot;"
        Char
c -> Char -> Text
Data.Text.singleton Char
c