{-# LANGUAGE LambdaCase #-}
module Text.Seonbi.Html.Clipper
    ( clipPrefixText
    , clipSuffixText
    , clipText
    ) where

import Control.Monad
import Data.List (dropWhileEnd)

import Data.Text

import Text.Seonbi.Html

-- | Clip the given prefix text and suffix text from the HTML fragments.
-- It simply is composed of 'clipPrefixText' and 'clipSuffixText' functions.
-- It returns 'Nothing' if any of a prefix and a suffix does not match.
clipText :: Text -> Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipText :: Text -> Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipText Text
prefix Text
suffix =
    Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipSuffixText Text
suffix forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=< Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipPrefixText Text
prefix

-- | Clip the given prefix text from the HTML fragments.  If its first
-- text element does not have the same prefix, or the first element is not
-- an 'HtmlText' node, or the list of HTML fragments have nothing at all,
-- it returns 'Nothing'.
--
-- >>> :set -XOverloadedLists
-- >>> :set -XOverloadedStrings
-- >>> clipPrefixText "foo" [HtmlText [] "bar", HtmlStartTag [] P ""]
-- Nothing
-- >>> clipPrefixText "foo" [HtmlStartTag [] P "", HtmlText [] "foo"]
-- Nothing
-- >>> clipPrefixText "foo" []
-- Nothing
--
-- If the first element is an 'HtmlText' node, and its 'rawText' contains
-- the common prefix text, it returns a 'Just' value holding a list of
-- HTML fragments with the common prefix removed.
--
-- >>> clipPrefixText "foo" [HtmlText [] "foobar", HtmlStartTag [] P ""]
-- Just [HtmlText {... "bar"},HtmlStartTag {...}]
-- >>> clipPrefixText "foo" [HtmlText [] "foo", HtmlStartTag [] P ""]
-- Just [HtmlStartTag {..., tag = P, ...}]
--
-- A given text is treated as a raw text, which means even if some HTML
-- entities refer to the same characters it may fails to match unless
-- they share the exactly same representation, e.g.:
--
-- >>> clipPrefixText "&amp;" [HtmlText [] "&AMP;"]
-- Nothing
--
-- In the same manner, it doesn't find a prefix from 'HtmlCdata', e.g.:
--
-- >>> clipPrefixText "foo" [HtmlCdata [] "foo", HtmlStartTag [] P ""]
-- Nothing
--
-- In order to remove a prefix from both 'HtmlText' and 'HtmlCdata',
-- apply 'normalizeText' first so that all 'HtmlCdata' entities are transformed
-- to equivalent 'HtmlText' entities:
--
-- >>> import Text.Seonbi.Html.TextNormalizer (normalizeText)
-- >>> let normalized = normalizeText [HtmlCdata [] "foo", HtmlStartTag [] P ""]
-- >>> clipPrefixText "foo" normalized
-- Just [HtmlStartTag {..., tag = P, ...}]
--
-- Plus, it works even if HTML fragments contain some 'HtmlComment' entities,
-- but these are not touched at all, e.g.:
--
-- >>> clipPrefixText "bar" [HtmlComment [] "foo", HtmlText [] "barbaz"]
-- Just [HtmlComment {... "foo"},HtmlText {... "baz"}]
clipPrefixText :: Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipPrefixText :: Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipPrefixText Text
prefix []
  | Text -> Bool
Data.Text.null Text
prefix = forall a. a -> Maybe a
Just []
  | Bool
otherwise = forall a. Maybe a
Nothing
clipPrefixText Text
prefix (x :: HtmlEntity
x@HtmlComment {} : [HtmlEntity]
xs) =
    (HtmlEntity
x forall a. a -> [a] -> [a]
:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipPrefixText Text
prefix [HtmlEntity]
xs
clipPrefixText Text
prefix (x :: HtmlEntity
x@HtmlText { rawText :: HtmlEntity -> Text
rawText = Text
rawText' } : [HtmlEntity]
xs)
  | Text
prefix forall a. Eq a => a -> a -> Bool
== Text
rawText' = forall a. a -> Maybe a
Just [HtmlEntity]
xs
  | Text
prefix Text -> Text -> Bool
`isPrefixOf` Text
rawText' = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$
      HtmlEntity
x { rawText :: Text
rawText = Int -> Text -> Text
Data.Text.drop (Text -> Int
Data.Text.length Text
prefix) Text
rawText' } forall a. a -> [a] -> [a]
: [HtmlEntity]
xs
  | Bool
otherwise = forall a. Maybe a
Nothing
clipPrefixText Text
_ [HtmlEntity]
_ = forall a. Maybe a
Nothing

-- | Clip the given suffix text from the HTML fragments, in the same manner
-- to 'clipPrefixText'.
clipSuffixText :: Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipSuffixText :: Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipSuffixText Text
suffix []
  | Text -> Bool
Data.Text.null Text
suffix = forall a. a -> Maybe a
Just []
  | Bool
otherwise = forall a. Maybe a
Nothing
clipSuffixText Text
suffix [HtmlEntity]
entities =
    case forall a. [a] -> a
Prelude.last [HtmlEntity]
entities' of
        e :: HtmlEntity
e@HtmlText { rawText :: HtmlEntity -> Text
rawText = Text
rawText' }
          | Text
suffix forall a. Eq a => a -> a -> Bool
== Text
rawText' -> forall a. a -> Maybe a
Just ([HtmlEntity]
init' forall a. [a] -> [a] -> [a]
++ [HtmlEntity]
comments)
          | Text
suffix Text -> Text -> Bool
`isSuffixOf` Text
rawText' ->
              let
                  sLen :: Int
sLen = Text -> Int
Data.Text.length Text
suffix
                  rtLen :: Int
rtLen = Text -> Int
Data.Text.length Text
rawText'
                  clipped :: Text
clipped = Int -> Text -> Text
Data.Text.take (Int
rtLen forall a. Num a => a -> a -> a
- Int
sLen) Text
rawText'
              in
                  forall a. a -> Maybe a
Just ([HtmlEntity]
init' forall a. [a] -> [a] -> [a]
++ HtmlEntity
e { rawText :: Text
rawText = Text
clipped } forall a. a -> [a] -> [a]
: [HtmlEntity]
comments)
          | Bool
otherwise -> forall a. Maybe a
Nothing
        HtmlEntity
_ -> forall a. Maybe a
Nothing
  where
    entities' :: [HtmlEntity]
    entities' :: [HtmlEntity]
entities' = (forall a. (a -> Bool) -> [a] -> [a]
`Data.List.dropWhileEnd` [HtmlEntity]
entities) forall a b. (a -> b) -> a -> b
$ \ case
        HtmlComment {} -> Bool
True
        HtmlEntity
_ -> Bool
False
    init' :: [HtmlEntity]
    init' :: [HtmlEntity]
init' = forall a. [a] -> [a]
Prelude.init [HtmlEntity]
entities'
    comments :: [HtmlEntity]
    comments :: [HtmlEntity]
comments = forall a. Int -> [a] -> [a]
Prelude.drop (forall (t :: * -> *) a. Foldable t => t a -> Int
Prelude.length [HtmlEntity]
entities') [HtmlEntity]
entities