{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
-- | This module deals with punctuations in Korean text.
module Text.Seonbi.Punctuation
    ( -- * Arrows
      ArrowTransformationOption (..)
    , transformArrow
      -- * Quotes
    , CitationQuotes (..)
    , Quotes (..)
    , QuotePair (..)
    , angleQuotes
    , cornerBrackets
    , curvedQuotes
    , curvedSingleQuotesWithQ
    , guillemets
    , horizontalCornerBrackets
    , horizontalCornerBracketsWithQ
    , quoteCitation
    , transformQuote
    , verticalCornerBrackets
    , verticalCornerBracketsWithQ
      -- * Stops: periods, commas, & interpuncts
    , Stops (..)
    , horizontalStops
    , horizontalStopsWithSlashes
    , normalizeStops
    , transformEllipsis
    , verticalStops
      -- * Dashes
    , transformEmDash
    ) where

import Prelude hiding (takeWhile)

import Control.Monad
import Data.Char (isSpace)
import Data.Either
import Data.List (minimumBy)
import Data.Maybe
import Data.Ord
import Numeric

import Data.Attoparsec.Text
import Data.Set
import Data.Text hiding (any, length, takeWhile)
import qualified Data.Text

import Text.Seonbi.Html
import Text.Seonbi.Html.Clipper
import Text.Seonbi.Html.Lang
import Text.Seonbi.Html.Preservation
import Text.Seonbi.Html.Wrapper
import Text.Seonbi.PairedTransformer

-- | A set of quoting parentheses to be used by 'quoteCitation' function.
--
-- There are two presets: 'angleQuotes' and 'cornerBrackets'.  These both
-- surround titles with a @\<cite>@ tag.  In order to disable surrounded
-- elements, set 'htmlElement' field to 'Nothing', e.g.:
--
-- @
-- 'angleQuotes' { 'htmlElement' = 'Nothing' }
-- @
data CitationQuotes = CitationQuotes
    { -- | The leading and trailing punctuations to surround a title of
      -- novel, newspaper, magazine, movie, television program, etc.
      CitationQuotes -> (Text, Text)
title :: (Text, Text)
    , -- | The leading and trailing punctuations to surround a title of
      -- short story, chapter, article, episode, etc.
      CitationQuotes -> (Text, Text)
subtitle :: (Text, Text)
    , -- | Optional pair of an HTML element and its attributes to surround
      -- citations.  E.g., if it is @'Just' ('Cite', " class=\"autogen\")@
      -- titles are transformed like @\<cite class="autogen">이런 날\</cite>@.
      CitationQuotes -> Maybe (HtmlTag, Text)
htmlElement :: Maybe (HtmlTag, HtmlRawAttrs)
    } deriving (CitationQuotes -> CitationQuotes -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CitationQuotes -> CitationQuotes -> Bool
$c/= :: CitationQuotes -> CitationQuotes -> Bool
== :: CitationQuotes -> CitationQuotes -> Bool
$c== :: CitationQuotes -> CitationQuotes -> Bool
Eq, Int -> CitationQuotes -> ShowS
[CitationQuotes] -> ShowS
CitationQuotes -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CitationQuotes] -> ShowS
$cshowList :: [CitationQuotes] -> ShowS
show :: CitationQuotes -> String
$cshow :: CitationQuotes -> String
showsPrec :: Int -> CitationQuotes -> ShowS
$cshowsPrec :: Int -> CitationQuotes -> ShowS
Show)

-- | Cite a title using angle quotes, used by South Korean orthography in
-- horizontal writing (橫書), e.g., 《나비와 엉겅퀴》 or 〈枾崎의 바다〉.
angleQuotes :: CitationQuotes
angleQuotes :: CitationQuotes
angleQuotes = CitationQuotes
    { title :: (Text, Text)
title = (Text
"&#12298;", Text
"&#12299;")
    , subtitle :: (Text, Text)
subtitle = (Text
"&#12296;", Text
"&#12297;")
    , htmlElement :: Maybe (HtmlTag, Text)
htmlElement = forall a. a -> Maybe a
Just (HtmlTag
Cite, Text
"")
    }

-- | Cite a title using corner brackets, used by South Korean orthography in
-- vertical writing (縱書) and Japanese orthography,
-- e.g., 『나비와 엉겅퀴』 or 「枾崎의 바다」.
cornerBrackets :: CitationQuotes
cornerBrackets :: CitationQuotes
cornerBrackets = CitationQuotes
    { title :: (Text, Text)
title = (Text
"&#12302;", Text
"&#12303;")
    , subtitle :: (Text, Text)
subtitle = (Text
"&#12300;", Text
"&#12301;")
    , htmlElement :: Maybe (HtmlTag, Text)
htmlElement = forall a. a -> Maybe a
Just (HtmlTag
Cite, Text
"")
    }

-- | People tend to cite the title of a work (e.g., a book, a paper, a poem,
-- a song, a film, a TV show, a game) by wrapping inequality symbols
-- like @\<\<나비와 엉겅퀴>>@ or @\<枾崎의 바다>@ instead of proper angle quotes
-- like @《나비와 엉겅퀴》@ or @〈枾崎의 바다〉@.
--
-- This transforms, in the given HTML fragments, all folk-citing quotes into
-- typographic citing quotes:
--
-- - Pairs of less-than and greater-than inequality symbols (@<@ & @>@) into
--   pairs of proper angle quotes (@〈@ & @〉@)
-- - Pairs of two consecutive inequality symbols (@<<@ & @>>@) into
--   pairs of proper double angle quotes (@《@ & @》@)
quoteCitation :: CitationQuotes -- ^ Quoting parentheses to wrap titles.
              -> [HtmlEntity] -- ^ The input HTML entities to transform.
              -> [HtmlEntity]
quoteCitation :: CitationQuotes -> [HtmlEntity] -> [HtmlEntity]
quoteCitation CitationQuotes
quotes =
    forall m. PairedTransformer m -> [HtmlEntity] -> [HtmlEntity]
transformPairs PairedTransformer TitlePunct
pairedTransformer
  where
    pairedTransformer :: PairedTransformer TitlePunct
    pairedTransformer :: PairedTransformer TitlePunct
pairedTransformer = PairedTransformer
        { ignoresTagStack :: HtmlTagStack -> Bool
ignoresTagStack = HtmlTagStack -> Bool
isPreservedTagStack
        , matchStart :: [TitlePunct] -> Text -> Maybe (TitlePunct, Text, Text, Text)
matchStart = \ [TitlePunct]
_ -> Parser [Either Text (TitlePunct, Text, Text)]
-> Text -> Maybe (TitlePunct, Text, Text, Text)
matcher forall a b. (a -> b) -> a -> b
$ Parser Text (TitlePunct, Text)
-> Parser Text (TitlePunct, Text)
-> Parser [Either Text (TitlePunct, Text, Text)]
parser Parser Text (TitlePunct, Text)
openTitle Parser Text (TitlePunct, Text)
openSubtitle
        , matchEnd :: Text -> Maybe (TitlePunct, Text, Text, Text)
matchEnd = Parser [Either Text (TitlePunct, Text, Text)]
-> Text -> Maybe (TitlePunct, Text, Text, Text)
matcher forall a b. (a -> b) -> a -> b
$ Parser Text (TitlePunct, Text)
-> Parser Text (TitlePunct, Text)
-> Parser [Either Text (TitlePunct, Text, Text)]
parser Parser Text (TitlePunct, Text)
closeTitle Parser Text (TitlePunct, Text)
closeSubtitle
        , areMatchesPaired :: TitlePunct -> TitlePunct -> Bool
areMatchesPaired = forall a. Eq a => a -> a -> Bool
(==)
        , transformPair :: TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
transformPair = TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
transformPair'
        }
    transformPair' :: TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
    transformPair' :: TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
transformPair' TitlePunct
punct TitlePunct
_ [HtmlEntity]
buffer =
        case [HtmlEntity]
cited of
            [] -> []
            entities :: [HtmlEntity]
entities@(HtmlEntity
x : [HtmlEntity]
_) ->
                let
                    ts :: HtmlTagStack
ts = HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
x
                    makeText :: Text -> HtmlEntity
makeText = HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
ts
                    category :: CitationQuotes -> (Text, Text)
category = case TitlePunct
punct of
                        TitlePunct
DoubleAngle -> CitationQuotes -> (Text, Text)
title
                        TitlePunct
DoubleCorner -> CitationQuotes -> (Text, Text)
title
                        TitlePunct
DoubleInequal -> CitationQuotes -> (Text, Text)
title
                        TitlePunct
Angle -> CitationQuotes -> (Text, Text)
subtitle
                        TitlePunct
Corner -> CitationQuotes -> (Text, Text)
subtitle
                        TitlePunct
Inequal -> CitationQuotes -> (Text, Text)
subtitle
                    (Text
startP, Text
endP) = CitationQuotes -> (Text, Text)
category CitationQuotes
quotes
                in
                    Text -> HtmlEntity
makeText Text
startP forall a. a -> [a] -> [a]
: [HtmlEntity]
entities forall a. [a] -> [a] -> [a]
++ [Text -> HtmlEntity
makeText Text
endP]
      where
        buffer' :: [HtmlEntity]
        buffer' :: [HtmlEntity]
buffer' = forall a. Int -> [a] -> [a]
Prelude.drop Int
1 forall a b. (a -> b) -> a -> b
$ forall a. Int -> [a] -> [a]
Prelude.take (forall (t :: * -> *) a. Foldable t => t a -> Int
length [HtmlEntity]
buffer forall a. Num a => a -> a -> a
- Int
1) [HtmlEntity]
buffer
        cited :: [HtmlEntity]
        cited :: [HtmlEntity]
cited = case (CitationQuotes -> Maybe (HtmlTag, Text)
htmlElement CitationQuotes
quotes, [HtmlEntity]
buffer') of
            (Maybe (HtmlTag, Text)
Nothing, [HtmlEntity]
b) -> [HtmlEntity]
b
            (Maybe (HtmlTag, Text)
_, []) -> []
            (Just (HtmlTag
tag', Text
""), HtmlEntity
x : [HtmlEntity]
_) ->
                if [HtmlEntity]
buffer' [HtmlEntity] -> HtmlTag -> Bool
`isWrappedBy` HtmlTag
tag'
                    then [HtmlEntity]
buffer'
                    else HtmlTagStack -> HtmlTag -> Text -> [HtmlEntity] -> [HtmlEntity]
wrap (HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
x) HtmlTag
tag' Text
"" [HtmlEntity]
buffer'
            (Just (HtmlTag
tag', Text
attrs), HtmlEntity
x : [HtmlEntity]
_) ->
                if [HtmlEntity] -> HtmlTag -> Maybe Text -> Bool
isWrappedBy' [HtmlEntity]
buffer' HtmlTag
tag' (forall a. a -> Maybe a
Just Text
attrs)
                    then [HtmlEntity]
buffer'
                    else HtmlTagStack -> HtmlTag -> Text -> [HtmlEntity] -> [HtmlEntity]
wrap (HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
x) HtmlTag
tag' Text
attrs [HtmlEntity]
buffer'
    specialChars :: Set Char
    specialChars :: Set Char
specialChars =
        [ Char
'<', Char
'>', Char
'&'
        , Char
'\x3008', Char
'\x3009', Char
'\x300a', Char
'\x300b', Char
'\x300e', Char
'\x300f'
        ]
    matcher :: Parser [Either Text (TitlePunct, Text, Text)]
            -> Text
            -> Maybe (TitlePunct, Text, Text, Text)
    matcher :: Parser [Either Text (TitlePunct, Text, Text)]
-> Text -> Maybe (TitlePunct, Text, Text, Text)
matcher Parser [Either Text (TitlePunct, Text, Text)]
parser' Text
text' = case forall a. Parser a -> Text -> Either String a
parseOnly Parser [Either Text (TitlePunct, Text, Text)]
parser' Text
text' of
        Left String
_ -> forall a. Maybe a
Nothing
        Right [Either Text (TitlePunct, Text, Text)]
matches -> case forall a b. [Either a b] -> ([a], [b])
partitionEithers [Either Text (TitlePunct, Text, Text)]
matches of
            ([Text]
l, [(TitlePunct
punct, Text
m, Text
post)]) -> forall a. a -> Maybe a
Just (TitlePunct
punct, [Text] -> Text
Data.Text.concat [Text]
l, Text
m, Text
post)
            ([Text], [(TitlePunct, Text, Text)])
_ -> forall a. Maybe a
Nothing
    parser :: Parser (TitlePunct, Text)
           -> Parser (TitlePunct, Text)
           -> Parser [Either Text (TitlePunct, Text, Text)]
    parser :: Parser Text (TitlePunct, Text)
-> Parser Text (TitlePunct, Text)
-> Parser [Either Text (TitlePunct, Text, Text)]
parser Parser Text (TitlePunct, Text)
title' Parser Text (TitlePunct, Text)
subtitle' = forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text Text
takeWhile1 (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` Set Char
specialChars)
        , do
            (TitlePunct
punct, Text
m) <- Parser Text (TitlePunct, Text)
title'
            Text
remain <- (Char -> Bool) -> Parser Text Text
takeWhile (forall a b. a -> b -> a
const Bool
True)
            forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (TitlePunct
punct, Text
m, Text
remain)
        , do
            (TitlePunct
punct, Text
m) <- Parser Text (TitlePunct, Text)
subtitle'
            Text
remain <- (Char -> Bool) -> Parser Text Text
takeWhile (forall a b. a -> b -> a
const Bool
True)
            forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (TitlePunct
punct, Text
m, Text
remain)
        , forall a b. a -> Either a b
Left forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
        ]
    openTitle :: Parser (TitlePunct, Text)
    openTitle :: Parser Text (TitlePunct, Text)
openTitle = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser Text (TitlePunct, Text)
leftDoubleAngle
        , Parser Text (TitlePunct, Text)
leftDoubleCorner
        , (TitlePunct
DoubleInequal,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text -> Parser Text Text
double' Parser Text Text
lt
        ]
    closeTitle :: Parser (TitlePunct, Text)
    closeTitle :: Parser Text (TitlePunct, Text)
closeTitle = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser Text (TitlePunct, Text)
rightDoubleAngle
        , Parser Text (TitlePunct, Text)
rightDoubleCorner
        , (TitlePunct
DoubleInequal,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text -> Parser Text Text
double' Parser Text Text
gt
        ]
    double' :: Parser Text -> Parser Text
    double' :: Parser Text Text -> Parser Text Text
double' Parser Text Text
p = do
        Text
t <- Parser Text Text
p
        Text
t' <- Parser Text Text
p
        forall (m :: * -> *) a. Monad m => a -> m a
return (Text
t Text -> Text -> Text
`append` Text
t')
    openSubtitle :: Parser (TitlePunct, Text)
    openSubtitle :: Parser Text (TitlePunct, Text)
openSubtitle = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text (TitlePunct, Text)
leftAngle, (TitlePunct
Inequal,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text
lt]
    closeSubtitle :: Parser (TitlePunct, Text)
    closeSubtitle :: Parser Text (TitlePunct, Text)
closeSubtitle = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text (TitlePunct, Text)
rightAngle, (TitlePunct
Inequal,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text
gt]
    leftAngle :: Parser (TitlePunct, Text)
    leftAngle :: Parser Text (TitlePunct, Text)
leftAngle = (TitlePunct
Angle,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x3008'
        , Text -> Parser Text Text
string Text
"&#12296;"
        , Text -> Parser Text Text
asciiCI Text
"&#x3008;"
        ]
    rightAngle :: Parser (TitlePunct, Text)
    rightAngle :: Parser Text (TitlePunct, Text)
rightAngle = (TitlePunct
Angle,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x3009'
        , Text -> Parser Text Text
string Text
"&#12297;"
        , Text -> Parser Text Text
asciiCI Text
"&#x3009;"
        ]
    leftDoubleAngle :: Parser (TitlePunct, Text)
    leftDoubleAngle :: Parser Text (TitlePunct, Text)
leftDoubleAngle = (TitlePunct
DoubleAngle,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x300a'
        , Text -> Parser Text Text
string Text
"&#12298;"
        , Text -> Parser Text Text
asciiCI Text
"&#x300a;"
        ]
    rightDoubleAngle :: Parser (TitlePunct, Text)
    rightDoubleAngle :: Parser Text (TitlePunct, Text)
rightDoubleAngle = (TitlePunct
DoubleAngle,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x300b'
        , Text -> Parser Text Text
string Text
"&#12299;"
        , Text -> Parser Text Text
asciiCI Text
"&#x300b;"
        ]
    leftDoubleCorner :: Parser (TitlePunct, Text)
    leftDoubleCorner :: Parser Text (TitlePunct, Text)
leftDoubleCorner = (TitlePunct
DoubleCorner,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x300e'
        , Text -> Parser Text Text
string Text
"&#12302;"
        , Text -> Parser Text Text
asciiCI Text
"&#x300e;"
        ]
    rightDoubleCorner :: Parser (TitlePunct, Text)
    rightDoubleCorner :: Parser Text (TitlePunct, Text)
rightDoubleCorner = (TitlePunct
DoubleCorner,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x300f'
        , Text -> Parser Text Text
string Text
"&#12303;"
        , Text -> Parser Text Text
asciiCI Text
"&#x300f;"
        ]

data TitlePunct
    = DoubleAngle | Angle
    | DoubleCorner | Corner
    | DoubleInequal | Inequal
    deriving (TitlePunct -> TitlePunct -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TitlePunct -> TitlePunct -> Bool
$c/= :: TitlePunct -> TitlePunct -> Bool
== :: TitlePunct -> TitlePunct -> Bool
$c== :: TitlePunct -> TitlePunct -> Bool
Eq, Int -> TitlePunct -> ShowS
[TitlePunct] -> ShowS
TitlePunct -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TitlePunct] -> ShowS
$cshowList :: [TitlePunct] -> ShowS
show :: TitlePunct -> String
$cshow :: TitlePunct -> String
showsPrec :: Int -> TitlePunct -> ShowS
$cshowsPrec :: Int -> TitlePunct -> ShowS
Show)


-- | A set of stops—'period', 'comma', and 'interpunct'—to be used by
-- 'normalizeStops' function.
--
-- There are three presets: 'horizontalStops', 'verticalStops', and
-- 'horizontalStopsWithSlashes'.
data Stops = Stops
    { Stops -> Text
period :: Text
    , Stops -> Text
comma :: Text
    , Stops -> Text
interpunct :: Text
    } deriving (Stops -> Stops -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Stops -> Stops -> Bool
$c/= :: Stops -> Stops -> Bool
== :: Stops -> Stops -> Bool
$c== :: Stops -> Stops -> Bool
Eq, Int -> Stops -> ShowS
[Stops] -> ShowS
Stops -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Stops] -> ShowS
$cshowList :: [Stops] -> ShowS
show :: Stops -> String
$cshow :: Stops -> String
showsPrec :: Int -> Stops -> ShowS
$cshowsPrec :: Int -> Stops -> ShowS
Show)

-- | Stop sentences in the modern Korean style which follows Western stops.
-- E.g.:
--
-- > 봄·여름·가을·겨울. 어제, 오늘.
horizontalStops :: Stops
horizontalStops :: Stops
horizontalStops = Stops
    { period :: Text
period = Text
". "
    , comma :: Text
comma = Text
", "
    , interpunct :: Text
interpunct = Text
"·"
    }

-- | Stop sentences in the pre-modern Korean style which follows Chinese stops.
-- E.g.:
--
-- > 봄·여름·가을·겨울。어제、오늘。
verticalStops :: Stops
verticalStops :: Stops
verticalStops = Stops
    { period :: Text
period = Text
"。"
    , comma :: Text
comma = Text
"、"
    , interpunct :: Text
interpunct = Text
"·"
    }

-- | Similar to 'horizontalStops' except slashes are used instead of
-- interpuncts. E.g.:
--
-- > 봄/여름/가을/겨울. 어제, 오늘.
horizontalStopsWithSlashes :: Stops
horizontalStopsWithSlashes :: Stops
horizontalStopsWithSlashes = Stops
    { period :: Text
period = Text
". "
    , comma :: Text
comma = Text
", "
    , interpunct :: Text
interpunct = Text
"/"
    }


-- | Normalizes sentence stops (periods, commas, and interpuncts).
normalizeStops :: Stops -> [HtmlEntity] -> [HtmlEntity]
normalizeStops :: Stops -> [HtmlEntity] -> [HtmlEntity]
normalizeStops Stops
stops [HtmlEntity]
input = (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [LangHtmlEntity]
annotatedEntities) forall a b. (a -> b) -> a -> b
$ \ case
    LangHtmlEntity { lang :: LangHtmlEntity -> Maybe Text
lang = Maybe Text
l
                   , entity :: LangHtmlEntity -> HtmlEntity
entity = e :: HtmlEntity
e@HtmlText { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
stack, rawText :: HtmlEntity -> Text
rawText = Text
txt }
                   } ->
        if HtmlTagStack -> Bool
isPreservedTagStack HtmlTagStack
stack Bool -> Bool -> Bool
|| Maybe Text -> Bool
isNeverKorean Maybe Text
l
        then HtmlEntity
e
        else HtmlEntity
e { rawText :: Text
rawText = Text -> Text
replaceText Text
txt }
    LangHtmlEntity { entity :: LangHtmlEntity -> HtmlEntity
entity = HtmlEntity
e } ->
        HtmlEntity
e
  where
    annotatedEntities :: [LangHtmlEntity]
    annotatedEntities :: [LangHtmlEntity]
annotatedEntities = ([HtmlEntity] -> [LangHtmlEntity]
annotateWithLang forall b c a. (b -> c) -> (a -> b) -> a -> c
. [HtmlEntity] -> [HtmlEntity]
normalizeText) [HtmlEntity]
input
    replaceText :: Text -> Text
    replaceText :: Text -> Text
replaceText Text
txt =
        case forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
            Left String
_ -> forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
            Right Text
t -> Text
t
    parser :: Parser Text
    parser :: Parser Text Text
parser = do
        [Text]
chunks <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
            [ Parser Text Text
stops'
            , Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
            ]
        forall t. Chunk t => Parser t ()
endOfInput
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
    stops' :: Parser Text
    stops' :: Parser Text Text
stops' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ do { Ending
ending <- Parser Text Ending
period'
             ; forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text
toEntity forall a b. (a -> b) -> a -> b
$ Ending -> Text -> Text
adjustEnding Ending
ending forall a b. (a -> b) -> a -> b
$ Stops -> Text
period Stops
stops)
             }
        , do { Ending
ending <- Parser Text Ending
comma'
             ; forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text
toEntity forall a b. (a -> b) -> a -> b
$ Ending -> Text -> Text
adjustEnding Ending
ending forall a b. (a -> b) -> a -> b
$ Stops -> Text
comma Stops
stops)
             }
        , do { Ending
ending <- Parser Text Ending
interpunct'
             ; forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text
toEntity forall a b. (a -> b) -> a -> b
$ Ending -> Text -> Text
adjustEnding Ending
ending forall a b. (a -> b) -> a -> b
$ Stops -> Text
interpunct Stops
stops)
             }
        ]
    adjustEnding :: Ending -> Text -> Text
    adjustEnding :: Ending -> Text -> Text
adjustEnding Ending
ending Text
text
      | Text -> Int
Data.Text.length Text
text forall a. Ord a => a -> a -> Bool
> Int
0 Bool -> Bool -> Bool
&& Char -> Bool
isSpace (Text -> Char
Data.Text.last Text
text) =
            Text -> Text
stripEnd Text
text forall a. Semigroup a => a -> a -> a
<> case Ending
ending of { TrailingChars Text
c -> Text
c
                                            ; TrailingSpaces Text
s -> Text
s
                                            ; Ending
Ending -> Text
Data.Text.empty
                                            }
      | Bool
otherwise = Text
text forall a. Semigroup a => a -> a -> a
<> case Ending
ending of { TrailingChars Text
c -> Text
c
                                           ; Ending
_ -> Text
Data.Text.empty
                                           }
    toEntity :: Text -> Text
    toEntity :: Text -> Text
toEntity = (Char -> Text) -> Text -> Text
Data.Text.concatMap forall a b. (a -> b) -> a -> b
$ \ Char
c ->
        if Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x80' -- ASCII compatible characters
        then Char -> Text
Data.Text.singleton Char
c
        else [Text] -> Text
Data.Text.concat [Text
"&#x", String -> Text
pack forall a b. (a -> b) -> a -> b
$ forall a. (Integral a, Show a) => a -> ShowS
showHex (forall a. Enum a => a -> Int
fromEnum Char
c) String
"", Text
";"]
    period' :: Parser Ending
    period' :: Parser Text Ending
period' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Parser Char
char Char
'.' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
        , Char -> Parser Char
char Char
'。' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
        , Text -> Parser Text Text
string Text
"&period;" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
        , Text -> Parser Text Text
string Text
"&#46;" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
        , Text -> Parser Text Text
string Text
"&#12290;" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
        , Text -> Parser Text Text
asciiCI Text
"&#x2e;" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
        , Text -> Parser Text Text
asciiCI Text
"&#x3002;" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
        ]
    comma' :: Parser Ending
    comma' :: Parser Text Ending
comma' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Parser Char
char Char
'、' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
        , Text -> Parser Text Text
string Text
"," forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
        , Text -> Parser Text Text
string Text
"&comma;" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
        , Text -> Parser Text Text
string Text
"&#44;" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
        , Text -> Parser Text Text
string Text
"&#12289;" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
        , Text -> Parser Text Text
asciiCI Text
"&#x2c;" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
        , Text -> Parser Text Text
asciiCI Text
"&#x3001;" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
        ]
    interpunct' :: Parser Ending
    interpunct' :: Parser Text Ending
interpunct' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Parser Char
char Char
'·' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
        , Text -> Parser Text Text
string Text
"&middot;"
        , Text -> Parser Text Text
string Text
"&centerdot;"
        , Text -> Parser Text Text
string Text
"&CenterDot;"
        , Text -> Parser Text Text
string Text
"&#183;"
        , Text -> Parser Text Text
asciiCI Text
"&#xb7;"
        ] forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Ending
Ending
    closingChars :: String
    closingChars :: String
closingChars =
        [ Char
'"', Char
'”', Char
'\'', Char
'’', Char
')', Char
']', Char
'}', Char
'」', Char
'』', Char
'〉', Char
'》', Char
')', Char
'〕'
        , Char
']', Char
'}', Char
'⦆', Char
'】', Char
'〗', Char
'〙', Char
'〛', Char
'›', Char
'»'
        ]
    closingEntities :: [Text]
    closingEntities :: [Text]
closingEntities =
        [ Text
"&quot;", Text
"&QUOT;"                               -- "
        , Text
"&apos;"                                         -- '
        , Text
"&rpar;"                                         -- )
        , Text
"&rsqb;", Text
"&rbrack;"                             -- ]
        , Text
"&rcub;", Text
"&rbrace;"                             -- }
        , Text
"&raquo;"                                        -- »
        , Text
"&rsquo;", Text
"&rsquor;", Text
"&CloseCurlyQuote;"       -- ’
        , Text
"&rdquo;", Text
"&rdquor;", Text
"&CloseCurlyDoubleQuote;" -- ”
        , Text
"&rsaquo;"                                       -- ›
        ]
    closing :: Parser Text
    closing :: Parser Text Text
closing = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice forall a b. (a -> b) -> a -> b
$
        [Text -> Parser Text Text
string [Char
c] | Char
c <- String
closingChars] forall a. [a] -> [a] -> [a]
++
        [Text -> Parser Text Text
string Text
e | Text
e <- [Text]
closingEntities] forall a. [a] -> [a] -> [a]
++
        [Text -> Parser Text Text
asciiCI forall a b. (a -> b) -> a -> b
$ String -> Text
pack forall a b. (a -> b) -> a -> b
$ String
"&#x" forall a. [a] -> [a] -> [a]
++ forall a. (Integral a, Show a) => a -> ShowS
showHex (forall a. Enum a => a -> Int
fromEnum Char
c) String
"" forall a. [a] -> [a] -> [a]
++ String
";"
        | Char
c <- String
closingChars
        ] forall a. [a] -> [a] -> [a]
++
        [Text -> Parser Text Text
string forall a b. (a -> b) -> a -> b
$ Text
"&#" forall a. Semigroup a => a -> a -> a
<> String -> Text
pack (forall a. Show a => a -> String
show Char
c) forall a. Semigroup a => a -> a -> a
<> Text
";" | Char
c <- String
closingChars]
    ending' :: Parser Ending
    ending' :: Parser Text Ending
ending' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Ending
Ending
        , Text -> Ending
TrailingChars forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text
closing
        ]
    boundary :: Parser Ending
    boundary :: Parser Text Ending
boundary = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser Text Ending
ending'
        , Text -> Ending
TrailingSpaces forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isSpace
        ]
    trailingSpaces :: Parser Ending
    trailingSpaces :: Parser Text Ending
trailingSpaces = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser Text Ending
boundary
        , forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Ending
TrailingSpaces Text
" "
        ]


data Ending = TrailingChars Text | TrailingSpaces Text | Ending


-- | Substitution options for 'transformArrow' function.  These options can
-- be composited as an element of a set.
--
-- - @[]@: Transform only leftwards and rightwards arrows.
-- - @['LeftRight']@: Transform bi-directional arrows as well as left/rightwards
-- arrows.
-- - @['DoubleArrow']@: Transform double arrows as well as single arrows.
-- - @['LeftRight', 'DoubleArrow']@: Transform all types of arrows.
data ArrowTransformationOption
    -- | A bidirect arrow (e.g., ↔︎).
    = LeftRight
    -- | An arrow which has two lines (e.g., ⇒).
    | DoubleArrow
    deriving (ArrowTransformationOption -> ArrowTransformationOption -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c/= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
== :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c== :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
Eq, Eq ArrowTransformationOption
ArrowTransformationOption -> ArrowTransformationOption -> Bool
ArrowTransformationOption -> ArrowTransformationOption -> Ordering
ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
$cmin :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
max :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
$cmax :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
>= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c>= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
> :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c> :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
<= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c<= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
< :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c< :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
compare :: ArrowTransformationOption -> ArrowTransformationOption -> Ordering
$ccompare :: ArrowTransformationOption -> ArrowTransformationOption -> Ordering
Ord, Int -> ArrowTransformationOption -> ShowS
[ArrowTransformationOption] -> ShowS
ArrowTransformationOption -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ArrowTransformationOption] -> ShowS
$cshowList :: [ArrowTransformationOption] -> ShowS
show :: ArrowTransformationOption -> String
$cshow :: ArrowTransformationOption -> String
showsPrec :: Int -> ArrowTransformationOption -> ShowS
$cshowsPrec :: Int -> ArrowTransformationOption -> ShowS
Show)

-- | Transforms hyphens and less-than and greater-than inequality symbols that
-- mimic arrows into actual arrow characters:
--
-- - @->@ turns into @→@ (U+2192 RIGHTWARDS ARROW).
-- - @<-@ turns into @←@ (U+2190 LEFTWARDS ARROW).
-- - @\<->@ turns into @↔@ (U+2194 LEFT RIGHT ARROW)
--   if 'LeftRight' is configured.
-- - @=>@ turns into @⇒@ (U+21D2 RIGHTWARDS DOUBLE ARROW)
--   if 'DoubleArrow' is configured.
-- - @<=@ turns into @⇐@ (U+21D0 LEFTWARDS DOUBLE ARROW)
--   if 'DoubleArrow' is configured.
-- - @\<=>@ turns into @⇔@ (U+21D4 LEFT RIGHT DOUBLE ARROW)
--   if both 'DoubleArrow' and 'LeftRight' are configured at a time.
transformArrow :: Set ArrowTransformationOption -> [HtmlEntity] -> [HtmlEntity]
transformArrow :: Set ArrowTransformationOption -> [HtmlEntity] -> [HtmlEntity]
transformArrow Set ArrowTransformationOption
options [HtmlEntity]
input = (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [HtmlEntity] -> [HtmlEntity]
normalizeText [HtmlEntity]
input) forall a b. (a -> b) -> a -> b
$ \ case
    e :: HtmlEntity
e@HtmlText { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
stack, rawText :: HtmlEntity -> Text
rawText = Text
txt } ->
        if HtmlTagStack -> Bool
isPreservedTagStack HtmlTagStack
stack
        then HtmlEntity
e
        else HtmlEntity
e { rawText :: Text
rawText = Text -> Text
replaceText Text
txt }
    HtmlEntity
e ->
        HtmlEntity
e
  where
    replaceText :: Text -> Text
    replaceText :: Text -> Text
replaceText Text
txt =
        case forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
            Left String
_ -> forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
            Right Text
t -> Text
t
    specialChars :: Set Char
    specialChars :: Set Char
specialChars = if ArrowTransformationOption
DoubleArrow forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
       then [Char
'<', Char
'>', Char
'&', Char
'-', Char
'=']
       else [Char
'<', Char
'>', Char
'&', Char
'-']
    parser :: Parser Text
    parser :: Parser Text Text
parser = do
        [Text]
chunks <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
            [ (Char -> Bool) -> Parser Text Text
takeWhile1 (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` Set Char
specialChars)
            , forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text Text]
arrows
            , Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
            ]
        forall t. Chunk t => Parser t ()
endOfInput
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
    arrows :: [Parser Text]
    arrows :: [Parser Text Text]
arrows = forall a. [Maybe a] -> [a]
catMaybes
        [ if ArrowTransformationOption
DoubleArrow forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
             Bool -> Bool -> Bool
&& ArrowTransformationOption
LeftRight forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
          then forall a. a -> Maybe a
Just Parser Text Text
doubleLeftRight
          else forall a. Maybe a
Nothing
        , if ArrowTransformationOption
DoubleArrow forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
          then forall a. a -> Maybe a
Just Parser Text Text
doubleLeft
          else forall a. Maybe a
Nothing
        , if ArrowTransformationOption
DoubleArrow forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
          then forall a. a -> Maybe a
Just Parser Text Text
doubleRight
          else forall a. Maybe a
Nothing
        , if ArrowTransformationOption
LeftRight forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
          then forall a. a -> Maybe a
Just Parser Text Text
leftRight
          else forall a. Maybe a
Nothing
        , forall a. a -> Maybe a
Just Parser Text Text
left
        , forall a. a -> Maybe a
Just Parser Text Text
right
        ]
    doubleLeftRight :: Parser Text
    doubleLeftRight :: Parser Text Text
doubleLeftRight = Parser Text Text
lt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
equals forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&hArr;"
    doubleLeft :: Parser Text
    doubleLeft :: Parser Text Text
doubleLeft = Parser Text Text
lt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
equals forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&lArr;"
    doubleRight :: Parser Text
    doubleRight :: Parser Text Text
doubleRight = Parser Text ()
equals forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&rArr;"
    leftRight :: Parser Text
    leftRight :: Parser Text Text
leftRight = Parser Text Text
lt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
hyphen forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&harr;"
    left :: Parser Text
    left :: Parser Text Text
left = Parser Text Text
lt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
hyphen forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&larr;"
    right :: Parser Text
    right :: Parser Text Text
right = Parser Text ()
hyphen forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&rarr;"
    hyphen :: Parser ()
    hyphen :: Parser Text ()
hyphen = forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
        , Text -> Parser Text Text
string Text
"&hyphen;"
        , Text -> Parser Text Text
string Text
"&dash;"
        , Text -> Parser Text Text
string Text
"&#45;"
        , Text -> Parser Text Text
asciiCI Text
"&#x2d;"
        ]
    equals :: Parser ()
    equals :: Parser Text ()
equals = forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Parser Char
char Char
'=' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
        , Text -> Parser Text Text
string Text
"&equals;"
        , Text -> Parser Text Text
string Text
"&61;"
        , Text -> Parser Text Text
asciiCI Text
"&#x3d;"
        ]

lt :: Parser Text
lt :: Parser Text Text
lt = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
    [ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'<'
    , Text -> Parser Text Text
string Text
"&lt;"
    , Text -> Parser Text Text
string Text
"&#60;"
    , Text -> Parser Text Text
asciiCI Text
"&#x3c;"
    ]

gt :: Parser Text
gt :: Parser Text Text
gt = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
    [ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'>'
    , Text -> Parser Text Text
string Text
"&gt;"
    , Text -> Parser Text Text
string Text
"&#62;"
    , Text -> Parser Text Text
asciiCI Text
"&#x3e;"
    ]

-- | Until 2015, the National Institute of Korean Language (國立國語院) had
-- allowed to use only ellipses (@…@) for omitted word, phrase, line,
-- paragraph, or speechlessness.  However, people tend to use three or more
-- consecutive periods (@...@) instead of a proper ellipsis.
-- Although NIKL has started to allow consecutive periods besides an ellipsis
-- for these uses, ellipses are still a proper punctuation in principle.
--
-- This transforms, in the given HTML fragments, all three consecutive periods
-- into proper ellipses.
transformEllipsis :: [HtmlEntity] -> [HtmlEntity]
transformEllipsis :: [HtmlEntity] -> [HtmlEntity]
transformEllipsis = (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText forall a b. (a -> b) -> a -> b
$ \ Text
txt ->
    case forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
        Left String
_ -> forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
        Right Text
t -> Text
t
  where
    parser :: Parser Text
    parser :: Parser Text Text
parser = do
        [Text]
chunks <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
            [ (Char -> Bool) -> Parser Text Text
takeWhile1 (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` ([Char
'&', Char
'.', Char
'。'] :: Set Char))
            , Parser Text Text
ellipsis
            , Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
            ]
        forall t. Chunk t => Parser t ()
endOfInput
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
    ellipsis :: Parser Text
    ellipsis :: Parser Text Text
ellipsis = do
        forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
            [ Parser Text Text
period forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
period forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
period
            , Parser Text Text
chinesePeriod forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
chinesePeriod forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
chinesePeriod
            ]
        forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&hellip;"
    period :: Parser Text
    period :: Parser Text Text
period = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Text -> Parser Text Text
string Text
"."
        , Text -> Parser Text Text
string Text
"&period;"
        , Text -> Parser Text Text
string Text
"&#46;"
        , Text -> Parser Text Text
asciiCI Text
"&#x2e;"
        ]
    chinesePeriod :: Parser Text
    chinesePeriod :: Parser Text Text
chinesePeriod = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Text -> Parser Text Text
string Text
"。"
        , Text -> Parser Text Text
string Text
"&#12290;"
        , Text -> Parser Text Text
asciiCI Text
"&#x3002;"
        ]

-- | Pairs of substitute folk single and double quotes.
-- Used by 'transformQuote' function.
--
-- The are three presets: 'curvedQuotes', 'guillemets', and
-- 'curvedSingleQuotesWithQ':
--
-- - 'curvedQuotes' uses South Korean curved quotation marks which follows
--   English quotes (@‘@: U+2018, @’@: U+2019, @“@: U+201C, @”@: U+201D)
-- - 'guillemets' uses North Korean angular quotation marks, influenced
--   by Russian guillemets but with some adjustments to replace guillemets with
--   East Asian angular quotes (@〈@: U+3008, @〉@: U+3009, @《@: U+300A,
--   @》@: U+300B).
-- - 'curvedSingleQuotesWithQ' is the almost same to 'curvedQuotes' but
--   wrap text with a @\<q>@ tag instead of curved double quotes.
data Quotes = Quotes
    { Quotes -> QuotePair
singleQuotes :: QuotePair
    , Quotes -> QuotePair
doubleQuotes :: QuotePair
    } deriving (Quotes -> Quotes -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Quotes -> Quotes -> Bool
$c/= :: Quotes -> Quotes -> Bool
== :: Quotes -> Quotes -> Bool
$c== :: Quotes -> Quotes -> Bool
Eq, Eq Quotes
Quotes -> Quotes -> Bool
Quotes -> Quotes -> Ordering
Quotes -> Quotes -> Quotes
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Quotes -> Quotes -> Quotes
$cmin :: Quotes -> Quotes -> Quotes
max :: Quotes -> Quotes -> Quotes
$cmax :: Quotes -> Quotes -> Quotes
>= :: Quotes -> Quotes -> Bool
$c>= :: Quotes -> Quotes -> Bool
> :: Quotes -> Quotes -> Bool
$c> :: Quotes -> Quotes -> Bool
<= :: Quotes -> Quotes -> Bool
$c<= :: Quotes -> Quotes -> Bool
< :: Quotes -> Quotes -> Bool
$c< :: Quotes -> Quotes -> Bool
compare :: Quotes -> Quotes -> Ordering
$ccompare :: Quotes -> Quotes -> Ordering
Ord, Int -> Quotes -> ShowS
[Quotes] -> ShowS
Quotes -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Quotes] -> ShowS
$cshowList :: [Quotes] -> ShowS
show :: Quotes -> String
$cshow :: Quotes -> String
showsPrec :: Int -> Quotes -> ShowS
$cshowsPrec :: Int -> Quotes -> ShowS
Show)

-- | A pair of an opening quote and a closing quote.
data QuotePair
    -- | Wrap the quoted text with a pair of punctuation characters.
    = QuotePair Text Text
    -- | Wrap the quoted text (HTML elements) with an element like @\<q>@ tag.
    | HtmlElement HtmlTag HtmlRawAttrs
    deriving (QuotePair -> QuotePair -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: QuotePair -> QuotePair -> Bool
$c/= :: QuotePair -> QuotePair -> Bool
== :: QuotePair -> QuotePair -> Bool
$c== :: QuotePair -> QuotePair -> Bool
Eq, Eq QuotePair
QuotePair -> QuotePair -> Bool
QuotePair -> QuotePair -> Ordering
QuotePair -> QuotePair -> QuotePair
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: QuotePair -> QuotePair -> QuotePair
$cmin :: QuotePair -> QuotePair -> QuotePair
max :: QuotePair -> QuotePair -> QuotePair
$cmax :: QuotePair -> QuotePair -> QuotePair
>= :: QuotePair -> QuotePair -> Bool
$c>= :: QuotePair -> QuotePair -> Bool
> :: QuotePair -> QuotePair -> Bool
$c> :: QuotePair -> QuotePair -> Bool
<= :: QuotePair -> QuotePair -> Bool
$c<= :: QuotePair -> QuotePair -> Bool
< :: QuotePair -> QuotePair -> Bool
$c< :: QuotePair -> QuotePair -> Bool
compare :: QuotePair -> QuotePair -> Ordering
$ccompare :: QuotePair -> QuotePair -> Ordering
Ord, Int -> QuotePair -> ShowS
[QuotePair] -> ShowS
QuotePair -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [QuotePair] -> ShowS
$cshowList :: [QuotePair] -> ShowS
show :: QuotePair -> String
$cshow :: QuotePair -> String
showsPrec :: Int -> QuotePair -> ShowS
$cshowsPrec :: Int -> QuotePair -> ShowS
Show)

-- | English-style curved quotes (@‘@: U+2018, @’@: U+2019, @“@: U+201C,
-- @”@: U+201D), which are used by South Korean orthography.
curvedQuotes :: Quotes
curvedQuotes :: Quotes
curvedQuotes = Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&lsquo;" Text
"&rsquo;"
    , doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&ldquo;" Text
"&rdquo;"
    }

-- | Vertical corner brackets (@﹁@: U+FE41, @﹂@: U+FE42, @﹃@: U+FE43,
-- @﹄@: U+FE44), which are used by East Asian orthography.
verticalCornerBrackets :: Quotes
verticalCornerBrackets :: Quotes
verticalCornerBrackets = Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#xfe41;" Text
"&#xfe42;"
    , doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#xfe43;" Text
"&#xfe44;"
    }

-- | Traditional horizontal corner brackets (@「@: U+300C, @」@: U+300D,
-- @『@: U+300E, @』@: U+300F), which are used by East Asian orthography.
horizontalCornerBrackets :: Quotes
horizontalCornerBrackets :: Quotes
horizontalCornerBrackets = Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#x300c;" Text
"&#x300d;"
    , doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#x300e;" Text
"&#x300f;"
    }

-- | East Asian guillemets (@〈@: U+3008, @〉@: U+3009, @《@: U+300A, @》@:
-- U+300B), which are used by North Korean orthography.
guillemets :: Quotes
guillemets :: Quotes
guillemets = Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#x3008;" Text
"&#x3009;"
    , doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#x300a;" Text
"&#x300b;"
    }

-- | Use English-style curved quotes (@‘@: U+2018, @’@: U+2019) for single
-- quotes, and HTML @\<q\>@ tags for double quotes.
curvedSingleQuotesWithQ :: Quotes
curvedSingleQuotesWithQ :: Quotes
curvedSingleQuotesWithQ = Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&lsquo;" Text
"&rsquo;"
    , doubleQuotes :: QuotePair
doubleQuotes = HtmlTag -> Text -> QuotePair
HtmlElement HtmlTag
Q Text
""
    }

-- | Use vertical corner brackets (@﹁@: U+FE41, @﹂@: U+FE42) for single quotes,
-- and HTML @\<q\>@ tags for double quotes.
verticalCornerBracketsWithQ :: Quotes
verticalCornerBracketsWithQ :: Quotes
verticalCornerBracketsWithQ = Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#xfe41;" Text
"&#xfe42;"
    , doubleQuotes :: QuotePair
doubleQuotes = HtmlTag -> Text -> QuotePair
HtmlElement HtmlTag
Q Text
""
    }

-- | Use horizontal corner brackets (@「@: U+300C, @」@: U+300D)
-- for single quotes, and HTML @\<q\>@ tags for double quotes.
horizontalCornerBracketsWithQ :: Quotes
horizontalCornerBracketsWithQ :: Quotes
horizontalCornerBracketsWithQ = Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#x300c;" Text
"&#x300d;"
    , doubleQuotes :: QuotePair
doubleQuotes = HtmlTag -> Text -> QuotePair
HtmlElement HtmlTag
Q Text
""
    }

-- | Transform pairs of apostrophes (@'@: U+0027) and straight double
-- quotes (@"@: U+0022) into more appropriate quotation marks like
-- typographic single quotes (@‘@: U+2018, @’@: U+2019) and
-- double quotes (@“@: U+201C, @”@: U+201D), or rather wrap them with an HTML
-- element like @\<q>@ tag.  It depends on the options passed to the first
-- parameter; see also 'Quotes'.
transformQuote :: Quotes -- ^ Pair of quoting punctuations and wrapping element.
               -> [HtmlEntity] -- ^ The input HTML entities to transform.
               -> [HtmlEntity]
transformQuote :: Quotes -> [HtmlEntity] -> [HtmlEntity]
transformQuote Quotes { QuotePair
doubleQuotes :: QuotePair
singleQuotes :: QuotePair
doubleQuotes :: Quotes -> QuotePair
singleQuotes :: Quotes -> QuotePair
.. } = forall m. PairedTransformer m -> [HtmlEntity] -> [HtmlEntity]
transformPairs forall a b. (a -> b) -> a -> b
$
    PairedTransformer
        { ignoresTagStack :: HtmlTagStack -> Bool
ignoresTagStack = HtmlTagStack -> Bool
isPreservedTagStack
        , matchStart :: [(QuotePunct, Text)]
-> Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchStart = [(QuotePunct, Text)]
-> Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchStart'
        , matchEnd :: Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchEnd = Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchEnd'
        , areMatchesPaired :: (QuotePunct, Text) -> (QuotePunct, Text) -> Bool
areMatchesPaired = \ (QuotePunct
punct, Text
text) (QuotePunct
punct', Text
text') ->
            QuotePunct -> QuotePunct -> Bool
arePaired QuotePunct
punct QuotePunct
punct' Bool -> Bool -> Bool
&& Text
text forall a. Eq a => a -> a -> Bool
== Text
text'
        , transformPair :: (QuotePunct, Text)
-> (QuotePunct, Text) -> [HtmlEntity] -> [HtmlEntity]
transformPair = (QuotePunct, Text)
-> (QuotePunct, Text) -> [HtmlEntity] -> [HtmlEntity]
transformPair'
        }
  where
    punctuations :: [(QuotePunct, [Text])]
    punctuations :: [(QuotePunct, [Text])]
punctuations =
        [ ( QuotePunct
Apostrophe
          , [Text
"'", Text
"&apos;", Text
"&#39;", Text
"&#x27;", Text
"&#X27;"]
          )
        , ( QuotePunct
DoubleQuote
          , [Text
"\"", Text
"&quot;", Text
"&QUOT;", Text
"&#34;", Text
"&#x22;", Text
"&#X22;"]
          )
        , ( QuotePunct
DoubleQuote
          , [Text
"\"", Text
"&quot;", Text
"&QUOT;", Text
"&#34;", Text
"&#x22;", Text
"&#X22;"]
          )
        , ( QuotePunct
OpeningSingleQuote
          , [ Text
"\x2018", Text
"&lsquo;", Text
"&OpenCurlyQuote;"
            , Text
"&#8216;", Text
"&#x2018;", Text
"&#X2018;"
            ]
          )
        , ( QuotePunct
ClosingSingleQuote
          , [ Text
"\x2019", Text
"&rsquo;", Text
"&rsquor;", Text
"&CloseCurlyQuote;"
            , Text
"&#8217;", Text
"&#x2019;", Text
"&#X2019;"
            ]
          )
        , ( QuotePunct
OpeningDoubleQuote
          , [ Text
"\x201c", Text
"&ldquo;", Text
"&OpenCurlyDoubleQuote;"
            , Text
"&#8220;", Text
"&#x201c;", Text
"&#x201C;", Text
"&#X201c;", Text
"&#X201C;"
            ]
          )
        , ( QuotePunct
ClosingDoubleQuote
          , [ Text
"\x201d", Text
"&rdquo;", Text
"&rdquor;", Text
"&CloseCurlyDoubleQuote;"
            , Text
"&#8221;", Text
"&#x201d;", Text
"&#x201D;", Text
"&#X201d;", Text
"&#X201D;"
            ]
          )
        ]
    matchStart' :: [(QuotePunct, Text)]
                -> Text
                -> Maybe ((QuotePunct, Text), Text, Text, Text)
    matchStart' :: [(QuotePunct, Text)]
-> Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchStart' [(QuotePunct, Text)]
prevMatches Text
text
      | forall (t :: * -> *) a. Foldable t => t a -> Bool
Prelude.null [(QuotePunct, Text, (Text, Text))]
prevMatcherCandidates = forall a. Maybe a
Nothing
      | Bool
otherwise =
            let (QuotePunct
matcher, Text
entity, (Text
pre, Text
post)) = forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy
                    (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a -> b) -> a -> b
$ \ (QuotePunct
_, Text
_, (Text
pre', Text
_)) -> Text -> Int
Data.Text.length Text
pre')
                    [(QuotePunct, Text, (Text, Text))]
prevMatcherCandidates
            in
                if Text -> Bool
Data.Text.null Text
post then
                   forall a. Maybe a
Nothing
                else
                    forall a. a -> Maybe a
Just
                        ( (QuotePunct
matcher, Text
entity)
                        , Text
pre
                        , Text
entity
                        , Int -> Text -> Text
Data.Text.drop (Text -> Int
Data.Text.length Text
entity) Text
post
                        )
      where
        prevMatchers :: Set QuotePunct
        prevMatchers :: Set QuotePunct
prevMatchers = forall a. Ord a => [a] -> Set a
Data.Set.fromList (forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(QuotePunct, Text)]
prevMatches)
        prevMatcherCandidates :: [(QuotePunct, Text, (Text, Text))]
        prevMatcherCandidates :: [(QuotePunct, Text, (Text, Text))]
prevMatcherCandidates =
            [ (QuotePunct
matcher', Text
entity', Text -> Text -> (Text, Text)
breakOn Text
entity' Text
text)
            | (QuotePunct
matcher', [Text]
entities) <- [(QuotePunct, [Text])]
punctuations
            , QuotePunct -> Bool
opens QuotePunct
matcher'
            , QuotePunct
matcher' forall a. Ord a => a -> Set a -> Bool
`Data.Set.notMember` Set QuotePunct
prevMatchers
            , Text
entity' <- [Text]
entities
            ]
    matchEnd' :: Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
    matchEnd' :: Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchEnd' Text
text
      | forall (t :: * -> *) a. Foldable t => t a -> Bool
Prelude.null [(QuotePunct, Text, (Text, Text))]
matcherCandidates = forall a. Maybe a
Nothing
      | Bool
otherwise =
            let (QuotePunct
matcher, Text
entity, (Text
pre, Text
post)) = forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy
                    (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a -> b) -> a -> b
$ \ (QuotePunct
_, Text
_, (Text
pre', Text
_)) -> Text -> Int
Data.Text.length Text
pre')
                    [(QuotePunct, Text, (Text, Text))]
matcherCandidates
            in
                if Text -> Bool
Data.Text.null Text
post then
                    forall a. Maybe a
Nothing
                else
                    forall a. a -> Maybe a
Just
                        ( (QuotePunct
matcher, Text
entity)
                        , Text
pre
                        , Text
entity
                        , Int -> Text -> Text
Data.Text.drop (Text -> Int
Data.Text.length Text
entity) Text
post
                        )
      where
        matcherCandidates :: [(QuotePunct, Text, (Text, Text))]
        matcherCandidates :: [(QuotePunct, Text, (Text, Text))]
matcherCandidates =
            [ (QuotePunct
matcher', Text
entity', Text -> Text -> (Text, Text)
breakOn Text
entity' Text
text)
            | (QuotePunct
matcher', [Text]
entities) <- [(QuotePunct, [Text])]
punctuations
            , QuotePunct -> Bool
closes QuotePunct
matcher'
            , Text
entity' <- [Text]
entities
            ]
    transformPair' :: (QuotePunct, Text)
                   -> (QuotePunct, Text)
                   -> [HtmlEntity]
                   -> [HtmlEntity]
    transformPair' :: (QuotePunct, Text)
-> (QuotePunct, Text) -> [HtmlEntity] -> [HtmlEntity]
transformPair' (QuotePunct
punct, Text
start) (QuotePunct
_, Text
end) buffer :: [HtmlEntity]
buffer@(HtmlEntity
firstEntity : [HtmlEntity]
_) =
        case Text -> Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipText Text
start Text
end [HtmlEntity]
buffer of
            Maybe [HtmlEntity]
Nothing -> [HtmlEntity]
buffer
            Just [HtmlEntity]
es -> case QuotePair
pair of
                QuotePair Text
open Text
close ->
                    HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
tagStack' Text
open forall a. a -> [a] -> [a]
: [HtmlEntity]
es forall a. [a] -> [a] -> [a]
++ [HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
tagStack' Text
close]
                HtmlElement HtmlTag
tag Text
attrs ->
                    HtmlTagStack -> HtmlTag -> Text -> [HtmlEntity] -> [HtmlEntity]
wrap HtmlTagStack
tagStack' HtmlTag
tag Text
attrs [HtmlEntity]
es
      where
        pair :: QuotePair
        pair :: QuotePair
pair = case QuotePunct
punct of
            QuotePunct
DoubleQuote -> QuotePair
doubleQuotes
            QuotePunct
OpeningDoubleQuote -> QuotePair
doubleQuotes
            QuotePunct
ClosingDoubleQuote -> QuotePair
doubleQuotes
            QuotePunct
_ -> QuotePair
singleQuotes
        tagStack' :: HtmlTagStack
        tagStack' :: HtmlTagStack
tagStack' = HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
firstEntity
    transformPair' (QuotePunct, Text)
_ (QuotePunct, Text)
_ [] = []
    arePaired :: QuotePunct -> QuotePunct -> Bool
    arePaired :: QuotePunct -> QuotePunct -> Bool
arePaired QuotePunct
OpeningSingleQuote = (forall a. Eq a => a -> a -> Bool
== QuotePunct
ClosingSingleQuote)
    arePaired QuotePunct
OpeningDoubleQuote = (forall a. Eq a => a -> a -> Bool
== QuotePunct
ClosingDoubleQuote)
    arePaired QuotePunct
punct = (forall a. Eq a => a -> a -> Bool
== QuotePunct
punct)

data QuotePunct
    = DoubleQuote
    | Apostrophe
    | OpeningSingleQuote | ClosingSingleQuote
    | OpeningDoubleQuote | ClosingDoubleQuote
    deriving (QuotePunct -> QuotePunct -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: QuotePunct -> QuotePunct -> Bool
$c/= :: QuotePunct -> QuotePunct -> Bool
== :: QuotePunct -> QuotePunct -> Bool
$c== :: QuotePunct -> QuotePunct -> Bool
Eq, Eq QuotePunct
QuotePunct -> QuotePunct -> Bool
QuotePunct -> QuotePunct -> Ordering
QuotePunct -> QuotePunct -> QuotePunct
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: QuotePunct -> QuotePunct -> QuotePunct
$cmin :: QuotePunct -> QuotePunct -> QuotePunct
max :: QuotePunct -> QuotePunct -> QuotePunct
$cmax :: QuotePunct -> QuotePunct -> QuotePunct
>= :: QuotePunct -> QuotePunct -> Bool
$c>= :: QuotePunct -> QuotePunct -> Bool
> :: QuotePunct -> QuotePunct -> Bool
$c> :: QuotePunct -> QuotePunct -> Bool
<= :: QuotePunct -> QuotePunct -> Bool
$c<= :: QuotePunct -> QuotePunct -> Bool
< :: QuotePunct -> QuotePunct -> Bool
$c< :: QuotePunct -> QuotePunct -> Bool
compare :: QuotePunct -> QuotePunct -> Ordering
$ccompare :: QuotePunct -> QuotePunct -> Ordering
Ord, Int -> QuotePunct -> ShowS
[QuotePunct] -> ShowS
QuotePunct -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [QuotePunct] -> ShowS
$cshowList :: [QuotePunct] -> ShowS
show :: QuotePunct -> String
$cshow :: QuotePunct -> String
showsPrec :: Int -> QuotePunct -> ShowS
$cshowsPrec :: Int -> QuotePunct -> ShowS
Show)

opens :: QuotePunct -> Bool
opens :: QuotePunct -> Bool
opens QuotePunct
DoubleQuote = Bool
True
opens QuotePunct
Apostrophe = Bool
True
opens QuotePunct
OpeningSingleQuote = Bool
True
opens QuotePunct
OpeningDoubleQuote = Bool
True
opens QuotePunct
_ = Bool
False

closes :: QuotePunct -> Bool
closes :: QuotePunct -> Bool
closes QuotePunct
DoubleQuote = Bool
True
closes QuotePunct
Apostrophe = Bool
True
closes QuotePunct
ClosingSingleQuote = Bool
True
closes QuotePunct
ClosingDoubleQuote = Bool
True
closes QuotePunct
_ = Bool
False

-- | Transform the following folk em dashes into proper em dashes
-- (@—@: @U+2014 EM DASH@):
--
-- - A hyphen (@-@: @U+002D HYPHEN-MINUS@) surrounded by spaces.
-- - Two or three consecutive hyphens (@--@ or @---@).
-- - A hangul vowel @ㅡ@ (@U+3161 HANGUL LETTER EU@) surrounded by spaces.
--   There are Korean people that use a hangul vowel @ㅡ@ ("eu") instead of
--   an em dash due to their ignorance or negligence.
transformEmDash :: [HtmlEntity] -> [HtmlEntity]
transformEmDash :: [HtmlEntity] -> [HtmlEntity]
transformEmDash = (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText forall a b. (a -> b) -> a -> b
$ \ Text
txt ->
    case forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
        Left String
_ -> forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
        Right Text
t -> Text
t
  where
    parser :: Parser Text
    parser :: Parser Text Text
parser = do
        [Text]
chunks <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
            [ (Char -> Bool) -> Parser Text Text
takeWhile1 forall a b. (a -> b) -> a -> b
$ \ Char
c ->
                Bool -> Bool
not (Char -> Bool
isSpace Char
c) Bool -> Bool -> Bool
&&
                Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` ([Char
'&', Char
'-', Char
'\x3161'] :: Set Char)
            , Parser Text Text
emDash
            , Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
            ]
        forall t. Chunk t => Parser t ()
endOfInput
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
    emDash :: Parser Text
    emDash :: Parser Text Text
emDash = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser Text Text
hyphens
        , (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isSpace forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text Text
eu, Parser Text Text
hyphen] forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isSpace
        ] forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&mdash;"
    hyphens :: Parser Text
    hyphens :: Parser Text Text
hyphens = Parser Text Text
hyphen forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
hyphen forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Text
"" Parser Text Text
hyphen
    hyphen :: Parser Text
    hyphen :: Parser Text Text
hyphen = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
Prelude.map Text -> Parser Text Text
string
        [Text
"-", Text
"&#45;", Text
"&#x2d;", Text
"&#x2D;", Text
"&#X2d;", Text
"&#X2D;"]
    eu :: Parser Text
    eu :: Parser Text Text
eu = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
Prelude.map Text -> Parser Text Text
string
        [Text
"\x3161", Text
"&#12641;", Text
"&#x3161;", Text
"&#X3161;"]

transformText :: (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText :: (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText Text -> Text
replace' = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a -> b) -> a -> b
$ \ case
    e :: HtmlEntity
e@HtmlText { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
stack, rawText :: HtmlEntity -> Text
rawText = Text
txt } ->
        if HtmlTagStack -> Bool
isPreservedTagStack HtmlTagStack
stack
        then HtmlEntity
e
        else HtmlEntity
e { rawText :: Text
rawText = Text -> Text
replace' Text
txt }
    HtmlEntity
e ->
        HtmlEntity
e