{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
module Text.Seonbi.Punctuation
(
ArrowTransformationOption (..)
, transformArrow
, CitationQuotes (..)
, Quotes (..)
, QuotePair (..)
, angleQuotes
, cornerBrackets
, curvedQuotes
, curvedSingleQuotesWithQ
, guillemets
, horizontalCornerBrackets
, horizontalCornerBracketsWithQ
, quoteCitation
, transformQuote
, verticalCornerBrackets
, verticalCornerBracketsWithQ
, Stops (..)
, horizontalStops
, horizontalStopsWithSlashes
, normalizeStops
, transformEllipsis
, verticalStops
, transformEmDash
) where
import Prelude hiding (takeWhile)
import Control.Monad
import Data.Char (isSpace)
import Data.Either
import Data.List (minimumBy)
import Data.Maybe
import Data.Ord
import Numeric
import Data.Attoparsec.Text
import Data.Set
import Data.Text hiding (any, length, takeWhile)
import qualified Data.Text
import Text.Seonbi.Html
import Text.Seonbi.Html.Clipper
import Text.Seonbi.Html.Lang
import Text.Seonbi.Html.Preservation
import Text.Seonbi.Html.Wrapper
import Text.Seonbi.PairedTransformer
data CitationQuotes = CitationQuotes
{
CitationQuotes -> (Text, Text)
title :: (Text, Text)
,
CitationQuotes -> (Text, Text)
subtitle :: (Text, Text)
,
CitationQuotes -> Maybe (HtmlTag, Text)
htmlElement :: Maybe (HtmlTag, HtmlRawAttrs)
} deriving (CitationQuotes -> CitationQuotes -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CitationQuotes -> CitationQuotes -> Bool
$c/= :: CitationQuotes -> CitationQuotes -> Bool
== :: CitationQuotes -> CitationQuotes -> Bool
$c== :: CitationQuotes -> CitationQuotes -> Bool
Eq, Int -> CitationQuotes -> ShowS
[CitationQuotes] -> ShowS
CitationQuotes -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CitationQuotes] -> ShowS
$cshowList :: [CitationQuotes] -> ShowS
show :: CitationQuotes -> String
$cshow :: CitationQuotes -> String
showsPrec :: Int -> CitationQuotes -> ShowS
$cshowsPrec :: Int -> CitationQuotes -> ShowS
Show)
angleQuotes :: CitationQuotes
angleQuotes :: CitationQuotes
angleQuotes = CitationQuotes
{ title :: (Text, Text)
title = (Text
"《", Text
"》")
, subtitle :: (Text, Text)
subtitle = (Text
"〈", Text
"〉")
, htmlElement :: Maybe (HtmlTag, Text)
htmlElement = forall a. a -> Maybe a
Just (HtmlTag
Cite, Text
"")
}
cornerBrackets :: CitationQuotes
cornerBrackets :: CitationQuotes
cornerBrackets = CitationQuotes
{ title :: (Text, Text)
title = (Text
"『", Text
"』")
, subtitle :: (Text, Text)
subtitle = (Text
"「", Text
"」")
, htmlElement :: Maybe (HtmlTag, Text)
htmlElement = forall a. a -> Maybe a
Just (HtmlTag
Cite, Text
"")
}
quoteCitation :: CitationQuotes
-> [HtmlEntity]
-> [HtmlEntity]
quoteCitation :: CitationQuotes -> [HtmlEntity] -> [HtmlEntity]
quoteCitation CitationQuotes
quotes =
forall m. PairedTransformer m -> [HtmlEntity] -> [HtmlEntity]
transformPairs PairedTransformer TitlePunct
pairedTransformer
where
pairedTransformer :: PairedTransformer TitlePunct
pairedTransformer :: PairedTransformer TitlePunct
pairedTransformer = PairedTransformer
{ ignoresTagStack :: HtmlTagStack -> Bool
ignoresTagStack = HtmlTagStack -> Bool
isPreservedTagStack
, matchStart :: [TitlePunct] -> Text -> Maybe (TitlePunct, Text, Text, Text)
matchStart = \ [TitlePunct]
_ -> Parser [Either Text (TitlePunct, Text, Text)]
-> Text -> Maybe (TitlePunct, Text, Text, Text)
matcher forall a b. (a -> b) -> a -> b
$ Parser Text (TitlePunct, Text)
-> Parser Text (TitlePunct, Text)
-> Parser [Either Text (TitlePunct, Text, Text)]
parser Parser Text (TitlePunct, Text)
openTitle Parser Text (TitlePunct, Text)
openSubtitle
, matchEnd :: Text -> Maybe (TitlePunct, Text, Text, Text)
matchEnd = Parser [Either Text (TitlePunct, Text, Text)]
-> Text -> Maybe (TitlePunct, Text, Text, Text)
matcher forall a b. (a -> b) -> a -> b
$ Parser Text (TitlePunct, Text)
-> Parser Text (TitlePunct, Text)
-> Parser [Either Text (TitlePunct, Text, Text)]
parser Parser Text (TitlePunct, Text)
closeTitle Parser Text (TitlePunct, Text)
closeSubtitle
, areMatchesPaired :: TitlePunct -> TitlePunct -> Bool
areMatchesPaired = forall a. Eq a => a -> a -> Bool
(==)
, transformPair :: TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
transformPair = TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
transformPair'
}
transformPair' :: TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
transformPair' :: TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
transformPair' TitlePunct
punct TitlePunct
_ [HtmlEntity]
buffer =
case [HtmlEntity]
cited of
[] -> []
entities :: [HtmlEntity]
entities@(HtmlEntity
x : [HtmlEntity]
_) ->
let
ts :: HtmlTagStack
ts = HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
x
makeText :: Text -> HtmlEntity
makeText = HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
ts
category :: CitationQuotes -> (Text, Text)
category = case TitlePunct
punct of
TitlePunct
DoubleAngle -> CitationQuotes -> (Text, Text)
title
TitlePunct
DoubleCorner -> CitationQuotes -> (Text, Text)
title
TitlePunct
DoubleInequal -> CitationQuotes -> (Text, Text)
title
TitlePunct
Angle -> CitationQuotes -> (Text, Text)
subtitle
TitlePunct
Corner -> CitationQuotes -> (Text, Text)
subtitle
TitlePunct
Inequal -> CitationQuotes -> (Text, Text)
subtitle
(Text
startP, Text
endP) = CitationQuotes -> (Text, Text)
category CitationQuotes
quotes
in
Text -> HtmlEntity
makeText Text
startP forall a. a -> [a] -> [a]
: [HtmlEntity]
entities forall a. [a] -> [a] -> [a]
++ [Text -> HtmlEntity
makeText Text
endP]
where
buffer' :: [HtmlEntity]
buffer' :: [HtmlEntity]
buffer' = forall a. Int -> [a] -> [a]
Prelude.drop Int
1 forall a b. (a -> b) -> a -> b
$ forall a. Int -> [a] -> [a]
Prelude.take (forall (t :: * -> *) a. Foldable t => t a -> Int
length [HtmlEntity]
buffer forall a. Num a => a -> a -> a
- Int
1) [HtmlEntity]
buffer
cited :: [HtmlEntity]
cited :: [HtmlEntity]
cited = case (CitationQuotes -> Maybe (HtmlTag, Text)
htmlElement CitationQuotes
quotes, [HtmlEntity]
buffer') of
(Maybe (HtmlTag, Text)
Nothing, [HtmlEntity]
b) -> [HtmlEntity]
b
(Maybe (HtmlTag, Text)
_, []) -> []
(Just (HtmlTag
tag', Text
""), HtmlEntity
x : [HtmlEntity]
_) ->
if [HtmlEntity]
buffer' [HtmlEntity] -> HtmlTag -> Bool
`isWrappedBy` HtmlTag
tag'
then [HtmlEntity]
buffer'
else HtmlTagStack -> HtmlTag -> Text -> [HtmlEntity] -> [HtmlEntity]
wrap (HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
x) HtmlTag
tag' Text
"" [HtmlEntity]
buffer'
(Just (HtmlTag
tag', Text
attrs), HtmlEntity
x : [HtmlEntity]
_) ->
if [HtmlEntity] -> HtmlTag -> Maybe Text -> Bool
isWrappedBy' [HtmlEntity]
buffer' HtmlTag
tag' (forall a. a -> Maybe a
Just Text
attrs)
then [HtmlEntity]
buffer'
else HtmlTagStack -> HtmlTag -> Text -> [HtmlEntity] -> [HtmlEntity]
wrap (HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
x) HtmlTag
tag' Text
attrs [HtmlEntity]
buffer'
specialChars :: Set Char
specialChars :: Set Char
specialChars =
[ Char
'<', Char
'>', Char
'&'
, Char
'\x3008', Char
'\x3009', Char
'\x300a', Char
'\x300b', Char
'\x300e', Char
'\x300f'
]
matcher :: Parser [Either Text (TitlePunct, Text, Text)]
-> Text
-> Maybe (TitlePunct, Text, Text, Text)
matcher :: Parser [Either Text (TitlePunct, Text, Text)]
-> Text -> Maybe (TitlePunct, Text, Text, Text)
matcher Parser [Either Text (TitlePunct, Text, Text)]
parser' Text
text' = case forall a. Parser a -> Text -> Either String a
parseOnly Parser [Either Text (TitlePunct, Text, Text)]
parser' Text
text' of
Left String
_ -> forall a. Maybe a
Nothing
Right [Either Text (TitlePunct, Text, Text)]
matches -> case forall a b. [Either a b] -> ([a], [b])
partitionEithers [Either Text (TitlePunct, Text, Text)]
matches of
([Text]
l, [(TitlePunct
punct, Text
m, Text
post)]) -> forall a. a -> Maybe a
Just (TitlePunct
punct, [Text] -> Text
Data.Text.concat [Text]
l, Text
m, Text
post)
([Text], [(TitlePunct, Text, Text)])
_ -> forall a. Maybe a
Nothing
parser :: Parser (TitlePunct, Text)
-> Parser (TitlePunct, Text)
-> Parser [Either Text (TitlePunct, Text, Text)]
parser :: Parser Text (TitlePunct, Text)
-> Parser Text (TitlePunct, Text)
-> Parser [Either Text (TitlePunct, Text, Text)]
parser Parser Text (TitlePunct, Text)
title' Parser Text (TitlePunct, Text)
subtitle' = forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text Text
takeWhile1 (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` Set Char
specialChars)
, do
(TitlePunct
punct, Text
m) <- Parser Text (TitlePunct, Text)
title'
Text
remain <- (Char -> Bool) -> Parser Text Text
takeWhile (forall a b. a -> b -> a
const Bool
True)
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (TitlePunct
punct, Text
m, Text
remain)
, do
(TitlePunct
punct, Text
m) <- Parser Text (TitlePunct, Text)
subtitle'
Text
remain <- (Char -> Bool) -> Parser Text Text
takeWhile (forall a b. a -> b -> a
const Bool
True)
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (TitlePunct
punct, Text
m, Text
remain)
, forall a b. a -> Either a b
Left forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
]
openTitle :: Parser (TitlePunct, Text)
openTitle :: Parser Text (TitlePunct, Text)
openTitle = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Parser Text (TitlePunct, Text)
leftDoubleAngle
, Parser Text (TitlePunct, Text)
leftDoubleCorner
, (TitlePunct
DoubleInequal,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text -> Parser Text Text
double' Parser Text Text
lt
]
closeTitle :: Parser (TitlePunct, Text)
closeTitle :: Parser Text (TitlePunct, Text)
closeTitle = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Parser Text (TitlePunct, Text)
rightDoubleAngle
, Parser Text (TitlePunct, Text)
rightDoubleCorner
, (TitlePunct
DoubleInequal,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text -> Parser Text Text
double' Parser Text Text
gt
]
double' :: Parser Text -> Parser Text
double' :: Parser Text Text -> Parser Text Text
double' Parser Text Text
p = do
Text
t <- Parser Text Text
p
Text
t' <- Parser Text Text
p
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
t Text -> Text -> Text
`append` Text
t')
openSubtitle :: Parser (TitlePunct, Text)
openSubtitle :: Parser Text (TitlePunct, Text)
openSubtitle = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text (TitlePunct, Text)
leftAngle, (TitlePunct
Inequal,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text
lt]
closeSubtitle :: Parser (TitlePunct, Text)
closeSubtitle :: Parser Text (TitlePunct, Text)
closeSubtitle = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text (TitlePunct, Text)
rightAngle, (TitlePunct
Inequal,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text
gt]
leftAngle :: Parser (TitlePunct, Text)
leftAngle :: Parser Text (TitlePunct, Text)
leftAngle = (TitlePunct
Angle,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x3008'
, Text -> Parser Text Text
string Text
"〈"
, Text -> Parser Text Text
asciiCI Text
"〈"
]
rightAngle :: Parser (TitlePunct, Text)
rightAngle :: Parser Text (TitlePunct, Text)
rightAngle = (TitlePunct
Angle,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x3009'
, Text -> Parser Text Text
string Text
"〉"
, Text -> Parser Text Text
asciiCI Text
"〉"
]
leftDoubleAngle :: Parser (TitlePunct, Text)
leftDoubleAngle :: Parser Text (TitlePunct, Text)
leftDoubleAngle = (TitlePunct
DoubleAngle,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x300a'
, Text -> Parser Text Text
string Text
"《"
, Text -> Parser Text Text
asciiCI Text
"《"
]
rightDoubleAngle :: Parser (TitlePunct, Text)
rightDoubleAngle :: Parser Text (TitlePunct, Text)
rightDoubleAngle = (TitlePunct
DoubleAngle,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x300b'
, Text -> Parser Text Text
string Text
"》"
, Text -> Parser Text Text
asciiCI Text
"》"
]
leftDoubleCorner :: Parser (TitlePunct, Text)
leftDoubleCorner :: Parser Text (TitlePunct, Text)
leftDoubleCorner = (TitlePunct
DoubleCorner,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x300e'
, Text -> Parser Text Text
string Text
"『"
, Text -> Parser Text Text
asciiCI Text
"『"
]
rightDoubleCorner :: Parser (TitlePunct, Text)
rightDoubleCorner :: Parser Text (TitlePunct, Text)
rightDoubleCorner = (TitlePunct
DoubleCorner,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'\x300f'
, Text -> Parser Text Text
string Text
"』"
, Text -> Parser Text Text
asciiCI Text
"』"
]
data TitlePunct
= DoubleAngle | Angle
| DoubleCorner | Corner
| DoubleInequal | Inequal
deriving (TitlePunct -> TitlePunct -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TitlePunct -> TitlePunct -> Bool
$c/= :: TitlePunct -> TitlePunct -> Bool
== :: TitlePunct -> TitlePunct -> Bool
$c== :: TitlePunct -> TitlePunct -> Bool
Eq, Int -> TitlePunct -> ShowS
[TitlePunct] -> ShowS
TitlePunct -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TitlePunct] -> ShowS
$cshowList :: [TitlePunct] -> ShowS
show :: TitlePunct -> String
$cshow :: TitlePunct -> String
showsPrec :: Int -> TitlePunct -> ShowS
$cshowsPrec :: Int -> TitlePunct -> ShowS
Show)
data Stops = Stops
{ Stops -> Text
period :: Text
, Stops -> Text
comma :: Text
, Stops -> Text
interpunct :: Text
} deriving (Stops -> Stops -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Stops -> Stops -> Bool
$c/= :: Stops -> Stops -> Bool
== :: Stops -> Stops -> Bool
$c== :: Stops -> Stops -> Bool
Eq, Int -> Stops -> ShowS
[Stops] -> ShowS
Stops -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Stops] -> ShowS
$cshowList :: [Stops] -> ShowS
show :: Stops -> String
$cshow :: Stops -> String
showsPrec :: Int -> Stops -> ShowS
$cshowsPrec :: Int -> Stops -> ShowS
Show)
horizontalStops :: Stops
horizontalStops :: Stops
horizontalStops = Stops
{ period :: Text
period = Text
". "
, comma :: Text
comma = Text
", "
, interpunct :: Text
interpunct = Text
"·"
}
verticalStops :: Stops
verticalStops :: Stops
verticalStops = Stops
{ period :: Text
period = Text
"。"
, comma :: Text
comma = Text
"、"
, interpunct :: Text
interpunct = Text
"·"
}
horizontalStopsWithSlashes :: Stops
horizontalStopsWithSlashes :: Stops
horizontalStopsWithSlashes = Stops
{ period :: Text
period = Text
". "
, comma :: Text
comma = Text
", "
, interpunct :: Text
interpunct = Text
"/"
}
normalizeStops :: Stops -> [HtmlEntity] -> [HtmlEntity]
normalizeStops :: Stops -> [HtmlEntity] -> [HtmlEntity]
normalizeStops Stops
stops [HtmlEntity]
input = (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [LangHtmlEntity]
annotatedEntities) forall a b. (a -> b) -> a -> b
$ \ case
LangHtmlEntity { lang :: LangHtmlEntity -> Maybe Text
lang = Maybe Text
l
, entity :: LangHtmlEntity -> HtmlEntity
entity = e :: HtmlEntity
e@HtmlText { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
stack, rawText :: HtmlEntity -> Text
rawText = Text
txt }
} ->
if HtmlTagStack -> Bool
isPreservedTagStack HtmlTagStack
stack Bool -> Bool -> Bool
|| Maybe Text -> Bool
isNeverKorean Maybe Text
l
then HtmlEntity
e
else HtmlEntity
e { rawText :: Text
rawText = Text -> Text
replaceText Text
txt }
LangHtmlEntity { entity :: LangHtmlEntity -> HtmlEntity
entity = HtmlEntity
e } ->
HtmlEntity
e
where
annotatedEntities :: [LangHtmlEntity]
annotatedEntities :: [LangHtmlEntity]
annotatedEntities = ([HtmlEntity] -> [LangHtmlEntity]
annotateWithLang forall b c a. (b -> c) -> (a -> b) -> a -> c
. [HtmlEntity] -> [HtmlEntity]
normalizeText) [HtmlEntity]
input
replaceText :: Text -> Text
replaceText :: Text -> Text
replaceText Text
txt =
case forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
Left String
_ -> forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
Right Text
t -> Text
t
parser :: Parser Text
parser :: Parser Text Text
parser = do
[Text]
chunks <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Parser Text Text
stops'
, Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
]
forall t. Chunk t => Parser t ()
endOfInput
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
stops' :: Parser Text
stops' :: Parser Text Text
stops' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ do { Ending
ending <- Parser Text Ending
period'
; forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text
toEntity forall a b. (a -> b) -> a -> b
$ Ending -> Text -> Text
adjustEnding Ending
ending forall a b. (a -> b) -> a -> b
$ Stops -> Text
period Stops
stops)
}
, do { Ending
ending <- Parser Text Ending
comma'
; forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text
toEntity forall a b. (a -> b) -> a -> b
$ Ending -> Text -> Text
adjustEnding Ending
ending forall a b. (a -> b) -> a -> b
$ Stops -> Text
comma Stops
stops)
}
, do { Ending
ending <- Parser Text Ending
interpunct'
; forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text
toEntity forall a b. (a -> b) -> a -> b
$ Ending -> Text -> Text
adjustEnding Ending
ending forall a b. (a -> b) -> a -> b
$ Stops -> Text
interpunct Stops
stops)
}
]
adjustEnding :: Ending -> Text -> Text
adjustEnding :: Ending -> Text -> Text
adjustEnding Ending
ending Text
text
| Text -> Int
Data.Text.length Text
text forall a. Ord a => a -> a -> Bool
> Int
0 Bool -> Bool -> Bool
&& Char -> Bool
isSpace (Text -> Char
Data.Text.last Text
text) =
Text -> Text
stripEnd Text
text forall a. Semigroup a => a -> a -> a
<> case Ending
ending of { TrailingChars Text
c -> Text
c
; TrailingSpaces Text
s -> Text
s
; Ending
Ending -> Text
Data.Text.empty
}
| Bool
otherwise = Text
text forall a. Semigroup a => a -> a -> a
<> case Ending
ending of { TrailingChars Text
c -> Text
c
; Ending
_ -> Text
Data.Text.empty
}
toEntity :: Text -> Text
toEntity :: Text -> Text
toEntity = (Char -> Text) -> Text -> Text
Data.Text.concatMap forall a b. (a -> b) -> a -> b
$ \ Char
c ->
if Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x80'
then Char -> Text
Data.Text.singleton Char
c
else [Text] -> Text
Data.Text.concat [Text
"&#x", String -> Text
pack forall a b. (a -> b) -> a -> b
$ forall a. (Integral a, Show a) => a -> ShowS
showHex (forall a. Enum a => a -> Int
fromEnum Char
c) String
"", Text
";"]
period' :: Parser Ending
period' :: Parser Text Ending
period' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Parser Char
char Char
'.' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
, Char -> Parser Char
char Char
'。' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
, Text -> Parser Text Text
string Text
"." forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
, Text -> Parser Text Text
string Text
"." forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
, Text -> Parser Text Text
string Text
"。" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
, Text -> Parser Text Text
asciiCI Text
"." forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
, Text -> Parser Text Text
asciiCI Text
"。" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
]
comma' :: Parser Ending
comma' :: Parser Text Ending
comma' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Parser Char
char Char
'、' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
, Text -> Parser Text Text
string Text
"," forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
, Text -> Parser Text Text
string Text
"," forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
, Text -> Parser Text Text
string Text
"," forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
, Text -> Parser Text Text
string Text
"、" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
, Text -> Parser Text Text
asciiCI Text
"," forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
boundary
, Text -> Parser Text Text
asciiCI Text
"、" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Ending
trailingSpaces
]
interpunct' :: Parser Ending
interpunct' :: Parser Text Ending
interpunct' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Parser Char
char Char
'·' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
, Text -> Parser Text Text
string Text
"·"
, Text -> Parser Text Text
string Text
"·"
, Text -> Parser Text Text
string Text
"·"
, Text -> Parser Text Text
string Text
"·"
, Text -> Parser Text Text
asciiCI Text
"·"
] forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Ending
Ending
closingChars :: String
closingChars :: String
closingChars =
[ Char
'"', Char
'”', Char
'\'', Char
'’', Char
')', Char
']', Char
'}', Char
'」', Char
'』', Char
'〉', Char
'》', Char
')', Char
'〕'
, Char
']', Char
'}', Char
'⦆', Char
'】', Char
'〗', Char
'〙', Char
'〛', Char
'›', Char
'»'
]
closingEntities :: [Text]
closingEntities :: [Text]
closingEntities =
[ Text
""", Text
"""
, Text
"'"
, Text
")"
, Text
"]", Text
"]"
, Text
"}", Text
"}"
, Text
"»"
, Text
"’", Text
"’", Text
"’"
, Text
"”", Text
"”", Text
"”"
, Text
"›"
]
closing :: Parser Text
closing :: Parser Text Text
closing = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice forall a b. (a -> b) -> a -> b
$
[Text -> Parser Text Text
string [Char
c] | Char
c <- String
closingChars] forall a. [a] -> [a] -> [a]
++
[Text -> Parser Text Text
string Text
e | Text
e <- [Text]
closingEntities] forall a. [a] -> [a] -> [a]
++
[Text -> Parser Text Text
asciiCI forall a b. (a -> b) -> a -> b
$ String -> Text
pack forall a b. (a -> b) -> a -> b
$ String
"&#x" forall a. [a] -> [a] -> [a]
++ forall a. (Integral a, Show a) => a -> ShowS
showHex (forall a. Enum a => a -> Int
fromEnum Char
c) String
"" forall a. [a] -> [a] -> [a]
++ String
";"
| Char
c <- String
closingChars
] forall a. [a] -> [a] -> [a]
++
[Text -> Parser Text Text
string forall a b. (a -> b) -> a -> b
$ Text
"&#" forall a. Semigroup a => a -> a -> a
<> String -> Text
pack (forall a. Show a => a -> String
show Char
c) forall a. Semigroup a => a -> a -> a
<> Text
";" | Char
c <- String
closingChars]
ending' :: Parser Ending
ending' :: Parser Text Ending
ending' = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Ending
Ending
, Text -> Ending
TrailingChars forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text
closing
]
boundary :: Parser Ending
boundary :: Parser Text Ending
boundary = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Parser Text Ending
ending'
, Text -> Ending
TrailingSpaces forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isSpace
]
trailingSpaces :: Parser Ending
trailingSpaces :: Parser Text Ending
trailingSpaces = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Parser Text Ending
boundary
, forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Ending
TrailingSpaces Text
" "
]
data Ending = TrailingChars Text | TrailingSpaces Text | Ending
data ArrowTransformationOption
= LeftRight
| DoubleArrow
deriving (ArrowTransformationOption -> ArrowTransformationOption -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c/= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
== :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c== :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
Eq, Eq ArrowTransformationOption
ArrowTransformationOption -> ArrowTransformationOption -> Bool
ArrowTransformationOption -> ArrowTransformationOption -> Ordering
ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
$cmin :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
max :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
$cmax :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
>= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c>= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
> :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c> :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
<= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c<= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
< :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c< :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
compare :: ArrowTransformationOption -> ArrowTransformationOption -> Ordering
$ccompare :: ArrowTransformationOption -> ArrowTransformationOption -> Ordering
Ord, Int -> ArrowTransformationOption -> ShowS
[ArrowTransformationOption] -> ShowS
ArrowTransformationOption -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ArrowTransformationOption] -> ShowS
$cshowList :: [ArrowTransformationOption] -> ShowS
show :: ArrowTransformationOption -> String
$cshow :: ArrowTransformationOption -> String
showsPrec :: Int -> ArrowTransformationOption -> ShowS
$cshowsPrec :: Int -> ArrowTransformationOption -> ShowS
Show)
transformArrow :: Set ArrowTransformationOption -> [HtmlEntity] -> [HtmlEntity]
transformArrow :: Set ArrowTransformationOption -> [HtmlEntity] -> [HtmlEntity]
transformArrow Set ArrowTransformationOption
options [HtmlEntity]
input = (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [HtmlEntity] -> [HtmlEntity]
normalizeText [HtmlEntity]
input) forall a b. (a -> b) -> a -> b
$ \ case
e :: HtmlEntity
e@HtmlText { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
stack, rawText :: HtmlEntity -> Text
rawText = Text
txt } ->
if HtmlTagStack -> Bool
isPreservedTagStack HtmlTagStack
stack
then HtmlEntity
e
else HtmlEntity
e { rawText :: Text
rawText = Text -> Text
replaceText Text
txt }
HtmlEntity
e ->
HtmlEntity
e
where
replaceText :: Text -> Text
replaceText :: Text -> Text
replaceText Text
txt =
case forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
Left String
_ -> forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
Right Text
t -> Text
t
specialChars :: Set Char
specialChars :: Set Char
specialChars = if ArrowTransformationOption
DoubleArrow forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
then [Char
'<', Char
'>', Char
'&', Char
'-', Char
'=']
else [Char
'<', Char
'>', Char
'&', Char
'-']
parser :: Parser Text
parser :: Parser Text Text
parser = do
[Text]
chunks <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ (Char -> Bool) -> Parser Text Text
takeWhile1 (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` Set Char
specialChars)
, forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text Text]
arrows
, Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
]
forall t. Chunk t => Parser t ()
endOfInput
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
arrows :: [Parser Text]
arrows :: [Parser Text Text]
arrows = forall a. [Maybe a] -> [a]
catMaybes
[ if ArrowTransformationOption
DoubleArrow forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
Bool -> Bool -> Bool
&& ArrowTransformationOption
LeftRight forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
then forall a. a -> Maybe a
Just Parser Text Text
doubleLeftRight
else forall a. Maybe a
Nothing
, if ArrowTransformationOption
DoubleArrow forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
then forall a. a -> Maybe a
Just Parser Text Text
doubleLeft
else forall a. Maybe a
Nothing
, if ArrowTransformationOption
DoubleArrow forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
then forall a. a -> Maybe a
Just Parser Text Text
doubleRight
else forall a. Maybe a
Nothing
, if ArrowTransformationOption
LeftRight forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`Prelude.elem` Set ArrowTransformationOption
options
then forall a. a -> Maybe a
Just Parser Text Text
leftRight
else forall a. Maybe a
Nothing
, forall a. a -> Maybe a
Just Parser Text Text
left
, forall a. a -> Maybe a
Just Parser Text Text
right
]
doubleLeftRight :: Parser Text
doubleLeftRight :: Parser Text Text
doubleLeftRight = Parser Text Text
lt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
equals forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"⇔"
doubleLeft :: Parser Text
doubleLeft :: Parser Text Text
doubleLeft = Parser Text Text
lt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
equals forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"⇐"
doubleRight :: Parser Text
doubleRight :: Parser Text Text
doubleRight = Parser Text ()
equals forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"⇒"
leftRight :: Parser Text
leftRight :: Parser Text Text
leftRight = Parser Text Text
lt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
hyphen forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"↔"
left :: Parser Text
left :: Parser Text Text
left = Parser Text Text
lt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
hyphen forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"←"
right :: Parser Text
right :: Parser Text Text
right = Parser Text ()
hyphen forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"→"
hyphen :: Parser ()
hyphen :: Parser Text ()
hyphen = forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
, Text -> Parser Text Text
string Text
"‐"
, Text -> Parser Text Text
string Text
"‐"
, Text -> Parser Text Text
string Text
"-"
, Text -> Parser Text Text
asciiCI Text
"-"
]
equals :: Parser ()
equals :: Parser Text ()
equals = forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Parser Char
char Char
'=' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
, Text -> Parser Text Text
string Text
"="
, Text -> Parser Text Text
string Text
"&61;"
, Text -> Parser Text Text
asciiCI Text
"="
]
lt :: Parser Text
lt :: Parser Text Text
lt = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'<'
, Text -> Parser Text Text
string Text
"<"
, Text -> Parser Text Text
string Text
"<"
, Text -> Parser Text Text
asciiCI Text
"<"
]
gt :: Parser Text
gt :: Parser Text Text
gt = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
char Char
'>'
, Text -> Parser Text Text
string Text
">"
, Text -> Parser Text Text
string Text
">"
, Text -> Parser Text Text
asciiCI Text
">"
]
transformEllipsis :: [HtmlEntity] -> [HtmlEntity]
transformEllipsis :: [HtmlEntity] -> [HtmlEntity]
transformEllipsis = (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText forall a b. (a -> b) -> a -> b
$ \ Text
txt ->
case forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
Left String
_ -> forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
Right Text
t -> Text
t
where
parser :: Parser Text
parser :: Parser Text Text
parser = do
[Text]
chunks <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ (Char -> Bool) -> Parser Text Text
takeWhile1 (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` ([Char
'&', Char
'.', Char
'。'] :: Set Char))
, Parser Text Text
ellipsis
, Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
]
forall t. Chunk t => Parser t ()
endOfInput
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
ellipsis :: Parser Text
ellipsis :: Parser Text Text
ellipsis = do
forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Parser Text Text
period forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
period forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
period
, Parser Text Text
chinesePeriod forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
chinesePeriod forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
chinesePeriod
]
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"…"
period :: Parser Text
period :: Parser Text Text
period = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Text -> Parser Text Text
string Text
"."
, Text -> Parser Text Text
string Text
"."
, Text -> Parser Text Text
string Text
"."
, Text -> Parser Text Text
asciiCI Text
"."
]
chinesePeriod :: Parser Text
chinesePeriod :: Parser Text Text
chinesePeriod = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Text -> Parser Text Text
string Text
"。"
, Text -> Parser Text Text
string Text
"。"
, Text -> Parser Text Text
asciiCI Text
"。"
]
data Quotes = Quotes
{ Quotes -> QuotePair
singleQuotes :: QuotePair
, Quotes -> QuotePair
doubleQuotes :: QuotePair
} deriving (Quotes -> Quotes -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Quotes -> Quotes -> Bool
$c/= :: Quotes -> Quotes -> Bool
== :: Quotes -> Quotes -> Bool
$c== :: Quotes -> Quotes -> Bool
Eq, Eq Quotes
Quotes -> Quotes -> Bool
Quotes -> Quotes -> Ordering
Quotes -> Quotes -> Quotes
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Quotes -> Quotes -> Quotes
$cmin :: Quotes -> Quotes -> Quotes
max :: Quotes -> Quotes -> Quotes
$cmax :: Quotes -> Quotes -> Quotes
>= :: Quotes -> Quotes -> Bool
$c>= :: Quotes -> Quotes -> Bool
> :: Quotes -> Quotes -> Bool
$c> :: Quotes -> Quotes -> Bool
<= :: Quotes -> Quotes -> Bool
$c<= :: Quotes -> Quotes -> Bool
< :: Quotes -> Quotes -> Bool
$c< :: Quotes -> Quotes -> Bool
compare :: Quotes -> Quotes -> Ordering
$ccompare :: Quotes -> Quotes -> Ordering
Ord, Int -> Quotes -> ShowS
[Quotes] -> ShowS
Quotes -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Quotes] -> ShowS
$cshowList :: [Quotes] -> ShowS
show :: Quotes -> String
$cshow :: Quotes -> String
showsPrec :: Int -> Quotes -> ShowS
$cshowsPrec :: Int -> Quotes -> ShowS
Show)
data QuotePair
= QuotePair Text Text
| HtmlElement HtmlTag HtmlRawAttrs
deriving (QuotePair -> QuotePair -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: QuotePair -> QuotePair -> Bool
$c/= :: QuotePair -> QuotePair -> Bool
== :: QuotePair -> QuotePair -> Bool
$c== :: QuotePair -> QuotePair -> Bool
Eq, Eq QuotePair
QuotePair -> QuotePair -> Bool
QuotePair -> QuotePair -> Ordering
QuotePair -> QuotePair -> QuotePair
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: QuotePair -> QuotePair -> QuotePair
$cmin :: QuotePair -> QuotePair -> QuotePair
max :: QuotePair -> QuotePair -> QuotePair
$cmax :: QuotePair -> QuotePair -> QuotePair
>= :: QuotePair -> QuotePair -> Bool
$c>= :: QuotePair -> QuotePair -> Bool
> :: QuotePair -> QuotePair -> Bool
$c> :: QuotePair -> QuotePair -> Bool
<= :: QuotePair -> QuotePair -> Bool
$c<= :: QuotePair -> QuotePair -> Bool
< :: QuotePair -> QuotePair -> Bool
$c< :: QuotePair -> QuotePair -> Bool
compare :: QuotePair -> QuotePair -> Ordering
$ccompare :: QuotePair -> QuotePair -> Ordering
Ord, Int -> QuotePair -> ShowS
[QuotePair] -> ShowS
QuotePair -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [QuotePair] -> ShowS
$cshowList :: [QuotePair] -> ShowS
show :: QuotePair -> String
$cshow :: QuotePair -> String
showsPrec :: Int -> QuotePair -> ShowS
$cshowsPrec :: Int -> QuotePair -> ShowS
Show)
curvedQuotes :: Quotes
curvedQuotes :: Quotes
curvedQuotes = Quotes
{ singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"‘" Text
"’"
, doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"“" Text
"”"
}
verticalCornerBrackets :: Quotes
verticalCornerBrackets :: Quotes
verticalCornerBrackets = Quotes
{ singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"﹁" Text
"﹂"
, doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"﹃" Text
"﹄"
}
horizontalCornerBrackets :: Quotes
horizontalCornerBrackets :: Quotes
horizontalCornerBrackets = Quotes
{ singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"「" Text
"」"
, doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"『" Text
"』"
}
guillemets :: Quotes
guillemets :: Quotes
guillemets = Quotes
{ singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"〈" Text
"〉"
, doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"《" Text
"》"
}
curvedSingleQuotesWithQ :: Quotes
curvedSingleQuotesWithQ :: Quotes
curvedSingleQuotesWithQ = Quotes
{ singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"‘" Text
"’"
, doubleQuotes :: QuotePair
doubleQuotes = HtmlTag -> Text -> QuotePair
HtmlElement HtmlTag
Q Text
""
}
verticalCornerBracketsWithQ :: Quotes
verticalCornerBracketsWithQ :: Quotes
verticalCornerBracketsWithQ = Quotes
{ singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"﹁" Text
"﹂"
, doubleQuotes :: QuotePair
doubleQuotes = HtmlTag -> Text -> QuotePair
HtmlElement HtmlTag
Q Text
""
}
horizontalCornerBracketsWithQ :: Quotes
horizontalCornerBracketsWithQ :: Quotes
horizontalCornerBracketsWithQ = Quotes
{ singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"「" Text
"」"
, doubleQuotes :: QuotePair
doubleQuotes = HtmlTag -> Text -> QuotePair
HtmlElement HtmlTag
Q Text
""
}
transformQuote :: Quotes
-> [HtmlEntity]
-> [HtmlEntity]
transformQuote :: Quotes -> [HtmlEntity] -> [HtmlEntity]
transformQuote Quotes { QuotePair
doubleQuotes :: QuotePair
singleQuotes :: QuotePair
doubleQuotes :: Quotes -> QuotePair
singleQuotes :: Quotes -> QuotePair
.. } = forall m. PairedTransformer m -> [HtmlEntity] -> [HtmlEntity]
transformPairs forall a b. (a -> b) -> a -> b
$
PairedTransformer
{ ignoresTagStack :: HtmlTagStack -> Bool
ignoresTagStack = HtmlTagStack -> Bool
isPreservedTagStack
, matchStart :: [(QuotePunct, Text)]
-> Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchStart = [(QuotePunct, Text)]
-> Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchStart'
, matchEnd :: Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchEnd = Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchEnd'
, areMatchesPaired :: (QuotePunct, Text) -> (QuotePunct, Text) -> Bool
areMatchesPaired = \ (QuotePunct
punct, Text
text) (QuotePunct
punct', Text
text') ->
QuotePunct -> QuotePunct -> Bool
arePaired QuotePunct
punct QuotePunct
punct' Bool -> Bool -> Bool
&& Text
text forall a. Eq a => a -> a -> Bool
== Text
text'
, transformPair :: (QuotePunct, Text)
-> (QuotePunct, Text) -> [HtmlEntity] -> [HtmlEntity]
transformPair = (QuotePunct, Text)
-> (QuotePunct, Text) -> [HtmlEntity] -> [HtmlEntity]
transformPair'
}
where
punctuations :: [(QuotePunct, [Text])]
punctuations :: [(QuotePunct, [Text])]
punctuations =
[ ( QuotePunct
Apostrophe
, [Text
"'", Text
"'", Text
"'", Text
"'", Text
"'"]
)
, ( QuotePunct
DoubleQuote
, [Text
"\"", Text
""", Text
""", Text
""", Text
""", Text
"""]
)
, ( QuotePunct
DoubleQuote
, [Text
"\"", Text
""", Text
""", Text
""", Text
""", Text
"""]
)
, ( QuotePunct
OpeningSingleQuote
, [ Text
"\x2018", Text
"‘", Text
"‘"
, Text
"‘", Text
"‘", Text
"‘"
]
)
, ( QuotePunct
ClosingSingleQuote
, [ Text
"\x2019", Text
"’", Text
"’", Text
"’"
, Text
"’", Text
"’", Text
"’"
]
)
, ( QuotePunct
OpeningDoubleQuote
, [ Text
"\x201c", Text
"“", Text
"“"
, Text
"“", Text
"“", Text
"“", Text
"“", Text
"“"
]
)
, ( QuotePunct
ClosingDoubleQuote
, [ Text
"\x201d", Text
"”", Text
"”", Text
"”"
, Text
"”", Text
"”", Text
"”", Text
"”", Text
"”"
]
)
]
matchStart' :: [(QuotePunct, Text)]
-> Text
-> Maybe ((QuotePunct, Text), Text, Text, Text)
matchStart' :: [(QuotePunct, Text)]
-> Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchStart' [(QuotePunct, Text)]
prevMatches Text
text
| forall (t :: * -> *) a. Foldable t => t a -> Bool
Prelude.null [(QuotePunct, Text, (Text, Text))]
prevMatcherCandidates = forall a. Maybe a
Nothing
| Bool
otherwise =
let (QuotePunct
matcher, Text
entity, (Text
pre, Text
post)) = forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy
(forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a -> b) -> a -> b
$ \ (QuotePunct
_, Text
_, (Text
pre', Text
_)) -> Text -> Int
Data.Text.length Text
pre')
[(QuotePunct, Text, (Text, Text))]
prevMatcherCandidates
in
if Text -> Bool
Data.Text.null Text
post then
forall a. Maybe a
Nothing
else
forall a. a -> Maybe a
Just
( (QuotePunct
matcher, Text
entity)
, Text
pre
, Text
entity
, Int -> Text -> Text
Data.Text.drop (Text -> Int
Data.Text.length Text
entity) Text
post
)
where
prevMatchers :: Set QuotePunct
prevMatchers :: Set QuotePunct
prevMatchers = forall a. Ord a => [a] -> Set a
Data.Set.fromList (forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(QuotePunct, Text)]
prevMatches)
prevMatcherCandidates :: [(QuotePunct, Text, (Text, Text))]
prevMatcherCandidates :: [(QuotePunct, Text, (Text, Text))]
prevMatcherCandidates =
[ (QuotePunct
matcher', Text
entity', Text -> Text -> (Text, Text)
breakOn Text
entity' Text
text)
| (QuotePunct
matcher', [Text]
entities) <- [(QuotePunct, [Text])]
punctuations
, QuotePunct -> Bool
opens QuotePunct
matcher'
, QuotePunct
matcher' forall a. Ord a => a -> Set a -> Bool
`Data.Set.notMember` Set QuotePunct
prevMatchers
, Text
entity' <- [Text]
entities
]
matchEnd' :: Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchEnd' :: Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchEnd' Text
text
| forall (t :: * -> *) a. Foldable t => t a -> Bool
Prelude.null [(QuotePunct, Text, (Text, Text))]
matcherCandidates = forall a. Maybe a
Nothing
| Bool
otherwise =
let (QuotePunct
matcher, Text
entity, (Text
pre, Text
post)) = forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy
(forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a -> b) -> a -> b
$ \ (QuotePunct
_, Text
_, (Text
pre', Text
_)) -> Text -> Int
Data.Text.length Text
pre')
[(QuotePunct, Text, (Text, Text))]
matcherCandidates
in
if Text -> Bool
Data.Text.null Text
post then
forall a. Maybe a
Nothing
else
forall a. a -> Maybe a
Just
( (QuotePunct
matcher, Text
entity)
, Text
pre
, Text
entity
, Int -> Text -> Text
Data.Text.drop (Text -> Int
Data.Text.length Text
entity) Text
post
)
where
matcherCandidates :: [(QuotePunct, Text, (Text, Text))]
matcherCandidates :: [(QuotePunct, Text, (Text, Text))]
matcherCandidates =
[ (QuotePunct
matcher', Text
entity', Text -> Text -> (Text, Text)
breakOn Text
entity' Text
text)
| (QuotePunct
matcher', [Text]
entities) <- [(QuotePunct, [Text])]
punctuations
, QuotePunct -> Bool
closes QuotePunct
matcher'
, Text
entity' <- [Text]
entities
]
transformPair' :: (QuotePunct, Text)
-> (QuotePunct, Text)
-> [HtmlEntity]
-> [HtmlEntity]
transformPair' :: (QuotePunct, Text)
-> (QuotePunct, Text) -> [HtmlEntity] -> [HtmlEntity]
transformPair' (QuotePunct
punct, Text
start) (QuotePunct
_, Text
end) buffer :: [HtmlEntity]
buffer@(HtmlEntity
firstEntity : [HtmlEntity]
_) =
case Text -> Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipText Text
start Text
end [HtmlEntity]
buffer of
Maybe [HtmlEntity]
Nothing -> [HtmlEntity]
buffer
Just [HtmlEntity]
es -> case QuotePair
pair of
QuotePair Text
open Text
close ->
HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
tagStack' Text
open forall a. a -> [a] -> [a]
: [HtmlEntity]
es forall a. [a] -> [a] -> [a]
++ [HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
tagStack' Text
close]
HtmlElement HtmlTag
tag Text
attrs ->
HtmlTagStack -> HtmlTag -> Text -> [HtmlEntity] -> [HtmlEntity]
wrap HtmlTagStack
tagStack' HtmlTag
tag Text
attrs [HtmlEntity]
es
where
pair :: QuotePair
pair :: QuotePair
pair = case QuotePunct
punct of
QuotePunct
DoubleQuote -> QuotePair
doubleQuotes
QuotePunct
OpeningDoubleQuote -> QuotePair
doubleQuotes
QuotePunct
ClosingDoubleQuote -> QuotePair
doubleQuotes
QuotePunct
_ -> QuotePair
singleQuotes
tagStack' :: HtmlTagStack
tagStack' :: HtmlTagStack
tagStack' = HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
firstEntity
transformPair' (QuotePunct, Text)
_ (QuotePunct, Text)
_ [] = []
arePaired :: QuotePunct -> QuotePunct -> Bool
arePaired :: QuotePunct -> QuotePunct -> Bool
arePaired QuotePunct
OpeningSingleQuote = (forall a. Eq a => a -> a -> Bool
== QuotePunct
ClosingSingleQuote)
arePaired QuotePunct
OpeningDoubleQuote = (forall a. Eq a => a -> a -> Bool
== QuotePunct
ClosingDoubleQuote)
arePaired QuotePunct
punct = (forall a. Eq a => a -> a -> Bool
== QuotePunct
punct)
data QuotePunct
= DoubleQuote
| Apostrophe
| OpeningSingleQuote | ClosingSingleQuote
| OpeningDoubleQuote | ClosingDoubleQuote
deriving (QuotePunct -> QuotePunct -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: QuotePunct -> QuotePunct -> Bool
$c/= :: QuotePunct -> QuotePunct -> Bool
== :: QuotePunct -> QuotePunct -> Bool
$c== :: QuotePunct -> QuotePunct -> Bool
Eq, Eq QuotePunct
QuotePunct -> QuotePunct -> Bool
QuotePunct -> QuotePunct -> Ordering
QuotePunct -> QuotePunct -> QuotePunct
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: QuotePunct -> QuotePunct -> QuotePunct
$cmin :: QuotePunct -> QuotePunct -> QuotePunct
max :: QuotePunct -> QuotePunct -> QuotePunct
$cmax :: QuotePunct -> QuotePunct -> QuotePunct
>= :: QuotePunct -> QuotePunct -> Bool
$c>= :: QuotePunct -> QuotePunct -> Bool
> :: QuotePunct -> QuotePunct -> Bool
$c> :: QuotePunct -> QuotePunct -> Bool
<= :: QuotePunct -> QuotePunct -> Bool
$c<= :: QuotePunct -> QuotePunct -> Bool
< :: QuotePunct -> QuotePunct -> Bool
$c< :: QuotePunct -> QuotePunct -> Bool
compare :: QuotePunct -> QuotePunct -> Ordering
$ccompare :: QuotePunct -> QuotePunct -> Ordering
Ord, Int -> QuotePunct -> ShowS
[QuotePunct] -> ShowS
QuotePunct -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [QuotePunct] -> ShowS
$cshowList :: [QuotePunct] -> ShowS
show :: QuotePunct -> String
$cshow :: QuotePunct -> String
showsPrec :: Int -> QuotePunct -> ShowS
$cshowsPrec :: Int -> QuotePunct -> ShowS
Show)
opens :: QuotePunct -> Bool
opens :: QuotePunct -> Bool
opens QuotePunct
DoubleQuote = Bool
True
opens QuotePunct
Apostrophe = Bool
True
opens QuotePunct
OpeningSingleQuote = Bool
True
opens QuotePunct
OpeningDoubleQuote = Bool
True
opens QuotePunct
_ = Bool
False
closes :: QuotePunct -> Bool
closes :: QuotePunct -> Bool
closes QuotePunct
DoubleQuote = Bool
True
closes QuotePunct
Apostrophe = Bool
True
closes QuotePunct
ClosingSingleQuote = Bool
True
closes QuotePunct
ClosingDoubleQuote = Bool
True
closes QuotePunct
_ = Bool
False
transformEmDash :: [HtmlEntity] -> [HtmlEntity]
transformEmDash :: [HtmlEntity] -> [HtmlEntity]
transformEmDash = (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText forall a b. (a -> b) -> a -> b
$ \ Text
txt ->
case forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
Left String
_ -> forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
Right Text
t -> Text
t
where
parser :: Parser Text
parser :: Parser Text Text
parser = do
[Text]
chunks <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ (Char -> Bool) -> Parser Text Text
takeWhile1 forall a b. (a -> b) -> a -> b
$ \ Char
c ->
Bool -> Bool
not (Char -> Bool
isSpace Char
c) Bool -> Bool -> Bool
&&
Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` ([Char
'&', Char
'-', Char
'\x3161'] :: Set Char)
, Parser Text Text
emDash
, Char -> Text
Data.Text.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar
]
forall t. Chunk t => Parser t ()
endOfInput
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
emDash :: Parser Text
emDash :: Parser Text Text
emDash = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Parser Text Text
hyphens
, (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isSpace forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text Text
eu, Parser Text Text
hyphen] forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isSpace
] forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
"—"
hyphens :: Parser Text
hyphens :: Parser Text Text
hyphens = Parser Text Text
hyphen forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
hyphen forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Text
"" Parser Text Text
hyphen
hyphen :: Parser Text
hyphen :: Parser Text Text
hyphen = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
Prelude.map Text -> Parser Text Text
string
[Text
"-", Text
"-", Text
"-", Text
"-", Text
"-", Text
"-"]
eu :: Parser Text
eu :: Parser Text Text
eu = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
Prelude.map Text -> Parser Text Text
string
[Text
"\x3161", Text
"ㅡ", Text
"ㅡ", Text
"ㅡ"]
transformText :: (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText :: (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText Text -> Text
replace' = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a -> b) -> a -> b
$ \ case
e :: HtmlEntity
e@HtmlText { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
stack, rawText :: HtmlEntity -> Text
rawText = Text
txt } ->
if HtmlTagStack -> Bool
isPreservedTagStack HtmlTagStack
stack
then HtmlEntity
e
else HtmlEntity
e { rawText :: Text
rawText = Text -> Text
replace' Text
txt }
HtmlEntity
e ->
HtmlEntity
e