{-# LANGUAGE OverloadedStrings #-}
module Text.Seonbi.Html.Lang
( LangHtmlEntity (..)
, LanguageTag
, annotateWithLang
, extractLang
, isKorean
, isNeverKorean
) where
import Control.Applicative
import Data.Char (isSpace)
import Data.Maybe
import Data.Attoparsec.Text
import Data.Text
import Text.Seonbi.Html.Entity
import Text.Seonbi.Html.Tag (HtmlTag)
type LanguageTag = Text
extractLang
:: HtmlRawAttrs
-> Maybe LanguageTag
Text
attrs =
case forall a. Parser a -> Text -> Either String a
parseOnly Parser (Maybe Text)
parser' Text
attrs of
Right (Just Text
lang') ->
let lt :: Text
lt = Text -> Text
toLower forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
strip forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
normalizeEntities forall a b. (a -> b) -> a -> b
$ Text
lang'
in if Text -> Bool
Data.Text.null Text
lt then forall a. Maybe a
Nothing else forall a. a -> Maybe a
Just Text
lt
Either String (Maybe Text)
_ -> forall a. Maybe a
Nothing
where
parser' :: Parser (Maybe Text)
parser' :: Parser (Maybe Text)
parser' = do
Parser ()
skipSpace
[Maybe Text]
attrs' <- Parser (Maybe Text)
langAttr forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy` Parser Char
space
Parser ()
skipSpace
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. [a] -> Maybe a
listToMaybe forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes [Maybe Text]
attrs'
langAttr :: Parser (Maybe Text)
langAttr :: Parser (Maybe Text)
langAttr = do
(Bool
isLang, Bool
cont) <- Parser (Bool, Bool)
attrIsLang
Text
value <- if Bool
cont then Parser Text
attrValue else forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
forall (m :: * -> *) a. Monad m => a -> m a
return (if Bool
isLang then forall a. a -> Maybe a
Just Text
value else forall a. Maybe a
Nothing)
attrIsLang :: Parser (Bool, Bool)
attrIsLang :: Parser (Bool, Bool)
attrIsLang = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Text -> Parser Text
asciiCI Text
"lang=" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
True, Bool
True)
, do { Text
_ <- (Char -> Bool) -> Parser Text
takeWhile1 (forall a. Eq a => a -> a -> Bool
/= Char
'=')
; Maybe Char
eq <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Char -> Parser Char
char Char
'=')
; forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
False, forall a. Maybe a -> Bool
isJust Maybe Char
eq)
}
]
attrValue :: Parser Text
attrValue :: Parser Text
attrValue = forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ do { (Char -> Bool) -> Parser ()
skip (forall a. Eq a => a -> a -> Bool
== Char
'"'); Text
v <- (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
== Char
'"'); (Char -> Bool) -> Parser ()
skip (forall a. Eq a => a -> a -> Bool
== Char
'"'); forall (m :: * -> *) a. Monad m => a -> m a
return Text
v }
, do { (Char -> Bool) -> Parser ()
skip (forall a. Eq a => a -> a -> Bool
== Char
'\'')
; Text
v <- (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
== Char
'\'')
; (Char -> Bool) -> Parser ()
skip (forall a. Eq a => a -> a -> Bool
== Char
'\''); forall (m :: * -> *) a. Monad m => a -> m a
return Text
v
}
, (Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace)
]
normalizeEntities :: Text -> Text
normalizeEntities :: Text -> Text
normalizeEntities
= Text -> Text -> Text -> Text
Data.Text.replace Text
"‐" Text
"-"
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> Text -> Text
Data.Text.replace Text
"‐" Text
"-"
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> Text -> Text
Data.Text.replace Text
"‐" Text
"-"
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> Text -> Text
Data.Text.replace Text
"‐" Text
"-"
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> Text -> Text
Data.Text.replace Text
"‐" Text
"-"
data LangHtmlEntity = LangHtmlEntity
{
LangHtmlEntity -> Maybe Text
lang :: Maybe LanguageTag
, LangHtmlEntity -> HtmlEntity
entity :: HtmlEntity
} deriving (Int -> LangHtmlEntity -> ShowS
[LangHtmlEntity] -> ShowS
LangHtmlEntity -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [LangHtmlEntity] -> ShowS
$cshowList :: [LangHtmlEntity] -> ShowS
show :: LangHtmlEntity -> String
$cshow :: LangHtmlEntity -> String
showsPrec :: Int -> LangHtmlEntity -> ShowS
$cshowsPrec :: Int -> LangHtmlEntity -> ShowS
Show, LangHtmlEntity -> LangHtmlEntity -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: LangHtmlEntity -> LangHtmlEntity -> Bool
$c/= :: LangHtmlEntity -> LangHtmlEntity -> Bool
== :: LangHtmlEntity -> LangHtmlEntity -> Bool
$c== :: LangHtmlEntity -> LangHtmlEntity -> Bool
Eq)
annotateWithLang :: [HtmlEntity] -> [LangHtmlEntity]
annotateWithLang :: [HtmlEntity] -> [LangHtmlEntity]
annotateWithLang =
[(HtmlTag, Maybe Text)] -> [HtmlEntity] -> [LangHtmlEntity]
annotate []
where
annotate :: [(HtmlTag, Maybe LanguageTag)]
-> [HtmlEntity]
-> [LangHtmlEntity]
annotate :: [(HtmlTag, Maybe Text)] -> [HtmlEntity] -> [LangHtmlEntity]
annotate [(HtmlTag, Maybe Text)]
_ [] = []
annotate [(HtmlTag, Maybe Text)]
stack (x :: HtmlEntity
x@HtmlStartTag { tag :: HtmlEntity -> HtmlTag
tag = HtmlTag
tag', rawAttributes :: HtmlEntity -> Text
rawAttributes = Text
attrs } : [HtmlEntity]
xs) =
Maybe Text -> HtmlEntity -> LangHtmlEntity
LangHtmlEntity Maybe Text
thisLang HtmlEntity
x forall a. a -> [a] -> [a]
: [(HtmlTag, Maybe Text)] -> [HtmlEntity] -> [LangHtmlEntity]
annotate [(HtmlTag, Maybe Text)]
nextStack [HtmlEntity]
xs
where
parentLang :: Maybe LanguageTag
parentLang :: Maybe Text
parentLang = case [(HtmlTag, Maybe Text)]
stack of
(HtmlTag
_, Maybe Text
l):[(HtmlTag, Maybe Text)]
_ -> Maybe Text
l
[(HtmlTag, Maybe Text)]
_ -> forall a. Maybe a
Nothing
thisLang :: Maybe LanguageTag
thisLang :: Maybe Text
thisLang = Text -> Maybe Text
extractLang Text
attrs forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe Text
parentLang
nextStack :: [(HtmlTag, Maybe LanguageTag)]
nextStack :: [(HtmlTag, Maybe Text)]
nextStack = (HtmlTag
tag', Maybe Text
thisLang) forall a. a -> [a] -> [a]
: [(HtmlTag, Maybe Text)]
stack
annotate [(HtmlTag, Maybe Text)]
stack (x :: HtmlEntity
x@HtmlEndTag { tag :: HtmlEntity -> HtmlTag
tag = HtmlTag
tag' } : [HtmlEntity]
xs) =
Maybe Text -> HtmlEntity -> LangHtmlEntity
LangHtmlEntity Maybe Text
thisLang HtmlEntity
x forall a. a -> [a] -> [a]
: [(HtmlTag, Maybe Text)] -> [HtmlEntity] -> [LangHtmlEntity]
annotate [(HtmlTag, Maybe Text)]
nextStack [HtmlEntity]
xs
where
([(HtmlTag, Maybe Text)]
nextStack, Maybe Text
thisLang) = case [(HtmlTag, Maybe Text)]
stack of
[] -> ([], forall a. Maybe a
Nothing)
s :: [(HtmlTag, Maybe Text)]
s@((HtmlTag
t, Maybe Text
lang'):[(HtmlTag, Maybe Text)]
ys) ->
(if HtmlTag
t forall a. Eq a => a -> a -> Bool
== HtmlTag
tag' then [(HtmlTag, Maybe Text)]
ys else [(HtmlTag, Maybe Text)]
s, Maybe Text
lang')
annotate [(HtmlTag, Maybe Text)]
stack (HtmlEntity
x : [HtmlEntity]
xs) =
Maybe Text -> HtmlEntity -> LangHtmlEntity
LangHtmlEntity Maybe Text
parentLang HtmlEntity
x forall a. a -> [a] -> [a]
: [(HtmlTag, Maybe Text)] -> [HtmlEntity] -> [LangHtmlEntity]
annotate [(HtmlTag, Maybe Text)]
stack [HtmlEntity]
xs
where
parentLang :: Maybe LanguageTag
parentLang :: Maybe Text
parentLang = case [(HtmlTag, Maybe Text)]
stack of
(HtmlTag
_, Maybe Text
l):[(HtmlTag, Maybe Text)]
_ -> Maybe Text
l
[(HtmlTag, Maybe Text)]
_ -> forall a. Maybe a
Nothing
isKorean :: LanguageTag -> Bool
isKorean :: Text -> Bool
isKorean Text
lang' =
Text
l forall a. Eq a => a -> a -> Bool
== Text
"ko" Bool -> Bool -> Bool
|| Text
l forall a. Eq a => a -> a -> Bool
== Text
"kor" Bool -> Bool -> Bool
||
Text
"ko-" Text -> Text -> Bool
`isPrefixOf` Text
l Bool -> Bool -> Bool
||
Text
"kor-" Text -> Text -> Bool
`isPrefixOf` Text
l
where
l :: Text
l :: Text
l = Text -> Text
toLower Text
lang'
isNeverKorean :: Maybe LanguageTag -> Bool
isNeverKorean :: Maybe Text -> Bool
isNeverKorean Maybe Text
Nothing = Bool
False
isNeverKorean (Just Text
lang') = Bool -> Bool
not (Text -> Bool
isKorean Text
lang')