{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
module Text.Seonbi.Hanja
(
HanjaPhoneticization (..)
, def
, phoneticizeHanja
, phoneticizeHanjaChar
, HanjaDictionary
, HanjaWordPhoneticizer
, phoneticizeHanjaWord
, phoneticizeHanjaWordWithInitialSoundLaw
, withDictionary
, HanjaWordRenderer
, hangulOnly
, hanjaInParentheses
, hanjaInRuby
, convertInitialSoundLaw
, initialSoundLawTable
, initialSoundLawTable'
, revertInitialSoundLaw
) where
import Prelude hiding (lookup)
import Control.Applicative
import Control.Monad
import Data.Char
import Data.List hiding (lookup)
import Data.Maybe
import Data.Ord (comparing)
import Data.Attoparsec.Text
import Data.Default
import Data.Map.Strict
import Data.Set
import Data.Text hiding (concatMap)
import Text.Seonbi.Hangul
import Text.Seonbi.Html
import Text.Seonbi.Html.Lang
import Text.Seonbi.Html.Preservation
import Text.Seonbi.Html.TagStack (push)
import qualified Text.Seonbi.Trie as Trie
import Text.Seonbi.Unihan.KHangul
data HanjaPhoneticization = HanjaPhoneticization
{
HanjaPhoneticization -> HanjaWordPhoneticizer
phoneticizer :: HanjaWordPhoneticizer
, HanjaPhoneticization -> HanjaWordRenderer
wordRenderer :: HanjaWordRenderer
, HanjaPhoneticization -> HanjaWordRenderer
homophoneRenderer :: HanjaWordRenderer
, :: Bool
}
type HanjaWordPhoneticizer
= Text
-> Text
type HanjaWordRenderer
= HtmlTagStack
-> Text
-> Text
-> [HtmlEntity]
hangulOnly :: HanjaWordRenderer
hangulOnly :: HanjaWordRenderer
hangulOnly HtmlTagStack
stack Text
_ Text
hangul = [HtmlTagStack -> Text -> HtmlEntity
HtmlCdata HtmlTagStack
stack Text
hangul]
hanjaInParentheses :: HanjaWordRenderer
hanjaInParentheses :: HanjaWordRenderer
hanjaInParentheses HtmlTagStack
stack Text
hanja Text
hangul =
[HtmlTagStack -> Text -> HtmlEntity
HtmlCdata HtmlTagStack
stack forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text
hangul, Text
"(", Text
hanja, Text
")"]]
hanjaInRuby :: HanjaWordRenderer
hanjaInRuby :: HanjaWordRenderer
hanjaInRuby HtmlTagStack
stack Text
hanja Text
hangul =
[ HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag HtmlTagStack
stack HtmlTag
Ruby Text
""
, HtmlTagStack -> Text -> HtmlEntity
HtmlCdata HtmlTagStack
rubyStack Text
hanja
, HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag HtmlTagStack
rubyStack HtmlTag
RP Text
""
, HtmlTagStack -> Text -> HtmlEntity
HtmlText (HtmlTag -> HtmlTagStack -> HtmlTagStack
push HtmlTag
RP HtmlTagStack
rubyStack) Text
"("
, HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag HtmlTagStack
rubyStack HtmlTag
RP
, HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag HtmlTagStack
rubyStack HtmlTag
RT Text
""
, HtmlTagStack -> Text -> HtmlEntity
HtmlCdata (HtmlTag -> HtmlTagStack -> HtmlTagStack
push HtmlTag
RT HtmlTagStack
rubyStack) Text
hangul
, HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag HtmlTagStack
rubyStack HtmlTag
RT
, HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag HtmlTagStack
rubyStack HtmlTag
RP Text
""
, HtmlTagStack -> Text -> HtmlEntity
HtmlText (HtmlTag -> HtmlTagStack -> HtmlTagStack
push HtmlTag
RP HtmlTagStack
rubyStack) Text
")"
, HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag HtmlTagStack
rubyStack HtmlTag
RP
, HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag HtmlTagStack
stack HtmlTag
Ruby
]
where
rubyStack :: HtmlTagStack
rubyStack :: HtmlTagStack
rubyStack = HtmlTag -> HtmlTagStack -> HtmlTagStack
push HtmlTag
Ruby HtmlTagStack
stack
instance Default HanjaPhoneticization where
def :: HanjaPhoneticization
def = HanjaPhoneticization
{ phoneticizer :: HanjaWordPhoneticizer
phoneticizer = HanjaWordPhoneticizer
phoneticizeHanjaWordWithInitialSoundLaw
, wordRenderer :: HanjaWordRenderer
wordRenderer = HanjaWordRenderer
hangulOnly
, homophoneRenderer :: HanjaWordRenderer
homophoneRenderer = HanjaWordRenderer
hanjaInParentheses
, debugComment :: Bool
debugComment = Bool
False
}
phoneticizeHanja
:: HanjaPhoneticization
-> [HtmlEntity]
-> [HtmlEntity]
phoneticizeHanja :: HanjaPhoneticization -> [HtmlEntity] -> [HtmlEntity]
phoneticizeHanja HanjaPhoneticization { HanjaWordPhoneticizer
phoneticizer :: HanjaWordPhoneticizer
phoneticizer :: HanjaPhoneticization -> HanjaWordPhoneticizer
phoneticizer
, HanjaWordRenderer
wordRenderer :: HanjaWordRenderer
wordRenderer :: HanjaPhoneticization -> HanjaWordRenderer
wordRenderer
, HanjaWordRenderer
homophoneRenderer :: HanjaWordRenderer
homophoneRenderer :: HanjaPhoneticization -> HanjaWordRenderer
homophoneRenderer
, Bool
debugComment :: Bool
debugComment :: HanjaPhoneticization -> Bool
debugComment
}
[HtmlEntity]
entities =
(forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
`concatMap` [Either HtmlEntity (HtmlTagStack, Text, Text)]
normalized) forall a b. (a -> b) -> a -> b
$ \ case
Left HtmlEntity
e' ->
[HtmlEntity
e']
Right (HtmlTagStack
stack, Text
hanja, Text
hangul) ->
if forall a. Set a -> Int
Data.Set.size (forall k a. Ord k => a -> k -> Map k a -> a
findWithDefault [] Text
hangul Map Text (Set Text)
frequencyDict) forall a. Ord a => a -> a -> Bool
> Int
1
then HanjaWordRenderer
homophoneRenderer' HtmlTagStack
stack Text
hanja Text
hangul
else HanjaWordRenderer
wordRenderer' HtmlTagStack
stack Text
hanja Text
hangul
where
frequencyDict :: Map Text (Set Text)
frequencyDict :: Map Text (Set Text)
frequencyDict = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Data.Map.Strict.fromListWith
forall a. Ord a => Set a -> Set a -> Set a
Data.Set.union
[(Text
hangul, [Text
hanja]) | Right (HtmlTagStack
_, Text
hanja, Text
hangul) <- [Either HtmlEntity (HtmlTagStack, Text, Text)]
normalized]
normalized :: [Either HtmlEntity (HtmlTagStack, Text, Text)]
normalized :: [Either HtmlEntity (HtmlTagStack, Text, Text)]
normalized = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
Data.List.concat
[ case Either HtmlEntity (HtmlTagStack, Text, Text)
e of
Left HtmlEntity
_ ->
[Either HtmlEntity (HtmlTagStack, Text, Text)
e]
Right (HtmlTagStack
stack, Text
hanja, Text
hangul) ->
let hanjaWords :: [Text]
hanjaWords = Text -> [Text]
splitByDigits Text
hanja
hangulWords :: [Text]
hangulWords = Text -> [Text]
splitByDigits Text
hangul
hanjaWordsLen :: Int
hanjaWordsLen = forall (t :: * -> *) a. Foldable t => t a -> Int
Prelude.length [Text]
hanjaWords
hangulWordsLen :: Int
hangulWordsLen = forall (t :: * -> *) a. Foldable t => t a -> Int
Prelude.length [Text]
hangulWords
in
if Int
hanjaWordsLen forall a. Eq a => a -> a -> Bool
/= Int
hangulWordsLen
then [Either HtmlEntity (HtmlTagStack, Text, Text)
e]
else
[ if (Char -> Bool) -> Text -> Bool
Data.Text.any Char -> Bool
isDigit Text
hanj
then forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
stack Text
hanj
else forall a b. b -> Either a b
Right (HtmlTagStack
stack, Text
hanj, Text
hang)
| (Text
hanj, Text
hang) <- forall a b. [a] -> [b] -> [(a, b)]
Prelude.zip [Text]
hanjaWords [Text]
hangulWords
]
| Either HtmlEntity (HtmlTagStack, Text, Text)
e <- forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap LangHtmlEntity -> [Either HtmlEntity (HtmlTagStack, Text, Text)]
transform ([HtmlEntity] -> [LangHtmlEntity]
annotateWithLang [HtmlEntity]
entities)
]
splitByDigits :: Text -> [Text]
splitByDigits :: Text -> [Text]
splitByDigits = (Char -> Char -> Bool) -> Text -> [Text]
Data.Text.groupBy (\ Char
a Char
b -> Char -> Bool
isDigit Char
a forall a. Eq a => a -> a -> Bool
== Char -> Bool
isDigit Char
b)
transform :: LangHtmlEntity
-> [Either HtmlEntity (HtmlTagStack, Text, Text)]
transform :: LangHtmlEntity -> [Either HtmlEntity (HtmlTagStack, Text, Text)]
transform LangHtmlEntity
{ lang :: LangHtmlEntity -> Maybe Text
lang = Maybe Text
lang
, entity :: LangHtmlEntity -> HtmlEntity
entity = entity :: HtmlEntity
entity@HtmlText
{ tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
tagStack'
, rawText :: HtmlEntity -> Text
rawText = Text
rawText'
}
}
| HtmlTagStack -> Bool
isPreservedTagStack HtmlTagStack
tagStack' Bool -> Bool -> Bool
|| Maybe Text -> Bool
isNeverKorean Maybe Text
lang =
[forall a b. a -> Either a b
Left HtmlEntity
entity]
| Bool
otherwise =
case Text -> Maybe [(Bool, Text)]
analyzeHanjaText Text
rawText' of
Maybe [(Bool, Text)]
Nothing -> [forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ HtmlEntity
entity { rawText :: Text
rawText = Text
rawText' }]
Just [(Bool, Text)]
pairs ->
[ if Bool
trueIfHanja
then forall a b. b -> Either a b
Right (HtmlTagStack
tagStack', Text
htmlText, HanjaWordPhoneticizer
phoneticizer Text
htmlText)
else forall a b. a -> Either a b
Left (HtmlEntity
entity { rawText :: Text
rawText = Text
htmlText })
| (Bool
trueIfHanja, Text
htmlText) <- [(Bool, Text)]
pairs
]
transform LangHtmlEntity { HtmlEntity
entity :: HtmlEntity
entity :: LangHtmlEntity -> HtmlEntity
entity } =
[forall a b. a -> Either a b
Left HtmlEntity
entity]
debugRenderer :: HanjaWordRenderer -> HanjaWordRenderer
debugRenderer :: HanjaWordRenderer -> HanjaWordRenderer
debugRenderer HanjaWordRenderer
render HtmlTagStack
stack Text
hanja Text
hangul =
HtmlTagStack -> Text -> HtmlEntity
HtmlComment HtmlTagStack
stack (Text
" Hanja: " Text -> HanjaWordPhoneticizer
`append` Text
hanja)
forall a. a -> [a] -> [a]
: HanjaWordRenderer
render HtmlTagStack
stack Text
hanja Text
hangul forall a. [a] -> [a] -> [a]
++ [HtmlTagStack -> Text -> HtmlEntity
HtmlComment HtmlTagStack
stack Text
" /Hanja "]
wordRenderer' :: HanjaWordRenderer
wordRenderer' :: HanjaWordRenderer
wordRenderer'
| Bool
debugComment = HanjaWordRenderer -> HanjaWordRenderer
debugRenderer HanjaWordRenderer
wordRenderer
| Bool
otherwise = HanjaWordRenderer
wordRenderer
homophoneRenderer' :: HanjaWordRenderer
homophoneRenderer' :: HanjaWordRenderer
homophoneRenderer'
| Bool
debugComment = HanjaWordRenderer -> HanjaWordRenderer
debugRenderer HanjaWordRenderer
homophoneRenderer
| Bool
otherwise = HanjaWordRenderer
homophoneRenderer
analyzeHanjaText :: Text -> Maybe [(Bool, Text)]
analyzeHanjaText :: Text -> Maybe [(Bool, Text)]
analyzeHanjaText Text
text' =
case forall a. Parser a -> Text -> Either [Char] a
parseOnly (Parser Text [(Bool, Text)]
textParser forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput) Text
text' of
Left [Char]
_ -> forall a. Maybe a
Nothing
Right [(Bool, Text)]
pairs -> forall a. a -> Maybe a
Just
[ (Bool
trueIfHanja, Text
text)
| (Bool
trueIfHanja, Text
text) <- [(Bool, Text)]
pairs
, Bool -> Bool
not (Text -> Bool
Data.Text.null Text
text)
]
phoneticizeHanjaWord :: HanjaWordPhoneticizer
phoneticizeHanjaWord :: HanjaWordPhoneticizer
phoneticizeHanjaWord =
(Char -> Char) -> HanjaWordPhoneticizer
Data.Text.map Char -> Char
phoneticizeHanjaChar
phoneticizeHanjaWordWithInitialSoundLaw :: HanjaWordPhoneticizer
phoneticizeHanjaWordWithInitialSoundLaw :: HanjaWordPhoneticizer
phoneticizeHanjaWordWithInitialSoundLaw Text
word =
case forall a. Parser a -> Text -> Either [Char] a
parseOnly (Parser Text Text
parser forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput) Text
word of
Left [Char]
_ -> Text
word
Right Text
"" -> Text
word
Right Text
hangulWord -> Text
hangulWord
where
parser :: Parser Text
parser :: Parser Text Text
parser = do
[Text]
chars <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many'
( forall i a. Parser i a -> Parser i a
try Parser Text Text
yeolYul
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall i a. Parser i a -> Parser i a
try Parser Text Text
prefixedNumber
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall i a. Parser i a -> Parser i a
try Parser Text Text
hanNumber
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall i a. Parser i a -> Parser i a
try (Char -> Text
Data.Text.singleton forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
phoneticize forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar)
)
let hangulWord :: Text
hangulWord = [Text] -> Text
Data.Text.concat [Text]
chars
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat
[ (Char -> Char) -> HanjaWordPhoneticizer
Data.Text.map Char -> Char
convertInitialSoundLaw forall a b. (a -> b) -> a -> b
$ Int -> HanjaWordPhoneticizer
Data.Text.take Int
1 Text
hangulWord
, Int -> HanjaWordPhoneticizer
Data.Text.drop Int
1 Text
hangulWord
]
yeolYul :: Parser Text
yeolYul :: Parser Text Text
yeolYul = do
Char
former <- (Char -> Bool) -> Parser Char
satisfy forall a b. (a -> b) -> a -> b
$ \ Char
c ->
Char
c Char -> Maybe Char -> Bool
`hasBatchim` forall a. a -> Maybe a
Just Char
'\x11ab' Bool -> Bool -> Bool
|| Char
c Char -> Maybe Char -> Bool
`hasBatchim` forall a. Maybe a
Nothing
Char
later <- Char -> Parser Char
phone Char
'렬' forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Char
phone Char
'률'
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char] -> Text
pack
[ Char -> Char
phoneticize Char
former
, Char -> Char
convert Char
later
]
prefixedNumber :: Parser Text
prefixedNumber :: Parser Text Text
prefixedNumber = do
Char
prefix <- Char -> Parser Char
char Char
'第'
Text
digits <- (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isHanDigit
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> HanjaWordPhoneticizer
Data.Text.cons
(Char -> Char
phoneticize Char
prefix)
((Char -> Char) -> HanjaWordPhoneticizer
Data.Text.map Char -> Char
convertDigit Text
digits)
hanNumber :: Parser Text
hanNumber :: Parser Text Text
hanNumber = do
Char
first <- Parser Char
hanDigit
Text
rest <- (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isHanDigit
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Char -> Char) -> HanjaWordPhoneticizer
Data.Text.map Char -> Char
convertDigit forall a b. (a -> b) -> a -> b
$ Char -> HanjaWordPhoneticizer
Data.Text.cons Char
first Text
rest
hanDigit :: Parser Char
hanDigit :: Parser Char
hanDigit = (Char -> Bool) -> Parser Char
satisfy Char -> Bool
isHanDigit
phone :: Char -> Parser Char
phone :: Char -> Parser Char
phone Char
hangul = (Char -> Bool) -> Parser Char
satisfy ((forall a. Eq a => a -> a -> Bool
== Char
hangul) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
phoneticize)
convertDigit :: Char -> Char
convertDigit :: Char -> Char
convertDigit = Char -> Char
convertInitialSoundLaw forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
phoneticizeDigit
convert :: Char -> Char
convert :: Char -> Char
convert = Char -> Char
convertInitialSoundLaw forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
phoneticize
phoneticizeDigit :: Char -> Char
phoneticizeDigit :: Char -> Char
phoneticizeDigit Char
'參' = Char
'삼'
phoneticizeDigit Char
'叁' = Char
'삼'
phoneticizeDigit Char
'参' = Char
'삼'
phoneticizeDigit Char
'叄' = Char
'삼'
phoneticizeDigit Char
'拾' = Char
'십'
phoneticizeDigit Char
c = Char -> Char
phoneticize Char
c
phoneticize :: Char -> Char
phoneticize :: Char -> Char
phoneticize = Char -> Char
phoneticizeHanjaChar
hasBatchim :: Char -> Maybe Char -> Bool
hasBatchim :: Char -> Maybe Char -> Bool
hasBatchim Char
c Maybe Char
batchim =
case Char -> Maybe JamoTriple
toJamoTriple (Char -> Char
phoneticize Char
c) of
Just (Char
_, Char
_, Maybe Char
final) -> Maybe Char
final forall a. Eq a => a -> a -> Bool
== Maybe Char
batchim
Maybe JamoTriple
_ -> Bool
False
isHanDigit :: Char -> Bool
isHanDigit :: Char -> Bool
isHanDigit = [Char] -> Char -> Bool
inClass forall a b. (a -> b) -> a -> b
$
[Char]
"零一壹壱弌夁二貳贰弐弍貮三參叁参弎叄四肆䦉五伍六陸陆陸七柒漆八捌" forall a. [a] -> [a] -> [a]
++
[Char]
"九玖十拾百佰陌千仟阡萬万億兆京垓秭穰溝澗"
type HanjaDictionary = Trie.Trie Text
withDictionary
:: HanjaDictionary
-> HanjaWordPhoneticizer
-> HanjaWordPhoneticizer
withDictionary :: HanjaDictionary -> HanjaWordPhoneticizer -> HanjaWordPhoneticizer
withDictionary HanjaDictionary
_ HanjaWordPhoneticizer
_ Text
"" = Text
""
withDictionary HanjaDictionary
dic HanjaWordPhoneticizer
fallback Text
word =
case [(Text, Text)]
matches of
[] ->
HanjaWordPhoneticizer
fallback Text
word
(Text
replaced, Text
rest) : [(Text, Text)]
_ ->
if Text -> Bool
Data.Text.null Text
rest
then Text
replaced
else Text
replaced Text -> HanjaWordPhoneticizer
`append` HanjaDictionary -> HanjaWordPhoneticizer -> HanjaWordPhoneticizer
withDictionary HanjaDictionary
dic HanjaWordPhoneticizer
fallback Text
rest
where
lookupDic :: Text -> Maybe Text
lookupDic :: Text -> Maybe Text
lookupDic = (forall a. Text -> Trie a -> Maybe a
`Trie.lookup` HanjaDictionary
dic)
tries :: [(Text, Text)]
tries :: [(Text, Text)]
tries =
[Int -> Text -> (Text, Text)
Data.Text.splitAt Int
pos Text
word | Int
pos <- [Int
0..Text -> Int
Data.Text.length Text
word]]
patterns :: Text -> [Text]
patterns :: Text -> [Text]
patterns Text
word' =
Text
word' forall a. a -> [a] -> [a]
: case Text -> Maybe (Text, Char)
unsnoc Text
word' of
Just (Text
next, Char
_) -> Text -> [Text]
patterns Text
next
Maybe (Text, Char)
Nothing -> []
matchTries :: [Maybe (Text, Text, Text)]
matchTries :: [Maybe (Text, Text, Text)]
matchTries =
(forall a b. (a -> b) -> [a] -> [b]
`Prelude.map` [(Text, Text)]
tries) forall a b. (a -> b) -> a -> b
$ \ (Text
unmatched, Text
wd) ->
case [(Text
p, Text
m) | Text
p <- Text -> [Text]
patterns Text
wd, Text
m <- forall a. Maybe a -> [a]
maybeToList (Text -> Maybe Text
lookupDic Text
p)] of
[] -> forall a. Maybe a
Nothing
(Text, Text)
pair : [(Text, Text)]
_ -> forall a. a -> Maybe a
Just
( Text
unmatched
, forall a b. (a, b) -> b
snd (Text, Text)
pair
, Int -> HanjaWordPhoneticizer
Data.Text.drop (Text -> Int
Data.Text.length forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> a
fst (Text, Text)
pair) Text
wd
)
matches :: [(Text, Text)]
matches :: [(Text, Text)]
matches =
[ (HanjaWordPhoneticizer
fallback Text
unmatched Text -> HanjaWordPhoneticizer
`append` Text
matched, Text
rest)
| Just (Text
unmatched, Text
matched, Text
rest) <- [Maybe (Text, Text, Text)]
matchTries
]
phoneticizeHanjaChar :: Char -> Char
phoneticizeHanjaChar :: Char -> Char
phoneticizeHanjaChar Char
c = forall a. a -> Maybe a -> a
fromMaybe Char
c forall a b. (a -> b) -> a -> b
$ do
HanjaReadings
readings <- forall k a. Ord k => k -> Map k a -> Maybe a
lookup Char
c KHangulData
kHangulData
let readings' :: [(Char, HanjaReadingCitation)]
readings' = forall k a. Map k a -> [(k, a)]
Data.Map.Strict.toList HanjaReadings
readings
let (Char
sound, HanjaReadingCitation
_) = forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> b
snd) [(Char, HanjaReadingCitation)]
readings'
let initialLawReverted :: Set Char
initialLawReverted = forall a. (a -> Bool) -> Set a -> Set a
Data.Set.filter
(forall k a. Ord k => k -> Map k a -> Bool
`Data.Map.Strict.member` HanjaReadings
readings)
(Char -> Set Char
revertInitialSoundLaw Char
sound)
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case forall a. Set a -> [a]
Data.Set.toList Set Char
initialLawReverted of
[] -> Char
sound
Char
reverted : [Char]
_ -> Char
reverted
withoutBatchim :: Char -> Maybe (Char, Maybe Char)
withoutBatchim :: Char -> Maybe (Char, Maybe Char)
withoutBatchim Char
hangul = do
(Char
initial, Char
vowel, Maybe Char
final) <- Char -> Maybe JamoTriple
toJamoTriple Char
hangul
Char
noBatchim <- JamoTriple -> Maybe Char
fromJamoTriple (Char
initial, Char
vowel, forall a. Maybe a
Nothing)
forall (m :: * -> *) a. Monad m => a -> m a
return (Char
noBatchim, Maybe Char
final)
withBatchim :: Char -> Maybe Char -> Maybe Char
withBatchim :: Char -> Maybe Char -> Maybe Char
withBatchim Char
hangul Maybe Char
final = do
(Char
initial, Char
vowel, Maybe Char
_) <- Char -> Maybe JamoTriple
toJamoTriple Char
hangul
JamoTriple -> Maybe Char
fromJamoTriple (Char
initial, Char
vowel, Maybe Char
final)
convertInitialSoundLaw :: Char -> Char
convertInitialSoundLaw :: Char -> Char
convertInitialSoundLaw Char
sound = forall a. a -> Maybe a -> a
fromMaybe Char
sound forall a b. (a -> b) -> a -> b
$ do
(Char
pattern', Maybe Char
final) <- Char -> Maybe (Char, Maybe Char)
withoutBatchim Char
sound
let converted :: Char
converted = forall k a. Ord k => a -> k -> Map k a -> a
findWithDefault Char
pattern' Char
pattern' Map Char Char
initialSoundLawTable
Char -> Maybe Char -> Maybe Char
withBatchim Char
converted Maybe Char
final
revertInitialSoundLaw :: Char -> Set Char
revertInitialSoundLaw :: Char -> Set Char
revertInitialSoundLaw Char
sound = forall a. a -> Maybe a -> a
fromMaybe forall a. Set a
Data.Set.empty forall a b. (a -> b) -> a -> b
$ do
(Char
pattern', Maybe Char
final) <- Char -> Maybe (Char, Maybe Char)
withoutBatchim Char
sound
let candidates :: [Char]
candidates = forall a. Set a -> [a]
Data.Set.toList forall a b. (a -> b) -> a -> b
$
forall k a. Ord k => a -> k -> Map k a -> a
findWithDefault forall a. Set a
Data.Set.empty Char
pattern' Map Char (Set Char)
initialSoundLawTable'
forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a. Ord a => [a] -> Set a
Data.Set.fromList forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes forall a b. (a -> b) -> a -> b
$ [Char]
candidates forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> (Char -> Maybe Char -> Maybe Char
`withBatchim` Maybe Char
final)
where
(<&>) :: Functor f => f a -> (a -> b) -> f b
<&> :: forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
(<&>) = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap
textParser :: Parser [(Bool, Text)]
textParser :: Parser Text [(Bool, Text)]
textParser = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall (t :: * -> *) a. Foldable t => t [a] -> [a]
Data.List.concat forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ do
[Char]
hanjaEntities <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall i a. Parser i a -> Parser i a
try forall a b. (a -> b) -> a -> b
$ do
Char
c <- Parser Char
unnamedCharRef
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Char -> Bool
isHanjaOrDigit Char
c) (forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"not a hanja")
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
Text
hanjaCharsText <- (Char -> Bool) -> Parser Text Text
Data.Attoparsec.Text.takeWhile Char -> Bool
isHanjaOrDigit
[Char]
hanjaChars <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall i a. Parser i a -> Parser i a
try forall a b. (a -> b) -> a -> b
$ do
Char
c <- Parser Char
unnamedCharRef forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Char
anyChar
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Char -> Bool
isHanjaOrDigit Char
c) (forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"not a hanja")
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
[Char]
entities <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall i a. Parser i a -> Parser i a
try forall a b. (a -> b) -> a -> b
$ do
Char
c <- Parser Char
unnamedCharRef
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Char -> Bool
isHanjaOrDigit Char
c) (forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"a hanja")
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
Text
charsText <- (Char -> Bool) -> Parser Text Text
takeTill Char -> Bool
isHanjaOrDigit
[Char]
chars <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' forall a b. (a -> b) -> a -> b
$ forall i a. Parser i a -> Parser i a
try forall a b. (a -> b) -> a -> b
$ do
Char
c <- Parser Char
unnamedCharRef forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Char
anyChar
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Char -> Bool
isHanjaOrDigit Char
c) (forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"a hanja")
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
let hanjaText :: Text
hanjaText = [Text] -> Text
Data.Text.concat
[[Char] -> Text
pack [Char]
hanjaEntities, Text
hanjaCharsText, [Char] -> Text
pack [Char]
hanjaChars]
let text' :: Text
text' = [Text] -> Text
Data.Text.concat [[Char] -> Text
pack [Char]
entities, Text
charsText, [Char] -> Text
pack [Char]
chars]
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Text -> Bool
Data.Text.null forall a b. (a -> b) -> a -> b
$ Text
hanjaText Text -> HanjaWordPhoneticizer
`append` Text
text') (forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"parsed nothing")
forall (m :: * -> *) a. Monad m => a -> m a
return [(Bool
True, Text
hanjaText), (Bool
False, Text
text')]
where
isHanjaOrDigit :: Char -> Bool
isHanjaOrDigit :: Char -> Bool
isHanjaOrDigit Char
c =
Char -> Bool
isDigit Char
c Bool -> Bool -> Bool
|| Char -> Bool
isHanja Char
c
isHanja :: Char -> Bool
isHanja :: Char -> Bool
isHanja Char
c =
Char
'\x2f00' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2fff' Bool -> Bool -> Bool
||
Char
'\x3007' forall a. Eq a => a -> a -> Bool
== Char
c Bool -> Bool -> Bool
||
Char
'\x3400' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x4dbf' Bool -> Bool -> Bool
||
Char
'\x4e00' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x9fcc' Bool -> Bool -> Bool
||
Char
'\xf900' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xfaff' Bool -> Bool -> Bool
||
Char
'\x20000' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2a6d6' Bool -> Bool -> Bool
||
Char
'\x2a700' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2b734' Bool -> Bool -> Bool
||
Char
'\x2b740' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2b81d' Bool -> Bool -> Bool
||
Char
'\x2b820' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2cea1' Bool -> Bool -> Bool
||
Char
'\x2ceb0' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2ebe0' Bool -> Bool -> Bool
||
Char
'\x2f800' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2fa1f'
unnamedCharRef :: Parser Char
unnamedCharRef :: Parser Char
unnamedCharRef = do
Char
_ <- Char -> Parser Char
char Char
'&'
Char
_ <- Char -> Parser Char
char Char
'#'
Bool
hex <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Bool
False ((Char -> Parser Char
char Char
'x' forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Char
char Char
'X') forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Bool
True)
Int
codepoint <- if Bool
hex then forall a. (Integral a, Bits a) => Parser a
hexadecimal else forall a. Integral a => Parser a
decimal
Char
_ <- Char -> Parser Char
char Char
';'
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Char
chr Int
codepoint
initialSoundLawTable :: Map Char Char
initialSoundLawTable :: Map Char Char
initialSoundLawTable =
[ (Char
'녀', Char
'여')
, (Char
'뇨', Char
'요')
, (Char
'뉴', Char
'유')
, (Char
'니', Char
'이')
, (Char
'랴', Char
'야')
, (Char
'려', Char
'여')
, (Char
'례', Char
'예')
, (Char
'료', Char
'요')
, (Char
'류', Char
'유')
, (Char
'리', Char
'이')
, (Char
'라', Char
'나')
, (Char
'래', Char
'내')
, (Char
'로', Char
'노')
, (Char
'뢰', Char
'뇌')
, (Char
'루', Char
'누')
, (Char
'르', Char
'느')
]
initialSoundLawTable' :: Map Char (Set Char)
initialSoundLawTable' :: Map Char (Set Char)
initialSoundLawTable' =
forall k a b. (k -> a -> b -> b) -> b -> Map k a -> b
foldrWithKey Char -> Char -> Map Char (Set Char) -> Map Char (Set Char)
f forall k a. Map k a
Data.Map.Strict.empty Map Char Char
initialSoundLawTable
where
f :: Char -> Char -> Map Char (Set Char) -> Map Char (Set Char)
f :: Char -> Char -> Map Char (Set Char) -> Map Char (Set Char)
f Char
original Char
converted =
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
insertWith forall a. Ord a => Set a -> Set a -> Set a
Data.Set.union Char
converted (forall a. a -> Set a
Data.Set.singleton Char
original)