{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Text.Seonbi.Unihan.KHangul
    ( CharacterSet (..)
    , HanjaReadings
    , HanjaReadingCitation (..)
    , KHangulData
    , Purpose (..)
    , kHangulData
    , kHangulData'
    ) where

import Data.Either

import Data.Aeson
import Data.Attoparsec.Text
import Data.ByteString.Lazy (fromStrict)
import Data.FileEmbed
import Data.Map.Strict
import Data.Set hiding (empty)
import System.FilePath (takeDirectory, (</>))

-- $setup
-- >>> import qualified Text.Show.Unicode
-- >>> :set -interactive-print=Text.Show.Unicode.uprint

-- | Maps all Hanja characters to their possible readings.
type KHangulData = Map Char HanjaReadings

-- | All readings of a Hanja character.
type HanjaReadings = Map Char HanjaReadingCitation

-- | Represents what standard a reading of character belongs to and a purpose
-- of the reading.
data HanjaReadingCitation =
    HanjaReadingCitation CharacterSet (Set Purpose) deriving (HanjaReadingCitation -> HanjaReadingCitation -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c/= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
== :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c== :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
Eq, Eq HanjaReadingCitation
HanjaReadingCitation -> HanjaReadingCitation -> Bool
HanjaReadingCitation -> HanjaReadingCitation -> Ordering
HanjaReadingCitation
-> HanjaReadingCitation -> HanjaReadingCitation
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: HanjaReadingCitation
-> HanjaReadingCitation -> HanjaReadingCitation
$cmin :: HanjaReadingCitation
-> HanjaReadingCitation -> HanjaReadingCitation
max :: HanjaReadingCitation
-> HanjaReadingCitation -> HanjaReadingCitation
$cmax :: HanjaReadingCitation
-> HanjaReadingCitation -> HanjaReadingCitation
>= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c>= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
> :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c> :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
<= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c<= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
< :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c< :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
compare :: HanjaReadingCitation -> HanjaReadingCitation -> Ordering
$ccompare :: HanjaReadingCitation -> HanjaReadingCitation -> Ordering
Ord, Int -> HanjaReadingCitation -> ShowS
[HanjaReadingCitation] -> ShowS
HanjaReadingCitation -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [HanjaReadingCitation] -> ShowS
$cshowList :: [HanjaReadingCitation] -> ShowS
show :: HanjaReadingCitation -> [Char]
$cshow :: HanjaReadingCitation -> [Char]
showsPrec :: Int -> HanjaReadingCitation -> ShowS
$cshowsPrec :: Int -> HanjaReadingCitation -> ShowS
Show)

-- | Represents character set standards for Korean writing system.
data CharacterSet
    -- | KS X 1001 (정보 교환용 부호계).
    = KS_X_1001
    -- | KS X 1002 (정보 교환용 부호 확장 세트).
    | KS_X_1002
    -- | Represents that a Hanja character is not included in any Korean
    -- character set standards.
    | NonStandard
    deriving (CharacterSet -> CharacterSet -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CharacterSet -> CharacterSet -> Bool
$c/= :: CharacterSet -> CharacterSet -> Bool
== :: CharacterSet -> CharacterSet -> Bool
$c== :: CharacterSet -> CharacterSet -> Bool
Eq, Eq CharacterSet
CharacterSet -> CharacterSet -> Bool
CharacterSet -> CharacterSet -> Ordering
CharacterSet -> CharacterSet -> CharacterSet
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: CharacterSet -> CharacterSet -> CharacterSet
$cmin :: CharacterSet -> CharacterSet -> CharacterSet
max :: CharacterSet -> CharacterSet -> CharacterSet
$cmax :: CharacterSet -> CharacterSet -> CharacterSet
>= :: CharacterSet -> CharacterSet -> Bool
$c>= :: CharacterSet -> CharacterSet -> Bool
> :: CharacterSet -> CharacterSet -> Bool
$c> :: CharacterSet -> CharacterSet -> Bool
<= :: CharacterSet -> CharacterSet -> Bool
$c<= :: CharacterSet -> CharacterSet -> Bool
< :: CharacterSet -> CharacterSet -> Bool
$c< :: CharacterSet -> CharacterSet -> Bool
compare :: CharacterSet -> CharacterSet -> Ordering
$ccompare :: CharacterSet -> CharacterSet -> Ordering
Ord, Int -> CharacterSet -> ShowS
[CharacterSet] -> ShowS
CharacterSet -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [CharacterSet] -> ShowS
$cshowList :: [CharacterSet] -> ShowS
show :: CharacterSet -> [Char]
$cshow :: CharacterSet -> [Char]
showsPrec :: Int -> CharacterSet -> ShowS
$cshowsPrec :: Int -> CharacterSet -> ShowS
Show)

-- | Represents purposes of Hanja characters.
data Purpose
    -- | Basic Hanja for educational use (漢文敎育用基礎漢字), a subset of
    -- Hanja defined in 1972 by a South Korean standard for educational use.
    = Education
    -- | Hanja for personal names (人名用漢字).
    | PersonalName
    deriving (Purpose -> Purpose -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Purpose -> Purpose -> Bool
$c/= :: Purpose -> Purpose -> Bool
== :: Purpose -> Purpose -> Bool
$c== :: Purpose -> Purpose -> Bool
Eq, Eq Purpose
Purpose -> Purpose -> Bool
Purpose -> Purpose -> Ordering
Purpose -> Purpose -> Purpose
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Purpose -> Purpose -> Purpose
$cmin :: Purpose -> Purpose -> Purpose
max :: Purpose -> Purpose -> Purpose
$cmax :: Purpose -> Purpose -> Purpose
>= :: Purpose -> Purpose -> Bool
$c>= :: Purpose -> Purpose -> Bool
> :: Purpose -> Purpose -> Bool
$c> :: Purpose -> Purpose -> Bool
<= :: Purpose -> Purpose -> Bool
$c<= :: Purpose -> Purpose -> Bool
< :: Purpose -> Purpose -> Bool
$c< :: Purpose -> Purpose -> Bool
compare :: Purpose -> Purpose -> Ordering
$ccompare :: Purpose -> Purpose -> Ordering
Ord, Int -> Purpose -> ShowS
[Purpose] -> ShowS
Purpose -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [Purpose] -> ShowS
$cshowList :: [Purpose] -> ShowS
show :: Purpose -> [Char]
$cshow :: Purpose -> [Char]
showsPrec :: Int -> Purpose -> ShowS
$cshowsPrec :: Int -> Purpose -> ShowS
Show)

citationParser :: Parser HanjaReadingCitation
citationParser :: Parser HanjaReadingCitation
citationParser = do
    CharacterSet
charset' <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option CharacterSet
NonStandard Parser CharacterSet
charset
    [Purpose]
purposes <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' Parser Purpose
purpose
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ CharacterSet -> Set Purpose -> HanjaReadingCitation
HanjaReadingCitation CharacterSet
charset' forall a b. (a -> b) -> a -> b
$ forall a. Ord a => [a] -> Set a
Data.Set.fromList [Purpose]
purposes
  where
    charset :: Parser CharacterSet
    charset :: Parser CharacterSet
charset = do
        Char
d <- Parser Char
digit
        case Char
d of
            Char
'0' -> forall (m :: * -> *) a. Monad m => a -> m a
return CharacterSet
KS_X_1001
            Char
'1' -> forall (m :: * -> *) a. Monad m => a -> m a
return CharacterSet
KS_X_1002
            Char
c -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
"Invalid kHangul character set code: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Char
c)
    purpose :: Parser Purpose
    purpose :: Parser Purpose
purpose = do
        Char
l <- Parser Char
letter
        case Char
l of
            Char
'E' -> forall (m :: * -> *) a. Monad m => a -> m a
return Purpose
Education
            Char
'N' -> forall (m :: * -> *) a. Monad m => a -> m a
return Purpose
PersonalName
            Char
c -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
"Invalid kHangul purpose code: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Char
c)

instance FromJSON HanjaReadingCitation where
    parseJSON :: Value -> Parser HanjaReadingCitation
parseJSON =
        forall a. [Char] -> (Text -> Parser a) -> Value -> Parser a
withText [Char]
"kHangul value (e.g., 0E, 1N, 0EN)" forall a b. (a -> b) -> a -> b
$ \ Text
t ->
            case forall a. Parser a -> Text -> Either [Char] a
parseOnly (Parser HanjaReadingCitation
citationParser forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput) Text
t of
                Right HanjaReadingCitation
cite -> forall (m :: * -> *) a. Monad m => a -> m a
return HanjaReadingCitation
cite
                Left [Char]
msg -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
msg

kHangulData' :: Either String KHangulData
kHangulData' :: Either [Char] KHangulData
kHangulData' = forall a. FromJSON a => ByteString -> Either [Char] a
eitherDecode forall a b. (a -> b) -> a -> b
$
    ByteString -> ByteString
fromStrict $(embedFile $ takeDirectory __FILE__ </> "kHangul.json")

-- | Data that map Hanja characters to their corresponding kHangul entries
-- (i.e., Hanja readings and citations).
--
-- >>> import Data.Map.Strict as M
-- >>> let Just entries = M.lookup '天' kHangulData
-- >>> entries
-- fromList [('천',HanjaReadingCitation KS_X_1001 (fromList [Education]))]
kHangulData :: KHangulData
kHangulData :: KHangulData
kHangulData = forall b a. b -> Either a b -> b
fromRight forall k a. Map k a
empty Either [Char] KHangulData
kHangulData'

{- HLINT ignore "Unused LANGUAGE pragma" -}