Safe Haskell | Safe-Inferred |
---|---|
Language | Haskell2010 |
Since the primitive unit that Seonbi transforms is HTML, this module deals with HTML.
Synopsis
- data Result r
- scanHtml :: Text -> Result [HtmlEntity]
- printHtml :: [HtmlEntity] -> Text
- printText :: [HtmlEntity] -> Text
- printXhtml :: [HtmlEntity] -> Text
- data HtmlEntity
- = HtmlStartTag { }
- | HtmlEndTag {
- tagStack :: HtmlTagStack
- tag :: HtmlTag
- | HtmlText {
- tagStack :: HtmlTagStack
- rawText :: Text
- | HtmlCdata {
- tagStack :: HtmlTagStack
- text :: Text
- | HtmlComment {
- tagStack :: HtmlTagStack
- comment :: Text
- type HtmlRawAttrs = Text
- data HtmlTag
- = A
- | Abbr
- | Acronym
- | Address
- | Area
- | Article
- | Aside
- | Audio
- | B
- | Base
- | Bdi
- | Bdo
- | Big
- | BlockQuote
- | Body
- | BR
- | Button
- | Canvas
- | Caption
- | Center
- | Cite
- | Code
- | Col
- | ColGroup
- | Data
- | DataList
- | DD
- | Del
- | Details
- | Dfn
- | Dialog
- | Div
- | DL
- | DT
- | Em
- | Embed
- | FieldSet
- | FigCaption
- | Figure
- | Footer
- | Font
- | Form
- | H1
- | H2
- | H3
- | H4
- | H5
- | H6
- | Head
- | Header
- | HR
- | Html
- | I
- | IFrame
- | Img
- | Input
- | Ins
- | Kbd
- | Label
- | Legend
- | LI
- | Link
- | Main
- | Map
- | Mark
- | Meta
- | Meter
- | Nav
- | NoBR
- | NoScript
- | Object
- | OL
- | OptGroup
- | Option
- | Output
- | P
- | Param
- | Picture
- | Pre
- | Progress
- | Q
- | RB
- | RP
- | RT
- | RTC
- | Ruby
- | S
- | Samp
- | Script
- | Select
- | Section
- | Small
- | Source
- | Span
- | Strike
- | Strong
- | Style
- | Sub
- | Summary
- | Sup
- | Table
- | TBody
- | TD
- | Template
- | TFoot
- | TextArea
- | TH
- | THead
- | Time
- | Title
- | TR
- | Track
- | TT
- | U
- | UL
- | Var
- | Video
- | WBR
- | XMP
- data HtmlTagKind
- htmlTagKind :: HtmlTag -> HtmlTagKind
- htmlTagName :: HtmlTag -> Text
- normalizeText :: [HtmlEntity] -> [HtmlEntity]
- data HtmlTagStack
HTML scanner
See more on Text.Seonbi.Html.Scanner module.
HTML printer
See more on Text.Seonbi.Html.Printer module.
printHtml :: [HtmlEntity] -> Text Source #
Print the list of HtmlEntity
into a lazy Text
.
>>>
let Done "" tokens = scanHtml "<p>Hello,<br>\n<em>world</em>!</p>"
>>>
printHtml tokens
"<p>Hello,<br>\n<em>world</em>!</p>"
printText :: [HtmlEntity] -> Text Source #
Print only the text contents (including CDATA sections) without tags
into a lazy Text
.
>>>
let Done "" tokens = scanHtml "<p>Hello,<br>\n<em>world</em>!</p>"
>>>
printText tokens
"Hello,\nworld!"
Entities are decoded:
>>>
let Done "" tokens = scanHtml "<p><code>&lt;&gt;&quot;&amp;</code></p>"
>>>
printText tokens
"<>\"&"
printXhtml :: [HtmlEntity] -> Text Source #
Similar to printHtml
except it renders void (self-closing) tags like <br/> instead of <br>.
>>>
let Done "" tokens = scanHtml "<p>Hello,<br>\n<em>world</em>!</p>"
>>>
printXhtml tokens
"<p>Hello,<br/>\n<em>world</em>!</p>"
Note that normal tags are not rendered as self-closed; only void tags according to HTML specification are:
>>>
let Done "" tokens' = scanHtml "<p></p><p><br></p>"
>>>
printXhtml tokens'
"<p></p><p><br/></p>"
HTML entities
See more on Text.Seonbi.Html.Entity module.
data HtmlEntity Source #
An event entity emitted by scanHtml
.
HtmlStartTag | Represent a token which opens an HTML element. Note that |
HtmlEndTag | Represent a token which closes an HTML element.
The |
HtmlText | Represent a token of a text node. Note that |
HtmlCdata | Represent a token of a CDATA section. |
HtmlComment | Represent a token of an HTML comment. |
Instances
Show HtmlEntity Source # | |
Defined in Text.Seonbi.Html.Entity showsPrec :: Int -> HtmlEntity -> ShowS # show :: HtmlEntity -> String # showList :: [HtmlEntity] -> ShowS # | |
Eq HtmlEntity Source # | |
Defined in Text.Seonbi.Html.Entity (==) :: HtmlEntity -> HtmlEntity -> Bool # (/=) :: HtmlEntity -> HtmlEntity -> Bool # | |
Ord HtmlEntity Source # | |
Defined in Text.Seonbi.Html.Entity compare :: HtmlEntity -> HtmlEntity -> Ordering # (<) :: HtmlEntity -> HtmlEntity -> Bool # (<=) :: HtmlEntity -> HtmlEntity -> Bool # (>) :: HtmlEntity -> HtmlEntity -> Bool # (>=) :: HtmlEntity -> HtmlEntity -> Bool # max :: HtmlEntity -> HtmlEntity -> HtmlEntity # min :: HtmlEntity -> HtmlEntity -> HtmlEntity # |
type HtmlRawAttrs = Text Source #
All element attributes in a string.
HTML tags
See more on Text.Seonbi.Html.Tag module.
HTML tags. This enumeration type contains both HTML 5 and 4 tags for maximum compatibility.
data HtmlTagKind Source #
The six kinds of HTML elements.
Instances
Show HtmlTagKind Source # | |
Defined in Text.Seonbi.Html.Tag showsPrec :: Int -> HtmlTagKind -> ShowS # show :: HtmlTagKind -> String # showList :: [HtmlTagKind] -> ShowS # | |
Eq HtmlTagKind Source # | |
Defined in Text.Seonbi.Html.Tag (==) :: HtmlTagKind -> HtmlTagKind -> Bool # (/=) :: HtmlTagKind -> HtmlTagKind -> Bool # | |
Ord HtmlTagKind Source # | |
Defined in Text.Seonbi.Html.Tag compare :: HtmlTagKind -> HtmlTagKind -> Ordering # (<) :: HtmlTagKind -> HtmlTagKind -> Bool # (<=) :: HtmlTagKind -> HtmlTagKind -> Bool # (>) :: HtmlTagKind -> HtmlTagKind -> Bool # (>=) :: HtmlTagKind -> HtmlTagKind -> Bool # max :: HtmlTagKind -> HtmlTagKind -> HtmlTagKind # min :: HtmlTagKind -> HtmlTagKind -> HtmlTagKind # |
htmlTagKind :: HtmlTag -> HtmlTagKind Source #
The kind of an HtmlTag
.
>>>
Data.Set.filter ((== EscapableRawText) . htmlTagKind) htmlTags
fromList [TextArea,Title]
htmlTagName :: HtmlTag -> Text Source #
The name of an HtmlTag
in lowercase.
>>>
htmlTagName TextArea
"textarea"
\ t -> htmlTagName t == (toLower $ pack $ show (t :: HtmlTag))
HTML text normalization
See more on Text.Seonbi.Html.TextNormalizer module.
normalizeText :: [HtmlEntity] -> [HtmlEntity] Source #
As scanHtml
may emit two or more consecutive HtmlText
fragments even
if they could be represented as a single HtmlText
fragment, postprocessing
becomes harder.
The normalizeText
function concatenates such consecutive HtmlText
fragments into one where possible, so that postprocessing becomes easier:
>>>
:set -XOverloadedStrings -XOverloadedLists
>>>
normalizeText [HtmlText [] "Hello, ", HtmlText [] "world!"]
[HtmlText {tagStack = fromList [], rawText = "Hello, world!"}]
It also converts every HtmlCdata
fragment into an HtmlText
fragment and merges it with the adjacent text.
>>>
:{
normalizeText [ HtmlText [] "foo " , HtmlCdata [] "<bar>", HtmlText [] " baz!" ]
:}
[HtmlText {tagStack = fromList [], rawText = "foo <bar> baz!"}]
HTML hierarchical stacks
See more on Text.Seonbi.Html.TagStack module.
data HtmlTagStack Source #
Represents a hierarchy of a currently parsing position in an HtmlTag
tree.
For example, if scanHtml
has read "<a href="#"><b><i>foo</i> bar",
it is represented as HtmlTagStack [B, A].
Note that the tags are stored in reverse order, from the deepest to the shallowest, to make inserting a deeper tag efficient.