This job requires a certain savoir faire that can only be acquired over time.
+
This is a paragraph with some very-strong bold text!
+
Just testing some breaks! And useless elements...
+
+
+
+
And the next pagebreak is in the middle of a sentence.
+
+
+
+
+
Title of the chapter
+
+
+
First item
+
Second item
+
Third item
+
+
Hidden text!...
+
More Hidden text
+
More Hidden text
+
+
+
+ ★
+ ★
+ ★
+ ★
+ ☆
+
+
+
+ /\_/\
+ ( o.o )
+ ^
+
+
+ ASCII Art of a cat face
+
+
+
+ `)
+ })
+
+ nav, err := Do(context.Background(), f, manifest.Locator{
+ Href: f.Link().Href.Resolve(nil, nil),
+ })
+ require.NoError(t, err)
+ bin, _ := json.MarshalIndent(nav, "", " ")
+ t.Log(string(bin))
+}
diff --git a/pkg/guidednavigation/converter/html.go b/pkg/guidednavigation/converter/html.go
new file mode 100644
index 00000000..6d1f9c02
--- /dev/null
+++ b/pkg/guidednavigation/converter/html.go
@@ -0,0 +1,597 @@
+package converter
+
+import (
+ "encoding/xml"
+ "slices"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+
+ "github.com/readium/go-toolkit/pkg/guidednavigation"
+ "github.com/readium/go-toolkit/pkg/manifest"
+ "github.com/readium/go-toolkit/pkg/util/url"
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/atom"
+)
+
+// trimText splits text into its leading whitespace, its trimmed content and its
+// trailing whitespace, returned as the Before, Highlight and After fields of a
+// manifest.Text. When before is non-nil, its value is placed in front of the
+// leading whitespace in Before.
+//
+// Whitespace is detected rune by rune with unicode.IsSpace, so multi-byte
+// whitespace is handled and bytes inside multi-byte characters are never
+// mistaken for spaces. A whitespace-only text yields an empty Highlight and an
+// empty After.
+func trimText(text string, before *string) manifest.Text {
+	var prefix string
+	if before != nil {
+		prefix = *before
+	}
+
+	// Whatever is removed from the left is the leading whitespace.
+	rest := strings.TrimLeftFunc(text, unicode.IsSpace)
+	leading := text[:len(text)-len(rest)]
+
+	// Whatever is removed from the right of the remainder is the trailing
+	// whitespace, kept in its original order.
+	highlight := strings.TrimRightFunc(rest, unicode.IsSpace)
+	trailing := rest[len(highlight):]
+
+	return manifest.Text{
+		Before:    prefix + leading,
+		Highlight: highlight,
+		After:     trailing,
+	}
+}
+
+// onlySpace reports whether s consists solely of Unicode whitespace.
+// The empty string counts as whitespace-only.
+func onlySpace(s string) bool {
+	isNotSpace := func(r rune) bool { return !unicode.IsSpace(r) }
+	return strings.IndexFunc(s, isNotSpace) < 0
+}
+
+// getAttr returns the value of the first attribute of n whose key equals key,
+// or the empty string when n has no such attribute.
+func getAttr(n *html.Node, key string) string {
+	for i := range n.Attr {
+		if a := &n.Attr[i]; a.Key == key {
+			return a.Val
+		}
+	}
+	return ""
+}
+
+/*func getFirstAttr(n *html.Node, keys []string) string {
+ for _, attr := range n.Attr {
+ if slices.Contains(keys, attr.Key) {
+ return attr.Val
+ }
+ }
+ return ""
+}*/
+
+// srcRelativeToHref resolves the "src" attribute of n against base.
+// It returns nil when n is nil, when the attribute is missing or empty, or
+// when url.URLFromString yields a nil URL for the attribute value.
+func srcRelativeToHref(n *html.Node, base url.URL) url.URL {
+	if n == nil {
+		return nil
+	}
+
+	src := getAttr(n, "src")
+	if src == "" {
+		return nil
+	}
+
+	// The parse error is deliberately discarded: only the nil-ness of the
+	// result decides whether there is something to resolve.
+	parsed, _ := url.URLFromString(src)
+	if parsed == nil {
+		return nil
+	}
+	return base.Resolve(parsed)
+}
+
+// childrenOfType collects, in document order, the elements of type typ found
+// in the tree rooted at doc, going at most depth levels below doc. doc itself
+// is included when it matches.
+func childrenOfType(doc *html.Node, typ atom.Atom, depth uint) (children []*html.Node) {
+	var walk func(n *html.Node, remaining uint)
+	walk = func(n *html.Node, remaining uint) {
+		if n.Type == html.ElementNode && n.DataAtom == typ {
+			children = append(children, n)
+		}
+		if remaining == 0 {
+			// Depth budget exhausted: do not look at this node's children.
+			return
+		}
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			walk(c, remaining-1)
+		}
+	}
+	walk(doc, depth)
+	return children
+}
+
+// childOfType returns the first (first == true) or last (first == false)
+// element of type typ in the tree rooted at doc, in document order. doc itself
+// is considered. It returns nil when no element matches.
+func childOfType(doc *html.Node, typ atom.Atom, first bool) *html.Node {
+	var found *html.Node
+
+	// visit reports true as soon as the traversal can stop, so that a "first"
+	// search is not overwritten by matches that come later in the document.
+	var visit func(*html.Node) bool
+	visit = func(n *html.Node) bool {
+		if n.Type == html.ElementNode && n.DataAtom == typ {
+			found = n
+			if first {
+				return true
+			}
+		}
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			if visit(c) {
+				return true
+			}
+		}
+		return false
+	}
+	visit(doc)
+	return found
+}
+
+// inlineTags is the set of element types that isInlineTag reports as inline;
+// any element type not listed here is treated as a block element.
+// Everything from this list except "device":
+// https://github.com/jhy/jsoup/blob/0b10d516ed8f907f8fb4acb9a0806137a8988d45/src/main/java/org/jsoup/parser/Tag.java#L243
+var inlineTags map[atom.Atom]struct{} = map[atom.Atom]struct{}{
+ atom.Object: {},
+ atom.Base: {},
+ atom.Font: {},
+ atom.Tt: {},
+ atom.I: {},
+ atom.B: {},
+ atom.U: {},
+ atom.Big: {},
+ atom.Small: {},
+ atom.Em: {},
+ atom.Strong: {},
+ atom.Dfn: {},
+ atom.Code: {},
+ atom.Samp: {},
+ atom.Kbd: {},
+ atom.Var: {},
+ atom.Cite: {},
+ atom.Abbr: {},
+ atom.Time: {},
+ atom.Acronym: {},
+ atom.Mark: {},
+ atom.Ruby: {},
+ atom.Rt: {},
+ atom.Rp: {},
+ atom.Rtc: {},
+ atom.A: {},
+ atom.Img: {},
+ atom.Br: {},
+ atom.Wbr: {},
+ atom.Map: {},
+ atom.Q: {},
+ atom.Sub: {},
+ atom.Sup: {},
+ atom.Bdo: {},
+ atom.Iframe: {},
+ atom.Embed: {},
+ atom.Span: {},
+ atom.Input: {},
+ atom.Select: {},
+ atom.Textarea: {},
+ atom.Label: {},
+ atom.Button: {},
+ atom.Optgroup: {},
+ atom.Option: {},
+ atom.Legend: {},
+ atom.Datalist: {},
+ atom.Keygen: {},
+ atom.Output: {},
+ atom.Progress: {},
+ atom.Meter: {},
+ atom.Area: {},
+ atom.Param: {},
+ atom.Source: {},
+ atom.Track: {},
+ atom.Summary: {},
+ atom.Command: {},
+ atom.Basefont: {},
+ atom.Bgsound: {},
+ atom.Menuitem: {},
+ atom.Data: {},
+ atom.Bdi: {},
+ atom.S: {},
+ atom.Strike: {},
+ atom.Nobr: {},
+ atom.Rb: {},
+}
+
+// isInlineTag reports whether n is one of the element types listed in
+// inlineTags. A nil node is not inline; anything not inline is treated as a
+// block by the callers.
+func isInlineTag(n *html.Node) bool {
+	if n != nil {
+		if _, inline := inlineTags[n.DataAtom]; inline {
+			return true
+		}
+	}
+	return false
+}
+
+// nodeLanguage returns the language declared on n or, failing that, on its
+// closest ancestor that declares one; nil when no node on the way up does.
+// On a given node a non-empty xml:lang takes priority over lang.
+//
+// This isn't cheap to run: it walks every attribute of every ancestor.
+func nodeLanguage(n *html.Node) *string {
+	cur := n
+	for {
+		var lang string
+		for i := range cur.Attr {
+			attr := cur.Attr[i] // copy, so the returned pointer does not alias the node
+			switch attr.Key {
+			case "xml:lang":
+				if attr.Val != "" {
+					return &attr.Val
+				}
+			case "lang":
+				lang = attr.Val
+			}
+		}
+		if lang != "" {
+			return &lang
+		}
+
+		if cur.Parent == nil {
+			return nil
+		}
+		cur = cur.Parent
+	}
+}
+
+// appendNormalizedWhitespace appends text to accum with every run of
+// whitespace collapsed to a single ' '. Zero width spaces and soft hyphens are
+// dropped. When stripLeading is set, whitespace before the first kept
+// character is dropped as well.
+//
+// From JSoup: https://github.com/jhy/jsoup/blob/1762412a28fa7b08ccf71d93fc4c98dc73086e03/src/main/java/org/jsoup/internal/StringUtil.java#L233
+// The definition of a whitespace character differs slightly (unicode.IsSpace).
+func appendNormalizedWhitespace(accum *strings.Builder, text string, stripLeading bool) {
+	const (
+		zeroWidthSpace = '\u200b'
+		softHyphen     = '\u00ad'
+	)
+
+	prevWasSpace := false
+	seenContent := false
+	for _, r := range text {
+		switch {
+		case unicode.IsSpace(r):
+			skip := prevWasSpace || (stripLeading && !seenContent)
+			if !skip {
+				accum.WriteRune(' ')
+				prevWasSpace = true
+			}
+		case r == zeroWidthSpace || r == softHyphen:
+			// Invisible characters are dropped without touching the state.
+		default:
+			accum.WriteRune(r)
+			prevWasSpace = false
+			seenContent = true
+		}
+	}
+}
+
+// textSegment is one run of accumulated text, optionally wrapped in a tag when
+// flushText renders the segments as markup.
+type textSegment struct {
+ text string // Text of the segment; empty for the "break" segment that head adds for a br element.
+
+ tag *string // Name of the tag wrapping the text; nil when the segment is untagged.
+ attributes []xml.Attr // Attributes written on the tag.
+}
+
+// navigationObject is a node of the intermediate tree built by HTMLConverter;
+// convert turns it into a guidednavigation.GuidedNavigationObject.
+type navigationObject struct {
+ node *html.Node // HTML node this object was created for; left nil for the text objects appended by flushText.
+ object guidednavigation.GuidedNavigationObject // Guided navigation data collected for this object.
+ children []*navigationObject // Child objects, in the order they were appended.
+ parent *navigationObject // Object to return to in ascend; nil for the root.
+ noText bool // When set, tail ignores text nodes while this object is current; copied from the parent by descend.
+}
+
+// convert builds the guided navigation object for n, converting its children
+// recursively and leaving out the ones that come back empty. With prettify
+// set, an object whose single remaining child is text-only absorbs that
+// child's text instead of keeping it as a child.
+func (n *navigationObject) convert(prettify bool) guidednavigation.GuidedNavigationObject {
+	out := n.object
+
+	for _, c := range n.children {
+		if converted := c.convert(prettify); !converted.Empty() {
+			out.Children = append(out.Children, converted)
+		}
+	}
+
+	if !prettify || len(out.Children) != 1 {
+		return out
+	}
+	if only := out.Children[0]; only.TextOnly() {
+		out.Text = only.Text
+		out.Children = nil
+	}
+	return out
+}
+
+// HTMLConverter walks an HTML node tree (see Convert) and builds a tree of
+// guided navigation objects from it (see Result).
+type HTMLConverter struct {
+ baseLocator manifest.Locator // Its Href is the base against which src attributes are resolved.
+
+ segmentsAcc []textSegment // Segments accumulated for the current element.
+ textAcc strings.Builder // Text since the beginning of the current segment, after coalescing whitespaces.
+ currentLanguage *string // Language of the current segment.
+ lastTextNode *html.Node // Most recent text node whose data was appended to textAcc.
+
+ root *navigationObject // Root of the tree being built.
+ current *navigationObject // Object that new children and flushed text are attached to.
+ skipNode bool // Set by head when a node is reported as not visible; cleared by tail for that node.
+}
+
+// NewHTMLConverter returns a converter whose base locator is baseLocator.
+func NewHTMLConverter(baseLocator manifest.Locator) *HTMLConverter {
+	conv := new(HTMLConverter)
+	conv.baseLocator = baseLocator
+	return conv
+}
+
+// descend creates a navigation object for n below the current object and
+// makes it the current one. The new object inherits noText from its parent.
+// When there is no current object yet, the new object becomes the root.
+func (c *HTMLConverter) descend(n *html.Node) {
+	child := &navigationObject{
+		node:   n,
+		parent: c.current,
+	}
+	if c.current == nil {
+		// Nothing to inherit from and nothing to attach to.
+		c.root = child
+	} else {
+		child.noText = c.current.noText
+		c.current.children = append(c.current.children, child)
+	}
+	c.current = child
+}
+
+// ascend makes the parent of the current object the current one.
+// It does nothing when there is no current object.
+func (c *HTMLConverter) ascend() {
+	if c.current == nil {
+		return
+	}
+	c.current = c.current.parent
+}
+
+// Convert walks the tree rooted at doc depth-first without recursion, calling
+// head when a node is entered and tail when it is left. The children of a
+// node are not visited when head sets skipNode. The outcome is available
+// through Result.
+func (c *HTMLConverter) Convert(doc *html.Node) {
+	c.root = &navigationObject{node: doc}
+	c.current = c.root
+
+	level := 0 // Number of levels the cursor is below doc.
+	for cursor := doc; cursor != nil; {
+		c.head(cursor)
+
+		if cursor.FirstChild != nil && !c.skipNode {
+			cursor = cursor.FirstChild
+			level++
+			continue
+		}
+
+		// No children to visit: close this node, and every ancestor for which
+		// it was the last child.
+		for cursor.NextSibling == nil && level > 0 {
+			c.tail(cursor)
+			cursor = cursor.Parent
+			level--
+		}
+		c.tail(cursor)
+		if cursor == doc {
+			break
+		}
+		cursor = cursor.NextSibling
+	}
+}
+
+// Result returns the prettified children of the root object, or nil when
+// there is no root.
+func (c *HTMLConverter) Result() []guidednavigation.GuidedNavigationObject {
+	if c.root != nil {
+		return c.root.convert(true).Children
+	}
+	return nil
+}
+
+// head is called when the traversal enters n. Only element nodes are handled.
+//
+// A node whose second ExtractNodeAria result (visible) is false sets skipNode
+// and gets no navigation object. Otherwise a navigation object is created for
+// n (pending text is flushed first for block elements) and filled according
+// to the kind of element: a line break, a media element (audio, video, image
+// or figure role) or anything else.
+func (c *HTMLConverter) head(n *html.Node) {
+	if n.Type != html.ElementNode {
+		return
+	}
+
+	aria, visible := ExtractNodeAria(n)
+	if !visible {
+		c.skipNode = true
+		return
+	}
+
+	if !isInlineTag(n) {
+		// Block element: flush text
+		c.flushText()
+	}
+	c.descend(n)
+
+	roles, level := ExtractNodeRoles(n)
+	isImage := slices.Contains(roles, guidednavigation.RoleImage)
+	isFigure := slices.Contains(roles, guidednavigation.RoleFigure)
+	isAudioVideo := n.DataAtom == atom.Audio || n.DataAtom == atom.Video
+
+	switch {
+	case n.DataAtom == atom.Br:
+		c.flushSegment("", nil)
+		breakTag := "break"
+		c.segmentsAcc = append(c.segmentsAcc, textSegment{tag: &breakTag})
+
+	case isAudioVideo || isImage || isFigure:
+		// These three ops are essential to ensuring the correct order of the inline elements in the guided nav tree
+		c.flushText()
+		c.ascend()
+		c.descend(n)
+
+		media := c.current
+		media.object.Role = roles
+
+		switch {
+		case isImage:
+			if href := srcRelativeToHref(n, c.baseLocator.Href); href != nil {
+				media.object.ImgRef = href
+			}
+			if aria != nil {
+				media.object.Description = aria.Plain
+				media.noText = true
+			}
+
+		case isFigure:
+			if aria != nil {
+				media.object.Description = aria.Plain
+				media.noText = true
+			}
+
+		default: // Audio or Video
+			href := srcRelativeToHref(n, c.baseLocator.Href)
+			if href == nil {
+				// Fall back to the first direct <source> child with a usable src.
+				for _, source := range childrenOfType(n, atom.Source, 1) {
+					if src := srcRelativeToHref(source, c.baseLocator.Href); src != nil {
+						href = src
+						// TODO: we're losing the alts
+						break
+					}
+				}
+			}
+
+			if href != nil {
+				switch n.DataAtom {
+				case atom.Audio:
+					media.object.AudioRef = href
+					if aria != nil {
+						media.object.Description = aria.Plain
+						media.noText = true
+					}
+				case atom.Video:
+					// TODO: videoref?
+					media.noText = true
+				}
+			}
+		}
+
+	default:
+		obj := &c.current.object
+		obj.Level = level
+		obj.Role = roles
+		if aria != nil {
+			obj.Description = aria.Plain
+		}
+	}
+}
+
+// tail is called when the traversal leaves n.
+//
+// For a text node that is not whitespace-only (and while text is not
+// suppressed by noText), the normalized text is appended to the text
+// accumulator. The pending segment is flushed first when the language changes
+// or when the parent element maps to an SSML tag.
+//
+// For an element node, pending text is flushed if the element is a block, and
+// the converter moves back up to the parent object unless the node was
+// skipped by head, in which case the skip flag is cleared instead.
+func (c *HTMLConverter) tail(n *html.Node) {
+	if n.Type == html.TextNode && !onlySpace(n.Data) && !c.current.noText {
+		language := nodeLanguage(n)
+		ssmlTag, attrs := ConvertElementToSSMLTag(n.Parent.DataAtom)
+
+		// Compare the language values rather than the pointers: nodeLanguage
+		// hands out a fresh pointer on every call, so pointer inequality would
+		// report a change for every text node that has a language.
+		languageChanged := (c.currentLanguage == nil) != (language == nil) ||
+			(language != nil && *language != *c.currentLanguage)
+		if languageChanged || ssmlTag != "" {
+			c.flushSegment(ssmlTag, attrs)
+			c.currentLanguage = language
+		}
+
+		// Avoid a double space when the accumulated text already ends with one.
+		var stripLeading bool
+		if acc := c.textAcc.String(); len(acc) > 0 && acc[len(acc)-1] == ' ' {
+			stripLeading = true
+		}
+		appendNormalizedWhitespace(&c.textAcc, n.Data, stripLeading)
+		c.lastTextNode = n
+	} else if n.Type == html.ElementNode {
+		if !isInlineTag(n) { // Is block
+			c.flushText()
+		}
+		if !c.skipNode {
+			c.ascend()
+		} else {
+			// head created no object for a skipped node, so there is nothing to leave.
+			c.skipNode = false
+		}
+	}
+}
+
+// flushText closes the pending segment and turns all accumulated segments
+// into a single text object appended to the children of the current
+// navigation object. It does nothing when no segment has been accumulated.
+//
+// The text is plain unless at least one segment carries a tag, in which case
+// it is SSML. As a special case, when every segment is a "lang" segment with
+// the same language, the language is stored on the text object and the text
+// stays plain.
+func (c *HTMLConverter) flushText() {
+	if c.lastTextNode != nil {
+		ssmlTag, attrs := ConvertElementToSSMLTag(c.lastTextNode.Parent.DataAtom)
+		c.flushSegment(ssmlTag, attrs)
+	} else {
+		c.flushSegment("", nil)
+	}
+
+	if len(c.segmentsAcc) == 0 {
+		return
+	}
+
+	// Trim the end of the last segment's text to get a cleaner output for the TextElement.
+	// Only whitespaces between the segments are meaningful.
+	last := &c.segmentsAcc[len(c.segmentsAcc)-1]
+	last.text = strings.TrimRightFunc(last.text, unicode.IsSpace)
+
+	cobj := guidednavigation.GuidedNavigationObject{}
+
+	// First pass: decide between plain text, plain text with a single
+	// language, and SSML.
+	var ssml bool
+	allLang := true
+	var lastLang string
+	for _, v := range c.segmentsAcc {
+		if v.tag != nil {
+			ssml = true
+			if *v.tag == "lang" {
+				// Cheating here because we're in control of the attributes
+				if lastLang != "" && lastLang != v.attributes[0].Value {
+					allLang = false
+					break
+				}
+				lastLang = v.attributes[0].Value
+			} else {
+				allLang = false
+				break
+			}
+		} else {
+			allLang = false
+		}
+	}
+	if allLang {
+		ssml = false
+		cobj.Text.Language = lastLang
+	}
+
+	// Second pass: render the segments. Writes to a strings.Builder cannot
+	// fail, so the errors returned by xml.EscapeText are not checked.
+	var sb strings.Builder
+	for i, v := range c.segmentsAcc {
+		if i > 0 && len(c.segmentsAcc[i-1].text) > 0 && len(v.text) > 0 && v.tag == nil {
+			sb.WriteRune(' ')
+		}
+		if ssml {
+			if v.tag != nil {
+				sb.WriteRune('<')
+				sb.WriteString(*v.tag)
+				for _, attr := range v.attributes {
+					sb.WriteRune(' ')
+					sb.WriteString(attr.Name.Local)
+					sb.WriteString(`="`)
+					xml.EscapeText(&sb, []byte(attr.Value))
+					sb.WriteRune('"')
+				}
+				if len(v.text) > 0 {
+					sb.WriteRune('>')
+				} else {
+					// No content: self-closing tag.
+					sb.WriteString("/>")
+				}
+			}
+			if len(v.text) > 0 {
+				xml.EscapeText(&sb, []byte(v.text))
+				if v.tag != nil {
+					// Closing tag: "</" + name + ">".
+					sb.WriteString("</")
+					sb.WriteString(*v.tag)
+					sb.WriteRune('>')
+				}
+			}
+		} else {
+			sb.WriteString(v.text)
+		}
+	}
+	if ssml {
+		cobj.Text.SSML = sb.String()
+	} else {
+		cobj.Text.Plain = sb.String()
+	}
+	c.current.children = append(c.current.children, &navigationObject{
+		object: cobj,
+	})
+
+	c.segmentsAcc = []textSegment{}
+}
+
+// flushSegment moves the accumulated text into a new segment and resets the
+// text accumulator. Nothing is added when no text has been accumulated.
+//
+// asTag, when non-empty, becomes the tag of the segment and extraAttrs its
+// attributes. When a current language is set, an xml:lang attribute is added
+// and untagged segments get the "lang" tag. The first segment of an element
+// loses its leading whitespace and keeps at most its last trailing whitespace
+// character.
+func (c *HTMLConverter) flushSegment(asTag string, extraAttrs []xml.Attr) {
+	defer c.textAcc.Reset()
+
+	pending := c.textAcc.String()
+	if len(pending) == 0 {
+		return
+	}
+
+	if len(c.segmentsAcc) == 0 {
+		core := strings.TrimSpace(pending)
+		leftTrimmed := strings.TrimLeftFunc(pending, unicode.IsSpace)
+		if last, _ := utf8.DecodeLastRuneInString(leftTrimmed); unicode.IsSpace(last) {
+			core += string(last)
+		}
+		pending = core
+	}
+
+	seg := textSegment{text: pending}
+	if asTag != "" {
+		seg.tag = &asTag
+	}
+	seg.attributes = append(seg.attributes, extraAttrs...)
+
+	if lang := c.currentLanguage; lang != nil {
+		if seg.tag == nil {
+			langTag := "lang"
+			seg.tag = &langTag
+		}
+		seg.attributes = append(seg.attributes, xml.Attr{
+			Name:  xml.Name{Local: "xml:lang"},
+			Value: *lang,
+		})
+	}
+
+	c.segmentsAcc = append(c.segmentsAcc, seg)
+}
diff --git a/pkg/guidednavigation/converter/roles.go b/pkg/guidednavigation/converter/roles.go
new file mode 100644
index 00000000..7aae5ac7
--- /dev/null
+++ b/pkg/guidednavigation/converter/roles.go
@@ -0,0 +1,308 @@
+package converter
+
+import (
+ "slices"
+ "strings"
+
+ "github.com/readium/go-toolkit/pkg/guidednavigation"
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/atom"
+)
+
+// ExtractNamespaces collects the XML namespace declarations (xmlns and
+// xmlns:prefix attributes) found on el and on its ancestors, stopping after
+// the html element. The default namespace is stored under the empty prefix,
+// and the "xml" prefix is always present. The declaration closest to el wins.
+//
+// This is technically an incorrect implementation because it doesn't distinguish between XHTML and HTML.
+// These namespaces are ignored in HTML documents, but we still support them.
+func ExtractNamespaces(el *html.Node) (namespaces map[string]string) {
+	const prefixedDecl = "xmlns:"
+
+	namespaces = map[string]string{
+		"xml": "http://www.w3.org/XML/1998/namespace",
+	}
+
+	// declare keeps the first value seen for a prefix.
+	declare := func(prefix, uri string) {
+		if _, seen := namespaces[prefix]; !seen {
+			namespaces[prefix] = uri
+		}
+	}
+
+	for n := el; ; n = n.Parent {
+		for _, at := range n.Attr {
+			if at.Key == "xmlns" {
+				declare("", at.Val)
+			} else if strings.HasPrefix(at.Key, prefixedDecl) {
+				declare(at.Key[len(prefixedDecl):], at.Val)
+			}
+		}
+
+		isHTMLElement := n.Type == html.ElementNode && n.DataAtom == atom.Html
+		if isHTMLElement || n.Parent == nil {
+			break
+		}
+	}
+
+	return namespaces
+}
+
+// ariaRoles maps values of the "role" attribute to guided navigation roles.
+// ExtractNodeRoles looks attribute values up in this table.
+var ariaRoles = map[string]guidednavigation.GuidedNavigationRole{
+ "doc-abstract": guidednavigation.RoleAbstract,
+ "doc-acknowledgments": guidednavigation.RoleAcknowledgments,
+ "doc-afterword": guidednavigation.RoleAfterword,
+ "doc-appendix": guidednavigation.RoleAppendix,
+ "doc-backlink": guidednavigation.RoleBacklink,
+ "doc-bibliography": guidednavigation.RoleBibliography,
+ "doc-biblioref": guidednavigation.RoleBiblioref,
+ "cell": guidednavigation.RoleCell,
+ "doc-chapter": guidednavigation.RoleChapter,
+ "doc-colophon": guidednavigation.RoleColophon,
+ "columnheader": guidednavigation.RoleColumnHeader,
+ "complementary": guidednavigation.RoleComplementary,
+ "doc-conclusion": guidednavigation.RoleConclusion,
+ "doc-cover": guidednavigation.RoleCover,
+ "doc-credit": guidednavigation.RoleCredit,
+ "doc-credits": guidednavigation.RoleCredits,
+ "doc-dedication": guidednavigation.RoleDedication,
+ "definition": guidednavigation.RoleDefinition,
+ "doc-endnotes": guidednavigation.RoleEndnotes,
+ "doc-epigraph": guidednavigation.RoleEpigraph,
+ "doc-epilogue": guidednavigation.RoleEpilogue,
+ "doc-errata": guidednavigation.RoleErrata,
+ "doc-example": guidednavigation.RoleExample,
+ "figure": guidednavigation.RoleFigure,
+ "doc-footnote": guidednavigation.RoleFootnote,
+ "doc-glossary": guidednavigation.RoleGlossary,
+ "doc-glossref": guidednavigation.RoleGlossref,
+ "heading": guidednavigation.RoleHeading,
+ "img": guidednavigation.RoleImage,
+ "doc-index": guidednavigation.RoleIndex,
+ "doc-introduction": guidednavigation.RoleIntroduction,
+ "list": guidednavigation.RoleList,
+ "listitem": guidednavigation.RoleListItem,
+ "main": guidednavigation.RoleMain,
+ "math": guidednavigation.RoleMath,
+ "navigation": guidednavigation.RoleNavigation,
+ "doc-noteref": guidednavigation.RoleNoteref,
+ "doc-notice": guidednavigation.RoleNotice,
+ "doc-pagebreak": guidednavigation.RolePagebreak,
+ "doc-pagelist": guidednavigation.RolePagelist,
+ "doc-part": guidednavigation.RolePart,
+ "doc-preface": guidednavigation.RolePreface,
+ "doc-prologue": guidednavigation.RolePrologue,
+ "doc-pullquote": guidednavigation.RolePullquote,
+ "presentation": guidednavigation.RolePresentation,
+ "none": guidednavigation.RolePresentation,
+ "qna": guidednavigation.RoleQna,
+ "region": guidednavigation.RoleRegion,
+ "row": guidednavigation.RoleRow,
+ "rowheader": guidednavigation.RoleRowHeader,
+ "separator": guidednavigation.RoleSeparator,
+ "doc-subtitle": guidednavigation.RoleSubtitle,
+ "table": guidednavigation.RoleTable,
+ "term": guidednavigation.RoleTerm,
+ "doc-tip": guidednavigation.RoleTip,
+ "doc-toc": guidednavigation.RoleToc,
+}
+
+// epubTypeRoles maps values of the "type" attribute in the
+// http://www.idpf.org/2007/ops namespace (epub:type) to guided navigation
+// roles. It is used by ExtractNodeRoles and ConvertEPUBRole.
+var epubTypeRoles = map[string]guidednavigation.GuidedNavigationRole{
+ "abstract": guidednavigation.RoleAbstract,
+ "acknowledgments": guidednavigation.RoleAcknowledgments,
+ "afterword": guidednavigation.RoleAfterword,
+ "appendix": guidednavigation.RoleAppendix,
+ "aside": guidednavigation.RoleAside,
+ "backlink": guidednavigation.RoleBacklink,
+ "bibliography": guidednavigation.RoleBibliography,
+ "biblioref": guidednavigation.RoleBiblioref,
+ "table-cell": guidednavigation.RoleCell,
+ "chapter": guidednavigation.RoleChapter,
+ "colophon": guidednavigation.RoleColophon,
+ "conclusion": guidednavigation.RoleConclusion,
+ "cover": guidednavigation.RoleCover,
+ "credit": guidednavigation.RoleCredit,
+ "credits": guidednavigation.RoleCredits,
+ "dedication": guidednavigation.RoleDedication,
+ "glossdef": guidednavigation.RoleDefinition,
+ "endnotes": guidednavigation.RoleEndnotes,
+ "epigraph": guidednavigation.RoleEpigraph,
+ "epilogue": guidednavigation.RoleEpilogue,
+ "errata": guidednavigation.RoleErrata,
+ "example": guidednavigation.RoleExample,
+ "figure": guidednavigation.RoleFigure,
+ "footnote": guidednavigation.RoleFootnote,
+ "glossary": guidednavigation.RoleGlossary,
+ "glossref": guidednavigation.RoleGlossref,
+ "index": guidednavigation.RoleIndex,
+ "introduction": guidednavigation.RoleIntroduction,
+ "landmarks": guidednavigation.RoleLandmarks,
+ "list": guidednavigation.RoleList,
+ "list-item": guidednavigation.RoleListItem,
+ "loa": guidednavigation.RoleLoa,
+ "loi": guidednavigation.RoleLoi,
+ "lot": guidednavigation.RoleLot,
+ "lov": guidednavigation.RoleLov,
+ "noteref": guidednavigation.RoleNoteref,
+ "notice": guidednavigation.RoleNotice,
+ "pagebreak": guidednavigation.RolePagebreak,
+ "pagelist": guidednavigation.RolePagelist,
+ "part": guidednavigation.RolePart,
+ "preface": guidednavigation.RolePreface,
+ "prologue": guidednavigation.RolePrologue,
+ "pullquote": guidednavigation.RolePullquote,
+ "qna": guidednavigation.RoleQna,
+ "table-row": guidednavigation.RoleRow,
+ "subtitle": guidednavigation.RoleSubtitle,
+ "table": guidednavigation.RoleTable,
+ "glossterm": guidednavigation.RoleTerm,
+ "tip": guidednavigation.RoleTip,
+ "toc": guidednavigation.RoleToc,
+}
+
+// simpleElementTypeRoles maps element types to the guided navigation role
+// they imply on their own. ExtractNodeRoles consults it only when no ARIA
+// role was recognized; th elements are handled separately there.
+var simpleElementTypeRoles = map[atom.Atom]guidednavigation.GuidedNavigationRole{
+ atom.Article: guidednavigation.RoleArticle,
+ atom.Aside: guidednavigation.RoleAside,
+ atom.Audio: guidednavigation.RoleAudio,
+ atom.Blockquote: guidednavigation.RoleBlockquote,
+ atom.Caption: guidednavigation.RoleCaption,
+ atom.Figcaption: guidednavigation.RoleCaption,
+ atom.Td: guidednavigation.RoleCell,
+ atom.Dd: guidednavigation.RoleDefinition,
+ atom.Details: guidednavigation.RoleDetails,
+ atom.Figure: guidednavigation.RoleFigure,
+ atom.Header: guidednavigation.RoleHeader,
+ atom.H1: guidednavigation.RoleHeading,
+ atom.H2: guidednavigation.RoleHeading,
+ atom.H3: guidednavigation.RoleHeading,
+ atom.H4: guidednavigation.RoleHeading,
+ atom.H5: guidednavigation.RoleHeading,
+ atom.H6: guidednavigation.RoleHeading,
+ atom.Img: guidednavigation.RoleImage,
+ atom.Ul: guidednavigation.RoleList,
+ atom.Ol: guidednavigation.RoleList,
+ atom.Li: guidednavigation.RoleListItem,
+ atom.Main: guidednavigation.RoleMain,
+ atom.Math: guidednavigation.RoleMath,
+ atom.Nav: guidednavigation.RoleNavigation,
+ atom.P: guidednavigation.RoleParagraph,
+ atom.Pre: guidednavigation.RolePreformatted,
+ atom.Tr: guidednavigation.RoleRow,
+ atom.Section: guidednavigation.RoleSection,
+ atom.Hr: guidednavigation.RoleSeparator,
+ atom.Summary: guidednavigation.RoleSummary,
+ atom.Table: guidednavigation.RoleTable,
+ atom.Dfn: guidednavigation.RoleTerm,
+ atom.Dt: guidednavigation.RoleTerm,
+ atom.Video: guidednavigation.RoleVideo,
+}
+
+// ExtractNodeRoles derives the guided navigation roles of el and, for h1-h6
+// elements that get the heading role from their element type, the heading
+// level (1-6); level is 0 otherwise. Each role is reported at most once.
+//
+// Roles come from three sources:
+//   - the "role" attribute: its value is a whitespace-separated list of
+//     tokens, and the first token found in ariaRoles is used;
+//   - the "type" attribute in the http://www.idpf.org/2007/ops namespace
+//     (epub:type): its value is a whitespace-separated list of tokens, and
+//     every token found in epubTypeRoles is added;
+//   - the element type itself, only when no "role" token was recognized.
+func ExtractNodeRoles(el *html.Node) (roles []guidednavigation.GuidedNavigationRole, level uint8) {
+	add := func(role guidednavigation.GuidedNavigationRole) {
+		if !slices.Contains(roles, role) {
+			roles = append(roles, role)
+		}
+	}
+
+	// Based on attributes
+	var namespaces map[string]string
+	var alreadyHasRole bool
+	for _, at := range el.Attr {
+		// Split an optional namespace prefix from the local attribute name.
+		prefix, local, hasPrefix := strings.Cut(at.Key, ":")
+
+		if !hasPrefix {
+			// ARIA role
+			if at.Key != "role" {
+				continue
+			}
+			for _, token := range strings.Fields(at.Val) {
+				if role, ok := ariaRoles[token]; ok {
+					alreadyHasRole = true
+					add(role)
+					break // Later tokens are only fallbacks.
+				}
+			}
+			continue
+		}
+
+		// First we check for an attribute key we're interested in, because extracting namespaces is expensive
+		if local != "type" {
+			continue
+		}
+
+		// Maybe the attribute has a namespace...?
+		namespace := at.Namespace
+		if namespace == "" {
+			if namespaces == nil {
+				// Save namespaces so they're only extracted once per element
+				namespaces = ExtractNamespaces(el)
+			}
+			// An undeclared prefix leaves the namespace empty.
+			namespace = namespaces[prefix]
+		}
+		if namespace != "http://www.idpf.org/2007/ops" {
+			continue
+		}
+
+		for _, token := range strings.Fields(at.Val) {
+			if role, ok := epubTypeRoles[token]; ok {
+				add(role)
+			}
+		}
+	}
+	if alreadyHasRole {
+		// Aria role overrides logic based on the element type
+		return
+	}
+
+	// Based on element type
+	switch el.DataAtom {
+	case atom.Th:
+		// A header cell only gets a role when its scope says which kind it is.
+		switch getAttr(el, "scope") {
+		case "col":
+			add(guidednavigation.RoleColumnHeader)
+		case "row":
+			add(guidednavigation.RoleRowHeader)
+		}
+	default:
+		if role, ok := simpleElementTypeRoles[el.DataAtom]; ok {
+			add(role)
+
+			switch el.DataAtom {
+			case atom.H1:
+				level = 1
+			case atom.H2:
+				level = 2
+			case atom.H3:
+				level = 3
+			case atom.H4:
+				level = 4
+			case atom.H5:
+				level = 5
+			case atom.H6:
+				level = 6
+			}
+		}
+	}
+
+	/*case atom.Blockquote, atom.Q:
+	quote := element.Quote{}
+	for _, at := range el.Attr {
+		if at.Key == "cite" {
+			quote.ReferenceURL, _ = nurl.Parse(at.Val)
+		}
+		if at.Key == "title" {
+			quote.ReferenceTitle = at.Val
+		}
+	}
+	bestRole = quote*/ // TODO
+
+	return
+}
+
+// ConvertEPUBRole returns the guided navigation role for an epub:type value,
+// or the empty role when the value is not in epubTypeRoles.
+func ConvertEPUBRole(role string) guidednavigation.GuidedNavigationRole {
+	// A missing key yields the zero value, which is the empty role.
+	return epubTypeRoles[role]
+}
diff --git a/pkg/guidednavigation/guided_navigation.go b/pkg/guidednavigation/guided_navigation.go
new file mode 100644
index 00000000..4df6d342
--- /dev/null
+++ b/pkg/guidednavigation/guided_navigation.go
@@ -0,0 +1,140 @@
+package guidednavigation
+
+import (
+ "encoding/json"
+
+ "github.com/readium/go-toolkit/pkg/manifest"
+ "github.com/readium/go-toolkit/pkg/util/url"
+)
+
+// GuidedNavigationDocument is a Readium Guided Navigation Document.
+// https://readium.org/guided-navigation/schema/document.schema.json
+type GuidedNavigationDocument struct {
+ Links manifest.LinkList `json:"links,omitempty"` // References to other resources that are related to the current Guided Navigation Document.
+ Guided []GuidedNavigationObject `json:"guided"` // A sequence of resources and/or media fragments into these resources, meant to be presented sequentially to the user.
+}
+
+// GuidedNavigationObject is a Readium Guided Navigation Object.
+// https://readium.org/guided-navigation/schema/object.schema.json
+//
+// JSON encoding is done by MarshalJSON, which writes only the fields that are set.
+type GuidedNavigationObject struct {
+ AudioRef url.URL `json:"audioref,omitempty"` // References an audio resource or a fragment of it.
+ ImgRef url.URL `json:"imgref,omitempty"` // References an image or a fragment of it.
+ TextRef url.URL `json:"textref,omitempty"` // References a textual resource or a fragment of it.
+ Text GuidedNavigationText `json:"text,omitempty"` // Textual equivalent of the resources or fragment of the resources referenced by the current Guided Navigation Object.
+ Level uint8 `json:"level,omitempty"` // Level 1-6, for e.g. headings
+ Role []GuidedNavigationRole `json:"role,omitempty"` // Convey the structural semantics of a publication
+ Children []GuidedNavigationObject `json:"children,omitempty"` // Items that are children of the containing Guided Navigation Object.
+ Description string `json:"description,omitempty"` // Text, audio or image description for the current Guided Navigation Object.
+}
+
+// Empty reports whether the object has no reference, no text, no children
+// and no description. Role and Level are not taken into account.
+func (o GuidedNavigationObject) Empty() bool {
+	hasRef := o.TextRef != nil || o.ImgRef != nil || o.AudioRef != nil
+	if hasRef || len(o.Children) > 0 || o.Description != "" {
+		return false
+	}
+	return o.Text.Empty()
+}
+
+// ChildrenOnly reports whether the object has children and nothing else: no
+// reference, text, description, role or level.
+func (o GuidedNavigationObject) ChildrenOnly() bool {
+	switch {
+	case o.TextRef != nil, o.ImgRef != nil, o.AudioRef != nil:
+		return false
+	case !o.Text.Empty(), o.Description != "":
+		return false
+	case len(o.Role) != 0, o.Level != 0:
+		return false
+	}
+	return len(o.Children) > 0
+}
+
+// TextOnly reports whether the object has text and nothing else: no
+// reference, children, description, role or level.
+func (o GuidedNavigationObject) TextOnly() bool {
+	switch {
+	case o.TextRef != nil, o.ImgRef != nil, o.AudioRef != nil:
+		return false
+	case len(o.Children) != 0, o.Description != "":
+		return false
+	case len(o.Role) != 0, o.Level != 0:
+		return false
+	}
+	return !o.Text.Empty()
+}
+
+// MarshalJSON encodes the object as a JSON object holding only the fields
+// that are set. References are written as strings and left out when their
+// string form is empty. An object for which Empty is true encodes as {}.
+func (o GuidedNavigationObject) MarshalJSON() ([]byte, error) {
+	fields := make(map[string]any)
+	if o.Empty() {
+		return json.Marshal(fields)
+	}
+
+	// putRef stores the string form of ref under key, skipping nil and empty references.
+	putRef := func(key string, ref url.URL) {
+		if ref == nil {
+			return
+		}
+		if s := ref.String(); s != "" {
+			fields[key] = s
+		}
+	}
+	putRef("textref", o.TextRef)
+	putRef("imgref", o.ImgRef)
+	putRef("audioref", o.AudioRef)
+
+	if !o.Text.Empty() {
+		fields["text"] = o.Text
+	}
+	if o.Level != 0 {
+		fields["level"] = o.Level
+	}
+	if len(o.Role) > 0 {
+		fields["role"] = o.Role
+	}
+	if len(o.Children) > 0 {
+		fields["children"] = o.Children
+	}
+	if o.Description != "" {
+		fields["description"] = o.Description
+	}
+
+	return json.Marshal(fields)
+}
+
+// GuidedNavigationText is the textual equivalent carried by a guided
+// navigation object. In JSON it is either a bare string (plain text only) or
+// an object with "plain", "ssml" and "language" members.
+type GuidedNavigationText struct {
+	Plain    string `json:"plain,omitempty"`    // Plain text
+	SSML     string `json:"ssml,omitempty"`     // SSML markup
+	Language string `json:"language,omitempty"` // BCP-47 language tag
+}
+
+// Empty reports whether none of the three fields is set.
+func (t GuidedNavigationText) Empty() bool {
+	return t == (GuidedNavigationText{})
+}
+
+// UnmarshalJSON accepts both JSON forms. A bare string only sets Plain and
+// leaves the other fields untouched; an object replaces the whole value.
+func (t *GuidedNavigationText) UnmarshalJSON(data []byte) error {
+	// Just plain text
+	var plain string
+	if json.Unmarshal(data, &plain) == nil {
+		t.Plain = plain
+		return nil
+	}
+
+	// Decode through a method-less copy of the type so that this method is
+	// not called again recursively.
+	type rawText GuidedNavigationText
+	var decoded rawText
+	err := json.Unmarshal(data, &decoded)
+	if err == nil {
+		*t = GuidedNavigationText(decoded)
+	}
+	return err
+}
+
+// MarshalJSON writes a bare string when only plain text is present (or when
+// the value is empty), and an object with the non-empty fields otherwise.
+func (t GuidedNavigationText) MarshalJSON() ([]byte, error) {
+	if t.SSML == "" && t.Language == "" {
+		return json.Marshal(t.Plain)
+	}
+
+	fields := make(map[string]any)
+	set := func(key, value string) {
+		if value != "" {
+			fields[key] = value
+		}
+	}
+	set("plain", t.Plain)
+	set("ssml", t.SSML)
+	set("language", t.Language)
+	return json.Marshal(fields)
+}
+
+// Same as GuidedNavigationObject but without Children
+/*type GuidedNavigationDescriptionObject struct {
+ AudioRef url.URL `json:"audioref,omitempty"` // References an audio resource or a fragment of it.
+ ImgRef url.URL `json:"imgref,omitempty"` // References an image or a fragment of it.
+ TextRef url.URL `json:"textref,omitempty"` // References a textual resource or a fragment of it.
+ Text string `json:"text,omitempty"` // Textual equivalent of the resources or fragment of the resources referenced by the current Guided Navigation Object.
+ Level uint8 `json:"level,omitempty"` // TODO
+ Role []string `json:"role,omitempty"` // Convey the structural semantics of a publication
+}*/
+
+// TODO: functions for objects to get e.g. audio time, audio file, text file, fragment id, audio "clip", image xywh, etc.
+// This will come after the URL utility revamp to avoid implementation twice
diff --git a/pkg/guidednavigation/roles.go b/pkg/guidednavigation/roles.go
new file mode 100644
index 00000000..6ed731ea
--- /dev/null
+++ b/pkg/guidednavigation/roles.go
@@ -0,0 +1,81 @@
+package guidednavigation
+
+// GuidedNavigationRole is a role from the Readium Guided Navigation roles schema:
+// https://github.com/readium/guided-navigation/blob/main/schema/roles.schema.json
+type GuidedNavigationRole string
+
+const (
+ RoleNone GuidedNavigationRole = "" // No role. Shouldn't be used
+ RoleAbstract GuidedNavigationRole = "abstract" // A short summary of the principal ideas, concepts and conclusions of the work, or of a section or excerpt within it.
+ RoleAcknowledgments GuidedNavigationRole = "acknowledgments" // A section or statement that acknowledges significant contributions by persons, organizations, governments and other entities to the realization of the work.
+ RoleAfterword GuidedNavigationRole = "afterword" // A closing statement from the author or a person of importance, typically providing insight into how the content came to be written, its significance, or related events that have transpired since its timeline.
+ RoleAppendix GuidedNavigationRole = "appendix" // A section of supplemental information located after the primary content that informs the content but is not central to it.
+ RoleArticle GuidedNavigationRole = "article" // Represents a self-contained composition in a document, page, application, or site, which is intended to be independently distributable or reusable
+ RoleAside GuidedNavigationRole = "aside" // Secondary or supplementary content.
+ RoleAudio GuidedNavigationRole = "audio" // Embedded sound content in a document.
+ RoleBacklink GuidedNavigationRole = "backlink" // A link that allows the user to return to a related location in the content (e.g., from a footnote to its reference or from a glossary definition to where a term is used).
+ RoleBibliography GuidedNavigationRole = "bibliography" // A list of external references cited in the work, which may be to print or digital sources.
+ RoleBiblioref GuidedNavigationRole = "biblioref" // A reference to a bibliography entry.
+ RoleBlockquote GuidedNavigationRole = "blockquote" // Represents a section that is quoted from another source.
+ RoleCaption GuidedNavigationRole = "caption" // A caption for an image or a table.
+ RoleChapter GuidedNavigationRole = "chapter" // A major thematic section of content in a work.
+ RoleCell GuidedNavigationRole = "cell" // A single cell of tabular data or content.
+ RoleColumnHeader GuidedNavigationRole = "columnheader" // The header cell for a column, establishing a relationship between it and the other cells in the same column.
+ RoleColophon GuidedNavigationRole = "colophon" // A short section of production notes particular to the edition (e.g., describing the typeface used), often located at the end of a work.
+ RoleComplementary GuidedNavigationRole = "complementary" // A supporting section of the document, designed to be complementary to the main content at a similar level in the DOM hierarchy, but remains meaningful when separated from the main content.
+ RoleConclusion GuidedNavigationRole = "conclusion" // A concluding section or statement that summarizes the work or wraps up the narrative.
+ RoleCover GuidedNavigationRole = "cover" // An image that sets the mood or tone for the work and typically includes the title and author.
+ RoleCredit GuidedNavigationRole = "credit" // An acknowledgment of the source of integrated content from third-party sources, such as photos. Typically identifies the creator, copyright and any restrictions on reuse.
+ RoleCredits GuidedNavigationRole = "credits" // A collection of credits.
+ RoleDedication GuidedNavigationRole = "dedication" // An inscription at the front of the work, typically addressed in tribute to one or more persons close to the author.
+ RoleDefinition GuidedNavigationRole = "definition" // A definition of a term or concept.
+ RoleDetails GuidedNavigationRole = "details" // A disclosure widget that can be expanded.
+ RoleEndnotes GuidedNavigationRole = "endnotes" // A collection of notes at the end of a work or a section within it.
+ RoleEpigraph GuidedNavigationRole = "epigraph" // A quotation set at the start of the work or a section that establishes the theme or sets the mood.
+ RoleEpilogue GuidedNavigationRole = "epilogue" // A concluding section of narrative that wraps up or comments on the actions and events of the work, typically from a future perspective.
+ RoleErrata GuidedNavigationRole = "errata" // A set of corrections discovered after initial publication of the work, sometimes referred to as corrigenda.
+ RoleExample GuidedNavigationRole = "example" // An illustration of the usage of a defined term or phrase.
+ RoleFigure GuidedNavigationRole = "figure" // An illustration, diagram, photo, code listing or similar, referenced from the text of a work, and typically annotated with a title, caption and/or credits.
+ RoleFootnote GuidedNavigationRole = "footnote" // Ancillary information, such as a citation or commentary, that provides additional context to a referenced passage of text.
+ RoleGlossary GuidedNavigationRole = "glossary" // A brief dictionary of new, uncommon, or specialized terms used in the content.
+ RoleGlossref GuidedNavigationRole = "glossref" // A reference to a glossary definition.
+ RoleHeader GuidedNavigationRole = "header" // Represents introductory content, typically a group of introductory or navigational aids.
+ RoleHeading GuidedNavigationRole = "heading" // A heading for a section of the page.
+ RoleImage GuidedNavigationRole = "image" // Represents an image.
+ RoleIndex GuidedNavigationRole = "index" // A navigational aid that provides a detailed list of links to key subjects, names and other important topics covered in the work.
+ RoleIntroduction GuidedNavigationRole = "introduction" // A preliminary section that typically introduces the scope or nature of the work.
+ RoleLandmarks GuidedNavigationRole = "landmarks" // A collection of references to well-known or recurring components within the work.
+ RoleList GuidedNavigationRole = "list" // A structure that contains an enumeration of related content items.
+ RoleListItem GuidedNavigationRole = "listItem" // A single item in an enumeration.
+ RoleLoa GuidedNavigationRole = "loa" // A listing of audio clips included in the work.
+ RoleLoi GuidedNavigationRole = "loi" // A listing of illustrations included in the work.
+ RoleLot GuidedNavigationRole = "lot" // A listing of tables included in the work.
+ RoleLov GuidedNavigationRole = "lov" // A listing of video clips included in the work.
+ RoleMain GuidedNavigationRole = "main" // Content that is directly related to or expands upon the central topic of the document.
+ RoleMath GuidedNavigationRole = "math" // Content that represents a mathematical expression.
+ RoleNavigation GuidedNavigationRole = "navigation" // Represents a section of a page that links to other pages or to parts within the page: a section with navigation links.
+ RoleNoteref GuidedNavigationRole = "noteref" // A reference to a footnote or endnote, typically appearing as a superscripted number or symbol in the main body of text.
+ RoleNotice GuidedNavigationRole = "notice" // Notifies the user of consequences that might arise from an action or event. Examples include warnings, cautions and dangers.
+ RolePagebreak GuidedNavigationRole = "pagebreak" // A separator denoting the position before which a break occurs between two contiguous pages in a statically paginated version of the content.
+ RolePagelist GuidedNavigationRole = "pagelist" // A navigational aid that provides a list of links to the pagebreaks in the content.
+ RoleParagraph GuidedNavigationRole = "paragraph" // Represents a paragraph.
+ RolePart GuidedNavigationRole = "part" // A major structural division in a work that contains a set of related sections dealing with a particular subject, narrative arc or similar encapsulated theme.
+ RolePreface GuidedNavigationRole = "preface" // An introductory section that precedes the work, typically written by the author of the work.
+ RolePreformatted GuidedNavigationRole = "preformatted" // Represents preformatted text which is to be presented exactly as written.
+ RolePresentation GuidedNavigationRole = "presentation" // Represents an element being used only for presentation and therefore that does not have any accessibility semantics.
+ RolePrologue GuidedNavigationRole = "prologue" // An introductory section that sets the background to a work, typically part of the narrative.
+ RolePullquote GuidedNavigationRole = "pullquote" // A distinctively placed or highlighted quotation from the current content designed to draw attention to a topic or highlight a key point.
+ RoleQna GuidedNavigationRole = "qna" // A section of content structured as a series of questions and answers, such as an interview or list of frequently asked questions.
+ RoleRegion GuidedNavigationRole = "region" // Represents content that is relevant to a specific, author-specified purpose and sufficiently important that users will likely want to be able to navigate to the section easily and to have it listed in a summary of the page.
+ RoleRow GuidedNavigationRole = "row" // A row of data or content in a tabular structure.
+ RoleRowHeader GuidedNavigationRole = "rowheader" // The header cell for a row, establishing a relationship between it and the other cells in the same row.
+ RoleSection GuidedNavigationRole = "section" // Represents a generic standalone section of a document, which doesn't have a more specific semantic element to represent it.
+ RoleSeparator GuidedNavigationRole = "separator" // Indicates the element is a divider that separates and distinguishes sections of content or groups of menuitems.
+ RoleSubtitle GuidedNavigationRole = "subtitle" // An explanatory or alternate title for the work, or a section or component within it.
+ RoleSummary GuidedNavigationRole = "summary" // A summary of an element contained in details.
+ RoleTable GuidedNavigationRole = "table" // A structure containing data or content laid out in tabular form.
+ RoleTerm GuidedNavigationRole = "term" // A word or phrase with a corresponding definition.
+ RoleTip GuidedNavigationRole = "tip" // Helpful information that clarifies some aspect of the content or assists in its comprehension.
+ RoleToc GuidedNavigationRole = "toc" // A navigational aid that provides an ordered list of links to the major sectional headings in the content. A table of contents may cover an entire work, or only a smaller section of it.
+ RoleVideo GuidedNavigationRole = "video" // Embedded videos, movies, or audio files with captions in a document.
+)
diff --git a/pkg/manifest/guided_navigation.go b/pkg/manifest/guided_navigation.go
deleted file mode 100644
index b3abeb17..00000000
--- a/pkg/manifest/guided_navigation.go
+++ /dev/null
@@ -1,24 +0,0 @@
-package manifest
-
-// Readium Guided Navigation Document
-// https://readium.org/guided-navigation/schema/document.schema.json
-type GuidedNavigationDocument struct {
- Links LinkList `json:"links,omitempty"` // References to other resources that are related to the current Guided Navigation Document.
- Guided []GuidedNavigationObject `json:"guided"` // A sequence of resources and/or media fragments into these resources, meant to be presented sequentially to the user.
-}
-
-// Readium Guided Navigation Object
-// https://readium.org/guided-navigation/schema/object.schema.json
-// TODO: Role should be typed
-// TODO: all refs should be url.URL
-type GuidedNavigationObject struct {
- AudioRef string `json:"audioref,omitempty"` // References an audio resource or a fragment of it.
- ImgRef string `json:"imgref,omitempty"` // References an image or a fragment of it.
- TextRef string `json:"textref,omitempty"` // References a textual resource or a fragment of it.
- Text string `json:"text,omitempty"` // Textual equivalent of the resources or fragment of the resources referenced by the current Guided Navigation Object.
- Role []string `json:"role,omitempty"` // Convey the structural semantics of a publication
- Children []GuidedNavigationObject `json:"children,omitempty"` // Items that are children of the containing Guided Navigation Object.
-}
-
-// TODO: functions for objects to get e.g. audio time, audio file, text file, fragment id, audio "clip", image xywh, etc.
-// This will come after the URL utility revamp to avoid implementation twice
diff --git a/pkg/parser/epub/media_overlay_service.go b/pkg/parser/epub/media_overlay_service.go
index 857e02c9..79c9cb74 100644
--- a/pkg/parser/epub/media_overlay_service.go
+++ b/pkg/parser/epub/media_overlay_service.go
@@ -5,6 +5,7 @@ import (
"slices"
"github.com/readium/go-toolkit/pkg/fetcher"
+ "github.com/readium/go-toolkit/pkg/guidednavigation"
"github.com/readium/go-toolkit/pkg/manifest"
"github.com/readium/go-toolkit/pkg/mediatype"
"github.com/readium/go-toolkit/pkg/pub"
@@ -79,7 +80,7 @@ func (s *MediaOverlayService) HasGuideForResource(href string) bool {
return ok
}
-func (s *MediaOverlayService) GuideForResource(ctx context.Context, href string) (*manifest.GuidedNavigationDocument, error) {
+func (s *MediaOverlayService) GuideForResource(ctx context.Context, href string) (*guidednavigation.GuidedNavigationDocument, error) {
// Check if the provided resource has a guided navigation document
if link, ok := s.originalSmilAlternates[href]; ok {
res := s.fetcher.Get(ctx, link)
diff --git a/pkg/parser/epub/parser_smil.go b/pkg/parser/epub/parser_smil.go
index 1a38aca3..943ecb58 100644
--- a/pkg/parser/epub/parser_smil.go
+++ b/pkg/parser/epub/parser_smil.go
@@ -4,12 +4,13 @@ import (
"strconv"
"github.com/pkg/errors"
- "github.com/readium/go-toolkit/pkg/manifest"
+ "github.com/readium/go-toolkit/pkg/guidednavigation"
+ "github.com/readium/go-toolkit/pkg/guidednavigation/converter"
"github.com/readium/go-toolkit/pkg/util/url"
"github.com/readium/xmlquery"
)
-func ParseSMILDocument(document *xmlquery.Node, filePath url.URL) (*manifest.GuidedNavigationDocument, error) {
+func ParseSMILDocument(document *xmlquery.Node, filePath url.URL) (*guidednavigation.GuidedNavigationDocument, error) {
smil := document.SelectElement("/" + DualNSSelect(NamespaceSMIL, NamespaceSMIL2, "smil"))
if smil == nil {
return nil, errors.New("SMIL root element not found")
@@ -26,17 +27,17 @@ func ParseSMILDocument(document *xmlquery.Node, filePath url.URL) (*manifest.Gui
if err != nil {
return nil, errors.Wrap(err, "failed parsing SMIL body")
}
- return &manifest.GuidedNavigationDocument{
+ return &guidednavigation.GuidedNavigationDocument{
Guided: seqs,
}, nil
}
-func ParseSMILSeq(seq *xmlquery.Node, filePath url.URL) ([]manifest.GuidedNavigationObject, error) {
+func ParseSMILSeq(seq *xmlquery.Node, filePath url.URL) ([]guidednavigation.GuidedNavigationObject, error) {
childElements := seq.SelectElements(ManyNSSelectMany([]string{NamespaceSMIL, NamespaceSMIL2}, []string{"par", "seq"}))
if len(childElements) == 0 && seq.Data == "body" {
return nil, errors.New("SMIL body is empty")
}
- objects := make([]manifest.GuidedNavigationObject, 0, len(childElements))
+ objects := make([]guidednavigation.GuidedNavigationObject, 0, len(childElements))
for _, el := range childElements {
if el.Data == "par" {
//
@@ -47,27 +48,28 @@ func ParseSMILSeq(seq *xmlquery.Node, filePath url.URL) ([]manifest.GuidedNaviga
objects = append(objects, *o)
} else {
//
- o := &manifest.GuidedNavigationObject{
- TextRef: SelectNodeAttrNs(el, NamespaceOPS, "textref"),
- }
- if o.TextRef == "" {
+ textrefAttr := SelectNodeAttrNs(el, NamespaceOPS, "textref")
+ if textrefAttr == "" {
return nil, errors.New("SMIL seq has no textref")
}
- u, err := url.URLFromString(o.TextRef)
+ u, err := url.URLFromString(textrefAttr)
if err != nil {
return nil, errors.Wrap(err, "failed parsing SMIL seq textref")
}
- o.TextRef = filePath.Resolve(u).String()
+ o := &guidednavigation.GuidedNavigationObject{
+ TextRef: filePath.Resolve(u),
+ }
// epub:type
pp := parseProperties(SelectNodeAttrNs(el, NamespaceOPS, "type"))
if len(pp) > 0 {
- o.Role = make([]string, 0, len(pp))
+ o.Role = make([]guidednavigation.GuidedNavigationRole, 0, len(pp))
for _, prop := range pp {
- if prop == "" {
+ p := converter.ConvertEPUBRole(prop)
+ if p == "" {
continue
}
- o.Role = append(o.Role, prop)
+ o.Role = append(o.Role, p)
}
}
@@ -83,57 +85,58 @@ func ParseSMILSeq(seq *xmlquery.Node, filePath url.URL) ([]manifest.GuidedNaviga
return objects, nil
}
-func ParseSMILPar(par *xmlquery.Node, filePath url.URL) (*manifest.GuidedNavigationObject, error) {
+func ParseSMILPar(par *xmlquery.Node, filePath url.URL) (*guidednavigation.GuidedNavigationObject, error) {
text := par.SelectElement(DualNSSelect(NamespaceSMIL, NamespaceSMIL2, "text"))
if text == nil {
return nil, errors.New("SMIL par has no text element")
}
- o := &manifest.GuidedNavigationObject{
- TextRef: text.SelectAttr("src"),
- }
- if o.TextRef == "" {
+ srcAttr := text.SelectAttr("src")
+ if srcAttr == "" {
return nil, errors.New("SMIL par text element has empty src attribute")
}
- u, err := url.URLFromString(o.TextRef)
+ u, err := url.URLFromString(srcAttr)
if err != nil {
return nil, errors.Wrap(err, "failed parsing SMIL par text element textref")
}
- o.TextRef = filePath.Resolve(u).String()
+ o := &guidednavigation.GuidedNavigationObject{
+ TextRef: filePath.Resolve(u),
+ }
// Audio is optional
if audio := par.SelectElement(DualNSSelect(NamespaceSMIL, NamespaceSMIL2, "audio")); audio != nil {
- o.AudioRef = audio.SelectAttr("src")
- if o.AudioRef == "" {
+ audioAttr := audio.SelectAttr("src")
+ if audioAttr == "" {
return nil, errors.New("SMIL par audio element has empty src attribute")
}
begin := ParseClockValue(audio.SelectAttr("clipBegin"))
end := ParseClockValue(audio.SelectAttr("clipEnd"))
if begin != nil || end != nil {
- o.AudioRef += "#t="
+ audioAttr += "#t="
}
if begin != nil {
- o.AudioRef += strconv.FormatFloat(*begin, 'f', -1, 64)
+ audioAttr += strconv.FormatFloat(*begin, 'f', -1, 64)
}
if end != nil {
- o.AudioRef += "," + strconv.FormatFloat(*end, 'f', -1, 64)
+ audioAttr += "," + strconv.FormatFloat(*end, 'f', -1, 64)
}
- u, err := url.URLFromString(o.AudioRef)
+ u, err := url.URLFromString(audioAttr)
if err != nil {
return nil, errors.Wrap(err, "failed parsing SMIL par audio element textref")
}
- o.AudioRef = filePath.Resolve(u).String()
+ o.AudioRef = filePath.Resolve(u)
}
// epub:type
pp := parseProperties(SelectNodeAttrNs(par, NamespaceOPS, "type"))
if len(pp) > 0 {
- o.Role = make([]string, 0, len(pp))
+ o.Role = make([]guidednavigation.GuidedNavigationRole, 0, len(pp))
for _, prop := range pp {
- if prop == "" {
+ p := converter.ConvertEPUBRole(prop)
+ if p == "" {
continue
}
- o.Role = append(o.Role, prop)
+ o.Role = append(o.Role, p)
}
}
diff --git a/pkg/parser/epub/parser_smil_test.go b/pkg/parser/epub/parser_smil_test.go
index dcd1ca69..a7441b7e 100644
--- a/pkg/parser/epub/parser_smil_test.go
+++ b/pkg/parser/epub/parser_smil_test.go
@@ -5,12 +5,13 @@ import (
"testing"
"github.com/readium/go-toolkit/pkg/fetcher"
+ "github.com/readium/go-toolkit/pkg/guidednavigation"
"github.com/readium/go-toolkit/pkg/manifest"
"github.com/readium/go-toolkit/pkg/util/url"
"github.com/stretchr/testify/assert"
)
-func loadSmil(ctx context.Context, name string) (*manifest.GuidedNavigationDocument, error) {
+func loadSmil(ctx context.Context, name string) (*guidednavigation.GuidedNavigationDocument, error) {
n, rerr := fetcher.ReadResourceAsXML(ctx, fetcher.NewFileResource(manifest.Link{}, "./testdata/smil/"+name+".smil"), map[string]string{
NamespaceOPS: "epub",
NamespaceSMIL: "smil",
@@ -30,8 +31,8 @@ func TestSMILDocTypicalAudio(t *testing.T) {
}
assert.Empty(t, doc.Links)
if assert.Len(t, doc.Guided, 6) {
- assert.Equal(t, "OEBPS/page1.xhtml#word0", doc.Guided[0].TextRef)
- assert.Equal(t, "OEBPS/audio/page1.m4a#t=0,0.84", doc.Guided[0].AudioRef)
+ assert.Equal(t, "OEBPS/page1.xhtml#word0", doc.Guided[0].TextRef.String())
+ assert.Equal(t, "OEBPS/audio/page1.m4a#t=0,0.84", doc.Guided[0].AudioRef.String())
}
}
@@ -51,7 +52,7 @@ func TestSMILClipBoundaries(t *testing.T) {
if !assert.Len(t, doc.Guided, 3) {
return
}
- assert.Equal(t, "OEBPS/audio/page1.m4a#t=,0.84", doc.Guided[0].AudioRef)
- assert.Equal(t, "OEBPS/audio/page1.m4a#t=0.84", doc.Guided[1].AudioRef)
- assert.Equal(t, "OEBPS/audio/page1.m4a", doc.Guided[2].AudioRef)
+ assert.Equal(t, "OEBPS/audio/page1.m4a#t=,0.84", doc.Guided[0].AudioRef.String())
+ assert.Equal(t, "OEBPS/audio/page1.m4a#t=0.84", doc.Guided[1].AudioRef.String())
+ assert.Equal(t, "OEBPS/audio/page1.m4a", doc.Guided[2].AudioRef.String())
}
diff --git a/pkg/pub/service_guided_navigation.go b/pkg/pub/service_guided_navigation.go
index 980d38d6..c4f5a4ca 100644
--- a/pkg/pub/service_guided_navigation.go
+++ b/pkg/pub/service_guided_navigation.go
@@ -6,6 +6,7 @@ import (
"github.com/pkg/errors"
"github.com/readium/go-toolkit/pkg/fetcher"
+ "github.com/readium/go-toolkit/pkg/guidednavigation"
"github.com/readium/go-toolkit/pkg/manifest"
"github.com/readium/go-toolkit/pkg/mediatype"
"github.com/readium/go-toolkit/pkg/util/url"
@@ -27,7 +28,7 @@ func init() {
// Provides a way to access guided navigation documents for resources of a [Publication].
type GuidedNavigationService interface {
Service
- GuideForResource(ctx context.Context, href string) (*manifest.GuidedNavigationDocument, error)
+ GuideForResource(ctx context.Context, href string) (*guidednavigation.GuidedNavigationDocument, error)
HasGuideForResource(href string) bool
}