Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pkg/content/element/attributes.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package element
// AttributeKey is the key type used for an attribute's key-value pair.
type AttributeKey string

// Well-known attribute keys.
const (
	// AcessibilityLabelAttributeKey is the "accessibilityLabel" key.
	// NOTE: the identifier is spelled with a single "c"; it is left as-is
	// because it is exported and renaming it would break existing callers.
	AcessibilityLabelAttributeKey AttributeKey = "accessibilityLabel"
	// AccessibilityDetailsAttributeKey is the "accessibilityDetails" key.
	AccessibilityDetailsAttributeKey AttributeKey = "accessibilityDetails"
	// AccessibilityLabeledByAttributeKey is the "accessibilityLabeledBy" key.
	AccessibilityLabeledByAttributeKey AttributeKey = "accessibilityLabeledBy"
	// AccessibilityDescribedByAttributeKey is the "accessibilityDescribedBy" key.
	AccessibilityDescribedByAttributeKey AttributeKey = "accessibilityDescribedBy"
	// LanguageAttributeKey is the "language" key.
	LanguageAttributeKey AttributeKey = "language"
)

// An attribute is an arbitrary key-value metadata pair.
Expand Down
2 changes: 1 addition & 1 deletion pkg/content/element/element.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (e AudioElement) MarshalJSON() ([]byte, error) {
res := ElementToMap(e)
res["text"] = e.Text()
res["link"] = e.EmbeddedLink()
res["@type"] = "Video"
res["@type"] = "Audio"
return json.Marshal(res)
}

Expand Down
1 change: 1 addition & 0 deletions pkg/content/iterator/html_converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ func (c *HTMLConverter) flushText() {
if len(c.breadcrumbs) > 0 {
el := c.breadcrumbs[len(c.breadcrumbs)-1].node
for _, at := range el.Attr {
// FIXME: this check is wrong. It needs to match epub:type, which requires splitting the string first.
if at.Namespace == "http://www.idpf.org/2007/ops" && at.Key == "type" && at.Val == "footnote" {
bestRole = element.Footnote{}
break
Expand Down
153 changes: 153 additions & 0 deletions pkg/guidednavigation/converter/a11y.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package converter

import (
"encoding/xml"
"slices"
"strings"

"github.com/readium/go-toolkit/pkg/guidednavigation"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)

// getElementByID searches the subtree rooted at n, depth-first in document
// order, and returns the first element node that has an "id" attribute equal
// to id. It returns nil when no element in the subtree matches.
func getElementByID(n *html.Node, id string) *html.Node {
	matchesID := func(attr html.Attribute) bool {
		return attr.Key == "id" && attr.Val == id
	}

	// Only element nodes are candidates; other node types are just traversed.
	if n.Type == html.ElementNode && slices.ContainsFunc(n.Attr, matchesID) {
		return n
	}

	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if found := getElementByID(child, id); found != nil {
			return found
		}
	}
	return nil
}

// nodeIsHidden reports whether n is marked as hidden by its own attributes:
// either it carries a "hidden" attribute (with any value), or its
// "aria-hidden" attribute is exactly "true". Ancestors are not inspected.
func nodeIsHidden(n *html.Node) bool {
	for _, a := range n.Attr {
		switch a.Key {
		case "hidden":
			return true
		case "aria-hidden":
			if a.Val == "true" {
				return true
			}
		}
	}
	return false
}

// nodeText appends to sb the data of every text node found in the subtree
// rooted at n (n included), in document order. Nothing is inserted between
// the individual text nodes.
func nodeText(sb *strings.Builder, n *html.Node) {
	if n.Type == html.TextNode {
		sb.WriteString(n.Data)
	}
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		nodeText(sb, child)
	}
}

// ExtractNodeAria computes a simplified accessible name for el, loosely
// following steps 2.A-2.D of the W3C accessible name computation
// (https://www.w3.org/TR/accname/#terminology).
//
// It returns the node's accessibility text if existent, and whether or not
// the node is visible in the first place:
//   - (nil, false) when el itself is hidden (see nodeIsHidden);
//   - (text, true) when a name was found through aria-labelledby, aria-label
//     or, for <img> elements, the alt attribute (tried in that order);
//   - (nil, true) when el is visible but no name could be determined.
func ExtractNodeAria(el *html.Node) (*guidednavigation.GuidedNavigationText, bool) {
	// 2.A
	if nodeIsHidden(el) {
		return nil, false
	}

	// 2.B
	// aria-labelledby is a whitespace-separated list of IDs. strings.Fields
	// splits on any run of whitespace (spaces, tabs, newlines) and never
	// yields empty tokens, unlike splitting on a single space character.
	if rawIDs := strings.Fields(getAttr(el, "aria-labelledby")); len(rawIDs) > 0 {
		// Drop duplicate IDs while keeping first-occurrence order.
		ids := make([]string, 0, len(rawIDs))
		for _, id := range rawIDs {
			if !slices.Contains(ids, id) {
				ids = append(ids, id)
			}
		}

		// Traverse up to the root of the document
		doc := el
		for doc.Parent != nil {
			doc = doc.Parent
		}

		// Resolve each ID; IDs that match no element are ignored.
		labelNodes := make([]*html.Node, 0, len(ids))
		for _, id := range ids {
			if n := getElementByID(doc, id); n != nil {
				labelNodes = append(labelNodes, n)
			}
		}

		if len(labelNodes) > 0 {
			var sb strings.Builder
			for i, n := range labelNodes {
				// NOTE(review): the accname spec appears to let hidden nodes
				// contribute when they are directly referenced by
				// aria-labelledby; skipping them is the existing behavior and
				// is kept here — confirm whether that is intended.
				if nodeIsHidden(n) {
					continue
				}
				// Prefer the referenced node's own aria-label; a label made
				// only of whitespace counts as absent so that the node's text
				// content is used instead.
				if label := strings.TrimSpace(getAttr(n, "aria-label")); label != "" {
					sb.WriteString(label)
				} else {
					nodeText(&sb, n)
				}

				if i < len(labelNodes)-1 {
					sb.WriteRune(' ') // Separate this label from the next one
				}
			}
			// If every referenced node was hidden or empty, fall through to
			// the remaining steps instead of returning an empty name.
			if text := strings.TrimSpace(sb.String()); text != "" {
				return &guidednavigation.GuidedNavigationText{
					Plain: text,
				}, true
			}
		}
	}

	// 2.C
	if label := strings.TrimSpace(getAttr(el, "aria-label")); label != "" {
		return &guidednavigation.GuidedNavigationText{
			Plain: label,
		}, true
	}

	// 2.D
	// TODO: more support for els
	if el.DataAtom == atom.Img {
		if alt := strings.TrimSpace(getAttr(el, "alt")); alt != "" {
			return &guidednavigation.GuidedNavigationText{
				Plain: alt,
			}, true
		}
	}

	return nil, true
}

// ConvertElementToSSMLTag maps an HTML element atom to the name of an SSML
// tag and the attributes to put on it:
//   - <em> and <b> become "emphasis" with no attributes;
//   - <i> becomes "emphasis" with level="reduced";
//   - <strong> becomes "emphasis" with level="strong";
//   - <br> becomes "break" with no attributes.
//
// Any other atom yields an empty tag name and nil attributes.
func ConvertElementToSSMLTag(a atom.Atom) (string, []xml.Attr) {
	const emphasisTag = "emphasis"

	// withLevel builds the single-attribute list carrying the emphasis level.
	withLevel := func(level string) []xml.Attr {
		return []xml.Attr{{
			Name:  xml.Name{Local: "level"},
			Value: level,
		}}
	}

	switch a {
	case atom.Em, atom.B:
		return emphasisTag, nil
	case atom.I:
		return emphasisTag, withLevel("reduced")
	case atom.Strong:
		return emphasisTag, withLevel("strong")
	case atom.Br:
		return "break", nil
	}
	return "", nil
}
42 changes: 42 additions & 0 deletions pkg/guidednavigation/converter/converter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package converter

import (
"context"
"strings"

"github.com/pkg/errors"
"github.com/readium/go-toolkit/pkg/fetcher"
"github.com/readium/go-toolkit/pkg/guidednavigation"
"github.com/readium/go-toolkit/pkg/manifest"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)

// Do reads resource as an HTML string, parses it with scripting disabled,
// and runs an HTMLConverter (created for locator) over the document's <body>.
// The converter's result is returned as the Guided field of a new
// GuidedNavigationDocument.
//
// An error is returned when the resource cannot be read, when the HTML cannot
// be parsed, or when the parsed document has no <body> element.
func Do(ctx context.Context, resource fetcher.Resource, locator manifest.Locator) (*guidednavigation.GuidedNavigationDocument, error) {
	// The read and parse errors are kept in separate variables because their
	// static types may differ.
	src, readErr := fetcher.ReadResourceAsString(ctx, resource)
	if readErr != nil {
		return nil, errors.Wrapf(readErr, "failed reading HTML string of %s", resource.Link().Href.String())
	}

	doc, parseErr := html.ParseWithOptions(
		strings.NewReader(src),
		html.ParseOptionEnableScripting(false),
	)
	if parseErr != nil {
		return nil, errors.Wrapf(parseErr, "failed parsing HTML of %s", resource.Link().Href.String())
	}

	bodyNode := childOfType(doc, atom.Body, true)
	if bodyNode == nil {
		return nil, errors.Errorf("HTML of %s doesn't have a <body>", resource.Link().Href.String())
	}

	// Traverse the document's HTML, starting from <body>.
	conv := NewHTMLConverter(locator)
	conv.Convert(bodyNode)

	result := &guidednavigation.GuidedNavigationDocument{
		Guided: conv.Result(),
	}
	return result, nil
}
76 changes: 76 additions & 0 deletions pkg/guidednavigation/converter/converter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package converter

import (
"context"
"encoding/json"
"testing"

"github.com/readium/go-toolkit/pkg/fetcher"
"github.com/readium/go-toolkit/pkg/manifest"
"github.com/stretchr/testify/require"
)

// TestDo runs Do over an in-memory XHTML fixture covering images with alt
// text, inline emphasis, line breaks, page breaks, lists, aria-hidden
// content, aria-label and aria-labelledby, then logs the resulting guided
// navigation document as indented JSON. It only checks that conversion and
// JSON encoding succeed; the converted content itself is not asserted.
func TestDo(t *testing.T) {
	f := fetcher.NewBytesResource(manifest.Link{
		Href: manifest.MustNewHREFFromString("hello.xhtml", false),
	}, func() []byte {
		return []byte(`
<!doctype html>
<html xmlns:epub="http://www.idpf.org/2007/ops"><!-- lang="en" xml:lang="en" -->
<body>
<p xml:lang="fr">Paragraphe avec image: <img src="src/image.jpg" alt="A cool image" /></p>
<p xml:lang="fr">Paragraphe avec image #1 <img src="src/image.jpg" alt="A cool image" /> et #2 <img src="src/image.jpg" alt="A second cool image" />!</p>
<p xml:lang="fr"><img src="src/image.jpg" alt="The coolest image" /> et <img src="src/image.jpg" alt="The boring image" /></p>
<p>A paragraph with: <img src="src/image.jpg" alt="A cool image" /><em xml:lang="fr">est cool!</em></p>
<p><i>Simple paragraph</i></p>
<p>This job requires a certain <em xml:lang="fr">savoir faire</em> that can only be acquired over time.</p>
<p>This is a paragraph <b>with some very-<em>strong</em> bold</b> text!</p>
<p>Just<br />testing<br>some<br /> breaks! And useless <span>elements</span>...</p>

<div>
<span id="pg04" role="doc-pagebreak" epub:type="pagebreak" title="4"/>
<p>And the next pagebreak is in the middle <span id="pg05" role="doc-pagebreak" epub:type="pagebreak" title="4"/> of a sentence.</p>
</div>


<section role="doc-chapter" epub:type="chapter">
<h1>Title of the chapter</h1>
</section>
<ul>
<li>First item</li>
<li>Second item</li>
<li>Third item</li>
</ul>
<p aria-hidden="true">Hidden <b>text!</b> <img src="with_image.jpg" />...</p>
<p aria-hidden="true">More Hidden text</p>
<p aria-hidden="true">More Hidden text</p>

<img src="image1.avif" alt="Alternative text using the alt attribute">
<span role="img" aria-label="Rating: 4 out of 5 stars">
<span>★</span>
<span>★</span>
<span>★</span>
<span>★</span>
<span>☆</span>
</span>
<figure aria-labelledby="cat-caption">
<pre>
/\_/\
( o.o )
^
</pre>
<figcaption id="cat-caption">
ASCII Art of a cat face
</figcaption>
</figure>
</body>
</html>`)
	})

	nav, err := Do(context.Background(), f, manifest.Locator{
		Href: f.Link().Href.Resolve(nil, nil),
	})
	require.NoError(t, err)
	require.NotNil(t, nav)

	// Fail loudly if the document cannot be serialized instead of logging an
	// empty string.
	bin, err := json.MarshalIndent(nav, "", " ")
	require.NoError(t, err)
	t.Log(string(bin))
}
Loading