1
0
Fork 0

Improve documentation, add correct handling for slices

This commit is contained in:
Alex Palaistras 2016-05-26 22:27:15 +01:00
parent 92fb4d6e25
commit afe72b7d47
5 changed files with 68 additions and 11 deletions

View File

@ -19,7 +19,8 @@ import (
)
// Fetch data from source pointed to by URI in `src`, and store to arbitrary
// struct pointed to by `dest`.
// struct pointed to by `dest`. Data is parsed according to `kind`, and has to
// correspond to a registered parser.
func Fetch(src string, dest interface{}, kind string) error {
// Verify destination value type.
val := reflect.ValueOf(dest)
@ -48,7 +49,8 @@ func Fetch(src string, dest interface{}, kind string) error {
return nil
}
// Set struct fields sequentially according to their `farsight` tags.
// Set struct fields from document, filtered by tags marked by "farsight"
// definitions.
func populateStruct(doc parser.Document, dest reflect.Value) error {
// Set each struct field in sequence.
for i := 0; i < dest.NumField(); i++ {
@ -84,6 +86,18 @@ func setField(doc parser.Document, field reflect.Value) error {
switch field.Kind() {
case reflect.String:
field.SetString(val)
case reflect.Slice:
	// Decompose document into list and prepare destination slice, one
	// element per sub-document.
	list := doc.List()
	slice := reflect.MakeSlice(field.Type(), len(list), cap(list))
	for i, d := range list {
		// Propagate element failures to the caller. Returning nil here would
		// report success while leaving the destination field unset.
		if err := setField(d, slice.Index(i)); err != nil {
			return err
		}
	}
	// Only assign once every element has been populated successfully.
	field.Set(slice)
case reflect.Struct:
return populateStruct(doc, field)
default:

View File

@ -34,19 +34,30 @@ type TestCase struct {
Expected interface{}
}
// Test cases for `farsight.Fetch` function.
var fetchTests = map[string]TestCase{
// Fetch and set ID attribute.
"html://id-test": {
"html://string": {
`<html><div id="hello">Hello World</div></html>`,
&struct {
Hello string `farsight:"#hello"`
Text string `farsight:"#hello"`
}{},
&struct {
Hello string `farsight:"#hello"`
Text string `farsight:"#hello"`
}{
"Hello World",
},
},
"html://slice": {
`<body><ul id="g"><li>Hello</li><li>World</li></ul></body>`,
&struct {
List []string `farsight:"#g li"`
}{},
&struct {
List []string `farsight:"#g li"`
}{
[]string{"Hello", "World"},
},
},
}
func TestFetch(t *testing.T) {

View File

@ -7,6 +7,7 @@ package html
import (
// Standard library.
"bytes"
"fmt"
"io"
// Internal packages.
@ -17,21 +18,29 @@ import (
"golang.org/x/net/html"
)
// HTMLParser represents a parser and tokeniser for HTML documents.
type HTMLParser struct{}
// Parse reads an HTML document from the reader passed, and returns a document
// containing a single parent node. An error is returned if parsing fails.
func (h *HTMLParser) Parse(r io.Reader) (parser.Document, error) {
doc, err := html.Parse(r)
n, err := html.Parse(r)
if err != nil {
return nil, err
}
return &HTMLDocument{nodes: []*html.Node{doc}}, nil
return &HTMLDocument{nodes: []*html.Node{n}}, nil
}
// HTMLDocument represents a collection of nodes under a single parent container.
type HTMLDocument struct {
nodes []*html.Node
}
// Filter traverses the document tree and attempts to match elements against
// the provided CSS selector. On success, a new document is returned, containing
// a list of all matched elements. An error is returned if the CSS selector is
// malformed, or no elements were matched.
func (h *HTMLDocument) Filter(attr string) (parser.Document, error) {
sel, err := cascadia.Compile(attr)
if err != nil {
@ -43,9 +52,27 @@ func (h *HTMLDocument) Filter(attr string) (parser.Document, error) {
sub.nodes = append(sub.nodes, sel.MatchAll(n)...)
}
if len(sub.nodes) == 0 {
return nil, fmt.Errorf("Attribute '%s' matched no elements", attr)
}
return sub, nil
}
// List splits the receiver into one HTMLDocument per node it holds, keeping
// the nodes in their original order. Each returned document wraps exactly one
// node. A receiver with no nodes yields a nil slice.
func (h *HTMLDocument) List() []parser.Document {
	var out []parser.Document

	for i := range h.nodes {
		single := &HTMLDocument{nodes: []*html.Node{h.nodes[i]}}
		out = append(out, single)
	}

	return out
}
// String returns the document contents by traversing the tree and
// concatenating all data contained within text nodes.
func (h *HTMLDocument) String() string {
var buf bytes.Buffer
@ -56,6 +83,7 @@ func (h *HTMLDocument) String() string {
return buf.String()
}
// getNodeText traverses the document tree and returns the first text node's
// contents as a string.
func getNodeText(n *html.Node) string {
if n.Type == html.TextNode {
return n.Data

View File

@ -16,6 +16,7 @@ type Parser interface {
// Document is the abstraction over parsed content that field population works
// against: it can be narrowed, broken up into parts, and rendered as text.
type Document interface {
	// Filter narrows the document to the parts selected by attr and returns
	// them as a new Document, or an error if the selection fails. How attr is
	// interpreted is up to the implementation.
	Filter(attr string) (Document, error)

	// List decomposes the document into a slice of sub-documents.
	List() []Document

	// String returns the document contents as a string.
	String() string
}

View File

@ -14,9 +14,12 @@ import (
"github.com/deuill/farsight/source"
)
type HTTP struct{}
// HTTPSource represents a source for HTTP and HTTPS endpoints.
type HTTPSource struct{}
func (h *HTTP) Fetch(src string) (io.Reader, error) {
// Fetch issues a GET request against the source URL pointed to by `src`, and
// returns an io.Reader for the containing HTML document.
func (h *HTTPSource) Fetch(src string) (io.Reader, error) {
// Attempt to fetch resource from source endpoint.
resp, err := http.Get(src)
if err != nil {
@ -37,7 +40,7 @@ func (h *HTTP) Fetch(src string) (io.Reader, error) {
}
func init() {
h := &HTTP{}
h := &HTTPSource{}
// Register HTTP source for both "http" and "https" endpoints.
source.Register("http", h)