Skip to content

Commit 8137247

Browse files
committed
newsbox api
1 parent 3e3d5d1 commit 8137247

File tree

2 files changed

+331
-0
lines changed

2 files changed

+331
-0
lines changed

newsbox/newsbox.go

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
// Package newsbox provides a client for accessing newsbox services.
2+
package newsbox
3+
4+
import (
5+
"encoding/json"
6+
"net/http"
7+
"net/url"
8+
"strings"
9+
"time"
10+
11+
"github.com/machinebox/sdk-go/x/boxutil"
12+
"github.com/pkg/errors"
13+
)
14+
15+
// Analysis represents an analysis of title, content and domain.
16+
type Analysis struct {
17+
// Title is the response object for the title analysis
18+
Title Title `json:"title"`
19+
// Content is the response object for the content analysis
20+
Content Content `json:"content"`
21+
// Domain is the response object for the domain analysis
22+
Domain Domain `json:"domain"`
23+
}
24+
25+
type Title struct {
26+
// Decision is the string representing the decision could be bias/unsure/impartial
27+
Decision string `json:"decision,omitempty"`
28+
// Score is the numeric score of the decision is between 0.00 (bias) and 1.00 (impartial)
29+
Score float64 `json:"score,omitempty"`
30+
// Entities represents entities discovered in the text.
31+
Entities []Entity `json:"entities,omitempty"`
32+
}
33+
34+
type Content struct {
35+
// Decision is the string representing the decision could be bias/unsure/impartial
36+
Decision string `json:"decision,omitempty"`
37+
// Score is the numeric score of the decision is between 0.00 (bias) and 1.00 (impartial)
38+
Score float64 `json:"score,omitempty"`
39+
// Entities represents entities discovered in the text.
40+
Entities []Entity `json:"entities,omitempty"`
41+
// Keywords are the most relevant keywords extracted from the text
42+
Keywords []Keyword `json:"keywords"`
43+
}
44+
45+
// Domain represents the analysis of the domain an article was published on.
type Domain struct {
	// Domain is the domain extracted from the URL.
	Domain string `json:"domain,omitempty"`
	// Category is one of the categories listed in the API docs.
	Category string `json:"category,omitempty"`
}
51+
52+
// Entity represents an entity discovered in the text.
type Entity struct {
	// Type is a string describing the kind of entity.
	Type string `json:"type"`
	// Text is the text of the entity.
	Text string `json:"text"`
	// Start is the absolute start position of the entity (in the original text).
	Start int `json:"start"`
	// End is the absolute end position of the entity (in the original text).
	End int `json:"end"`
}
63+
64+
// Keyword is a single key word extracted from the analysed text.
type Keyword struct {
	// Keyword is the text of the key word.
	Keyword string `json:"keyword"`
}
68+
69+
// Client makes HTTP requests to a newsbox instance.
type Client struct {
	// addr is the address of the box; endpoint paths are appended to it.
	addr string

	// HTTPClient is the http.Client used to perform the requests.
	HTTPClient *http.Client
}
77+
78+
// New makes a new Client.
79+
func New(addr string) *Client {
80+
return &Client{
81+
addr: addr,
82+
HTTPClient: &http.Client{
83+
Timeout: 10 * time.Second,
84+
},
85+
}
86+
}
87+
88+
// Info gets the details about the box.
89+
func (c *Client) Info() (*boxutil.Info, error) {
90+
var info boxutil.Info
91+
u, err := url.Parse(c.addr + "/info")
92+
if err != nil {
93+
return nil, err
94+
}
95+
if !u.IsAbs() {
96+
return nil, errors.New("box address must be absolute")
97+
}
98+
req, err := http.NewRequest("GET", u.String(), nil)
99+
if err != nil {
100+
return nil, err
101+
}
102+
req.Header.Set("Accept", "application/json; charset=utf-8")
103+
resp, err := c.HTTPClient.Do(req)
104+
if err != nil {
105+
return nil, err
106+
}
107+
defer resp.Body.Close()
108+
if err := json.NewDecoder(resp.Body).Decode(&info); err != nil {
109+
return nil, err
110+
}
111+
return &info, nil
112+
}
113+
114+
// Check passes the text from the Reader to newsbox for analysis.
115+
func (c *Client) Check(title string, content string, u *url.URL) (*Analysis, error) {
116+
uu, err := url.Parse(c.addr + "/newsbox/check")
117+
if err != nil {
118+
return nil, err
119+
}
120+
if !u.IsAbs() {
121+
return nil, errors.New("box address must be absolute")
122+
}
123+
vals := url.Values{}
124+
vals.Set("title", title)
125+
vals.Set("content", content)
126+
vals.Set("url", u.String())
127+
128+
req, err := http.NewRequest("POST", uu.String(), strings.NewReader(vals.Encode()))
129+
if err != nil {
130+
return nil, err
131+
}
132+
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
133+
req.Header.Set("Accept", "application/json; charset=utf-8")
134+
resp, err := c.HTTPClient.Do(req)
135+
if err != nil {
136+
return nil, err
137+
}
138+
defer resp.Body.Close()
139+
var response struct {
140+
Success bool
141+
Error string
142+
143+
Title Title `json:"title"`
144+
Content Content `json:"content"`
145+
Domain Domain `json:"domain"`
146+
}
147+
if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
148+
return nil, errors.Wrap(err, "decoding response")
149+
}
150+
if !response.Success {
151+
return nil, ErrNewsbox(response.Error)
152+
}
153+
return &Analysis{
154+
Title: response.Title,
155+
Content: response.Content,
156+
Domain: response.Domain,
157+
}, nil
158+
}
159+
160+
// ErrNewsbox is an error message reported by newsbox.
type ErrNewsbox string

// Error implements the error interface; the message is prefixed with
// "newsbox: ".
func (e ErrNewsbox) Error() string {
	const prefix = "newsbox: "
	return prefix + string(e)
}

newsbox/newsbox_test.go

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
package newsbox_test
2+
3+
import (
4+
"io"
5+
"net/http"
6+
"net/http/httptest"
7+
"net/url"
8+
"testing"
9+
10+
"github.com/machinebox/sdk-go/newsbox"
11+
"github.com/matryer/is"
12+
)
13+
14+
func TestCheck(t *testing.T) {
15+
result := `
16+
{
17+
"success": true,
18+
"title": {
19+
"decision": "impartial",
20+
"score": 0.7757045030593872,
21+
"entities": [
22+
{
23+
"text": "China",
24+
"start": 0,
25+
"end": 4,
26+
"type": "place"
27+
}
28+
]
29+
},
30+
"content": {
31+
"decision": "bias",
32+
"score": 0.33232277631759644,
33+
"entities": [
34+
{
35+
"text": "Fuxing",
36+
"start": 21,
37+
"end": 26,
38+
"type": "place"
39+
},
40+
{
41+
"text": "300",
42+
"start": 74,
43+
"end": 76,
44+
"type": "cardinal"
45+
},
46+
{
47+
"text": "186mph",
48+
"start": 83,
49+
"end": 88,
50+
"type": "quantity"
51+
},
52+
{
53+
"text": "2011",
54+
"start": 94,
55+
"end": 97,
56+
"type": "date"
57+
},
58+
{
59+
"text": "two",
60+
"start": 109,
61+
"end": 111,
62+
"type": "cardinal"
63+
},
64+
{
65+
"text": "40",
66+
"start": 133,
67+
"end": 134,
68+
"type": "cardinal"
69+
},
70+
{
71+
"text": "next week",
72+
"start": 149,
73+
"end": 157,
74+
"type": "date"
75+
},
76+
{
77+
"text": "about 350",
78+
"start": 234,
79+
"end": 242,
80+
"type": "cardinal"
81+
}
82+
],
83+
"keywords": [
84+
{
85+
"keyword": "high speed"
86+
},
87+
{
88+
"keyword": "bullet train"
89+
},
90+
{
91+
"keyword": "speed"
92+
},
93+
{
94+
"keyword": "train"
95+
},
96+
{
97+
"keyword": "mph"
98+
},
99+
{
100+
"keyword": "km/h"
101+
},
102+
{
103+
"keyword": "rejuvenation"
104+
},
105+
{
106+
"keyword": "fuxing"
107+
},
108+
{
109+
"keyword": "crash"
110+
},
111+
{
112+
"keyword": "people"
113+
}
114+
]
115+
},
116+
"domain": {
117+
"domain": "bbc.co.uk",
118+
"category": "trusted"
119+
}
120+
}
121+
`
122+
is := is.New(t)
123+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
124+
is.Equal(r.Method, "POST")
125+
is.Equal(r.URL.Path, "/newsbox/check")
126+
is.Equal(r.Header.Get("Accept"), "application/json; charset=utf-8")
127+
io.WriteString(w, result)
128+
}))
129+
defer srv.Close()
130+
u, err := url.Parse("http://www.bbc.co.uk/news/technology-41011662")
131+
is.NoErr(err)
132+
133+
tb := newsbox.New(srv.URL)
134+
res, err := tb.Check(`China relaunches world's fastest train`,
135+
`The top speed of the Fuxing or "rejuvenation" bullet trains was capped at 300km/h (186mph) in 2011 following two crashes that killed 40 people.
136+
From next week, some of the trains will once again be allowed to run at a higher speed of about 350 km/h.`,
137+
u,
138+
)
139+
is.NoErr(err)
140+
141+
// Title
142+
is.Equal(res.Title.Decision, "impartial")
143+
is.True(res.Title.Score > 0.6)
144+
is.Equal(len(res.Title.Entities), 1)
145+
is.Equal(res.Title.Entities[0].Start, 0)
146+
is.Equal(res.Title.Entities[0].End, 4)
147+
is.Equal(res.Title.Entities[0].Text, "China")
148+
is.Equal(res.Title.Entities[0].Type, "place")
149+
150+
// Domain
151+
is.Equal(res.Domain.Category, "trusted")
152+
is.Equal(res.Domain.Domain, "bbc.co.uk")
153+
154+
// Content
155+
is.Equal(res.Content.Decision, "bias")
156+
is.True(res.Content.Score < 0.4)
157+
is.Equal(len(res.Content.Keywords), 10)
158+
is.Equal(res.Content.Keywords[0].Keyword, "high speed")
159+
is.Equal(res.Content.Keywords[1].Keyword, "bullet train")
160+
is.Equal(len(res.Content.Entities), 8)
161+
is.Equal(res.Content.Entities[0].Start, 21)
162+
is.Equal(res.Content.Entities[0].End, 26)
163+
is.Equal(res.Content.Entities[0].Text, "Fuxing")
164+
is.Equal(res.Content.Entities[0].Type, "place")
165+
166+
}

0 commit comments

Comments
 (0)