-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathdoc.test.js
More file actions
75 lines (66 loc) · 2.3 KB
/
doc.test.js
File metadata and controls
75 lines (66 loc) · 2.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import spacy from '../src'
import { Doc, Token, Span } from '../src/tokens'
import { text, words, spaces, attrs } from './util'
// Replace the language module with a Jest mock for every test in this file.
// NOTE(review): whether this resolves to a manual mock (e.g. a __mocks__
// directory next to the module) or to Jest's automock is not visible from
// this file — confirm.
jest.mock('../src/language')
// Shared pipeline handle: the async tests below call it as `await nlp(text)`
// to obtain the Doc they assert against.
const nlp = spacy.load('en_core_web_sm')
// A Doc can be built directly from the fixture words, spaces and attrs,
// without going through the `nlp` pipeline.
test('allows manual construction', () => {
  expect(new Doc(words, spaces, attrs)).toBeInstanceOf(Doc)
})
// Checks the top-level Doc attributes produced by the pipeline for the
// fixture text: its text round-trip, token count, categories and the
// three processing-status flags.
test('has Doc attributes', async () => {
  const parsed = await nlp(text)

  // The original input must come back both via `.text` and via toString().
  expect(parsed.text).toBe(text)
  expect(parsed.toString()).toBe(text)

  expect(parsed.length).toBe(10)
  expect(parsed.cats).toEqual({})

  // Every status flag is expected to be switched on for this fixture.
  for (const flag of ['isTagged', 'isParsed', 'isSentenced']) {
    expect(parsed[flag]).toBe(true)
  }
})
// Checks integer indexing on a Doc: every in-range index must give a Token,
// and indices past the end must give `undefined`.
test('allows token indexing', async () => {
  const doc = await nlp(text)
  for (let i = 0; i < doc.length; i++) {
    expect(doc[i]).toBeInstanceOf(Token)
  }
  // `doc.length` is the first out-of-range index, so probe it explicitly;
  // checking only `doc.length + 1` would let an off-by-one slip through.
  expect(doc[doc.length]).toBeUndefined()
  expect(doc[doc.length + 1]).toBeUndefined()
})
// Checks that a Doc is iterable with for...of and that every yielded item
// is a Token.
test('allows token iteration', async () => {
  const doc = await nlp(text)
  let yielded = 0
  for (const token of doc) {
    expect(token).toBeInstanceOf(Token)
    yielded += 1
  }
  // Without this, an iterator that yields nothing would pass vacuously:
  // the loop body (and its assertion) would simply never run.
  expect(yielded).toBe(doc.length)
})
// Checks `doc.ents` for the fixture text: an Array holding exactly one Span,
// whose text, token offsets and label are pinned below.
test('has named entities (doc.ents)', async () => {
  const parsed = await nlp(text)
  const entities = parsed.ents

  // Shape of the collection.
  expect(entities).toBeInstanceOf(Array)
  expect(entities).toEqual(expect.arrayContaining([expect.any(Span)]))
  expect(entities.length).toBe(1)

  // Contents of the only entity.
  const first = entities[0]
  expect(first).toBeInstanceOf(Span)
  const expected = { text: 'Facebook', start: 8, end: 9, label: 'ORG' }
  for (const [field, value] of Object.entries(expected)) {
    expect(first[field]).toBe(value)
  }
})
// Checks `doc.sents` for the fixture text: an Array of two Spans, the first
// of which has its text and token offsets pinned below.
test('has sentences (doc.sents)', async () => {
  const parsed = await nlp(text)
  const sentences = parsed.sents

  // Shape of the collection.
  expect(sentences).toBeInstanceOf(Array)
  expect(sentences).toEqual(expect.arrayContaining([expect.any(Span)]))
  expect(sentences.length).toBe(2)

  // Contents of the opening sentence.
  const opening = sentences[0]
  expect(opening).toBeInstanceOf(Span)
  const expected = { text: 'Hello world!', start: 0, end: 3 }
  for (const [field, value] of Object.entries(expected)) {
    expect(opening[field]).toBe(value)
  }
})
// Checks `doc.nounChunks` for the fixture text: an Array of three Spans, the
// first of which has its text and token offsets pinned below.
// The title names the property this test actually reads (`nounChunks`), in
// line with the sibling `doc.ents` / `doc.sents` titles.
test('has noun chunks (doc.nounChunks)', async () => {
  const doc = await nlp(text)
  const chunks = doc.nounChunks

  // Shape of the collection.
  expect(chunks).toBeInstanceOf(Array)
  expect(chunks).toEqual(expect.arrayContaining([expect.any(Span)]))
  expect(chunks.length).toBe(3)

  // Contents of the first chunk.
  const chunk = chunks[0]
  expect(chunk).toBeInstanceOf(Span)
  expect(chunk.text).toBe('Hello world')
  expect(chunk.start).toBe(0)
  expect(chunk.end).toBe(2)
})