-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathner.py
More file actions
41 lines (32 loc) · 1.18 KB
/
ner.py
File metadata and controls
41 lines (32 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from __future__ import annotations
from dataclasses import dataclass
from typing import List
@dataclass
class NEREntity:
    """A named entity: a span of tokens together with its NER class.

    Equality and hashing are based on ``tokens``, ``start_idx`` and
    ``end_idx`` only; ``tag`` is deliberately left out of both so that
    ``__eq__`` and ``__hash__`` stay consistent with each other.
    """

    #: tokens making up the entity
    tokens: List[str]
    #: index of the first token of the entity
    start_idx: int
    #: index marking the end of the entity (``entities_to_BIO`` in this
    #: module treats it as exclusive)
    end_idx: int
    #: NER class (without BIO prefix as in ``PER`` and not ``B-PER``)
    tag: str

    def shifted(self, shift: int) -> NEREntity:
        """Return a new entity whose indices are offset by ``shift``.

        The current entity is left untouched.

        :param shift: amount added to both ``start_idx`` and ``end_idx``
            (may be negative)
        :return: a new instance of the same class; all other attributes
            are carried over as-is (``tokens`` is shared, not copied)
        """
        # ``vars(self)`` is the live instance ``__dict__``: writing to it
        # directly would also move ``self``. Work on a shallow copy.
        self_dict = dict(vars(self))
        self_dict["start_idx"] = self.start_idx + shift
        self_dict["end_idx"] = self.end_idx + shift
        return self.__class__(**self_dict)

    def __eq__(self, other: object) -> bool:
        """Compare two entities on tokens and position (``tag`` is ignored)."""
        # Let Python try the reflected comparison / fall back to identity
        # instead of raising AttributeError on foreign types.
        if not isinstance(other, NEREntity):
            return NotImplemented
        return (
            self.tokens == other.tokens
            and self.start_idx == other.start_idx
            and self.end_idx == other.end_idx
        )

    def __hash__(self) -> int:
        """Hash on the same attributes that ``__eq__`` compares."""
        return hash(tuple(self.tokens) + (self.start_idx, self.end_idx))
def entities_to_BIO(tokens: List[str], entities: List[NEREntity]) -> List[str]:
    """Convert a list of entities to BIO tags.

    Every token starts as ``"O"``. For each entity, the first token of its
    span ``tokens[start_idx:end_idx]`` is tagged ``B-<tag>`` and the
    remaining ones ``I-<tag>``. Entities are applied in order, so a later
    entity overwrites the tags of an earlier overlapping one.

    :param tokens: the tokens of the text
    :param entities: entities whose ``start_idx`` / ``end_idx`` index
        into ``tokens`` (``end_idx`` exclusive)
    :return: a list of BIO tags, always exactly as long as ``tokens``
    """
    tags = ["O"] * len(tokens)
    for entity in entities:
        # Resolve the span with regular slice semantics, but as concrete
        # in-range indices: unlike slice assignment, this can never grow or
        # shrink ``tags`` when the span is empty or runs past the end.
        span = range(len(tags))[entity.start_idx : entity.end_idx]
        for offset, idx in enumerate(span):
            prefix = "B" if offset == 0 else "I"
            tags[idx] = f"{prefix}-{entity.tag}"
    return tags