Skip to content

Commit 8a72d28

Browse files
authored
Merge branch 'master' into feature/excel_import
2 parents 7b1bbc1 + cf7d827 commit 8a72d28

28 files changed

+1898
-1826
lines changed

.travis.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ python:
66
env:
77
- DATABASE=sqlite
88
- DATABASE=postgres
9+
- DATABASE=mssql
910

1011
services:
1112
- docker
@@ -16,6 +17,11 @@ before_install:
1617
if [[ "${DATABASE}" = "postgres" ]]; then
1718
docker run --rm --name=postgres --network=doccano -d -e POSTGRES_USER=user -e POSTGRES_PASSWORD=pass -e POSTGRES_DB=db postgres
1819
export DATABASE_URL="postgres://user:pass@postgres:5432/db?sslmode=disable"
20+
21+
elif [[ "${DATABASE}" = "mssql" ]]; then
22+
docker run --rm --name=mssql --network=doccano -d -e ACCEPT_EULA=y -e SA_PASSWORD=sUp3rS3cr3t mcr.microsoft.com/mssql/server:2017-latest
23+
docker exec -it mssql sh -c "while ! /opt/mssql-tools/bin/sqlcmd -U SA -P sUp3rS3cr3t -Q 'CREATE DATABASE db;'; do sleep 3; done"
24+
export DATABASE_URL="mssql://SA:sUp3rS3cr3t@mssql:1433/db?sslmode=disable"
1925
fi
2026
2127
install:
@@ -25,7 +31,7 @@ script:
2531
- docker build --target=builder --tag=doccano-test .
2632
- >
2733
if [[ "${DATABASE}" != "sqlite" ]]; then
28-
docker run --network doccano -e DATABASE_URL="${DATABASE_URL}" -it doccano-test sh -c 'app/manage.py migrate && app/manage.py test api.tests server.tests'
34+
docker run --network doccano -e DATABASE_URL="${DATABASE_URL}" -it doccano-test sh -c 'app/manage.py wait_for_db && app/manage.py migrate && app/manage.py test api.tests server.tests'
2935
fi
3036
3137
before_deploy:

Dockerfile

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ FROM python:${PYTHON_VERSION}-stretch AS builder
44
ARG NODE_VERSION="8.x"
55
RUN curl -sL "https://deb.nodesource.com/setup_${NODE_VERSION}" | bash - \
66
&& apt-get install --no-install-recommends -y \
7-
nodejs=8.16.0-1nodesource1
7+
nodejs
88

9-
RUN apt-get install --no-install-recommends -y \
10-
unixodbc-dev=2.3.4-1
9+
COPY tools/install-mssql.sh /doccano/tools/install-mssql.sh
10+
RUN /doccano/tools/install-mssql.sh --dev
1111

1212
COPY app/server/static/package*.json /doccano/app/server/static/
1313
RUN cd /doccano/app/server/static \
@@ -33,19 +33,8 @@ RUN cd /doccano \
3333

3434
FROM python:${PYTHON_VERSION}-slim-stretch AS runtime
3535

36-
RUN apt-get update \
37-
&& apt-get install --no-install-recommends -y \
38-
curl=7.52.1-5+deb9u9 \
39-
gnupg=2.1.18-8~deb9u4 \
40-
apt-transport-https=1.4.9 \
41-
&& curl -fsS https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
42-
&& curl -fsS https://packages.microsoft.com/config/debian/9/prod.list > /etc/apt/sources.list.d/mssql.list \
43-
&& apt-get update \
44-
&& ACCEPT_EULA=Y apt-get install --no-install-recommends -y \
45-
msodbcsql17=17.3.1.1-1 \
46-
mssql-tools=17.3.0.1-1 \
47-
&& apt-get remove -y curl gnupg apt-transport-https \
48-
&& rm -rf /var/lib/apt/lists/*
36+
COPY --from=builder /doccano/tools/install-mssql.sh /doccano/tools/install-mssql.sh
37+
RUN /doccano/tools/install-mssql.sh
4938

5039
RUN useradd -ms /bin/sh doccano
5140

Procfile

Lines changed: 0 additions & 1 deletion
This file was deleted.

README.md

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -124,21 +124,19 @@ Let’s start the development server and explore it.
124124

125125
Depending on your installation method, there are three options:
126126

127-
**Option1: Running the Docker image as a Container**
127+
#### Option 1: Running the Docker image as a Container
128128

129129
First, run a Docker container:
130130

131131
```bash
132-
docker run -d --name doccano -p 8000:8000 chakkiworks/doccano
132+
docker run -d --rm --name doccano \
133+
-e "ADMIN_USERNAME=admin" \
134+
-e "ADMIN_EMAIL=admin@example.com" \
135+
-e "ADMIN_PASSWORD=password" \
136+
-p 8000:8000 chakkiworks/doccano
133137
```
134138

135-
Then, execute `create-admin.sh` script for creating a superuser.
136-
137-
```bash
138-
docker exec doccano tools/create-admin.sh "admin" "admin@example.com" "password"
139-
```
140-
141-
**Option2: Running Django development server**
139+
#### Option 2: Running Django development server
142140

143141
Before running, we need to apply the database migrations. Run the following command:
144142

@@ -170,7 +168,7 @@ Optionally, you can change the bind ip and port using the command
170168
python manage.py runserver <ip>:<port>
171169
```
172170

173-
**Option3: Running the development Docker-Compose stack**
171+
#### Option 3: Running the development Docker-Compose stack
174172

175173
We can use docker-compose to set up the webpack server, django server, database, etc. all in one command:
176174

@@ -259,14 +257,14 @@ Input file may look like this:
259257
`import.json`
260258

261259
```JSON
262-
{"text": "EU rejects German call to boycott British lamb.", "external_id": 1}
260+
{"text": "EU rejects German call to boycott British lamb.", "meta": {"external_id": 1}}
263261
```
264262

265263
and the exported file will look like this:
266264
`output.json`
267265

268266
```JSON
269-
{"doc_id": 2023, "text": "EU rejects German call to boycott British lamb.", "labels": ["news"], "username": "root", "metadata": {"external_id": 1}}
267+
{"doc_id": 2023, "text": "EU rejects German call to boycott British lamb.", "labels": ["news"], "username": "root", "meta": {"external_id": 1}}
270268
```
271269

272270
### Tutorial

app.json

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,13 @@
3636
"value": "True"
3737
}
3838
},
39+
"stack": "container",
3940
"scripts": {
40-
"postdeploy": "sh tools/heroku.sh deploy"
41+
"postdeploy": "sh tools/heroku.sh"
4142
},
4243
"addons": [
4344
{
4445
"plan": "heroku-postgresql:hobby-dev"
4546
}
46-
],
47-
"buildpacks": [
48-
{
49-
"url": "heroku/nodejs"
50-
},
51-
{
52-
"url": "heroku/python"
53-
}
5447
]
5548
}

app/api/managers.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from collections import Counter
2+
3+
from django.db.models import Manager, Count
4+
5+
6+
class AnnotationManager(Manager):
7+
8+
def get_label_per_data(self, project):
9+
label_count = Counter()
10+
user_count = Counter()
11+
docs = project.documents.all()
12+
annotations = self.filter(document_id__in=docs.all())
13+
14+
for d in annotations.values('label__text', 'user__username').annotate(Count('label'), Count('user')):
15+
label_count[d['label__text']] += d['label__count']
16+
user_count[d['user__username']] += d['user__count']
17+
18+
return label_count, user_count
19+
20+
21+
class Seq2seqAnnotationManager(Manager):
22+
23+
def get_label_per_data(self, project):
24+
label_count = Counter()
25+
user_count = Counter()
26+
docs = project.documents.all()
27+
annotations = self.filter(document_id__in=docs.all())
28+
29+
for d in annotations.values('text', 'user__username').annotate(Count('text'), Count('user')):
30+
label_count[d['text']] += d['text__count']
31+
user_count[d['user__username']] += d['user__count']
32+
33+
return label_count, user_count

app/api/models.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from django.core.exceptions import ValidationError
88
from polymorphic.models import PolymorphicModel
99

10+
from .managers import AnnotationManager, Seq2seqAnnotationManager
11+
1012
DOCUMENT_CLASSIFICATION = 'DocumentClassification'
1113
SEQUENCE_LABELING = 'SequenceLabeling'
1214
SEQ2SEQ = 'Seq2seq'
@@ -191,6 +193,8 @@ def __str__(self):
191193

192194

193195
class Annotation(models.Model):
196+
objects = AnnotationManager()
197+
194198
prob = models.FloatField(default=0.0)
195199
manual = models.BooleanField(default=False)
196200
user = models.ForeignKey(User, on_delete=models.CASCADE)
@@ -224,6 +228,9 @@ class Meta:
224228

225229

226230
class Seq2seqAnnotation(Annotation):
231+
# Override AnnotationManager for custom functionality
232+
objects = Seq2seqAnnotationManager()
233+
227234
document = models.ForeignKey(Document, related_name='seq2seq_annotations', on_delete=models.CASCADE)
228235
text = models.CharField(max_length=500)
229236

app/api/serializers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,5 +160,5 @@ class Seq2seqAnnotationSerializer(serializers.ModelSerializer):
160160

161161
class Meta:
162162
model = Seq2seqAnnotation
163-
fields = ('id', 'text', 'user', 'document')
163+
fields = ('id', 'text', 'user', 'document', 'prob')
164164
read_only_fields = ('user',)

app/api/tests/data/labeling.invalid.conll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
SOCCERO
1+
SOCCERO SOCCERO SOCCERO
22
- O
33
JAPAN B-LOC
44
GET O

app/api/tests/test_utils.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -143,13 +143,14 @@ def test_make_annotations(self):
143143

144144
class TestCoNLLParser(TestCase):
145145
def test_calc_char_offset(self):
146-
words = ['EU', 'rejects', 'German', 'call']
147-
tags = ['B-ORG', 'O', 'B-MISC', 'O']
148-
149-
entities = get_entities(tags)
150-
actual = CoNLLParser.calc_char_offset(words, tags)
151-
152-
self.assertEqual(entities, [('ORG', 0, 0), ('MISC', 2, 2)])
146+
f = io.BytesIO(
147+
b"EU\tORG\n"
148+
b"rejects\t_\n"
149+
b"German\tMISC\n"
150+
b"call\t_\n"
151+
)
152+
153+
actual = next(CoNLLParser().parse(f))[0]
153154

154155
self.assertEqual(actual, {
155156
'text': 'EU rejects German call',

0 commit comments

Comments
 (0)