Skip to content

Commit 39b2b7c

Browse files
committed
Merge branch 'dev' into monitoring
2 parents 974c206 + 369b734 commit 39b2b7c

File tree

4 files changed

+115
-92
lines changed

4 files changed

+115
-92
lines changed

.drone.yml

Lines changed: 85 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ name: arm64
6767
platform:
6868
arch: arm64
6969

70-
steps:
70+
steps:
7171
- name: submodules
7272
image: alpine/git
7373
commands:
@@ -96,41 +96,41 @@ trigger:
9696
- push
9797

9898
---
99-
kind: pipeline
100-
type: docker
101-
name: gpu
102-
103-
platform:
104-
arch: amd64
105-
106-
steps:
107-
- name: submodules
108-
image: alpine/git
109-
commands:
110-
- git submodule init
111-
- 'git config --global url."https://github.com/".insteadOf git@github.com:'
112-
- "git submodule update --recursive"
113-
- name: build and publish
114-
image: plugins/docker
115-
settings:
116-
registry: registry.dev.kern.ai
117-
username:
118-
from_secret: docker_username
119-
password:
120-
from_secret: docker_password
121-
dockerfile: gpu.Dockerfile
122-
repo: "registry.dev.kern.ai/${DRONE_REPO}"
123-
tags: ["${DRONE_COMMIT_SHA}-gpu", "${DRONE_COMMIT_BRANCH}-gpu"]
124-
cache_from:
125-
- "registry.dev.kern.ai/${DRONE_REPO}:${DRONE_COMMIT_BRANCH}-gpu"
126-
127-
trigger:
128-
branch:
129-
- dev
130-
event:
131-
- push
132-
133-
---
99+
# kind: pipeline
100+
# type: docker
101+
# name: gpu
102+
103+
# platform:
104+
# arch: amd64
105+
106+
# steps:
107+
# - name: submodules
108+
# image: alpine/git
109+
# commands:
110+
# - git submodule init
111+
# - 'git config --global url."https://github.com/".insteadOf git@github.com:'
112+
# - "git submodule update --recursive"
113+
# - name: build and publish
114+
# image: plugins/docker
115+
# settings:
116+
# registry: registry.dev.kern.ai
117+
# username:
118+
# from_secret: docker_username
119+
# password:
120+
# from_secret: docker_password
121+
# dockerfile: gpu.Dockerfile
122+
# repo: "registry.dev.kern.ai/${DRONE_REPO}"
123+
# tags: ["${DRONE_COMMIT_SHA}-gpu", "${DRONE_COMMIT_BRANCH}-gpu"]
124+
# cache_from:
125+
# - "registry.dev.kern.ai/${DRONE_REPO}:${DRONE_COMMIT_BRANCH}-gpu"
126+
127+
# trigger:
128+
# branch:
129+
# - dev
130+
# event:
131+
# - push
132+
133+
# ---
134134
kind: pipeline
135135
type: docker
136136
name: amd64-dockerhub
@@ -160,12 +160,42 @@ trigger:
160160
- tag
161161

162162
---
163+
# kind: pipeline
164+
# type: docker
165+
# name: amd64-gpu-dockerhub
166+
167+
# platform:
168+
# arch: amd64
169+
170+
# steps:
171+
# - name: submodules
172+
# image: alpine/git
173+
# commands:
174+
# - git submodule init
175+
# - 'git config --global url."https://github.com/".insteadOf git@github.com:'
176+
# - "git submodule update --recursive"
177+
# - name: build and publish
178+
# image: plugins/docker
179+
# settings:
180+
# username:
181+
# from_secret: dockerhub_username
182+
# password:
183+
# from_secret: dockerhub_password
184+
# dockerfile: gpu.Dockerfile
185+
# repo: "kernai/${DRONE_REPO_NAME}"
186+
# tag: "${DRONE_TAG}-gpu"
187+
188+
# trigger:
189+
# event:
190+
# - tag
191+
192+
# ---
163193
kind: pipeline
164194
type: docker
165-
name: amd64-gpu-dockerhub
195+
name: arm64-dockerhub
166196

167197
platform:
168-
arch: amd64
198+
arch: arm64
169199

170200
steps:
171201
- name: submodules
@@ -181,57 +211,27 @@ steps:
181211
from_secret: dockerhub_username
182212
password:
183213
from_secret: dockerhub_password
184-
dockerfile: gpu.Dockerfile
185214
repo: "kernai/${DRONE_REPO_NAME}"
186-
tag: "${DRONE_TAG}-gpu"
215+
tag: "${DRONE_TAG}-drone-arm64"
187216

188217
trigger:
189218
event:
190219
- tag
191220

192221
---
193222
kind: pipeline
194-
type: docker
195-
name: arm64-dockerhub
196-
197-
platform:
198-
arch: arm64
199-
223+
name: manifest-version
200224
steps:
201-
- name: submodules
202-
image: alpine/git
203-
commands:
204-
- git submodule init
205-
- 'git config --global url."https://github.com/".insteadOf git@github.com:'
206-
- "git submodule update --recursive"
207-
- name: build and publish
208-
image: plugins/docker
225+
- name: manifest
226+
image: plugins/manifest
209227
settings:
228+
spec: drone-manifest-version.tmpl
229+
tag: "${DRONE_TAG}"
230+
ignore_missing: true
210231
username:
211232
from_secret: dockerhub_username
212233
password:
213234
from_secret: dockerhub_password
214-
repo: "kernai/${DRONE_REPO_NAME}"
215-
tag: "${DRONE_TAG}-drone-arm64"
216-
217-
trigger:
218-
event:
219-
- tag
220-
221-
---
222-
kind: pipeline
223-
name: manifest-version
224-
steps:
225-
- name: manifest
226-
image: plugins/manifest
227-
settings:
228-
spec: drone-manifest-version.tmpl
229-
tag: "${DRONE_TAG}"
230-
ignore_missing: true
231-
username:
232-
from_secret: dockerhub_username
233-
password:
234-
from_secret: dockerhub_password
235235

236236
depends_on:
237237
- amd64-dockerhub
@@ -245,16 +245,16 @@ trigger:
245245
kind: pipeline
246246
name: manifest-latest
247247
steps:
248-
- name: manifest
249-
image: plugins/manifest
250-
settings:
251-
spec: drone-manifest-latest.tmpl
252-
tag: "${DRONE_TAG}"
253-
ignore_missing: true
254-
username:
255-
from_secret: dockerhub_username
256-
password:
257-
from_secret: dockerhub_password
248+
- name: manifest
249+
image: plugins/manifest
250+
settings:
251+
spec: drone-manifest-latest.tmpl
252+
tag: "${DRONE_TAG}"
253+
ignore_missing: true
254+
username:
255+
from_secret: dockerhub_username
256+
password:
257+
from_secret: dockerhub_password
258258

259259
depends_on:
260260
- manifest-version

src/embedders/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ def load_pca_weights(self, file_name: str):
165165
def _reduce(
166166
self,
167167
documents: List[Union[str, Doc]],
168+
as_generator: bool,
168169
fit_model: bool,
169170
fit_after_n_batches: int,
170171
):
@@ -178,11 +179,11 @@ def _reduce_batch(
178179
fit_after_n_batches: int,
179180
) -> Union[List, Generator]:
180181
if as_generator:
181-
return self._reduce(documents, fit_model, fit_after_n_batches)
182+
return self._reduce(documents, as_generator, fit_model, fit_after_n_batches)
182183
else:
183184
embeddings = []
184185
for embedding_batch in self._reduce(
185-
documents, fit_model, fit_after_n_batches
186+
documents, as_generator, fit_model, fit_after_n_batches
186187
):
187188
embeddings.extend(embedding_batch)
188189
return embeddings

src/embedders/classification/reduce.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def _transform(
2121
def _reduce(
2222
self,
2323
documents: List[Union[str, Doc]],
24+
as_generator: bool,
2425
fit_model: bool,
2526
fit_after_n_batches: int,
2627
) -> Generator[List[List[Union[float, int]]], None, None]:
@@ -56,8 +57,16 @@ def _reduce(
5657
if batch_idx > fit_after_n_batches:
5758
yield self._transform(batch)
5859
else:
59-
embeddings = self.embedder.transform(documents)
60-
yield self._transform(embeddings)
60+
if as_generator:
61+
embeddings = [
62+
emb
63+
for batch in self.embedder.transform(documents, as_generator)
64+
for emb in batch
65+
]
66+
yield from util.batch(self._transform(embeddings), self.batch_size)
67+
else:
68+
embeddings = self.embedder.transform(documents)
69+
yield self._transform(embeddings)
6170

6271
@staticmethod
6372
def load(embedder: dict) -> "PCASentenceReducer":

src/embedders/extraction/reduce.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from spacy.tokens.doc import Doc
12
from typing import List, Generator, Union
23
import numpy as np
34
from src.embedders import PCAReducer, util
@@ -24,7 +25,11 @@ def _transform(
2425
return batch_unsqueezed
2526

2627
def _reduce(
27-
self, documents, fit_model, fit_after_n_batches
28+
self,
29+
documents: List[Union[str, Doc]],
30+
as_generator: bool,
31+
fit_model: bool,
32+
fit_after_n_batches: int,
2833
) -> Generator[List[List[List[Union[float, int]]]], None, None]:
2934
if fit_model:
3035
embeddings_training = []
@@ -60,5 +65,13 @@ def _reduce(
6065
if batch_idx > fit_after_n_batches:
6166
yield self._transform(batch)
6267
else:
63-
embeddings = self.embedder.transform(documents)
64-
yield self._transform(embeddings)
68+
if as_generator:
69+
embeddings = [
70+
emb
71+
for batch in self.embedder.transform(documents, as_generator)
72+
for emb in batch
73+
]
74+
yield from util.batch(self._transform(embeddings), self.batch_size)
75+
else:
76+
embeddings = self.embedder.transform(documents)
77+
yield self._transform(embeddings)

0 commit comments

Comments
 (0)