Skip to content
This repository was archived by the owner on Sep 12, 2018. It is now read-only.

Commit 294880a

Browse files
committed
index: Factor out the SQLAlchemy Index into flexible backends
Make it easy to add alternative backends for the search endpoint. I've added a 'search_backend' config and renamed the 'search_index' to 'sqlalchemy_index_database' to avoid colliding with other backends. I've called the SQLAlchemy module 'db' so we can distinguish between the global sqlalchemy package and our SQLAlchemy-based index module without using the absolute_import future [1,2,3,4]. [1]: https://docs.python.org/2/library/__future__.html [2]: http://legacy.python.org/dev/peps/pep-0328/ [3]: #247 (comment) [4]: #247 (comment) Reported-by: Lee Trout <[email protected]>
1 parent fab832b commit 294880a

File tree

6 files changed

+235
-160
lines changed

6 files changed

+235
-160
lines changed

README.md

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ in action in the example below...
4646

4747
common:
4848
loglevel: info
49-
search_index: "_env:SEARCH_INDEX:"
49+
search_backend: "_env:SEARCH_BACKEND:"
50+
sqlalchemy_index_database:
51+
"_env:SQLALCHEMY_INDEX_DATABASE:sqlite:////tmp/docker-registry.db"
5052

5153
prod:
5254
loglevel: warn
@@ -207,19 +209,23 @@ dev:
207209

208210
### Search-engine options
209211

210-
The Docker Registry can optionally use [SQLAlchemy][] to index
211-
repository information in a database, for the `GET /v1/search`
212-
[endpoint][search-endpoint]. You can configure the backend with a
213-
configuration like:
212+
The Docker Registry can optionally index repository information in a
213+
database for the `GET /v1/search` [endpoint][search-endpoint]. You
214+
can configure the backend with a configuration like:
214215

215216
```yaml
216-
search_index: "_env:SEARCH_INDEX:"
217+
search_backend: "_env:SEARCH_BACKEND:"
217218
```
218219

219-
you can use the `SEARCH_INDEX` environment variable to configure the
220-
string passed to [create_engine][]. If the configured `search_index`
221-
is empty, no index is built, and the search endpoint always returns
222-
empty results.
220+
The `search_backend` setting selects the search backend to use. If
221+
`search_backend` is empty, no index is built, and the search endpoint
222+
always returns empty results. Currently supported backends and their
223+
backend-specific configuration options are:
224+
225+
* `sqlalchemy`: Use [SQLAlchemy][].
226+
* The backing database is selected with
227+
`sqlalchemy_index_database`, which is passed through to
228+
[create_engine][].
223229

224230
### Email options
225231

@@ -422,8 +428,8 @@ cd docker-registry/
422428
tox
423429
```
424430
425-
[SQLAlchemy]: http://docs.sqlalchemy.org/
426431
[search-endpoint]:
427432
http://docs.docker.io/en/latest/reference/api/index_api/#get--v1-search
433+
[SQLAlchemy]: http://docs.sqlalchemy.org/
428434
[create_engine]:
429435
http://docs.sqlalchemy.org/en/latest/core/engines.html#sqlalchemy.create_engine

config/config_sample.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ common:
3131
# Let gunicorn set this environment variable or set a random string here
3232
secret_key: _env:SECRET_KEY
3333

34-
search_index: "_env:SEARCH_INDEX:"
34+
search_backend: "_env:SEARCH_BACKEND:"
35+
sqlalchemy_index_database:
36+
"_env:SQLALCHEMY_INDEX_DATABASE:sqlite:////tmp/docker-registry.db"
3537

3638

3739
# This is the default configuration when no flavor is specified

config/config_test.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@ test:
1515
gs_secret_key: _env:GS_SECRET_KEY
1616
gs_secure: false
1717

18-
search_index: sqlite:////tmp/docker-registry.db
18+
search_backend: sqlalchemy
19+
sqlalchemy_index_database: sqlite:////tmp/docker-registry.db

lib/index/__init__.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""Index backends for the search endpoint
2+
"""
3+
4+
import signals
5+
6+
import config
7+
8+
9+
# Names exported by a star-import of this package: only the load() factory.
__all__ = ['load']
10+
11+
12+
class Index (object):
    """A backend for the search endpoint

    The backend can use ._walk_storage to generate an initial index,
    ._handle_repository_* to stay up to date with registry changes,
    and .results to respond to queries.
    """
    def __init__(self):
        """Subscribe the repository handlers to the registry signals."""
        signals.repository_created.connect(self._handle_repository_created)
        signals.repository_updated.connect(self._handle_repository_updated)
        signals.repository_deleted.connect(self._handle_repository_deleted)

    def _walk_storage(self, store):
        """Iterate through repositories in storage

        This helper is useful for building an initial database for
        your search index.  Yields dictionaries:

          {'name': name, 'description': description}

        store -- object providing a .repositories path and a
          .list_directory(path=...) method returning '/'-separated
          paths.  A listing that raises OSError is treated as empty.
        """
        try:
            # list() consumes the listing inside the try block, so an
            # OSError raised while iterating (in case list_directory is
            # lazy) is caught here as well.
            namespace_paths = list(
                store.list_directory(path=store.repositories))
        except OSError:
            namespace_paths = []
        for namespace_path in namespace_paths:
            # Only the last path component is the namespace name.
            namespace = namespace_path.rsplit('/', 1)[-1]
            try:
                repository_paths = list(
                    store.list_directory(path=namespace_path))
            except OSError:
                repository_paths = []
            for path in repository_paths:
                repository = path.rsplit('/', 1)[-1]
                name = '{0}/{1}'.format(namespace, repository)
                description = None  # TODO(wking): store descriptions
                yield {'name': name, 'description': description}

    def _handle_repository_created(
            self, sender, namespace, repository, value):
        """Signal handler for a new repository (no-op by default)."""
        pass

    def _handle_repository_updated(
            self, sender, namespace, repository, value):
        """Signal handler for a changed repository (no-op by default)."""
        pass

    def _handle_repository_deleted(self, sender, namespace, repository):
        """Signal handler for a removed repository (no-op by default)."""
        pass

    def results(self, search_term):
        """Return a list of results matching search_term

        The list elements should be dictionaries:

          {'name': name, 'description': description}

        Subclasses must override this method; the base implementation
        raises NotImplementedError.
        """
        # Explicit field index ({0!r} rather than {!r}) matches the
        # rest of the package and also works on Python 2.6.
        raise NotImplementedError('results method for {0!r}'.format(self))
69+
70+
71+
def load(kind=None):
    """Return an Index instance according to the configuration.

    kind -- name of the backend to load, compared case-insensitively.
      When empty or None, the 'search_backend' configuration value is
      used instead.

    Raises NotImplementedError when the (explicit or configured)
    backend name is empty or not recognized.
    """
    if not kind:
        # Only consult the configuration when the caller did not choose
        # a backend; an unset setting is treated like an empty one.
        cfg = config.load()
        kind = cfg.search_backend or ''
    # Normalize both explicit and configured names the same way.
    kind = kind.lower()
    if kind == 'sqlalchemy':
        # Imported lazily: the db module imports sqlalchemy at module
        # level, so it is only required when this backend is selected.
        from . import db
        return db.SQLAlchemyIndex()
    raise NotImplementedError('Unknown index type {0!r}'.format(kind))

lib/index/db.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""An SQLAlchemy backend for the search endpoint
2+
"""
3+
4+
import sqlalchemy
5+
import sqlalchemy.exc
6+
import sqlalchemy.ext.declarative
7+
import sqlalchemy.orm
8+
import sqlalchemy.sql.functions
9+
10+
import config
11+
import storage
12+
13+
from . import Index
14+
15+
16+
# Declarative base class shared by the ORM models defined in this module;
# its metadata is what _generate_index uses to create the tables.
Base = sqlalchemy.ext.declarative.declarative_base()
17+
18+
19+
class Version (Base):
    """Row recording the schema version of the search-index database."""
    __tablename__ = 'version'

    id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)

    def __repr__(self):
        # Use the runtime class name so subclasses print correctly.
        class_name = type(self).__name__
        return '<{0}(id={1})>'.format(class_name, self.id)
27+
28+
29+
class Repository (Base):
    """A repository row: a unique name plus an optional description."""
    __tablename__ = 'repository'

    id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
    name = sqlalchemy.Column(
        sqlalchemy.String, nullable=False, unique=True)
    description = sqlalchemy.Column(sqlalchemy.String)

    def __repr__(self):
        # Use the runtime class name so subclasses print correctly.
        template = "<{0}(name='{1}', description='{2}')>"
        class_name = type(self).__name__
        return template.format(class_name, self.name, self.description)
41+
42+
43+
class SQLAlchemyIndex (Index):
    """Maintain an index of repository data in an SQLAlchemy database

    Each repository is stored as a Repository row whose name is a
    '{namespace}/{repository}' string and whose description is a
    string (or NULL).  A Version row records the schema version so
    that a database written with a different schema is rejected.
    """
    def __init__(self, database=None):
        """Open the database, build the index if needed, hook signals.

        database -- URL passed to sqlalchemy.create_engine.  When None,
          the 'sqlalchemy_index_database' configuration value is used.
        """
        if database is None:
            cfg = config.load()
            database = cfg.sqlalchemy_index_database
        self._engine = sqlalchemy.create_engine(database)
        # A session factory; every operation opens its own session.
        self._session = sqlalchemy.orm.sessionmaker(bind=self._engine)
        self.version = 1
        self._setup_database()
        # Connect the signal handlers only once the database is usable.
        super(SQLAlchemyIndex, self).__init__()

    def _setup_database(self):
        """Check the stored schema version, generating the index if absent.

        Raises NotImplementedError if the database holds a schema
        version other than self.version.
        """
        session = self._session()
        try:
            try:
                version = session.query(
                    sqlalchemy.sql.functions.max(Version.id)).first()[0]
            except sqlalchemy.exc.OperationalError:
                # Presumably the tables do not exist yet (fresh
                # database).  Roll back so the session can be reused:
                # some databases refuse further statements inside a
                # transaction after one has failed.
                session.rollback()
                version = None
            if version:
                if version != self.version:
                    raise NotImplementedError(
                        'unrecognized search index version {0}'.format(
                            version))
            else:
                self._generate_index(session=session)
        finally:
            # Release the connection even when the version check raises.
            session.close()

    def _generate_index(self, session):
        """Create the tables and fill them from the storage backend.

        session -- open session used for the inserts; it is committed
          here but closed by the caller.
        """
        store = storage.load()
        Base.metadata.create_all(self._engine)
        session.add(Version(id=self.version))
        for repository in self._walk_storage(store=store):
            session.add(Repository(**repository))
        session.commit()

    def _handle_repository_created(
            self, sender, namespace, repository, value):
        """Add a row for the newly created repository."""
        name = '{0}/{1}'.format(namespace, repository)
        description = ''  # TODO(wking): store descriptions
        session = self._session()
        try:
            session.add(Repository(name=name, description=description))
            session.commit()
        finally:
            session.close()

    def _handle_repository_updated(
            self, sender, namespace, repository, value):
        """Update the description stored for an existing repository."""
        name = '{0}/{1}'.format(namespace, repository)
        description = ''  # TODO(wking): store descriptions
        session = self._session()
        try:
            session.query(Repository).filter(
                Repository.name == name).update(
                    values={'description': description},
                    synchronize_session=False)
            session.commit()
        finally:
            session.close()

    def _handle_repository_deleted(self, sender, namespace, repository):
        """Remove the row for a deleted repository."""
        name = '{0}/{1}'.format(namespace, repository)
        session = self._session()
        try:
            session.query(Repository).filter(
                Repository.name == name).delete()
            session.commit()
        finally:
            session.close()

    def results(self, search_term):
        """Return repositories whose name or description contains search_term

        The result is a list of dictionaries:

          {'name': name, 'description': description}
        """
        session = self._session()
        try:
            # NOTE(review): LIKE wildcards ('%', '_') inside search_term
            # are not escaped, so they act as wildcards in the query.
            like_term = '%{0}%'.format(search_term)
            repositories = session.query(Repository).filter(
                sqlalchemy.sql.or_(
                    Repository.name.like(like_term),
                    Repository.description.like(like_term)))
            # The list is fully built before the session is closed.
            return [
                {
                    'name': repo.name,
                    'description': repo.description,
                }
                for repo in repositories]
        finally:
            session.close()

0 commit comments

Comments
 (0)