Skip to content

Commit f321caa

Browse files
committed
add capability to strip outgoing changesets in opengrok-mirror
fixes #3892
1 parent 5cce316 commit f321caa

File tree

6 files changed

+157
-34
lines changed

6 files changed

+157
-34
lines changed

tools/src/main/python/opengrok_tools/mirror.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#
2020

2121
#
22-
# Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
22+
# Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
2323
# Portions Copyright (c) 2019, Krystof Tulinger <[email protected]>
2424
#
2525

@@ -58,25 +58,26 @@
5858
if major_version < 3:
5959
fatal("Need Python 3, you are running {}".format(major_version))
6060

61-
__version__ = "1.1"
61+
__version__ = "1.2"
6262

6363
OPENGROK_NO_MIRROR_ENV = "OPENGROK_NO_MIRROR"
6464

6565

6666
def worker(args):
67-
project_name, logdir, loglevel, backupcount, config, check_changes, uri, \
68-
source_root, batch, headers, api_timeout = args
67+
project_name, logdir, loglevel, backup_count, config, check_changes, check_outgoing, uri, \
68+
source_root, batch, headers, timeout, api_timeout = args
6969

7070
if batch:
7171
get_batch_logger(logdir, project_name,
7272
loglevel,
73-
backupcount,
73+
backup_count,
7474
get_class_basename())
7575

7676
return mirror_project(config, project_name,
77-
check_changes,
77+
check_changes, check_outgoing,
7878
uri, source_root, headers=headers,
79-
timeout=api_timeout)
79+
timeout=timeout,
80+
api_timeout=api_timeout)
8081

8182

8283
def main():
@@ -107,12 +108,18 @@ def main():
107108
' repositories,'
108109
' terminate the processing'
109110
' if no change is found.')
111+
# NOTE: type=bool is a trap with argparse: bool() of any non-empty string is
# True, so "--strip_outgoing False" used to enable the feature. The value is
# parsed explicitly instead; the option still takes a value, so existing
# invocations such as "--strip_outgoing True" keep working.
parser.add_argument('--strip_outgoing', default=False,
                    type=lambda v: v.strip().lower() not in ('', '0', 'false', 'no', 'off'),
                    help='check outgoing changes for each repository of a project, '
                         'strip any such changes and remove all project data so that '
                         'it can be reindexed from scratch')
110115
parser.add_argument('-w', '--workers', default=cpu_count(), type=int,
111116
help='Number of worker processes')
112117
add_http_headers(parser)
113118
parser.add_argument('--api_timeout', type=int, default=3,
114119
help='Set response timeout in seconds '
115120
'for RESTful API calls')
121+
parser.add_argument('--async_api_timeout', type=int, default=300,
122+
help='Set timeout in seconds for asynchronous RESTful API calls')
116123

117124
try:
118125
args = parser.parse_args()
@@ -209,9 +216,11 @@ def main():
209216
worker_args.append([x, logdir, args.loglevel,
210217
args.backupcount, config,
211218
args.check_changes,
219+
args.strip_outgoing,
212220
args.uri, source_root,
213221
args.batch, headers,
214-
args.api_timeout])
222+
args.api_timeout,
223+
args.async_api_timeout])
215224
try:
216225
project_results = pool.map(worker, worker_args, 1)
217226
except KeyboardInterrupt:

tools/src/main/python/opengrok_tools/scm/git.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
#
1919

2020
#
21-
# Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
21+
# Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
2222
# Portions Copyright (c) 2020, Krystof Tulinger <[email protected]>
2323
#
2424

@@ -54,3 +54,25 @@ def reposync(self):
5454
def incoming_check(self):
    """
    Check for incoming changes by running a dry-run pull.

    :return the result of running the custom incoming command
    with 'pull --dry-run'.
    """
    self._configure_git_pull()
    dry_run_pull = [self.command, 'pull', '--dry-run']
    return self._run_custom_incoming_command(dry_run_pull)
57+
58+
def strip_outgoing(self):
    """
    Strip changesets that are present locally but not in 'origin'.

    The outgoing changesets are listed oldest first ('git log origin..'
    with --reverse); if there are any, the repository is hard-reset
    to the parent of the oldest one.

    :return True if the repository was reset, False if there were
    no outgoing changesets.
    :raise RepositoryException if listing the changesets or the reset fails.
    """
    self._configure_git_pull()
    # --reverse puts the oldest outgoing changeset on the first output line.
    status, out = self._run_command([self.command, 'log',
                                     '--pretty=tformat:%H', '--reverse', 'origin..'])
    if status != 0:
        raise RepositoryException("failed to check for outgoing changes in {}: {}".
                                  format(self, status))

    # NOTE(review): the output is presumably one string with a hash per line
    # (confirm against _run_command); a sequence of lines is accepted as well.
    # Neither type has a .get() method, which the previous code relied on.
    lines = out.splitlines() if isinstance(out, str) else list(out or [])
    cset = next((line.strip() for line in lines if line.strip()), None)
    if cset is None:
        # Nothing outgoing, nothing to strip.
        return False

    self.logger.debug("Resetting the repository {} to parent of changeset {}".
                      format(self, cset))
    status, out = self._run_command([self.command, 'reset', '--hard',
                                     cset + '^'])
    if status != 0:
        raise RepositoryException("failed to reset {} to parent of changeset {}: {}".
                                  format(self, cset, out))

    return True

tools/src/main/python/opengrok_tools/scm/repository.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,13 +105,23 @@ def incoming(self):
105105
def incoming_check(self):
106106
"""
107107
Check if there are any incoming changes.
108-
Normally this method definition is overriden, unless the repository
108+
Normally this method definition is overridden, unless the repository
109109
type has no way how to check for incoming changes.
110110
111-
Return True if so, False otherwise.
111+
:return True if so, False otherwise.
112112
"""
113113
return True
114114

115+
def strip_outgoing(self):
    """
    Strip any outgoing changes.

    This default implementation strips nothing. Repository types that
    are able to detect and strip outgoing changes are expected to
    override it.

    :return True if any changes were stripped, False otherwise.
    """
    return False
124+
115125
def _run_custom_sync_command(self, command):
116126
"""
117127
Execute the custom sync command.
@@ -152,7 +162,7 @@ def _run_command(self, command):
152162
env_vars=self.env, logger=self.logger)
153163
cmd.execute()
154164
if cmd.getretcode() != 0 or cmd.getstate() != Command.FINISHED:
155-
cmd.log_error("failed to perform command")
165+
cmd.log_error("failed to perform command {}".format(command))
156166
status = cmd.getretcode()
157167
if status == 0 and cmd.getstate() != Command.FINISHED:
158168
status = 1

tools/src/main/python/opengrok_tools/utils/mirror.py

Lines changed: 79 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
)
3939
from .patterns import PROJECT_SUBST, COMMAND_PROPERTY
4040
from .utils import is_exe, check_create_dir, get_int, is_web_uri, get_bool
41-
from .opengrok import get_repos, get_repo_type, get_uri
41+
from .opengrok import get_repos, get_repo_type, get_uri, delete_project_data
4242
from .hook import run_hook
4343
from .command import Command
4444
from .restful import call_rest_api, do_api_call
@@ -64,6 +64,7 @@
6464
DISABLED_CMD_PROPERTY = 'disabled_command'
6565
HOOK_PRE_PROPERTY = "pre"
6666
HOOK_POST_PROPERTY = "post"
67+
STRIP_OUTGOING_PROPERTY = "strip_outgoing"
6768

6869

6970
def get_repos_for_project(project_name, uri, source_root,
@@ -177,6 +178,7 @@ def get_project_properties(project_config, project_name, hookdir):
177178
use_proxy = False
178179
ignored_repos = None
179180
check_changes = None
181+
check_outgoing = None
180182
ignore_errors = None
181183

182184
logger = logging.getLogger(__name__)
@@ -230,6 +232,12 @@ def get_project_properties(project_config, project_name, hookdir):
230232
project_config.get(INCOMING_PROPERTY))
231233
logger.debug("incoming check = {}".format(check_changes))
232234

235+
# Per-project strip-outgoing setting; check_outgoing keeps its initial value
# when the project configuration does not specify the property.
if project_config.get(STRIP_OUTGOING_PROPERTY) is not None:
    check_outgoing = get_bool(logger, ("outgoing check for project {}".
                                       format(project_name)),
                              project_config.get(STRIP_OUTGOING_PROPERTY))
    # Log the value that was just parsed (this used to log check_changes).
    logger.debug("outgoing check = {}".format(check_outgoing))
240+
233241
if project_config.get(IGNORE_ERR_PROPERTY) is not None:
234242
ignore_errors = get_bool(logger, ("ignore errors for project {}".
235243
format(project_name)),
@@ -240,7 +248,7 @@ def get_project_properties(project_config, project_name, hookdir):
240248
ignored_repos = []
241249

242250
return prehook, posthook, hook_timeout, command_timeout, \
243-
use_proxy, ignored_repos, check_changes, ignore_errors
251+
use_proxy, ignored_repos, check_changes, check_outgoing, ignore_errors
244252

245253

246254
def process_hook(hook_ident, hook, source_root, project_name, proxy,
@@ -342,7 +350,7 @@ def run_command(cmd, project_name):
342350

343351

344352
def handle_disabled_project(config, project_name, disabled_msg, headers=None,
345-
timeout=None):
353+
timeout=None, api_timeout=None):
346354
disabled_command = config.get(DISABLED_CMD_PROPERTY)
347355
if disabled_command:
348356
logger = logging.getLogger(__name__)
@@ -365,7 +373,7 @@ def handle_disabled_project(config, project_name, disabled_msg, headers=None,
365373

366374
try:
367375
call_rest_api(disabled_command, {PROJECT_SUBST: project_name},
368-
http_headers=headers, timeout=timeout)
376+
http_headers=headers, timeout=timeout, api_timeout=api_timeout)
369377
except RequestException as e:
370378
logger.error("API call failed for disabled command of "
371379
"project '{}': {}".
@@ -393,18 +401,58 @@ def get_mirror_retcode(ignore_errors, value):
393401
return value
394402

395403

396-
def mirror_project(config, project_name, check_changes, uri,
397-
source_root, headers=None, timeout=None):
404+
def process_outgoing(repos, project_name):
    """
    Strip outgoing changes from each of the repositories.

    Every repository is processed, even after one of them reports
    that it had changes stripped.

    :param repos: list of repository objects
    :param project_name: name of the project
    :return: True if at least one repository had changes stripped,
    False otherwise
    """

    logger = logging.getLogger(__name__)

    stripped_any = False
    for repository in repos:
        if not repository.strip_outgoing():
            continue

        stripped_any = True
        logger.debug('Repository {} in project {} had outgoing changes stripped'.
                     format(repository, project_name))

    return stripped_any
422+
423+
424+
def wipe_project_data(project_name, uri, headers=None, timeout=None, api_timeout=None):
    """
    Remove data for the project and mark it as not indexed.
    :param project_name: name of the project
    :param uri: URI of the webapp
    :param headers: HTTP headers
    :param timeout: connect timeout
    :param api_timeout: asynchronous API timeout
    :return: the result of delete_project_data(), so that a failed removal
    is not silently discarded (presumably True on success and False
    on failure; verify against its implementation)
    """

    logger = logging.getLogger(__name__)

    logger.info("removing data for project {}".format(project_name))
    # Hand the outcome back to the caller instead of dropping it.
    return delete_project_data(logger, project_name, uri,
                               headers=headers, timeout=timeout, api_timeout=api_timeout)
439+
440+
441+
def mirror_project(config, project_name, check_changes, check_outgoing, uri,
442+
source_root, headers=None, timeout=None, api_timeout=None):
398443
"""
399444
Mirror the repositories of single project.
400445
:param config global configuration dictionary
401446
:param project_name: name of the project
402447
:param check_changes: check for changes in the project or its repositories
403448
and terminate if no change is found
449+
:param check_outgoing: check for outgoing changes in the repositories of the project,
450+
strip the changes and wipe project data if such changes were found
404451
:param uri web application URI
405452
:param source_root source root
406453
:param headers: optional dictionary of HTTP headers
407-
:param timeout: optional timeout in seconds for API call response
454+
:param timeout: connect timeout
455+
:param api_timeout: optional timeout in seconds for API call response
408456
:return exit code
409457
"""
410458

@@ -416,6 +464,7 @@ def mirror_project(config, project_name, check_changes, uri,
416464
prehook, posthook, hook_timeout, command_timeout, use_proxy, \
417465
ignored_repos, \
418466
check_changes_proj, \
467+
check_outgoing_proj, \
419468
ignore_errors_proj = get_project_properties(project_config,
420469
project_name,
421470
config.
@@ -431,6 +480,11 @@ def mirror_project(config, project_name, check_changes, uri,
431480
else:
432481
check_changes_config = check_changes_proj
433482

483+
if check_outgoing_proj is None:
484+
check_outgoing_config = config.get(STRIP_OUTGOING_PROPERTY)
485+
else:
486+
check_outgoing_config = check_outgoing_proj
487+
434488
if ignore_errors_proj is None:
435489
ignore_errors = config.get(IGNORE_ERR_PROPERTY)
436490
else:
@@ -446,7 +500,8 @@ def mirror_project(config, project_name, check_changes, uri,
446500
project_config.
447501
get(DISABLED_REASON_PROPERTY),
448502
headers=headers,
449-
timeout=timeout)
503+
timeout=timeout,
504+
api_timeout=api_timeout)
450505
logger.info("Project '{}' disabled, exiting".
451506
format(project_name))
452507
return CONTINUE_EXITVAL
@@ -473,6 +528,20 @@ def mirror_project(config, project_name, check_changes, uri,
473528
if check_changes_config is not None:
474529
check_changes = check_changes_config
475530

531+
if check_outgoing_config is not None:
532+
check_outgoing = check_outgoing_config
533+
534+
if check_outgoing:
535+
try:
536+
r = process_outgoing(repos, project_name)
537+
except RepositoryException as exc:
538+
logger.error('Failed to handle outgoing changes for '
539+
'a repository in project {}: {}'.format(project_name, exc))
540+
return get_mirror_retcode(ignore_errors, FAILURE_EXITVAL)
541+
if r:
542+
wipe_project_data(project_name, uri, headers=headers,
543+
timeout=timeout, api_timeout=api_timeout)
544+
476545
# Check if the project or any of its repositories have changed.
477546
if check_changes:
478547
r = process_changes(repos, project_name, uri, headers=headers)
@@ -522,7 +591,7 @@ def check_project_configuration(multiple_project_config, hookdir=False,
522591
HOOK_TIMEOUT_PROPERTY, PROXY_PROPERTY,
523592
IGNORED_REPOS_PROPERTY, HOOKS_PROPERTY,
524593
DISABLED_REASON_PROPERTY, INCOMING_PROPERTY,
525-
IGNORE_ERR_PROPERTY]
594+
IGNORE_ERR_PROPERTY, STRIP_OUTGOING_PROPERTY]
526595

527596
if not multiple_project_config:
528597
return True
@@ -640,7 +709,7 @@ def check_configuration(config):
640709
COMMANDS_PROPERTY, PROJECTS_PROPERTY,
641710
HOOK_TIMEOUT_PROPERTY, CMD_TIMEOUT_PROPERTY,
642711
DISABLED_CMD_PROPERTY, INCOMING_PROPERTY,
643-
IGNORE_ERR_PROPERTY]
712+
IGNORE_ERR_PROPERTY, STRIP_OUTGOING_PROPERTY]
644713

645714
diff = set(config.keys()).difference(global_tunables)
646715
if diff:

tools/src/main/python/opengrok_tools/utils/opengrok.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,9 @@ def add_project(logger, project, uri, headers=None, timeout=None, api_timeout=No
190190
return True
191191

192192

193-
def delete_project(logger, project, uri, headers=None, timeout=None, api_timeout=None):
193+
def _delete_project(logger, project, uri, headers=None, timeout=None, api_timeout=None):
194194
try:
195-
r = do_api_call('DELETE', get_uri(uri, 'api', 'v1', 'projects',
196-
urllib.parse.quote_plus(project)),
195+
r = do_api_call('DELETE', uri,
197196
headers=headers, timeout=timeout, api_timeout=api_timeout)
198197
if r is None or r.status_code != 204:
199198
logger.error(f"could not delete project '{project}' in web application")
@@ -204,3 +203,17 @@ def delete_project(logger, project, uri, headers=None, timeout=None, api_timeout
204203
return False
205204

206205
return True
206+
207+
208+
def delete_project(logger, project, uri, headers=None, timeout=None, api_timeout=None):
    """
    Delete the project via the 'api/v1/projects/<project>' endpoint.

    :param logger: logger passed on to _delete_project()
    :param project: project name, URL-quoted before it is put into the URI
    :param uri: base URI of the web application
    :param headers: optional HTTP headers
    :param timeout: passed through to _delete_project()
    :param api_timeout: passed through to _delete_project()
    :return: the result of _delete_project()
    """
    quoted_name = urllib.parse.quote_plus(project)
    project_uri = get_uri(uri, 'api', 'v1', 'projects', quoted_name)
    return _delete_project(logger, project, project_uri,
                           headers=headers,
                           timeout=timeout, api_timeout=api_timeout)
213+
214+
215+
def delete_project_data(logger, project, uri, headers=None, timeout=None, api_timeout=None):
    """
    Delete the data of the project via the 'api/v1/projects/<project>/data' endpoint.

    :param logger: logger passed on to _delete_project()
    :param project: project name, URL-quoted before it is put into the URI
    :param uri: base URI of the web application
    :param headers: optional HTTP headers
    :param timeout: passed through to _delete_project()
    :param api_timeout: passed through to _delete_project()
    :return: the result of _delete_project()
    """
    quoted_name = urllib.parse.quote_plus(project)
    data_uri = get_uri(uri, 'api', 'v1', 'projects', quoted_name, 'data')
    return _delete_project(logger, project, data_uri,
                           headers=headers,
                           timeout=timeout, api_timeout=api_timeout)

0 commit comments

Comments
 (0)