Skip to content

Commit dbcfb11

Browse files
authored
Merge pull request #960 from scrapinghub/dont-log-args
--dont-log-args startup option
2 parents e809075 + 84c5536 commit dbcfb11

File tree

3 files changed

+74
-18
lines changed

3 files changed

+74
-18
lines changed

splash/_cmdline_utils.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# -*- coding: utf-8 -*-
2+
import optparse
3+
4+
# Maps a boolean to the word "enabled" (True) or "disabled" (False).
ONOFF = {True: "enabled", False: "disabled"}
5+
6+
7+
def comma_separated_callback(*, is_valid_func=lambda v: True,
                             error_msg="{invalid} is not an allowed value"):
    """ Return an optparse callback for comma-separated args.

    The raw option string is split on commas and whitespace around each
    item is stripped, so ``"foo, bar"`` is parsed the same as ``"foo,bar"``.
    Every item is checked with ``is_valid_func``; the first item for which
    it returns a false value aborts parsing with
    ``optparse.OptionValueError``.

    Default value is not processed.

    :param is_valid_func: callable which receives a single (stripped) item
        and returns a true value if the item is allowed. By default all
        items are allowed.
    :param error_msg: template for the error message; it is formatted with
        ``value`` (the whole raw option string) and ``invalid``
        (the offending item).
    :return: a function suitable for the ``callback`` argument of
        ``OptionParser.add_option``; it stores the list of items
        in ``parser.values`` under ``option.dest``.

    Usage::

        my_callback = comma_separated_callback(
            is_valid_func=lambda v: v in {'foo', 'bar'},
            error_msg="{invalid} is not an allowed value for --option-name")

        op.add_option("--option-name",
                      default=[],
                      action='callback',
                      type='string',
                      callback=my_callback)

    """
    def callback(option, opt, value, parser):
        """ optparse callback for comma-separated args """
        # Strip whitespace around each item: without this, "a, b" produces
        # " b", which fails validation or silently never matches anything
        # the resulting list is compared against later.
        values = [item.strip() for item in value.split(',')]
        for v in values:
            if not is_valid_func(v):
                msg = error_msg.format(value=value, invalid=v)
                raise optparse.OptionValueError(msg)
        setattr(parser.values, option.dest, values)
    return callback

splash/resources.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,15 @@ class BaseRenderResource(_ValidatingResource):
9696
isLeaf = True
9797
content_type = "text/html; charset=utf-8"
9898

99-
def __init__(self, pool, max_timeout, argument_cache, browser_engines_enabled):
99+
def __init__(self, pool, max_timeout, argument_cache,
100+
browser_engines_enabled, dont_log_args):
100101
Resource.__init__(self)
101102
self.pool = pool
102103
self.js_profiles_path = self.pool.js_profiles_path
103104
self.max_timeout = max_timeout
104105
self.argument_cache = argument_cache
105106
self.browser_engines_enabled = browser_engines_enabled
107+
self.dont_log_args = set(dont_log_args)
106108

107109
def render_GET(self, request):
108110
#log.msg("%s %s %s %s" % (id(request), request.method, request.path, request.args))
@@ -211,7 +213,16 @@ def _write_expired_args(self, request, expired_args):
211213
ex = ExpiredArguments({'expired': expired_args})
212214
return self._write_error(request, 498, ex)
213215

216+
def _value_for_logging(self, key, value):
217+
if key not in self.dont_log_args:
218+
return value
219+
return "***"
220+
214221
def _log_stats(self, request, options, error=None):
222+
options = {
223+
key: self._value_for_logging(key, value)
224+
for key, value in options.items()
225+
}
215226
msg = {
216227
# Anything we retrieve from Twisted request object contains bytes.
217228
# We have to convert it to unicode first for json.dump to succeed.
@@ -313,11 +324,13 @@ def __init__(self, pool, sandboxed,
313324
strict,
314325
implicit_main,
315326
browser_engines_enabled,
327+
dont_log_args,
316328
):
317-
BaseRenderResource.__init__(self, pool=pool,
318-
max_timeout=max_timeout,
319-
argument_cache=argument_cache,
320-
browser_engines_enabled=browser_engines_enabled)
329+
super().__init__(pool=pool,
330+
max_timeout=max_timeout,
331+
argument_cache=argument_cache,
332+
browser_engines_enabled=browser_engines_enabled,
333+
dont_log_args=dont_log_args)
321334
self.sandboxed = sandboxed
322335
self.lua_package_path = lua_package_path
323336
self.lua_sandbox_allowed_modules = lua_sandbox_allowed_modules
@@ -629,19 +642,22 @@ def __init__(self, pool, ui_enabled, lua_enabled, lua_sandbox_enabled,
629642
argument_cache_max_entries,
630643
strict_lua_runner,
631644
browser_engines_enabled: List[str],
645+
dont_log_args,
632646
):
633647
Resource.__init__(self)
634648
self.argument_cache = ArgumentCache(argument_cache_max_entries)
635649
self.ui_enabled = ui_enabled
636650
self.lua_enabled = lua_enabled
637651
self.browser_engines_enabled = browser_engines_enabled
638652
self.max_timeout = max_timeout
653+
self.dont_log_args = dont_log_args
639654

640655
_kwargs = dict(
641656
pool=pool,
642657
max_timeout=max_timeout,
643658
argument_cache=self.argument_cache,
644659
browser_engines_enabled=browser_engines_enabled,
660+
dont_log_args=self.dont_log_args,
645661
)
646662
self.putChild(b"render.html", RenderHtmlResource(**_kwargs))
647663
self.putChild(b"render.png", RenderPngResource(**_kwargs))

splash/server.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,9 @@
1010
from splash import defaults, __version__
1111
from splash import xvfb
1212
from splash.qtutils import init_qt_app
13+
from splash._cmdline_utils import ONOFF, comma_separated_callback
1314

1415

15-
ONOFF = {True: "enabled", False: "disabled"}
16-
1716

1817
def install_qtreactor(verbose):
1918
init_qt_app(verbose)
@@ -26,15 +25,6 @@ def parse_opts(jupyter=False, argv=None):
2625
argv = sys.argv
2726
_bool_default = {True: ' (default)', False: ''}
2827

29-
def browser_engine_arg(option, opt, value, parser):
30-
""" optparse callback for comma-separated args """
31-
engines = value.split(',')
32-
for engine in engines:
33-
if engine not in {'webkit', 'chromium'}:
34-
raise optparse.OptionValueError(
35-
"{} is not a supported --browser-engine".format(engine))
36-
setattr(parser.values, option.dest, engines)
37-
3828
op = optparse.OptionParser()
3929
op.add_option("-f", "--logfile", help="log file")
4030
op.add_option("-m", "--maxrss", type=float, default=0,
@@ -72,9 +62,20 @@ def browser_engine_arg(option, opt, value, parser):
7262
default=defaults.BROWSER_ENGINES_ENABLED,
7363
action='callback',
7464
type='string',
75-
callback=browser_engine_arg,
65+
callback=comma_separated_callback(
66+
is_valid_func=lambda v: v in {'webkit', 'chromium'},
67+
error_msg="{invalid} is not a supported --browser-engine",
68+
),
7669
help="Comma-separated list of enabled browser engines (default: %s). "
7770
"Allowed engines are chromium and webkit." % defaults.BROWSER_ENGINES_ENABLED)
71+
op.add_option("--dont-log-args",
72+
default=[],
73+
action='callback',
74+
type='string',
75+
callback=comma_separated_callback(),
76+
help="Comma-separated list of request args which values "
77+
"won't be logged, regardless of the log level. "
78+
"Example: lua_source,password")
7879
op.add_option("--lua-package-path", default="",
7980
help="semicolon-separated places to add to Lua package.path. "
8081
"Each place can have a ? in it that's replaced with the module name.")
@@ -183,6 +184,7 @@ def log_splash_version():
183184

184185

185186
def splash_server(portnum, ip, slots, network_manager_factory, max_timeout,
187+
*,
186188
splash_proxy_factory_cls=None,
187189
js_profiles_path=None,
188190
ui_enabled=True,
@@ -194,6 +196,7 @@ def splash_server(portnum, ip, slots, network_manager_factory, max_timeout,
194196
argument_cache_max_entries=None,
195197
disable_browser_caches=False,
196198
browser_engines_enabled=(),
199+
dont_log_args=None,
197200
verbosity=None):
198201
from twisted.internet import reactor
199202
from twisted.web.server import Site
@@ -242,6 +245,7 @@ def splash_server(portnum, ip, slots, network_manager_factory, max_timeout,
242245
argument_cache_max_entries=argument_cache_max_entries,
243246
strict_lua_runner=strict_lua_runner,
244247
browser_engines_enabled=list(browser_engines_enabled),
248+
dont_log_args=dont_log_args,
245249
)
246250
factory = Site(root)
247251
reactor.listenTCP(portnum, factory, interface=ip)
@@ -276,7 +280,7 @@ def force_shutdown():
276280
t.start(60, now=False)
277281

278282

279-
def default_splash_server(portnum, ip, max_timeout, slots=None,
283+
def default_splash_server(portnum, ip, max_timeout, *, slots=None,
280284
proxy_profiles_path=None, js_profiles_path=None,
281285
js_disable_cross_domain_access=False,
282286
filters_path=None, allowed_schemes=None,
@@ -292,6 +296,7 @@ def default_splash_server(portnum, ip, max_timeout, slots=None,
292296
server_factory=splash_server,
293297
disable_browser_caches=False,
294298
browser_engines_enabled=(),
299+
dont_log_args=None,
295300
):
296301
from splash import network_manager
297302
network_manager_factory = network_manager.NetworkManagerFactory(
@@ -322,6 +327,7 @@ def default_splash_server(portnum, ip, max_timeout, slots=None,
322327
max_timeout=max_timeout,
323328
argument_cache_max_entries=argument_cache_max_entries,
324329
browser_engines_enabled=browser_engines_enabled,
330+
dont_log_args=dont_log_args,
325331
)
326332

327333

@@ -425,6 +431,7 @@ def main(jupyter=False, argv=None, server_factory=splash_server):
425431
server_factory=server_factory,
426432
disable_browser_caches=opts.disable_browser_caches,
427433
browser_engines_enabled=opts.browser_engines,
434+
dont_log_args=set(opts.dont_log_args),
428435
)
429436
signal.signal(signal.SIGUSR1, lambda s, f: traceback.print_stack(f))
430437

0 commit comments

Comments
 (0)