def comma_separated_callback(*, is_valid_func=lambda v: True,
                             error_msg="{invalid} is not an allowed value"):
    """ Return an optparse callback for comma-separated args.

    The returned callback splits the raw option string on commas,
    strips whitespace around every item, validates each item with
    ``is_valid_func`` and stores the resulting list on
    ``parser.values`` under ``option.dest``.

    Default value is not processed: optparse only invokes the callback
    when the option is actually present on the command line.

    :param is_valid_func: predicate called with each stripped item;
        a falsy result makes the callback raise
        ``optparse.OptionValueError``. Accepts everything by default.
    :param error_msg: ``str.format`` template for the error message.
        ``{invalid}`` is the rejected item, ``{value}`` is the whole
        raw option string.
    :return: a function with the optparse callback signature
        ``(option, opt, value, parser)``.

    Usage::

        my_callback = comma_separated_callback(
            is_valid_func=lambda v: v in {'foo', 'bar'},
            error_msg="{invalid} is not an allowed value for --option-name")

        op.add_option("--option-name",
                      default=[],
                      action='callback',
                      type='string',
                      callback=my_callback)

    """
    def callback(option, opt, value, parser):
        """ optparse callback for comma-separated args """
        # Strip whitespace so that "a, b" means the same as "a,b".
        # Without it " b" would be stored verbatim and never match
        # the name the user meant. Empty items are kept and still go
        # through validation.
        values = [item.strip() for item in value.split(',')]
        for v in values:
            if not is_valid_func(v):
                msg = error_msg.format(value=value, invalid=v)
                raise optparse.OptionValueError(msg)
        setattr(parser.values, option.dest, values)
    return callback
ExpiredArguments({'expired': expired_args}) return self._write_error(request, 498, ex) + def _value_for_logging(self, key, value): + if key not in self.dont_log_args: + return value + return "***" + def _log_stats(self, request, options, error=None): + options = { + key: self._value_for_logging(key, value) + for key, value in options.items() + } msg = { # Anything we retrieve from Twisted request object contains bytes. # We have to convert it to unicode first for json.dump to succeed. @@ -313,11 +324,13 @@ def __init__(self, pool, sandboxed, strict, implicit_main, browser_engines_enabled, + dont_log_args, ): - BaseRenderResource.__init__(self, pool=pool, - max_timeout=max_timeout, - argument_cache=argument_cache, - browser_engines_enabled=browser_engines_enabled) + super().__init__(pool=pool, + max_timeout=max_timeout, + argument_cache=argument_cache, + browser_engines_enabled=browser_engines_enabled, + dont_log_args=dont_log_args) self.sandboxed = sandboxed self.lua_package_path = lua_package_path self.lua_sandbox_allowed_modules = lua_sandbox_allowed_modules @@ -629,6 +642,7 @@ def __init__(self, pool, ui_enabled, lua_enabled, lua_sandbox_enabled, argument_cache_max_entries, strict_lua_runner, browser_engines_enabled: List[str], + dont_log_args, ): Resource.__init__(self) self.argument_cache = ArgumentCache(argument_cache_max_entries) @@ -636,12 +650,14 @@ def __init__(self, pool, ui_enabled, lua_enabled, lua_sandbox_enabled, self.lua_enabled = lua_enabled self.browser_engines_enabled = browser_engines_enabled self.max_timeout = max_timeout + self.dont_log_args = dont_log_args _kwargs = dict( pool=pool, max_timeout=max_timeout, argument_cache=self.argument_cache, browser_engines_enabled=browser_engines_enabled, + dont_log_args=self.dont_log_args, ) self.putChild(b"render.html", RenderHtmlResource(**_kwargs)) self.putChild(b"render.png", RenderPngResource(**_kwargs)) diff --git a/splash/server.py b/splash/server.py index 78706ca66..7680810dc 100644 --- 
a/splash/server.py +++ b/splash/server.py @@ -10,10 +10,9 @@ from splash import defaults, __version__ from splash import xvfb from splash.qtutils import init_qt_app +from splash._cmdline_utils import ONOFF, comma_separated_callback -ONOFF = {True: "enabled", False: "disabled"} - def install_qtreactor(verbose): init_qt_app(verbose) @@ -26,15 +25,6 @@ def parse_opts(jupyter=False, argv=None): argv = sys.argv _bool_default = {True: ' (default)', False: ''} - def browser_engine_arg(option, opt, value, parser): - """ optparse callback for comma-separated args """ - engines = value.split(',') - for engine in engines: - if engine not in {'webkit', 'chromium'}: - raise optparse.OptionValueError( - "{} is not a supported --browser-engine".format(engine)) - setattr(parser.values, option.dest, engines) - op = optparse.OptionParser() op.add_option("-f", "--logfile", help="log file") op.add_option("-m", "--maxrss", type=float, default=0, @@ -72,9 +62,20 @@ def browser_engine_arg(option, opt, value, parser): default=defaults.BROWSER_ENGINES_ENABLED, action='callback', type='string', - callback=browser_engine_arg, + callback=comma_separated_callback( + is_valid_func=lambda v: v in {'webkit', 'chromium'}, + error_msg="{invalid} is not a supported --browser-engine", + ), help="Comma-separated list of enabled browser engines (default: %s). " "Allowed engines are chromium and webkit." % defaults.BROWSER_ENGINES_ENABLED) + op.add_option("--dont-log-args", + default=[], + action='callback', + type='string', + callback=comma_separated_callback(), + help="Comma-separated list of request args which values " + "won't be logged, regardless of the log level. " + "Example: lua_source,password") op.add_option("--lua-package-path", default="", help="semicolon-separated places to add to Lua package.path. " "Each place can have a ? 
in it that's replaced with the module name.") @@ -183,6 +184,7 @@ def log_splash_version(): def splash_server(portnum, ip, slots, network_manager_factory, max_timeout, + *, splash_proxy_factory_cls=None, js_profiles_path=None, ui_enabled=True, @@ -194,6 +196,7 @@ def splash_server(portnum, ip, slots, network_manager_factory, max_timeout, argument_cache_max_entries=None, disable_browser_caches=False, browser_engines_enabled=(), + dont_log_args=None, verbosity=None): from twisted.internet import reactor from twisted.web.server import Site @@ -242,6 +245,7 @@ def splash_server(portnum, ip, slots, network_manager_factory, max_timeout, argument_cache_max_entries=argument_cache_max_entries, strict_lua_runner=strict_lua_runner, browser_engines_enabled=list(browser_engines_enabled), + dont_log_args=dont_log_args, ) factory = Site(root) reactor.listenTCP(portnum, factory, interface=ip) @@ -276,7 +280,7 @@ def force_shutdown(): t.start(60, now=False) -def default_splash_server(portnum, ip, max_timeout, slots=None, +def default_splash_server(portnum, ip, max_timeout, *, slots=None, proxy_profiles_path=None, js_profiles_path=None, js_disable_cross_domain_access=False, filters_path=None, allowed_schemes=None, @@ -292,6 +296,7 @@ def default_splash_server(portnum, ip, max_timeout, slots=None, server_factory=splash_server, disable_browser_caches=False, browser_engines_enabled=(), + dont_log_args=None, ): from splash import network_manager network_manager_factory = network_manager.NetworkManagerFactory( @@ -322,6 +327,7 @@ def default_splash_server(portnum, ip, max_timeout, slots=None, max_timeout=max_timeout, argument_cache_max_entries=argument_cache_max_entries, browser_engines_enabled=browser_engines_enabled, + dont_log_args=dont_log_args, ) @@ -425,6 +431,7 @@ def main(jupyter=False, argv=None, server_factory=splash_server): server_factory=server_factory, disable_browser_caches=opts.disable_browser_caches, browser_engines_enabled=opts.browser_engines, + 
dont_log_args=set(opts.dont_log_args), ) signal.signal(signal.SIGUSR1, lambda s, f: traceback.print_stack(f))