diff --git a/rebar.config.script b/rebar.config.script
index 53f7fe8b856..4d1773fc8bf 100644
--- a/rebar.config.script
+++ b/rebar.config.script
@@ -127,6 +127,7 @@ SubDirs = [
     "src/couch_mrview",
     "src/couch_replicator",
     "src/couch_pse_tests",
+    "src/couch_srt",
     "src/couch_stats",
     "src/couch_peruser",
     "src/couch_tests",
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index dfefa62dc03..b2ffa87b7d9 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -1150,3 +1150,94 @@ url = {{nouveau_url}}
 ;mem3_shards = true
 ;nouveau_index_manager = true
 ;dreyfus_index_manager = true
+
+; Couch Stats Resource Tracker (CSRT)
+[csrt]
+;enable = false
+;enable_init_p = false
+;enable_reporting = false
+;enable_rpc_reporting = false
+
+; Truncate reports so they do not include zero values for counter fields. This
+; is a simple way to save space and should be left enabled unless you need a
+; fixed output structure in the process lifecycle reports.
+;should_truncate_reports = true
+
+; Limit queries to a maximum number of rows
+;query_limit = 100
+;query_cardinality_limit = 10000
+
+; CSRT Logger Matchers
+;
+; These matchers are filters that decide whether or not to generate a process
+; lifecycle report at the end of an HTTP request: a detailed report
+; quantifying the CouchDB resources used to fulfill that request. These filters
+; are designed to make it easy to log a report for requests that utilize a lot
+; of CouchDB resources, take a long time, or use heavy filtering, without
+; having to enable report logging for _all_ requests. These reports can be
+; enabled at the RPC worker level too, but for view queries and other aggregate
+; operations that can generate a report per shard interacted with to fulfill
+; the request, this can generate a lot of data. The logger matchers are a way
+; to dynamically control what is being logged on the fly, or to tailor the
+; quantity of logs generated so usage information is stored in a predictable
+; manner.
+;
+; These reports can be used to find potential workloads to refactor, but also
+; to retroactively understand the workload that occurred during a particular
+; window of time. The node level stats collected and reported can inform you
+; that, for example, a great deal of IO operations and database reads are
+; being performed, but they do not provide cardinality into the databases and
+; requests inducing the resource usage. This is where the process lifecycle
+; reports come in: after a request is completed, if it matched a filter, a
+; report is logged containing the final quantitative counts of resource usage
+; to fulfill that request, as well as qualitative information like username,
+; dbname, nonce, and more. See CSRT.md for more details.
+;
+; There are a series of default logger matchers designed to filter for requests
+; that surpass a threshold on a particular dimension; for example, when
+; enabled, the ioq_calls default matcher filters true for requests that invoke
+; more than 10,000 IOQ calls. The default named matchers are enabled by name
+; and boolean in the `[csrt_logger.matchers_enabled]` section, and similarly,
+; the threshold value for each of the default matchers is specified in the
+; config section `[csrt_logger.matchers_threshold]` by name and integer
+; threshold quantity.
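+;
+; For example, to enable only the default ioq_calls matcher, but with a higher
+; threshold than its default of 10,000 calls:
+;
+; [csrt_logger.matchers_enabled]
+; ioq_calls = true
+;
+; [csrt_logger.matchers_threshold]
+; ioq_calls = 20000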
+;
+; The default loggers above operate against any HTTP request flowing through
+; CouchDB, whereas the `[csrt_logger.dbnames_io]` section provides a simple way
+; to specify database specific matchers, at the expense of the granularity
+; available in the default matchers. The "dbnames_io" logger matcher filters
+; for requests against a particular database that induce more than the
+; specified threshold of IO operations. This is a generic IO catchall matcher,
+; not specific to ioq_calls or docs_read like the default matchers.
+;
+; CSRT dbname matchers
+; Given a dbname and a positive integer, this will enable an IO matcher
+; against the provided db for any requests that induce IO in quantities
+; greater than the provided threshold on any one of: ioq_calls, rows_read,
+; docs_read, get_kp_node, get_kv_node, or changes_processed.
+[csrt_logger.dbnames_io]
+; For example:
+; foo = 100
+; _dbs = 123
+; _users = 234
+; foo/bar = 200
+
+; CSRT default matchers - enablement configuration
+; The default CSRT loggers can be individually enabled below
+[csrt_logger.matchers_enabled]
+;all_coordinators = false
+;all_rpc_workers = false
+;docs_read = false
+;rows_read = false
+;docs_written = false
+;long_reqs = false
+;changes_processed = false
+;ioq_calls = false

+; CSRT default matchers - threshold configuration
+; This specifies the integer threshold for the various builtin matchers
+[csrt_logger.matchers_threshold]
+;docs_read = 1000
+;rows_read = 1000
+;docs_written = 500
+;long_reqs = 60000
+;changes_processed = 1000
+;ioq_calls = 10000
diff --git a/rel/reltool.config b/rel/reltool.config
index b85bd49b624..48456684395 100644
--- a/rel/reltool.config
+++ b/rel/reltool.config
@@ -38,6 +38,7 @@
         couch_log,
         couch_mrview,
         couch_replicator,
+        couch_srt,
         couch_stats,
         couch_event,
         couch_peruser,
@@ -103,6 +104,7 @@
     {app, couch_log, [{incl_cond, include}]},
     {app, couch_mrview, [{incl_cond, include}]},
     {app, couch_replicator, [{incl_cond, include}]},
+    {app, couch_srt, [{incl_cond, include}]},
    {app, couch_stats, [{incl_cond, include}]},
    {app, couch_event, [{incl_cond, include}]},
    {app, couch_peruser, [{incl_cond, include}]},
diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl
index 57a3aeaeaa6..0a4f6225f99 100644
--- a/src/chttpd/src/chttpd.erl
+++ b/src/chttpd/src/chttpd.erl
@@ -339,6 +339,10 @@ handle_request_int(MochiReq) ->
     % Save client socket so that it can be monitored for disconnects
     chttpd_util:mochiweb_client_req_set(MochiReq),

+    %% This is probably better in before_request, but having Path is nice
+    couch_srt:create_coordinator_context(HttpReq0, Path),
+    couch_srt:set_context_handler_fun({?MODULE, ?FUNCTION_NAME}),
+
     {HttpReq2, Response} =
         case before_request(HttpReq0) of
             {ok, HttpReq1} ->
@@ -369,6 +373,7 @@ handle_request_int(MochiReq) ->

 before_request(HttpReq) ->
     try
+        couch_srt:set_context_handler_fun({?MODULE, ?FUNCTION_NAME}),
         chttpd_stats:init(),
         chttpd_plugin:before_request(HttpReq)
     catch
@@ -388,6 +393,8 @@ after_request(HttpReq, HttpResp0) ->
     HttpResp2 = update_stats(HttpReq, HttpResp1),
     chttpd_stats:report(HttpReq, HttpResp2),
     maybe_log(HttpReq, HttpResp2),
+    %% NOTE: do not set_context_handler_fun to preserve the Handler
+    couch_srt:destroy_context(),
     HttpResp2.
process_request(#httpd{mochi_req = MochiReq} = HttpReq) -> @@ -400,6 +407,7 @@ process_request(#httpd{mochi_req = MochiReq} = HttpReq) -> RawUri = MochiReq:get(raw_path), try + couch_srt:set_context_handler_fun({?MODULE, ?FUNCTION_NAME}), couch_httpd:validate_host(HttpReq), check_request_uri_length(RawUri), check_url_encoding(RawUri), @@ -425,10 +433,12 @@ handle_req_after_auth(HandlerKey, HttpReq) -> HandlerKey, fun chttpd_db:handle_request/1 ), + couch_srt:set_context_handler_fun(HandlerFun), AuthorizedReq = chttpd_auth:authorize( possibly_hack(HttpReq), fun chttpd_auth_request:authorize_request/1 ), + couch_srt:set_context_username(AuthorizedReq), {AuthorizedReq, HandlerFun(AuthorizedReq)} catch ErrorType:Error:Stack -> diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index a43baeae485..4915ff67e35 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -83,6 +83,7 @@ % Database request handlers handle_request(#httpd{path_parts = [DbName | RestParts], method = Method} = Req) -> + couch_srt:set_context_dbname(DbName), case {Method, RestParts} of {'PUT', []} -> create_db_req(Req, DbName); @@ -103,6 +104,7 @@ handle_request(#httpd{path_parts = [DbName | RestParts], method = Method} = Req) do_db_req(Req, fun db_req/2); {_, [SecondPart | _]} -> Handler = chttpd_handlers:db_handler(SecondPart, fun db_req/2), + couch_srt:set_context_handler_fun(Handler), do_db_req(Req, Handler) end. diff --git a/src/chttpd/src/chttpd_httpd_handlers.erl b/src/chttpd/src/chttpd_httpd_handlers.erl index 932b52e5f6e..ad496463ced 100644 --- a/src/chttpd/src/chttpd_httpd_handlers.erl +++ b/src/chttpd/src/chttpd_httpd_handlers.erl @@ -20,6 +20,7 @@ url_handler(<<"_utils">>) -> fun chttpd_misc:handle_utils_dir_req/1; url_handler(<<"_all_dbs">>) -> fun chttpd_misc:handle_all_dbs_req/1; url_handler(<<"_dbs_info">>) -> fun chttpd_misc:handle_dbs_info_req/1; url_handler(<<"_active_tasks">>) -> fun chttpd_misc:handle_task_status_req/1; +url_handler(<<"_active_resources">>) -> fun couch_srt_httpd:handle_resource_status_req/1; url_handler(<<"_scheduler">>) -> fun couch_replicator_httpd:handle_scheduler_req/1; url_handler(<<"_node">>) -> fun chttpd_node:handle_node_req/1; url_handler(<<"_reload_query_servers">>) -> fun chttpd_misc:handle_reload_query_servers_req/1; diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg index 6a7120f87ef..7597e8fc323 100644 --- a/src/couch/priv/stats_descriptions.cfg +++ b/src/couch/priv/stats_descriptions.cfg @@ -306,6 +306,10 @@ {type, counter}, {desc, <<"number of couch_server LRU operations skipped">>} ]}. +{[couchdb, couch_server, open], [ + {type, counter}, + {desc, <<"number of couch_server open operations invoked">>} +]}. {[couchdb, query_server, vdu_rejects], [ {type, counter}, {desc, <<"number of rejections by validate_doc_update function">>} @@ -422,6 +426,22 @@ {type, counter}, {desc, <<"number of legacy checksums found in couch_file instances">>} ]}. +{[couchdb, btree, get_node, kp_node], [ + {type, counter}, + {desc, <<"number of couch btree kp_nodes read">>} +]}. +{[couchdb, btree, get_node, kv_node], [ + {type, counter}, + {desc, <<"number of couch btree kv_nodes read">>} +]}. +{[couchdb, btree, write_node, kp_node], [ + {type, counter}, + {desc, <<"number of couch btree kp_nodes written">>} +]}. +{[couchdb, btree, write_node, kv_node], [ + {type, counter}, + {desc, <<"number of couch btree kv_nodes written">>} +]}. 
{[pread, exceed_eof], [ {type, counter}, {desc, <<"number of the attempts to read beyond end of db file">>} diff --git a/src/couch/src/couch.app.src b/src/couch/src/couch.app.src index 5f1fb9800bd..924a030c4be 100644 --- a/src/couch/src/couch.app.src +++ b/src/couch/src/couch.app.src @@ -47,6 +47,7 @@ couch_log, couch_event, ioq, + couch_srt, couch_stats, couch_dist, couch_quickjs diff --git a/src/couch/src/couch_btree.erl b/src/couch/src/couch_btree.erl index b974a22eeca..628388194d0 100644 --- a/src/couch/src/couch_btree.erl +++ b/src/couch/src/couch_btree.erl @@ -472,6 +472,7 @@ reduce_tree_size(kp_node, NodeSize, [{_K, {_P, _Red, Sz}} | NodeList]) -> get_node(#btree{fd = Fd}, NodePos) -> {ok, {NodeType, NodeList}} = couch_file:pread_term(Fd, NodePos), + couch_stats:increment_counter([couchdb, btree, get_node, NodeType]), {NodeType, NodeList}. write_node(#btree{fd = Fd, compression = Comp} = Bt, NodeType, NodeList) -> @@ -480,6 +481,7 @@ write_node(#btree{fd = Fd, compression = Comp} = Bt, NodeType, NodeList) -> % now write out each chunk and return the KeyPointer pairs for those nodes ToWrite = [{NodeType, Chunk} || Chunk <- Chunks], WriteOpts = [{compression, Comp}], + couch_stats:increment_counter([couchdb, btree, write_node, NodeType]), {ok, PtrSizes} = couch_file:append_terms(Fd, ToWrite, WriteOpts), {ok, group_kps(Bt, NodeType, Chunks, PtrSizes)}. diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl index 3b222e0810e..4cd0aa9acf0 100644 --- a/src/couch/src/couch_query_servers.erl +++ b/src/couch/src/couch_query_servers.erl @@ -614,6 +614,12 @@ filter_docs(Req, Db, DDoc, FName, Docs) -> end. filter_docs_int(Db, DDoc, FName, JsonReq, JsonDocs) -> + %% Count usage in _int version as this can be repeated for OS error + %% Pros & cons... might not have actually processed `length(JsonDocs)` docs + %% but it certainly undercounts if we count in `filter_docs/5` above + %% TODO: replace with couchdb.query_server.*.ddoc_filter stats once we can + %% funnel back the stats used in the couchjs process to this caller process + couch_srt:js_filtered(length(JsonDocs)), [true, Passes] = ddoc_prompt( Db, DDoc, diff --git a/src/couch/src/couch_server.erl b/src/couch/src/couch_server.erl index aee2d9904e4..f54da3d266b 100644 --- a/src/couch/src/couch_server.erl +++ b/src/couch/src/couch_server.erl @@ -114,6 +114,7 @@ sup_start_link(N) -> gen_server:start_link({local, couch_server(N)}, couch_server, [N], []). 
open(DbName, Options) -> + couch_stats:increment_counter([couchdb, couch_server, open]), try validate_open_or_create(DbName, Options), open_int(DbName, Options) diff --git a/src/couch_log/src/couch_log_formatter.erl b/src/couch_log/src/couch_log_formatter.erl index cc8b5d8087d..4e54957a2a7 100644 --- a/src/couch_log/src/couch_log_formatter.erl +++ b/src/couch_log/src/couch_log_formatter.erl @@ -470,7 +470,12 @@ format_meta(Meta) -> lists:sort( maps:fold( fun(K, V, Acc) -> - [to_str(K, V) | Acc] + case to_str(K, V) of + "" -> + Acc; + Str -> + [Str | Acc] + end end, [], Meta @@ -487,6 +492,9 @@ format_meta(Meta) -> %% - maps %% However we are not going to try to distinguish lists from string %% Atoms would be printed as strings +%% `null` JSON values are skipped +to_str(_K, null) -> + ""; to_str(K, _) when not (is_list(K) or is_atom(K)) -> ""; to_str(K, Term) when is_list(Term) -> diff --git a/src/couch_log/test/eunit/couch_log_formatter_test.erl b/src/couch_log/test/eunit/couch_log_formatter_test.erl index cdb7eae3126..8081eaad174 100644 --- a/src/couch_log/test/eunit/couch_log_formatter_test.erl +++ b/src/couch_log/test/eunit/couch_log_formatter_test.erl @@ -34,11 +34,13 @@ format_report_etoolong_test() -> format_report_test() -> {ok, Entry} = couch_log_formatter:format_report(self(), report123, #{ + empty => null, foo => 123, bar => "barStr", baz => baz }), % Rely on `couch_log_formatter:format_meta/1` to sort keys + % `empty` is missing as `null` values are skipped Formatted = "[bar=\"barStr\" baz=\"baz\" foo=123]", ?assertEqual(Formatted, lists:flatten(Entry#log_entry.msg)). diff --git a/src/couch_srt/LICENSE b/src/couch_srt/LICENSE new file mode 100644 index 00000000000..3ddd6642618 --- /dev/null +++ b/src/couch_srt/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/couch_srt/README.md b/src/couch_srt/README.md new file mode 100644 index 00000000000..119f56fc017 --- /dev/null +++ b/src/couch_srt/README.md @@ -0,0 +1,840 @@ +# couch_srt: Couch Stats Resource Tracker aka CSRT + +The `couch_srt` app introduces the Couch Stats Resource Tracker, aka CSRT for +short. CSRT is a real time stats tracking system that tracks the quantity of +resources induced at the process level in a live queryable manner, while also +generating process lifetime reports containing statistics on the total resource +load of a request, as a function of CouchDB operations like dbs/docs opened, +view and changes rows read, changes returned vs processed, Javascript filter +usage, request duration, and more. 
This system is a paradigm shift in CouchDB
+visibility and introspection, allowing for expressive real time querying
+capabilities to introspect, understand, and aggregate CouchDB internal resource
+usage, as well as powerful filtering facilities for conditionally generating
+reports on "heavy usage" requests or "long/slow" requests. CSRT also extends
+`recon:proc_window` with `couch_srt:proc_window`, allowing for the same style
+of battle hardened introspection as Recon's excellent `proc_window`, but with
+the sample window over any of the CSRT tracked CouchDB stats!
+
+CSRT does this by piggy-backing off of the existing metrics tracked by way of
+`couch_stats:increment_counter`: at the time when the local process induces
+those metric inc calls, CSRT updates an ets entry containing the context
+information for the local process, such that global aggregate queries can be
+performed against the ets table, as well as generation of the process resource
+usage reports at the conclusion of the process's lifecycle. The ability to do
+aggregate querying in real time, in addition to the process lifecycle reports
+for post facto analysis over time, is a cornerstone of CSRT that is the result
+of a series of iterations until a robust and scalable approach was built.
+
+The real time querying is achieved by way of a global ets table with
+`read_concurrency`, `write_concurrency`, and `decentralized_counters` enabled.
+Great care was taken to ensure that _zero_ concurrent writes to the same key
+occur in this model, and this entire system is predicated on the fact that
+incremental updates via `ets:update_counter` provide *really* fast and
+efficient updates in an atomic and isolated fashion when coupled with
+decentralized counters and write concurrency. Each process that calls
+`couch_stats:increment_counter` tracks its local context in CSRT as well, with
+zero concurrent writes from any other processes. Outside of the context setup
+and teardown logic, _only_ operations to `ets:update_counter` are performed,
+one per process invocation of `couch_stats:increment_counter`, and one for
+coordinators to update worker deltas in a single batch, resulting in a 1:1
+ratio of ets calls to real time stats updates for the primary workloads.
+
+The primary achievement of CSRT is the core framework itself for concurrent
+process local stats tracking and real time RPC delta accumulation in a scalable
+manner that allows for real time aggregate querying and process lifecycle
+reports. This took several versions to find a scalable and robust approach that
+induced minimal impact on maximum system throughput. Now that the framework is
+in place, it can be extended to track any further desired process local uses of
+`couch_stats:increment_counter`. That said, the currently selected set of stats
+to track was heavily influenced by the challenges in retroactively
+understanding the quantity of resources induced by a query like
+`/db/_changes?since=$SEQ`, or similarly, `/db/_find`.
+
+CSRT started as an extension of the Mango execution stats logic to `_changes`
+feeds to get proper visibility into the quantity of docs read and filtered per
+changes request, but then the focus inverted with the realization that we
+should instead use the existing stats tracking mechanisms that have already
+been deemed critical information to track, which then also allows for the real
+time tracking and aggregate query capabilities.
+The Mango execution stats can be ported into
+CSRT itself and just become one subset of the stats tracked as a whole, and
+similarly, any additional desired stats tracking can be easily added and will
+be picked up in the RPC deltas and process lifetime reports.
+
+## A Simple Example
+
+Take a database `foo` with 11k documents, each containing an integer
+`doc.value` field that a design doc filter can test for even or odd values. If
+we instantiate a series of while loops in parallel making requests of the form:
+
+> GET /foo/_changes?filter=bar/even&include_docs=true
+
+We can generate a good chunk of load on a local laptop dev setup, resulting in
+requests that take a few seconds to load through the changes feed, fetch all
+11k docs, and then funnel them through the Javascript engine to filter for even
+valued docs; this allows us time to query these heavier requests live and see
+them in progress with the real time stats tracking and querying capabilities of
+CSRT.
+
+For example, let's use `couch_srt:proc_window/3` as one would do with
+`recon:proc_window/3` to get an idea of the heavy active processes on the
+system:
+
+```
+(node1@127.0.0.1)2> rp([{PR, couch_srt:to_json(couch_srt:get_resource(PR))} || {PR, _, _} <- couch_srt:proc_window(ioq_calls, 3, 1000)]).
+[{{<0.5090.0>,#Ref<0.2277656623.605290499.37969>},
+  #{changes_returned => 3962,db_open => 10,dbname => <<"foo">>,
+    docs_read => 7917,docs_written => 0,get_kp_node => 54,
+    get_kv_node => 1241,ioq_calls => 15834,js_filter => 7917,
+    js_filtered_docs => 7917,nonce => <<"cc5a814ceb">>,
+    pid_ref =>
+        <<"<0.5090.0>:#Ref<0.2277656623.605290499.37969>">>,
+    rows_read => 7917,
+    started_at => <<"2025-07-21T17:25:08.784z">>,
+    type =>
+        <<"coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes">>,
+    updated_at => <<"2025-07-21T17:25:13.051z">>,
+    username => <<"adm">>}},
+ {{<0.5087.0>,#Ref<0.2277656623.606601217.92191>},
+  #{changes_returned => 4310,db_open => 10,dbname => <<"foo">>,
+    docs_read => 8624,docs_written => 0,get_kp_node => 58,
+    get_kv_node => 1358,ioq_calls => 17248,js_filter => 8624,
+    js_filtered_docs => 8624,nonce => <<"0e625c723a">>,
+    pid_ref =>
+        <<"<0.5087.0>:#Ref<0.2277656623.606601217.92191>">>,
+    rows_read => 8624,
+    started_at => <<"2025-07-21T17:25:08.424z">>,
+    type =>
+        <<"coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes">>,
+    updated_at => <<"2025-07-21T17:25:13.051z">>,
+    username => <<"adm">>}},
+ {{<0.5086.0>,#Ref<0.2277656623.605290499.27728>},
+  #{changes_returned => 4285,db_open => 10,dbname => <<"foo">>,
+    docs_read => 8569,docs_written => 0,get_kp_node => 57,
+    get_kv_node => 1349,ioq_calls => 17138,js_filter => 8569,
+    js_filtered_docs => 8569,nonce => <<"962cda1645">>,
+    pid_ref =>
+        <<"<0.5086.0>:#Ref<0.2277656623.605290499.27728>">>,
+    rows_read => 8569,
+    started_at => <<"2025-07-21T17:25:08.406z">>,
+    type =>
+        <<"coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes">>,
+    updated_at => <<"2025-07-21T17:25:13.051z">>,
+    username => <<"adm">>}}]
+ok
+```
+
+This shows us the top 3 most active processes (being tracked in CSRT) over the
+next 1000 milliseconds, sorted by number of `ioq_calls` induced! All three of
+these processes are incurring heavy usage, reading many thousands of docs with
+15k+ IOQ calls and heavy JS filter usage, exactly the types of requests you
+want to be alerted to.
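+
+The nonce values in this output can also be used for direct lookups via the
+aggregate query API described later; a minimal sketch, assuming the first
+context above is still alive:
+
+```erlang
+%% Look up the tracked context(s) for the nonce from the first entry above.
+couch_srt:find_by_nonce(<<"cc5a814ceb">>).
+```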
+CSRT's proc window logic is built on top of Recon's,
+which doesn't return the process info itself, so you'll need to fetch the
+process status with `couch_srt:get_resource/1` and then pretty print it with
+`couch_srt:to_json/1`.
+
+The output above is a real time snapshot of the live running system and shows
+processes actively inducing additional resource usage, so these CSRT context
+values are just a point-in-time snapshot of where each process was, as of the
+`updated_at` timestamp. We can reference the nonce value to search through the
+report logs for a final report, assuming the given context ended up using
+sufficient resources to trigger a logger matcher lifetime report. The above
+changes requests were crafted specifically to induce reports, so unsurprisingly
+we have reports for all three.
+
+However, I first want to show the existing visibility into these changes
+requests exposed by the raw HTTP logs, to highlight the impact of the CSRT
+reports and the new visibility into request workloads they expose.
+
+First, let's look at the existing HTTP logs for those 3 requests:
+
+```
+(chewbranca)-(jobs:1)-(~/src/couchdb_csrt_v3)
+(! 9872)-> grep 'cc5a814ceb\|0e625c723a\|962cda1645' ./dev/logs/node1.log | grep -v '^\[report]'
+[notice] 2025-07-21T17:25:14.520641Z node1@127.0.0.1 <0.5087.0> 0e625c723a localhost:15984 127.0.0.1 adm GET /foo/_changes?filter=bar/even&asdf=fdsa&include_docs=true 200 ok 6096
+[notice] 2025-07-21T17:25:14.521417Z node1@127.0.0.1 <0.5086.0> 962cda1645 localhost:15984 127.0.0.1 adm GET /foo/_changes?filter=bar/even&asdf=fdsa&include_docs=true 200 ok 6115
+[notice] 2025-07-21T17:25:14.844317Z node1@127.0.0.1 <0.5090.0> cc5a814ceb localhost:15984 127.0.0.1 adm GET /foo/_changes?filter=bar/even&asdf=fdsa&include_docs=true 200 ok 6059
+```
+
+So we see the requests were made, and we can see each is doing
+`include_docs=true` as well as using a custom filter, both obvious indications
+that this is a potentially heavier request. However, we don't know whether
+database foo had a thousand docs or a billion docs, whether those docs were
+small or large, nor anything about the computational complexity of the
+referenced filter function. This makes it challenging to retroactively
+correlate heavy resource usage at a hardware level with the underlying requests
+that induced those workloads, especially if the heavy requests are an
+inconspicuous subset of the full database workload.
+
+CSRT resolves this by providing a real time querying system to find the active
+heavy processes, live, as well as a process lifecycle reporting engine
+providing detailed analysis of the workloads induced by the request.
+
+Let's assume we had the default IOQ logger matcher enabled, with the default
+configuration of logging any requests inducing more than 10k IOQ calls, which
+would catch all three of the requests above, even while they were still
+running. As a result, we generate process lifecycle reports for all three of
+those requests, as we can see:
+
+```
+(chewbranca)-(jobs:1)-(~/src/couchdb_csrt_v3)
+(! 9873)-> grep 'cc5a814ceb\|0e625c723a\|962cda1645' ./dev/logs/node1.log | grep '^\[report]'
+[report] 2025-07-21T17:25:14.520787Z node1@127.0.0.1 <0.5174.0> -------- [csrt-pid-usage-lifetime changes_returned=5500 db_open=10 dbname="foo" docs_read=11001 get_kp_node=72 get_kv_node=1754 ioq_calls=22002 js_filter=11001 js_filtered_docs=11001 nonce="0e625c723a" pid_ref="<0.5087.0>:#Ref<0.2277656623.606601217.92191>" rows_read=11001 started_at="2025-07-21T17:25:08.424z" type="coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes" updated_at="2025-07-21T17:25:14.520z" username="adm"]
+[report] 2025-07-21T17:25:14.521578Z node1@127.0.0.1 <0.5155.0> -------- [csrt-pid-usage-lifetime changes_returned=5500 db_open=10 dbname="foo" docs_read=11001 get_kp_node=72 get_kv_node=1754 ioq_calls=22002 js_filter=11001 js_filtered_docs=11001 nonce="962cda1645" pid_ref="<0.5086.0>:#Ref<0.2277656623.605290499.27728>" rows_read=11001 started_at="2025-07-21T17:25:08.406z" type="coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes" updated_at="2025-07-21T17:25:14.521z" username="adm"]
+[report] 2025-07-21T17:25:14.844436Z node1@127.0.0.1 <0.5213.0> -------- [csrt-pid-usage-lifetime changes_returned=5500 db_open=10 dbname="foo" docs_read=11001 get_kp_node=72 get_kv_node=1754 ioq_calls=22002 js_filter=11001 js_filtered_docs=11001 nonce="cc5a814ceb" pid_ref="<0.5090.0>:#Ref<0.2277656623.605290499.37969>" rows_read=11001 started_at="2025-07-21T17:25:08.784z" type="coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes" updated_at="2025-07-21T17:25:14.844z" username="adm"]
+```
+
+We find the process lifecycle reports for the requests with the three grep'ed
+nonces, and we can see they all read the 11k core documents, plus the one
+design document, JS filtered all 11,001 docs, and then only returned the 5500
+docs containing an even `doc.value` field.
+
+This also shows the discrepancy between the quantity of resource usage induced
+to actually fulfill a request, relative to the magnitude of the data returned.
+All of our `doc.value` fields were positive integers; if we had a filter
+function searching for negative `doc.value` results, we would have found none,
+resulting in `changes_returned=0`, yet we would still have induced the 11,001
+doc loads and Javascript filter calls.
+
+CSRT is specifically built to automatically find and report these types of
+workload discrepancies, and in general to help highlight where individual HTTP
+requests use drastically more resources than the median workloads.
+
+See the dedicated proc window documentation section further down for more info.
+
+## Additional Overview and Examples
+
+The query and HTTP APIs are well documented and tested (h/t @iilyak) and
+provide an excellent overview of the interaction patterns and query
+capabilities of CSRT.
+Those can be found at:
+
+* `couch_srt_query.erl` "Query API functions"
+  - https://github.com/apache/couchdb/blob/da87fc3fd7beb79f1ba63cf430dd92818fb02a62/src/couch_srt/src/couch_srt_query.erl#L412-L740
+  - the above highlighted functions are well tested, typespec'ed, and have
+    auxiliary documentation and examples, an excellent resource
+* the `couch_srt_query_tests.erl` Eunit tests are an excellent overview of
+  utilizing the `couch_srt_query:` API from Erlang to find, filter, and
+  aggregate CSRT real time contexts
+  - https://github.com/apache/couchdb/blob/da87fc3fd7beb79f1ba63cf430dd92818fb02a62/src/couch_stats/test/eunit/couch_srt_query_tests.erl
+* similarly, the `couch_srt_httpd_tests.erl` Eunit tests are an excellent
+  overview of performing the same style `couch_srt_query:` queries, but through
+  the HTTP API
+  - https://github.com/apache/couchdb/blob/da87fc3fd7beb79f1ba63cf430dd92818fb02a62/src/couch_stats/test/eunit/couch_srt_httpd_tests.erl
+* additionally, the `couch_srt_logger_tests.erl` Eunit tests demonstrate the
+  different default logger matchers in action
+  - https://github.com/apache/couchdb/blob/da87fc3fd7beb79f1ba63cf430dd92818fb02a62/src/couch_stats/test/eunit/couch_srt_logger_tests.erl
+
+# CSRT Code Markers
+
+## -define(CSRT_ETS, csrt_server).
+
+This is the reference to the CSRT ets table; it's managed by `csrt_server`,
+which is where the name originates.
+
+## -define(MATCHERS_KEY, {csrt_logger, all_csrt_matchers}).
+
+This marker is the `persistent_term` key where the active matchers are written,
+allowing the CSRT tracker processes to access the logger matchers concurrently
+and in parallel for lifecycle reporting.
+
+# CSRT Process Dictionary Markers
+
+## -define(PID_REF, {csrt, pid_ref}).
+
+This marker stores the core `PidRef` identifier. The key idea here is that a
+context lifecycle is contained within the given `PidRef`, meaning that a `Pid`
+can instantiate different CSRT lifecycles and pass those to different workers.
+
+This is specifically necessary for long running processes that need to handle
+many CSRT context lifecycles over the course of that individual process's own
+lifecycle. In practice, this is immediately needed for the actual coordinator
+lifecycle tracking, as `chttpd` uses a worker pool of http request handlers
+that can be re-used, so we need a way to create a CSRT lifecycle corresponding
+to the given request currently being serviced. This is also intended to be used
+in other long running processes, like IOQ or `couch_js` pids, such that we can
+track the specific context inducing the operations on the `couch_file` pid or
+indexer or replicator or whatever.
+
+Worker processes have a more clear cut lifecycle, but either style of process
+can be exit'ed in a manner that skips the ability to do cleanup operations. So
+additionally, a dedicated tracker process is spawned to monitor the process
+that induced the CSRT context, such that we can do the dynamic logger matching
+directly in these tracker processes, and also properly clean up the ets entries
+even if the Pid crashes.
+
+## -define(TRACKER_PID, {csrt, tracker}).
+
+A handle to the spawned tracker process that does cleanup and logger matching
+reports at the end of the process lifecycle. We store a reference to the
+tracker pid so that for explicit context destruction, like in `chttpd` workers
+after a request has been serviced, we can stop the tracker and perform the
+expected cleanup directly, as sketched below.
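+
+For reference, a minimal sketch of that per-request lifecycle, mirroring the
+`chttpd` integration in this change (`handle_request_int/1` and friends):
+
+```erlang
+%% Create a context for the request this (reusable) worker is servicing,
+%% label it as the request progresses, and explicitly destroy it once the
+%% request has been serviced, which also stops the tracker process.
+couch_srt:create_coordinator_context(HttpReq0, Path),
+couch_srt:set_context_handler_fun({?MODULE, ?FUNCTION_NAME}),
+%% ... service the request, inducing couch_stats counter updates ...
+couch_srt:destroy_context().
+```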
+
+## -define(DELTA_TA, {csrt, delta_ta}).
+
+This stores our last delta snapshot, to track progress since the last
+incremental streaming of stats back to the coordinator process; it is updated
+with the latest value after the next delta is made. E.g. this stores `T0` so we
+can do `T1 = get_resource()`, `make_delta(T0, T1)`, and then save `T1` as the
+new `T0` for use in our next delta.
+
+## -define(LAST_UPDATED, {csrt, last_updated}).
+
+This stores the integer corresponding to the `erlang:monotonic_time()` value of
+the most recent `updated_at` value. Basically this lets us utilize a pdict
+value to turn `updated_at` tracking into an incremental operation that can be
+chained in the existing atomic `ets:update_counter` and `ets:update_element`
+calls.
+
+The issue is that our updates are of the form `+2 to ioq_calls for $pid_ref`,
+which ets performs in a guaranteed atomic and isolated manner. The strict use
+of these atomic operations for tracking these values is why this system works
+efficiently at scale. This means that we can increment counters on all of the
+stats counter fields in a batch, very quickly, but for tracking `updated_at`
+timestamps we'd need to either do an extra ets call to get the last
+`updated_at` value, or do an extra ets call to `ets:update_element` to set the
+`updated_at` value to `couch_srt_util:tnow()`. The core problem with this is
+that the batch inc operation is essentially the only write operation performed
+after the initial context setting of dbname/handler/etc; this means that we'd
+literally double the number of ets calls induced to track CSRT updates, just
+for tracking the `updated_at`. So instead, we rely on the fact that the local
+process corresponding to `$pid_ref` is the _only_ process doing updates, so we
+know the last `updated_at` value will be the last time this process updated the
+data. We track that value in the pdict and take a delta between `tnow()` and
+`updated_at`, and then `updated_at` becomes a value we can sneak into the other
+integer counter updates we're already performing!
+
+
+# Core CSRT API
+
+The `couch_srt(.erl)` module is the primary entry point into CSRT, containing
+API functionality for tracking the lifecycle of processes, inducing metric
+tracking over that lifecycle, and also a variety of functions for aggregate
+querying.
+
+It's worth noting that the CSRT context tracking functions are specifically
+designed to not `throw` and to be safe in the event of unexpected CSRT failures
+or edge cases. The aggregate query API has some callers that will actually
+throw, but aside from that, core CSRT operations will not bubble up exceptions,
+and will either return the error value, or catch the error and move on rather
+than chaining further errors.
+
+## Context Lifecycle API
+
+These are the CRUD functions for handling a CSRT context lifecycle, where a
+lifecycle context is created in a `chttpd` coordinator process by way of
+`couch_srt:create_coordinator_context/2`, or in `rexi_server:init_p` by way of
+`couch_srt:create_worker_context/3`. Additional functions are exposed for
+setting context specific info like username/dbname/handler. `get_resource`
+fetches the context being tracked corresponding to the given `PidRef`.
+
+```
+-export([
+    create_context/2,
+    create_coordinator_context/2,
+    create_worker_context/3,
+    destroy_context/0,
+    destroy_context/1,
+    get_resource/0,
+    get_resource/1,
+    set_context_dbname/1,
+    set_context_dbname/2,
+    set_context_handler_fun/1,
+    set_context_handler_fun/2,
+    set_context_username/1,
+    set_context_username/2
+]).
+```
+
+## Public API
+
+The "Public", or miscellaneous, API, for lack of a better name. These are
+various functions exposed for wider use and/or testing purposes.
+
+```
+-export([
+    do_report/2,
+    is_enabled/0,
+    is_enabled_init_p/0,
+    is_enabled_reporting/0,
+    is_enabled_rpc_reporting/0,
+    maybe_report/2,
+    to_json/1
+]).
+```
+
+These tools provide direct and conditional mechanisms to generate a report,
+with `do_report/2` and `maybe_report/2` respectively, the latter testing the
+provided `rctx()` against the actively registered Logger Matchers.
+
+The `is_enabled*` checks perform the various enablement checks, as described in
+the corresponding Config documentation sections for those fields.
+
+And lastly, there is the `couch_srt:to_json/1` function, which takes a
+`maybe_rctx()` as opposed to `couch_srt_entry:to_json/1` which only takes an
+actual `rctx()`. This is specifically to make it easier to map
+`couch_srt:to_json/1` over output from `couch_srt:proc_window/3` and to easily
+handle the case when `couch_srt:get_resource/1` returns undefined because the
+context has already exited before we could look at it.
+
+
+## Stats Collection API
+
+This is the stats collection API, utilized by way of
+`couch_stats:increment_counter` to do local process tracking, and also in
+`rexi` to add and extract delta contexts and then accumulate those values.
+
+NOTE: `make_delta/0` is a "destructive" operation that will induce a new delta
+by way of the local pdict's last rctx delta snapshot, and then update it to the
+most recent version. Two individual rctx snapshots for a PidRef can safely
+generate an actual delta by way of `couch_srt_util:rctx_delta/2`.
+
+```
+-export([
+    accumulate_delta/1,
+    add_delta/2,
+    docs_written/1,
+    extract_delta/1,
+    get_delta/0,
+    inc/1,
+    inc/2,
+    ioq_called/0,
+    js_filtered/1,
+    make_delta/0,
+    rctx_delta/2,
+    maybe_add_delta/1,
+    maybe_add_delta/2,
+    maybe_inc/2,
+    should_track_init_p/1
+]).
+```
+
+## Query API
+
+See the `Additional Overview and Examples` section above for more details.
+
+```
+% Aggregate Query API
+-export([
+    active/0,
+    active/1,
+    active_coordinators/0,
+    active_coordinators/1,
+    active_workers/0,
+    active_workers/1,
+    find_by_nonce/1,
+    find_by_pid/1,
+    find_by_pidref/1,
+    find_workers_by_pidref/1,
+    query_matcher/1,
+    query_matcher/2
+]).
+
+-export([
+    query/1,
+    from/1,
+    group_by/1,
+    group_by/2,
+    sort_by/1,
+    sort_by/2,
+    count_by/1,
+    options/1,
+    unlimited/0,
+    with_limit/1,
+
+    run/1,
+    unsafe_run/1
+]).
+```
+
+## couch_srt:proc_window/3 -- Recon API Ports of https://github.com/ferd/recon/releases/tag/2.5.6
+
+This is a "port" of `recon:proc_window` to `couch_srt:proc_window`, allowing
+for `proc_window` style aggregations/sorting/filtering, but with the stats
+fields collected by CSRT! It is also a direct port of `recon:proc_window` in
+that it utilizes the same underlying logic and efficient internal data
+structures as `recon:proc_window`, and only changes the Sample function:
+
+```erlang
+%% This is a recon:proc_window/3 [1] port with the same core logic but
+%% recon_lib:proc_attrs/1 replaced with pid_ref_attrs/1, and returning on
+%% pid_ref() rather than pid().
+%% [1] https://github.com/ferd/recon/blob/c2a76855be3a226a3148c0dfc21ce000b6186ef8/src/recon.erl#L268-L300 +-spec proc_window(AttrName, Num, Time) -> term() | throw(any()) when + AttrName :: rctx_field(), Num :: non_neg_integer(), Time :: pos_integer(). +proc_window(AttrName, Num, Time) -> + Sample = fun() -> pid_ref_attrs(AttrName) end, + {First, Last} = recon_lib:sample(Time, Sample), + recon_lib:sublist_top_n_attrs(recon_lib:sliding_window(First, Last), Num). +``` + +In particular, our change is `Sample = fun() -> pid_ref_attrs(AttrName) end,`, +and in fact, if recon upstream parameterized the option of `AttrName` or +`SampleFunction`, this could be reimplemented as: + +```erlang +%% couch_srt:proc_window +proc_window(AttrName, Num, Time) -> + Sample = fun() -> pid_ref_attrs(AttrName) end, + recon:proc_window(Sample, Num, Time). +``` + +This implementation is being highlighted here because `recon:proc_window/3` is +battle hardened and `recon_lib:sliding_window` uses an efficient internal data +structure for storing the two samples that has been proven to work in production +systems with millions of active processes, so swapping the `Sample` function +with a CSRT version allows us to utilize the production grade recon +functionality, but extended out to the particular CouchDB statistics we're +especially interested in. + +And on a fun note: any further stats tracking fields added to CSRT tracking will +automatically work with this too. + + +``` +-export([ + pid_ref_attrs/1, + pid_ref_matchspec/1, + proc_window/3 +]). +``` + +
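+As a usage sketch, the same pattern as the earlier example but sampling a
+different tracked stat (assuming active CSRT contexts on the node):
+
+```erlang
+%% Top 3 contexts by docs_read over a 5 second window, pretty printed;
+%% couch_srt:to_json/1 tolerates contexts that exited before the lookup.
+Top = couch_srt:proc_window(docs_read, 3, 5000),
+[couch_srt:to_json(couch_srt:get_resource(PR)) || {PR, _, _} <- Top].
+```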
+
+# Core types and Maybe types
+
+Before we look at the `#rctx{}` record fields, let's examine the core datatypes
+defined by CSRT for use in Dialyzer typespecs. There are more, but these are
+the essentials and demonstrate the "maybe" typespec approach utilized in CSRT.
+
+Say we have a `-type foo() :: #foo{}` and a `-type maybe_foo() :: foo() |
+undefined`. We can then construct functions of the form `-spec get_foo(id()) ->
+maybe_foo()` and use Dialyzer to statically assert that all callers of
+`get_foo/1` handle the `maybe_foo()` data type rather than just `foo()`, and
+ensure that all subsequent callers do as well.
+
+This approach of `maybe_<type>() :: <type>() | undefined` typespecs is utilized
+throughout CSRT and has greatly aided in the development, refactoring, and
+static analysis of this system. Here's a useful snippet for running Dialyzer
+while hacking on CSRT:
+
+> make && time make dialyze apps=couch_stats
+
+```erlang
+-type pid_ref() :: {pid(), reference()}.
+-type maybe_pid_ref() :: pid_ref() | undefined.
+
+-type coordinator_rctx() :: #rctx{type :: coordinator()}.
+-type rpc_worker_rctx() :: #rctx{type :: rpc_worker()}.
+-type rctx() :: #rctx{} | coordinator_rctx() | rpc_worker_rctx().
+-type rctxs() :: [#rctx{}] | [].
+-type maybe_rctx() :: rctx() | undefined.
+```
+
+Above we have the core `pid_ref()` data type, which is just a tuple with a
+`pid()` and a `reference()`, and naturally, `maybe_pid_ref()` handles the
+optional presence of a `pid_ref()`, allowing APIs like
+`couch_srt:get_resource(maybe_pid_ref())` to handle the ambiguous presence of a
+`pid_ref()`.
+
+We define our core `rctx()` data type as an empty `#rctx{}`, or the more
+specific `coordinator_rctx()` or `rpc_worker_rctx()`, such that we can be
+specific about the `rctx()` type in functions that need to distinguish. And
+then, as expected, we have the notion of `maybe_rctx()`.
+
+# #rctx{}
+
+This is the core data structure utilized to track a CSRT context for a
+coordinator or rpc_worker process, represented by the `#rctx{}` record, and
+stored in the `?CSRT_ETS` table keyed on `{keypos, #rctx.pid_ref}`.
+
+The Metadata fields store labeling data for the given process being tracked,
+such as `started_at` and `updated_at` timings, the primary `pid_ref` id key,
+the type of the process context, and some additional information like username,
+dbname, and the nonce of the coordinator request.
+
+The Stats Counters fields are `non_neg_integer()` monotonically increasing
+counters corresponding to the `couch_stats` metrics counters we're interested
+in tracking at a process level cardinality. The use of these purely integer
+counter fields, in a record stored in an ets table, is the cornerstone of CSRT
+and why it's able to operate at high throughput and high concurrency, as
+`ets:update_counter/{3,4}` perform increment operations atomically and in
+isolation, in a manner which does not require fetching and loading the data
+directly. We then take care to batch the accumulation of delta updates into a
+single `update_counter` call, and even sneak in the `updated_at` tracking as an
+integer counter update without inducing an extra ets call.
+
+NOTE: the typespecs for these fields include the `'_'` atom as a possible type,
+as that is the matchspec wildcard any of the fields can be set to when using an
+existing `#rctx{}` record to search with, as sketched below.
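+
+For example, a minimal match pattern sketch under that convention, assuming the
+`?CSRT_ETS` table name and the record definition that follows:
+
+```erlang
+%% Match all tracked contexts for database <<"foo">>: the `_ = '_'` record
+%% syntax sets every remaining field to the matchspec wildcard.
+Pattern = #rctx{dbname = <<"foo">>, _ = '_'},
+ets:match_object(csrt_server, Pattern).
+```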
+
+```erlang
+-record(rctx, {
+    %% Metadata
+    started_at = couch_srt_util:tnow() :: integer() | '_',
+    %% NOTE: updated_at must be after started_at to preserve time congruity
+    updated_at = couch_srt_util:tnow() :: integer() | '_',
+    pid_ref :: maybe_pid_ref() | {'_', '_'} | '_',
+    nonce :: nonce() | undefined | '_',
+    type :: rctx_type() | undefined | '_',
+    dbname :: dbname() | undefined | '_',
+    username :: username() | undefined | '_',
+
+    %% Stats Counters
+    db_open = 0 :: non_neg_integer() | '_',
+    docs_read = 0 :: non_neg_integer() | '_',
+    docs_written = 0 :: non_neg_integer() | '_',
+    rows_read = 0 :: non_neg_integer() | '_',
+    changes_returned = 0 :: non_neg_integer() | '_',
+    ioq_calls = 0 :: non_neg_integer() | '_',
+    js_filter = 0 :: non_neg_integer() | '_',
+    js_filtered_docs = 0 :: non_neg_integer() | '_',
+    get_kv_node = 0 :: non_neg_integer() | '_',
+    get_kp_node = 0 :: non_neg_integer() | '_'
+    %% "Example to extend CSRT"
+    %%write_kv_node = 0 :: non_neg_integer() | '_',
+    %%write_kp_node = 0 :: non_neg_integer() | '_'
+}).
+```
+
+## Metadata
+
+We use `couch_srt_util:tnow()` for time tracking, which is a `native` format
+`erlang:monotonic_time()` integer that, notably, _can_ be and often is a
+negative value. You must either take a delta or convert the time to get it into
+a usable format, as one might suspect from the use of `native`.
+
+We make use of `erlang:monotonic_time/0` as per the recommendation in
+https://www.erlang.org/doc/apps/erts/time_correction.html#how-to-work-with-the-new-api
+on the suggested way to `Measure Elapsed Time`, as quoted:
+
+```
+Take time stamps with erlang:monotonic_time/0 and calculate the time difference
+using ordinary subtraction. The result is in native time unit. If you want to
+convert the result to another time unit, you can use erlang:convert_time_unit/3.
+
+An easier way to do this is to use erlang:monotonic_time/1 with the desired time
+unit. However, you can then lose accuracy and precision.
+```
+
+So our `couch_srt_util:tnow/0` is implemented as follows, and we store
+timestamps in `native` format as long as possible to avoid precision loss at
+higher units of time, e.g. 300 microseconds is zero milliseconds.
+
+```
+-spec tnow() -> integer().
+tnow() ->
+    erlang:monotonic_time().
+```
+
+We store timestamps in the node's local erlang representation of time,
+specifically to be able to efficiently do time deltas, and we track time deltas
+from the local node's perspective so as to not send timestamps across the wire.
+We then utilize `calendar:system_time_to_rfc3339` to convert the local node's
+native time representation to its corresponding time format when we generate
+the process life cycle reports or send an http response.
+
+NOTE: because we do an inline definition and assignment of the
+`#rctx.started_at` and `#rctx.updated_at` fields to `couch_srt_util:tnow()`, we
+_must_ declare `#rctx.updated_at` *after* `#rctx.started_at` to avoid
+fundamental time incongruities.
+
+### #rctx.started_at = couch_srt_util:tnow() :: integer() | '_',
+
+A static value corresponding to the local node's Erlang monotonic_time at which
+this context was created.
+
+### #rctx.updated_at = couch_srt_util:tnow() :: integer() | '_',
+
+A dynamic value corresponding to the local node's Erlang monotonic_time at
+which this context was updated.
+Note: unlike `#rctx.started_at`, this value will
+update over time, and in the process lifecycle reports the `#rctx.updated_at`
+value corresponds to the point at which the context was destroyed, allowing for
+calculation of the total duration of the request/context.
+
+### #rctx.pid_ref :: maybe_pid_ref() | {'_', '_'} | '_',
+
+The primary identifier used to track the resources consumed by a given `pid()`
+for a specific context identified with a `make_ref()`, combined together as a
+unit, as a given `pid()`, eg one from the `chttpd` worker pool, can have many
+contexts over time.
+
+### #rctx.nonce :: nonce() | undefined | '_',
+
+The `Nonce` value of the http request being serviced by the `coordinator_rctx()`,
+used as the primary grouping identifier of workers across the cluster, as the
+`Nonce` is funneled through `rexi_server`.
+
+### #rctx.type :: rctx_type() | undefined | '_',
+
+A subtype classifier for the `#rctx{}` contexts, right now only supporting
+`#rpc_worker{}` and `#coordinator{}`, but CSRT was designed to accommodate
+additional context types like `#view_indexer{}`, `#search_indexer{}`,
+`#replicator{}`, `#compactor{}`, `#etc{}`.
+
+### #rctx.dbname :: dbname() | undefined | '_',
+
+The database name, filled in at some point after the initial context creation by
+way of `couch_srt:set_context_dbname/{1,2}`.
+
+### #rctx.username :: username() | undefined | '_',
+
+The requester's username, filled in at some point after the initial context
+creation by way of `couch_srt:set_context_username/{1,2}`.
+
+## Stats Counters
+
+All of these stats counters are strictly `non_neg_integer()` counter values that
+are monotonically increasing, as we only induce positive counter increment calls
+in CSRT. Not all of these values will be nonzero, eg if the context doesn't
+induce Javascript filtering of documents, it won't inc the `#rctx.js_filter`
+field. The `"should_truncate_reports"` config value described in this document
+will conditionally exclude the zero-valued fields from the process lifecycle
+report.
+
+### #rctx.db_open = 0 :: non_neg_integer() | '_',
+
+> Tracking `couch_stats:increment_counter([couchdb, couch_server, open])`
+
+The number of `couch_server:open/2` invocations induced by this context.
+
+### #rctx.docs_read = 0 :: non_neg_integer() | '_',
+
+> Tracking `couch_stats:increment_counter([couchdb, database_reads])`
+
+The number of `couch_db:open_doc/3` invocations induced by this context.
+
+### #rctx.docs_written = 0 :: non_neg_integer() | '_',
+
+A phony metric counting docs written by the context, induced by way of
+`couch_srt:docs_written(length(Docs0))` in `fabric_rpc:update_docs/3`, as a way
+to count the magnitude of docs written, since the actual document writes happen
+in the `#db.main_pid` `couch_db_updater` pid and subprocess tracking is not yet
+supported in CSRT.
+
+This can be replaced with direct counting once passthrough contexts work.
+
+### #rctx.rows_read = 0 :: non_neg_integer() | '_',
+
+> Tracking `couch_stats:increment_counter([fabric_rpc, changes, processed])`
+> also Tracking `couch_stats:increment_counter([fabric_rpc, view, rows_read])`
+
+A value tracking multiple possible metrics corresponding to rows streamed in
+aggregate operations. This is used for view_rows/changes_rows/all_docs/etc.
+
+### #rctx.changes_returned = 0 :: non_neg_integer() | '_',
+
+The number of `fabric_rpc:changes_row/2` invocations induced by this context,
+specifically tracking the number of changes rows streamed back to the client
+request, allowing for distinguishing between the number of changes processed
+to fulfill a request versus the number actually returned in the http response.
+
+### #rctx.ioq_calls = 0 :: non_neg_integer() | '_',
+
+A phony metric counting invocations of `ioq:call/3` induced by this context. As
+with `#rctx.docs_written`, we need a proxy metric to represent these calls
+until CSRT context passing is supported, so that the `ioq_server` pid can
+return its own delta back to the worker pid.
+
+### #rctx.js_filter = 0 :: non_neg_integer() | '_',
+
+A phony metric counting the number of `couch_query_servers:filter_docs_int/5`
+(eg ddoc_prompt) invocations induced by this context. This is called by way of
+`couch_srt:js_filtered(length(JsonDocs))`, which increments `js_filter` by 1 and
+`js_filtered_docs` by the length of the docs, so we can track the magnitude of
+docs and doc revs being filtered.
+
+### #rctx.js_filtered_docs = 0 :: non_neg_integer() | '_',
+
+A phony metric counting the quantity of documents filtered by way of
+`couch_query_servers:filter_docs_int/5` (eg ddoc_prompt) invocations induced by
+this context. This is called by way of `couch_srt:js_filtered(length(JsonDocs))`,
+which increments `#rctx.js_filter` by 1 and `#rctx.js_filtered_docs` by the
+length of the docs, so we can track the magnitude of docs and doc revs being
+filtered.
+
+### #rctx.get_kv_node = 0 :: non_neg_integer() | '_',
+
+This metric tracks the number of invocations of `couch_btree:get_node/2` in
+which the `NodeType` returned by `couch_file:pread_term/2` is `kv_node`, instead
+of `kp_node`.
+
+This provides a mechanism to quantify the impact of document count and document
+size on the logarithmic complexity btree algorithms as the database btrees grow.
+
+### #rctx.get_kp_node = 0 :: non_neg_integer() | '_'
+
+This metric tracks the number of invocations of `couch_btree:get_node/2` in
+which the `NodeType` returned by `couch_file:pread_term/2` is `kp_node`, instead
+of `kv_node`.
+
+This provides a mechanism to quantify the impact of document count and document
+size on the logarithmic complexity btree algorithms as the database btrees grow.
+
+# Extending CSRT
+
+There are documentation markers in the code highlighting where and how to extend
+CSRT with additional stats to track. The currently selected stats are targeted
+as a working demonstration of CSRT being able to highlight heavy-usage requests,
+such as changes requests. CSRT has been designed to support extending out to all
+stats collection and all resource usage inducing processes within CouchDB.
+
+Grep for `'Example to extend CSRT'` to find the code points, eg:
+
+> grep -ri 'Example to extend CSRT' src/
+
+## Next Steps
+
+Next steps to continue CSRT improvements:
+
+* Create an expressive syntax that can map something like Mango queries,
+  expressible in ini files for persistent storage and easy configuration, and
+  turn those into `ets:match_spec()`. The current logic is getting progressively
+  closer to that, but we're not fully generating dynamic matchspecs.
+  Once we can, the default matchers can be rewritten directly, and we can also
+  create a `POST /_active_resources` API that takes a declarative query,
+  translates it into the matchspec, and funnels it along efficiently to
+  `ets:select/3`.
+* Chain CSRT contexts through the various callers. For example, funneling the
+  context through IOQ and onto `couch_file` should allow the induced workload
+  of the `couch_file` process to be tracked _specifically_ in the context of
+  the RPC worker who made the request. This would let us bubble up any stats
+  from any process in the chain, but it will need some type of delta hook
+  mechanism: we utilize the RPC workers' sending of data through `rexi` as a
+  trigger for sending deltas, and we'd need a similar hook for forwarding the
+  context from the caller to IOQ to `couch_file` and then back to IOQ and back
+  to the caller. Similarly, funneling doc and rev counts back from
+  `couch_db_updater` could also potentially track some stats around merging of
+  doc updates.
+* Extend CSRT to the rest of the core API operations, especially things like
+  search and replication.
+* Port Mango stats into CSRT stats and remove the old stats RPC mechanisms.
+  - Mango tracked _extra_ stats; CSRT's position is that if a stat is worth
+    tracking, it should become a formal `couch_stats` metric, which CSRT can
+    then pick up directly.
+  - This removes the need for phony metrics, and similarly removes the need
+    for `js_filtered()` and `ioq_called()`.
+  - For now these are simple ways to get some data into CSRT without needing
+    to chain the deltas from nested processes.
+* Chain RPC work induced from index processes back to the caller.
+* Port `couch_srt:proc_window` to have an HTTP API; this should be fairly
+  simple.
+* Add report snapshots: regularly timed summaries of node usage levels.
+* Add additional query functions, like `longest() -> topK_on_time_delta()`.
+* Add background job tracking for things like `#view_index{}`,
+  `#db_compactor{}`, etc.
+* Handle double/multi-counting. Right now a coordinator includes the stats
+  induced by the RPC workers, so taking the full sum of a field across all
+  coordinators and RPC workers double counts that data. You can find out
+  exactly what the coordinator did if you generate RPC reports and subtract
+  the values, but as more processes chain results, more duplicate resource
+  counting will occur, so query mechanisms for aggregating the total workloads
+  on the cluster need to take the duplication hierarchies into account. A more
+  expressive config syntax is key to enable this, making it trivial to chain
+  constraints.
+* Introduce a fabric request context that manages the lifecycle of _all_ worker
+  responses, and doesn't just ignore any data from losing shards or orphaned
+  workers. This can reduce a lot of complexity by supplying proper lifecycles
+  around closing all the remote RPC workers successfully as well as tracking
+  CSRT data, and both can be done independently of the HTTP request.
+* More direct Prometheus integration; there are likely things Prometheus can
+  handle with extended granularity, like labels, that older tools like
+  Graphite are unable to handle.
diff --git a/src/couch_srt/rebar.config.script b/src/couch_srt/rebar.config.script
new file mode 100644
index 00000000000..76d9974ad9b
--- /dev/null
+++ b/src/couch_srt/rebar.config.script
@@ -0,0 +1,19 @@
+ProperConfig = case code:lib_dir(proper) of
+    {error, bad_name} -> [];
+    _ -> [{d, 'WITH_PROPER'}]
+end.
+
+AddConfig = [
+    {erl_opts, [
+        {i, "../"}
+    ] ++ ProperConfig}
+].
+ +lists:foldl(fun({K, V}, CfgAcc) -> + case lists:keyfind(K, 1, CfgAcc) of + {K, Existent} when is_list(Existent) andalso is_list(V) -> + lists:keystore(K, 1, CfgAcc, {K, Existent ++ V}); + false -> + lists:keystore(K, 1, CfgAcc, {K, V}) + end +end, CONFIG, AddConfig). diff --git a/src/couch_srt/src/couch_srt.app.src b/src/couch_srt/src/couch_srt.app.src new file mode 100644 index 00000000000..c4c8b90349f --- /dev/null +++ b/src/couch_srt/src/couch_srt.app.src @@ -0,0 +1,24 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_srt, [ + {description, + "Couch Stats Resource Tracker (CSRT) - realtime process local stats"}, + {vsn, git}, + {registered, [ + couch_srt_server, + couch_srt_logger + ]}, + {applications, [kernel, stdlib, couch_log, couch_stats]}, + {mod, {couch_srt_app, []}}, + {env, []} +]}. diff --git a/src/couch_srt/src/couch_srt.erl b/src/couch_srt/src/couch_srt.erl new file mode 100644 index 00000000000..2aae70ee789 --- /dev/null +++ b/src/couch_srt/src/couch_srt.erl @@ -0,0 +1,952 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt). + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_srt.hrl"). + +%% PidRef API +-export([ + destroy_pid_ref/0, + destroy_pid_ref/1, + create_pid_ref/0, + get_pid_ref/0, + get_pid_ref/1, + set_pid_ref/1 +]). + +%% Context Lifecycle API +-export([ + create_context/2, + create_coordinator_context/2, + create_worker_context/3, + destroy_context/0, + destroy_context/1, + get_resource/0, + get_resource/1, + set_context_dbname/1, + set_context_dbname/2, + set_context_handler_fun/1, + set_context_handler_fun/2, + set_context_username/1, + set_context_username/2 +]). + +%% Public API +-export([ + maybe_track_rexi_init_p/1, + maybe_track_local_counter/2, + clear_pdict_markers/0, + do_report/2, + is_enabled/0, + is_enabled_init_p/0, + is_enabled_reporting/0, + is_enabled_rpc_reporting/0, + maybe_report/2, + to_json/1 +]). + +%% Stats Collection API +-export([ + accumulate_delta/1, + add_delta/2, + docs_written/1, + extract_delta/1, + get_delta/0, + inc/1, + inc/2, + ioq_called/0, + js_filtered/1, + make_delta/0, + rctx_delta/2, + maybe_add_delta/1, + maybe_add_delta/2, + maybe_inc/2, + should_track_init_p/1 +]). + +%% RPC API +-export([ + rpc_run/1, + rpc_unsafe_run/1 +]). 
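+
+%% Illustrative (hypothetical) usage of the RPC API: build a query with the
+%% Query API below and run it on all cluster nodes, eg:
+%%   Q = couch_srt:query([
+%%       couch_srt:from("docs_read"),
+%%       couch_srt:group_by([username, dbname], ioq_calls),
+%%       couch_srt:options([couch_srt:with_limit(10)])
+%%   ]),
+%%   couch_srt:rpc_run(Q).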
+ +%% Aggregate Query API +-export([ + active/0, + active/1, + active_coordinators/0, + active_coordinators/1, + active_workers/0, + active_workers/1, + find_by_nonce/1, + find_by_pid/1, + find_by_pidref/1, + find_workers_by_pidref/1, + query_matcher/1, + query_matcher/2 +]). + +%% Recon API Ports of https://github.com/ferd/recon/releases/tag/2.5.6 +-export([ + pid_ref_attrs/1, + pid_ref_matchspec/1, + proc_window/3 +]). + +-export([ + query/1, + from/1, + group_by/1, + group_by/2, + sort_by/1, + sort_by/2, + count_by/1, + options/1, + unlimited/0, + with_limit/1, + + run/1, + unsafe_run/1 +]). + +-export_type([ + query/0, + query_expression/0, + query_option/0 +]). + +-opaque query() :: couch_srt_query:query(). +-opaque query_expression() :: couch_srt_query:query_expression(). +-opaque query_option() :: couch_srt_query:query_option(). + +%% +%% RPC Operations +%% + +-spec rpc_run(Query :: query()) -> + [ + #{ + node => node(), + result => [{aggregation_key(), pos_integer()}], + errors => [atom()] + } + ]. +rpc_run(Query) -> + Nodes = mem3:nodes(), + merge_results(Nodes, erpc:multicall(Nodes, ?MODULE, run, [Query])). + +-spec rpc_unsafe_run(Query :: query()) -> + [ + #{ + node => node(), + result => [{aggregation_key(), pos_integer()}], + errors => [atom()] + } + ]. +rpc_unsafe_run(Query) -> + Nodes = mem3:nodes(), + merge_results(Nodes, erpc:multicall(Nodes, ?MODULE, unsafe_run, [Query])). + +merge_results(Nodes, Resp) -> + %% The result of erpc:multicall is returned as a list where the result from each + %% node is placed at the same position as the node name is placed in Nodes. + %% That is why we can use `lists:zip/2` here. + lists:map(fun format_response/1, lists:zip(Nodes, Resp)). + +format_response({Node, {ok, {ok, Result}}}) -> + #{ + node => Node, + result => Result, + errors => [] + }; +format_response({Node, {ok, {error, Reason}}}) -> + #{ + node => Node, + result => none, + errors => [Reason] + }; +format_response({Node, {ok, Result}}) -> + #{ + node => Node, + result => Result, + errors => [] + }; +format_response({Node, {error, {erpc, Reason}}}) -> + #{ + node => Node, + result => none, + errors => [Reason] + }; +format_response({Node, {Tag, _}}) -> + #{ + node => Node, + result => none, + errors => [Tag] + }. + +%% +%% PidRef Operations +%% + +-spec get_pid_ref() -> maybe_pid_ref(). +get_pid_ref() -> + couch_srt_util:get_pid_ref(). + +-spec get_pid_ref(Rctx :: rctx()) -> pid_ref(). +get_pid_ref(Rctx) -> + couch_srt_util:get_pid_ref(Rctx). + +-spec set_pid_ref(PidRef :: pid_ref()) -> pid_ref(). +set_pid_ref(PidRef) -> + couch_srt_util:set_pid_ref(PidRef). + +-spec create_pid_ref() -> pid_ref(). +create_pid_ref() -> + couch_srt_server:create_pid_ref(). + +-spec destroy_pid_ref() -> maybe_pid_ref(). +destroy_pid_ref() -> + destroy_pid_ref(get_pid_ref()). + +%%destroy_pid_ref(undefined) -> +%% undefined; +-spec destroy_pid_ref(PidRef :: maybe_pid_ref()) -> maybe_pid_ref(). +destroy_pid_ref(_PidRef) -> + erase(?PID_REF). + +%% +%% Context Lifecycle API +%% + +-spec create_worker_context(From, MFA, Nonce) -> pid_ref() | false when + From :: pid_ref(), MFA :: mfa(), Nonce :: nonce(). +create_worker_context(From, {M, F, _A}, Nonce) -> + case is_enabled() of + true -> + Type = #rpc_worker{from = From, mod = M, func = F}, + create_context(Type, Nonce); + false -> + false + end. + +-spec create_coordinator_context(Httpd, Path) -> pid_ref() | false when + Httpd :: #httpd{}, Path :: list(). 
+create_coordinator_context(#httpd{method = Verb, nonce = Nonce}, Path0) -> + case is_enabled() of + true -> + Path = list_to_binary([$/ | Path0]), + Type = #coordinator{method = Verb, path = Path}, + create_context(Type, Nonce); + false -> + false + end. + +-spec create_context(Type :: rctx_type(), Nonce :: term()) -> pid_ref() | false. +create_context(Type, Nonce) -> + Rctx = couch_srt_server:new_context(Type, Nonce), + PidRef = get_pid_ref(Rctx), + set_pid_ref(PidRef), + try + couch_srt_util:put_delta_a(Rctx), + couch_srt_util:put_updated_at(Rctx), + couch_srt_server:create_resource(Rctx), + couch_srt_logger:track(Rctx), + PidRef + catch + _:_ -> + couch_srt_server:destroy_resource(PidRef), + %% calling destroy_context(PidRef) clears the tracker too + destroy_context(PidRef), + false + end. + +-spec set_context_dbname(DbName :: binary()) -> boolean(). +set_context_dbname(DbName) -> + set_context_dbname(DbName, get_pid_ref()). + +-spec set_context_dbname(DbName, PidRef) -> boolean() when + DbName :: binary(), PidRef :: maybe_pid_ref(). +set_context_dbname(_, undefined) -> + false; +set_context_dbname(DbName, PidRef) -> + is_enabled() andalso couch_srt_server:set_context_dbname(DbName, PidRef). + +-spec set_context_handler_fun(Handler) -> boolean() when + Handler :: function() | {atom(), atom()}. +set_context_handler_fun(Handler) -> + set_context_handler_fun(Handler, get_pid_ref()). + +-spec set_context_handler_fun(Handler, PidRef) -> boolean() when + Handler :: function() | {atom(), atom()}, PidRef :: maybe_pid_ref(). +set_context_handler_fun(_, undefined) -> + false; +set_context_handler_fun(Fun, PidRef) when is_function(Fun) -> + case is_enabled() of + false -> + false; + true -> + FProps = erlang:fun_info(Fun), + Mod = proplists:get_value(module, FProps), + Func = proplists:get_value(name, FProps), + set_context_handler_fun({Mod, Func}, PidRef) + end; +set_context_handler_fun({Mod, Func}, PidRef) -> + case is_enabled() of + false -> + false; + true -> + couch_srt_server:set_context_handler_fun({Mod, Func}, PidRef) + end. + +%% @equiv set_context_username(User, get_pid_ref()) +set_context_username(User) -> + set_context_username(User, get_pid_ref()). + +-spec set_context_username(User, PidRef) -> boolean() when + User :: null | undefined | #httpd{} | #user_ctx{} | binary(), + PidRef :: maybe_pid_ref(). +set_context_username(null, _) -> + false; +set_context_username(_, undefined) -> + false; +set_context_username(#httpd{user_ctx = Ctx}, PidRef) -> + set_context_username(Ctx, PidRef); +set_context_username(#user_ctx{name = Name}, PidRef) -> + set_context_username(Name, PidRef); +set_context_username(UserName, PidRef) -> + is_enabled() andalso couch_srt_server:set_context_username(UserName, PidRef). + +-spec destroy_context() -> ok. +destroy_context() -> + destroy_context(get_pid_ref()). + +-spec destroy_context(PidRef :: maybe_pid_ref()) -> ok. +destroy_context(undefined) -> + ok; +destroy_context(PidRef) -> + %% Stopping the tracker clears the ets entry for PidRef on its way out + couch_srt_logger:stop_tracker(), + destroy_pid_ref(PidRef), + clear_pdict_markers(), + ok. + +-spec clear_pdict_markers() -> ok. +clear_pdict_markers() -> + ok = lists:foreach( + fun + ({{csrt, _} = K, _V}) -> + erlang:erase(K); + (_) -> + ok + end, + erlang:get() + ). + +%% +%% Public API +%% + +-spec maybe_track_rexi_init_p({M, F, A}) -> couch_stats:response() when + M :: atom(), F :: atom(), A :: non_neg_integer(). 
+maybe_track_rexi_init_p({M, F, _A}) -> + Metric = [M, F, spawned], + case couch_srt:should_track_init_p(Metric) of + true -> couch_stats:increment_counter(Metric); + false -> ok + end. + +%% Only potentially track positive increments to counters +-spec maybe_track_local_counter(any(), any()) -> ok. +maybe_track_local_counter(Name, Val) when is_integer(Val) andalso Val > 0 -> + couch_srt:maybe_inc(Name, Val), + ok; +maybe_track_local_counter(_, _) -> + ok. + +%% @equiv couch_srt_util:is_enabled(). +-spec is_enabled() -> boolean(). +is_enabled() -> + couch_srt_util:is_enabled(). + +%% @equiv couch_srt_util:is_enabled_reporting(). +-spec is_enabled_reporting() -> boolean(). +is_enabled_reporting() -> + couch_srt_util:is_enabled_reporting(). + +%% @equiv couch_srt_util:is_enabled_rpc_reporting(). +-spec is_enabled_rpc_reporting() -> boolean(). +is_enabled_rpc_reporting() -> + couch_srt_util:is_enabled_rpc_reporting(). + +%% @equiv couch_srt_util:is_enabled_init_p(). +-spec is_enabled_init_p() -> boolean(). +is_enabled_init_p() -> + couch_srt_util:is_enabled_init_p(). + +-spec get_resource() -> maybe_rctx(). +get_resource() -> + get_resource(get_pid_ref()). + +-spec get_resource(PidRef :: maybe_pid_ref()) -> maybe_rctx(). +get_resource(PidRef) -> + couch_srt_server:get_resource(PidRef). + +%% Log a CSRT report if any filters match +-spec maybe_report(ReportName :: string(), PidRef :: pid_ref()) -> ok. +maybe_report(ReportName, PidRef) -> + couch_srt_logger:maybe_report(ReportName, PidRef). + +%% Direct report logic skipping should log filters +-spec do_report(ReportName :: string(), PidRef :: pid_ref()) -> boolean(). +do_report(ReportName, PidRef) -> + couch_srt_logger:do_report(ReportName, get_resource(PidRef)). + +-spec to_json(Rctx :: maybe_rctx()) -> map() | null. +to_json(undefined) -> + null; +to_json(Rctx) -> + couch_srt_entry:to_json(Rctx). + +%% +%% Stat Collection API +%% + +-spec inc(Key :: rctx_field()) -> non_neg_integer(). +inc(Key) -> + case is_enabled() of + true -> + couch_srt_server:inc(get_pid_ref(), Key); + false -> + 0 + end. + +-spec inc(Key :: rctx_field(), N :: non_neg_integer()) -> non_neg_integer(). +inc(Key, N) when is_integer(N) andalso N >= 0 -> + case is_enabled() of + true -> + couch_srt_server:inc(get_pid_ref(), Key, N); + false -> + 0 + end. + +-spec maybe_inc(Stat :: atom(), Val :: non_neg_integer()) -> non_neg_integer(). +maybe_inc(Stat, Val) -> + case maps:is_key(Stat, ?STATS_TO_KEYS) of + true -> + inc(maps:get(Stat, ?STATS_TO_KEYS), Val); + false -> + 0 + end. + +-spec should_track_init_p(Stat :: [atom()]) -> boolean(). +%% "Example to extend CSRT" +%% should_track_init_p([fabric_rpc, foo, spawned]) -> +%% is_enabled_init_p(); +should_track_init_p([fabric_rpc, all_docs, spawned]) -> + is_enabled_init_p(); +should_track_init_p([fabric_rpc, changes, spawned]) -> + is_enabled_init_p(); +should_track_init_p([fabric_rpc, get_all_security, spawned]) -> + is_enabled_init_p(); +should_track_init_p([fabric_rpc, map_view, spawned]) -> + is_enabled_init_p(); +should_track_init_p([fabric_rpc, open_doc, spawned]) -> + is_enabled_init_p(); +should_track_init_p([fabric_rpc, open_shard, spawned]) -> + is_enabled_init_p(); +should_track_init_p([fabric_rpc, reduce_view, spawned]) -> + is_enabled_init_p(); +should_track_init_p([fabric_rpc, update_docs, spawned]) -> + is_enabled_init_p(); +should_track_init_p(_Metric) -> + false. + +-spec ioq_called() -> non_neg_integer(). +ioq_called() -> + inc(ioq_calls). 
+ +%% we cannot yet use stats couchdb.query_server.*.ddoc_filter because those +%% are collected in a dedicated process. +%% TODO: funnel back stats from background worker processes to the RPC worker +js_filtered(N) -> + inc(js_filter), + inc(js_filtered_docs, N). + +docs_written(N) -> + inc(docs_written, N). + +-spec accumulate_delta(Delta :: map() | undefined) -> ok. +accumulate_delta(Delta) when is_map(Delta) -> + is_enabled() andalso couch_srt_server:update_counters(get_pid_ref(), Delta), + ok; +accumulate_delta(undefined) -> + ok. + +-spec make_delta() -> maybe_delta(). +make_delta() -> + case is_enabled() of + false -> + undefined; + true -> + couch_srt_util:make_delta(get_pid_ref()) + end. + +-spec rctx_delta(TA :: maybe_rctx(), TB :: maybe_rctx()) -> maybe_delta(). +rctx_delta(TA, TB) -> + couch_srt_util:rctx_delta(TA, TB). + +%% +%% Aggregate Query API +%% + +-spec active() -> [rctx()]. +active() -> + couch_srt_query:active(). + +-spec active(Type :: json) -> [rctx()]. +active(Type) -> + couch_srt_query:active(Type). + +-spec active_coordinators() -> [coordinator_rctx()]. +active_coordinators() -> + couch_srt_query:active_coordinators(). + +%% TODO: cleanup json logic here +-spec active_coordinators(Type :: json) -> [coordinator_rctx()]. +active_coordinators(Type) -> + couch_srt_query:active_coordinators(Type). + +-spec active_workers() -> [rpc_worker_rctx()]. +active_workers() -> + couch_srt_query:active_workers(). + +-spec active_workers(Type :: json) -> [rpc_worker_rctx()]. +active_workers(Type) -> + couch_srt_query:active_workers(Type). + +find_by_nonce(Nonce) -> + couch_srt_query:find_by_nonce(Nonce). + +find_by_pid(Pid) -> + couch_srt_query:find_by_pid(Pid). + +find_by_pidref(PidRef) -> + couch_srt_query:find_by_pidref(PidRef). + +find_workers_by_pidref(PidRef) -> + couch_srt_query:find_workers_by_pidref(PidRef). + +-spec pid_ref_matchspec(AttrName :: rctx_field()) -> term() | throw(any()). +pid_ref_matchspec(AttrName) -> + couch_srt_logger:pid_ref_matchspec(AttrName). + +-spec pid_ref_attrs(AttrName :: rctx_field()) -> term() | throw(any()). +pid_ref_attrs(AttrName) -> + couch_srt_logger:pid_ref_attrs(AttrName). + +%% This is a recon:proc_window/3 [1] port with the same core logic but +%% recon_lib:proc_attrs/1 replaced with couch_srt_logger:pid_ref_attrs/1, and +%% returning on pid_ref() rather than pid(). +%% [1] https://github.com/ferd/recon/blob/c2a76855be3a226a3148c0dfc21ce000b6186ef8/src/recon.erl#L268-L300 +-spec proc_window(AttrName, Num, Time) -> term() | throw(any()) when + AttrName :: rctx_field(), Num :: non_neg_integer(), Time :: pos_integer(). +proc_window(AttrName, Num, Time) -> + couch_srt_logger:proc_window(AttrName, Num, Time). + +-spec query_matcher(MatcherName :: matcher_name()) -> + {ok, query_result()} + | {error, any()}. +query_matcher(MatcherName) -> + couch_srt_query:query_matcher(MatcherName). + +-spec query_matcher(MatcherName :: matcher_name(), Limit :: pos_integer()) -> + {ok, query_result()} + | {error, any()}. +query_matcher(MatcherName, Limit) -> + couch_srt_query:query_matcher(MatcherName, Limit). + +%% +%% Delta API +%% + +-spec add_delta(T :: term(), Delta :: maybe_delta()) -> term_delta(). +add_delta(T, Delta) -> + couch_srt_util:add_delta(T, Delta). + +-spec extract_delta(T :: term_delta()) -> {term(), maybe_delta()}. +extract_delta(T) -> + couch_srt_util:extract_delta(T). + +-spec get_delta() -> tagged_delta(). +get_delta() -> + couch_srt_util:get_delta(get_pid_ref()). + +-spec maybe_add_delta(T :: term()) -> term_delta(). 
+maybe_add_delta(T) ->
+    couch_srt_util:maybe_add_delta(T).
+
+-spec maybe_add_delta(T :: term(), Delta :: maybe_delta()) -> term_delta().
+maybe_add_delta(T, Delta) ->
+    couch_srt_util:maybe_add_delta(T, Delta).
+
+%%
+%% Query API functions
+%%
+
+%% @doc Construct a query from the given expressions.
+%% The following types of expressions are allowed in the query:
+%%
+%%   • group_by/1 @see group_by/1
+%%   • group_by/2 @see group_by/1
+%%   • sort_by/1 @see sort_by/1
+%%   • count_by/1 @see count_by/1
+%%   • options/1 @see options/1
+%%   • from/1 @see from/1
+%% The order of expressions doesn't matter.
+%%
+%% Q = query([
+%%     from("docs_read"),
+%%     group_by([username, dbname], ioq_calls),
+%%     options([
+%%         with_limit(10)
+%%     ])
+%% ]),
+%%
+%% @end
+-spec query(QueryExpression :: [query_expression()]) ->
+    query() | {error, any()}.
+query(QueryExpression) ->
+    couch_srt_query:query(QueryExpression).
+
+%% @doc Specify the matcher to use for the query.
+%% If the atom 'all' is used, then all entries are in the scope of the query.
+%% The use of 'all' also makes the query 'unsafe', because it scans through all
+%% entries and can return many matching rows.
+%% Unsafe queries can only be run using 'unsafe_run/1'.
+%%
+%% Q = query([
+%%     ...
+%%     from("docs_read")
+%% ]),
+%%
+%% @end
+-spec from(MatcherNameOrAll :: string() | all) ->
+    query_expression() | {error, any()}.
+from(MatcherNameOrAll) ->
+    couch_srt_query:from(MatcherNameOrAll).
+
+%% @doc Request 'group_by' aggregation of results.
+%%
+%% Q = query([
+%%     ...
+%%     group_by([username, dbname])
+%% ]),
+%%
+%% @end
+-spec group_by(AggregationKeys) ->
+    query_expression() | {error, any()}
+when
+    AggregationKeys ::
+        binary()
+        | rctx_field()
+        | [binary()]
+        | [rctx_field()].
+group_by(AggregationKeys) ->
+    couch_srt_query:group_by(AggregationKeys).
+
+%% @doc Request 'group_by' aggregation of results.
+%%
+%% Q = query([
+%%     ...
+%%     group_by([username, dbname], ioq_calls)
+%% ]),
+%%
+%% @end
+-spec group_by(AggregationKeys, ValueKey) ->
+    query_expression() | {error, any()}
+when
+    AggregationKeys ::
+        binary()
+        | rctx_field()
+        | [binary()]
+        | [rctx_field()],
+    ValueKey ::
+        binary()
+        | rctx_field().
+group_by(AggregationKeys, ValueKey) ->
+    couch_srt_query:group_by(AggregationKeys, ValueKey).
+
+%% @doc Request 'sort_by' aggregation of results.
+%%
+%% Q = query([
+%%     ...
+%%     sort_by([username, dbname])
+%% ]),
+%%
+%% @end
+-spec sort_by(AggregationKeys) ->
+    query_expression() | {error, any()}
+when
+    AggregationKeys ::
+        binary()
+        | rctx_field()
+        | [binary()]
+        | [rctx_field()].
+sort_by(AggregationKeys) ->
+    couch_srt_query:sort_by(AggregationKeys).
+
+%% @doc Request 'sort_by' aggregation of results.
+%%
+%% Q = query([
+%%     ...
+%%     sort_by([username, dbname], ioq_calls)
+%% ]),
+%%
+%% @end
+-spec sort_by(AggregationKeys, ValueKey) ->
+    query_expression() | {error, any()}
+when
+    AggregationKeys ::
+        binary()
+        | rctx_field()
+        | [binary()]
+        | [rctx_field()],
+    ValueKey ::
+        binary()
+        | rctx_field().
+sort_by(AggregationKeys, ValueKey) ->
+    couch_srt_query:sort_by(AggregationKeys, ValueKey).
+
+%% @doc Request 'count_by' aggregation of results.
+%%
+%% Q = query([
+%%     ...
+%%     count_by(username)
+%% ]),
+%%
+%% @end
+-spec count_by(AggregationKeys) ->
+    query_expression() | {error, any()}
+when
+    AggregationKeys ::
+        binary()
+        | rctx_field()
+        | [binary()]
+        | [rctx_field()].
+count_by(AggregationKeys) ->
+    couch_srt_query:count_by(AggregationKeys).
+
+%% @doc Construct 'options' query expression.
+%% The following types of expressions are allowed in 'options':
+%%
+%%   • unlimited/0 @see unlimited/0 (cannot be used with 'with_limit/1')
+%%   • with_limit/1 @see with_limit/1 (cannot be used with 'unlimited/0')
+%% The order of expressions doesn't matter.
+%%
+%% Q = query([
+%%     ...
+%%     options([
+%%         ...
+%%     ])
+%% ]),
+%%
+%% @end
+-spec options([query_option()]) ->
+    query_expression() | {error, any()}.
+options(OptionsExpression) ->
+    couch_srt_query:options(OptionsExpression).
+
+%% @doc Enable unlimited number of results from the query.
+%% The use of 'unlimited' makes the query 'unsafe', because it can return many
+%% matching rows.
+%% Unsafe queries can only be run using 'unsafe_run/1'.
+%%
+%% Q = query([
+%%     ...
+%%     options([
+%%         unlimited()
+%%     ])
+%% ]),
+%%
+%% @end
+-spec unlimited() ->
+    query_expression().
+unlimited() ->
+    couch_srt_query:unlimited().
+
+%% @doc Set a limit on the number of results returned from the query.
+%% The construction of the query fails if the 'limit' is greater than what is
+%% allowed for this cluster.
+%%
+%% Q = query([
+%%     ...
+%%     options([
+%%         with_limit(100)
+%%     ])
+%% ]),
+%%
+%% @end
+-spec with_limit(Limit :: pos_integer()) ->
+    query_expression() | {error, any()}.
+with_limit(Limit) ->
+    couch_srt_query:with_limit(Limit).
+
+%% @doc Executes the provided query. Only 'safe' queries can be executed using 'run'.
+%% The query is considered 'unsafe' if any of the conditions below are met:
+%%
+%%   • Query uses 'unlimited/0'
+%%   • Query uses 'from(all)'
+%%
+%% Q = query([
+%%     from("docs_read"),
+%%     group_by([username, dbname], ioq_calls),
+%%     options([
+%%         with_limit(10)
+%%     ])
+%% ]),
+%% run(Q)
+%%
+%% @end
+-spec run(query()) ->
+    {ok, [{aggregation_key(), pos_integer()}]}
+    | {limit, [{aggregation_key(), pos_integer()}]}.
+run(Query) ->
+    couch_srt_query:run(Query).
+
+%% @doc Executes the provided query. This function is similar to 'run/1';
+%% however, it supports 'unsafe' queries. Be very careful using it.
+%% Pay attention to the cardinality of the result.
+%% The query is considered 'unsafe' if any of the conditions below are met:
+%%
+%%   • Query uses 'unlimited/0'
+%%   • Query uses 'from(all)'
+%%
+%% Q = query([
+%%     from("docs_read"),
+%%     group_by([username, dbname], ioq_calls),
+%%     options([
+%%         with_limit(10)
+%%     ])
+%% ]),
+%% unsafe_run(Q)
+%%
+%% @end
+-spec unsafe_run(query()) ->
+    {ok, [{aggregation_key(), pos_integer()}]}
+    | {limit, [{aggregation_key(), pos_integer()}]}.
+unsafe_run(Query) ->
+    couch_srt_query:unsafe_run(Query).
+
+%%
+%% Tests
+%%
+
+-ifdef(TEST).
+
+-include_lib("couch/include/couch_eunit.hrl").
+
+couch_stats_resource_tracker_test_() ->
+    {
+        foreach,
+        fun setup/0,
+        fun teardown/1,
+        [
+            ?TDEF_FE(t_should_track_init_p_enabled),
+            ?TDEF_FE(t_should_not_track_init_p_enabled),
+            ?TDEF_FE(t_should_not_track_init_p_disabled),
+            ?TDEF_FE(t_static_map_translations),
+            ?TDEF_FE(t_should_extract_fields_properly)
+        ]
+    }.
+
+setup() ->
+    Ctx = test_util:start_couch(),
+    config:set_boolean(?CSRT, "randomize_testing", false, false),
+    Ctx.
+
+teardown(Ctx) ->
+    test_util:stop_couch(Ctx).
+
+t_static_map_translations(_) ->
+    %% Bit of a hack to delete duplicated rows_read between views and changes
+    SingularStats = lists:delete(rows_read, maps:values(?STATS_TO_KEYS)),
+    ?assert(lists:all(fun(E) -> maps:is_key(E, ?STAT_KEYS_TO_FIELDS) end, SingularStats)),
+    %% TODO: properly handle ioq_calls field
+    ?assertEqual(
+        lists:sort(SingularStats),
+        lists:sort(
+            lists:foldl(
+                fun(E, A) ->
+                    %% Ignore fields regarding external processes
+                    Deletions = [docs_written, ioq_calls, js_filter, js_filtered_docs],
+                    case lists:member(E, Deletions) of
+                        true ->
+                            A;
+                        false ->
+                            [E | A]
+                    end
+                end,
+                [],
+                maps:keys(?STAT_KEYS_TO_FIELDS)
+            )
+        )
+    ).
+
+t_should_not_track_init_p_enabled(_) ->
+    enable_init_p(),
+    Metrics = [
+        [couch_db, name, spawned],
+        [couch_db, get_db_info, spawned],
+        [couch_db, open, spawned],
+        [fabric_rpc, get_purge_seq, spawned]
+    ],
+    [?assert(should_track_init_p(M) =:= false, M) || M <- Metrics].
+
+t_should_track_init_p_enabled(_) ->
+    enable_init_p(),
+    [?assert(should_track_init_p(M), M) || M <- base_metrics()].
+
+t_should_not_track_init_p_disabled(_) ->
+    disable_init_p(),
+    [?assert(should_track_init_p(M) =:= false, M) || M <- base_metrics()].
+
+t_should_extract_fields_properly(_) ->
+    Rctx = #rctx{},
+    #{fields := Fields} = couch_srt_entry:record_info(),
+    %% couch_srt_entry:value/2 throws on invalid fields, assert that the function succeeded
+    TestField = fun(Field) ->
+        try
+            couch_srt_entry:value(Field, Rctx),
+            true
+        catch
+            _:_ -> false
+        end
+    end,
+    [?assert(TestField(Field)) || Field <- Fields].
+
+enable_init_p() ->
+    config:set(?CSRT, "enable_init_p", "true", false).
+
+disable_init_p() ->
+    config:set(?CSRT, "enable_init_p", "false", false).
+
+base_metrics() ->
+    [
+        [fabric_rpc, all_docs, spawned],
+        [fabric_rpc, changes, spawned],
+        [fabric_rpc, map_view, spawned],
+        [fabric_rpc, reduce_view, spawned],
+        [fabric_rpc, get_all_security, spawned],
+        [fabric_rpc, open_doc, spawned],
+        [fabric_rpc, update_docs, spawned],
+        [fabric_rpc, open_shard, spawned]
+    ].
+
+-endif.
diff --git a/src/couch_srt/src/couch_srt.hrl b/src/couch_srt/src/couch_srt.hrl
new file mode 100644
index 00000000000..8a338dd4b69
--- /dev/null
+++ b/src/couch_srt/src/couch_srt.hrl
@@ -0,0 +1,207 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License.
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(CSRT, "csrt"). +-define(CSRT_INIT_P, "csrt.init_p"). +-define(CSRT_ETS, csrt_ets). + +%% CSRT pdict markers +-define(DELTA_TA, {csrt, delta_ta}). +-define(LAST_UPDATED, {csrt, last_updated}). +-define(PID_REF, {csrt, pid_ref}). %% track local ID +-define(TRACKER_PID, {csrt, tracker}). %% tracker pid + +%% Stats fields +-define(DB_OPEN_DOC, docs_read). +-define(DB_OPEN, db_open). +-define(COUCH_SERVER_OPEN, db_open). +-define(COUCH_BT_GET_KP_NODE, get_kp_node). +-define(COUCH_BT_GET_KV_NODE, get_kv_node). +%% "Example to extend CSRT" +%%-define(COUCH_BT_WRITE_KP_NODE, write_kp_node). +%%-define(COUCH_BT_WRITE_KV_NODE, write_kv_node). +-define(COUCH_JS_FILTER, js_filter). +-define(COUCH_JS_FILTERED_DOCS, js_filtered_docs). +-define(IOQ_CALLS, ioq_calls). +-define(DOCS_WRITTEN, docs_written). +-define(ROWS_READ, rows_read). +-define(FRPC_CHANGES_RETURNED, changes_returned). + +%% couch_srt_logger matcher keys +-define(MATCHERS_KEY, {csrt_logger, all_csrt_matchers}). +-define(CSRT_MATCHERS_ENABLED, "csrt_logger.matchers_enabled"). +-define(CSRT_MATCHERS_THRESHOLD, "csrt_logger.matchers_threshold"). +-define(CSRT_MATCHERS_DBNAMES, "csrt_logger.dbnames_io"). + +%% matcher query magnitude default limitations +-define(QUERY_CARDINALITY_LIMIT, 10000). +-define(QUERY_LIMIT, 100). + +%% Mapping of couch_stat metric names to #rctx{} field names. +%% These are used for fields that we inc a counter on. +-define(STATS_TO_KEYS, #{ + [couchdb, database_reads] => ?DB_OPEN_DOC, + %% Double on ?ROWS_READ for changes_processed as we only need the one + %% field, as opposed to needing both metrics to distinguish changes + %% workloads and view/_all_docs. + [fabric_rpc, changes, processed] => ?ROWS_READ, + [fabric_rpc, changes, returned] => ?FRPC_CHANGES_RETURNED, + [fabric_rpc, view, rows_read] => ?ROWS_READ, + [couchdb, couch_server, open] => ?DB_OPEN, + [couchdb, btree, get_node, kp_node] => ?COUCH_BT_GET_KP_NODE, + [couchdb, btree, get_node, kv_node] => ?COUCH_BT_GET_KV_NODE + + %% NOTE: these stats are not local to the RPC worker, need forwarding + %% "Example to extend CSRT" + %% [couchdb, btree, write_node, kp_node] => ?COUCH_BT_WRITE_KP_NODE, + %% [couchdb, btree, write_node, kv_node] => ?COUCH_BT_WRITE_KV_NODE, + %% [couchdb, query_server, calls, ddoc_filter] => ?COUCH_JS_FILTER +}). + +%% Mapping of stat field names to their corresponding record entries. +%% This only includes integer fields valid for ets:update_counter +-define(STAT_KEYS_TO_FIELDS, #{ + ?DB_OPEN => #rctx.?DB_OPEN, + ?ROWS_READ => #rctx.?ROWS_READ, + ?FRPC_CHANGES_RETURNED => #rctx.?FRPC_CHANGES_RETURNED, + ?DOCS_WRITTEN => #rctx.?DOCS_WRITTEN, + ?IOQ_CALLS => #rctx.?IOQ_CALLS, + ?COUCH_JS_FILTER => #rctx.?COUCH_JS_FILTER, + ?COUCH_JS_FILTERED_DOCS => #rctx.?COUCH_JS_FILTERED_DOCS, + ?DB_OPEN_DOC => #rctx.?DB_OPEN_DOC, + ?COUCH_BT_GET_KP_NODE => #rctx.?COUCH_BT_GET_KP_NODE, + ?COUCH_BT_GET_KV_NODE => #rctx.?COUCH_BT_GET_KV_NODE + %% "Example to extend CSRT" + %% ?COUCH_BT_WRITE_KP_NODE => #rctx.?COUCH_BT_WRITE_KP_NODE, + %% ?COUCH_BT_WRITE_KV_NODE => #rctx.?COUCH_BT_WRITE_KV_NODE +}). 
+
+-type throw(_Reason) :: no_return().
+
+-type pid_ref() :: {pid(), reference()}.
+-type maybe_pid_ref() :: pid_ref() | undefined.
+-type maybe_pid() :: pid() | undefined.
+
+-record(rpc_worker, {
+    mod :: atom() | '_',
+    func :: atom() | '_',
+    from :: pid_ref() | '_'
+}).
+
+-record(coordinator, {
+    mod :: atom() | '_',
+    func :: atom() | '_',
+    method :: atom() | '_',
+    path :: binary() | '_'
+}).
+
+-type coordinator() :: #coordinator{}.
+-type rpc_worker() :: #rpc_worker{}.
+-type rctx_type() :: coordinator() | rpc_worker().
+
+-record(rctx, {
+    %% Metadata
+    started_at = couch_srt_util:tnow() :: integer() | '_',
+    %% NOTE: updated_at must be after started_at to preserve time congruity
+    updated_at = couch_srt_util:tnow() :: integer() | '_',
+    pid_ref :: maybe_pid_ref() | {'_', '_'} | '_',
+    nonce :: nonce() | undefined | '_',
+    type :: rctx_type() | undefined | '_',
+    dbname :: dbname() | undefined | '_',
+    username :: username() | undefined | '_',
+
+    %% Stats Counters
+    db_open = 0 :: non_neg_integer() | '_',
+    docs_read = 0 :: non_neg_integer() | '_',
+    docs_written = 0 :: non_neg_integer() | '_',
+    rows_read = 0 :: non_neg_integer() | '_',
+    changes_returned = 0 :: non_neg_integer() | '_',
+    ioq_calls = 0 :: non_neg_integer() | '_',
+    js_filter = 0 :: non_neg_integer() | '_',
+    js_filtered_docs = 0 :: non_neg_integer() | '_',
+    get_kv_node = 0 :: non_neg_integer() | '_',
+    get_kp_node = 0 :: non_neg_integer() | '_'
+    %% "Example to extend CSRT"
+    %%write_kv_node = 0 :: non_neg_integer() | '_',
+    %%write_kp_node = 0 :: non_neg_integer() | '_'
+}).
+
+-type rctx_field() ::
+    started_at
+    | updated_at
+    | pid_ref
+    | nonce
+    | type
+    | dbname
+    | username
+    | db_open
+    | docs_read
+    | docs_written
+    | rows_read
+    | changes_returned
+    | ioq_calls
+    | js_filter
+    | js_filtered_docs
+    | get_kv_node
+    | get_kp_node.
+    %% "Example to extend CSRT"
+    %%| write_kv_node
+    %%| write_kp_node.
+
+-type coordinator_rctx() :: #rctx{type :: coordinator()}.
+-type rpc_worker_rctx() :: #rctx{type :: rpc_worker()}.
+-type rctx() :: #rctx{} | coordinator_rctx() | rpc_worker_rctx().
+-type rctxs() :: [#rctx{}] | [].
+-type maybe_rctx() :: rctx() | undefined.
+
+%% TODO: solidify nonce type and ideally move to couch_db.hrl
+-type nonce() :: any().
+-type dbname() :: iodata().
+-type username() :: iodata().
+
+-type delta() :: map().
+-type maybe_delta() :: delta() | undefined.
+-type tagged_delta() :: {delta, maybe_delta()}.
+-type term_delta() :: term() | {term(), tagged_delta()}.
+
+-type matcher_name() :: string().
+-type matcher() :: {ets:match_spec(), ets:comp_match_spec()}.
+-type matchers() :: #{matcher_name() => matcher()} | #{}.
+-type matcher_matches() :: #{matcher_name() => rctxs()} | #{}.
+-type maybe_matcher() :: matcher() | undefined.
+-type maybe_matchers() :: matchers() | undefined.
+
+-type maybe_integer() :: integer() | undefined.
+%% This is a little awkward to type: it's a list of ets:update_counter UpdateOps,
+%% where ets types the updates as `UpdateOp = {Pos, Incr}`. We can do better than
+%% that because we know `Pos` is the #rctx record field index, a non_neg_integer(),
+%% and similarly, we know `Incr` is from `couch_srt_util:make_dt`, which returns at
+%% least one. Ideally, we'd specify the `Pos` type sufficiently to be one of the
+%% valid #rctx record field names; however, a clean solution is not obvious.
+-type counter_updates_list() :: [{non_neg_integer(), pos_integer()}] | [].
+
+-type tuple_of_field_values() :: tuple().
+-type tuple_of_field_names() :: tuple().
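+
+%% Illustrative (hypothetical) counter_updates_list() usage, batching several
+%% field increments into a single atomic ets call (the actual batching is done
+%% in the couch_srt modules):
+%%   ets:update_counter(?CSRT_ETS, PidRef, [
+%%       {#rctx.docs_read, 10},
+%%       {#rctx.ioq_calls, 3}
+%%   ])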
+ +-type query_options() :: #{aggregation => group_by | sort_by | count_by, limit => pos_integer()}. +-type aggregation_key() :: tuple_of_field_names(). +-type aggregation_values() :: tuple_of_field_values(). + +-type field_value() :: any(). +-type aggregation_value() :: field_value(). +-type aggregation_result() :: #{aggregation_key() => non_neg_integer()}. +-type ordered_result() :: [{aggregation_key(), non_neg_integer()}]. +-type query_result() :: aggregation_result() | ordered_result(). + +-type json_spec(_Spec) :: term(). +-type json_string() :: binary(). \ No newline at end of file diff --git a/src/couch_srt/src/couch_srt_app.erl b/src/couch_srt/src/couch_srt_app.erl new file mode 100644 index 00000000000..c9397414b27 --- /dev/null +++ b/src/couch_srt/src/couch_srt_app.erl @@ -0,0 +1,23 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_app). + +-behaviour(application). + +-export([start/2, stop/1]). + +start(_StartType, _StartArgs) -> + couch_srt_sup:start_link(). + +stop(_State) -> + ok. diff --git a/src/couch_srt/src/couch_srt_entry.erl b/src/couch_srt/src/couch_srt_entry.erl new file mode 100644 index 00000000000..8626fce7dae --- /dev/null +++ b/src/couch_srt/src/couch_srt_entry.erl @@ -0,0 +1,244 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_entry). + +-include_lib("stdlib/include/ms_transform.hrl"). +-include_lib("couch_srt.hrl"). + +-export([ + value/2, + key/1, + from_map/1, + record_info/0 +]). + +%% JSON Conversion API +-export([ + convert_type/1, + convert_pidref/1, + convert_pid/1, + convert_ref/1, + convert_string/1, + to_json/1 +]). + +-spec value(rctx_field(), #rctx{}) -> any(). 
+ +value(pid_ref, #rctx{pid_ref = Val}) -> Val; +value(nonce, #rctx{nonce = Val}) -> Val; +value(type, #rctx{type = Val}) -> convert_type(Val); +value(dbname, #rctx{dbname = Val}) -> Val; +value(username, #rctx{username = Val}) -> Val; +value(db_open, #rctx{db_open = Val}) -> Val; +value(docs_read, #rctx{docs_read = Val}) -> Val; +value(docs_written, #rctx{docs_written = Val}) -> Val; +value(rows_read, #rctx{rows_read = Val}) -> Val; +value(changes_returned, #rctx{changes_returned = Val}) -> Val; +value(ioq_calls, #rctx{ioq_calls = Val}) -> Val; +value(js_filter, #rctx{js_filter = Val}) -> Val; +value(js_filtered_docs, #rctx{js_filtered_docs = Val}) -> Val; +value(get_kv_node, #rctx{get_kv_node = Val}) -> Val; +value(get_kp_node, #rctx{get_kp_node = Val}) -> Val; +value(started_at, #rctx{started_at = Val}) -> Val; +value(updated_at, #rctx{updated_at = Val}) -> Val. + +-spec key(BinKey :: binary() | string() | atom()) -> + Key :: + rctx_field() + | {error, Reason :: any()}. + +key(Key) when is_atom(Key) -> + key_from_atom(Key); +key(Key) when is_binary(Key) -> + key_from_binary(Key); +key(Key) when is_list(Key) -> + case key_from_binary(list_to_binary(Key)) of + {error, {invalid_key, _Key}} -> + {error, {invalid_key, Key}}; + Res -> + Res + end; +key(Other) -> + key_error(Other). + +key_from_atom(pid_ref) -> pid_ref; +key_from_atom(nonce) -> nonce; +key_from_atom(type) -> type; +key_from_atom(dbname) -> dbname; +key_from_atom(username) -> username; +key_from_atom(db_open) -> db_open; +key_from_atom(docs_read) -> docs_read; +key_from_atom(rows_read) -> rows_read; +key_from_atom(changes_returned) -> changes_returned; +key_from_atom(ioq_calls) -> ioq_calls; +key_from_atom(js_filter) -> js_filter; +key_from_atom(js_filtered_docs) -> js_filtered_docs; +key_from_atom(get_kv_node) -> get_kv_node; +key_from_atom(get_kp_node) -> get_kp_node; +key_from_atom(Other) -> key_error(Other). + +key_from_binary(<<"pid_ref">>) -> pid_ref; +key_from_binary(<<"nonce">>) -> nonce; +key_from_binary(<<"type">>) -> type; +key_from_binary(<<"dbname">>) -> dbname; +key_from_binary(<<"username">>) -> username; +key_from_binary(<<"db_open">>) -> db_open; +key_from_binary(<<"docs_read">>) -> docs_read; +key_from_binary(<<"rows_read">>) -> rows_read; +key_from_binary(<<"changes_returned">>) -> changes_returned; +key_from_binary(<<"ioq_calls">>) -> ioq_calls; +key_from_binary(<<"js_filter">>) -> js_filter; +key_from_binary(<<"js_filtered_docs">>) -> js_filtered_docs; +key_from_binary(<<"get_kv_node">>) -> get_kv_node; +key_from_binary(<<"get_kp_node">>) -> get_kp_node; +key_from_binary(Other) -> key_error(Other). + +key_error(Key) -> + {error, {invalid_key, Key}}. + +-spec from_map(Map :: map()) -> rctx(). + +from_map(Map) -> + maps:fold(fun set_field/3, #rctx{}, Map). + +-spec set_field(Field :: rctx_field(), Val :: any(), Rctx :: rctx()) -> rctx(). 
+set_field(updated_at, Val, Rctx) -> + Rctx#rctx{updated_at = Val}; +set_field(started_at, Val, Rctx) -> + Rctx#rctx{started_at = Val}; +set_field(pid_ref, Val, Rctx) -> + Rctx#rctx{pid_ref = Val}; +set_field(nonce, Val, Rctx) -> + Rctx#rctx{nonce = Val}; +set_field(dbname, Val, Rctx) -> + Rctx#rctx{dbname = Val}; +set_field(username, Val, Rctx) -> + Rctx#rctx{username = Val}; +set_field(db_open, Val, Rctx) -> + Rctx#rctx{db_open = Val}; +set_field(docs_read, Val, Rctx) -> + Rctx#rctx{docs_read = Val}; +set_field(docs_written, Val, Rctx) -> + Rctx#rctx{docs_written = Val}; +set_field(js_filter, Val, Rctx) -> + Rctx#rctx{js_filter = Val}; +set_field(js_filtered_docs, Val, Rctx) -> + Rctx#rctx{js_filtered_docs = Val}; +set_field(rows_read, Val, Rctx) -> + Rctx#rctx{rows_read = Val}; +set_field(type, Val, Rctx) -> + Rctx#rctx{type = Val}; +set_field(get_kp_node, Val, Rctx) -> + Rctx#rctx{get_kp_node = Val}; +set_field(get_kv_node, Val, Rctx) -> + Rctx#rctx{get_kv_node = Val}; +%% "Example to extend CSRT" +%% set_field(write_kp_node, Val, Rctx) -> +%% Rctx#rctx{write_kp_node = Val}; +%% set_field(write_kv_node, Val, Rctx) -> +%% Rctx#rctx{write_kv_node = Val}; +set_field(changes_returned, Val, Rctx) -> + Rctx#rctx{changes_returned = Val}; +set_field(ioq_calls, Val, Rctx) -> + Rctx#rctx{ioq_calls = Val}; +set_field(_, _, Rctx) -> + %% Unknown key, could throw but just move on + Rctx. + +-spec record_info() -> + #{ + fields => [rctx_field()], + size => pos_integer(), + field_idx => #{rctx_field() => pos_integer()} + }. +record_info() -> + Fields = record_info(fields, rctx), + Size = record_info(size, rctx), + Idx = maps:from_list(lists:zip(Fields, lists:seq(1, length(Fields)))), + #{ + fields => Fields, + field_idx => Idx, + size => Size + }. + +-spec to_json(Rctx :: rctx()) -> map(). +to_json(#rctx{} = Rctx) -> + #{ + updated_at => convert_string(couch_srt_util:tutc(Rctx#rctx.updated_at)), + started_at => convert_string(couch_srt_util:tutc(Rctx#rctx.started_at)), + pid_ref => convert_pidref(Rctx#rctx.pid_ref), + nonce => convert_string(Rctx#rctx.nonce), + dbname => convert_string(Rctx#rctx.dbname), + username => convert_string(Rctx#rctx.username), + db_open => Rctx#rctx.db_open, + docs_read => Rctx#rctx.docs_read, + docs_written => Rctx#rctx.docs_written, + js_filter => Rctx#rctx.js_filter, + js_filtered_docs => Rctx#rctx.js_filtered_docs, + rows_read => Rctx#rctx.rows_read, + type => convert_type(Rctx#rctx.type), + get_kp_node => Rctx#rctx.get_kp_node, + get_kv_node => Rctx#rctx.get_kv_node, + %% "Example to extend CSRT" + %% write_kp_node => Rctx#rctx.write_kp_node, + %% write_kv_node => Rctx#rctx.write_kv_node, + changes_returned => Rctx#rctx.changes_returned, + ioq_calls => Rctx#rctx.ioq_calls + }. + +%% +%% Conversion API for outputting JSON +%% + +-spec convert_type(T) -> binary() | null when + T :: #coordinator{} | #rpc_worker{} | undefined. +convert_type(#coordinator{method = Verb0, path = Path, mod = M0, func = F0}) -> + M = atom_to_binary(M0), + F = atom_to_binary(F0), + Verb = atom_to_binary(Verb0), + <<"coordinator-{", M/binary, ":", F/binary, "}:", Verb/binary, ":", Path/binary>>; +convert_type(#rpc_worker{mod = M0, func = F0, from = From0}) -> + M = atom_to_binary(M0), + F = atom_to_binary(F0), + %% Technically From is a PidRef data type from Pid, but different Ref for fabric + From = convert_pidref(From0), + <<"rpc_worker-{", From/binary, "}:", M/binary, ":", F/binary>>; +convert_type(undefined) -> + null. 
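+
+%% Illustrative (hypothetical) example outputs of convert_type/1, assuming a
+%% populated #coordinator{} and #rpc_worker{} respectively:
+%%   <<"coordinator-{chttpd_db:handle_changes_req}:GET:/db/_changes">>
+%%   <<"rpc_worker-{<0.123.0>:#Ref<0.0.0.456>}:fabric_rpc:changes">>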
+
+-spec convert_pidref(PidRef) -> binary() | null when
+    PidRef :: {A :: pid(), B :: reference()} | undefined.
+convert_pidref({Parent0, ParentRef0}) ->
+    Parent = convert_pid(Parent0),
+    ParentRef = convert_ref(ParentRef0),
+    <<Parent/binary, ":", ParentRef/binary>>;
+%%convert_pidref(null) ->
+%%    null;
+convert_pidref(undefined) ->
+    null.
+
+-spec convert_pid(Pid :: pid()) -> binary().
+convert_pid(Pid) when is_pid(Pid) ->
+    list_to_binary(pid_to_list(Pid)).
+
+-spec convert_ref(Ref :: reference()) -> binary().
+convert_ref(Ref) when is_reference(Ref) ->
+    list_to_binary(ref_to_list(Ref)).
+
+-spec convert_string(Str :: string() | binary() | undefined) -> binary() | null.
+convert_string(undefined) ->
+    null;
+convert_string(Str) when is_list(Str) ->
+    list_to_binary(Str);
+convert_string(Bin) when is_binary(Bin) ->
+    Bin.
diff --git a/src/couch_srt/src/couch_srt_httpd.erl b/src/couch_srt/src/couch_srt_httpd.erl
new file mode 100644
index 00000000000..532d53fbcb7
--- /dev/null
+++ b/src/couch_srt/src/couch_srt_httpd.erl
@@ -0,0 +1,159 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_srt_httpd).
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch_srt.hrl").
+
+-export([handle_resource_status_req/1]).
+
+-import(
+    chttpd,
+    [
+        send_json/2, send_json/3,
+        send_method_not_allowed/2
+    ]
+).
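+
+%% Illustrative (hypothetical) request handled below: POST to
+%% /_active_resources/_match/{matcher_name} with exactly one of "group_by",
+%% "sort_by", or "count_by" in the JSON body, eg:
+%%
+%%   POST /_active_resources/_match/docs_read
+%%   Content-Type: application/json
+%%
+%%   {"group_by": {"aggregate_keys": ["username", "dbname"],
+%%                 "counter_key": "ioq_calls"}}
+%%
+%% The reply is a per-node list of {key, value} aggregation results.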
+ +handle_resource_status_req( + #httpd{method = 'POST', path_parts = [<<"_active_resources">>, <<"_match">>, MatcherNameBin]} = + Req +) -> + chttpd:validate_ctype(Req, "application/json"), + {JsonProps} = chttpd:json_body_obj(Req), + GroupBy = couch_util:get_value(<<"group_by">>, JsonProps), + SortBy = couch_util:get_value(<<"sort_by">>, JsonProps), + CountBy = couch_util:get_value(<<"count_by">>, JsonProps), + MatcherName = binary_to_list(MatcherNameBin), + {AggregationKeys, Query} = + case {GroupBy, SortBy, CountBy} of + {undefined, undefined, {Props}} -> + Keys = couch_util:get_value(<<"aggregate_keys">>, Props), + {Keys, couch_srt:query([couch_srt:from(MatcherName), couch_srt:count_by(Keys)])}; + {undefined, {Props}, undefined} -> + Keys = couch_util:get_value(<<"aggregate_keys">>, Props), + CounterKey = couch_util:get_value(<<"counter_key">>, Props), + {Keys, + couch_srt:query([ + couch_srt:from(MatcherName), couch_srt:sort_by(Keys, CounterKey) + ])}; + {{Props}, undefined, undefined} -> + Keys = couch_util:get_value(<<"aggregate_keys">>, Props), + CounterKey = couch_util:get_value(<<"counter_key">>, Props), + {Keys, + couch_srt:query([ + couch_srt:from(MatcherName), couch_srt:group_by(Keys, CounterKey) + ])}; + {_, _, _} -> + throw({bad_request, <<"Multiple aggregations are not supported">>}) + end, + case Query of + {error, Reason} -> + send_error(Req, Reason); + Q -> + JSON = to_json(AggregationKeys, couch_srt:rpc_run(Q)), + send_json(Req, JSON) + end; +handle_resource_status_req(#httpd{path_parts = [<<"_active_resources">>]} = Req) -> + ok = chttpd:verify_is_server_admin(Req), + send_method_not_allowed(Req, "GET,HEAD"); +handle_resource_status_req(Req) -> + ok = chttpd:verify_is_server_admin(Req), + send_method_not_allowed(Req, "GET,HEAD,POST"). + +to_json(AggregationKeys, Results) -> + lists:map(fun(E) -> node_reply_to_json(AggregationKeys, E) end, Results). + +node_reply_to_json(_AggregationKeys, #{node := Node, result := none, errors := Errors}) -> + #{ + node => atom_to_binary(Node), + result => none, + errors => lists:map(fun erlang:atom_to_list/1, Errors) + }; +node_reply_to_json(AggregationKeys, #{node := Node, result := Result, errors := Errors}) -> + #{ + node => atom_to_binary(Node), + result => aggregation_result_to_json(AggregationKeys, Result), + errors => lists:map(fun erlang:atom_to_list/1, Errors) + }. + +encode_key(AggregationKeys, Key) -> + maps:from_list(lists:zip(AggregationKeys, tuple_to_list(Key))). + +-spec aggregation_result_to_json(AggregationKeys :: binary() | [binary()], Map :: query_result()) -> + json_spec(#{ + value => non_neg_integer(), + key => #{ + username => json_string(), + dbname => json_string() + } + }). 
+ +aggregation_result_to_json(AggregationKeys, Map) when + is_map(Map) andalso is_list(AggregationKeys) +-> + maps:fold( + fun(K, V, Acc) -> + [ + #{ + value => V, + key => encode_key(AggregationKeys, K) + } + | Acc + ] + end, + [], + Map + ); +aggregation_result_to_json(AggregationKey, Map) when + is_map(Map) andalso is_binary(AggregationKey) +-> + maps:fold( + fun(K, V, Acc) -> + [ + #{value => V, key => #{AggregationKey => K}} | Acc + ] + end, + [], + Map + ); +aggregation_result_to_json(AggregationKeys, Ordered) when + is_list(Ordered) andalso is_list(AggregationKeys) +-> + lists:map( + fun({K, V}) -> + #{ + value => V, + key => encode_key(AggregationKeys, K) + } + end, + Ordered + ); +aggregation_result_to_json(AggregationKey, Ordered) when + is_list(Ordered) andalso is_binary(AggregationKey) +-> + lists:map( + fun({K, V}) -> + #{value => V, key => #{AggregationKey => K}} + end, + Ordered + ). + +send_error(Req, [{unknown_matcher, Matcher} | _]) -> + MatcherBin = list_to_binary(Matcher), + chttpd:send_error(Req, {bad_request, <<"Unknown matcher '", MatcherBin/binary, "'">>}); +send_error(Req, [{invalid_key, FieldName} | _]) -> + chttpd:send_error(Req, {bad_request, <<"Unknown field name '", FieldName/binary, "'">>}); +send_error(Req, [multiple_value_keys | _]) -> + chttpd:send_error(Req, {bad_request, <<"Multiple keys in 'counter_key'">>}); +send_error(Req, [Reason | _]) -> + chttpd:send_error(Req, {error, Reason}). diff --git a/src/couch_srt/src/couch_srt_logger.erl b/src/couch_srt/src/couch_srt_logger.erl new file mode 100644 index 00000000000..a2ac085640d --- /dev/null +++ b/src/couch_srt/src/couch_srt_logger.erl @@ -0,0 +1,606 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_logger). + +-behaviour(gen_server). + +%% Process lifetime logging api +-export([ + get_tracker/0, + log_process_lifetime_report/1, + put_tracker/1, + stop_tracker/0, + stop_tracker/1, + track/1, + tracker/1 +]). + +%% Raw API that bypasses is_enabled checks +-export([ + do_lifetime_report/1, + do_status_report/1, + do_report/2, + maybe_report/2, + should_truncate_reports/0 +]). + +%% gen_server callbacks +-export([ + start_link/0, + init/1, + handle_call/3, + handle_cast/2, + handle_info/2 +]). + +%% Config update subscription API +-export([ + subscribe_changes/0, + handle_config_change/5, + handle_config_terminate/3 +]). + +%% Matchers +-export([ + deregister_matcher/1, + find_all_matches/2, + find_matches/2, + get_matcher/1, + get_matchers/0, + get_registered_matchers/0, + is_match/1, + is_match/2, + matcher_on_all_coordinators/0, + matcher_on_all_rpc_workers/0, + matcher_on_dbname/1, + matcher_on_docs_read/1, + matcher_on_docs_written/1, + matcher_on_rows_read/1, + matcher_on_changes_processed/1, + matcher_on_ioq_calls/1, + matcher_on_nonce/1, + matcher_on_long_reqs/1, + register_matcher/2, + reload_matchers/0 +]). + +%% Recon API Ports of https://github.com/ferd/recon/releases/tag/2.5.6 +-export([ + pid_ref_attrs/1, + pid_ref_matchspec/1, + proc_window/3 +]). 
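The Recon ports exported above are intended for ad-hoc triage from a remsh. A minimal sketch (the attribute and window size are illustrative):

%% top 10 pid_ref()s by ioq_calls growth over a 1000 ms sampling window
couch_srt_logger:proc_window(ioq_calls, 10, 1000).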
+ +-include_lib("stdlib/include/ms_transform.hrl"). +-include_lib("couch_srt.hrl"). + +-record(st, { + registered_matchers = #{} +}). + +-spec track(Rctx :: rctx()) -> pid(). +track(#rctx{pid_ref = PidRef}) -> + case get_tracker() of + undefined -> + Pid = spawn(?MODULE, tracker, [PidRef]), + put_tracker(Pid), + Pid; + Pid when is_pid(Pid) -> + Pid + end. + +-spec tracker(PidRef :: pid_ref()) -> ok. +tracker({Pid, _Ref} = PidRef) -> + MonRef = erlang:monitor(process, Pid), + receive + stop -> + log_process_lifetime_report(PidRef), + couch_srt_server:destroy_resource(PidRef), + ok; + {'DOWN', MonRef, _Type, _0DPid, _Reason0} -> + %% TODO: should we pass reason to log_process_lifetime_report? + %% Reason = case Reason0 of + %% {shutdown, Shutdown0} -> + %% Shutdown = atom_to_binary(Shutdown0), + %% <<"shutdown: ", Shutdown/binary>>; + %% Reason0 -> + %% Reason0 + %% end, + %% TODO: should we send the induced work delta to the coordinator? + log_process_lifetime_report(PidRef), + couch_srt_server:destroy_resource(PidRef), + ok + end. + +-spec register_matcher(Name, MSpec) -> ok | {error, badarg} when + Name :: string(), MSpec :: ets:match_spec(). +register_matcher(Name, MSpec) -> + gen_server:call(?MODULE, {register, Name, MSpec}, infinity). + +-spec deregister_matcher(Name :: string()) -> ok. +deregister_matcher(Name) -> + gen_server:call(?MODULE, {deregister, Name}, infinity). + +-spec log_process_lifetime_report(PidRef :: pid_ref()) -> ok. +log_process_lifetime_report(PidRef) -> + case couch_srt_util:is_enabled() andalso couch_srt_util:is_enabled_reporting() of + true -> + maybe_report("csrt-pid-usage-lifetime", PidRef); + false -> + ok + end. + +%% Return a subset of Matchers for each Matcher that matches on Rctxs +-spec find_matches(Rctxs :: [rctx()], Matchers :: matchers()) -> matchers(). +find_matches(Rctxs, Matchers) when is_list(Rctxs) andalso is_map(Matchers) -> + Rctxs1 = + case couch_srt_util:is_enabled_rpc_reporting() of + true -> + Rctxs; + false -> + [Rctx || #rctx{type = #coordinator{}} = Rctx <- Rctxs] + end, + maps:filter( + fun(_Name, {_MSpec, CompMSpec}) -> + (catch ets:match_spec_run(Rctxs1, CompMSpec)) =/= [] + end, + Matchers + ). + +%% Return a Map of #{MatcherName => SRctxs :: rctxs()} for all MatcherName => Matcher +%% in Matchers where SRctxs is the subset of Rctxs matched by the given Matcher +-spec find_all_matches(Rctxs :: rctxs(), Matchers :: matchers()) -> matcher_matches(). +find_all_matches(Rctxs, Matchers) when is_list(Rctxs) andalso is_map(Matchers) -> + maps:map( + fun(_Name, {_MSpec, CompMSpec}) -> + try + ets:match_spec_run(Rctxs, CompMSpec) + catch + _:_ -> + [] + end + end, + Matchers + ). + +-spec reload_matchers() -> ok. +reload_matchers() -> + ok = gen_server:call(?MODULE, reload_matchers, infinity). + +-spec get_matchers() -> matchers(). +get_matchers() -> + persistent_term:get(?MATCHERS_KEY, #{}). + +-spec get_matcher(Name :: matcher_name()) -> maybe_matcher(). +get_matcher(Name) -> + maps:get(Name, get_matchers(), undefined). + +-spec get_registered_matchers() -> matchers(). +get_registered_matchers() -> + gen_server:call(?MODULE, get_registered_matchers, infinity). + +-spec is_match(Rctx :: maybe_rctx()) -> boolean(). +is_match(undefined) -> + false; +is_match(#rctx{} = Rctx) -> + is_match(Rctx, get_matchers()). + +-spec is_match(Rctx :: maybe_rctx(), Matchers :: matchers()) -> boolean(). 
+is_match(undefined, _Matchers) -> + false; +is_match(_Rctx, undefined) -> + false; +is_match(#rctx{} = Rctx, Matchers) when is_map(Matchers) -> + maps:size(find_matches([Rctx], Matchers)) > 0. + +%% Generate a report for the Rctx if it triggers an active Matcher +-spec maybe_report(ReportName :: string(), PidRef :: maybe_pid_ref()) -> ok. +maybe_report(ReportName, PidRef) -> + Rctx = couch_srt_server:get_resource(PidRef), + case is_match(Rctx) of + true -> + do_report(ReportName, Rctx), + ok; + false -> + ok + end. + +%% Whether or not to remove zero value fields from reports to save on volume +-spec should_truncate_reports() -> boolean(). +should_truncate_reports() -> + config:get_boolean(?CSRT, "should_truncate_reports", true). + +-spec do_lifetime_report(Rctx :: rctx()) -> boolean(). +do_lifetime_report(Rctx) -> + do_report("csrt-pid-usage-lifetime", Rctx). + +-spec do_status_report(Rctx :: rctx()) -> boolean(). +do_status_report(Rctx) -> + do_report("csrt-pid-usage-status", Rctx). + +-spec do_report(ReportName :: string(), Rctx :: rctx()) -> boolean(). +do_report(ReportName, #rctx{} = Rctx) -> + JRctx = + case {should_truncate_reports(), couch_srt_entry:to_json(Rctx)} of + {true, JRctx0} -> + maps:filter(fun(_K, V) -> V > 0 end, JRctx0); + {false, JRctx0} -> + JRctx0 + end, + couch_log:report(ReportName, JRctx). + +%% +%% Process lifetime logging api +%% + +-spec get_tracker() -> maybe_pid(). +get_tracker() -> + get(?TRACKER_PID). + +-spec put_tracker(Pid :: pid()) -> maybe_pid(). +put_tracker(Pid) when is_pid(Pid) -> + put(?TRACKER_PID, Pid). + +-spec stop_tracker() -> ok. +stop_tracker() -> + stop_tracker(get_tracker()). + +-spec stop_tracker(Pid :: maybe_pid()) -> ok. +stop_tracker(undefined) -> + ok; +stop_tracker(Pid) when is_pid(Pid) -> + Pid ! stop, + ok. + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + St = #st{}, + ok = initialize_matchers(St#st.registered_matchers), + ok = subscribe_changes(), + {ok, St}. + +handle_call({register, Name, MSpec}, _From, #st{registered_matchers = RMatchers} = St) -> + case add_matcher(Name, MSpec, RMatchers) of + {ok, RMatchers1} -> + ok = initialize_matchers(RMatchers1), + {reply, ok, St#st{registered_matchers = RMatchers1}}; + {error, {invalid_ms, _, _}} = Error -> + {reply, Error, St} + end; +handle_call({deregister, Name}, _From, #st{registered_matchers = RMatchers} = St) -> + case maps:is_key(Name, RMatchers) of + false -> + {reply, {error, missing_matcher}, St}; + true -> + RMatchers1 = maps:remove(Name, RMatchers), + ok = initialize_matchers(RMatchers1), + {reply, ok, St#st{registered_matchers = RMatchers1}} + end; +handle_call(reload_matchers, _From, St) -> + couch_log:warning("Reloading persistent term matchers", []), + ok = initialize_matchers(St#st.registered_matchers), + {reply, ok, St}; +handle_call(get_registered_matchers, _From, St) -> + {reply, St#st.registered_matchers, St}; +handle_call(Msg, From, St) -> + {stop, {unknown_call, Msg, From}, St}. + +handle_cast(Msg, St) -> + {stop, {unknown_cast, Msg}, St}. + +handle_info(restart_config_listener, St) -> + ok = subscribe_changes(), + {noreply, St}; +handle_info(Msg, St) -> + {stop, {unknown_info, Msg}, St}. + +%% +%% Matchers +%% + +-spec matcher_on_all_coordinators() -> ets:match_spec(). +matcher_on_all_coordinators() -> + ets:fun2ms(fun(#rctx{type = #coordinator{}} = R) -> R end). + +-spec matcher_on_all_rpc_workers() -> ets:match_spec(). +matcher_on_all_rpc_workers() -> + ets:fun2ms(fun(#rctx{type = #rpc_worker{}} = R) -> R end). 
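A matcher is just a named match spec, so ad-hoc filters can be registered at runtime alongside the builtin matcher_on_* generators below. A minimal sketch, assuming a database named <<"foo">> (name illustrative; in a module, ets:fun2ms/1 requires the ms_transform include):

MSpec = ets:fun2ms(fun(#rctx{dbname = <<"foo">>} = R) -> R end),
ok = couch_srt_logger:register_matcher("foo_requests", MSpec).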
+ +-spec matcher_on_dbname(DbName :: dbname()) -> ets:match_spec(). +matcher_on_dbname(DbName) when + is_binary(DbName) +-> + ets:fun2ms(fun(#rctx{dbname = DbName1} = R) when DbName =:= DbName1 -> R end). + +-spec matcher_on_dbnames_io_threshold(DbName, Threshold) -> ets:match_spec() when + DbName :: dbname(), Threshold :: pos_integer(). +matcher_on_dbnames_io_threshold(DbName, Threshold) when + is_binary(DbName) +-> + ets:fun2ms(fun( + #rctx{ + dbname = DbName1, + ioq_calls = IOQ, + get_kv_node = KVN, + get_kp_node = KPN, + docs_read = Docs, + rows_read = Rows + } = R + ) when + DbName =:= DbName1 andalso + ((IOQ >= Threshold) orelse + (KVN >= Threshold) orelse + (KPN >= Threshold) orelse + (Docs >= Threshold) orelse + (Rows >= Threshold)) + -> + R + end). + +-spec matcher_on_docs_read(Threshold :: pos_integer()) -> ets:match_spec(). +matcher_on_docs_read(Threshold) when + is_integer(Threshold) andalso Threshold > 0 +-> + ets:fun2ms(fun(#rctx{docs_read = DocsRead} = R) when DocsRead >= Threshold -> R end). + +-spec matcher_on_docs_written(Threshold :: pos_integer()) -> ets:match_spec(). +matcher_on_docs_written(Threshold) when + is_integer(Threshold) andalso Threshold > 0 +-> + ets:fun2ms(fun(#rctx{docs_written = DocsWritten} = R) when DocsWritten >= Threshold -> R end). + +-spec matcher_on_rows_read(Threshold :: pos_integer()) -> ets:match_spec(). +matcher_on_rows_read(Threshold) when + is_integer(Threshold) andalso Threshold > 0 +-> + ets:fun2ms(fun(#rctx{rows_read = RowsRead} = R) when RowsRead >= Threshold -> R end). + +-spec matcher_on_nonce(Nonce :: nonce()) -> ets:match_spec(). +matcher_on_nonce(Nonce) -> + ets:fun2ms(fun(#rctx{nonce = Nonce1} = R) when Nonce =:= Nonce1 -> R end). + +-spec matcher_on_changes_processed(Threshold :: pos_integer()) -> ets:match_spec(). +matcher_on_changes_processed(Threshold) when + is_integer(Threshold) andalso Threshold > 0 +-> + %% HACK: because we overload the use of #rctx.rows_read for + %% changes_processed, we must specify a direct match against a changes + %% context. For now, just match on #coordinator's + ets:fun2ms( + fun( + #rctx{ + type = #coordinator{mod = chttpd_db, func = handle_changes_req}, + rows_read = Processed, + changes_returned = Returned + } = R + ) when (Processed - Returned) >= Threshold -> + R + end + ). + +%% Matcher on requests taking longer than Threshold milliseconds +-spec matcher_on_long_reqs(Threshold :: pos_integer()) -> ets:match_spec(). +matcher_on_long_reqs(Threshold) when + is_integer(Threshold) andalso Threshold > 0 +-> + %% Threshold is in milliseconds, but we track erlang:monotonic_time/0, + %% which is in native format, a machine-dependent internal representation, + %% so we must convert the provided Threshold from milliseconds to native + %% representation, which then allows us to match against entries where the + %% time delta between started_at and updated_at is greater than the + %% native converted threshold. + %% + %% Time warps, is relative, and is complicated, so here's an example of + %% converting 10000 milliseconds into a native time format and back, then + %% using couch_srt_util:tnow/0 to accurately measure sleeping for 10000 ms. + %% + %% (node1@127.0.0.1)5> erlang:convert_time_unit(10000, millisecond, native). + %% 10000000000 + %% (node1@127.0.0.1)6> erlang:convert_time_unit(10000000000, native, millisecond). + %% 10000 + %% (node1@127.0.0.1)7> T0 = couch_srt_util:tnow(), timer:sleep(10000), T1 = couch_srt_util:tnow(), + %% erlang:convert_time_unit(T1 - T0, native, millisecond).
+ %% 10000 + + NativeThreshold = erlang:convert_time_unit(Threshold, millisecond, native), + ets:fun2ms( + fun( + #rctx{ + started_at = Started, + updated_at = Updated + } = R + ) when Updated - Started >= NativeThreshold -> + R + end + ). + +-spec matcher_on_ioq_calls(Threshold :: pos_integer()) -> ets:match_spec(). +matcher_on_ioq_calls(Threshold) when + is_integer(Threshold) andalso Threshold > 0 +-> + ets:fun2ms(fun(#rctx{ioq_calls = IOQCalls} = R) when IOQCalls >= Threshold -> R end). + +-spec pid_ref_matchspec(AttrName :: rctx_field()) -> matcher() | throw(any()). +pid_ref_matchspec(AttrName) -> + #{field_idx := FieldIdx} = couch_srt_entry:record_info(), + RctxMatch0 = #rctx{_ = '_'}, + RctxMatch1 = setelement(maps:get(pid_ref, FieldIdx) + 1, RctxMatch0, '$1'), + RctxMatch = setelement(maps:get(AttrName, FieldIdx) + 1, RctxMatch1, '$2'), + MatchSpec = [{RctxMatch, [], [{{'$1', '$2'}}]}], + {MatchSpec, ets:match_spec_compile(MatchSpec)}. + +-spec pid_ref_attrs(AttrName :: rctx_field()) -> list() | throw(any()). +pid_ref_attrs(AttrName) -> + {MatchSpec, _CompMatch} = pid_ref_matchspec(AttrName), + %% Base fields at least an empty list, but we could add more info here. + %% The recon typespec is an improper list of the form: + %%Base = [Name | [{current_function, mfa()} | {initial_call, mfa()}]], + Base = [], + [{PidRef, Val, Base} || {PidRef, Val} <- ets:select(?CSRT_ETS, MatchSpec)]. + +%% This is a recon:proc_window/3 [1] port with the same core logic but +%% recon_lib:proc_attrs/1 replaced with pid_ref_attrs/1, and returning on +%% pid_ref() rather than pid(). +%% [1] https://github.com/ferd/recon/blob/c2a76855be3a226a3148c0dfc21ce000b6186ef8/src/recon.erl#L268-L300 +-spec proc_window(AttrName, Num, Time) -> term() | throw(any()) when + AttrName :: rctx_field(), Num :: non_neg_integer(), Time :: pos_integer(). +proc_window(AttrName, Num, Time) -> + Sample = fun() -> pid_ref_attrs(AttrName) end, + {First, Last} = recon_lib:sample(Time, Sample), + recon_lib:sublist_top_n_attrs(recon_lib:sliding_window(First, Last), Num). + +-spec add_matcher(Name, MSpec, Matchers) -> + {ok, matchers()} | {error, {invalid_ms, string(), ets:match_spec()}} +when + Name :: string(), MSpec :: ets:match_spec() | undefined, Matchers :: matchers(). +add_matcher(Name, undefined = MSpec, _Matchers) -> + {error, {invalid_ms, Name, MSpec}}; +add_matcher(Name, MSpec, Matchers) -> + try ets:match_spec_compile(MSpec) of + CompMSpec -> + %% TODO: handle already registered name case + Matchers1 = maps:put(Name, {MSpec, CompMSpec}, Matchers), + {ok, Matchers1} + catch + error:badarg -> + {error, {invalid_ms, Name, MSpec}} + end. + +-spec set_matchers_term(Matchers :: matchers()) -> ok. +set_matchers_term(Matchers) when is_map(Matchers) -> + persistent_term:put(?MATCHERS_KEY, Matchers). + +-spec initialize_matchers(RegisteredMatchers :: map()) -> ok. 
+initialize_matchers(RegisteredMatchers) when is_map(RegisteredMatchers) -> + %% Standard matchers to conditionally enable + DefaultMatchers = [ + {all_coordinators, fun matcher_on_all_coordinators/0, undefined}, + {all_rpc_workers, fun matcher_on_all_rpc_workers/0, undefined}, + {docs_read, fun matcher_on_docs_read/1, 1000}, + {rows_read, fun matcher_on_rows_read/1, 1000}, + {docs_written, fun matcher_on_docs_written/1, 500}, + %% long_reqs Threshold in milliseconds + {long_reqs, fun matcher_on_long_reqs/1, 60000}, + {changes_processed, fun matcher_on_changes_processed/1, 1000}, + {ioq_calls, fun matcher_on_ioq_calls/1, 10000} + ], + + %% Add enabled Matchers for standard matchers + Matchers = lists:foldl( + fun({Name0, MatchGenFunc, Threshold0}, Matchers0) when is_atom(Name0) -> + Name = atom_to_list(Name0), + case matcher_enabled(Name) of + true -> + %% Wrap in a try-catch to handle MatcherGen errors + try + MSpec = + case erlang:fun_info(MatchGenFunc, arity) of + {arity, 1} -> + Threshold = matcher_threshold(Name, Threshold0), + MatchGenFunc(Threshold); + {arity, 0} -> + MatchGenFunc(); + _ -> + undefined + end, + case add_matcher(Name, MSpec, Matchers0) of + {ok, Matchers1} -> + Matchers1; + {error, {invalid_ms, NameE, MSpecE}} -> + couch_log:warning("[~p] Failed to initialize matcher[~p]: ~p", [ + ?MODULE, NameE, MSpecE + ]), + Matchers0 + end + catch + _:_ -> + Matchers0 + end; + false -> + Matchers0 + end + end, + #{}, + DefaultMatchers + ), + + %% Add additional dbnames_io matchers + Matchers1 = lists:foldl( + fun({Dbname, Value}, Matchers0) -> + try list_to_integer(Value) of + Threshold when Threshold > 0 -> + Name = "dbnames_io__" ++ Dbname ++ "__" ++ Value, + DbnameB = list_to_binary(Dbname), + MSpec = matcher_on_dbnames_io_threshold(DbnameB, Threshold), + case add_matcher(Name, MSpec, Matchers0) of + {ok, Matchers1} -> + Matchers1; + {error, {invalid_ms, NameE, MSpecE}} -> + couch_log:warning("[~p] Failed to initialize matcher[~p]: ~p", [ + ?MODULE, NameE, MSpecE + ]), + Matchers0 + end; + _ -> + Matchers0 + catch + error:badarg -> + couch_log:warning("[~p] Failed to initialize dbname io matcher on[~p]: ~p", [ + ?MODULE, Dbname, Value + ]), + Matchers0 + end + end, + Matchers, + config:get(?CSRT_MATCHERS_DBNAMES) + ), + + %% Finally, merge in the dynamically registered matchers, with priority + Matchers2 = maps:merge(Matchers1, RegisteredMatchers), + + couch_log:notice("Initialized ~p CSRT Logger matchers", [maps:size(Matchers2)]), + set_matchers_term(Matchers2), + ok. + +-spec matcher_enabled(Name :: string()) -> boolean(). +matcher_enabled(Name) when is_list(Name) -> + config:get_boolean(?CSRT_MATCHERS_ENABLED, Name, false). + +-spec matcher_threshold(Name, Threshold) -> string() | integer() when + Name :: string(), Threshold :: pos_integer() | string(). +matcher_threshold(Name, Default) when + is_list(Name) andalso is_integer(Default) andalso Default > 0 +-> + config:get_integer(?CSRT_MATCHERS_THRESHOLD, Name, Default). + +subscribe_changes() -> + config:listen_for_changes(?MODULE, nil). 
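Because the config listener below calls reload_matchers on any change to the matcher sections, the defaults can be tuned from a remsh without a restart. A sketch, assuming the ?CSRT_MATCHERS_* macros expand to the csrt_logger config sections (the final 'false' keeps the change node-local):

ok = config:set("csrt_logger.matchers_enabled", "ioq_calls", "true", false),
ok = config:set("csrt_logger.matchers_threshold", "ioq_calls", "2000", false).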
+ +handle_config_change(?CSRT_MATCHERS_ENABLED, _Key, _Val, _Persist, St) -> + ok = gen_server:call(?MODULE, reload_matchers, infinity), + {ok, St}; +handle_config_change(?CSRT_MATCHERS_THRESHOLD, _Key, _Val, _Persist, St) -> + ok = gen_server:call(?MODULE, reload_matchers, infinity), + {ok, St}; +handle_config_change(?CSRT_MATCHERS_DBNAMES, _Key, _Val, _Persist, St) -> + ok = gen_server:call(?MODULE, reload_matchers, infinity), + {ok, St}; +handle_config_change(_Sec, _Key, _Val, _Persist, St) -> + {ok, St}. + +handle_config_terminate(_, stop, _) -> + ok; +handle_config_terminate(_, _, _) -> + erlang:send_after(5000, whereis(?MODULE), restart_config_listener). diff --git a/src/couch_srt/src/couch_srt_query.erl b/src/couch_srt/src/couch_srt_query.erl new file mode 100644 index 00000000000..a7578a36a5c --- /dev/null +++ b/src/couch_srt/src/couch_srt_query.erl @@ -0,0 +1,910 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_query). + +-feature(maybe_expr, enable). + +-include_lib("stdlib/include/ms_transform.hrl"). +-include_lib("couch_srt.hrl"). + +%% aggregate query api +-export([ + active/0, + active/1, + active_coordinators/0, + active_coordinators/1, + active_workers/0, + active_workers/1, + + all/0, + find_by_nonce/1, + find_by_pid/1, + find_by_pidref/1, + find_workers_by_pidref/1, + + query_matcher/1, + query_matcher/2, + query_matcher_rows/1, + query_matcher_rows/2, + + query/1, + from/1, + group_by/1, + group_by/2, + sort_by/1, + sort_by/2, + count_by/1, + options/1, + unlimited/0, + with_limit/1, + + run/1, + unsafe_run/1 +]). + +-ifdef(TEST). +-export([topK/2]). +-endif. + +-export_type([ + query/0, + query_expression/0, + query_option/0 +]). + +-type aggregation_keys_fun() :: fun((Ele :: #rctx{}) -> aggregation_values() | aggregation_value()). +-type value_key_fun() :: fun((Ele :: #rctx{}) -> aggregation_values() | aggregation_value()). +-type count_key_fun() :: fun((A :: pos_integer(), B :: pos_integer()) -> pos_integer()). + +-record(selector, { + aggregation_keys = undefined :: + rctx_field() + | [rctx_field()] + | undefined, + value_key = undefined :: + rctx_field() + | undefined +}). + +-record(unsafe_selector, { + aggregation_keys = undefined :: + aggregation_keys_fun() + | rctx_field() + | [rctx_field()] + | undefined, + value_key = undefined :: + value_key_fun() + | rctx_field() + | undefined +}). + +-record(query_options, { + limit = undefined :: pos_integer() | unlimited | undefined, + is_safe = undefined :: boolean() | undefined +}). + +-type aggregation() :: group_by | sort_by | count_by. + +-record(query, { + matcher = undefined :: matcher_name() | all | undefined, + selector = undefined :: #selector{} | #unsafe_selector{} | undefined, + limit = undefined :: pos_integer() | unlimited | undefined, + aggregation = undefined :: aggregation() | undefined, + is_safe = true :: boolean() +}). + +-record(from, { + matcher = undefined :: matcher_name() | all | undefined, + is_safe = undefined :: boolean() | undefined +}). + +-opaque query() :: #query{}. 
+-opaque query_expression() :: + #from{} + | #query_options{} + | #selector{} + | #unsafe_selector{} + | query_option() + | {aggregation(), #selector{}} + | {aggregation(), #unsafe_selector{}}. +-opaque query_option() :: + {limit, pos_integer() | unlimited | undefined}. + +%% +%% Aggregate query API +%% + +active() -> + active_int(all). + +active_coordinators() -> + active_int(coordinators). + +active_workers() -> + active_int(workers). + +%% active_json() or active(json)? +active(json) -> + to_json_list(active_int(all)). + +active_coordinators(json) -> + to_json_list(active_int(coordinators)). + +active_workers(json) -> + to_json_list(active_int(workers)). + +active_int(coordinators) -> + select_by_type(coordinators); +active_int(workers) -> + select_by_type(workers); +active_int(all) -> + select_by_type(all). + +select_by_type(coordinators) -> + ets:select(?CSRT_ETS, ets:fun2ms(fun(#rctx{type = #coordinator{}} = R) -> R end)); +select_by_type(workers) -> + ets:select(?CSRT_ETS, ets:fun2ms(fun(#rctx{type = #rpc_worker{}} = R) -> R end)); +select_by_type(all) -> + ets:tab2list(?CSRT_ETS). + +find_by_nonce(Nonce) -> + couch_srt_server:match_resource(#rctx{nonce = Nonce}). + +find_by_pid(Pid) -> + couch_srt_server:match_resource(#rctx{pid_ref = {Pid, '_'}}). + +find_by_pidref(PidRef) -> + couch_srt_server:match_resource(#rctx{pid_ref = PidRef}). + +find_workers_by_pidref(PidRef) -> + couch_srt_server:match_resource(#rctx{type = #rpc_worker{from = PidRef}}). + +curry_field(Field) -> + fun(Ele) -> couch_srt_entry:value(Field, Ele) end. + +-spec group_by(Matcher, KeyFun, ValFun) -> + {ok, aggregation_result()} | {limit, aggregation_result()} +when + Matcher :: matcher(), + KeyFun :: + aggregation_keys_fun() + | rctx_field() + | [rctx_field()], + ValFun :: + value_key_fun() + | rctx_field(). +group_by(Matcher, KeyFun, ValFun) -> + AggFun = fun erlang:'+'/2, + group_by(Matcher, KeyFun, ValFun, AggFun). + +-spec group_by(Matcher, KeyFun, ValFun, AggFun) -> + {ok, aggregation_result()} | {limit, aggregation_result()} +when + Matcher :: matcher(), + KeyFun :: + aggregation_keys_fun() + | rctx_field() + | [rctx_field()], + ValFun :: + value_key_fun() + | rctx_field(), + AggFun :: + count_key_fun(). +group_by(Matcher, KeyFun, ValFun, AggFun) -> + group_by(Matcher, KeyFun, ValFun, AggFun, query_cardinality_limit()). + +-spec all() -> + matcher(). + +all() -> + Spec = ets:fun2ms(fun(#rctx{} = R) -> R end), + {Spec, ets:match_spec_compile(Spec)}. + +%% eg: group_by(all(), username, docs_read). +%% eg: ^^ or: group_by(all(), [username, docs_read], ioq_calls). +%% eg: group_by(all(), [username, dbname, js_filter], docs_read). +%% eg: group_by(all(), [username, dbname, js_filter], ioq_calls). +%% eg: group_by(all(), [username, dbname, js_filter], get_kv_node). +-spec group_by(Matcher, KeyFun, ValFun, AggFun, Limit) -> + {ok, aggregation_result()} | {limit, aggregation_result()} +when + Matcher :: matcher(), + KeyFun :: + aggregation_keys_fun() + | rctx_field() + | [rctx_field()], + ValFun :: + value_key_fun() + | rctx_field(), + AggFun :: + count_key_fun(), + Limit :: pos_integer(). 
+ +group_by(Matcher, KeyL, ValFun, AggFun, Limit) when is_list(KeyL) -> + KeyFun = fun(Ele) -> list_to_tuple([couch_srt_entry:value(Key, Ele) || Key <- KeyL]) end, + group_by(Matcher, KeyFun, ValFun, AggFun, Limit); +group_by(Matcher, Key, ValFun, AggFun, Limit) when is_atom(Key) -> + group_by(Matcher, curry_field(Key), ValFun, AggFun, Limit); +group_by(Matcher, KeyFun, Val, AggFun, Limit) when is_atom(Val) -> + group_by(Matcher, KeyFun, curry_field(Val), AggFun, Limit); +group_by(Matcher, KeyFun, ValFun, AggFun, Limit) -> + AggregateFun = fun(Rctx, Acc) -> + Key = KeyFun(Rctx), + Val = ValFun(Rctx), + CurrVal = maps:get(Key, Acc, 0), + case AggFun(CurrVal, Val) of + 0 -> + Acc; + NewVal -> + maps:put(Key, NewVal, Acc) + end + end, + fold_ets(Matcher, AggregateFun, Limit, ?CSRT_ETS). + +fold_ets(Matcher, Aggregate, TotalLimit, Table) -> + %% fold by batches of size 5000 rows max + fold_ets(Matcher, Aggregate, 5000, TotalLimit, #{}, ok, Table). + +fold_ets(Matcher, Aggregate, BatchSize, LeftToGo, Acc, ok, Table) when LeftToGo > 0 -> + RequestSize = min(BatchSize, LeftToGo), + {Status, Rctxs} = select_rows(Matcher, RequestSize), + {Size, Aggregated} = lists:foldl( + fun(E, {Idx, A}) -> + {Idx + 1, Aggregate(E, A)} + end, + {0, Acc}, + Rctxs + ), + case Size < RequestSize of + true -> + {Status, Aggregated}; + false -> + fold_ets(Matcher, Aggregate, BatchSize, LeftToGo - Size, Aggregated, Status, Table) + end; +fold_ets(_Matcher, _Aggregate, _BatchSize, _LeftToGo, Acc, limit, _Table) -> + {limit, Acc}; +fold_ets(_Matcher, _Aggregate, _BatchSize, _LeftToGo, Acc, Result, _Table) -> + {Result, Acc}. + +select_rows(Matcher, Limit) -> + try + %% Use `ets:select/3` as this does the ets fold internally in a space + %% efficient way that is still faster than the sequential traversal + %% through the table. See the `ets:select/3` documentation for more + %% info. We also use `ets:select/3` to pass the limit along, which + %% results in ets efficiently traversing rows until `Limit` rows have + %% been accumulated and returned. + %% ets:select/* takes match_spec(), not comp_match_spec() + {MSpec, _CMSpec} = Matcher, + case ets:select(?CSRT_ETS, MSpec, Limit) of + %% Technically the {Rctxs, `continuation()`} here is an `opaque()` + %% type, but we assume `'$end_of_table'` is a reasonable indication + %% of no more rows. However, we fallback to checking the quantity + %% returned in case this is ever no longer true. + {Rctxs, '$end_of_table'} -> + {ok, Rctxs}; + {Rctxs, _Continuation} -> + %% Continuation is opaque, and there's no `is_more_rows` API to + %% check to see if we actually limit the table Limit or we hit + %% the edge case where exactly `Limit` rows were found. The + %% continuation can be passed back to `ets:select/1` to see if + %% explicitly returns `'$end_of_table'`, but if it did hit the + %% `Limit`, we now wastefully fetch the next chunk of rows, so + %% instead for now we assume that when the length of rows + %% equals `Limit` that we hit the cap. Note that this is only + %% relevant because the API returning `'$end_of_table'` is not + %% formally specified, but in theory this clause should not be + %% hit. + case length(Rctxs) >= Limit of + true -> + {limit, Rctxs}; + false -> + {ok, Rctxs} + end; + %% Handle '$end_of_table' + _ -> + {ok, []} + end + catch + _:_ -> + {ok, []} + end. 
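Combining the batched fold above with a matcher gives the capped aggregation that the rest of this module builds on; for example, a sketch totalling ioq_calls per {username, dbname} across live contexts (the matcher name is illustrative):

Matcher = couch_srt_logger:get_matcher("ioq_calls"),
%% {ok, #{KeyTuple => Total}} normally, or {limit, _} when the row cap is hit
{_, Totals} = couch_srt_query:group_by(Matcher, [username, dbname], ioq_calls).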
+ +%% +%% Auxiliary functions to calculate topK +%% + +topK(#{} = Map, T) when is_integer(T), T > 0 -> + Fun = fun(K, V, Set0) -> + case gb_sets:size(Set0) >= T of + true -> + case V =< element(1, gb_sets:smallest(Set0)) of + true -> Set0; + false -> element(2, gb_sets:take_smallest(gb_sets:add({V, K}, Set0))) + end; + false -> + gb_sets:add({V, K}, Set0) + end + end, + Set = maps:fold(Fun, gb_sets:empty(), Map), + lists:reverse([{K, V} || {V, K} <- gb_sets:to_list(Set)]). + +%% +%% Query API functions +%% + +%% @doc Specify the matcher to use for the query. +%% If the atom 'all' is used then all entries are in the scope of the query. +%% The use of 'all' also makes the query 'unsafe', because it scans through +%% all entries and can return many matching rows. +%% Unsafe queries can only be run using 'unsafe_run/1'. +%% +%% Q = query([ +%% ... +%% from("docs_read") +%% ]), +%% +%% @end +-spec from(MatcherName :: matcher_name() | all) -> + #from{} | {error, any()}. +from(all) -> + #from{matcher = all, is_safe = false}; +from(MatcherName) -> + case couch_srt_logger:get_matcher(MatcherName) of + undefined -> + {error, {unknown_matcher, MatcherName}}; + _ -> + #from{matcher = MatcherName, is_safe = true} + end. + +%% @doc Construct 'options' query expression. +%% The following types of expressions are allowed in the query. +%%
+%%   - unlimited/0 @see unlimited/0 (cannot be used with 'with_limit/1') +%%   - with_limit/1 @see with_limit/1 (cannot be used with 'unlimited/0') +%% The order of expressions doesn't matter. +%% +%% Q = query([ +%% ... +%% options([ +%% ... +%% ]) +%% ]), +%% +%% @end +-spec options([query_option()]) -> + #query_options{} | {error, any()}. +options(Options) -> + lists:foldl( + fun + (_, {error, _} = Error) -> + Error; + ({limit, unlimited}, Acc) -> + Acc#query_options{limit = unlimited, is_safe = false}; + ({limit, Limit}, Acc) when is_integer(Limit) -> + Acc#query_options{limit = Limit}; + ({error, _} = Error, _Acc) -> + Error + end, + #query_options{is_safe = true}, + Options + ). + +%% @doc Enable unlimited number of results from the query. +%% The use of 'unlimited' makes the query 'unsafe', because it can return many matching rows. +%% Unsafe queries can only be run using 'unsafe_run/1'. +%% +%% Q = query([ +%% ... +%% options([ +%% unlimited() +%% ]) +%% ]), +%% +%% @end +unlimited() -> + {limit, unlimited}. + +%% @doc Set limit on number of results returned from the query. +%% The construction of the query will fail if the 'limit' is greater than +%% allowed for this cluster. +%% +%% Q = query([ +%% ... +%% options([ +%% with_limit(100) +%% ]) +%% ]), +%% +%% @end +with_limit(Limit) when is_integer(Limit) -> + case Limit =< query_limit() of + true -> + {limit, Limit}; + false -> + {error, {beyond_limit, Limit}} + end; +with_limit(Limit) -> + {error, {invalid_limit, Limit}}. + +%% @doc Request 'count_by' aggregation of results. +%% +%% Q = query([ +%% ... +%% count_by(username) +%% ]), +%% +%% @end +-spec count_by(AggregationKeys) -> + {count_by, #selector{}} | {count_by, #unsafe_selector{}} | {error, any()} +when + AggregationKeys :: + aggregation_keys_fun() + | binary() + | rctx_field() + | [binary()] + | [rctx_field()]. +count_by(AggregationKeys) -> + with_tag(select(AggregationKeys), count_by). + +%% @doc Request 'sort_by' aggregation of results. +%% +%% Q = query([ +%% ... +%% sort_by([username, dbname]) +%% ]), +%% +%% @end +-spec sort_by(AggregationKeys) -> + {sort_by, #selector{}} | {sort_by, #unsafe_selector{}} | {error, any()} +when + AggregationKeys :: + aggregation_keys_fun() + | binary() + | rctx_field() + | [binary()] + | [rctx_field()]. +sort_by(AggregationKeys) -> + with_tag(select(AggregationKeys), sort_by). + +%% @doc Request 'sort_by' aggregation of results. +%% +%% Q = query([ +%% ... +%% sort_by([username, dbname], ioq_calls) +%% ]), +%% +%% @end +-spec sort_by(AggregationKeys, ValueKey) -> + {sort_by, #selector{}} | {sort_by, #unsafe_selector{}} | {error, any()} +when + AggregationKeys :: + aggregation_keys_fun() + | binary() + | rctx_field() + | [binary()] + | [rctx_field()], + ValueKey :: + value_key_fun() + | binary() + | rctx_field(). +sort_by(AggregationKeys, ValueKey) -> + with_tag(select(AggregationKeys, ValueKey), sort_by). + +%% @doc Request 'group_by' aggregation of results. +%% +%% Q = query([ +%% ... +%% group_by([username, dbname]) +%% ]), +%% +%% @end +-spec group_by(AggregationKeys) -> + {group_by, #selector{}} | {group_by, #unsafe_selector{}} | {error, any()} +when + AggregationKeys :: + aggregation_keys_fun() + | binary() + | rctx_field() + | [binary()] + | [rctx_field()]. +group_by(AggregationKeys) -> + with_tag(select(AggregationKeys), group_by). + +%% @doc Request 'group_by' aggregation of results. +%% +%% Q = query([ +%% ...
+%% group_by([username, dbname], ioq_calls) +%% ]), +%% +%% @end +-spec group_by(AggregationKeys, ValueKey) -> + {group_by, #selector{}} | {group_by, #unsafe_selector{}} | {error, any()} +when + AggregationKeys :: + aggregation_keys_fun() + | binary() + | rctx_field() + | [binary()] + | [rctx_field()], + ValueKey :: + value_key_fun() + | binary() + | rctx_field(). +group_by(AggregationKeys, ValueKey) -> + with_tag(select(AggregationKeys, ValueKey), group_by). + +%% @doc Construct query from the expressions. +%% The following types of expressions are allowed in the query. +%%
+%%   - group_by/1 @see group_by/1 +%%   - group_by/2 @see group_by/2 +%%   - sort_by/1 @see sort_by/1 +%%   - count_by/1 @see count_by/1 +%%   - options/1 @see options/1 +%%   - from/1 @see from/1 +%% The order of expressions doesn't matter. +%% +%% Q = query([ +%% from("docs_read"), +%% group_by([username, dbname], ioq_calls), +%% options([ +%% with_limit(10) +%% ]) +%% ]), +%% +%% @end +query(Query) -> + % start assuming safe query and turn to unsafe when we detect issues + Acc = #query{is_safe = true}, + Result = lists:foldr( + fun + ({Aggregation, #unsafe_selector{} = Selector}, {E, #query{selector = undefined} = Q}) -> + {E, Q#query{selector = Selector, is_safe = false, aggregation = Aggregation}}; + ({Aggregation, #unsafe_selector{}}, {E, Q}) -> + {[{more_than_once, {select, Aggregation}} | E], Q}; + ({Aggregation, #selector{} = Selector}, {E, #query{selector = undefined} = Q}) -> + {E, Q#query{selector = Selector, aggregation = Aggregation}}; + ({Aggregation, #selector{}}, {E, Q}) -> + {[{more_than_once, {select, Aggregation}} | E], Q}; + (#query_options{is_safe = false, limit = Limit}, {E, #query{limit = undefined} = Q}) -> + {E, Q#query{limit = Limit, is_safe = false}}; + (#query_options{limit = Limit}, {E, #query{limit = undefined} = Q}) -> + {E, Q#query{limit = Limit}}; + (#query_options{}, {E, Q}) -> + {[{more_than_once, options} | E], Q}; + (#from{matcher = Matcher, is_safe = false}, {E, #query{matcher = undefined} = Q}) -> + {E, Q#query{matcher = Matcher, is_safe = false}}; + (#from{matcher = Matcher}, {E, #query{matcher = undefined} = Q}) -> + {E, Q#query{matcher = Matcher}}; + (#from{}, {E, Q}) -> + {[{more_than_once, from} | E], Q}; + ({error, Reason}, {E, Q}) -> + {[Reason | E], Q} + end, + {[], Acc}, + Query + ), + case Result of + {[], #query{} = Q} -> + Q; + {Errors, _} -> + {error, Errors} + end. + +%% @doc Executes the provided query. Only 'safe' queries can be executed using 'run'. +%% The query is considered 'unsafe' if any of the conditions below are met: +%%
+%%   - Query uses 'unlimited/0' +%%   - Query uses 'from(all)' +%% +%% Q = query([ +%% from("docs_read"), +%% group_by([username, dbname], ioq_calls), +%% options([ +%% with_limit(10) +%% ]) +%% ]), +%% run(Q) +%% +%% @end +-spec run(#query{}) -> + {ok, [{aggregation_key(), pos_integer()}]} + | {limit, [{aggregation_key(), pos_integer()}]}. +run(#query{ + is_safe = true, + matcher = MatcherName, + selector = #selector{} = Selector, + limit = Limit, + aggregation = Aggregation +}) -> + % we validated the presence of the matcher so this shouldn't fail + {ok, Matcher} = get_matcher(MatcherName), + case {Aggregation, Selector} of + {count_by, #selector{aggregation_keys = AKey, value_key = undefined}} -> + ValFun = fun(_) -> 1 end, + to_map(maybe_apply_limit(group_by(Matcher, AKey, ValFun), Limit)); + {count_by, #selector{aggregation_keys = AKey, value_key = VKey}} -> + to_map(maybe_apply_limit(group_by(Matcher, AKey, VKey), Limit)); + {sort_by, #selector{aggregation_keys = AKey, value_key = VKey}} -> + maybe_apply_limit(group_by(Matcher, AKey, VKey), Limit); + {group_by, #selector{aggregation_keys = AKey, value_key = undefined}} -> + ValFun = fun(_) -> 1 end, + to_map(maybe_apply_limit(group_by(Matcher, AKey, ValFun), Limit)); + {group_by, #selector{aggregation_keys = AKey, value_key = VKey}} -> + to_map(maybe_apply_limit(group_by(Matcher, AKey, VKey), Limit)) + end; +run(#query{}) -> + {error, + {unsafe_query, "Please use 'unsafe_run(Query)' instead if you really know what you are doing."}}. + +%% @doc Executes the provided query. This function is similar to 'run/1', +%% however it supports 'unsafe' queries. Be very careful using it. +%% Pay attention to the cardinality of the result. +%% The query is considered 'unsafe' if any of the conditions below are met: +%%
+%%   - Query uses 'unlimited/0' +%%   - Query uses 'from(all)' +%% +%% Q = query([ +%% from("docs_read"), +%% group_by([username, dbname], ioq_calls), +%% options([ +%% with_limit(10) +%% ]) +%% ]), +%% unsafe_run(Q) +%% +%% @end +-spec unsafe_run(#query{}) -> + {ok, [{aggregation_key(), pos_integer()}]} + | {limit, [{aggregation_key(), pos_integer()}]}. +unsafe_run(#query{selector = #unsafe_selector{} = Selector} = Query) -> + %% mutate the record (since all fields stay the same) + unsafe_run(Query#query{selector = setelement(1, Selector, selector)}); +unsafe_run(#query{ + matcher = MatcherName, + selector = #selector{} = Selector, + limit = Limit, + aggregation = Aggregation +}) -> + Matcher = choose_matcher(MatcherName), + case {Aggregation, Selector} of + {count_by, #selector{aggregation_keys = AKey, value_key = undefined}} -> + ValFun = fun(_) -> 1 end, + to_map(maybe_apply_limit(group_by(Matcher, AKey, ValFun), Limit)); + {count_by, #selector{aggregation_keys = AKey, value_key = VKey}} -> + to_map(maybe_apply_limit(group_by(Matcher, AKey, VKey), Limit)); + {sort_by, #selector{aggregation_keys = AKey, value_key = VKey}} -> + maybe_apply_limit(group_by(Matcher, AKey, VKey), Limit); + {group_by, #selector{aggregation_keys = AKey, value_key = undefined}} -> + ValFun = fun(_) -> 1 end, + to_map(maybe_apply_limit(group_by(Matcher, AKey, ValFun), Limit)); + {group_by, #selector{aggregation_keys = AKey, value_key = VKey}} -> + to_map(maybe_apply_limit(group_by(Matcher, AKey, VKey), Limit)) + end. + +%% +%% Query API auxiliary functions +%% + +-spec select(AggregationKeys) -> + #selector{} | #unsafe_selector{} | {error, any()} +when + AggregationKeys :: + aggregation_keys_fun() | binary() | rctx_field() | [binary()] | [rctx_field()]. + +select(AggregationKeys) -> + maybe + {ok, AKey} ?= parse_aggregation_keys(AggregationKeys), + case is_safe_key(AKey) of + true -> + #selector{aggregation_keys = AKey}; + false -> + #unsafe_selector{aggregation_keys = AKey} + end + end. + +-spec select(AggregationKeys, ValueKey) -> + {ok, #selector{} | #unsafe_selector{}} | {error, any()} +when + AggregationKeys :: + aggregation_keys_fun() | binary() | rctx_field() | [binary()] | [rctx_field()], + ValueKey :: value_key_fun() | binary() | rctx_field(). + +select(AggregationKeys, ValueKey) -> + maybe + {ok, AKey} ?= parse_aggregation_keys(AggregationKeys), + {ok, VKey} ?= parse_value_key(ValueKey), + case is_safe_key(AKey) andalso is_safe_key(VKey) of + true -> + #selector{aggregation_keys = AKey, value_key = VKey}; + false -> + #unsafe_selector{aggregation_keys = AKey, value_key = VKey} + end + end. + +is_safe_key(Fun) when is_function(Fun) -> + false; +is_safe_key(_) -> + true. + +parse_aggregation_keys(Fun) when is_function(Fun) -> + validate_fun(Fun, key_fun); +parse_aggregation_keys(Keys) -> + with_ok(parse_key(Keys)). + +parse_value_key(Fun) when is_function(Fun) -> + validate_fun(Fun, value_fun); +parse_value_key(Key) -> + case parse_key(Key) of + {error, _} = Error -> + Error; + Keys when is_list(Keys) -> + {error, multiple_value_keys}; + K -> + {ok, K} + end. + +with_tag({error, _} = Error, _) -> + Error; +with_tag(Result, Tag) -> + {Tag, Result}. + +with_ok({error, _} = Error) -> + Error; +with_ok(Result) -> + {ok, Result}. + +validate_fun(Fun, Tag) when is_function(Fun, 1) -> + try Fun(#rctx{}) of + _ -> + {ok, Fun} + catch + _:_ -> + {error, {invalid_fun, Tag}} + end; +validate_fun(_Fun, Tag) -> + {error, {invalid_fun, Tag}}.
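With the expression constructors and validation helpers above in place, an end-to-end safe query reads as follows; a sketch, assuming the "ioq_calls" default matcher is enabled:

Q = couch_srt_query:query([
    couch_srt_query:from("ioq_calls"),
    couch_srt_query:group_by([username, dbname], ioq_calls),
    couch_srt_query:options([couch_srt_query:with_limit(10)])
]),
%% group_by queries return {ok, Map}, or {limit, Map} if the cardinality cap was reached
{_, Grouped} = couch_srt_query:run(Q).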
+ +choose_matcher(all) -> + all(); +choose_matcher(MatcherName) -> + % we validated the presence of the matcher so this shouldn't fail + {ok, Matcher} = get_matcher(MatcherName), + Matcher. + +-spec maybe_apply_limit(ResultsOrError, Limit) -> OrderedResultsOrError when + ResultsOrError :: + {ok, aggregation_result()} + | {limit, aggregation_result()} + | {error, any()}, + Limit :: unlimited | undefined | pos_integer(), + OrderedResultsOrError :: + {ok, ordered_result()} + | {limit, ordered_result()} + | {ok, aggregation_result()} + | {limit, aggregation_result()} + | {error, any()}. + +maybe_apply_limit({Result, Results}, unlimited) -> + {Result, Results}; +maybe_apply_limit({Result, Results}, undefined) -> + {Result, topK(Results, query_limit())}; +maybe_apply_limit({Result, Results}, Limit) when is_integer(Limit) -> + {Result, topK(Results, Limit)}. + +-spec to_map(ResultsOrError) -> OrderedResultsOrError when + ResultsOrError :: + {ok, ordered_result() | aggregation_result()} + | {limit, ordered_result() | aggregation_result()}, + OrderedResultsOrError :: + {ok, aggregation_result()} + | {limit, aggregation_result()}. +to_map({Result, Results}) when is_list(Results) -> + {Result, maps:from_list(Results)}; +to_map({Result, Results}) when is_map(Results) -> + {Result, Results}. + +-spec parse_key(Keys :: binary() | atom() | [binary()] | [atom()]) -> + rctx_field() + | [rctx_field()] + | {error, Reason :: any()}. + +parse_key([C | _] = Key) when is_integer(C) -> + couch_srt_entry:key(Key); +parse_key(Keys) when is_list(Keys) -> + parse_key(Keys, []); +parse_key(BinKey) when is_binary(BinKey) -> + couch_srt_entry:key(BinKey); +parse_key(undefined) -> + undefined; +parse_key(Key) when is_atom(Key) -> + couch_srt_entry:key(Key). + +parse_key([BinKey | Rest], Keys) -> + case couch_srt_entry:key(BinKey) of + {error, _} = Error -> + Error; + Key -> + parse_key(Rest, [Key | Keys]) + end; +parse_key([], Keys) -> + lists:reverse(Keys). + +%% +%% Scanning with matchers +%% +-spec query_matcher(MatcherName :: string()) -> + {ok, query_result()} + | {error, any()}. +query_matcher(MatcherName) when is_list(MatcherName) -> + query_matcher(MatcherName, query_limit()). + +-spec query_matcher(MatcherName :: matcher_name(), Limit :: pos_integer()) -> + {ok, query_result()} + | {error, any()}. +query_matcher(MatcherName, Limit) when is_list(MatcherName) andalso is_integer(Limit) -> + case get_matcher(MatcherName) of + {ok, Matcher} -> + query_matcher_rows(Matcher, Limit); + Error -> + Error + end. + +-spec query_matcher_rows(Matcher :: matcher()) -> + {ok, query_result()} + | {error, any()}. +query_matcher_rows(Matcher) -> + query_matcher_rows(Matcher, query_limit()). + +-spec query_matcher_rows(Matcher :: matcher(), Limit :: pos_integer()) -> + {ok, query_result()} + | {error, any()}. +query_matcher_rows({MSpec, _CompMSpec}, Limit) when + is_list(MSpec) andalso is_integer(Limit) andalso Limit >= 1 +-> + try + %% ets:select/* takes match_spec(), not comp_match_spec() + %% use ets:select/3 to constrain to Limit rows, but we need to handle + %% the continuation() style return type compared with ets:select/2. + Rctxs = + case ets:select(?CSRT_ETS, MSpec, Limit) of + {Rctxs0, _Continuation} -> + Rctxs0; + %% Handle '$end_of_table' + _ -> + [] + end, + {ok, to_json_list(Rctxs)} + catch + _:_ = Error -> + {error, Error} + end. + +get_matcher(MatcherName) -> + case couch_srt_logger:get_matcher(MatcherName) of + undefined -> + {error, {unknown_matcher, MatcherName}}; + Matcher -> + {ok, Matcher} + end. 
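In contrast to the aggregations, the query_matcher entry points above return the raw matched contexts, already JSON-shaped via to_json_list/1. A sketch (the matcher name is illustrative):

{ok, Rows} = couch_srt_query:query_matcher("docs_read", 100).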
+ +%% +%% Auxiliary functions +%% +query_limit() -> + config:get_integer(?CSRT, "query_limit", ?QUERY_LIMIT). + +query_cardinality_limit() -> + config:get_integer(?CSRT, "query_cardinality_limit", ?QUERY_CARDINALITY_LIMIT). + +to_json_list(List) when is_list(List) -> + lists:map(fun couch_srt_entry:to_json/1, List). diff --git a/src/couch_srt/src/couch_srt_server.erl b/src/couch_srt/src/couch_srt_server.erl new file mode 100644 index 00000000000..a2ed6944381 --- /dev/null +++ b/src/couch_srt/src/couch_srt_server.erl @@ -0,0 +1,328 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_server). + +-behaviour(gen_server). + +-export([ + start_link/0, + init/1, + handle_call/3, + handle_cast/2 +]). + +-export([ + create_pid_ref/0, + create_resource/1, + destroy_resource/1, + get_resource/1, + get_context_type/1, + inc/2, + inc/3, + match_resource/1, + new_context/2, + set_context_dbname/2, + set_context_handler_fun/2, + set_context_type/2, + set_context_username/2, + update_counter/3, + update_counter/4, + update_counters/2, + update_counters/3 +]). + +-include_lib("stdlib/include/ms_transform.hrl"). +-include_lib("couch_srt.hrl"). + +-record(st, {}). + +%% +%% Public API +%% + +-spec create_pid_ref() -> pid_ref(). +create_pid_ref() -> + {self(), make_ref()}. + +%% +%% +%% Context lifecycle API +%% + +-spec new_context(Type :: rctx_type(), Nonce :: nonce()) -> rctx(). +new_context(Type, Nonce) -> + #rctx{ + nonce = Nonce, + pid_ref = create_pid_ref(), + type = Type + }. + +-spec set_context_dbname(DbName, PidRef) -> boolean() when + DbName :: dbname(), PidRef :: maybe_pid_ref(). +set_context_dbname(_, undefined) -> + false; +set_context_dbname(DbName, PidRef) -> + update_element(PidRef, [{#rctx.dbname, DbName}]). + +-spec set_context_handler_fun({Mod, Func}, PidRef) -> boolean() when + Mod :: atom(), Func :: atom(), PidRef :: maybe_pid_ref(). +set_context_handler_fun(_, undefined) -> + false; +set_context_handler_fun({Mod, Func}, PidRef) -> + case get_resource(PidRef) of + undefined -> + false; + #rctx{} = Rctx -> + %% TODO: #coordinator{} assumption needs to adapt for other types + case couch_srt_server:get_context_type(Rctx) of + #coordinator{} = Coordinator0 -> + Coordinator = Coordinator0#coordinator{mod = Mod, func = Func}, + set_context_type(Coordinator, PidRef); + _ -> + false + end + end. + +-spec set_context_username(UserName, PidRef) -> boolean() when + UserName :: username(), PidRef :: maybe_pid_ref(). +set_context_username(_, undefined) -> + false; +set_context_username(UserName, PidRef) -> + update_element(PidRef, [{#rctx.username, UserName}]). + +-spec get_context_type(Rctx :: rctx()) -> rctx_type(). +get_context_type(#rctx{type = Type}) -> + Type. + +-spec set_context_type(Type, PidRef) -> boolean() when + Type :: rctx_type(), PidRef :: maybe_pid_ref(). +set_context_type(Type, PidRef) -> + update_element(PidRef, [{#rctx.type, Type}]). + +-spec create_resource(Rctx :: rctx()) -> boolean(). 
+create_resource(#rctx{} = Rctx) -> + try ets:insert(?CSRT_ETS, Rctx) of + Result -> Result + catch + error:badarg -> + false + end. + +-spec destroy_resource(PidRef :: maybe_pid_ref()) -> boolean(). +destroy_resource(undefined) -> + false; +destroy_resource({_, _} = PidRef) -> + try ets:delete(?CSRT_ETS, PidRef) of + Result -> Result + catch + error:badarg -> + false + end. + +-spec get_resource(PidRef :: maybe_pid_ref()) -> maybe_rctx(). +get_resource(undefined) -> + undefined; +get_resource(PidRef) -> + try ets:lookup(?CSRT_ETS, PidRef) of + [#rctx{} = Rctx] -> + Rctx; + [] -> + undefined + catch + error:badarg -> + undefined + end. + +-spec match_resource(Rctx :: maybe_rctx()) -> [] | [rctx()]. +match_resource(undefined) -> + []; +match_resource(#rctx{} = Rctx) -> + try + ets:match_object(?CSRT_ETS, Rctx) + catch + error:badarg -> + [] + end. + +%% Is this a valid #rctx{} field for inducing ets:update_counter upon? +-spec is_rctx_stat_field(Field :: rctx_field() | atom()) -> boolean(). +is_rctx_stat_field(Field) -> + maps:is_key(Field, ?STAT_KEYS_TO_FIELDS). + +%% Get the #rctx{} field record index of the corresponding stat counter field +-spec get_rctx_stat_field(Field :: rctx_field()) -> + non_neg_integer() + | throw({badkey, Key :: any()}). +get_rctx_stat_field(Field) -> + maps:get(Field, ?STAT_KEYS_TO_FIELDS). + +%% This provides a base set of updates to include along with any other #rctx{} +%% updates. Specifically, this provides a way to automatically track and +%% increment the #rctx.updated_at field without having to do ets:lookup to find +%% the last updated_at time, or having to do ets:update_element to set a +%% specific updated_at. We trade a pdict marker to keep inc operations as only +%% a singular ets call while sneaking in updated_at. +%% Calling couch_srt_util:put_updated_at/1 within this function is not the cleanest, +%% but it allows us to encapsulate the automatic updated_at inclusion into the +%% ?MODULE:update_counter(s)/3-4 arity call-through while still allowing the +%% 4-arity version to be exposed to pass an empty base updates list. Isolating +%% this logic means the final arity functions operate independently of any +%% local pdict values. +-spec make_base_counter_updates() -> counter_updates_list(). +make_base_counter_updates() -> + case couch_srt_util:get_updated_at() of + undefined -> + []; + LastUpdated -> + Now = couch_srt_util:tnow(), + couch_srt_util:put_updated_at(Now), + UpdatedInc = couch_srt_util:make_dt(LastUpdated, Now, native), + [{#rctx.updated_at, UpdatedInc}] + end. + +-spec update_counter(PidRef, Field, Count) -> non_neg_integer() when + PidRef :: maybe_pid_ref(), + Field :: rctx_field(), + Count :: non_neg_integer(). +update_counter(undefined, _Field, _Count) -> + 0; +update_counter(_PidRef, _Field, 0) -> + 0; +update_counter(PidRef, Field, Count) -> + %% Only call make_base_counter_updates() if PidRef, Field, Count all valid + case is_rctx_stat_field(Field) of + true -> + update_counter(PidRef, Field, Count, make_base_counter_updates()); + false -> + 0 + end. + +-spec update_counter(PidRef, Field, Count, BaseUpdates) -> non_neg_integer() when + PidRef :: maybe_pid_ref(), + Field :: rctx_field(), + Count :: non_neg_integer(), + BaseUpdates :: [] | [{rctx_field(), integer()}]. 
+update_counter(undefined, _Field, _Count, _BaseUpdates) -> + 0; +update_counter({_Pid, _Ref} = PidRef, Field, Count, BaseUpdates) when Count >= 0 -> + case is_rctx_stat_field(Field) of + true -> + Updates = [{get_rctx_stat_field(Field), Count} | BaseUpdates], + try + ets:update_counter(?CSRT_ETS, PidRef, Updates, #rctx{pid_ref = PidRef}) + catch + error:badarg -> + 0 + end; + false -> + 0 + end. + +-spec update_counters(PidRef, Delta) -> boolean() when + PidRef :: maybe_pid_ref(), + Delta :: delta(). +update_counters(undefined, _Delta) -> + false; +update_counters(PidRef, Delta) when is_map(Delta) -> + update_counters(PidRef, Delta, make_base_counter_updates()). + +-spec update_counters(PidRef, Delta, BaseUpdates) -> boolean() when + PidRef :: maybe_pid_ref(), + Delta :: delta(), + BaseUpdates :: [] | [{rctx_field(), integer()}]. +update_counters(undefined, _Delta, _BaseUpdates) -> + false; +update_counters({_Pid, _Ref} = PidRef, Delta, BaseUpdates) when is_map(Delta) -> + Updates = maps:fold( + fun(Field, Count, Acc) -> + case is_rctx_stat_field(Field) of + true -> + [{get_rctx_stat_field(Field), Count} | Acc]; + false -> + %% This skips entries that are not is_rctx_stat_field's + %% Another approach would be: + %% lists:all(fun is_rctx_stat_field/1, maps:keys(Delta)) + %% But that's a lot of looping for not even accumulating the update. + %% Need to drop Delta.dt either way as it's not an rctx_field + Acc + end + end, + BaseUpdates, + Delta + ), + + case Updates of + [] -> + false; + _ -> + try + ets:update_counter(?CSRT_ETS, PidRef, Updates, #rctx{pid_ref = PidRef}), + true + catch + error:badarg -> + false + end + end. + +-spec inc(PidRef :: maybe_pid_ref(), Field :: rctx_field()) -> non_neg_integer(). +inc(PidRef, Field) -> + inc(PidRef, Field, 1). + +-spec inc(PidRef, Field, N) -> non_neg_integer() when + PidRef :: maybe_pid_ref(), + Field :: rctx_field(), + N :: non_neg_integer(). +inc(undefined, _Field, _) -> + 0; +inc(_PidRef, _Field, 0) -> + 0; +inc({_Pid, _Ref} = PidRef, Field, N) when is_integer(N) andalso N > 0 -> + case is_rctx_stat_field(Field) of + true -> + update_counter(PidRef, Field, N); + false -> + 0 + end. + +%% +%% gen_server callbacks +%% + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + ets:new(?CSRT_ETS, [ + named_table, + public, + {write_concurrency, auto}, + {read_concurrency, true}, + {keypos, #rctx.pid_ref} + ]), + {ok, #st{}}. + +handle_call(Msg, From, State) -> + {stop, {unknown_call, Msg, From}, State}. + +handle_cast(Msg, State) -> + {stop, {unknown_cast, Msg}, State}. + +%% +%% private functions +%% + +-spec update_element(PidRef :: maybe_pid_ref(), Updates :: [tuple()]) -> boolean(). +update_element(undefined, _Update) -> + false; +update_element({_Pid, _Ref} = PidRef, Update) -> + (catch ets:update_element(?CSRT_ETS, PidRef, Update)) == true. diff --git a/src/couch_srt/src/couch_srt_sup.erl b/src/couch_srt/src/couch_srt_sup.erl new file mode 100644 index 00000000000..288c661b0e4 --- /dev/null +++ b/src/couch_srt/src/couch_srt_sup.erl @@ -0,0 +1,40 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_sup). + +-behaviour(supervisor). + +-export([ + start_link/0, + init/1 +]). + +%% Set the child workers' restart strategy to `transient` +%% so that if a CSRT failure arises that triggers the sup rate limiter +%% thresholds, that shutdown signal will bubble up here and be ignored, +%% as the use of `transient` specifies that `normal` and `shutdown` signals +%% are ignored. +%% Switch this to `permanent` once CSRT is out of the experimental stage. +-define(CHILD(I, Type), {I, {I, start_link, []}, transient, 5000, Type, [I]}). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init([]) -> + {ok, + { + {one_for_one, 5, 10}, [ + ?CHILD(couch_srt_server, worker), + ?CHILD(couch_srt_logger, worker) + ] + }}. diff --git a/src/couch_srt/src/couch_srt_util.erl b/src/couch_srt/src/couch_srt_util.erl new file mode 100644 index 00000000000..7815e5ffa51 --- /dev/null +++ b/src/couch_srt/src/couch_srt_util.erl @@ -0,0 +1,244 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_util). + +-export([ + is_enabled/0, + is_enabled_init_p/0, + is_enabled_reporting/0, + is_enabled_rpc_reporting/0, + get_pid_ref/0, + get_pid_ref/1, + set_pid_ref/1, + tnow/0, + tutc/0, + tutc/1 +]). + +%% Delta API +-export([ + add_delta/2, + extract_delta/1, + get_delta/1, + get_delta_a/0, + get_updated_at/0, + maybe_add_delta/1, + maybe_add_delta/2, + make_delta/1, + make_dt/2, + make_dt/3, + rctx_delta/2, + put_delta_a/1, + put_updated_at/1 +]). + +-include_lib("couch_srt.hrl"). + +-ifdef(TEST). +-spec is_enabled() -> boolean(). +is_enabled() -> + %% randomly enable CSRT during testing to handle unexpected failures + case config:get_boolean(?CSRT, "randomize_testing", true) of + true -> + rand:uniform(100) > 80; + false -> + config:get_boolean(?CSRT, "enable", true) + end. +-else. +-spec is_enabled() -> boolean(). +is_enabled() -> + config:get_boolean(?CSRT, "enable", false). +-endif. + +-spec is_enabled_init_p() -> boolean(). +is_enabled_init_p() -> + config:get_boolean(?CSRT, "enable_init_p", false). + +%% Toggle to disable all reporting +-spec is_enabled_reporting() -> boolean(). +is_enabled_reporting() -> + config:get_boolean(?CSRT, "enable_reporting", false). + +%% Toggle to disable all reporting from #rpc_worker{} types, e.g. only log +%% #coordinator{} types. This is a bit of a kludge that would be better served +%% by a dynamic match spec generator, but this provides a knob for disabling +%% any rpc worker logs, even if they hit the normal logging thresholds. +-spec is_enabled_rpc_reporting() -> boolean(). +is_enabled_rpc_reporting() -> + config:get_boolean(?CSRT, "enable_rpc_reporting", false). + +%% Monotonic time now in native format, used for forward-only event time tracking +-spec tnow() -> integer(). +tnow() -> + erlang:monotonic_time(). + +%% Get current system time in UTC RFC 3339 format +-spec tutc() -> calendar:rfc3339_string().
+tutc() ->
+    tutc(tnow()).
+
+%% Convert an Erlang native monotonic_time() into UTC RFC 3339 format
+-spec tutc(Time :: integer()) -> calendar:rfc3339_string().
+tutc(Time0) when is_integer(Time0) ->
+    calendar:system_time_to_rfc3339(
+        Time0 + erlang:time_offset(),
+        [{unit, native}, {offset, "z"}]
+    ).
+
+%% Returns dt (delta time) in microseconds
+%% @equiv make_dt(A, B, microsecond)
+-spec make_dt(A, B) -> pos_integer() when
+    A :: integer(),
+    B :: integer().
+make_dt(A, B) ->
+    make_dt(A, B, microsecond).
+
+%% Returns monotonic dt (delta time) in specified time_unit()
+-spec make_dt(A, B, Unit) -> pos_integer() when
+    A :: integer(),
+    B :: integer(),
+    Unit :: erlang:time_unit().
+make_dt(A, A, _Unit) when is_integer(A) ->
+    %% Handle the edge case when the monotonic_time()'s are equal.
+    %% Always return a non-zero value so we don't divide by zero.
+    %% This always returns 1, independent of unit, as that's the smallest
+    %% possible positive integer value delta.
+    1;
+make_dt(A, B, Unit) when is_integer(A) andalso is_integer(B) andalso B > A ->
+    case erlang:convert_time_unit(B - A, native, Unit) of
+        Delta when Delta > 0 ->
+            Delta;
+        _ ->
+            %% Handle the case where Delta is smaller than a whole Unit, eg:
+            %% Unit = millisecond,
+            %% (node1@127.0.0.1)2> erlang:convert_time_unit(423, native, Unit).
+            %% 0
+            1
+    end.
+
+-spec add_delta(T :: term(), Delta :: maybe_delta()) -> term_delta().
+add_delta(T, undefined) ->
+    T;
+add_delta(T, Delta) when is_map(Delta) ->
+    add_delta_int(T, {delta, Delta}).
+
+-spec add_delta_int(T :: term(), Delta :: tagged_delta()) -> term_delta().
+add_delta_int(T, {delta, _} = Delta) ->
+    {T, Delta}.
+
+-spec extract_delta(T :: term_delta()) -> {term(), maybe_delta()}.
+extract_delta({Msg, {delta, Delta}}) ->
+    {Msg, Delta};
+extract_delta(Msg) ->
+    {Msg, undefined}.
+
+-spec get_delta(PidRef :: maybe_pid_ref()) -> tagged_delta().
+get_delta(PidRef) ->
+    {delta, make_delta(PidRef)}.
+
+-spec maybe_add_delta(T :: term()) -> term_delta().
+maybe_add_delta(T) ->
+    case is_enabled() of
+        false ->
+            T;
+        true ->
+            maybe_add_delta_int(T, get_delta(get_pid_ref()))
+    end.
+
+%% Allow for an externally provided Delta in error handling scenarios,
+%% eg in cases like rexi_server:notify_caller/3
+-spec maybe_add_delta(T :: term(), Delta :: maybe_delta()) -> term_delta().
+maybe_add_delta(T, undefined) ->
+    T;
+maybe_add_delta(T, Delta0) when is_map(Delta0) ->
+    case is_enabled() of
+        false ->
+            T;
+        true ->
+            Delta = {delta, Delta0},
+            maybe_add_delta_int(T, Delta)
+    end.
+
+-spec maybe_add_delta_int(T :: term(), Delta :: tagged_delta()) -> term_delta().
+maybe_add_delta_int(T, {delta, undefined}) ->
+    T;
+maybe_add_delta_int(T, {delta, _} = Delta) ->
+    add_delta_int(T, Delta).
+
+-spec make_delta(PidRef :: maybe_pid_ref()) -> maybe_delta().
+make_delta(undefined) ->
+    undefined;
+make_delta(PidRef) ->
+    TA = get_delta_a(),
+    TB = couch_srt_server:get_resource(PidRef),
+    Delta = rctx_delta(TA, TB),
+    put_delta_a(TB),
+    Delta.
+
+-spec rctx_delta(TA :: maybe_rctx(), TB :: maybe_rctx()) -> maybe_delta().
+rctx_delta(#rctx{} = TA, #rctx{} = TB) -> + Delta = #{ + docs_read => TB#rctx.docs_read - TA#rctx.docs_read, + docs_written => TB#rctx.docs_written - TA#rctx.docs_written, + js_filter => TB#rctx.js_filter - TA#rctx.js_filter, + js_filtered_docs => TB#rctx.js_filtered_docs - TA#rctx.js_filtered_docs, + rows_read => TB#rctx.rows_read - TA#rctx.rows_read, + changes_returned => TB#rctx.changes_returned - TA#rctx.changes_returned, + get_kp_node => TB#rctx.get_kp_node - TA#rctx.get_kp_node, + get_kv_node => TB#rctx.get_kv_node - TA#rctx.get_kv_node, + db_open => TB#rctx.db_open - TA#rctx.db_open, + ioq_calls => TB#rctx.ioq_calls - TA#rctx.ioq_calls, + %% "Example to extend CSRT" + %% write_kp_node => TB#rctx.write_kp_node - TA#rctx.write_kp_node, + %% write_kv_node => TB#rctx.write_kv_node - TA#rctx.write_kv_node, + dt => make_dt(TA#rctx.updated_at, TB#rctx.updated_at) + }, + %% TODO: reevaluate this decision + %% Only return non zero (and also positive) delta fields + %% NOTE: this can result in Delta's of the form #{dt => 1} + maps:filter(fun(_K, V) -> V > 0 end, Delta); +rctx_delta(_, _) -> + undefined. + +-spec get_delta_a() -> maybe_rctx(). +get_delta_a() -> + erlang:get(?DELTA_TA). + +-spec put_delta_a(TA :: rctx()) -> maybe_rctx(). +put_delta_a(TA) -> + erlang:put(?DELTA_TA, TA). + +-spec get_updated_at() -> maybe_integer(). +get_updated_at() -> + erlang:get(?LAST_UPDATED). + +-spec put_updated_at(Updated :: rctx() | integer()) -> maybe_integer(). +put_updated_at(#rctx{updated_at = Updated}) -> + put_updated_at(Updated); +put_updated_at(Updated) when is_integer(Updated) -> + erlang:put(?LAST_UPDATED, Updated). + +-spec get_pid_ref() -> maybe_pid_ref(). +get_pid_ref() -> + get(?PID_REF). + +-spec get_pid_ref(Rctx :: rctx()) -> maybe_pid_ref(). +get_pid_ref(#rctx{pid_ref = PidRef}) -> + PidRef; +get_pid_ref(_) -> + undefined. + +-spec set_pid_ref(PidRef :: pid_ref()) -> pid_ref(). +set_pid_ref(PidRef) -> + erlang:put(?PID_REF, PidRef), + PidRef. diff --git a/src/couch_srt/test/eunit/couch_srt_httpd_tests.erl b/src/couch_srt/test/eunit/couch_srt_httpd_tests.erl new file mode 100644 index 00000000000..003aed5a65a --- /dev/null +++ b/src/couch_srt/test/eunit/couch_srt_httpd_tests.erl @@ -0,0 +1,677 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_httpd_tests). + +-include_lib("stdlib/include/ms_transform.hrl"). + +-include_lib("couch/include/couch_eunit.hrl"). +-include("../../src/couch_srt.hrl"). + +-define(USER, ?MODULE_STRING ++ "_admin"). +-define(PASS, "pass"). +-define(AUTH, {basic_auth, {?USER, ?PASS}}). + +-define(JSON, "application/json"). +-define(JSON_CT, {"Content-Type", ?JSON}). +-define(ACCEPT_JSON, {"Accept", ?JSON}). 
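+
+%% For orientation when reading the assertions below: the query responses
+%% decode to one map per node, and the tests expect each entry to look
+%% roughly like this illustrative sketch (the node name is hypothetical;
+%% the value matches the user_foo ioq_calls total from setup/0):
+%%
+%%   #{<<"node">> => <<"node1@127.0.0.1">>,
+%%     <<"errors">> => [],
+%%     <<"result">> => [#{<<"key">> => #{<<"username">> => <<"user_foo">>},
+%%                        <<"value">> => 1431}]}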
+ +csrt_httpd_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_query_group_by_multiple_keys), + ?TDEF_FE(t_query_group_by_single_key), + ?TDEF_FE(t_query_group_by_binary_key), + ?TDEF_FE(t_query_group_by_bad_request), + ?TDEF_FE(t_query_count_by_multiple_keys), + ?TDEF_FE(t_query_count_by_single_key), + ?TDEF_FE(t_query_count_by_binary_key), + ?TDEF_FE(t_query_count_by_bad_request), + ?TDEF_FE(t_query_sort_by_multiple_keys), + ?TDEF_FE(t_query_sort_by_single_key), + ?TDEF_FE(t_query_sort_by_binary_key), + ?TDEF_FE(t_query_sort_by_bad_request) + ] + }. + +setup_ctx() -> + Ctx = test_util:start_couch([chttpd, fabric, couch_stats, couch_srt]), + Hashed = couch_passwords:hash_admin_password(?PASS), + HashedList = binary_to_list(Hashed), + ok = config:set("admins", ?USER, HashedList, false), + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + Url = lists:concat(["http://", Addr, ":", Port, "/"]), + {Ctx, Url}. + +setup() -> + {Ctx, Url} = setup_ctx(), + couch_srt_test_helper:enable_default_logger_matchers(), + Rctxs = [ + rctx(#{dbname => <<"db1">>, ioq_calls => 123, username => <<"user_foo">>}), + rctx(#{dbname => <<"db1">>, ioq_calls => 321, username => <<"user_foo">>}), + rctx(#{dbname => <<"db2">>, ioq_calls => 345, username => <<"user_bar">>}), + rctx(#{dbname => <<"db2">>, ioq_calls => 543, username => <<"user_bar">>}), + rctx(#{dbname => <<"db1">>, ioq_calls => 678, username => <<"user_bar">>}), + rctx(#{dbname => <<"db2">>, ioq_calls => 987, username => <<"user_foo">>}) + ], + ets:insert(?CSRT_ETS, Rctxs), + #{ctx => Ctx, url => Url, rctxs => Rctxs}. + +teardown(#{ctx := Ctx}) -> + Persist = false, + ok = config:delete("admins", ?USER, Persist), + test_util:stop_couch(Ctx). + +active_resources_group_by(Url, AggregationKeys, CounterKey) -> + active_resources_group_by("docs_read", Url, AggregationKeys, CounterKey). + +active_resources_group_by(MatcherName, Url, AggregationKeys, CounterKey) -> + Body = #{ + <<"group_by">> => #{ + <<"aggregate_keys">> => AggregationKeys, + <<"counter_key">> => CounterKey + } + }, + active_resources(Url, MatcherName, Body). 
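+
+%% The helper above POSTs a request body of the following shape (the JSON
+%% rendering is illustrative; the matcher name travels in the URL path, see
+%% active_resources/3 below, not in the body):
+%%
+%%   {"group_by": {"aggregate_keys": ["username", "dbname"],
+%%                 "counter_key": "ioq_calls"}}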
+ +t_query_group_by_multiple_keys(#{rctxs := Rctxs, url := Url}) -> + Aggregated = aggregate([username, dbname], ioq_calls, Rctxs), + Grouped = group(Aggregated), + {RC, Results} = active_resources_group_by(Url, [<<"username">>, <<"dbname">>], <<"ioq_calls">>), + ?assertEqual(200, RC, format("Should have '200' return code, got ~p~n ~p~n", [RC, Results])), + [ + #{ + <<"errors">> := [], + <<"node">> := _, + <<"result">> := Result + } + ] = Results, + ?assert(is_list(Result), format("Expected list of entries, got ~p~n", [Result])), + ?assertEqual( + 4, length(Result), format("Expected four entries, got ~p~n ~p~n", [length(Result), Result]) + ), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _} + ], + Result, + "Unexpected shape of the result" + ), + OrderedByKey = order_by_key([username, dbname], Result), + V1 = maps:get({<<"user_bar">>, <<"db1">>}, Grouped), + V2 = maps:get({<<"user_bar">>, <<"db2">>}, Grouped), + V3 = maps:get({<<"user_foo">>, <<"db1">>}, Grouped), + V4 = maps:get({<<"user_foo">>, <<"db2">>}, Grouped), + ?assertMatch( + [ + #{ + <<"key">> := #{<<"username">> := <<"user_bar">>, <<"dbname">> := <<"db1">>}, + <<"value">> := V1 + }, + #{ + <<"key">> := #{<<"username">> := <<"user_bar">>, <<"dbname">> := <<"db2">>}, + <<"value">> := V2 + }, + #{ + <<"key">> := #{<<"username">> := <<"user_foo">>, <<"dbname">> := <<"db1">>}, + <<"value">> := V3 + }, + #{ + <<"key">> := #{<<"username">> := <<"user_foo">>, <<"dbname">> := <<"db2">>}, + <<"value">> := V4 + } + ], + OrderedByKey + ), + ok. + +t_query_group_by_single_key(#{rctxs := Rctxs, url := Url}) -> + Aggregated = aggregate([username], ioq_calls, Rctxs), + Grouped = group(Aggregated), + {RC, Results} = active_resources_group_by(Url, [<<"username">>], <<"ioq_calls">>), + ?assertEqual(200, RC, format("Should have '200' return code, got ~p~n ~p~n", [RC, Results])), + [ + #{ + <<"errors">> := [], + <<"node">> := _, + <<"result">> := Result + } + ] = Results, + ?assert(is_list(Result), format("Expected list of entries, got ~p~n", [Result])), + ?assertEqual( + 2, length(Result), format("Expected two entries, got ~p~n ~p~n", [length(Result), Result]) + ), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _} + ], + Result, + "Unexpected shape of the result" + ), + OrderedByKey = order_by_key([username], Result), + V1 = maps:get({<<"user_bar">>}, Grouped), + V2 = maps:get({<<"user_foo">>}, Grouped), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := <<"user_bar">>}, <<"value">> := V1}, + #{<<"key">> := #{<<"username">> := <<"user_foo">>}, <<"value">> := V2} + ], + OrderedByKey + ), + ok. 
+ +t_query_group_by_binary_key(#{rctxs := Rctxs, url := Url}) -> + Aggregated = aggregate([username], ioq_calls, Rctxs), + Grouped = group(Aggregated), + {RC, Results} = active_resources_group_by(Url, <<"username">>, <<"ioq_calls">>), + ?assertEqual(200, RC, format("Should have '200' return code, got ~p~n ~p~n", [RC, Results])), + [ + #{ + <<"errors">> := [], + <<"node">> := _, + <<"result">> := Result + } + ] = Results, + ?assert(is_list(Result), format("Expected list of entries, got ~p~n", [Result])), + ?assertEqual( + 2, length(Result), format("Expected two entries, got ~p~n ~p~n", [length(Result), Result]) + ), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _} + ], + Result, + format("Unexpected shape of the result~n ~p~n", [Result]) + ), + OrderedByKey = order_by_key([username], Result), + V1 = maps:get({<<"user_bar">>}, Grouped), + V2 = maps:get({<<"user_foo">>}, Grouped), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := <<"user_bar">>}, <<"value">> := V1}, + #{<<"key">> := #{<<"username">> := <<"user_foo">>}, <<"value">> := V2} + ], + OrderedByKey + ), + ok. + +t_query_group_by_bad_request(#{url := Url}) -> + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Multiple keys in 'counter_key'">> + }}, + active_resources_group_by(Url, <<"username">>, [<<"ioq_calls">>, <<"docs_read">>]), + "Should return error if multiple keys provided in 'counter_key'" + ), + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown matcher 'unknown_matcher'">> + }}, + active_resources_group_by("unknown_matcher", Url, <<"username">>, <<"ioq_calls">>), + "Should return error if 'matcher' is unknown" + ), + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown field name 'unknown_field'">> + }}, + active_resources_group_by(Url, [<<"unknown_field">>], <<"ioq_calls">>), + "Should return error if 'AggregationKeys' contain unknown field" + ), + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown field name 'unknown_field'">> + }}, + active_resources_group_by(Url, <<"unknown_field">>, <<"ioq_calls">>), + "Should return error if 'AggregationKeys' is unknown field" + ), + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown field name 'unknown_field'">> + }}, + active_resources_group_by(Url, <<"username">>, <<"unknown_field">>), + "Should return error if 'ValueKey' contain unknown field" + ), + ok. + +active_resources_count_by(Url, AggregationKeys) -> + active_resources_count_by("docs_read", Url, AggregationKeys). + +active_resources_count_by(MatcherName, Url, AggregationKeys) -> + Body = #{ + <<"count_by">> => #{ + <<"aggregate_keys">> => AggregationKeys + } + }, + active_resources(Url, MatcherName, Body). 
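+
+%% count_by takes only the aggregation keys; unlike group_by there is no
+%% counter_key, since the value is the number of matching contexts. An
+%% illustrative JSON body:
+%%
+%%   {"count_by": {"aggregate_keys": ["username"]}}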
+ +t_query_count_by_multiple_keys(#{rctxs := Rctxs, url := Url}) -> + Aggregated = aggregate([username, dbname], ioq_calls, Rctxs), + Grouped = count(Aggregated), + {RC, Results} = active_resources_count_by(Url, [<<"username">>, <<"dbname">>]), + ?assertEqual(200, RC, format("Should have '200' return code, got ~p~n ~p~n", [RC, Results])), + [ + #{ + <<"errors">> := [], + <<"node">> := _, + <<"result">> := Result + } + ] = Results, + ?assert(is_list(Result), format("Expected list of entries, got ~p~n", [Result])), + ?assertEqual( + 4, length(Result), format("Expected four entries, got ~p~n ~p~n", [length(Result), Result]) + ), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _} + ], + Result, + "Unexpected shape of the result" + ), + OrderedByKey = order_by_key([username, dbname], Result), + V1 = maps:get({<<"user_bar">>, <<"db1">>}, Grouped), + V2 = maps:get({<<"user_bar">>, <<"db2">>}, Grouped), + V3 = maps:get({<<"user_foo">>, <<"db1">>}, Grouped), + V4 = maps:get({<<"user_foo">>, <<"db2">>}, Grouped), + ?assertMatch( + [ + #{ + <<"key">> := #{<<"username">> := <<"user_bar">>, <<"dbname">> := <<"db1">>}, + <<"value">> := V1 + }, + #{ + <<"key">> := #{<<"username">> := <<"user_bar">>, <<"dbname">> := <<"db2">>}, + <<"value">> := V2 + }, + #{ + <<"key">> := #{<<"username">> := <<"user_foo">>, <<"dbname">> := <<"db1">>}, + <<"value">> := V3 + }, + #{ + <<"key">> := #{<<"username">> := <<"user_foo">>, <<"dbname">> := <<"db2">>}, + <<"value">> := V4 + } + ], + OrderedByKey + ), + ok. + +t_query_count_by_single_key(#{rctxs := Rctxs, url := Url}) -> + Aggregated = aggregate([username], ioq_calls, Rctxs), + Grouped = count(Aggregated), + {RC, Results} = active_resources_count_by(Url, [<<"username">>]), + ?assertEqual(200, RC, format("Should have '200' return code, got ~p~n ~p~n", [RC, Results])), + [ + #{ + <<"errors">> := [], + <<"node">> := _, + <<"result">> := Result + } + ] = Results, + ?assert(is_list(Result), format("Expected list of entries, got ~p~n", [Result])), + ?assertEqual( + 2, length(Result), format("Expected two entries, got ~p~n ~p~n", [length(Result), Result]) + ), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _} + ], + Result, + "Unexpected shape of the result" + ), + OrderedByKey = order_by_key([username], Result), + V1 = maps:get({<<"user_bar">>}, Grouped), + V2 = maps:get({<<"user_foo">>}, Grouped), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := <<"user_bar">>}, <<"value">> := V1}, + #{<<"key">> := #{<<"username">> := <<"user_foo">>}, <<"value">> := V2} + ], + OrderedByKey + ), + ok. 
+ +t_query_count_by_binary_key(#{rctxs := Rctxs, url := Url}) -> + Aggregated = aggregate([username], ioq_calls, Rctxs), + Grouped = count(Aggregated), + {RC, Results} = active_resources_count_by(Url, <<"username">>), + ?assertEqual(200, RC, format("Should have '200' return code, got ~p~n ~p~n", [RC, Results])), + [ + #{ + <<"errors">> := [], + <<"node">> := _, + <<"result">> := Result + } + ] = Results, + ?assert(is_list(Result), format("Expected list of entries, got ~p~n", [Result])), + ?assertEqual( + 2, length(Result), format("Expected two entries, got ~p~n ~p~n", [length(Result), Result]) + ), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _} + ], + Result, + "Unexpected shape of the result" + ), + OrderedByKey = order_by_key([username], Result), + V1 = maps:get({<<"user_bar">>}, Grouped), + V2 = maps:get({<<"user_foo">>}, Grouped), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := <<"user_bar">>}, <<"value">> := V1}, + #{<<"key">> := #{<<"username">> := <<"user_foo">>}, <<"value">> := V2} + ], + OrderedByKey + ), + ok. + +t_query_count_by_bad_request(#{url := Url}) -> + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown matcher 'unknown_matcher'">> + }}, + active_resources_count_by("unknown_matcher", Url, <<"username">>), + "Should return error if 'matcher' is unknown" + ), + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown field name 'unknown_field'">> + }}, + active_resources_count_by(Url, [<<"unknown_field">>]), + "Should return error if 'AggregationKeys' contain unknown field" + ), + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown field name 'unknown_field'">> + }}, + active_resources_count_by(Url, <<"unknown_field">>), + "Should return error if 'AggregationKeys' is unknown field" + ), + ok. + +active_resources_sort_by(Url, AggregationKeys, CounterKey) -> + active_resources_sort_by("docs_read", Url, AggregationKeys, CounterKey). + +active_resources_sort_by(MatcherName, Url, AggregationKeys, CounterKey) -> + Body = #{ + <<"sort_by">> => #{ + <<"aggregate_keys">> => AggregationKeys, + <<"counter_key">> => CounterKey + } + }, + active_resources(Url, MatcherName, Body). 
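+
+%% sort_by takes the same body fields as group_by, e.g. (illustrative JSON):
+%%
+%%   {"sort_by": {"aggregate_keys": ["username", "dbname"],
+%%                "counter_key": "ioq_calls"}}
+%%
+%% and, as the tests below assert, the result list comes back ordered by
+%% value, largest first.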
+ +t_query_sort_by_multiple_keys(#{rctxs := Rctxs, url := Url}) -> + Aggregated = aggregate([username, dbname], ioq_calls, Rctxs), + Grouped = group(Aggregated), + Ordered = order_by_value(Grouped), + {RC, Results} = active_resources_sort_by(Url, [<<"username">>, <<"dbname">>], <<"ioq_calls">>), + ?assertEqual(200, RC, format("Should have '200' return code, got ~p~n ~p~n", [RC, Results])), + [ + #{ + <<"errors">> := [], + <<"node">> := _, + <<"result">> := Result + } + ] = Results, + ?assert(is_list(Result), format("Expected list of entries, got ~p~n", [Result])), + ?assertEqual( + 4, length(Result), format("Expected four entries, got ~p~n ~p~n", [length(Result), Result]) + ), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _, <<"dbname">> := _}, <<"value">> := _} + ], + Result, + "Unexpected shape of the result" + ), + [ + {{<<"user_foo">>, <<"db2">>}, V1}, + {{<<"user_bar">>, <<"db2">>}, V2}, + {{<<"user_bar">>, <<"db1">>}, V3}, + {{<<"user_foo">>, <<"db1">>}, V4} + ] = Ordered, + ?assertMatch( + [ + #{ + <<"key">> := #{<<"username">> := <<"user_foo">>, <<"dbname">> := <<"db2">>}, + <<"value">> := V1 + }, + #{ + <<"key">> := #{<<"username">> := <<"user_bar">>, <<"dbname">> := <<"db2">>}, + <<"value">> := V2 + }, + #{ + <<"key">> := #{<<"username">> := <<"user_bar">>, <<"dbname">> := <<"db1">>}, + <<"value">> := V3 + }, + #{ + <<"key">> := #{<<"username">> := <<"user_foo">>, <<"dbname">> := <<"db1">>}, + <<"value">> := V4 + } + ], + Result + ), + ok. + +t_query_sort_by_single_key(#{rctxs := Rctxs, url := Url}) -> + Aggregated = aggregate([username], ioq_calls, Rctxs), + Grouped = group(Aggregated), + Ordered = order_by_value(Grouped), + {RC, Results} = active_resources_sort_by(Url, [<<"username">>], <<"ioq_calls">>), + ?assertEqual(200, RC, format("Should have '200' return code, got ~p~n ~p~n", [RC, Results])), + [ + #{ + <<"errors">> := [], + <<"node">> := _, + <<"result">> := Result + } + ] = Results, + ?assert(is_list(Result), format("Expected list of entries, got ~p~n", [Result])), + ?assertEqual( + 2, length(Result), format("Expected two entries, got ~p~n ~p~n", [length(Result), Result]) + ), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _} + ], + Result, + "Unexpected shape of the result" + ), + [ + {{<<"user_bar">>}, V1}, + {{<<"user_foo">>}, V2} + ] = Ordered, + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := <<"user_bar">>}, <<"value">> := V1}, + #{<<"key">> := #{<<"username">> := <<"user_foo">>}, <<"value">> := V2} + ], + Result + ), + ok. 
+ +t_query_sort_by_binary_key(#{rctxs := Rctxs, url := Url}) -> + Aggregated = aggregate([username], ioq_calls, Rctxs), + Grouped = group(Aggregated), + Ordered = order_by_value(Grouped), + {RC, Results} = active_resources_sort_by(Url, <<"username">>, <<"ioq_calls">>), + ?assertEqual(200, RC, format("Should have '200' return code, got ~p~n ~p~n", [RC, Results])), + [ + #{ + <<"errors">> := [], + <<"node">> := _, + <<"result">> := Result + } + ] = Results, + ?assert(is_list(Result), format("Expected list of entries, got ~p~n", [Result])), + ?assertEqual( + 2, length(Result), format("Expected two entries, got ~p~n ~p~n", [length(Result), Result]) + ), + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _}, + #{<<"key">> := #{<<"username">> := _}, <<"value">> := _} + ], + Result, + "Unexpected shape of the result" + ), + [ + {{<<"user_bar">>}, V1}, + {{<<"user_foo">>}, V2} + ] = Ordered, + ?assertMatch( + [ + #{<<"key">> := #{<<"username">> := <<"user_bar">>}, <<"value">> := V1}, + #{<<"key">> := #{<<"username">> := <<"user_foo">>}, <<"value">> := V2} + ], + Result + ), + ok. + +t_query_sort_by_bad_request(#{url := Url}) -> + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Multiple keys in 'counter_key'">> + }}, + active_resources_sort_by(Url, <<"username">>, [<<"ioq_calls">>, <<"docs_read">>]), + "Should return error if multiple keys provided in 'counter_key'" + ), + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown matcher 'unknown_matcher'">> + }}, + active_resources_sort_by("unknown_matcher", Url, <<"username">>, <<"ioq_calls">>), + "Should return error if 'matcher' is unknown" + ), + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown field name 'unknown_field'">> + }}, + active_resources_sort_by(Url, [<<"unknown_field">>], <<"ioq_calls">>), + "Should return error if 'AggregationKeys' contain unknown field" + ), + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown field name 'unknown_field'">> + }}, + active_resources_sort_by(Url, <<"unknown_field">>, <<"ioq_calls">>), + "Should return error if 'AggregationKeys' is unknown field" + ), + ?assertMatch( + {400, #{ + <<"error">> := <<"bad_request">>, + <<"reason">> := <<"Unknown field name 'unknown_field'">> + }}, + active_resources_sort_by(Url, <<"username">>, <<"unknown_field">>), + "Should return error if 'ValueKey' contain unknown field" + ), + ok. + +format(Fmt, Args) -> + lists:flatten(io_lib:format(Fmt, Args)). + +aggregate(AggregationKeys, ValField, Records) -> + lists:foldl( + fun(Rctx, Acc) -> + Key = list_to_tuple([couch_srt_entry:value(Field, Rctx) || Field <- AggregationKeys]), + CurrVal = maps:get(Key, Acc, []), + maps:put(Key, [couch_srt_entry:value(ValField, Rctx) | CurrVal], Acc) + end, + #{}, + Records + ). + +group(Aggregated) -> + maps:fold( + fun(Key, Val, Acc) -> + maps:put(Key, lists:foldl(fun erlang:'+'/2, 0, Val), Acc) + end, + #{}, + Aggregated + ). + +count(Aggregated) -> + maps:fold( + fun(Key, Val, Acc) -> + maps:put(Key, lists:foldl(fun(_, A) -> A + 1 end, 0, Val), Acc) + end, + #{}, + Aggregated + ). + +order_by_value(Grouped) -> + lists:reverse(lists:keysort(2, maps:to_list(Grouped))). 
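+
+%% Worked example of the model helpers above, using the rctxs from setup/0
+%% (list order reflects the foldl over the fixture list):
+%%
+%%   Aggregated = aggregate([username], ioq_calls, Rctxs),
+%%   %% => #{{<<"user_foo">>} => [987, 321, 123],
+%%   %%      {<<"user_bar">>} => [678, 543, 345]}
+%%   group(Aggregated),  %% => #{{<<"user_foo">>} => 1431, {<<"user_bar">>} => 1566}
+%%   count(Aggregated),  %% => #{{<<"user_foo">>} => 3, {<<"user_bar">>} => 3}
+%%   order_by_value(group(Aggregated)).
+%%   %% => [{{<<"user_bar">>}, 1566}, {{<<"user_foo">>}, 1431}]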
+
+% This function handles both representations of entries of the result
+% #{<<"key">> => #{<<"dbname">> => <<"db2">>, <<"username">> => <<"user_foo">>}, <<"value">> => 1}
+% and
+% {{<<"db2">>, <<"user_foo">>}, 1}
+order_by_key(AggregationKeys, Entries) when is_list(AggregationKeys) andalso is_list(Entries) ->
+    lists:sort(
+        fun(A, B) ->
+            get_key(AggregationKeys, A) =< get_key(AggregationKeys, B)
+        end,
+        Entries
+    ).
+
+% This function handles both representations of entries of the result
+% #{<<"key">> => #{<<"dbname">> => <<"db2">>, <<"username">> => <<"user_foo">>}, <<"value">> => 1}
+% and
+% {{<<"db2">>, <<"user_foo">>}, 1}
+get_key(AggregationKeys, #{<<"key">> := Key}) ->
+    list_to_tuple([maps:get(atom_to_binary(Field), Key) || Field <- AggregationKeys]);
+get_key(_AggregationKeys, {Key, _}) ->
+    Key.
+
+active_resources(Url, MatchName, Body) ->
+    EndpointUrl = Url ++ "/_active_resources/_match/" ++ MatchName,
+    Headers = [?JSON_CT, ?AUTH, ?ACCEPT_JSON],
+    {ok, Code, _, Res} = test_request:request(post, EndpointUrl, Headers, jiffy:encode(Body)),
+    {Code, jiffy:decode(Res, [return_maps])}.
+
+rctx(Opts) ->
+    % Bump `docs_read` past the configured threshold so that the standard
+    % `{docs_read, fun matcher_on_docs_read/1, 1000}` matcher matches.
+    Threshold = config:get_integer("csrt_logger.matchers_threshold", "docs_read", 1000),
+    BaseOpts = #{docs_read => Threshold + 1, username => <<"user_foo">>},
+    couch_srt_test_helper:rctx_gen(maps:merge(BaseOpts, Opts)).
diff --git a/src/couch_srt/test/eunit/couch_srt_logger_tests.erl b/src/couch_srt/test/eunit/couch_srt_logger_tests.erl
new file mode 100644
index 00000000000..e611f326934
--- /dev/null
+++ b/src/couch_srt/test/eunit/couch_srt_logger_tests.erl
@@ -0,0 +1,468 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_srt_logger_tests).
+
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include("../../src/couch_srt.hrl").
+
+%% Use values that differ from the default configs to ensure they're picked up
+-define(THRESHOLD_DBNAME_IO, 91).
+-define(THRESHOLD_DOCS_READ, 123).
+-define(THRESHOLD_DOCS_WRITTEN, 12).
+-define(THRESHOLD_IOQ_CALLS, 439).
+-define(THRESHOLD_ROWS_READ, 143).
+-define(THRESHOLD_CHANGES, 79).
+-define(THRESHOLD_LONG_REQS, 432).
+
+csrt_logger_reporting_works_test_() ->
+    {
+        foreach,
+        fun setup_reporting/0,
+        fun teardown_reporting/1,
+        [
+            ?TDEF_FE(t_enablement),
+            ?TDEF_FE(t_do_report),
+            ?TDEF_FE(t_do_lifetime_report),
+            ?TDEF_FE(t_do_status_report)
+        ]
+    }.
+ +csrt_logger_matchers_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_enablement), + ?TDEF_FE(t_matcher_on_dbnames_io), + ?TDEF_FE(t_matcher_on_docs_read), + ?TDEF_FE(t_matcher_on_docs_written), + ?TDEF_FE(t_matcher_on_rows_read), + ?TDEF_FE(t_matcher_on_changes_processed), + ?TDEF_FE(t_matcher_on_long_reqs), + ?TDEF_FE(t_matcher_on_ioq_calls), + ?TDEF_FE(t_matcher_on_nonce), + ?TDEF_FE(t_matcher_on_all_coordinators), + ?TDEF_FE(t_matcher_on_all_rpc_workers), + ?TDEF_FE(t_matcher_register_deregister) + ] + }. + +make_docs(Count) -> + lists:map( + fun(I) -> + #doc{ + id = ?l2b("foo_" ++ integer_to_list(I)), + body = {[{<<"value">>, I}]} + } + end, + lists:seq(1, Count) + ). + +set_matcher_threshold(_Key, undefined) -> + ok; +set_matcher_threshold(Key, Val) when is_integer(Val) -> + config:set(?CSRT_MATCHERS_THRESHOLD, Key, integer_to_list(Val), false). + +set_dbnames_io_threshold(Key, Val) when is_integer(Val) -> + config:set(?CSRT_MATCHERS_DBNAMES, Key, integer_to_list(Val), false). + +setup() -> + Ctx = test_util:start_couch([fabric, couch_stats, couch_srt]), + couch_srt_test_helper:enable_default_logger_matchers(), + config:set_boolean(?CSRT, "randomize_testing", false, false), + config:set_boolean(?CSRT, "enable_reporting", true, false), + config:set_boolean(?CSRT, "enable_rpc_reporting", true, false), + ok = meck:new(ioq, [passthrough]), + ok = meck:expect(ioq, bypass, fun(_, _) -> false end), + DbName = ?tempdb(), + ok = fabric:create_db(DbName, [{q, 8}, {n, 1}]), + Docs = make_docs(100), + Opts = [], + {ok, _} = fabric:update_docs(DbName, Docs, Opts), + Method = 'GET', + Path = "/" ++ ?b2l(DbName) ++ "/_all_docs", + Nonce = couch_util:to_hex(crypto:strong_rand_bytes(5)), + Req = #httpd{method = Method, nonce = Nonce}, + {_, _} = PidRef = couch_srt:create_coordinator_context(Req, Path), + MArgs = #mrargs{include_docs = false}, + _Res = fabric:all_docs(DbName, [?ADMIN_CTX], fun view_cb/2, [], MArgs), + Rctx = load_rctx(PidRef), + + DefaultMatcherThresholds = [ + {"all_coordinators", undefined}, + {"all_rpc_workers", undefined}, + {"docs_read", ?THRESHOLD_DOCS_READ}, + {"docs_written", ?THRESHOLD_DOCS_WRITTEN}, + {"ioq_calls", ?THRESHOLD_IOQ_CALLS}, + {"rows_read", ?THRESHOLD_ROWS_READ}, + {"changes_processed", ?THRESHOLD_CHANGES}, + {"long_reqs", ?THRESHOLD_LONG_REQS} + ], + [set_matcher_threshold(Key, Val) || {Key, Val} <- DefaultMatcherThresholds], + + DbnameIOMatcherThresholds = [ + {"foo", ?THRESHOLD_DBNAME_IO}, + {"bar", ?THRESHOLD_DBNAME_IO}, + {"foo/bar", ?THRESHOLD_DBNAME_IO} + ], + [set_dbnames_io_threshold(Key, Val) || {Key, Val} <- DbnameIOMatcherThresholds], + + couch_srt_logger:reload_matchers(), + #{ctx => Ctx, dbname => DbName, rctx => Rctx, rctxs => couch_srt_test_helper:rctxs()}. + +teardown(#{ctx := Ctx, dbname := DbName}) -> + ok = fabric:delete_db(DbName, [?ADMIN_CTX]), + ok = meck:unload(ioq), + test_util:stop_couch(Ctx). + +setup_reporting() -> + Ctx = setup(), + ok = meck:new(couch_log, [passthrough]), + ok = meck:expect(couch_log, report, fun(_, _) -> true end), + Ctx. + +teardown_reporting(Ctx) -> + ok = meck:unload(couch_log), + teardown(Ctx). 
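+
+%% The threshold helpers in setup/0 boil down to plain config writes followed
+%% by a matcher reload; assuming ?CSRT_MATCHERS_THRESHOLD expands to the
+%% "csrt_logger.matchers_threshold" section, the effect is equivalent to:
+%%
+%%   config:set("csrt_logger.matchers_threshold", "ioq_calls", "439", false),
+%%   couch_srt_logger:reload_matchers().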
+
+t_enablement(#{}) ->
+    %% Set an invalid match spec to ensure couch_srt_logger is resilient
+    config:set(?CSRT_MATCHERS_DBNAMES, "foobar", "lkajsdfkjkkadfjkajkf", false),
+    ?assertEqual(ok, couch_srt_logger:reload_matchers(), "reloads even with bad matcher specs set"),
+    ?assert(couch_srt_util:is_enabled(), "CSRT is enabled"),
+    ?assert(couch_srt_util:is_enabled_reporting(), "CSRT reporting is enabled"),
+    ?assert(couch_srt_util:is_enabled_rpc_reporting(), "CSRT RPC reporting is enabled").
+
+t_do_report(#{rctx := Rctx}) ->
+    JRctx = couch_srt_test_helper:jrctx(Rctx),
+    ReportName = "foo",
+    ?assert(
+        couch_srt_logger:do_report(ReportName, Rctx),
+        "couch_srt_logger:do_report " ++ ReportName
+    ),
+    ?assert(meck:validate(couch_log), "CSRT validate couch_log"),
+    ?assert(
+        meck:called(couch_log, report, [ReportName, JRctx]),
+        "CSRT couch_log:report"
+    ).
+
+t_do_lifetime_report(#{rctx := Rctx}) ->
+    JRctx = couch_srt_test_helper:jrctx(Rctx),
+    ReportName = "csrt-pid-usage-lifetime",
+    ?assert(
+        couch_srt_logger:do_lifetime_report(Rctx),
+        "couch_srt_logger:do_lifetime_report " ++ ReportName
+    ),
+    ?assert(meck:validate(couch_log), "CSRT validate couch_log"),
+    ?assert(
+        meck:called(couch_log, report, [ReportName, JRctx]),
+        "CSRT couch_log:report"
+    ).
+
+t_do_status_report(#{rctx := Rctx}) ->
+    JRctx = couch_srt_test_helper:jrctx(Rctx),
+    ReportName = "csrt-pid-usage-status",
+    ?assert(
+        couch_srt_logger:do_status_report(Rctx),
+        "couch_srt_logger:do_status_report " ++ ReportName
+    ),
+    ?assert(meck:validate(couch_log), "CSRT validate couch_log"),
+    ?assert(
+        meck:called(couch_log, report, [ReportName, JRctx]),
+        "CSRT couch_log:report"
+    ).
+
+t_matcher_on_docs_read(#{rctxs := Rctxs0}) ->
+    Threshold = ?THRESHOLD_DOCS_READ,
+    %% Make sure we have at least one match
+    Rctxs = [couch_srt_test_helper:rctx_gen(#{docs_read => Threshold + 10}) | Rctxs0],
+    ?assertEqual(
+        lists:sort(lists:filter(matcher_gte(docs_read, Threshold), Rctxs)),
+        lists:sort(lists:filter(matcher_for_csrt("docs_read"), Rctxs)),
+        "Docs read matcher"
+    ).
+
+t_matcher_on_docs_written(#{rctxs := Rctxs0}) ->
+    Threshold = ?THRESHOLD_DOCS_WRITTEN,
+    %% Make sure we have at least one match
+    Rctxs = [couch_srt_test_helper:rctx_gen(#{docs_written => Threshold + 10}) | Rctxs0],
+    ?assertEqual(
+        lists:sort(lists:filter(matcher_gte(docs_written, Threshold), Rctxs)),
+        lists:sort(lists:filter(matcher_for_csrt("docs_written"), Rctxs)),
+        "Docs written matcher"
+    ).
+
+t_matcher_on_rows_read(#{rctxs := Rctxs0}) ->
+    Threshold = ?THRESHOLD_ROWS_READ,
+    %% Make sure we have at least one match
+    Rctxs = [couch_srt_test_helper:rctx_gen(#{rows_read => Threshold + 10}) | Rctxs0],
+    ?assertEqual(
+        lists:sort(lists:filter(matcher_gte(rows_read, Threshold), Rctxs)),
+        lists:sort(lists:filter(matcher_for_csrt("rows_read"), Rctxs)),
+        "Rows read matcher"
+    ).
+
+t_matcher_on_changes_processed(#{rctxs := Rctxs0}) ->
+    Threshold = ?THRESHOLD_CHANGES,
+    %% Make sure we have at least one match
+    Rctx0 = couch_srt_test_helper:rctx_gen(#{
+        mod => chttpd_db, func => handle_changes_req, rows_read => Threshold + 10
+    }),
+    Rctxs = [Rctx0 | Rctxs0],
+    ChangesFilter =
+        fun
+            %% Matcher on changes only works for coordinators at the moment due
+            %% to overloading over rows_read for all aggregate operations
+            (#rctx{type = #coordinator{mod = chttpd_db, func = handle_changes_req}} = R) ->
+                Ret = couch_srt_entry:value(changes_returned, R),
+                Proc = couch_srt_entry:value(rows_read, R),
+                (Proc - Ret) >= Threshold;
+            (_) ->
+                false
+        end,
+    ?assertEqual(
+        lists:sort(lists:filter(ChangesFilter, Rctxs)),
+        lists:sort(lists:filter(matcher_for_csrt("changes_processed"), Rctxs)),
+        "Changes processed matcher"
+    ).
+
+t_matcher_on_long_reqs(#{rctxs := Rctxs0}) ->
+    %% Threshold is in milliseconds, convert to native time format
+    Threshold = ?THRESHOLD_LONG_REQS,
+    NativeThreshold = erlang:convert_time_unit(Threshold, millisecond, native),
+    %% Native time units are fine-grained, so span well past the threshold to
+    %% guarantee a measurable millisecond-scale delta.
+    %% Make sure we have at least one match: start well before now so that
+    %% updated_at - started_at exceeds the threshold.
+    Now = couch_srt_util:tnow(),
+    StartedAt = Now - round(NativeThreshold * 1.23),
+    Rctxs = [
+        couch_srt_test_helper:rctx_gen(#{started_at => StartedAt, updated_at => Now}) | Rctxs0
+    ],
+    DurationFilter = fun(R) ->
+        Started = couch_srt_entry:value(started_at, R),
+        Updated = couch_srt_entry:value(updated_at, R),
+        Updated - Started >= NativeThreshold
+    end,
+    ?assertEqual(
+        lists:sort(lists:filter(DurationFilter, Rctxs)),
+        lists:sort(lists:filter(matcher_for_csrt("long_reqs"), Rctxs)),
+        "Long requests matcher"
+    ).
+
+t_matcher_on_ioq_calls(#{rctxs := Rctxs0}) ->
+    Threshold = ?THRESHOLD_IOQ_CALLS,
+    %% Make sure we have at least one match
+    Rctxs = [couch_srt_test_helper:rctx_gen(#{ioq_calls => Threshold + 10}) | Rctxs0],
+    ?assertEqual(
+        lists:sort(lists:filter(matcher_gte(ioq_calls, Threshold), Rctxs)),
+        lists:sort(lists:filter(matcher_for_csrt("ioq_calls"), Rctxs)),
+        "IOQ calls matcher"
+    ).
+
+t_matcher_on_nonce(#{rctxs := Rctxs0}) ->
+    Nonce = "foobar7799",
+    %% Make sure we have at least one match
+    Rctxs = [couch_srt_test_helper:rctx_gen(#{nonce => Nonce}) | Rctxs0],
+    %% Nonce requires a dynamic matcher as it's a static match
+    %% TODO: add pattern based nonce matching
+    MSpec = couch_srt_logger:matcher_on_nonce(Nonce),
+    CompMSpec = ets:match_spec_compile(MSpec),
+    Matchers = #{"nonce" => {MSpec, CompMSpec}},
+    IsMatch = fun(ARctx) -> couch_srt_logger:is_match(ARctx, Matchers) end,
+    ?assertEqual(
+        lists:sort(lists:filter(matcher_on(nonce, Nonce), Rctxs)),
+        lists:sort(lists:filter(IsMatch, Rctxs)),
+        "Nonce matcher"
+    ).
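+
+%% A minimal sketch of wiring up an ad hoc matcher outside the registry,
+%% mirroring the nonce test above (the nonce value is illustrative):
+%%
+%%   MSpec = couch_srt_logger:matcher_on_nonce("abc1234567"),
+%%   Matchers = #{"nonce" => {MSpec, ets:match_spec_compile(MSpec)}},
+%%   couch_srt_logger:is_match(Rctx, Matchers).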
+ +t_matcher_on_dbnames_io(#{rctxs := Rctxs0}) -> + Threshold = ?THRESHOLD_DBNAME_IO, + SThreshold = integer_to_list(Threshold), + DbFoo = "foo", + DbBar = "bar", + MatcherFoo = matcher_for_csrt("dbnames_io__" ++ DbFoo ++ "__" ++ SThreshold), + MatcherBar = matcher_for_csrt("dbnames_io__" ++ DbBar ++ "__" ++ SThreshold), + MatcherFooBar = matcher_for_csrt("dbnames_io__foo/bar__" ++ SThreshold), + %% Add an extra Rctx with dbname foo/bar to ensure correct naming matches + ExtraRctx = couch_srt_test_helper:rctx_gen(#{ + dbname => <<"foo/bar">>, get_kp_node => Threshold + 10 + }), + %% Make sure we have at least one match + Rctxs = [ExtraRctx, couch_srt_test_helper:rctx_gen(#{ioq_calls => Threshold + 10}) | Rctxs0], + ?assertEqual( + lists:sort(lists:filter(matcher_for_dbnames_io(DbFoo, Threshold), Rctxs)), + lists:sort(lists:filter(MatcherFoo, Rctxs)), + "dbnames_io foo matcher" + ), + ?assertEqual( + lists:sort(lists:filter(matcher_for_dbnames_io(DbBar, Threshold), Rctxs)), + lists:sort(lists:filter(MatcherBar, Rctxs)), + "dbnames_io bar matcher" + ), + ?assertEqual( + [ExtraRctx], + lists:sort(lists:filter(MatcherFooBar, Rctxs)), + "dbnames_io foo/bar matcher" + ). + +t_matcher_register_deregister(#{rctxs := Rctxs0}) -> + CrazyDbName = <<"asdf123@?!&#fdsa">>, + MName = "Crazy-Matcher", + MSpec = couch_srt_logger:matcher_on_dbname(CrazyDbName), + %% Add an extra Rctx with CrazyDbName to create a specific match + ExtraRctx = couch_srt_test_helper:rctx_gen(#{dbname => CrazyDbName}), + %% Make sure we have at least one match + Rctxs = [ExtraRctx | Rctxs0], + + ?assertEqual(#{}, couch_srt_logger:get_registered_matchers(), "no current registered matchers"), + ?assertEqual( + {error, {invalid_ms, "bad_spec", "fdsa"}}, + couch_srt_logger:register_matcher("bad_spec", "fdsa"), + "register bad matcher fails" + ), + ?assertEqual(ok, couch_srt_logger:register_matcher(MName, MSpec), "register matcher"), + CompMSpec = test_util:wait( + fun() -> + case couch_srt_logger:get_matcher(MName) of + undefined -> + wait; + {MSpec, _Ref} = CompMSpec0 -> + CompMSpec0 + end + end + ), + Matchers = #{MName => CompMSpec}, + ?assert(CompMSpec =/= timeout, "newly registered matcher was initialized"), + ?assertEqual( + [MName], + maps:keys(couch_srt_logger:get_registered_matchers()), + "correct current registered matchers" + ), + ?assert( + couch_srt_logger:is_match(ExtraRctx, Matchers), "our registered matcher matches expectedly" + ), + ?assert( + couch_srt_logger:is_match(ExtraRctx), + "our registered matcher is picked up and matches expectedly" + ), + ?assertEqual( + Matchers, + couch_srt_logger:find_matches(Rctxs, Matchers), + "we find our matcher and no extra matchers" + ), + ?assert( + maps:is_key( + MName, + couch_srt_logger:find_matches(Rctxs, couch_srt_logger:get_matchers()) + ), + "find our CrazyDbName matcher in matches against all registered matchers" + ), + ?assertEqual( + #{MName => [ExtraRctx]}, + couch_srt_logger:find_all_matches(Rctxs, Matchers), + "find our CrazyDb ExtraRctx with our Matcher, and nothing else" + ), + ?assertEqual(ok, couch_srt_logger:reload_matchers(), "we can reload matchers"), + ?assertEqual( + [MName], + maps:keys(couch_srt_logger:get_registered_matchers()), + "correct current registered matchers after a global reload" + ), + ?assert( + maps:is_key( + MName, + couch_srt_logger:find_matches(Rctxs, couch_srt_logger:get_matchers()) + ), + "our matcher still behaves expectedly after a global matcher reload" + ), + ?assertEqual(ok, couch_srt_logger:deregister_matcher(MName), 
"deregister_matcher returns ok"), + Matcher2 = test_util:wait( + fun() -> + case couch_srt_logger:get_matcher(MName) of + undefined -> + undefined; + _ -> + wait + end + end + ), + ?assertEqual(undefined, Matcher2, "matcher was deregistered successfully"), + ?assertEqual( + #{}, couch_srt_logger:get_registered_matchers(), "no leftover registered matchers" + ). + +t_matcher_on_all_coordinators(#{rctxs := Rctxs0}) -> + %% Make sure we have at least one match + Rctxs = [couch_srt_test_helper:rctx_gen(#{type => #coordinator{}}) | Rctxs0], + ?assertEqual( + lists:sort(lists:filter(matcher_on_coordinators(), Rctxs)), + lists:sort(lists:filter(matcher_for_csrt("all_coordinators"), Rctxs)), + "All Coordinators matcher" + ). + +t_matcher_on_all_rpc_workers(#{rctxs := Rctxs0}) -> + %% Make sure we have at least one match + Rctxs = [couch_srt_test_helper:rctx_gen(#{type => #rpc_worker{}}) | Rctxs0], + ?assertEqual( + lists:sort(lists:filter(matcher_on_rpc_workers(), Rctxs)), + lists:sort(lists:filter(matcher_for_csrt("all_rpc_workers"), Rctxs)), + "All RPC Workers matcher" + ). + +load_rctx(PidRef) -> + %% Add slight delay to accumulate RPC response deltas + timer:sleep(50), + couch_srt:get_resource(PidRef). + +view_cb({row, Row}, Acc) -> + {ok, [Row | Acc]}; +view_cb(_Msg, Acc) -> + {ok, Acc}. + +matcher_gte(Field, Value) -> + matcher_for(Field, Value, fun erlang:'>='/2). + +matcher_on(Field, Value) -> + matcher_for(Field, Value, fun erlang:'=:='/2). + +matcher_for(Field, Value, Op) -> + fun(Rctx) -> Op(couch_srt_entry:value(Field, Rctx), Value) end. + +matcher_on_coordinators() -> + fun + (#rctx{type = #coordinator{}}) -> true; + (_) -> false + end. + +matcher_on_rpc_workers() -> + fun + (#rctx{type = #rpc_worker{}}) -> true; + (_) -> false + end. + +matcher_for_csrt(MatcherName) -> + Matchers = #{MatcherName => {_, _} = couch_srt_logger:get_matcher(MatcherName)}, + case couch_srt_logger:get_matcher(MatcherName) of + {_, _} = Matcher -> + Matchers = #{MatcherName => Matcher}, + fun(Rctx) -> couch_srt_logger:is_match(Rctx, Matchers) end; + _ -> + throw({missing_matcher, MatcherName}) + end. + +matcher_for_dbnames_io(Dbname0, Threshold) -> + Dbname = list_to_binary(Dbname0), + fun(Rctx) -> + DbnameA = couch_srt_entry:value(dbname, Rctx), + Fields = [ioq_calls, get_kv_node, get_kp_node, docs_read, rows_read], + Vals = [{F, couch_srt_entry:value(F, Rctx)} || F <- Fields], + Dbname =:= mem3:dbname(DbnameA) andalso lists:any(fun({_K, V}) -> V >= Threshold end, Vals) + end. diff --git a/src/couch_srt/test/eunit/couch_srt_query_tests.erl b/src/couch_srt/test/eunit/couch_srt_query_tests.erl new file mode 100644 index 00000000000..063b90a970b --- /dev/null +++ b/src/couch_srt/test/eunit/couch_srt_query_tests.erl @@ -0,0 +1,734 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_query_tests). + +-ifdef(WITH_PROPER). +-include_lib("couch/include/couch_eunit_proper.hrl"). +-endif. + +-include_lib("couch/include/couch_eunit.hrl"). + +-include_lib("stdlib/include/ms_transform.hrl"). 
+-include("../../src/couch_srt.hrl"). + +-define(MATCHERS_THRESHOLD, 1000). +csrt_query_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_group_by_multiple_keys), + ?TDEF_FE(t_group_by_single_key), + ?TDEF_FE(t_group_by_binary_key), + ?TDEF_FE(t_group_by_detect_unsafe_query), + ?TDEF_FE(t_group_by_run_unsafe_query), + ?TDEF_FE(t_group_by_run_unsafe_correctness), + ?TDEF_FE(t_group_by_bad_request), + ?TDEF_FE(t_count_by_multiple_keys), + ?TDEF_FE(t_count_by_single_key), + ?TDEF_FE(t_count_by_binary_key), + ?TDEF_FE(t_count_by_bad_request), + ?TDEF_FE(t_sort_by_multiple_keys), + ?TDEF_FE(t_sort_by_single_key), + ?TDEF_FE(t_sort_by_binary_key), + ?TDEF_FE(t_sort_by_bad_request) + ] + }. + +csrt_query_cardinality_limit_test_() -> + { + foreach, + fun setup_query_limit/0, + fun teardown_query_limit/1, + [ + ?TDEF_FE(t_run_hits_query_cardinality_limit) + ] + }. + +setup() -> + Rctxs = [ + rctx(#{dbname => <<"db1">>, ioq_calls => 123, username => <<"user_foo">>}), + rctx(#{dbname => <<"db1">>, ioq_calls => 321, username => <<"user_foo">>}), + rctx(#{dbname => <<"db2">>, ioq_calls => 345, username => <<"user_bar">>}), + rctx(#{dbname => <<"db2">>, ioq_calls => 543, username => <<"user_bar">>}), + rctx(#{dbname => <<"db1">>, ioq_calls => 678, username => <<"user_bar">>}), + rctx(#{dbname => <<"db2">>, ioq_calls => 987, username => <<"user_foo">>}) + ], + ets:new(?CSRT_ETS, [ + named_table, + public, + {keypos, #rctx.pid_ref} + ]), + ets:insert(?CSRT_ETS, Rctxs), + add_matcher("docs_read", couch_srt_logger:matcher_on_docs_read(?MATCHERS_THRESHOLD)), + #{rctxs => Rctxs}. + +teardown(_) -> + ets:delete(?CSRT_ETS). + +setup_query_limit() -> + Ctx = test_util:start_couch([couch_srt]), + config:set("csrt", "enable", "true", false), + config:set("csrt", "query_cardinality_limit", "5", false), + config:set("csrt_logger.matchers_enabled", "docs_read", "true", false), + config:set_boolean(?CSRT, "randomize_testing", false, false), + ets:insert(?CSRT_ETS, couch_srt_test_helper:rctxs()), + #{ctx => Ctx}. + +teardown_query_limit(#{ctx := Ctx}) -> + test_util:stop_couch(Ctx). + +rctx(Opts) -> + % Update `docs_read` to make standard `{docs_read, fun matcher_on_docs_read/1, 1000}` + % matcher match. + BaseOpts = #{docs_read => ?MATCHERS_THRESHOLD + 1, username => <<"user_foo">>}, + couch_srt_test_helper:rctx_gen(maps:merge(BaseOpts, Opts)). + +dummy_key_fun(#rctx{username = Username}) -> + Username. + +dummy_value_fun(#rctx{ioq_calls = IoqCalls}) -> + IoqCalls. + +t_group_by_multiple_keys(#{rctxs := Rctxs}) -> + Aggregated = aggregate_by([username, dbname], ioq_calls, Rctxs), + Grouped = group(Aggregated), + V1 = maps:get({<<"user_bar">>, <<"db1">>}, Grouped), + V2 = maps:get({<<"user_bar">>, <<"db2">>}, Grouped), + V3 = maps:get({<<"user_foo">>, <<"db1">>}, Grouped), + V4 = maps:get({<<"user_foo">>, <<"db2">>}, Grouped), + Q = couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:group_by([<<"username">>, <<"dbname">>], <<"ioq_calls">>) + ]), + ?assertMatch( + {ok, #{ + {<<"user_bar">>, <<"db1">>} := V1, + {<<"user_bar">>, <<"db2">>} := V2, + {<<"user_foo">>, <<"db1">>} := V3, + {<<"user_foo">>, <<"db2">>} := V4 + }}, + couch_srt:run(Q) + ), + ok. 
+ +t_group_by_single_key(#{rctxs := Rctxs}) -> + Aggregated = aggregate_by([username], ioq_calls, Rctxs), + Grouped = group(Aggregated), + V1 = maps:get({<<"user_bar">>}, Grouped), + V2 = maps:get({<<"user_foo">>}, Grouped), + Q = couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:group_by([<<"username">>], <<"ioq_calls">>) + ]), + ?assertMatch( + {ok, #{ + {<<"user_bar">>} := V1, + {<<"user_foo">>} := V2 + }}, + couch_srt:run(Q) + ), + ok. + +t_group_by_binary_key(#{rctxs := Rctxs}) -> + Aggregated = aggregate_by([username], ioq_calls, Rctxs), + Grouped = group(Aggregated), + V1 = maps:get({<<"user_bar">>}, Grouped), + V2 = maps:get({<<"user_foo">>}, Grouped), + Q = couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:group_by(<<"username">>, <<"ioq_calls">>) + ]), + ?assertMatch( + {ok, #{ + <<"user_bar">> := V1, + <<"user_foo">> := V2 + }}, + couch_srt:run(Q) + ), + ok. + +t_group_by_detect_unsafe_query(_) -> + ?assertMatch( + {error, {unsafe_query, _}}, + couch_srt:run( + couch_srt:query([ + couch_srt:from(all), + couch_srt:group_by(<<"username">>, <<"ioq_calls">>) + ]) + ), + "Should detect `unsafe` when `all` matcher is used" + ), + ?assertMatch( + {error, {unsafe_query, _}}, + couch_srt:run( + couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:group_by(fun dummy_key_fun/1, <<"ioq_calls">>) + ]) + ), + "Should detect `unsafe` when `AggregationKey` is a function()" + ), + ?assertMatch( + {error, {unsafe_query, _}}, + couch_srt:run( + couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:group_by(<<"username">>, fun dummy_value_fun/1) + ]) + ), + "Should detect `unsafe` when `ValueKey` is a function()" + ), + ?assertMatch( + {error, {unsafe_query, _}}, + couch_srt:run( + couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:group_by(<<"username">>, <<"ioq_calls">>), + couch_srt:options([ + couch_srt:unlimited() + ]) + ]) + ), + "Should detect `unsafe` when `unlimited()` is used" + ), + ok. + +t_group_by_run_unsafe_query(_) -> + ?assertMatch( + {ok, _}, + couch_srt:unsafe_run( + couch_srt:query([ + couch_srt:from(all), + couch_srt:group_by(<<"username">>, <<"ioq_calls">>) + ]) + ), + "Should be able to use `unsafe_run` when `all` matcher is used" + ), + ?assertMatch( + {ok, _}, + couch_srt:unsafe_run( + couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:group_by(fun dummy_key_fun/1, <<"ioq_calls">>) + ]) + ), + "Should be able to use `unsafe_run` when `AggregationKey` is a function()" + ), + ?assertMatch( + {ok, _}, + couch_srt:unsafe_run( + couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:group_by(<<"username">>, fun dummy_value_fun/1) + ]) + ), + "Should be able to use `unsafe_run` when `ValueKey` is a function()" + ), + ?assertMatch( + {ok, _}, + couch_srt:unsafe_run( + couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:group_by(<<"username">>, <<"ioq_calls">>), + couch_srt:options([ + couch_srt:unlimited() + ]) + ]) + ), + "Should be able to use `unsafe_run` when `unlimited()` is used" + ), + ok. 
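+
+%% run/1 rejects queries flagged unsafe (the `all` matcher, fun-valued
+%% aggregation or value keys, and unlimited()), while unsafe_run/1 executes
+%% them anyway, e.g.:
+%%
+%%   couch_srt:unsafe_run(couch_srt:query([
+%%       couch_srt:from(all),
+%%       couch_srt:group_by(<<"username">>, <<"ioq_calls">>)
+%%   ])).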
+
+t_group_by_run_unsafe_correctness(_) ->
+    % We check that the safe analog of each query returns the same result
+    ?assertEqual(
+        couch_srt:run(
+            couch_srt:query([
+                couch_srt:from("docs_read"),
+                couch_srt:group_by(<<"username">>, <<"ioq_calls">>)
+            ])
+        ),
+        couch_srt:unsafe_run(
+            couch_srt:query([
+                couch_srt:from(all),
+                couch_srt:group_by(<<"username">>, <<"ioq_calls">>)
+            ])
+        ),
+        "Should get correct result from `unsafe_run` when `all` matcher is used"
+    ),
+    ?assertEqual(
+        couch_srt:run(
+            couch_srt:query([
+                couch_srt:from("docs_read"),
+                couch_srt:group_by(<<"username">>, <<"ioq_calls">>)
+            ])
+        ),
+        couch_srt:unsafe_run(
+            couch_srt:query([
+                couch_srt:from("docs_read"),
+                couch_srt:group_by(fun dummy_key_fun/1, <<"ioq_calls">>)
+            ])
+        ),
+        "Should get correct result from `unsafe_run` when `AggregationKey` is a function()"
+    ),
+    ?assertEqual(
+        couch_srt:run(
+            couch_srt:query([
+                couch_srt:from("docs_read"),
+                couch_srt:group_by(<<"username">>, ioq_calls)
+            ])
+        ),
+        couch_srt:unsafe_run(
+            couch_srt:query([
+                couch_srt:from("docs_read"),
+                couch_srt:group_by(<<"username">>, fun dummy_value_fun/1)
+            ])
+        ),
+        "Should get correct result from `unsafe_run` when `ValueKey` is a function()"
+    ),
+    ?assertEqual(
+        couch_srt:run(
+            couch_srt:query([
+                couch_srt:from("docs_read"),
+                couch_srt:group_by(<<"username">>, <<"ioq_calls">>)
+            ])
+        ),
+        couch_srt:unsafe_run(
+            couch_srt:query([
+                couch_srt:from("docs_read"),
+                couch_srt:group_by(<<"username">>, <<"ioq_calls">>),
+                couch_srt:options([
+                    couch_srt:unlimited()
+                ])
+            ])
+        ),
+        "Should get correct result from `unsafe_run` when `unlimited()` is used"
+    ),
+    ok.
+
+t_group_by_bad_request(_) ->
+    ?assertMatch(
+        {error, [{unknown_matcher, "unknown_matcher"}]},
+        couch_srt:query([
+            couch_srt:from("unknown_matcher"),
+            couch_srt:group_by(<<"username">>, <<"ioq_calls">>)
+        ]),
+        "Should return error if 'matcher' is unknown"
+    ),
+    ?assertMatch(
+        {error, [{unknown_matcher, rows_read}]},
+        couch_srt:query([
+            couch_srt:from(rows_read),
+            couch_srt:group_by([username, dbname], ioq_calls)
+        ]),
+        "Should return error if 'matcher' is not a string()"
+    ),
+    ?assertMatch(
+        {error, [{invalid_key, "unknown_field"}]},
+        couch_srt:query([
+            couch_srt:from("docs_read"),
+            couch_srt:group_by("unknown_field", ioq_calls)
+        ]),
+        "Should return error if 'AggregationKeys' contain unknown field"
+    ),
+    ?assertMatch(
+        {error, [{invalid_key, "unknown_field"}]},
+        couch_srt:query([
+            couch_srt:from("docs_read"),
+            couch_srt:group_by("username", "unknown_field")
+        ]),
+        "Should return error if 'ValueKey' contain unknown field"
+    ),
+    ?assertMatch(
+        {error, [{beyond_limit, ?QUERY_LIMIT + 1}]},
+        couch_srt:query([
+            couch_srt:from("docs_read"),
+            couch_srt:group_by("username", ioq_calls),
+            couch_srt:options([
+                couch_srt:with_limit(?QUERY_LIMIT + 1)
+            ])
+        ]),
+        "Should return error when 'limit' is greater than configured"
+    ),
+    ok.
+ +t_count_by_multiple_keys(#{rctxs := Rctxs}) -> + Aggregated = aggregate_by([username, dbname], ioq_calls, Rctxs), + Grouped = count(Aggregated), + V1 = maps:get({<<"user_bar">>, <<"db1">>}, Grouped), + V2 = maps:get({<<"user_bar">>, <<"db2">>}, Grouped), + V3 = maps:get({<<"user_foo">>, <<"db1">>}, Grouped), + V4 = maps:get({<<"user_foo">>, <<"db2">>}, Grouped), + Q = couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:count_by([<<"username">>, <<"dbname">>]) + ]), + ?assertMatch( + {ok, #{ + {<<"user_bar">>, <<"db1">>} := V1, + {<<"user_bar">>, <<"db2">>} := V2, + {<<"user_foo">>, <<"db1">>} := V3, + {<<"user_foo">>, <<"db2">>} := V4 + }}, + couch_srt:run(Q) + ), + ok. + +t_count_by_single_key(#{rctxs := Rctxs}) -> + Aggregated = aggregate_by([username], ioq_calls, Rctxs), + Grouped = count(Aggregated), + V1 = maps:get({<<"user_bar">>}, Grouped), + V2 = maps:get({<<"user_foo">>}, Grouped), + Q = couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:count_by([<<"username">>]) + ]), + ?assertMatch( + {ok, #{ + {<<"user_bar">>} := V1, + {<<"user_foo">>} := V2 + }}, + couch_srt:run(Q) + ), + ok. + +t_count_by_binary_key(#{rctxs := Rctxs}) -> + Aggregated = aggregate_by([username], ioq_calls, Rctxs), + Grouped = count(Aggregated), + V1 = maps:get({<<"user_bar">>}, Grouped), + V2 = maps:get({<<"user_foo">>}, Grouped), + Q = couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:count_by(<<"username">>) + ]), + ?assertMatch( + {ok, #{ + <<"user_bar">> := V1, + <<"user_foo">> := V2 + }}, + couch_srt:run(Q) + ), + ok. + +t_count_by_bad_request(_) -> + ?assertMatch( + {error, [{unknown_matcher, "unknown_matcher"}]}, + couch_srt:query([ + couch_srt:from("unknown_matcher"), + couch_srt:count_by(<<"username">>) + ]), + "Should return error if 'matcher' is unknown" + ), + ?assertMatch( + {error, [{unknown_matcher, rows_read}]}, + couch_srt:query([ + couch_srt:from(rows_read), + couch_srt:count_by([username, dbname]) + ]), + "Should return error if 'matcher' is not a string()" + ), + ?assertMatch( + {error, [{invalid_key, "unknown_field"}]}, + couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:count_by("unknown_field") + ]), + "Should return error if 'AggregationKeys' contain unknown field" + ), + ?assertMatch( + {error, [{beyond_limit, ?QUERY_LIMIT + 1}]}, + couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:count_by("username"), + couch_srt:options([ + couch_srt:with_limit(?QUERY_LIMIT + 1) + ]) + ]), + "Should return error when 'limit' is greater than configured" + ), + ok. + +t_sort_by_multiple_keys(#{rctxs := Rctxs}) -> + Aggregated = aggregate_by([username, dbname], ioq_calls, Rctxs), + Grouped = group(Aggregated), + Ordered = order_by_value(Grouped), + [ + {{<<"user_foo">>, <<"db2">>}, V1}, + {{<<"user_bar">>, <<"db2">>}, V2}, + {{<<"user_bar">>, <<"db1">>}, V3}, + {{<<"user_foo">>, <<"db1">>}, V4} + ] = Ordered, + Q = couch_srt:query([ + couch_srt:from("docs_read"), + couch_srt:sort_by([<<"username">>, <<"dbname">>], <<"ioq_calls">>) + ]), + ?assertMatch( + {ok, [ + {{<<"user_foo">>, <<"db2">>}, V1}, + {{<<"user_bar">>, <<"db2">>}, V2}, + {{<<"user_bar">>, <<"db1">>}, V3}, + {{<<"user_foo">>, <<"db1">>}, V4} + ]}, + couch_srt:run(Q) + ), + ok. 
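+
+%% sort_by returns an ordered list rather than a map; with the fixture data
+%% from setup/0 the result above is descending by total:
+%%
+%%   {ok, [{{<<"user_foo">>, <<"db2">>}, 987},
+%%         {{<<"user_bar">>, <<"db2">>}, 888},
+%%         {{<<"user_bar">>, <<"db1">>}, 678},
+%%         {{<<"user_foo">>, <<"db1">>}, 444}]}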
+
+t_sort_by_single_key(#{rctxs := Rctxs}) ->
+    Aggregated = aggregate_by([username], ioq_calls, Rctxs),
+    Grouped = group(Aggregated),
+    Ordered = order_by_value(Grouped),
+    [
+        {{<<"user_bar">>}, V1},
+        {{<<"user_foo">>}, V2}
+    ] = Ordered,
+    Q = couch_srt:query([
+        couch_srt:from("docs_read"),
+        couch_srt:sort_by([<<"username">>], <<"ioq_calls">>)
+    ]),
+    ?assertMatch(
+        {ok, [
+            {{<<"user_bar">>}, V1},
+            {{<<"user_foo">>}, V2}
+        ]},
+        couch_srt:run(Q)
+    ),
+    ok.
+
+t_sort_by_binary_key(#{rctxs := Rctxs}) ->
+    Aggregated = aggregate_by([username], ioq_calls, Rctxs),
+    Grouped = group(Aggregated),
+    Ordered = order_by_value(Grouped),
+    [
+        {{<<"user_bar">>}, V1},
+        {{<<"user_foo">>}, V2}
+    ] = Ordered,
+    Q = couch_srt:query([
+        couch_srt:from("docs_read"),
+        couch_srt:sort_by(<<"username">>, <<"ioq_calls">>)
+    ]),
+    ?assertMatch(
+        {ok, [
+            {<<"user_bar">>, V1},
+            {<<"user_foo">>, V2}
+        ]},
+        couch_srt:run(Q)
+    ),
+    ok.
+
+t_sort_by_bad_request(_) ->
+    ?assertMatch(
+        {error, [{unknown_matcher, "unknown_matcher"}]},
+        couch_srt:query([
+            couch_srt:from("unknown_matcher"),
+            couch_srt:sort_by(<<"username">>, <<"ioq_calls">>)
+        ]),
+        "Should return error if 'matcher' is unknown"
+    ),
+    ?assertMatch(
+        {error, [{unknown_matcher, "unknown_matcher"}]},
+        couch_srt:query([
+            couch_srt:from("unknown_matcher"),
+            couch_srt:sort_by(<<"username">>)
+        ]),
+        "Should return error if 'matcher' is unknown"
+    ),
+    ?assertMatch(
+        {error, [{unknown_matcher, rows_read}]},
+        couch_srt:query([
+            couch_srt:from(rows_read),
+            couch_srt:sort_by([username, dbname], ioq_calls)
+        ]),
+        "Should return error if 'matcher' is not a string()"
+    ),
+    ?assertMatch(
+        {error, [{unknown_matcher, rows_read}]},
+        couch_srt:query([
+            couch_srt:from(rows_read),
+            couch_srt:sort_by([username, dbname])
+        ]),
+        "Should return error if 'matcher' is not a string()"
+    ),
+    ?assertMatch(
+        {error, [{invalid_key, "unknown_field"}]},
+        couch_srt:query([
+            couch_srt:from("docs_read"),
+            couch_srt:sort_by("unknown_field", ioq_calls)
+        ]),
+        "Should return error if 'AggregationKeys' contain an unknown field"
+    ),
+    ?assertMatch(
+        {error, [{invalid_key, "unknown_field"}]},
+        couch_srt:query([
+            couch_srt:from("docs_read"),
+            couch_srt:sort_by("unknown_field")
+        ]),
+        "Should return error if 'AggregationKeys' contain an unknown field"
+    ),
+    ?assertMatch(
+        {error, [{invalid_key, "unknown_field"}]},
+        couch_srt:query([
+            couch_srt:from("docs_read"),
+            couch_srt:sort_by("username", "unknown_field")
+        ]),
+        "Should return error if 'ValueKey' contains an unknown field"
+    ),
+    ?assertMatch(
+        {error, [{beyond_limit, ?QUERY_LIMIT + 1}]},
+        couch_srt:query([
+            couch_srt:from("docs_read"),
+            couch_srt:sort_by("username", ioq_calls),
+            couch_srt:options([
+                couch_srt:with_limit(?QUERY_LIMIT + 1)
+            ])
+        ]),
+        "Should return error when 'limit' is greater than configured"
+    ),
+    ?assertMatch(
+        {error, [{beyond_limit, ?QUERY_LIMIT + 1}]},
+        couch_srt:query([
+            couch_srt:from("docs_read"),
+            couch_srt:sort_by("username"),
+            couch_srt:options([
+                couch_srt:with_limit(?QUERY_LIMIT + 1)
+            ])
+        ]),
+        "Should return error when 'limit' is greater than configured"
+    ),
+    ok.
+
+t_run_hits_query_cardinality_limit(_) ->
+    %% Use a sort_by query to easily pattern match on a non-empty list
+    ?assertMatch(
+        {limit, [_ | _]},
+        couch_srt:run(
+            couch_srt:query([
+                couch_srt:from("docs_read"),
+                couch_srt:sort_by([<<"dbname">>, <<"username">>], <<"ioq_calls">>),
+                couch_srt:options([
+                    couch_srt:with_limit(1)
+                ])
+            ])
+        ),
+        "Should hit limit but still return results when configured query_cardinality_limit is smaller than the working set"
+    ),
+    ok.
+
+add_matcher(Name, MSpec) ->
+    persistent_term:put({csrt_logger, all_csrt_matchers}, #{
+        Name => {MSpec, ets:match_spec_compile(MSpec)}
+    }).
+
+aggregate_by(AggregationKeys, ValField, Records) ->
+    lists:foldl(
+        fun(Rctx, Acc) ->
+            Key = list_to_tuple([couch_srt_entry:value(Field, Rctx) || Field <- AggregationKeys]),
+            CurrVal = maps:get(Key, Acc, []),
+            maps:put(Key, [couch_srt_entry:value(ValField, Rctx) | CurrVal], Acc)
+        end,
+        #{},
+        Records
+    ).
+
+group(Aggregated) ->
+    maps:fold(
+        fun(Key, Val, Acc) ->
+            maps:put(Key, lists:foldl(fun erlang:'+'/2, 0, Val), Acc)
+        end,
+        #{},
+        Aggregated
+    ).
+
+count(Aggregated) ->
+    maps:fold(
+        fun(Key, Val, Acc) ->
+            maps:put(Key, lists:foldl(fun(_, A) -> A + 1 end, 0, Val), Acc)
+        end,
+        #{},
+        Aggregated
+    ).
+
+order_by_value(Grouped) ->
+    lists:reverse(lists:keysort(2, maps:to_list(Grouped))).
+
+-ifdef(WITH_PROPER).
+
+format(Fmt, Args) ->
+    lists:flatten(io_lib:format(Fmt, Args)).
+
+new_topK_test_() ->
+    ?EUNIT_QUICKCHECK(60, 10000).
+
+prop_sorted_after_update() ->
+    ?FORALL(
+        Updates,
+        updates_g(),
+        begin
+            Limit = 10,
+            TopKResults = couch_srt_query:topK(Updates, Limit),
+            NResults = length(TopKResults),
+            Values = [V || {_K, V} <- TopKResults],
+            ?assert(Values == lists:reverse(lists:sort(Values)), "Expected values to be ordered"),
+            ?assert(
+                NResults =< Limit,
+                format(
+                    "Expected the number of values to be less than the limit, topK = ~p, limit = ~p",
+                    [NResults, Limit]
+                )
+            ),
+            Model = update_model(Updates, Limit),
+            ?assert(
+                NResults == length(Model),
+                format(
+                    "Expected the same number of values from topK as in the model, topK = ~p, model = ~p",
+                    [NResults, length(Model)]
+                )
+            ),
+            ModelValues = [V || {_K, V} <- Model],
+            ?assert(
+                Values == ModelValues,
+                format(
+                    "Expected values from topK to be equal to values from the model, topK = ~p, model = ~p",
+                    [Values, ModelValues]
+                )
+            ),
+            true
+        end
+    ).
+
+update_model(Updates, Limit) ->
+    UpdatesList = maps:to_list(Updates),
+    SortedResults = lists:sort(
+        fun({AK, AV}, {BK, BV}) ->
+            AV > BV orelse (AV == BV andalso AK >= BK)
+        end,
+        UpdatesList
+    ),
+    Size = min(Limit, length(SortedResults)),
+    {Model, _} = lists:split(Size, SortedResults),
+    Model.
+
+non_empty_tuple(Type) ->
+    ?LET(L, non_empty(list(Type)), list_to_tuple(L)).
+
+aggregation_key_g() ->
+    non_empty_tuple(string()).
+
+value_g() ->
+    non_neg_integer().
+
+updates_g() ->
+    map(aggregation_key_g(), value_g()).
+
+-endif.
diff --git a/src/couch_srt/test/eunit/couch_srt_server_tests.erl b/src/couch_srt/test/eunit/couch_srt_server_tests.erl
new file mode 100644
index 00000000000..6a9a827df93
--- /dev/null
+++ b/src/couch_srt/test/eunit/couch_srt_server_tests.erl
@@ -0,0 +1,643 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License.
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_server_tests). + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include("../../src/couch_srt.hrl"). + +-define(DOCS_COUNT, 100). +-define(DDOCS_COUNT, 1). +-define(DB_Q, 8). + +-define(DEBUG_ENABLED, false). + +csrt_context_test_() -> + { + setup, + fun setup/0, + fun teardown/1, + with([ + ?TDEF(t_context_setting) + ]) + }. + +test_funs() -> + [ + ?TDEF_FE(t_all_docs_include_false), + ?TDEF_FE(t_all_docs_include_true), + ?TDEF_FE(t_all_docs_limit_zero), + ?TDEF_FE(t_get_doc), + ?TDEF_FE(t_put_doc), + ?TDEF_FE(t_delete_doc), + ?TDEF_FE(t_update_docs), + ?TDEF_FE(t_changes), + ?TDEF_FE(t_changes_limit_zero), + ?TDEF_FE(t_changes_filtered), + ?TDEF_FE(t_updated_at), + ?TDEF_FE(t_view_query), + ?TDEF_FE(t_view_query_include_docs) + ]. + +ddoc_test_funs() -> + [ + ?TDEF_FE(t_changes_js_filtered) + | test_funs() + ]. + +csrt_fabric_no_ddoc_test_() -> + { + "CSRT fabric tests with no DDoc present", + foreach, + fun setup/0, + fun teardown/1, + test_funs() + }. + +csrt_fabric_test_() -> + { + "CSRT fabric tests with a DDoc present", + foreach, + fun() -> setup_ddoc(<<"_design/foo">>, <<"bar">>) end, + fun teardown/1, + ddoc_test_funs() + }. + +make_docs(Count) -> + lists:map( + fun(I) -> + #doc{ + id = ?l2b("foo_" ++ integer_to_list(I)), + body = {[{<<"value">>, I}]} + } + end, + lists:seq(1, Count) + ). + +setup() -> + Ctx = test_util:start_couch([fabric, couch_stats, couch_srt]), + config:set_boolean(?CSRT, "randomize_testing", false, false), + ok = meck:new(ioq, [passthrough]), + ok = meck:expect(ioq, bypass, fun(_, _) -> false end), + DbName = ?tempdb(), + ok = fabric:create_db(DbName, [{q, ?DB_Q}, {n, 1}]), + Docs = make_docs(?DOCS_COUNT), + Opts = [], + {ok, _} = fabric:update_docs(DbName, Docs, Opts), + {Ctx, DbName, undefined}. + +teardown({Ctx, DbName, _View}) -> + ok = fabric:delete_db(DbName, [?ADMIN_CTX]), + ok = meck:unload(ioq), + test_util:stop_couch(Ctx). + +setup_ddoc(DDocId, ViewName) -> + {Ctx, DbName, undefined} = setup(), + DDoc = couch_doc:from_json_obj( + {[ + {<<"_id">>, DDocId}, + {<<"language">>, <<"javascript">>}, + { + <<"views">>, + {[ + { + ViewName, + {[ + {<<"map">>, <<"function(doc) { emit(doc.value, null); }">>} + ]} + } + ]} + }, + { + <<"filters">>, + {[ + { + <<"even">>, + <<"function(doc) { return (doc.value % 2 == 0); }">> + } + ]} + } + ]} + ), + {ok, _Rev} = fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]), + {Ctx, DbName, {DDocId, ViewName}}. + +t_context_setting({_Ctx, _DbName, _View}) -> + false. 
+ +t_all_docs_limit_zero({_Ctx, DbName, _View}) -> + Context = #{ + method => 'GET', + path => "/" ++ ?b2l(DbName) ++ "/_all_docs" + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + MArgs = #mrargs{include_docs = false, limit = 0}, + _Res = fabric:all_docs(DbName, [?ADMIN_CTX], fun view_cb/2, [], MArgs), + Rctx = wait_rctx(PidRef, nonzero_local_io()), + ?assert(is_map(Rctx), "Expected a nonzero local io"), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => ?DB_Q, + rows_read => 0, + docs_read => 0, + docs_written => 0, + ioq_calls => assert_gt(), + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). + +t_all_docs_include_false({_Ctx, DbName, View}) -> + Context = #{ + method => 'GET', + path => "/" ++ ?b2l(DbName) ++ "/_all_docs" + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + MArgs = #mrargs{include_docs = false}, + _Res = fabric:all_docs(DbName, [?ADMIN_CTX], fun view_cb/2, [], MArgs), + Rctx = wait_rctx(PidRef, nonzero_local_io()), + ?assert(is_map(Rctx), "Expected a nonzero local io"), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => ?DB_Q, + rows_read => docs_count(View), + docs_read => 0, + docs_written => 0, + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). + +t_all_docs_include_true({_Ctx, DbName, View}) -> + pdebug(dbname, DbName), + Context = #{ + method => 'GET', + path => "/" ++ ?b2l(DbName) ++ "/_all_docs" + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + MArgs = #mrargs{include_docs = true}, + _Res = fabric:all_docs(DbName, [?ADMIN_CTX], fun view_cb/2, [], MArgs), + Rctx = wait_rctx(PidRef, nonzero_local_io()), + ?assert(is_map(Rctx), "Expected a nonzero local io"), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => ?DB_Q, + rows_read => docs_count(View), + docs_read => docs_count(View), + docs_written => 0, + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). + +t_update_docs({_Ctx, DbName, View}) -> + pdebug(dbname, DbName), + Context = #{ + method => 'POST', + path => "/" ++ ?b2l(DbName) + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + Docs = [#doc{id = ?l2b("bar_" ++ integer_to_list(I))} || I <- lists:seq(1, ?DOCS_COUNT)], + _Res = fabric:update_docs(DbName, Docs, [?ADMIN_CTX]), + Rctx = wait_rctx(PidRef, ddoc_dependent_local_io(View)), + ?assert(is_map(Rctx), "Expected a zero local io for view"), + pdebug(rctx, Rctx), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => ?DB_Q, + rows_read => 0, + docs_read => 0, + docs_written => ?DOCS_COUNT, + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). + +t_get_doc({_Ctx, DbName, _View}) -> + pdebug(dbname, DbName), + DocId = "foo_17", + Context = #{ + method => 'GET', + path => "/" ++ ?b2l(DbName) ++ "/" ++ DocId + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + _Res = fabric:open_doc(DbName, DocId, [?ADMIN_CTX]), + Rctx = wait_rctx(PidRef, nonzero_local_io(io_sum)), + ?assert(is_map(Rctx), "Expected a nonzero local io"), + pdebug(rctx, Rctx), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => 1, + rows_read => 0, + docs_read => 1, + docs_written => 0, + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). 
+ +t_updated_at({_Ctx, DbName, _View}) -> + %% Same test as t_get_doc but with a timer sleep and updated_at assertion + TimeDelay = 1234, + pdebug(dbname, DbName), + DocId = "foo_17", + Context = #{ + method => 'GET', + path => "/" ++ ?b2l(DbName) ++ "/" ++ DocId + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + timer:sleep(TimeDelay), + _Res = fabric:open_doc(DbName, DocId, [?ADMIN_CTX]), + Rctx = wait_rctx(PidRef, nonzero_local_io(io_sum)), + ?assert(is_map(Rctx), "Expected a nonzero local io"), + %% Get RawRctx to have pre-json-converted timestamps + RawRctx = couch_srt:get_resource(PidRef), + pdebug(rctx, Rctx), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => 1, + rows_read => 0, + docs_read => 1, + docs_written => 0, + pid_ref => PidRef + }), + Started = couch_srt_entry:value(started_at, RawRctx), + Updated = couch_srt_entry:value(updated_at, RawRctx), + ?assert( + couch_srt_util:make_dt(Started, Updated, millisecond) > TimeDelay, + "updated_at gets updated with an expected TimeDelay" + ), + ?assert( + couch_srt_util:make_dt(Started, Updated, millisecond) < 2 * TimeDelay, + "updated_at gets updated in a reasonable time frame" + ), + ok = assert_teardown(PidRef). + +t_put_doc({_Ctx, DbName, View}) -> + pdebug(dbname, DbName), + DocId = "bar_put_1919", + Context = #{ + method => 'PUT', + path => "/" ++ ?b2l(DbName) ++ "/" ++ DocId + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + Doc = #doc{id = ?l2b(DocId)}, + _Res = fabric:update_doc(DbName, Doc, [?ADMIN_CTX]), + Rctx = wait_rctx(PidRef, ddoc_dependent_local_io(View)), + ?assert(is_map(Rctx), "Expected a zero local io for view"), + pdebug(rctx, Rctx), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => 1, + rows_read => 0, + docs_read => 0, + docs_written => 1, + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). + +t_delete_doc({_Ctx, DbName, View}) -> + pdebug(dbname, DbName), + DocId = "foo_17", + {ok, Doc0} = fabric:open_doc(DbName, DocId, [?ADMIN_CTX]), + Doc = Doc0#doc{body = {[{<<"_deleted">>, true}]}}, + Context = #{ + method => 'DELETE', + path => "/" ++ ?b2l(DbName) ++ "/" ++ DocId + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + _Res = fabric:update_doc(DbName, Doc, [?ADMIN_CTX]), + Rctx = wait_rctx(PidRef, ddoc_dependent_local_io(View)), + ?assert(is_map(Rctx), "Expected a zero local io for view"), + pdebug(rctx, Rctx), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => 1, + rows_read => 0, + docs_read => 0, + docs_written => 1, + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). + +t_changes({_Ctx, DbName, View}) -> + pdebug(dbname, DbName), + Context = #{ + method => 'GET', + path => "/" ++ ?b2l(DbName) ++ "/_changes" + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + _Res = fabric:changes(DbName, fun changes_cb/2, [], #changes_args{}), + Rctx = wait_rctx(PidRef, nonzero_local_io()), + ?assert(is_map(Rctx), "Expected a nonzero local io"), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => ?DB_Q, + rows_read => docs_count(View), + changes_returned => docs_count(View), + docs_read => 0, + docs_written => 0, + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). 
+ +t_changes_limit_zero({_Ctx, DbName, _View}) -> + Context = #{ + method => 'GET', + path => "/" ++ ?b2l(DbName) ++ "/_changes" + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + _Res = fabric:changes(DbName, fun changes_cb/2, [], #changes_args{limit = 0}), + Rctx = wait_rctx(PidRef, nonzero_local_io()), + ?assert(is_map(Rctx), "Expected a nonzero local io"), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => ?DB_Q, + rows_read => assert_gte(?DB_Q), + changes_returned => assert_gte(?DB_Q), + docs_read => 0, + docs_written => 0, + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). + +%% TODO: stub in non JS filter with selector +t_changes_filtered({_Ctx, _DbName, _View}) -> + false. + +t_changes_js_filtered({_Ctx, DbName, {DDocId, _ViewName} = View}) -> + pdebug(dbname, DbName), + Method = 'GET', + Path = "/" ++ ?b2l(DbName) ++ "/_changes", + Context = #{ + method => Method, + path => Path + }, + {PidRef, Nonce} = coordinator_context(Context), + Req = {json_req, null}, + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + Filter = configure_filter(DbName, DDocId, Req), + Args = #changes_args{filter_fun = Filter}, + _Res = fabric:changes(DbName, fun changes_cb/2, [], Args), + Rctx = wait_rctx(PidRef, nonzero_local_io()), + ?assert(is_map(Rctx), "Expected a nonzero local io"), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => assert_gte(?DB_Q), + rows_read => assert_gte(docs_count(View)), + changes_returned => round(?DOCS_COUNT / 2), + docs_read => assert_gte(docs_count(View)), + docs_written => 0, + pid_ref => PidRef, + js_filter => docs_count(View), + js_filtered_docs => docs_count(View) + }), + ok = assert_teardown(PidRef). + +t_view_query({_Ctx, DbName, View}) -> + Context = #{ + method => 'GET', + path => "/" ++ ?b2l(DbName) ++ "/_design/foo/_view/bar" + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + MArgs = #mrargs{include_docs = false}, + _Res = fabric:all_docs(DbName, [?ADMIN_CTX], fun view_cb/2, [], MArgs), + Rctx = wait_rctx(PidRef, nonzero_local_io()), + ?assert(is_map(Rctx), "Expected a nonzero local io"), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => ?DB_Q, + rows_read => docs_count(View), + docs_read => 0, + docs_written => 0, + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). + +t_view_query_include_docs({_Ctx, DbName, View}) -> + Context = #{ + method => 'GET', + path => "/" ++ ?b2l(DbName) ++ "/_design/foo/_view/bar" + }, + {PidRef, Nonce} = coordinator_context(Context), + Rctx0 = load_rctx(PidRef, Nonce), + ok = fresh_rctx_assert(Rctx0, PidRef, Nonce), + MArgs = #mrargs{include_docs = true}, + _Res = fabric:all_docs(DbName, [?ADMIN_CTX], fun view_cb/2, [], MArgs), + Rctx = wait_rctx(PidRef, nonzero_local_io()), + ?assert(is_map(Rctx), "Expected a nonzero local io"), + ok = rctx_assert(Rctx, #{ + nonce => Nonce, + db_open => ?DB_Q, + rows_read => docs_count(View), + docs_read => docs_count(View), + docs_written => 0, + pid_ref => PidRef + }), + ok = assert_teardown(PidRef). + +assert_teardown(PidRef) -> + ?assertEqual(ok, couch_srt:destroy_context(PidRef)), + ?assertEqual(undefined, couch_srt:get_resource()), + %% Normally the tracker is responsible for destroying the resource + ?assertEqual(true, couch_srt_server:destroy_resource(PidRef)), + ?assertEqual(undefined, couch_srt:get_resource(PidRef)), + ok. 
+ +view_cb({row, Row}, Acc) -> + {ok, [Row | Acc]}; +view_cb(_Msg, Acc) -> + {ok, Acc}. + +changes_cb({change, {Change}}, Acc) -> + {ok, [Change | Acc]}; +changes_cb(_Msg, Acc) -> + {ok, Acc}. + +pdebug(dbname, DbName) -> + case ?DEBUG_ENABLED =:= true of + true -> + ?debugFmt("DBNAME[~p]: ~p", [DbName, fabric:get_db_info(DbName)]); + false -> + ok + end; +pdebug(rctx, Rctx) -> + ?DEBUG_ENABLED andalso ?debugFmt("GOT RCTX: ~p~n", [Rctx]). + +pdbg(Str, Args) -> + ?DEBUG_ENABLED andalso ?debugFmt(Str, Args). + +convert_pidref({_, _} = PidRef) -> + couch_srt_entry:convert_pidref(PidRef); +convert_pidref(PidRef) when is_binary(PidRef) -> + PidRef; +convert_pidref(false) -> + false. + +rctx_assert(Rctx, Asserts0) -> + DefaultAsserts = #{ + changes_returned => 0, + js_filter => 0, + js_filtered_docs => 0, + nonce => null, + db_open => 0, + rows_read => 0, + docs_read => 0, + docs_written => 0, + pid_ref => null + }, + Updates = #{ + pid_ref => fun convert_pidref/1, + nonce => fun couch_srt_entry:convert_string/1 + }, + Asserts = maps:merge( + DefaultAsserts, + maps:fold(fun maps:update_with/3, Asserts0, Updates) + ), + ok = maps:foreach( + fun + (_K, false) -> + ok; + (K, Fun) when is_function(Fun) -> + Fun(K, maps:get(K, Rctx)); + (K, V) -> + case maps:get(K, Rctx) of + false -> + ok; + RV -> + pdbg("?assertEqual(~p, ~p, ~p)", [V, RV, K]), + ?assertEqual(V, RV, K) + end + end, + Asserts + ), + ok. + +wait_rctx(PidRef, WaitFun) -> + test_util:wait(fun() -> + Rctx = couch_srt_entry:to_json(couch_srt:get_resource(PidRef)), + WaitFun(Rctx) + end). + +%% Doc updates and others don't perform local IO, they funnel to another pid +zero_local_io() -> + fun + (#{ioq_calls := 0, get_kp_node := 0, get_kv_node := 0} = Ctx) -> + Ctx; + (_) -> + wait + end. + +nonzero_local_io() -> + nonzero_local_io(io_separate). + +nonzero_local_io(io_sum) -> + fun + ( + #{ + ioq_calls := IoqCalls, + get_kp_node := KPNodes, + get_kv_node := KVNodes + } = Ctx + ) when IoqCalls > 0 andalso (KPNodes + KVNodes) > 0 -> + Ctx; + (_) -> + wait + end; +nonzero_local_io(io_separate) -> + fun + ( + #{ + ioq_calls := IoqCalls, + get_kp_node := KPNodes, + get_kv_node := KVNodes + } = Ctx + ) when IoqCalls > 0 andalso KPNodes > 0 andalso KVNodes > 0 -> + Ctx; + (_) -> + wait + end. + +ddoc_dependent_local_io(undefined) -> + zero_local_io(); +ddoc_dependent_local_io({_DDoc, _ViewName}) -> + nonzero_local_io(io_sum). + +coordinator_context(#{method := Method, path := Path}) -> + Nonce = couch_util:to_hex(crypto:strong_rand_bytes(5)), + Req = #httpd{method = Method, nonce = Nonce}, + {_, _} = PidRef = couch_srt:create_coordinator_context(Req, Path), + {PidRef, Nonce}. + +fresh_rctx_assert(Rctx, PidRef, Nonce) -> + pdebug(rctx, Rctx), + FreshAsserts = #{ + nonce => Nonce, + db_open => 0, + rows_read => 0, + docs_read => 0, + docs_written => 0, + pid_ref => PidRef + }, + rctx_assert(Rctx, FreshAsserts). + +assert_gt() -> + assert_gt(0). + +assert_gt(N) -> + fun(K, RV) -> ?assert(RV > N, {K, RV, N}) end. + +assert_gte(N) -> + fun(K, RV) -> ?assert(RV >= N, {K, RV, N}) end. + +docs_count(undefined) -> + ?DOCS_COUNT; +docs_count({_, _}) -> + ?DOCS_COUNT + ?DDOCS_COUNT. + +configure_filter(DbName, DDocId, Req) -> + configure_filter(DbName, DDocId, Req, <<"even">>). + +configure_filter(DbName, DDocId, Req, FName) -> + {ok, DDoc} = ddoc_cache:open_doc(DbName, DDocId), + DIR = fabric_util:doc_id_and_rev(DDoc), + Style = main_only, + {fetch, custom, Style, Req, DIR, FName}. 
+ +load_rctx(PidRef, NonceString) -> + Nonce = list_to_binary(NonceString), + wait_rctx(PidRef, fun + (#{nonce := N} = Rctx) when N == Nonce -> Rctx; + (S) -> + ?debugFmt("Nonce = ~p R = ~p~n", [Nonce, S]), + wait + end). diff --git a/src/couch_srt/test/eunit/couch_srt_test_helper.erl b/src/couch_srt/test/eunit/couch_srt_test_helper.erl new file mode 100644 index 00000000000..60e4f6acf5a --- /dev/null +++ b/src/couch_srt/test/eunit/couch_srt_test_helper.erl @@ -0,0 +1,139 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_srt_test_helper). + +-export([ + enable_default_logger_matchers/0, + rctx_gen/0, + rctx_gen/1, + rctxs/0, + rctxs/1, + jrctx/1 +]). + +-include_lib("couch/include/couch_eunit.hrl"). + +-include("../../src/couch_srt.hrl"). +-define(RCTX_RANGE, 1000). +-define(RCTX_COUNT, 10000). + +-define(RCTX_RPC, #rpc_worker{from = {self(), make_ref()}}). +-define(RCTX_COORDINATOR, #coordinator{ + method = 'GET', path = <<"/foo/_all_docs">>, mod = chttp_db, func = db_req +}). +-define(RCTX_CHANGES_COORDINATOR, #coordinator{ + method = 'GET', path = <<"/foo/_changes">>, mod = chttp_db, func = handle_changes_req +}). + +rctx_gen() -> + rctx_gen(#{}). + +rctx_gen(Opts0) -> + DbnameGen = one_of([<<"foo">>, <<"bar">>, ?tempdb]), + UsernameGen = one_of([<<"user_foo">>, <<"user_bar">>, <<"adm">>]), + TypeGen = one_of([?RCTX_RPC, ?RCTX_COORDINATOR, ?RCTX_CHANGES_COORDINATOR]), + R = fun() -> rand:uniform(?RCTX_RANGE) end, + R10 = fun() -> 3 + rand:uniform(round(?RCTX_RANGE / 10)) end, + Occasional = one_of([0, 0, 0, 0, 0, R]), + Nonce = one_of(["9c54fa9283", "foobar7799" | lists:duplicate(10, fun nonce/0)]), + Base = #{ + dbname => DbnameGen, + db_open => R10, + docs_read => R, + docs_written => Occasional, + get_kp_node => R10, + get_kv_node => R, + nonce => Nonce, + pid_ref => {self(), make_ref()}, + ioq_calls => R, + rows_read => R, + type => TypeGen, + username => UsernameGen, + %% Hack because we need to modify both fields + '_do_changes' => true + }, + Opts = maps:merge(Base, Opts0), + couch_srt_entry:from_map( + maps:fold( + fun + %% Hack for changes because we need to modify both + %% changes_processed (rows_read) and changes_returned but the + %% latter must be <= the former + ('_do_changes', V, Acc) -> + case V of + true -> + Processed = R(), + Returned = (one_of([0, 0, 1, Processed, rand:uniform(Processed)]))(), + maps:put( + rows_read, + Processed, + maps:put(changes_returned, Returned, Acc) + ); + _ -> + Acc + end; + (K, F, Acc) when is_function(F) -> + maps:put(K, F(), Acc); + (K, V, Acc) -> + maps:put(K, V, Acc) + end, + #{}, + Opts + ) + ). + +rctxs() -> + rctxs(?RCTX_COUNT). + +rctxs(Count) when is_integer(Count) andalso Count >= 1 -> + [rctx_gen() || _ <- lists:seq(1, Count)]. + +jrctx(Rctx) -> + JRctx = couch_srt_entry:to_json(Rctx), + case couch_srt_logger:should_truncate_reports() of + true -> + maps:filter(fun(_K, V) -> V > 0 end, JRctx); + false -> + JRctx + end. + +nonce() -> + couch_util:to_hex(crypto:strong_rand_bytes(5)). 
+
+one_of(L) ->
+    fun() ->
+        case lists:nth(rand:uniform(length(L)), L) of
+            F when is_function(F) ->
+                F();
+            N ->
+                N
+        end
+    end.
+
+enable_default_logger_matchers() ->
+    DefaultMatchers = [
+        all_coordinators,
+        all_rpc_workers,
+        docs_read,
+        rows_read,
+        docs_written,
+        long_reqs,
+        changes_processed,
+        ioq_calls
+    ],
+    lists:foreach(
+        fun(Name) ->
+            config:set(?CSRT_MATCHERS_ENABLED, atom_to_list(Name), "true", false)
+        end,
+        DefaultMatchers
+    ).
diff --git a/src/couch_stats/src/couch_stats.app.src b/src/couch_stats/src/couch_stats.app.src
index a54fac7349f..fc1938045a7 100644
--- a/src/couch_stats/src/couch_stats.app.src
+++ b/src/couch_stats/src/couch_stats.app.src
@@ -13,8 +13,11 @@
 {application, couch_stats, [
     {description, "Simple statistics collection"},
     {vsn, git},
-    {registered, [couch_stats_aggregator, couch_stats_process_tracker]},
-    {applications, [kernel, stdlib]},
+    {registered, [
+        couch_stats_aggregator,
+        couch_stats_process_tracker
+    ]},
+    {applications, [kernel, stdlib, couch_log]},
     {mod, {couch_stats_app, []}},
     {env, []}
 ]}.
diff --git a/src/couch_stats/src/couch_stats.erl b/src/couch_stats/src/couch_stats.erl
index 29a4024491f..9ce03bb2fb0 100644
--- a/src/couch_stats/src/couch_stats.erl
+++ b/src/couch_stats/src/couch_stats.erl
@@ -49,6 +49,7 @@ increment_counter(Name) ->
 
 -spec increment_counter(any(), pos_integer()) -> response().
 increment_counter(Name, Value) ->
+    couch_srt:maybe_track_local_counter(Name, Value),
     case couch_stats_util:get_counter(Name, stats()) of
         {ok, Ctx} -> couch_stats_counter:increment(Ctx, Value);
         {error, Error} -> {error, Error}
diff --git a/src/docs/images/csrt-sample-workload.png b/src/docs/images/csrt-sample-workload.png
new file mode 100644
index 00000000000..4accf73dbc3
Binary files /dev/null and b/src/docs/images/csrt-sample-workload.png differ
diff --git a/src/docs/src/api/server/csrt.rst b/src/docs/src/api/server/csrt.rst
new file mode 100644
index 00000000000..5c816b91fb9
--- /dev/null
+++ b/src/docs/src/api/server/csrt.rst
@@ -0,0 +1,345 @@
+.. Licensed under the Apache License, Version 2.0 (the "License"); you may not
+.. use this file except in compliance with the License. You may obtain a copy of
+.. the License at
+..
+..     http://www.apache.org/licenses/LICENSE-2.0
+..
+.. Unless required by applicable law or agreed to in writing, software
+.. distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+.. WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+.. License for the specific language governing permissions and limitations under
+.. the License.
+
+.. _api/server/csrt:
+
+=============================================
+``/_active_resources/_match/{matcher-name}``
+=============================================
+
+.. versionadded:: 3.5.1
+
+Find active processes (being tracked in CSRT) using a declarative JSON querying syntax.
+You can learn more about Couch Stats Resource Tracker (CSRT) :doc:`here </csrt/index>`.
+The query passed to the endpoint can take one of the following forms:
+
+* :ref:`group_by <api/server/csrt/group_by>` - The value of ``counter_key`` would be
+  extracted and aggregated in the order of provided ``aggregate_keys``.
+
+* :ref:`count_by <api/server/csrt/count_by>` - Count the number of unique combinations
+  of values of the given ``aggregate_keys``.
+
+* :ref:`sort_by <api/server/csrt/sort_by>` - The value of ``counter_key`` would be
+  extracted, aggregated by the given ``aggregate_keys``, and sorted by the aggregated
+  value.
+
+.. http:post:: /_active_resources/_match/{matcher-name}
    :synopsis: Return snapshot of active processes (being tracked in CSRT)
+
+    Find active processes (being tracked in CSRT) using a declarative JSON querying
+    syntax.
+
+    :param matcher-name: The name of the matcher to use for filtering active processes.
+
+    :<json object group_by: The :ref:`group_by <api/server/csrt/group_by>` query.
    :<json object count_by: The :ref:`count_by <api/server/csrt/count_by>` query.
+    :<json object sort_by: The :ref:`sort_by <api/server/csrt/sort_by>` query.
+
+    :<header Content-Type: - :mimetype:`application/json`
+
+    :>json array: Array of objects containing the aggregated counter values for each node.
+    :>jsonarr object row: Object containing the results received from a node.
+    :>json array row.result: Array of objects containing the aggregated counter values.
+    :>jsonarr object result[_].key: Object containing the aggregate keys and their values.
+    :>json string|none result[_].key.pid_ref: Opaque string representing identity of the
+        resource.
+    :>json string|none result[_].key.dbname: The database name used by the resource.
+    :>json string|none result[_].key.username: The username used to access the resource.
+    :>json string|none result[_].key.type: The string representing a type of the resource.
+    :>jsonarr int result[_].value: The aggregated value of ``counter_key``.
+    :>json string row.node: The node that the aggregated counter values belong to.
+    :>json array row.errors: Array of error messages.
+
+    :code 200: Request completed successfully
+    :code 400: Invalid request (a JSON object of following structure):
+
+        .. code-block:: json
+
+            {
+                "error": "bad_request",
+                "reason": "Unknown field name 'unknown_field'"
+            }
+
+        The following errors may be returned:
+
+        +-------------+-------------------------------------------+
+        | Error       | Reason                                    |
+        +=============+===========================================+
+        | bad_request | Unknown field name '...'                  |
+        +-------------+-------------------------------------------+
+        | bad_request | Unknown matcher '...'                     |
+        +-------------+-------------------------------------------+
+        | bad_request | "Multiple aggregations are not supported" |
+        +-------------+-------------------------------------------+
+        | bad_request | "Multiple keys in 'counter_key'"          |
+        +-------------+-------------------------------------------+
+
+.. _api/server/csrt/group_by:
+
+``group_by`` Syntax
+-------------------
+
+The ``group_by`` syntax is used to find active processes (being tracked in CSRT) which
+are matched by the given ``matcher-name``. For all active processes matching the
+matcher, the value of ``counter_key`` would be extracted and aggregated in the order
+of provided ``aggregate_keys``.
+
+    **Request**:
+
+    Example request body to return ``ioq_calls`` grouped by ``["username", "dbname"]``
+    from the ``docs_read`` matcher.
+
+    .. code-block:: http
+
+        POST /_active_resources/_match/docs_read HTTP/1.1
+        Accept: application/json
+        Content-Type: application/json
+        Content-Length: 80
+        Host: localhost:5984
+
+        {
+            "group_by": {
+                "counter_key": "ioq_calls",
+                "aggregate_keys": [
+                    "username",
+                    "dbname"
+                ]
+            }
+        }
+
+    **Response**:
+
+    .. code-block:: http
+
+        HTTP/1.1 200 OK
+        Cache-Control: must-revalidate
+        Content-Length: 783
+        Content-Type: application/json
+        Date: Thu, 29 Jul 2025 14:05:59 GMT
+        Server: CouchDB (Erlang OTP/26)
+
+        [
+            {
+                "result": [
+                    {
+                        "value": 90817,
+                        "key": {
+                            "username": "user_foo",
+                            "dbname": "db2"
+                        }
+                    },
+                    {
+                        "value": 42434,
+                        "key": {
+                            "username": "user_foo",
+                            "dbname": "db1"
+                        }
+                    },
+                    {
+                        "value": 84828,
+                        "key": {
+                            "username": "user_bar",
+                            "dbname": "db2"
+                        }
+                    },
+                    {
+                        "value": 6278,
+                        "key": {
+                            "username": "user_bar",
+                            "dbname": "db1"
+                        }
+                    }
+                ],
+                "node": "node1@127.0.0.1",
+                "errors": []
+            }
+        ]
+
.. _api/server/csrt/sort_by:
+
+``sort_by`` Syntax
+------------------
+
+The ``sort_by`` syntax is used to find active processes (being tracked in CSRT) which
+are matched by the given ``matcher-name``. For all active processes matching the
+matcher, the value of ``counter_key`` would be extracted, aggregated by the given
+``aggregate_keys``, and sorted by the aggregated value.
+
+    **Request**:
+
+    Example request body to return ``ioq_calls`` aggregated by
+    ``["username", "dbname"]`` and sorted by value, from the ``docs_read`` matcher.
+
+    .. code-block:: http
+
+        POST /_active_resources/_match/docs_read HTTP/1.1
+        Accept: application/json
+        Content-Type: application/json
+        Content-Length: 80
+        Host: localhost:5984
+
+        {
+            "sort_by": {
+                "counter_key": "ioq_calls",
+                "aggregate_keys": [
+                    "username",
+                    "dbname"
+                ]
+            }
+        }
+
+    **Response**:
+
+    .. code-block:: http
+
+        HTTP/1.1 200 OK
+        Cache-Control: must-revalidate
+        Content-Length: 783
+        Content-Type: application/json
+        Date: Thu, 29 Jul 2025 14:05:59 GMT
+        Server: CouchDB (Erlang OTP/26)
+
+        [
+            {
+                "result": [
+                    {
+                        "value": 90817,
+                        "key": {
+                            "username": "user_foo",
+                            "dbname": "db2"
+                        }
+                    },
+                    {
+                        "value": 84828,
+                        "key": {
+                            "username": "user_bar",
+                            "dbname": "db2"
+                        }
+                    },
+                    {
+                        "value": 42434,
+                        "key": {
+                            "username": "user_foo",
+                            "dbname": "db1"
+                        }
+                    },
+                    {
+                        "value": 6278,
+                        "key": {
+                            "username": "user_bar",
+                            "dbname": "db1"
+                        }
+                    }
+                ],
+                "node": "node1@127.0.0.1",
+                "errors": []
+            }
+        ]
+
+.. _api/server/csrt/count_by:
+
+``count_by`` Syntax
+-------------------
+
+The ``count_by`` syntax is used to find active processes (being tracked in CSRT) which
+are matched by the given ``matcher-name``. For all active processes matching the
+matcher, the number of unique combinations of values of the given ``aggregate_keys``
+is counted.
+
+    **Request**:
+
+    Example request body to return the number of unique combinations of values of
+    ``["username", "dbname"]`` from the ``docs_read`` matcher.
+
+    .. code-block:: http
+
+        POST /_active_resources/_match/docs_read HTTP/1.1
+        Accept: application/json
+        Content-Type: application/json
+        Content-Length: 72
+        Host: localhost:5984
+
+        {
+            "count_by": {
+                "aggregate_keys": [
+                    "username",
+                    "dbname"
+                ]
+            }
+        }
+
+    **Response**:
+
+    .. code-block:: http
+
+        HTTP/1.1 200 OK
+        Cache-Control: must-revalidate
+        Content-Length: 715
+        Content-Type: application/json
+        Date: Thu, 29 Jul 2025 14:12:32 GMT
+        Server: CouchDB (Erlang OTP/26)
+
+        [
+            {
+                "result": [
+                    {
+                        "value": 7,
+                        "key": {
+                            "username": "user_foo",
+                            "dbname": "db2"
+                        }
+                    },
+                    {
+                        "value": 42,
+                        "key": {
+                            "username": "user_foo",
+                            "dbname": "db1"
+                        }
+                    },
+                    {
+                        "value": 28,
+                        "key": {
+                            "username": "user_bar",
+                            "dbname": "db2"
+                        }
+                    },
+                    {
+                        "value": 627,
+                        "key": {
+                            "username": "user_bar",
+                            "dbname": "db1"
+                        }
+                    }
+                ],
+                "node": "node1@127.0.0.1",
+                "errors": []
+            }
+        ]
diff --git a/src/docs/src/api/server/index.rst b/src/docs/src/api/server/index.rst
index f3098e88c11..ed64bed43f8 100644
--- a/src/docs/src/api/server/index.rst
+++ b/src/docs/src/api/server/index.rst
@@ -22,5 +22,6 @@ configuration information.
 .. toctree::
     common
+    csrt
     authn
     configuration
diff --git a/src/docs/src/config/csrt.rst b/src/docs/src/config/csrt.rst
new file mode 100644
index 00000000000..896dc2b079c
--- /dev/null
+++ b/src/docs/src/config/csrt.rst
@@ -0,0 +1,587 @@
+.. Licensed under the Apache License, Version 2.0 (the "License"); you may not
+.. use this file except in compliance with the License. You may obtain a copy of
+.. the License at
+..
+..     http://www.apache.org/licenses/LICENSE-2.0
+..
.. Unless required by applicable law or agreed to in writing, software
+.. distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+.. WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+.. License for the specific language governing permissions and limitations under
+.. the License.
+
+.. default-domain:: config
+.. highlight:: ini
+
+.. _config-csrt:
+
+==========================================
+Couch Stats Resource Tracker (CSRT) Config
+==========================================
+
+CSRT configuration options and overview.
+
+.. seealso::
+
+    :doc:`/csrt/index`
+
+CSRT config
+===========
+
+This section contains the top level enablement and configuration options for CSRT.
+
+.. config:section:: csrt :: CSRT Primary Configuration
+
+    .. config:option:: enable :: Enable CSRT data collection and RPC deltas
+
+        Core enablement toggle for CSRT, defaults to false. Enabling this
+        setting initiates local CSRT stats collection as well as shipping
+        deltas in RPC responses to accumulate in the coordinator.
+
+        This does *not* trigger the new RPC spawn metrics, and it does not
+        enable reporting for any of the rctx types.
+
+        .. warning::
+
+            You *MUST* have all nodes in the cluster running a CSRT aware
+            CouchDB *before* you enable it on any node, otherwise the old
+            version nodes won't know how to handle the new RPC formats
+            including an embedded Delta payload.
+
+        Top level CSRT enablement for local data collection and RPC deltas::
+
+            [csrt]
+            enable = false
+
+    .. config:option:: enable_init_p :: Enable RPC spawn metric tracking
+
+        Enables tracking of new metric counters for the different ``fabric_rpc``
+        operations spawned by way of ``rexi_server:init_p/3``. This is the
+        primary mechanism for inducing database RPC operations within CouchDB,
+        and these init_p metrics aim to provide a node level understanding of
+        the workloads being induced by other coordinator processes. This is
+        especially relevant for databases placed on a subset of a cluster,
+        resulting in non-uniform workloads; these metrics are tailored to
+        provide insight into what work is being spawned on each node in the
+        cluster as a function of time.
+
+        Enablement for tracking counts of spawned RPC workers::
+
+            [csrt]
+            enable_init_p = false
+
+    .. config:option:: enable_reporting :: Enable CSRT Process Lifecycle Reports
+
+        This is the primary toggle for enabling CSRT process lifetime reports
+        containing detailed information about the quantity of work induced by
+        the given request/worker/etc. This is the top level toggle for
+        enabling *any* reporting, and there also exists
+        :config:option:`csrt/enable_rpc_reporting` to control the reporting of
+        individual RPC workers; when that is disabled, the coordinator alone
+        is responsible for generating a report with the accumulated deltas.
+
+        .. note::
+
+            Note that this setting toggles whether or not to generate process
+            lifecycle reports, but no reports will be generated until logger
+            matchers have been enabled that trigger a match on CSRT contexts
+            that have surpassed the configured thresholds.
+
+        Top level toggle for whether any process lifecycle reports are generated::
+
+            [csrt]
+            enable_reporting = false
+
+    .. config:option:: enable_rpc_reporting :: Enable RPC process lifecycle reports
+
+        This enables the possibility of RPC workers generating reports. They
+        still need to hit the configured thresholds to induce a report, but
+        this will generate CSRT process lifetime reports for individual RPC
+        workers that trigger the configured logger thresholds.
        This allows for
+        quantifying per node resource usage when desired, as otherwise the
+        reports are at the HTTP request level and don't provide per node stats.
+
+        The key idea here is that RPC level CSRT process lifetime reporting is
+        incredibly useful, but can also generate large quantities of data. For
+        example, a view query on a Q=64 database will stream results from 64
+        shard replicas, resulting in at least 64 RPC reports, plus any that
+        might have been generated from RPC workers that "lost" the race for a
+        shard replica. This is very useful, but it is a lot of data given the
+        verbose nature of funneling it through the rsyslog reports; the
+        ability to write directly to something like ClickHouse or another
+        columnar store would be great.
+
+        Until there's an efficient storage mechanism to stream the results to,
+        the rsyslog entries work great and are very practical, but care must
+        be taken to not generate too much data for aggregate queries, as they
+        generate at least ``Q`` times more reports than the single report per
+        HTTP request from the coordinator. This setting exists as a way to
+        either a) utilize the configured logger matcher thresholds to allow
+        *any* rctx, Coordinator or RPC worker, to be recorded when it induces
+        heavy operations; or b) log workloads *only* at the coordinator level.
+
+        .. note::
+
+            This setting exists because we lack an expressive enough config
+            declaration to easily chain the matchspec constructions:
+            ``ets:fun2ms/1`` is a special compile time parse transform macro
+            that requires the full definition to be specified directly, so it
+            cannot be interactively constructed. That said, you *can* register
+            matchers through ``remsh`` with more specific and fine-grained
+            pattern matching, and a more expressive system for defining
+            matchers is being explored.
+
+        .. warning::
+
+            Enabling this setting *will* generate considerably more logs!
+            Specifically, for aggregate queries and database operations, this
+            will generate ``Q`` * ``N`` times more logs than a singular doc
+            request taking only ``N`` RPCs interacting with a singular shard
+            range. See the note above about this being a temporary setting
+            during the experimental stages of CSRT.
+
+        Toggle to enable possibility of RPC process lifecycle reports::
+
+            [csrt]
+            enable_rpc_reporting = false
+
+    .. config:option:: should_truncate_reports :: Truncate zero values from lifecycle reports
+
+        Enables truncation of the CSRT process lifetime reports so that they
+        do not include any fields that are zero at the end of the process
+        lifetime, e.g. don't include ``js_filter=0`` in the report if the
+        request did not induce JavaScript filtering.
+
+        This can be disabled if you really care about consistent fields in the
+        report logs, but this is a log space saving mechanism, similar to
+        disabling RPC reporting by default, as it's a simple way to reduce
+        overall volume.
+
+        Truncate zero values from process lifecycle reports, enabled by default::
+
+            [csrt]
+            should_truncate_reports = true
+
+    .. config:option:: query_limit :: Maximum quantity of rows to return in CSRT query/http requests.
+
+        Limit the quantity of rows that can be returned by an HTTP query::
+
+            [csrt]
+            query_limit = 100
+
+    .. config:option:: query_cardinality_limit :: Maximum cardinality to allow in CSRT query/http requests.
+
+        Limit the quantity of distinct aggregation keys that can be processed
+        in an HTTP query; when the working set is larger than this limit, a
+        truncated result is still returned::
+
+            [csrt]
+            query_cardinality_limit = 10000
+
.. _csrt-logger-matcher-configuration:
+
+CSRT Logger Matcher Configuration
+=================================
+
+There are currently eight builtin default logger matchers designed to make it
+easy to filter for heavy resource usage inducing and long running requests.
+These are designed as a simple baseline of useful matchers, declared in a
+manner amenable to ``default.ini`` based constructs. More expressive matcher
+declarations are being explored, and matchers of arbitrary complexity can be
+registered directly through ``remsh``. The default matchers are all designed
+around an integer config threshold that triggers on a specific field, e.g.
+docs read, or on a delta of fields for long requests and changes requests
+that process many rows but return few.
+
+The current default matchers are:
+
+* `all_coordinators`: match all Coordinators handling HTTP requests
+
+  * :config:option:`Enable <csrt_logger.matchers_enabled/all_coordinators>` | none
+
+* `all_rpc_workers`: match all RPC Workers handling internal requests
+
+  * :config:option:`Enable <csrt_logger.matchers_enabled/all_rpc_workers>` | none
+
+* `docs_read`: match all requests reading more than N docs
+
+  * :config:option:`Enable <csrt_logger.matchers_enabled/docs_read>` |
    :config:option:`Threshold <csrt_logger.matchers_threshold/docs_read>`
+
+* `rows_read`: match all requests reading more than N rows
+
+  * :config:option:`Enable <csrt_logger.matchers_enabled/rows_read>` |
    :config:option:`Threshold <csrt_logger.matchers_threshold/rows_read>`
+
+* `docs_written`: match all requests writing more than N docs
+
+  * :config:option:`Enable <csrt_logger.matchers_enabled/docs_written>` |
    :config:option:`Threshold <csrt_logger.matchers_threshold/docs_written>`
+
+* `ioq_calls`: match all requests inducing more than N ioq_calls
+
+  * :config:option:`Enable <csrt_logger.matchers_enabled/ioq_calls>` |
    :config:option:`Threshold <csrt_logger.matchers_threshold/ioq_calls>`
+
+* `long_reqs`: match all requests lasting more than N milliseconds
+
+  * :config:option:`Enable <csrt_logger.matchers_enabled/long_reqs>` |
    :config:option:`Threshold <csrt_logger.matchers_threshold/long_reqs>`
+
+* `changes_processed`: match all changes requests that returned at least N
+  fewer rows than were necessarily loaded to complete the request (e.g. find
+  heavily filtered changes requests reading many rows but returning few).
+
+  * :config:option:`Enable <csrt_logger.matchers_enabled/changes_processed>` |
    :config:option:`Threshold <csrt_logger.matchers_threshold/changes_processed>`
+
+Each of the default matchers has an enablement setting in
+:ref:`csrt-logger-matcher-configuration-enablement` for toggling it, and all
+but the ``all_coordinators`` and ``all_rpc_workers`` matchers have a
+corresponding threshold value setting in
+:ref:`csrt-logger-matcher-configuration-threshold` that is an integer value
+corresponding to the specific nature of that matcher.
+
+.. seealso::
+
+    :ref:`csrt-logger-matcher-configuration-enablement`
+
+    :ref:`csrt-logger-matcher-configuration-threshold`
+
+.. _csrt-logger-matcher-configuration-enablement:
+
+CSRT Logger Matcher Enablement Configuration
+--------------------------------------------
+
+These settings enable the default logger matchers. Any can be enabled
+independently of the others, but none will generate reports unless the
+:config:option:`csrt/enable` and :config:option:`csrt/enable_reporting`
+settings are both true.
+
+.. seealso::
+
+    :ref:`csrt-logger-matcher-configuration`
+
+    :ref:`csrt-logger-matcher-configuration-threshold`
+
+.. config:section:: csrt_logger.matchers_enabled :: CSRT Logger Matcher Enablement
+
+    .. config:option:: all_coordinators :: Enable all_coordinators CSRT Logger Matcher
+
+        Enable the ``all_coordinators`` default matcher to match against all
+        coordinators handling HTTP requests.
+
+        Enable the matcher::
+
+            [csrt_logger.matchers_enabled]
+            all_coordinators = false
+
    .. config:option:: all_rpc_workers :: Enable all_rpc_workers default CSRT Logger Matcher
+
+        Enable the ``all_rpc_workers`` default matcher to match against all
+        RPC Workers handling internal CouchDB requests. This is predominantly
+        induced by HTTP requests, but any internal systems flowing through
+        ``fabric_rpc`` will be picked up as well, such as internal/external
+        replication and anything that needs to load a document through the
+        quorum system.
+
+        Enable the matcher::
+
+            [csrt_logger.matchers_enabled]
+            all_rpc_workers = false
+
+    .. config:option:: docs_read :: Enable docs_read default CSRT Logger Matcher
+
+        Enable the ``docs_read`` builtin matcher, with a default
+        ``Threshold=1000``, such that any request that reads more than
+        ``Threshold`` docs will generate a CSRT process lifetime report with a
+        summary of its resource consumption.
+
+        This is different from the ``rows_read`` filter in that a view with
+        ``?limit=1000`` will read 1000 rows, but the same request with
+        ``?include_docs=true`` will also induce an additional 1000 docs read.
+
+        Enable the matcher::
+
+            [csrt_logger.matchers_enabled]
+            docs_read = false
+
+        .. seealso::
+            :config:option:`Set docs_read matcher Threshold <csrt_logger.matchers_threshold/docs_read>`
+
+    .. config:option:: rows_read :: Enable rows_read default CSRT Logger Matcher
+
+        Enable the ``rows_read`` builtin matcher, with a default
+        ``Threshold=1000``, such that any request that reads more than
+        ``Threshold`` rows will generate a CSRT process lifetime report with a
+        summary of its resource consumption.
+
+        This is different from the ``docs_read`` filter so that we can
+        distinguish between heavy view requests with lots of rows or heavy
+        requests with lots of docs.
+
+        Enable the matcher::
+
+            [csrt_logger.matchers_enabled]
+            rows_read = false
+
+        .. seealso::
+            :config:option:`Set rows_read matcher Threshold <csrt_logger.matchers_threshold/rows_read>`
+
+    .. config:option:: docs_written :: Enable docs_written default CSRT Logger Matcher
+
+        Enable the ``docs_written`` builtin matcher, with a default
+        ``Threshold=500``, such that any request that writes more than
+        ``Threshold`` docs will generate a CSRT process lifetime report with a
+        summary of its resource consumption.
+
+        Enable the matcher::
+
+            [csrt_logger.matchers_enabled]
+            docs_written = false
+
+        .. seealso::
+            :config:option:`Set docs_written matcher Threshold <csrt_logger.matchers_threshold/docs_written>`
+
+    .. config:option:: ioq_calls :: Enable ioq_calls default CSRT Logger Matcher
+
+        Enable the ``ioq_calls`` builtin matcher, with a default
+        ``Threshold=10000``, such that any request that induces more than
+        ``Threshold`` IOQ calls will generate a CSRT process lifetime report
+        with a summary of its resource consumption.
+
+        Enable the matcher::
+
+            [csrt_logger.matchers_enabled]
+            ioq_calls = false
+
+        .. seealso::
+            :config:option:`Set ioq_calls matcher Threshold <csrt_logger.matchers_threshold/ioq_calls>`
+
+    .. config:option:: long_reqs :: Enable long_reqs default CSRT Logger Matcher
+
+        Enable the ``long_reqs`` builtin matcher, with a default
+        ``Threshold=60000``, such that any request where the last CSRT rctx
+        ``updated_at`` timestamp is at least ``Threshold`` milliseconds greater
+        than the ``started_at`` timestamp will generate a CSRT process lifetime
+        report with a summary of its resource consumption.
+
+        Enable the matcher::
+
+            [csrt_logger.matchers_enabled]
+            long_reqs = false
+
+        .. seealso::
+            :config:option:`Set long_reqs matcher Threshold <csrt_logger.matchers_threshold/long_reqs>`
+
    .. config:option:: changes_processed :: Enable changes_processed default CSRT Logger Matcher
+
+        Enable the ``changes_processed`` builtin matcher, with a default
+        ``Threshold=1000``, such that any request where the CSRT rctx
+        ``rows_read`` field is at least ``Threshold`` greater than the rctx
+        ``changes_returned`` field will generate a CSRT process lifetime
+        report with a summary of its resource consumption.
+
+        Enable the matcher::
+
+            [csrt_logger.matchers_enabled]
+            changes_processed = false
+
+        .. seealso::
+            :config:option:`Set changes_processed matcher Threshold <csrt_logger.matchers_threshold/changes_processed>`
+
+.. _csrt-logger-matcher-configuration-threshold:
+
+CSRT Logger Matcher Threshold Configuration
+-------------------------------------------
+
+These settings control the Threshold configurations for the default matchers.
+These are scalar integer values used by all default matchers aside from
+``all_coordinators`` and ``all_rpc_workers``. See the top level config for
+more information and the enablement config for how to enable these matchers.
+
+.. seealso::
+
+    :ref:`csrt-logger-matcher-configuration`
+
+    :ref:`csrt-logger-matcher-configuration-enablement`
+
+.. config:section:: csrt_logger.matchers_threshold :: CSRT Logger Matcher Threshold
+
+    .. config:option:: docs_read :: Set Threshold for docs_read CSRT Logger Matcher
+
+        Threshold for the ``docs_read`` logger matcher, defaults to ``1000`` docs read.
+
+        Set the Threshold::
+
+            [csrt_logger.matchers_threshold]
+            docs_read = 1000
+
+        .. seealso::
+            :config:option:`Enable docs_read matcher <csrt_logger.matchers_enabled/docs_read>`
+
+    .. config:option:: rows_read :: Set Threshold for rows_read default CSRT Logger Matcher
+
+        Threshold for the ``rows_read`` logger matcher, defaults to ``1000`` rows read.
+
+        Set the Threshold::
+
+            [csrt_logger.matchers_threshold]
+            rows_read = 1000
+
+        .. seealso::
+            :config:option:`Enable rows_read matcher <csrt_logger.matchers_enabled/rows_read>`
+
+    .. config:option:: docs_written :: Set Threshold for docs_written default CSRT Logger Matcher
+
+        Threshold for the ``docs_written`` logger matcher, defaults to ``500`` docs written.
+
+        Set the Threshold::
+
+            [csrt_logger.matchers_threshold]
+            docs_written = 500
+
+        .. seealso::
+            :config:option:`Enable docs_written matcher <csrt_logger.matchers_enabled/docs_written>`
+
+    .. config:option:: ioq_calls :: Set Threshold for ioq_calls default CSRT Logger Matcher
+
+        Threshold for the ``ioq_calls`` logger matcher, defaults to ``10000`` IOQ calls.
+
+        Set the Threshold::
+
+            [csrt_logger.matchers_threshold]
+            ioq_calls = 10000
+
+        .. seealso::
+            :config:option:`Enable ioq_calls matcher <csrt_logger.matchers_enabled/ioq_calls>`
+
+    .. config:option:: long_reqs :: Set Threshold for long_reqs default CSRT Logger Matcher
+
+        Threshold for the ``long_reqs`` logger matcher, defaults to ``60000``
+        milliseconds (1 minute).
+
+        Set the Threshold::
+
+            [csrt_logger.matchers_threshold]
+            long_reqs = 60000
+
+        .. seealso::
+            :config:option:`Enable long_reqs matcher <csrt_logger.matchers_enabled/long_reqs>`
+
+    .. config:option:: changes_processed :: Set Threshold for changes_processed default CSRT Logger Matcher
+
+        Threshold for the ``changes_processed`` logger matcher, defaults to
+        ``1000`` changes processed.
+
+        Set the Threshold::
+
+            [csrt_logger.matchers_threshold]
+            changes_processed = 1000
+
+        .. seealso::
+            :config:option:`Enable changes_processed matcher <csrt_logger.matchers_enabled/changes_processed>`
+
+Recommendations
+===============
+
+CSRT is still experimental, and the Logger Matcher declaration syntax limits
+the types of filtering and queries available, so caution should be exercised
+when enabling any RPC reporting; unless you have a specific need, you can
+leave RPC reporting disabled.
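+
+For the adventurous, matchers of arbitrary complexity can also be registered
+directly through ``remsh``, as noted above. The following is a rough sketch of
+what that can look like; the ``#rctx{}`` fields and the ``persistent_term``
+layout shown here are borrowed from the CSRT test suite and are *not* a stable
+public API, so treat this as an illustration rather than a supported recipe:
+
+.. code-block:: erlang
+
+    %% Load the rctx record definition into the shell (path is illustrative).
+    rr("src/couch_srt/src/couch_srt.hrl").
+
+    %% Sketch: match any context against db "foo" inducing > 1000 IOQ calls.
+    MSpec = ets:fun2ms(
+        fun(#rctx{dbname = <<"foo">>, ioq_calls = IOQ} = R) when IOQ > 1000 -> R end
+    ),
+
+    %% Register it alongside a compiled form, mirroring the test helper's
+    %% add_matcher/2 (assumption: this layout matches the current internals).
+    persistent_term:put({csrt_logger, all_csrt_matchers}, #{
+        "foo_heavy_io" => {MSpec, ets:match_spec_compile(MSpec)}
+    }).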
+
+Simple Approach: Log a CSRT report for all coordinators
+--------------------------------------------------------
+
+To enable process lifecycle reporting for all HTTP requests, enable the
+`all_coordinators` matcher by way of
+:config:option:`csrt_logger.matchers_enabled/all_coordinators`, in addition to
+the normal CSRT enablements for tracking and reporting. This will result in a
+1:1 mapping of HTTP requests to CSRT report logs, connected by the request
+`nonce`. For example::
+
+    [csrt]
+    enable = true
+    enable_init_p = true
+    enable_reporting = true
+
+    [csrt_logger.matchers_enabled]
+    all_coordinators = true
+
+Custom Logger Matcher filtering
+-------------------------------
+
+The default loggers and logger thresholds have been configured to make it easy
+to find significant requests that induce heavy resource usage, without
+drastically increasing the data log volume. For example, logging only when a
+request takes more than a minute or induces more than 10000 IOQ calls captures
+exactly the significant requests you most likely want to be informed about, as
+they're well outside of normal efficient database queries, while constraining
+total log volume for non-significant requests. The default logger matcher
+Thresholds provide a simple way to set high level watermarks that
+automatically generate logged reports for further analysis.
+
+Here's a recommended sample configuration enabling the threshold-based default
+matchers with their default config values::
+
+    [csrt]
+    enable = true
+    enable_init_p = true
+    enable_reporting = true
+
+    [csrt_logger.matchers_enabled]
+    docs_read = true
+    rows_read = true
+    docs_written = true
+    long_reqs = true
+    changes_processed = true
+    ioq_calls = true
+
+That will enable CSRT stats collection and the new RPC stats, CSRT reporting,
+and the default Logger Matchers, although these can be individually enabled as
+well. The Logger Matcher Threshold values can be configured as follows, with
+the commented-out defaults shown::
+
+    [csrt_logger.matchers_threshold]
+    ;docs_read = 1000
+    ;rows_read = 1000
+    ;docs_written = 500
+    ;long_reqs = 60000
+    ;changes_processed = 1000
+    ;ioq_calls = 10000
+
+Change these values and enablements at your discretion. If you really want to
+enable RPC reporting, you can do so by way of
+:config:option:`csrt/enable_rpc_reporting`, which will then use the same
+configured Thresholds to match against RPC workers. For example, with the
+default ``ioq_calls`` Threshold of 10000, an ``rpc_worker`` rctx report is
+generated for any worker that induced more than 10000 ``ioq_calls``, and
+similarly for the coordinator. This is a little awkward, but it provides a way
+to at least get node level reports when you really need to see RPC worker
+resource usage at the node level.
+This is certainly useful, but with Thresholds configured low enough it will
+generate large volumes of RPC worker reports, as described above, so caution
+is warranted when enabling RPC report logging with these filters::
+
+    [csrt]
+    enable = true
+    enable_init_p = true
+    enable_reporting = true
+    enable_rpc_reporting = true
+
+    [csrt_logger.matchers_enabled]
+    docs_read = true
+    rows_read = true
+    docs_written = true
+    long_reqs = true
+    changes_processed = true
+    ioq_calls = true
+
+Alternatively, if you really want to enable RPC reporting, you can do so by way
+of :config:option:`csrt_logger.matchers_enabled/all_rpc_workers`, which combined
+with :config:option:`csrt_logger.matchers_enabled/all_coordinators` will enable
+logging for *ALL* coordinators and RPC workers, resulting in a report generated
+for every CSRT tracked process lifecycle. This would be much better suited for
+writing directly to a vector store for post processing without any of the
+verbose string labels. This also has the advantage of exposing both
+``all_coordinators`` and ``all_rpc_workers`` through the ``/_active_resources``
+interface, allowing for efficient querying and aggregating on either all
+coordinators or all RPC workers::
+
+    [csrt]
+    enable = true
+    enable_init_p = true
+    enable_reporting = true
+    enable_rpc_reporting = true
+
+    [csrt_logger.matchers_enabled]
+    all_coordinators = true
+    all_rpc_workers = true
+
+.. note::
+
+    Enabling :config:option:`csrt_logger.matchers_enabled/all_rpc_workers` while
+    leaving :config:option:`csrt/enable_rpc_reporting` disabled results in a
+    pragmatic middle ground: no RPC reports are generated, yet the
+    ``all_rpc_workers`` logger matcher remains enabled for querying. We should
+    probably extend the Logger Matcher logic to allow specific Logger Matchers
+    to be utilized only by the querying APIs, and to allow more stringent
+    filters when deciding whether to generate a lifecycle report.
diff --git a/src/docs/src/config/index.rst b/src/docs/src/config/index.rst
index 347785c43d2..dfac1e877df 100644
--- a/src/docs/src/config/index.rst
+++ b/src/docs/src/config/index.rst
@@ -36,3 +36,4 @@ Configuration
     query-servers
     misc
     resharding
+    csrt
diff --git a/src/docs/src/csrt/index.rst b/src/docs/src/csrt/index.rst
new file mode 100644
index 00000000000..83c1b471974
--- /dev/null
+++ b/src/docs/src/csrt/index.rst
@@ -0,0 +1,541 @@
+.. Licensed under the Apache License, Version 2.0 (the "License"); you may not
+.. use this file except in compliance with the License. You may obtain a copy of
+.. the License at
+..
+..   http://www.apache.org/licenses/LICENSE-2.0
+..
+.. Unless required by applicable law or agreed to in writing, software
+.. distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+.. WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+.. License for the specific language governing permissions and limitations under
+.. the License.
+
+.. _csrt:
+
+===================================
+Couch Stats Resource Tracker (CSRT)
+===================================
+
+The ``couch_srt`` app introduces the Couch Stats Resource Tracker, aka CSRT for
+short.
+CSRT is a real time stats tracking system that tracks the quantity of
+resources induced at the process level in a live queryable manner, while also
+generating process lifetime reports containing statistics on the total resource
+load of a request, as a function of CouchDB operations like dbs/docs opened,
+view and changes rows read, changes returned vs processed, Javascript filter
+usage, request duration, and more. This system is a paradigm shift in CouchDB
+visibility and introspection, allowing for expressive real time querying
+capabilities to introspect, understand, and aggregate CouchDB internal resource
+usage, as well as powerful filtering facilities for conditionally generating
+reports on "heavy usage" requests or "long/slow" requests. CSRT also extends
+``recon:proc_window/3`` with ``couch_srt:proc_window/3``, allowing for the same
+style of battle hardened introspection as Recon's excellent ``proc_window``,
+but with the sample window over any of the CSRT tracked CouchDB stats!
+
+CSRT does this by piggy-backing off of the existing metrics tracked by way of
+``couch_stats:increment_counter/{1,2}``: at the time the local process induces
+those metric inc calls, CSRT updates an ets entry containing the context
+information for the local process, such that global aggregate queries can be
+performed against the ets table, and a process resource usage report can be
+generated at the conclusion of the process's lifecycle. The ability to do
+aggregate querying in real time, in addition to the process lifecycle reports
+for post facto analysis over time, is a cornerstone of CSRT, and the result of
+a series of iterations until a robust and scalable approach was built.
+
+The real time querying is achieved by way of a global ets table with
+``read_concurrency``, ``write_concurrency``, and ``decentralized_counters`` enabled.
+Great care was taken to ensure that *zero* concurrent writes to the same key
+occur in this model, and this entire system is predicated on the fact that
+``ets:update_counter/{3,4}`` provides *really* fast and efficient incremental
+updates in an atomic and isolated fashion when coupled with decentralized
+counters and write concurrency. Each process that calls
+``couch_stats:increment_counter/{1,2}`` tracks its local context in CSRT as well,
+with zero concurrent writes from any other processes. Outside of the context
+setup and teardown logic, *only* operations to ``ets:update_counter/{3,4}`` are
+performed, one per process invocation of ``couch_stats:increment_counter/{1,2}``,
+and one for coordinators to update worker deltas in a single batch, resulting
+in a 1:1 ratio of ets calls to real time stats updates for the primary
+workloads.
+
+The primary achievement of CSRT is the core framework itself for concurrent
+process local stats tracking and real time RPC delta accumulation in a scalable
+manner that allows for real time aggregate querying and process lifecycle
+reports. This took several versions to find a scalable and robust approach that
+induced minimal impact on maximum system throughput. Now that the framework is
+in place, it can be extended to track any further desired process local uses of
+``couch_stats:increment_counter/{1,2}``. That said, the currently selected set of
+stats to track was heavily influenced by the challenges in retroactively
+understanding the quantity of resources induced by a query like
+``/db/_changes?since=$SEQ``, or similarly, ``/db/_find``.
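+
+To make the write path concrete, here is a minimal, illustrative Erlang sketch
+of the ets accumulation pattern described above; the module, function names,
+and single-counter row shape are hypothetical simplifications for exposition,
+not the actual ``couch_srt`` internals::
+
+    -module(demo_csrt).
+    -export([init/0, ioq_called/1]).
+
+    %% Position of the ioq_calls counter in the stored {PidRef, IoqCalls} row.
+    -define(IOQ_CALLS_POS, 2).
+
+    init() ->
+        ets:new(?MODULE, [named_table, public,
+            {write_concurrency, true},
+            {read_concurrency, true},
+            {decentralized_counters, true}]).
+
+    %% Each tracked process updates only its own {Pid, Ref} keyed row, so no
+    %% two processes ever write to the same key concurrently, and every stat
+    %% bump is a single atomic ets:update_counter/4 call that inserts a
+    %% zeroed row on first use.
+    ioq_called(PidRef) ->
+        ets:update_counter(?MODULE, PidRef, {?IOQ_CALLS_POS, 1}, {PidRef, 0}).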
+
+CSRT started as an extension of the Mango execution stats logic to ``_changes``
+feeds to get proper visibility into the quantity of docs read and filtered per
+changes request, but then the focus inverted with the realization that we
+should instead build on the existing stats tracking mechanisms, which track
+information that has already been deemed critical, and which then also allow
+for the real time tracking and aggregate query capabilities. The Mango
+execution stats can be ported into CSRT itself and just become one subset of
+the stats tracked as a whole, and similarly, any additional desired stats
+tracking can be easily added and will be picked up in the RPC deltas and
+process lifetime reports.
+
+.. seealso::
+
+    :doc:`/config/csrt`
+
+CSRT Overview
+-------------
+
+    When an incoming HTTP request is handled by the CouchDB ``chttpd`` worker
+    pool, that worker process instantiates a CSRT ``coordinator`` context to
+    track both the resources induced by the local coordinator process, as well
+    as to aggregate the resources induced by the remote RPC worker processes
+    needed to fulfill the given request; each of those workers instantiates its
+    own ``rpc_worker`` context to do local tracking and forward the deltas back
+    to the coordinator.
+
+    These contexts are represented internally by the ``#rctx{}`` record and are
+    often referred to as ``rctxs`` for short.
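+
+    For orientation, the fields surfaced in the report and query examples later
+    in this document suggest a shape roughly like the following; this is an
+    illustrative subset for exposition, not the actual record definition in
+    ``couch_srt``::
+
+        %% Illustrative subset of the fields visible in CSRT lifecycle reports.
+        -record(rctx, {
+            pid_ref,              %% {Pid, Ref} identifying the tracked process
+            type,                 %% coordinator or rpc_worker context
+            dbname,
+            username,
+            nonce,                %% links the rctx back to its HTTP request
+            started_at,
+            updated_at,
+            db_open = 0,
+            docs_read = 0,
+            rows_read = 0,
+            changes_returned = 0,
+            ioq_calls = 0,
+            js_filter = 0
+        }).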
+
+    For a singular doc open request, e.g. ``GET /foo/bar``, the coordinator
+    process will query the normal N=3 shard replicas containing that doc. The
+    coordinator process spawns 3 RPC workers on the relevant nodes in the
+    cluster containing those shard replicas, each of which creates a local
+    rctx. This is a total of four processes spawned to fulfill the get doc HTTP
+    request, each of which has a valid local rctx that can generate a process
+    lifecycle report providing detailed node level information for the RPC
+    workers.
+
+    We must decide what to log. A simple approach is to enable the
+    ``all_coordinators`` matcher: this will create a process lifecycle report
+    for every HTTP request made against the CouchDB cluster, providing detailed
+    statistics about the total quantities of CouchDB resources induced to
+    fulfill the request. This creates a 1:1 mapping of CSRT reports to HTTP
+    requests, as only the coordinators are logged and no RPC workers.
+
+    Logging a CSRT report for each HTTP request provides a chronological view
+    of work induced, at the cluster level, that can then be analyzed with
+    various tools to visualize and understand what the cluster was doing and
+    why. At a simpler level, these reports can be summed together over a time
+    period on the relevant fields, and then we can utilize calculus to take
+    the derivative and understand the largest spikes in workload over time,
+    or the integral to understand the total quantities of resources induced
+    over time, e.g. docs read per second or how many total docs were read in a
+    given time period, respectively.
+
+    However, that's exactly what a tool like Graphite or Prometheus connected
+    to CouchDB's existing stats facilities would provide, so what does CSRT
+    accomplish? Specifically, the current CouchDB stats are at the node level,
+    meaning we can see how many documents a particular node loaded as a
+    function of time, or we can aggregate the sum over all the nodes at the
+    cluster level, giving us the total docs read as we did above, but CSRT goes
+    beyond the node level and provides:
+
+    * Real time tracking of those same existing stats, but at the HTTP
+      coordinator and RPC worker process level
+
+    * Performant HTTP and internal query APIs for advanced aggregations and
+      sorting on top of efficient ETS match specs, done in a constrained
+      manner so that queries don't overload the system yet still provide
+      meaningful results
+
+    * A dynamic "Logger Matcher" system for enabling, on the fly, powerful
+      filtering of CouchDB internal activity, whether for real time querying
+      or for generating logs for HTTP requests with detailed statistics about
+      their CouchDB resource usage
+
+    The RPC worker stats are tracked in real time to have their workloads
+    queryable in real time, but also because they need to funnel the statistics
+    back to the coordinator so that we can generate a report for the HTTP
+    request as a whole. This brings us back to the question of what to log:
+    our ``GET /foo/bar`` example above spawned four processes to fulfill the
+    request, each of which instantiated a local CSRT context to track its
+    usage. We could log reports for all three workers and the coordinator,
+    which would give us four total reports, which is maybe alright, but it's
+    4x more log lines compared to the singular HTTP entry.
+
+    Taking that a step further, if we perform a view query against a ``Q=64``
+    database, that will create a ``coordinator`` rctx, as well as
+    ``Q * N = 64 * 3 = 192`` total ``rpc_worker`` rctxs. Although 2/3rds of
+    those workers will die out after losing the race for the shard range, if
+    we logged rctx reports for all processes tracked, the singular HTTP view
+    query against a ``Q=64`` database would generate 192 RPC worker reports
+    and 1 coordinator report!
+
+    Generating 193 rsyslog report entries for a singular HTTP request is a
+    significant increase over the singular HTTP log entry normally generated,
+    *however*, a Javascript filtered changes request from ``since=0`` on a
+    ``Q=64`` billion+ doc database will take many billions of rows read, docs
+    read, and IOQ calls to fulfill, at which point, the 193 induced reports,
+    even with 2/3rds as noops, are suddenly inconsequential compared to the
+    raw CPU and network/disk IO induced to fulfill that request.
+
+    CSRT itself creates the real time tracking system that allows us to track
+    process level CouchDB metrics in HTTP coordinators and RPC workers in a
+    real time queryable manner while also shipping the RPC deltas back to the
+    coordinators for aggregating. The data itself is able to be collected with
+    minimal impact and at high throughput, but the act of logging and querying
+    in real time is what becomes expensive.
+
+    The balance in CSRT is in how to query and save the usage data efficiently,
+    with minimal impact on CouchDB throughput and performance, while still
+    allowing for meaningful insights.
+
+    .. note::
+
+        The core stats collection of CSRT is highly performant after having
+        gone through a number of iterations and performance testing to find a
+        viable approach.
+        One of the key *experimental* aspects of CSRT is our
+        ability to map Logger Matcher configurations from ``default.ini`` into
+        CSRT itself and generate a corresponding ``ets:match_spec()``. If we
+        had a way of declaring a Logger Matcher in the ini file, by way of
+        Mango style specs that could then be translated into a compiled match
+        spec, then we could eliminate the need for the default matchers and
+        the toggles for RPC reporting.
+
+    The CSRT Logger Matchers are a first pass pragmatic approach for being able
+    to easily map useful filtering declarations into logs and the HTTP query
+    API while constraining the volume of data returned. For example, enabling
+    the default ``ioq_calls`` matcher with a threshold of 10000 IOQ calls,
+    without RPC reporting enabled, will result in generating a CSRT lifecycle
+    report for any matching HTTP request. All of the enabled matchers are
+    exposed in the HTTP API as well, so you can query against the ``ioq_calls``
+    matcher and perform aggregations on top of those results, grouping by
+    database name for instance. The aggregations happen *on top* of the
+    ``ets:match_spec()`` filtered rows, so the initial query filtering is
+    performed as close to the internal ETS data storage as possible, and we can
+    focus on workloads that are already established as significant, and *only*
+    aggregate on top of those significant workloads. This is absolutely
+    essential for being able to perform these types of queries and aggregations
+    at scale with minimal impact on CouchDB.
+
+    Furthermore, the heavy RPC workers that would normally be skipped in the
+    logs are still queryable through the HTTP API and query API, so they can
+    still be introspected to diagnose ongoing cluster issues, without having to
+    modify logging settings.
+
+An example of the hidden data CSRT exposes
+------------------------------------------
+
+    Let's take a look at a more concrete example demonstrating where CSRT
+    fundamentally changes our visibility into the internal activities of
+    CouchDB. The following screenshot is from a benchmark against a real world
+    cluster, but let's say this view is roughly keyed into a real workload's
+    daily cyclic run, and we're trying to understand what CouchDB is doing and
+    why, for the given workload. So here we have some time series graphs built
+    from querying ``GET /_node/_local/_system`` and aggregated across the
+    cluster, showing HTTP throughput, CouchDB operations, IOQ throughput,
+    Erlang memory, and Erlang process count:
+
+    .. figure:: ../../images/csrt-sample-workload.png
+        :align: center
+        :alt: Sample high throughput workload
+
+    We can see a steady increase in processes over time, and a similar but
+    logarithmic-ish increase in the CouchDB operations as well as in the IOQ
+    calls; however, the HTTP throughput actually goes *down*, and we'd like to
+    understand why that's happening. The benchmark above was gradually
+    saturating CPU as a function of time, with near full CPU saturation at the
+    end of the run, and we know the Erlang process count largely tracks the
+    increased concurrency of the normal cyclic workload coming in, so as we
+    run out of available CPU while also increasing the number of concurrent
+    requests, those requests naturally get slower.
+
+    That said, we can see this cluster tops out at a sustained one million IOQ
+    operations per second, to fulfill maybe 1000 requests per second, which is
+    a 1000x increase in IOQ calls relative to HTTP requests, a pretty
+    substantial difference. The next question is obviously: what's actually
+    inducing all of those operations? What HTTP requests are actually causing
+    1 million IOQ ops per second and 300k doc reads per second? Are these slow
+    or problematic queries? If not, are we just hitting cluster capacity and
+    need to expand? It is very difficult to draw concrete conclusions to these
+    types of questions from the existing data, and often isolated experiments
+    and profiling are required to even begin to track down the heavy usage
+    requests, especially when those heavy requests are a needle in a haystack.
+    The above metrics visualization does a great job of informing us that the
+    system is *very* busy, but it's difficult to understand why. Even the
+    CouchDB doc read operation stats are difficult to correlate, as doc reads
+    could be happening from views or all docs or changes or background
+    operations, further clouding the connection between these metrics and the
+    HTTP requests that induced them.
+
+    This is where you would enable CSRT with the ``ioq_calls`` matcher, and
+    maybe the ``docs_read`` matcher, allowing you to query live and track down
+    what requests are generating all of that load. Or better yet, if you'd
+    already enabled CSRT and reporting on the default Logger Matchers, there'd
+    be a logged report for each of the heavy requests using more than 10000
+    IOQ calls or 1000 docs read. Perhaps only a few requests are hitting the
+    10000 IOQ calls metric, so you'd like to lower the Threshold to 5000 IOQ
+    calls, or maybe that wasn't sufficient so you dropped it down further to
+    1000. These Logger Matchers can be enabled dynamically and their
+    Thresholds configured dynamically, and the CSRT logger will pick up on
+    those changes and reload the matchers immediately, live, such that the new
+    Thresholds apply to the process lifecycle logging reports as well as to
+    querying the ``_active_resources`` against the ``ioq_calls`` matcher.
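+
+    For example, in a ``remsh``, lowering the ``ioq_calls`` Threshold on the
+    fly can be done with the standard ``config`` application (the same change
+    can also be made through the HTTP ``_config`` endpoint); the value ``5000``
+    here is illustrative::
+
+        (node1@127.0.0.1)1> config:set("csrt_logger.matchers_threshold", "ioq_calls", "5000").
+        ok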
+
+    .. seealso::
+
+        :config:option:`Enable CSRT here <csrt/enable>`
+
+        :config:option:`Enable CSRT reporting here <csrt/enable_reporting>`
+
+        :config:option:`Enable ioq_calls matcher here <csrt_logger.matchers_enabled/ioq_calls>`
+
+        :config:option:`Set ioq_calls matcher Threshold here <csrt_logger.matchers_threshold/ioq_calls>`
+
+    Once the Logger Matchers are enabled, reports can be generated
+    automatically, and advanced query aggregations become available. In our
+    heavy example cyclic workload above, the benchmark was a ramp-up view
+    query run against a ``Q=64`` database with ``?group=false&include_docs=true``
+    and no limit, specifically to do a full index scan of the database, loading
+    every single doc in the process; we then spawn another HTTP worker
+    performing those view requests every second, progressively overloading the
+    system, as you can see in the Erlang process count metrics. In this case
+    the workload was fairly uniform, in that it wasn't skewed by outliers, but
+    rather by a large parallel quantity of full database scans by way of view
+    indexes.
+
+Another example with csrt proc window
+-------------------------------------
+
+    Now let us continue with another example, this time demonstrating the use
+    of ``couch_srt:proc_window/3`` in a ``remsh``, as one would do with
+    ``recon:proc_window/3`` to get an idea of the heavy active processes in
+    the system. Normally one would run something like
+    ``recon:proc_window(reductions, 5, 5000).`` to list the top 5 most active
+    processes over the next five seconds, sorted by delta on the reductions
+    count of each process.
+    Essentially ``recon:proc_window/3`` takes
+    a snapshot of the system at ``T0`` for the data you requested, waits 5000
+    milliseconds, fetches a snapshot of the system at ``T1``, then performs a
+    delta of ``T1`` against ``T0``, sorting and returning the top 5 results.
+    Recon does this by way of a heavily optimized data structure allowing for
+    minimal memory consumption and efficient deltas on systems with high
+    Erlang process counts.
+
+    The ``couch_srt:proc_window/3`` functionality piggy backs off of
+    ``recon:proc_window/3`` and utilizes the same core data structures and
+    delta sorting logic, but instead of sampling ``erlang:process_info/2``
+    statistics, it exposes the same logic over the CouchDB internal CSRT
+    metrics, like ``docs_read``, ``ioq_calls``, ``js_filter``, etc.
+
+    .. note::
+
+        The ``couch_srt:proc_window/3`` functionality is demonstrated in a
+        ``remsh`` as it's not currently exposed by way of the HTTP API, but it
+        can now readily be built on top of the field extraction logic in
+        ``couch_srt_query`` that powers the HTTP API, to which it should map
+        over well.
+
+    Now, given a database ``foo`` with 11k documents, each containing an
+    integer ``doc.value`` field that a design doc filter can classify as even
+    or odd, we can instantiate a series of while loops in parallel making
+    requests of the form::
+
+        GET /foo/_changes?filter=bar/even&include_docs=true
+
+    This generates a good chunk of load on a local laptop dev setup, resulting
+    in requests that take a few seconds to read through the changes feed,
+    fetch all 11k docs, and then funnel them through the Javascript engine to
+    filter for even valued docs; this gives us time to query these heavier
+    requests live and see them in progress with the real time stats tracking
+    and querying capabilities of CSRT.
+
+    For example, let's use ``couch_srt:proc_window/3`` as one would do with
+    ``recon:proc_window/3`` to get an idea of the heavy active processes on
+    the system::
+
+        (node1@127.0.0.1)2> rp([{PR, couch_srt:to_json(couch_srt:get_resource(PR))} || {PR, _, _} <- couch_srt:proc_window(ioq_calls, 3, 1000)]).
+        [{{<0.5090.0>,#Ref<0.2277656623.605290499.37969>},
+          #{changes_returned => 3962,db_open => 10,dbname => <<"foo">>,
+            docs_read => 7917,docs_written => 0,get_kp_node => 54,
+            get_kv_node => 1241,ioq_calls => 15834,js_filter => 7917,
+            js_filtered_docs => 7917,nonce => <<"cc5a814ceb">>,
+            pid_ref =>
+                <<"<0.5090.0>:#Ref<0.2277656623.605290499.37969>">>,
+            rows_read => 7917,
+            started_at => <<"2025-07-21T17:25:08.784z">>,
+            type =>
+                <<"coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes">>,
+            updated_at => <<"2025-07-21T17:25:13.051z">>,
+            username => <<"adm">>}},
+         {{<0.5087.0>,#Ref<0.2277656623.606601217.92191>},
+          #{changes_returned => 4310,db_open => 10,dbname => <<"foo">>,
+            docs_read => 8624,docs_written => 0,get_kp_node => 58,
+            get_kv_node => 1358,ioq_calls => 17248,js_filter => 8624,
+            js_filtered_docs => 8624,nonce => <<"0e625c723a">>,
+            pid_ref =>
+                <<"<0.5087.0>:#Ref<0.2277656623.606601217.92191>">>,
+            rows_read => 8624,
+            started_at => <<"2025-07-21T17:25:08.424z">>,
+            type =>
+                <<"coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes">>,
+            updated_at => <<"2025-07-21T17:25:13.051z">>,
+            username => <<"adm">>}},
+         {{<0.5086.0>,#Ref<0.2277656623.605290499.27728>},
+          #{changes_returned => 4285,db_open => 10,dbname => <<"foo">>,
+            docs_read => 8569,docs_written => 0,get_kp_node => 57,
+            get_kv_node => 1349,ioq_calls => 17138,js_filter => 8569,
+            js_filtered_docs => 8569,nonce => <<"962cda1645">>,
+            pid_ref =>
+                <<"<0.5086.0>:#Ref<0.2277656623.605290499.27728>">>,
+            rows_read => 8569,
+            started_at => <<"2025-07-21T17:25:08.406z">>,
+            type =>
+                <<"coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes">>,
+            updated_at => <<"2025-07-21T17:25:13.051z">>,
+            username => <<"adm">>}}]
+        ok
+
+    This shows us the top 3 most active processes (being tracked in CSRT) over
+    the next 1000 milliseconds, sorted by number of ``ioq_calls`` induced! All
+    three of these processes are incurring heavy usage, reading many thousands
+    of docs with 15k+ IOQ calls and heavy JS filter usage, exactly the types
+    of requests you want to be alerted to. CSRT's proc window logic is built
+    on top of Recon's, which doesn't return the process info itself, so you'll
+    need to fetch the process status with ``couch_srt:get_resource/1`` and
+    then pretty print it with ``couch_srt:to_json/1``.
+
+    The output above is a real time snapshot of the live running system and
+    shows processes actively inducing additional resource usage, so these CSRT
+    context values are just a point-in-time snapshot of where each process
+    was, as of its ``updated_at`` timestamp. We can reference the nonce value
+    to search through the report logs for a final report, assuming the given
+    context ended up using sufficient resources to trigger a logger matcher
+    lifetime report. The above changes requests were crafted specifically to
+    trigger reports, so unsurprisingly we have reports for all three.
+
+    First, however, let's look at the existing visibility into these changes
+    requests provided by the raw HTTP logs, to highlight the additional
+    insight into request workloads that the CSRT reports provide. Here are the
+    existing HTTP logs for those 3 requests::
+
+        (chewbranca)-(jobs:1)-(~/src/couchdb_csrt_v3)
+        (! 9872)-> grep 'cc5a814ceb\|0e625c723a\|962cda1645' ./dev/logs/node1.log | grep -v '^\[report]'
+        [notice] 2025-07-21T17:25:14.520641Z node1@127.0.0.1 <0.5087.0> 0e625c723a localhost:15984 127.0.0.1 adm GET /foo/_changes?filter=bar/even&asdf=fdsa&include_docs=true 200 ok 6096
+        [notice] 2025-07-21T17:25:14.521417Z node1@127.0.0.1 <0.5086.0> 962cda1645 localhost:15984 127.0.0.1 adm GET /foo/_changes?filter=bar/even&asdf=fdsa&include_docs=true 200 ok 6115
+        [notice] 2025-07-21T17:25:14.844317Z node1@127.0.0.1 <0.5090.0> cc5a814ceb localhost:15984 127.0.0.1 adm GET /foo/_changes?filter=bar/even&asdf=fdsa&include_docs=true 200 ok 6059
+
+    So we see the requests were made, and we can see each is doing
+    ``include_docs=true`` as well as using a custom filter, both obvious
+    indications that this is a potentially heavier request. However, we don't
+    know whether database ``foo`` had a thousand docs or a billion docs,
+    whether those docs were small or large, nor anything about the
+    computational complexity of the referenced filter function. This makes it
+    challenging to retroactively correlate heavy resource usage at a hardware
+    level with the underlying requests that induced those workloads,
+    especially if the heavy requests are an inconspicuous subset of the full
+    database workload.
+
+    CSRT resolves this by providing a real time querying system to find the
+    active heavy processes, live, as well as a process lifecycle reporting
+    engine providing detailed analysis of the workloads induced by a request.
+
+    Let's assume we had the default IOQ logger matcher enabled, with the
+    default configuration of logging any requests inducing more than 10k IOQ
+    calls, which would catch all three of the requests we just saw in flight.
+    As a result, we generate process lifecycle reports for all three of those
+    requests, as we can see::
+
+        (chewbranca)-(jobs:1)-(~/src/couchdb_csrt_v3)
+        (! 9873)-> grep 'cc5a814ceb\|0e625c723a\|962cda1645' ./dev/logs/node1.log | grep '^\[report]'
+        [report] 2025-07-21T17:25:14.520787Z node1@127.0.0.1 <0.5174.0> -------- [csrt-pid-usage-lifetime changes_returned=5500 db_open=10 dbname="foo" docs_read=11001 get_kp_node=72 get_kv_node=1754 ioq_calls=22002 js_filter=11001 js_filtered_docs=11001 nonce="0e625c723a" pid_ref="<0.5087.0>:#Ref<0.2277656623.606601217.92191>" rows_read=11001 started_at="2025-07-21T17:25:08.424z" type="coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes" updated_at="2025-07-21T17:25:14.520z" username="adm"]
+        [report] 2025-07-21T17:25:14.521578Z node1@127.0.0.1 <0.5155.0> -------- [csrt-pid-usage-lifetime changes_returned=5500 db_open=10 dbname="foo" docs_read=11001 get_kp_node=72 get_kv_node=1754 ioq_calls=22002 js_filter=11001 js_filtered_docs=11001 nonce="962cda1645" pid_ref="<0.5086.0>:#Ref<0.2277656623.605290499.27728>" rows_read=11001 started_at="2025-07-21T17:25:08.406z" type="coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes" updated_at="2025-07-21T17:25:14.521z" username="adm"]
+        [report] 2025-07-21T17:25:14.844436Z node1@127.0.0.1 <0.5213.0> -------- [csrt-pid-usage-lifetime changes_returned=5500 db_open=10 dbname="foo" docs_read=11001 get_kp_node=72 get_kv_node=1754 ioq_calls=22002 js_filter=11001 js_filtered_docs=11001 nonce="cc5a814ceb" pid_ref="<0.5090.0>:#Ref<0.2277656623.605290499.37969>" rows_read=11001 started_at="2025-07-21T17:25:08.784z" type="coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes" updated_at="2025-07-21T17:25:14.844z" username="adm"]
+
+    We find the process lifecycle reports for the three grepped nonces, and we
+    can see they all read the 11k core documents plus the one design document,
+    JS filtered all 11,001 docs, and then returned only the 5500 docs
+    containing an even ``doc.value`` field.
+
+    This also shows the discrepancy between the quantity of resources induced
+    to actually fulfill a request and the magnitude of the data returned. All
+    of our ``doc.value`` fields were positive integers; if we had a filter
+    function searching for negative ``doc.value`` results, we would have found
+    none, resulting in ``changes_returned=0``, yet we would still have induced
+    the 11,001 doc loads and Javascript filter calls.
+
+    CSRT is specifically built to automatically find and report these types of
+    workload discrepancies and, in general, to help highlight where individual
+    HTTP requests use drastically more resources than the median workload.
+
+Demonstration of expressiveness constraints in Logger Matchers and ini settings
+--------------------------------------------------------------------------------
+
+    Again, we run into expressiveness issues mapping ``default.ini`` settings
+    to an ``ets:match_spec()``; ideally we could just directly declare
+    something like::
+
+        [csrt_loggers]
+        heavy_rpc_workers = ets:fun2ms(fun(#rctx{type=#rpc_worker{}, ioq_calls=IC}=R) when IC > 1234 -> R end).
+        heavy_coordinators = ets:fun2ms(fun(#rctx{type=#coordinator{}, ioq_calls=IC}=R) when IC > 1234 -> R end).
+        heavy_changes = ets:fun2ms(fun(#rctx{type=#coordinator{mod='chttpd_db', func='handle_changes_req'}, docs_read=DR}=R) when DR > 123456 -> R end).
+        debug_foo_db = ets:fun2ms(fun(#rctx{type=#coordinator{}, dbname=(<<"foo">>)}=R) -> R end).
+        debug_foo_db_shard_changes = ets:fun2ms(fun(#rctx{type=#rpc_worker{func=changes}, dbname=(<<"shards/00000000-7fffffff/foo.1753691445">>)}=R) -> R end).
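+
+    To see why this is an expressiveness problem rather than a mechanism
+    problem, note that ``ets:fun2ms/1`` compiles such a fun into a raw match
+    spec data structure. The ``heavy_coordinators`` declaration above would
+    compile into roughly the following (unlisted record fields wildcarded),
+    which is the form an ini declaration would somehow need to encode::
+
+        [{#rctx{type = #coordinator{_ = '_'}, ioq_calls = '$1', _ = '_'},
+          [{'>', '$1', 1234}],
+          ['$_']}]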
+
+    Once we can express and persist Logger Matchers directly like that in the
+    ``ini`` files, we'll no longer need the default matchers, as we'll be able
+    to express any filter functions directly, on coordinators or RPC workers
+    or a combination of both. Furthermore, once we can translate static
+    ``default.ini`` definitions of that form, we'll be able to do the same
+    with the query interface: we can ``POST`` those complex queries in and
+    have a match spec dynamically generated and run directly against the ETS
+    table.
+
+    Now, to highlight that this is *specifically* an expressiveness problem,
+    let's demonstrate how to actually register those matchers above
+    dynamically by way of a ``remsh``, and then see the report generation
+    changes directly::
+
+        (node1@127.0.0.1)35> rr(couch_srt_logger).
+        [coordinator,rctx,rpc_worker,st]
+        (node1@127.0.0.1)36> couch_srt_logger:register_matcher("debug_foo_db", ets:fun2ms(fun(#rctx{type=#coordinator{}, dbname=(<<"foo">>)}=R) -> R end)).
+        ok
+
+    In the logs we see the automatic Logger Matcher reload notice::
+
+        [notice] 2025-07-28T08:35:41.576259Z node1@127.0.0.1 <0.251.0> -------- Initialized 3 CSRT Logger matchers
+
+    And now, if we make an HTTP request to database ``foo``, we'll automatically
+    generate a CSRT process lifecycle report log for that request, without
+    inducing additional report logging for requests to databases other than
+    ``foo``. For example, given ``GET /foo``, we now get the following HTTP
+    related logs::
+
+        [notice] 2025-07-28T08:38:15.638529Z node1@127.0.0.1 <0.2114371.0> 203629c3b4 localhost:15984 127.0.0.1 adm GET /foo 200 ok 3
+        [report] 2025-07-28T08:38:15.638659Z node1@127.0.0.1 <0.2114423.0> -------- [csrt-pid-usage-lifetime db_open=6 dbname="foo" nonce="203629c3b4" pid_ref="<0.2114371.0>:#Ref<0.3800414810.3105882114.258360>" started_at="2025-07-28T08:38:15.636z" type="coordinator-{chttpd_db:handle_request}:GET:/foo" updated_at="2025-07-28T08:38:15.638z" username="adm"]
+
+    We can also create our ``debug_foo_db_shard_changes`` Logger Matcher
+    declared above, but note that the RPC workers operate on local database
+    shard names, not the higher level clustered database names from the
+    coordinator's perspective. To match against specific database names in RPC
+    workers, we'll need to match against the full shard name (e.g.
+    ``<<"shards/00000000-7fffffff/foo.1753691445">>`` instead of ``<<"foo">>``),
+    as in our example above, like so::
+
+        (node1@127.0.0.1)44> rr(couch_srt_logger).
+        [coordinator,rctx,rpc_worker,st]
+        (node1@127.0.0.1)45> couch_srt_logger:register_matcher("debug_foo_db_shard_changes", ets:fun2ms(fun(#rctx{type=#rpc_worker{func=changes}, dbname=(<<"shards/00000000-7fffffff/foo.1753691445">>)}=R) -> R end)).
+        ok
+
+    As before, we get the loggers re-initialized notice, but we don't see the
+    RPC worker report, only the top level coordinator report from our
+    ``debug_foo_db`` Logger Matcher. What happened?::
+
+        [notice] 2025-07-28T08:45:06.305788Z node1@127.0.0.1 <0.251.0> -------- Initialized 4 CSRT Logger matchers
+        [notice] 2025-07-28T08:45:08.106879Z node1@127.0.0.1 <0.2124751.0> eff915deb7 localhost:15984 127.0.0.1 adm GET /foo/_changes 200 ok 110
+        [report] 2025-07-28T08:45:08.106957Z node1@127.0.0.1 <0.2124806.0> -------- [csrt-pid-usage-lifetime changes_returned=6228 db_open=8 dbname="foo" get_kp_node=42 get_kv_node=1003 nonce="eff915deb7" pid_ref="<0.2124751.0>:#Ref<0.3800414810.3105882116.229072>" rows_read=6228 started_at="2025-07-28T08:45:07.997z" type="coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes" updated_at="2025-07-28T08:45:08.106z" username="adm"]
+
+    What happened is that Logger Matchers on ``rpc_worker`` rctxs are queryable
+    but will not generate a report log unless the specific
+    :config:option:`csrt/enable_rpc_reporting` setting is enabled! After doing
+    so, we see the config set notice, followed by a report for the rpc worker
+    on shard range 00-7f of db ``foo``, as expected. Note that we get the RPC
+    report before the HTTP log line, as the worker completed before the
+    coordinator that was waiting on it, and similarly, the coordinator process
+    logs the HTTP line prior to the CSRT coordinator context being cleaned
+    up::
+
+        [notice] 2025-07-28T08:45:44.584938Z node1@127.0.0.1 <0.146.0> -------- config: [csrt] enable_rpc_reporting set to true for reason nil
+        [report] 2025-07-28T08:45:47.852469Z node1@127.0.0.1 <0.2125885.0> -------- [csrt-pid-usage-lifetime changes_returned=3122 db_open=1 dbname="shards/00000000-7fffffff/foo.1753691445" get_kp_node=21 get_kv_nodat="2025-07-28T08:45:47.731z" type="rpc_worker-{<0.2125822.0>:#Ref<0.3800414810.3105882116.240929>}:fabric_rpc:changes" updated_at="2025-07-28T08:45:47.852z"]
+        [notice] 2025-07-28T08:45:47.852524Z node1@127.0.0.1 <0.2125822.0> 7ea9ca7743 localhost:15984 127.0.0.1 adm GET /foo/_changes 200 ok 122
+        [report] 2025-07-28T08:45:47.852602Z node1@127.0.0.1 <0.2125871.0> -------- [csrt-pid-usage-lifetime changes_returned=6228 db_open=11 dbname="foo" get_kp_node=42 get_kv_node=1003 nonce="7ea9ca7743" pid_ref="<0.2125822.0>:#Ref<0.3800414810.3105882116.240904>" rows_read=6228 started_at="2025-07-28T08:45:47.730z" type="coordinator-{chttpd_db:handle_changes_req}:GET:/foo/_changes" updated_at="2025-07-28T08:45:47.852z" username="adm"]
+
+    .. note::
+
+        It seems that some creative pattern matches are nestable within match
+        specs, perhaps something like
+        ``ets:fun2ms(fun(#rctx{dbname = <<"shards/", Range:17/binary, "/foo.", _Rest/binary>>} = R) -> {Range, R} end).``
+        allowing for matching on all ``foo`` db workers, while also extracting
+        out the shard Range along the way.
diff --git a/src/docs/src/experimental.rst b/src/docs/src/experimental.rst
index fecfff4e27d..fb386d664e5 100644
--- a/src/docs/src/experimental.rst
+++ b/src/docs/src/experimental.rst
@@ -50,3 +50,21 @@ Enable nouveau in config and run the Java service.
     enable = true

 Have fun!
+
+Couch Stats Resource Tracker (CSRT)
+===================================
+
+Couch Stats Resource Tracker, aka CSRT, is an experimental real time stats
+tracking system designed to augment the existing
+``couch_stats:increment_counter/{1,2}`` invocations with process local stats
+collection, querying, RPC deltas, and powerful filtered logging of resources
+induced by HTTP and RPC worker requests.
+
+.. toctree::
+    :caption: Couch Stats Resource Tracker Links
+
+    csrt/index
+
+.. seealso::
+
+    :doc:`/config/csrt`
diff --git a/src/fabric/priv/stats_descriptions.cfg b/src/fabric/priv/stats_descriptions.cfg
index d12aa0c8480..2374907a997 100644
--- a/src/fabric/priv/stats_descriptions.cfg
+++ b/src/fabric/priv/stats_descriptions.cfg
@@ -26,3 +26,50 @@
     {type, counter},
     {desc, <<"number of write quorum errors">>}
 ]}.
+
+
+%% fabric_rpc worker stats
+{[fabric_rpc, get_all_security, spawned], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc worker get_all_security spawns">>}
+]}.
+{[fabric_rpc, open_doc, spawned], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc worker open_doc spawns">>}
+]}.
+{[fabric_rpc, all_docs, spawned], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc worker all_docs spawns">>}
+]}.
+{[fabric_rpc, update_docs, spawned], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc worker update_docs spawns">>}
+]}.
+{[fabric_rpc, map_view, spawned], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc worker map_view spawns">>}
+]}.
+{[fabric_rpc, reduce_view, spawned], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc worker reduce_view spawns">>}
+]}.
+{[fabric_rpc, open_shard, spawned], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc worker open_shard spawns">>}
+]}.
+{[fabric_rpc, changes, spawned], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc worker changes spawns">>}
+]}.
+{[fabric_rpc, changes, processed], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc worker changes row invocations">>}
+]}.
+{[fabric_rpc, changes, returned], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc worker changes rows returned">>}
+]}.
+{[fabric_rpc, view, rows_read], [
+    {type, counter},
+    {desc, <<"number of fabric_rpc view_cb row invocations">>}
+]}.
diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl
index 67f529e0935..69df03ade81 100644
--- a/src/fabric/src/fabric_rpc.erl
+++ b/src/fabric/src/fabric_rpc.erl
@@ -284,6 +284,7 @@ get_missing_revs(DbName, IdRevsList, Options) ->
     with_db(DbName, Options, {couch_db, get_missing_revs, [IdRevsList]}).
 update_docs(DbName, Docs0, Options) ->
+    couch_srt:docs_written(length(Docs0)),
     {Docs1, Type} =
         case couch_util:get_value(read_repair, Options) of
             NodeRevs when is_list(NodeRevs) ->
@@ -493,6 +494,8 @@ view_cb({meta, Meta}, Acc) ->
     ok = rexi:stream2({meta, Meta}),
     {ok, Acc};
 view_cb({row, Props}, #mrargs{extra = Options} = Acc) ->
+    %% TODO: distinguish between all_docs vs view call
+    couch_stats:increment_counter([fabric_rpc, view, rows_read]),
     % Adding another row
     ViewRow = fabric_view_row:from_props(Props, Options),
     ok = rexi:stream2(ViewRow),
@@ -512,6 +515,7 @@ reduce_cb({meta, Meta}, Acc, _Options) ->
     {ok, Acc};
 reduce_cb({row, Props}, Acc, Options) ->
     % Adding another row
+    couch_stats:increment_counter([fabric_rpc, view, rows_read]),
     ViewRow = fabric_view_row:from_props(Props, Options),
     ok = rexi:stream2(ViewRow),
     {ok, Acc};
@@ -529,6 +533,7 @@ changes_enumerator(#full_doc_info{} = FDI, Acc) ->
 changes_enumerator(#doc_info{id = <<"_local/", _/binary>>, high_seq = Seq}, Acc) ->
     {ok, Acc#fabric_changes_acc{seq = Seq, pending = Acc#fabric_changes_acc.pending - 1}};
 changes_enumerator(DocInfo, Acc) ->
+    couch_stats:increment_counter([fabric_rpc, changes, processed]),
     #fabric_changes_acc{
         db = Db,
         args = #changes_args{
@@ -569,6 +574,7 @@ changes_enumerator(DocInfo, Acc) ->
     {ok, Acc#fabric_changes_acc{seq = Seq, pending = Pending - 1}}.

 changes_row(Changes, Docs, DocInfo, Acc) ->
+    couch_stats:increment_counter([fabric_rpc, changes, returned]),
     #fabric_changes_acc{db = Db, pending = Pending, epochs = Epochs} = Acc,
     #doc_info{id = Id, high_seq = Seq, revs = [#rev_info{deleted = Del} | _]} = DocInfo,
     {change, [
@@ -667,6 +673,14 @@ clean_stack(S) ->
     ).

 set_io_priority(DbName, Options) ->
+    couch_srt:set_context_dbname(DbName),
+    %% TODO: better approach here than using proplists?
+    case proplists:get_value(user_ctx, Options) of
+        undefined ->
+            ok;
+        #user_ctx{name = UserName} ->
+            couch_srt:set_context_username(UserName)
+    end,
     case lists:keyfind(io_priority, 1, Options) of
         {io_priority, Pri} ->
             erlang:put(io_priority, Pri);
diff --git a/src/fabric/src/fabric_util.erl b/src/fabric/src/fabric_util.erl
index d0961533f3c..3f688556b4b 100644
--- a/src/fabric/src/fabric_util.erl
+++ b/src/fabric/src/fabric_util.erl
@@ -137,15 +137,20 @@ get_shard([#shard{node = Node, name = Name} | Rest], Opts, Timeout, Factor) ->
     Ref = rexi:cast(Node, self(), MFA, [sync]),
     try
         receive
-            {Ref, {ok, Db}} ->
-                {ok, Db};
-            {Ref, {'rexi_EXIT', {{unauthorized, _} = Error, _}}} ->
-                throw(Error);
-            {Ref, {'rexi_EXIT', {{forbidden, _} = Error, _}}} ->
-                throw(Error);
-            {Ref, Reason} ->
-                couch_log:debug("Failed to open shard ~p because: ~p", [Name, Reason]),
-                get_shard(Rest, Opts, Timeout, Factor)
+            {Ref, Msg0} ->
+                {Msg, Delta} = couch_srt:extract_delta(Msg0),
+                couch_srt:accumulate_delta(Delta),
+                case Msg of
+                    {ok, Db} ->
+                        {ok, Db};
+                    {'rexi_EXIT', {{unauthorized, _} = Error, _}} ->
+                        throw(Error);
+                    {'rexi_EXIT', {{forbidden, _} = Error, _}} ->
+                        throw(Error);
+                    Reason ->
+                        couch_log:debug("Failed to open shard ~p because: ~p", [Name, Reason]),
+                        get_shard(Rest, Opts, Timeout, Factor)
+                end
     after Timeout ->
         couch_log:debug("Failed to open shard ~p after: ~p", [Name, Timeout]),
         get_shard(Rest, Opts, Factor * Timeout, Factor)
diff --git a/src/fabric/test/eunit/fabric_rpc_purge_tests.erl b/src/fabric/test/eunit/fabric_rpc_purge_tests.erl
index 07e6b1d4220..57c533ccd6c 100644
--- a/src/fabric/test/eunit/fabric_rpc_purge_tests.erl
+++ b/src/fabric/test/eunit/fabric_rpc_purge_tests.erl
@@ -262,6 +262,8 @@ rpc_update_doc(DbName, Doc, Opts) ->
     fabric_rpc:update_docs(DbName, [Doc], Opts),
     Reply = test_util:wait(fun() ->
         receive
+            {Ref, {Reply, {delta, _}}} ->
+                Reply;
             {Ref, Reply} ->
                 Reply
         after 0 ->
diff --git a/src/fabric/test/eunit/fabric_rpc_tests.erl b/src/fabric/test/eunit/fabric_rpc_tests.erl
index 16bb66badac..c2d525c5bc7 100644
--- a/src/fabric/test/eunit/fabric_rpc_tests.erl
+++ b/src/fabric/test/eunit/fabric_rpc_tests.erl
@@ -101,7 +101,19 @@ t_no_config_db_create_fails_for_shard_rpc(DbName) ->
         receive
             Resp0 -> Resp0
         end,
-    ?assertMatch({Ref, {'rexi_EXIT', {{error, missing_target}, _}}}, Resp).
+    case couch_srt:is_enabled() of
+        true ->
+            %% allow for {Ref, {rexi_EXIT, error}, {delta, D}}
+            ?assertMatch(
+                {Ref, {{'rexi_EXIT', {{error, missing_target}, _}}, _}},
+                Resp
+            );
+        false ->
+            ?assertMatch(
+                {Ref, {'rexi_EXIT', {{error, missing_target}, _}}},
+                Resp
+            )
+    end.

 t_db_create_with_config(DbName) ->
     MDbName = mem3:dbname(DbName),
diff --git a/src/ioq/src/ioq.erl b/src/ioq/src/ioq.erl
index 8e38c2a0015..edb2a59ee55 100644
--- a/src/ioq/src/ioq.erl
+++ b/src/ioq/src/ioq.erl
@@ -60,6 +60,7 @@ call_search(Fd, Msg, Metadata) ->
     call(Fd, Msg, Metadata).
 call(Fd, Msg, Metadata) ->
+    couch_srt:ioq_called(),
     case bypass(Msg, Metadata) of
         true ->
             gen_server:call(Fd, Msg, infinity);
diff --git a/src/mem3/src/mem3_rpc.erl b/src/mem3/src/mem3_rpc.erl
index 70fc797dad6..8e2f392ff0e 100644
--- a/src/mem3/src/mem3_rpc.erl
+++ b/src/mem3/src/mem3_rpc.erl
@@ -379,11 +379,18 @@ rexi_call(Node, MFA, Timeout) ->
     Ref = rexi:cast(Node, self(), MFA, [sync]),
     try
         receive
-            {Ref, {ok, Reply}} ->
-                Reply;
-            {Ref, Error} ->
-                erlang:error(Error);
-            {rexi_DOWN, Mon, _, Reason} ->
+            {Ref, Msg0} ->
+                {Msg, Delta} = couch_srt:extract_delta(Msg0),
+                couch_srt:accumulate_delta(Delta),
+                case Msg of
+                    {ok, Reply} ->
+                        Reply;
+                    Error ->
+                        erlang:error(Error)
+                end;
+            {rexi_DOWN, Mon, _, Reason0} ->
+                {Reason, Delta} = couch_srt:extract_delta(Reason0),
+                couch_srt:accumulate_delta(Delta),
                 erlang:error({rexi_DOWN, {Node, Reason}})
         after Timeout ->
             erlang:error(timeout)
diff --git a/src/rexi/include/rexi.hrl b/src/rexi/include/rexi.hrl
index a2d86b2ab54..a962f306917 100644
--- a/src/rexi/include/rexi.hrl
+++ b/src/rexi/include/rexi.hrl
@@ -11,6 +11,7 @@
 % the License.

 -record(error, {
+    delta,
     timestamp,
     reason,
     mfa,
diff --git a/src/rexi/src/rexi.erl b/src/rexi/src/rexi.erl
index 02d3a9e5559..21acf3b69d3 100644
--- a/src/rexi/src/rexi.erl
+++ b/src/rexi/src/rexi.erl
@@ -104,7 +104,8 @@ kill_all(NodeRefs) when is_list(NodeRefs) ->
 -spec reply(any()) -> any().
 reply(Reply) ->
     {Caller, Ref} = get(rexi_from),
-    erlang:send(Caller, {Ref, Reply}).
+    Payload = couch_srt:maybe_add_delta(Reply),
+    erlang:send(Caller, {Ref, Payload}).

 %% Private function used by stream2 to initialize the stream. Message is of the
 %% form {OriginalRef, {self(),reference()}, Reply}, which enables the
@@ -188,7 +189,8 @@ stream2(Msg, Limit, Timeout) ->
         {ok, Count} ->
             put(rexi_unacked, Count + 1),
             {Caller, Ref} = get(rexi_from),
-            erlang:send(Caller, {Ref, self(), Msg}),
+            Payload = couch_srt:maybe_add_delta(Msg),
+            erlang:send(Caller, {Ref, self(), Payload}),
             ok
     catch
         throw:timeout ->
@@ -222,7 +224,11 @@ stream_ack(Client) ->
 %%
 ping() ->
     {Caller, _} = get(rexi_from),
-    erlang:send(Caller, {rexi, '$rexi_ping'}).
+    %% It is essential that ping/0 includes deltas, as otherwise long running
+    %% filtered queries will be silent on usage until they finally return
+    %% a row or no results. This delay is proportional to the database size,
+    %% so instead we make sure ping/0 keeps live stats flowing.
+    erlang:send(Caller, couch_srt:maybe_add_delta({rexi, '$rexi_ping'})).

 aggregate_server_queue_len() ->
     rexi_server_mon:aggregate_queue_len(rexi_server).
diff --git a/src/rexi/src/rexi_server.erl b/src/rexi/src/rexi_server.erl
index b2df65c7193..efdbb18e51f 100644
--- a/src/rexi/src/rexi_server.erl
+++ b/src/rexi/src/rexi_server.erl
@@ -102,12 +102,12 @@ handle_info({'DOWN', Ref, process, Pid, Error}, #st{workers = Workers} = St) ->
     case find_worker(Ref, Workers) of
         #job{worker_pid = Pid, worker = Ref, client_pid = CPid, client = CRef} = Job ->
             case Error of
-                #error{reason = {_Class, Reason}, stack = Stack} ->
-                    notify_caller({CPid, CRef}, {Reason, Stack}),
+                #error{reason = {_Class, Reason}, stack = Stack, delta = Delta} ->
+                    notify_caller({CPid, CRef}, {Reason, Stack}, Delta),
                     St1 = save_error(Error, St),
                     {noreply, remove_job(Job, St1)};
                 _ ->
-                    notify_caller({CPid, CRef}, Error),
+                    notify_caller({CPid, CRef}, Error, undefined),
                     {noreply, remove_job(Job, St)}
             end;
         false ->
@@ -134,15 +134,22 @@ init_p(From, MFA) ->
     string() | undefined
 ) -> any().
 init_p(From, {M, F, A}, Nonce) ->
+    MFA = {M, F, length(A)},
     put(rexi_from, From),
-    put('$initial_call', {M, F, length(A)}),
+    put('$initial_call', MFA),
     put(nonce, Nonce),
     try
+        couch_srt:create_worker_context(From, MFA, Nonce),
+        couch_srt:maybe_track_rexi_init_p(MFA),
         apply(M, F, A)
     catch
         exit:normal ->
+            couch_srt:destroy_context(),
             ok;
         Class:Reason:Stack0 ->
+            %% Make a CSRT delta manually to funnel back to the caller
+            Delta = couch_srt:make_delta(),
+            couch_srt:destroy_context(),
             Stack = clean_stack(Stack0),
             {ClientPid, _ClientRef} = From,
             couch_log:error(
@@ -158,6 +165,7 @@ init_p(From, {M, F, A}, Nonce) ->
                 ]
             ),
             exit(#error{
+                delta = Delta,
                 timestamp = os:timestamp(),
                 reason = {Class, Reason},
                 mfa = {M, F, A},
@@ -200,8 +208,10 @@ find_worker(Ref, Tab) ->
         [Worker] -> Worker
     end.

-notify_caller({Caller, Ref}, Reason) ->
-    rexi_utils:send(Caller, {Ref, {rexi_EXIT, Reason}}).
+notify_caller({Caller, Ref}, Reason, Delta) ->
+    Payload = couch_srt:maybe_add_delta({rexi_EXIT, Reason}, Delta),
+    Msg = {Ref, Payload},
+    rexi_utils:send(Caller, Msg).

 kill_worker(FromRef, #st{clients = Clients} = St) ->
     case find_worker(FromRef, Clients) of
diff --git a/src/rexi/src/rexi_utils.erl b/src/rexi/src/rexi_utils.erl
index 146d0238ac1..226dfa542f2 100644
--- a/src/rexi/src/rexi_utils.erl
+++ b/src/rexi/src/rexi_utils.erl
@@ -62,23 +62,37 @@ process_message(RefList, Keypos, Fun, Acc0, TimeoutRef, PerMsgTO) ->
     receive
         {timeout, TimeoutRef} ->
             {timeout, Acc0};
-        {rexi, Ref, Msg} ->
+        {rexi, Ref, Msg0} ->
+            {Msg, Delta} = couch_srt:extract_delta(Msg0),
+            couch_srt:accumulate_delta(Delta),
             case lists:keyfind(Ref, Keypos, RefList) of
                 false ->
                     {ok, Acc0};
                 Worker ->
                     Fun(Msg, Worker, Acc0)
             end;
-        {rexi, Ref, From, Msg} ->
+        {rexi, Ref, From, Msg0} ->
+            {Msg, Delta} = couch_srt:extract_delta(Msg0),
+            couch_srt:accumulate_delta(Delta),
             case lists:keyfind(Ref, Keypos, RefList) of
                 false ->
                     {ok, Acc0};
                 Worker ->
                     Fun(Msg, {Worker, From}, Acc0)
             end;
+        %% Special case for csrt of `{rexi, '$rexi_ping'}` with Delta.
+        %% Including delta in rexi_ping is essential for getting live info
+        %% about long running filtered queries that aren't returning rows, as
+        %% otherwise we won't get the delta until the exhaustion of the find
+        %% query.
+        {{rexi, '$rexi_ping'}, {delta, Delta}} ->
+            couch_srt:accumulate_delta(Delta),
+            {ok, Acc0};
         {rexi, '$rexi_ping'} ->
             {ok, Acc0};
-        {Ref, Msg} ->
+        {Ref, Msg0} ->
+            {Msg, Delta} = couch_srt:extract_delta(Msg0),
+            couch_srt:accumulate_delta(Delta),
             case lists:keyfind(Ref, Keypos, RefList) of
                 false ->
                     % this was some non-matching message which we will ignore
@@ -86,7 +100,9 @@ process_message(RefList, Keypos, Fun, Acc0, TimeoutRef, PerMsgTO) ->
                 Worker ->
                     Fun(Msg, Worker, Acc0)
             end;
-        {Ref, From, Msg} ->
+        {Ref, From, Msg0} ->
+            {Msg, Delta} = couch_srt:extract_delta(Msg0),
+            couch_srt:accumulate_delta(Delta),
             case lists:keyfind(Ref, Keypos, RefList) of
                 false ->
                     {ok, Acc0};
diff --git a/src/rexi/test/rexi_tests.erl b/src/rexi/test/rexi_tests.erl
index 18b05b545ca..ee624f4b68c 100644
--- a/src/rexi/test/rexi_tests.erl
+++ b/src/rexi/test/rexi_tests.erl
@@ -75,6 +75,7 @@ t_cast(_) ->
     Ref = rexi:cast(node(), {?MODULE, rpc_test_fun, [potato]}),
     {Res, Dict} =
         receive
+            {Ref, {{R, D}, {delta, _}}} -> {R, maps:from_list(D)};
             {Ref, {R, D}} -> {R, maps:from_list(D)}
         end,
     ?assertEqual(potato, Res),
@@ -99,7 +100,12 @@ t_cast_explicit_caller(_) ->
         receive
             {'DOWN', CallerRef, _, _, Exit} -> Exit
         end,
-    ?assertMatch({Ref, {potato, [_ | _]}}, Result).
+    case couch_srt:is_enabled() of
+        true ->
+            ?assertMatch({Ref, {{potato, [_ | _]}, {delta, _}}}, Result);
+        false ->
+            ?assertMatch({Ref, {potato, [_ | _]}}, Result)
+    end.

 t_cast_ref(_) ->
     put(nonce, yesh),
@@ -180,6 +186,7 @@ t_cast_error(_) ->
     Ref = rexi:cast(node(), self(), {?MODULE, rpc_test_fun, [{error, tomato}]}, []),
     Res =
         receive
+            {Ref, {RexiExit, {delta, _}}} -> RexiExit;
             {Ref, RexiExit} -> RexiExit
         end,
     ?assertMatch({rexi_EXIT, {tomato, [{?MODULE, rpc_test_fun, 1, _} | _]}}, Res).
@@ -188,6 +195,7 @@ t_kill(_) ->
     Ref = rexi:cast(node(), {?MODULE, rpc_test_fun, [{sleep, 10000}]}),
     WorkerPid =
         receive
+            {Ref, {{sleeping, Pid}, {delta, _}}} -> Pid;
             {Ref, {sleeping, Pid}} -> Pid
         end,
     ?assert(is_process_alive(WorkerPid)),
@@ -207,18 +215,24 @@ t_ping(_) ->
     rexi:cast(node(), {?MODULE, rpc_test_fun, [ping]}),
     Res =
         receive
-            {rexi, Ping} -> Ping
+            {{rexi, Ping}, {delta, _}} -> Ping;
+            {rexi, Ping} -> Ping;
+            Other -> Other
         end,
     ?assertEqual('$rexi_ping', Res).

 stream_init(Ref) ->
     receive
+        {Ref, From, {rexi_STREAM_INIT, {delta, _}}} ->
+            From;
         {Ref, From, rexi_STREAM_INIT} ->
             From
     end.

 recv(Ref) when is_reference(Ref) ->
     receive
+        {Ref, _, {Msg, {delta, _}}} -> Msg;
+        {Ref, {Msg, {delta, _}}} -> Msg;
         {Ref, _, Msg} -> Msg;
         {Ref, Msg} -> Msg
     after 500 ->
         timeout