1313#include < folly/portability/Unistd.h>
1414#include < nlohmann/json.hpp>
1515#include < platform/dirutils.h>
16+ #include < platform/process_monitor.h>
1617#include < platform/strerror.h>
1718#include < protocol/connection/client_connection_map.h>
18- #ifndef WIN32
19- #include < sys/wait.h>
20- #include < csignal>
21- #endif
19+ #include < atomic>
2220#include < chrono>
2321#include < fstream>
2422#include < iostream>
@@ -49,6 +47,8 @@ class NodeImpl : public Node {
4947 nlohmann::json config;
5048 ConnectionMap connectionMap;
5149 const std::string id;
50+ std::unique_ptr<ProcessMonitor> child;
51+ std::atomic_bool allow_child_death{false };
5252};
5353
5454NodeImpl::NodeImpl (boost::filesystem::path directory, std::string id)
@@ -59,10 +59,11 @@ NodeImpl::NodeImpl(boost::filesystem::path directory, std::string id)
5959 const auto errmaps =
6060 source_root / " etc" / " couchbase" / " kv" / " error_maps" ;
6161 const auto rbac = source_root / " tests" / " testapp_cluster" / " rbac.json" ;
62- const auto log_filename = NodeImpl::directory / " memcached_log" ;
62+ const auto log_filename = NodeImpl::directory / " log " / " memcached_log" ;
6363 const auto portnumber_file = NodeImpl::directory / " memcached.ports.json" ;
6464 const auto minidump_dir = NodeImpl::directory / " crash" ;
6565 create_directories (minidump_dir);
66+ create_directories (log_filename.parent_path ());
6667
6768 config = {{" max_connections" , 1000 },
6869 {" system_connections" , 250 },
@@ -104,108 +105,71 @@ NodeImpl::NodeImpl(boost::filesystem::path directory, std::string id)
104105}
105106
106107void NodeImpl::startMemcachedServer () {
107- #ifdef WIN32
108- STARTUPINFO sinfo{};
109- PROCESS_INFORMATION pinfo{};
110- sinfo.cb = sizeof (sinfo);
111-
112- char commandline[1024 ];
113- sprintf (commandline,
114- " memcached.exe -C %s" ,
115- configfile.generic_string ().c_str ());
116- if (!CreateProcess (" memcached.exe" , // lpApplicationName
117- commandline, // lpCommandLine
118- nullptr , // lpProcessAttributes
119- nullptr , // lpThreadAttributes
120- false , // bInheritHandles
121- 0 , // dwCreationFlags
122- nullptr , // lpEnvironment
123- nullptr , // lpCurrentDirectory
124- &sinfo, // lpStartupInfo
125- &pinfo)) { // lpProcessInfoqrmation
126- throw std::system_error (GetLastError (),
127- std::system_category (),
128- " Failed to execute memcached" );
129- }
130-
131- child = pinfo.hProcess ;
132- #else
133- child = fork ();
134- if (child == -1 ) {
135- throw std::system_error (
136- errno, std::system_category (), " Failed to start client" );
108+ boost::filesystem::path exe{boost::filesystem::current_path () /
109+ " memcached" };
110+ exe = exe.generic_path ();
111+ if (!boost::filesystem::exists (exe)) {
112+ exe = exe.generic_string () + " .exe" ;
113+ if (!boost::filesystem::exists (exe)) {
114+ throw std::runtime_error (
115+ " NodeImpl::startMemcachedServer(): Failed to locate "
116+ " memcached" );
117+ }
137118 }
138-
139- if (child == 0 ) {
140- std::string binary (OBJECT_ROOT);
141- const auto memcached_json = configfile.generic_string ();
142- binary.append (" /memcached" );
143-
144- std::vector<const char *> argv;
145- argv.emplace_back (binary.c_str ());
146- argv.emplace_back (" -C" );
147- argv.emplace_back (memcached_json.c_str ());
148- argv.emplace_back (nullptr );
149- execvp (argv[0 ], const_cast <char **>(argv.data ()));
150- throw std::system_error (
151- errno, std::system_category (), " Failed to execute memcached" );
119+ std::vector<std::string> argv = {
120+ {exe.generic_string (), " -C" , configfile.generic_string ()}};
121+ child = ProcessMonitor::create (argv, [this ](const auto & ec) {
122+ if (!allow_child_death) {
123+ std::cerr << " memcached process on " << directory.generic_string ()
124+ << " terminated: " << ec.to_string () << std::endl;
125+ std::cerr << " Terminating process" << std::endl;
126+ std::_Exit (EXIT_FAILURE);
127+ }
128+ });
129+
130+ if (getenv (" MEMCACHED_UNIT_TESTS" )) {
131+ // The test _SHOULD_ complete under 60 seconds..
132+ child->setTimeoutHander (std::chrono::seconds{60 }, [this ]() {
133+ static std::mutex mutex;
134+ std::lock_guard<std::mutex> guard (mutex);
135+ std::cerr << " memcached process on " << directory.generic_string ()
136+ << " might be stuck." << std::endl;
137+
138+ // We've set the cycle size to be 200M so we should expect
139+ // only a single log file (but for simplicity just iterate
140+ // over them all and print the last 8k of each file
141+ std::cerr << " Last 8k of the log files" << std::endl
142+ << " ========================" << std::endl;
143+ for (const auto & p :
144+ boost::filesystem::directory_iterator (directory / " log" )) {
145+ if (is_regular_file (p)) {
146+ auto content = cb::io::loadFile (p.path ().generic_string ());
147+ if (content.size () > 8192 ) {
148+ content = content.substr (
149+ content.find (' \n ' , content.size () - 8192 ));
150+ }
151+ std::cerr << p.path ().generic_string () << std::endl
152+ << content << std::endl
153+ << " -----------------------------" << std::endl;
154+ }
155+ }
156+ // Wait 2 secs before terminating so that the other nodes also
157+ // may have their timouts fire and dump the logs..
158+ std::this_thread::sleep_for (std::chrono::seconds{2 });
159+ std::exit (EXIT_FAILURE);
160+ });
152161 }
153- #endif
154-
155162 // wait and read the portnumber file
156163 parsePortnumberFile ();
157164}
158165
159166NodeImpl::~NodeImpl () {
160- if (isRunning ()) {
161- #ifdef WIN32
162- // @todo This should be made a bit more robust
163- if (!TerminateProcess (child, 0 )) {
164- std::cerr << " TerminateProcess failed!" << std::endl;
165- std::cerr.flush ();
166- _exit (EXIT_FAILURE);
167- }
168- DWORD status;
169- if ((status = WaitForSingleObject (child, 60000 )) != WAIT_OBJECT_0) {
170- std::cerr << " Unexpected return value from WaitForSingleObject: "
171- << status << std::endl;
172- std::cerr.flush ();
173- _exit (EXIT_FAILURE);
174- }
175- if (!GetExitCodeProcess (child, &status)) {
176- std::cerr << " GetExitCodeProcess failed: " << GetLastError ()
177- << std::endl;
178- std::cerr.flush ();
179- _exit (EXIT_FAILURE);
180- }
181- #else
182- // Start by giving it a slow and easy start...
183- const auto timeout =
184- std::chrono::steady_clock::now () + std::chrono::seconds (15 );
185- kill (child, SIGTERM);
186-
187- do {
188- std::this_thread::sleep_for (std::chrono::milliseconds (10 ));
189- } while (isRunning () && std::chrono::steady_clock::now () < timeout);
190-
191- if (isRunning ()) {
192- // no mercy!
193- kill (child, SIGKILL);
194-
195- int status;
196- pid_t ret;
197- while (true ) {
198- ret = waitpid (child, &status, 0 );
199- if (ret == reinterpret_cast <pid_t >(-1 ) && errno == EINTR) {
200- // Just loop again
201- continue ;
202- }
203- break ;
204- }
205- }
206- #endif
167+ if (child) {
168+ allow_child_death = true ;
169+ child->terminate ();
207170 }
208-
171+ // make sure we reap the thread
172+ child.reset ();
209173 if (!configfile.empty ()) {
210174 try {
211175 remove (configfile);
@@ -222,54 +186,6 @@ void NodeImpl::parsePortnumberFile() {
222186 cb::io::rmrf (config[" portnumber_file" ]);
223187}
224188
225- #ifdef WIN32
226- bool Node::isRunning () const {
227- if (child != INVALID_HANDLE_VALUE) {
228- DWORD status;
229-
230- if (!GetExitCodeProcess (child, &status)) {
231- throw std::system_error (
232- GetLastError (),
233- std::system_category (),
234- " NodeImpl::isRunning: GetExitCodeProcess failed" );
235-
236- std::cerr << " GetExitCodeProcess: failed: " << cb_strerror ()
237- << std::endl;
238- exit (EXIT_FAILURE);
239- }
240-
241- if (status == STILL_ACTIVE) {
242- return true ;
243- }
244-
245- CloseHandle (child);
246- child = INVALID_HANDLE_VALUE;
247- }
248- return false ;
249- }
250- #else
251- bool Node::isRunning () const {
252- if (child != 0 ) {
253- int status;
254- auto next = waitpid (child, &status, WNOHANG);
255- if (next == static_cast <pid_t >(-1 )) {
256- throw std::system_error (errno,
257- std::system_category (),
258- " NodeImpl::isRunning: waitpid failed" );
259- }
260-
261- if (next == child) {
262- child = 0 ;
263- return false ;
264- }
265-
266- return true ;
267- }
268-
269- return false ;
270- }
271- #endif
272-
273189std::unique_ptr<MemcachedConnection> NodeImpl::getConnection () const {
274190 auto ret = connectionMap.getConnection ().clone ();
275191 ret->setAutoRetryTmpfail (true );
0 commit comments