From 94d337eb0fbf78bda2af6938b4a65d3c94808caf Mon Sep 17 00:00:00 2001 From: Andreas Madsen Date: Sun, 11 Mar 2012 14:19:02 +0100 Subject: [PATCH] cluster: kill workers when master dies This patch will kill the worker once it has lost its connection with the parent. However, if the worker is committing suicide, other measures will be used. --- lib/cluster.js | 50 ++++--------- test/simple/test-cluster-master-kill.js | 98 +++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 37 deletions(-) create mode 100644 test/simple/test-cluster-master-kill.js diff --git a/lib/cluster.js b/lib/cluster.js index 977f1dd3be5..686d05d3bdc 100644 --- a/lib/cluster.js +++ b/lib/cluster.js @@ -109,23 +109,6 @@ cluster.setupMaster = function(options) { silent: options.silent || false }; - // Kill workers when a uncaught exception is received - process.on('uncaughtException', function(err) { - // Did the user install a listener? If so, it overrides this one. - if (process.listeners('uncaughtException').length > 1) return; - - // Output the error stack, and create on if non exist - if (!(err instanceof Error)) { - err = new Error(err); - } - console.error(err.stack); - - // quick destroy cluster - quickDestroyCluster(); - // when done exit process with error code: 1 - process.exit(1); - }); - // emit setup event cluster.emit('setup'); }; @@ -401,17 +384,10 @@ Worker.prototype.destroy = function() { process.exit(0); }); - // When master do a quickDestroy the channel is not necesarily closed - // at the point this function runs. For that reason we need to keep - // checking that the channel is still open, until a actually callback - // from the master is resicved. Also we can't do a timeout and then - // just kill, since we don't know if the quickDestroy function was called. 
- setInterval(function() { - if (!self.process.connected) { - process.exit(0); - } - }, 200); - + // When channel is closed, terminate the process + this.process.once('disconnect', function() { + process.exit(0); + }); } else { process.exit(0); } @@ -509,20 +485,20 @@ cluster.disconnect = function(callback) { progress.check(); }; -// Sync way to quickly kill all cluster workers -// However the workers may not die instantly -function quickDestroyCluster() { - eachWorker(function(worker) { - worker.process.disconnect(); - worker.process.kill(); - }); -} - // Internal function. Called from src/node.js when worker process starts. cluster._setupWorker = function() { + // Get worker class var worker = cluster.worker = new Worker(); + // when the worker is disconnected from parent accidentally + // we will terminate the worker + process.once('disconnect', function() { + if (worker.suicide !== true) { + process.exit(0); + } + }); + // Tell master that the worker is online worker.state = 'online'; sendInternalMessage(worker, { cmd: 'online' }); diff --git a/test/simple/test-cluster-master-kill.js b/test/simple/test-cluster-master-kill.js new file mode 100644 index 00000000000..36254039039 --- /dev/null +++ b/test/simple/test-cluster-master-kill.js @@ -0,0 +1,98 @@ +// Copyright Joyent, Inc. and other Node contributors. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. + + +var common = require('../common'); +var assert = require('assert'); +var cluster = require('cluster'); + +if (cluster.isWorker) { + + // keep the worker alive + var http = require('http'); + http.Server().listen(common.PORT, '127.0.0.1'); + +} else if (process.argv[2] === 'cluster') { + + var worker = cluster.fork(); + + // send PID info to testcase process + process.send({ + pid: worker.process.pid + }); + + // terminate the cluster process + worker.once('listening', function() { + setTimeout(function() { + process.exit(0); + }, 1000); + }); + +} else { + + // This is the testcase + var fork = require('child_process').fork; + + // is process alive helper + var isAlive = function(pid) { + try { + //this will throw an error if the process is dead + process.kill(pid, 0); + + return true; + } catch (e) { + return false; + } + }; + + // Spawn a cluster process + var master = fork(process.argv[1], ['cluster']); + + // get pid info + var pid = null; + master.once('message', function(data) { + pid = data.pid; + }); + + // When master is dead + var alive = true; + master.on('exit', function(code) { + + // make sure that the master died on purpose + assert.equal(code, 0); + + // check worker process status + setTimeout(function() { + alive = isAlive(pid); + }, 200); + }); + + process.once('exit', function() { + // cleanup: kill the worker if alive + if (alive) { + process.kill(pid); + } + + assert.equal(typeof pid, 'number', 'did not get worker pid info'); + assert.equal(alive, false, 
'worker was alive after master died'); + }); + +}