[PATCH 1 of 2] server: add an error feedback mechanism for when the daemon fails to launch
Matt Harbison
mharbison72 at gmail.com
Thu Mar 29 02:45:30 UTC 2018
# HG changeset patch
# User Matt Harbison <matt_harbison at yahoo.com>
# Date 1522210269 14400
# Wed Mar 28 00:11:09 2018 -0400
# Node ID 7a6eccc3b43dd8f6090b23aa38b2f71d33f5bbdc
# Parent 8bac14ce57781d2a45b6d036b3614b1a21917f0d
server: add an error feedback mechanism for when the daemon fails to launch
There's a recurring problem on Windows where `hg serve -d` will randomly fail to
spawn a detached process. The reason for the failure is completely hidden, and
it takes hours to get a single failure on my laptop. All this does is redirect
stdout/stderr of the child to a file until the lock file is freed, and then the
parent dumps it out if it fails to spawn.
I chose to put the output into the lock file because that is always cleaned up.
There's no way to report errors after that anyway. On Windows, killdaemons.py
is roughly `kill -9`, so this ensures that junk won't pile up.
This may end up being a case of EADDRINUSE. At least that's what I saw spit out
a few times (among other odd errors and missing output on Windows). But I also
managed to get the same thing on Fedora 26 by running test-hgwebdir.t with
--loop -j10 for several hours. Running `netstat` immediately after killing that
run printed a wall of sockets in the TIME_WAIT state, which were gone a couple
seconds later. I couldn't match up ports that failed, because --loop doesn't
print out the message about the port that was used. So maybe the fix is to
rotate the use of HGPORT[12] in the tests. But, let's collect some more data
first.
diff --git a/mercurial/server.py b/mercurial/server.py
--- a/mercurial/server.py
+++ b/mercurial/server.py
@@ -30,6 +30,20 @@ def runservice(opts, parentfn=None, init
runargs=None, appendpid=False):
'''Run a command as a service.'''
+ # When daemonized, redirect stdout/stderr to the lockfile (which gets
+ # cleaned up after the child is up and running), so that the parent can read
+ # and print the error if this child dies early.
+ if opts['daemon_postexec']:
+ for inst in opts['daemon_postexec']:
+ if inst.startswith('unlink:'):
+ lockpath = inst[7:]
+ procutil.stdout.flush()
+ procutil.stderr.flush()
+ logfilefp = open(lockpath, 'w+')
+ os.dup2(logfilefp.fileno(), 1)
+ os.dup2(logfilefp.fileno(), 2)
+ logfilefp.close()
+
def writepid(pid):
if opts['pid_file']:
if appendpid:
@@ -47,6 +61,8 @@ def runservice(opts, parentfn=None, init
try:
if not runargs:
runargs = procutil.hgcmd() + pycompat.sysargv[1:]
+ # Only hg processes accept --traceback (e.g., not dumbhttp.py)
+ runargs.append('--traceback')
runargs.append('--daemon-postexec=unlink:%s' % lockpath)
# Don't pass --cwd to the child process, because we've already
# changed directory.
@@ -61,6 +77,12 @@ def runservice(opts, parentfn=None, init
return not os.path.exists(lockpath)
pid = procutil.rundetached(runargs, condfn)
if pid < 0:
+ # If the daemonized process managed to write out an error msg,
+ # report it.
+ if os.path.exists(lockpath):
+ with open(lockpath) as log:
+ for line in log:
+ procutil.stderr.write('server: %s' % line)
raise error.Abort(_('child process failed to start'))
writepid(pid)
finally:
@@ -81,31 +103,39 @@ def runservice(opts, parentfn=None, init
os.setsid()
except AttributeError:
pass
+
+ lockpath = None
for inst in opts['daemon_postexec']:
if inst.startswith('unlink:'):
lockpath = inst[7:]
- os.unlink(lockpath)
elif inst.startswith('chdir:'):
os.chdir(inst[6:])
elif inst != 'none':
raise error.Abort(_('invalid value for --daemon-postexec: %s')
% inst)
- procutil.hidewindow()
- procutil.stdout.flush()
- procutil.stderr.flush()
+
+ if lockpath:
+ procutil.hidewindow()
+ procutil.stdout.flush()
+ procutil.stderr.flush()
- nullfd = os.open(os.devnull, os.O_RDWR)
- logfilefd = nullfd
- if logfile:
- logfilefd = os.open(logfile, os.O_RDWR | os.O_CREAT | os.O_APPEND,
- 0o666)
- os.dup2(nullfd, 0)
- os.dup2(logfilefd, 1)
- os.dup2(logfilefd, 2)
- if nullfd not in (0, 1, 2):
- os.close(nullfd)
- if logfile and logfilefd not in (0, 1, 2):
- os.close(logfilefd)
+ nullfd = os.open(os.devnull, os.O_RDWR)
+ logfilefd = nullfd
+ if logfile:
+ logfilefd = os.open(logfile,
+ os.O_RDWR | os.O_CREAT | os.O_APPEND,
+ 0o666)
+ os.dup2(nullfd, 0)
+ os.dup2(logfilefd, 1)
+ os.dup2(logfilefd, 2)
+ if nullfd not in (0, 1, 2):
+ os.close(nullfd)
+ if logfile and logfilefd not in (0, 1, 2):
+ os.close(logfilefd)
+
+ # Only unlink after redirecting stdout/stderr, so Windows doesn't
+ # complain about a sharing violation.
+ os.unlink(lockpath)
if runfn:
return runfn()
diff --git a/tests/test-serve.t b/tests/test-serve.t
--- a/tests/test-serve.t
+++ b/tests/test-serve.t
@@ -47,8 +47,8 @@ With -v and -p daytime (should fail beca
#if no-root no-windows
$ KILLQUIETLY=Y
- $ hgserve -p daytime
- abort: cannot start server at 'localhost:13': Permission denied
+ $ hgserve -p daytime 2>&1 | egrep -v '^server: (Traceback|Abort| )'
+ server: abort: cannot start server at 'localhost:13': Permission denied
abort: child process failed to start
% errors
$ KILLQUIETLY=N
More information about the Mercurial-devel
mailing list