[PATCH 1 of 2] server: add an error feedback mechanism for when the daemon fails to launch
Matt Harbison
mharbison72 at gmail.com
Thu Mar 29 23:04:39 EDT 2018
On Thu, 29 Mar 2018 07:26:04 -0400, Yuya Nishihara <yuya at tcha.org> wrote:
> On Wed, 28 Mar 2018 22:45:30 -0400, Matt Harbison wrote:
>> # HG changeset patch
>> # User Matt Harbison <matt_harbison at yahoo.com>
>> # Date 1522210269 14400
>> # Wed Mar 28 00:11:09 2018 -0400
>> # Node ID 7a6eccc3b43dd8f6090b23aa38b2f71d33f5bbdc
>> # Parent 8bac14ce57781d2a45b6d036b3614b1a21917f0d
>> server: add an error feedback mechanism for when the daemon fails to
>> launch
>>
>> There's a recurring problem on Windows where `hg serve -d` will
>> randomly fail to
>> spawn a detached process. The reason for the failure is completely
>> hidden, and
>> it takes hours to get a single failure on my laptop. All this does is
>> redirect
>> stdout/stderr of the child to a file until the lock file is freed, and
>> then the
>> parent dumps it out if it fails to spawn.
>
> Abusing the lockfile is fine, but ...
>
>> diff --git a/mercurial/server.py b/mercurial/server.py
>> --- a/mercurial/server.py
>> +++ b/mercurial/server.py
>> @@ -30,6 +30,20 @@ def runservice(opts, parentfn=None, init
>> runargs=None, appendpid=False):
>> '''Run a command as a service.'''
>>
>> + # When daemonized, redirect stdout/stderr to the lockfile (which
>> gets
>> + # cleaned up after the child is up and running), so that the
>> parent can read
>> + # and print the error if this child dies early.
>> + if opts['daemon_postexec']:
>> + for inst in opts['daemon_postexec']:
>> + if inst.startswith('unlink:'):
>> + lockpath = inst[7:]
>> + procutil.stdout.flush()
>> + procutil.stderr.flush()
>> + logfilefp = open(lockpath, 'w+')
>
> Nit: os.open(lockpath, os.O_WRONLY | os.O_APPEND | os.O_BINARY) is
> probably
> better as it enforces that the lock was created by the parent process.
>
>> + os.dup2(logfilefp.fileno(), 1)
>> + os.dup2(logfilefp.fileno(), 2)
>> + logfilefp.close()
>
>> @@ -47,6 +61,8 @@ def runservice(opts, parentfn=None, init
>> try:
>> if not runargs:
>> runargs = procutil.hgcmd() + pycompat.sysargv[1:]
>> + # Only hg processes accept --traceback (e.g., not
>> dumbhttp.py)
>> + runargs.append('--traceback')
>
> Doesn't it enable the traceback permanently? The --traceback option
> should be
> specified explicitly only when needed.
Yes. The errors happen in random locations, so I didn't want to sprinkle
this on command lines all over. I didn't see a way to detect if it was
running under the test harness, but maybe it's OK to conditionalize this
based on some environment variable that it sets?
>> runargs.append('--daemon-postexec=unlink:%s' % lockpath)
>> # Don't pass --cwd to the child process, because we've
>> already
>> # changed directory.
>> @@ -61,6 +77,12 @@ def runservice(opts, parentfn=None, init
>> return not os.path.exists(lockpath)
>> pid = procutil.rundetached(runargs, condfn)
>> if pid < 0:
>> + # If the daemonized process managed to write out an
>> error msg,
>> + # report it.
>> + if os.path.exists(lockpath):
>> + with open(lockpath) as log:
>> + for line in log:
>> + procutil.stderr.write('server: %s' % line)
>
> Event without this hack, stdout/stderr of the child should be visible (at
> least on Unix.) They aren't redirected until the server starts
> successfully.
Yeah, it looks like it's a Windows problem. See 594dd384803c. I wasn't
sure if it's better to special case Windows, but I don't have a problem
doing that.
>> - procutil.hidewindow()
>> - procutil.stdout.flush()
>> - procutil.stderr.flush()
>> +
>> + if lockpath:
>> + procutil.hidewindow()
>> + procutil.stdout.flush()
>> + procutil.stderr.flush()
>>
>> - nullfd = os.open(os.devnull, os.O_RDWR)
>> - logfilefd = nullfd
>> - if logfile:
>> - logfilefd = os.open(logfile, os.O_RDWR | os.O_CREAT |
>> os.O_APPEND,
>> - 0o666)
>> - os.dup2(nullfd, 0)
>> - os.dup2(logfilefd, 1)
>> - os.dup2(logfilefd, 2)
>> - if nullfd not in (0, 1, 2):
>> - os.close(nullfd)
>> - if logfile and logfilefd not in (0, 1, 2):
>> - os.close(logfilefd)
>> + nullfd = os.open(os.devnull, os.O_RDWR)
>> + logfilefd = nullfd
>> + if logfile:
>> + logfilefd = os.open(logfile,
>> + os.O_RDWR | os.O_CREAT |
>> os.O_APPEND,
>> + 0o666)
>> + os.dup2(nullfd, 0)
>> + os.dup2(logfilefd, 1)
>> + os.dup2(logfilefd, 2)
>> + if nullfd not in (0, 1, 2):
>> + os.close(nullfd)
>> + if logfile and logfilefd not in (0, 1, 2):
>> + os.close(logfilefd)
>> +
>> + # Only unlink after redirecting stdout/stderr, so Windows
>> doesn't
>> + # complain about a sharing violation.
>> + os.unlink(lockpath)
>
> Wheter lockpath is specified or not, stdout/stderr of the child must be
> redirected to /dev/null.
That should be easy enough to fix. I was under the impression that the
lock file is mandatory.
More information about the Mercurial-devel
mailing list