[PATCH] hgwatchman: new experimental extension

Martijn Pieters mj at zopatista.com
Tue Feb 16 13:23:26 UTC 2016


# HG changeset patch
# User Martijn Pieters <mjpieters at fb.com>
# Date 1455628807 0
#      Tue Feb 16 13:20:07 2016 +0000
# Node ID 2e3731a83e78075bbd751a8fe686ab2e3cb2ebc7
# Parent  9e0e8c6973e61ec7691adecfc8e857f42693e720
hgwatchman: new experimental extension

Extension to plug into a watchman daemon, speeding up hg status calls.

Originally developed at https://bitbucket.org/facebook/hgwatchman

The extension includes a copy of pywatchman, taken from
https://github.com/facebook/watchman/tree/master/python. This package has not
been updated to Mercurial code standards.

diff --git a/hgext/hgwatchman/README.md b/hgext/hgwatchman/README.md
new file mode 100644
--- /dev/null
+++ b/hgext/hgwatchman/README.md
@@ -0,0 +1,69 @@
+hgwatchman
+==========
+
+Integrates the file-watching program watchman with Mercurial to produce faster
+status results.
+
+On a particular Linux system, for a real-world repository with over 200,000
+files hosted on ext4, vanilla `hg status` takes over 3 seconds. On the
+same system, with hgwatchman it takes under 0.6 seconds.
+
+Platforms Supported
+===================
+
+**Linux:** *Stable*. Watchman and hgwatchman are both known to work reliably,
+  even under severe loads.
+
+**Mac OS X:** *Stable*. The Mercurial test suite passes with hgwatchman turned
+  on, on case-insensitive HFS+. There has been a reasonable amount of user
+  testing under normal loads.
+
+**Solaris, BSD:** *Alpha*. Watchman and hgwatchman are believed to work, but
+  very little testing has been done.
+
+**Windows:** *Alpha*. Not in a release version of watchman or hgwatchman yet.
+
+
+Installing
+==========
+
+Install [watchman](https://github.com/facebook/watchman) and make sure it is
+in your PATH.
+
+In your `hgrc`, add the following lines:
+
+    :::ini
+    [extensions]
+    hgwatchman =
+
+Configuring
+===========
+
+hgwatchman requires no configuration -- it will tell watchman about your
+repository as necessary. The following configuration options exist:
+
+    :::ini
+    [watchman]
+    mode = {off, on, paranoid}
+
+When `mode = off`, hgwatchman will disable itself. When `mode = on`, hgwatchman
+will be enabled as usual. When `mode = paranoid`, hgwatchman will query both
+watchman and the filesystem, and ensure that the results are consistent.
+
+    :::ini
+    [watchman]
+    timeout = (float)
+
+A value, in seconds, that determines how long hgwatchman will wait for watchman
+to return results. Defaults to `2.0`.
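+
+For example, an illustrative `hgrc` that enables the extension and raises the
+timeout might look like this:
+
+    :::ini
+    [extensions]
+    hgwatchman =
+
+    [watchman]
+    mode = on
+    timeout = 10.0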
+
+Known Issues
+============
+
+* hgwatchman will disable itself if any of the following extensions are enabled:
+  largefiles, inotify, eol; or if the repository has subrepos.
+* hgwatchman will produce incorrect results if nested repos that are not
+  subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
+
+The issues related to nested repos and subrepos are probably not fundamental
+ones. Patches to fix them are welcome.
diff --git a/hgext/hgwatchman/__init__.py b/hgext/hgwatchman/__init__.py
new file mode 100644
--- /dev/null
+++ b/hgext/hgwatchman/__init__.py
@@ -0,0 +1,581 @@
+# __init__.py - hgwatchman initialization and overrides
+#
+# Copyright 2013-2016 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
+
+Integrates the file-watching program watchman with Mercurial to produce faster
+status results.
+
+On a particular Linux system, for a real-world repository with over 200,000
+files hosted on ext4, vanilla `hg status` takes over 3 seconds. On the
+same system, with hgwatchman it takes under 0.6 seconds.
+
+hgwatchman requires no configuration -- it will tell watchman about your
+repository as necessary. The following configuration options exist:
+
+::
+
+    [watchman]
+    mode = {off, on, paranoid}
+
+When `mode = off`, hgwatchman will disable itself. When `mode = on`, hgwatchman
+will be enabled as usual. When `mode = paranoid`, hgwatchman will query both
+watchman and the filesystem, and ensure that the results are consistent.
+
+::
+
+    [watchman]
+    timeout = (float)
+
+A value, in seconds, that determines how long hgwatchman will wait for watchman
+to return results. Defaults to `2.0`.
+
+::
+
+    [watchman]
+    blacklistusers = (list of userids)
+
+A list of usernames for which hgwatchman will disable itself altogether.
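+
+For example, to disable hgwatchman for the hypothetical user ids buildbot and
+backup::
+
+    [watchman]
+    blacklistusers = buildbot, backup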
+
+'''
+from __future__ import absolute_import
+
+import os
+import stat
+import sys
+
+from mercurial import (
+    context,
+    extensions,
+    localrepo,
+    scmutil,
+    util,
+)
+from mercurial import match as matchmod
+from mercurial.i18n import _
+
+from . import (
+    client,
+    state
+)
+
+# Note for extension authors: ONLY specify testedwith = 'internal' for
+# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
+# be specifying the version(s) of Mercurial they are tested with, or
+# leave the attribute unspecified.
+testedwith = 'internal'
+
+# pathauditor moved to the pathutil module in Mercurial 2.9
+try:
+    from mercurial import pathutil
+    pathutil.pathauditor
+except ImportError:
+    pathutil = scmutil
+
+# status is a class after 3.2
+try:
+    namestatus = scmutil.status
+except AttributeError:
+    def namestatus(*x):
+        return x
+
+_blacklist = ['largefiles', 'inotify', 'eol']
+
+def _handleunavailable(ui, state, ex):
+    if isinstance(ex, client.Unavailable):
+        if ex.warn:
+            ui.warn(str(ex) + '\n')
+        if ex.invalidate:
+            state.invalidate()
+        ui.log('watchman', 'watchman unavailable: %s\n', ex.msg)
+    else:
+        ui.log('watchman', 'watchman exception: %s\n', ex)
+
+def _hashignore(ignore):
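+    '''Calculate a hash of the ignore patterns and file lists.
+
+    When this hash changes between runs, the ignore rules have changed and
+    the saved watchman state can no longer be relied on.'''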
+    sha1 = util.sha1()
+    if util.safehasattr(ignore, 'includepat'):
+        sha1.update(ignore.includepat)
+    sha1.update('\0\0')
+    if util.safehasattr(ignore, 'excludepat'):
+        sha1.update(ignore.excludepat)
+    sha1.update('\0\0')
+    if util.safehasattr(ignore, 'patternspat'):
+        sha1.update(ignore.patternspat)
+    sha1.update('\0\0')
+    if util.safehasattr(ignore, '_files'):
+        for f in ignore._files:
+            sha1.update(f)
+    sha1.update('\0')
+    return sha1.hexdigest()
+
+def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
+    '''Whenever full is False, ignored is False, and the watchman client is
+    available, use watchman combined with saved state to possibly return only a
+    subset of files.'''
+    def bail():
+        return orig(match, subrepos, unknown, ignored, full=True)
+
+    if full or ignored or not self._watchmanclient.available():
+        return bail()
+    clock, ignorehash, notefiles = self._watchmanstate.get()
+    if not clock:
+        if self._watchmanstate.crawl_on_invalidate:
+            return bail()
+        clock = 'c:0:0'
+        notefiles = []
+
+    def fwarn(f, msg):
+        self._ui.warn('%s: %s\n' % (self.pathto(f), msg))
+        return False
+
+    def badtype(mode):
+        kind = _('unknown')
+        if stat.S_ISCHR(mode):
+            kind = _('character device')
+        elif stat.S_ISBLK(mode):
+            kind = _('block device')
+        elif stat.S_ISFIFO(mode):
+            kind = _('fifo')
+        elif stat.S_ISSOCK(mode):
+            kind = _('socket')
+        elif stat.S_ISDIR(mode):
+            kind = _('directory')
+        return _('unsupported file type (type is %s)') % kind
+
+    ignore = self._ignore
+    dirignore = self._dirignore
+    if unknown:
+        if _hashignore(ignore) != ignorehash and clock != 'c:0:0':
+            # ignore list changed -- can't rely on watchman state any more
+            if self._watchmanstate.crawl_on_invalidate:
+                return bail()
+            notefiles = []
+            clock = 'c:0:0'
+    else:
+        # always ignore
+        ignore = util.always
+        dirignore = util.always
+
+    matchfn = match.matchfn
+    matchalways = match.always()
+    dmap = self._map
+    nonnormalset = None
+    if util.safehasattr(self, "_nonnormalset"):
+        nonnormalset = self._nonnormalset
+
+    copymap = self._copymap
+    getkind = stat.S_IFMT
+    dirkind = stat.S_IFDIR
+    regkind = stat.S_IFREG
+    lnkkind = stat.S_IFLNK
+    join = self._join
+    normcase = util.normcase
+    fresh_instance = False
+
+    exact = skipstep3 = False
+    if matchfn == match.exact:  # match.exact
+        exact = True
+        dirignore = util.always  # skip step 2
+    elif match.files() and not match.anypats():  # match.match, no patterns
+        skipstep3 = True
+
+    if not exact and self._checkcase:
+        # note that even though we could receive directory entries, we're only
+        # interested in checking if a file with the same name exists. So only
+        # normalize files if possible (Mercurial >= 3.4), not directories.
+        normalize = getattr(self, '_normalizefile', self._normalize)
+        skipstep3 = False
+    else:
+        normalize = None
+
+    # step 1: find all explicit files
+    results, work, dirsnotfound = self._walkexplicit(match, subrepos)
+
+    skipstep3 = skipstep3 and not (work or dirsnotfound)
+    if work and isinstance(work[0], tuple):
+        # Mercurial >= 3.3.3
+        work = [d for d in work if not dirignore(d[0])]
+    else:
+        work = [d for d in work if not dirignore(d)]
+
+    if not work and (exact or skipstep3):
+        for s in subrepos:
+            del results[s]
+        del results['.hg']
+        return results
+
+    # step 2: query watchman
+    try:
+        # Use the user-configured timeout for the query.
+        # Add a little slack over the top of the user query to allow for
+        # overheads while transferring the data
+        self._watchmanclient.settimeout(self._watchmanstate.timeout + 0.1)
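+        # Ask watchman for all files that changed since `clock`, excluding
+        # the .hg directory and anything inside it.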
+        result = self._watchmanclient.command('query', {
+            'fields': ['mode', 'mtime', 'size', 'exists', 'name'],
+            'since': clock,
+            'expression': [
+                'not', [
+                    'anyof', ['dirname', '.hg'],
+                    ['name', '.hg', 'wholename']
+                ]
+            ],
+            'sync_timeout': int(self._watchmanstate.timeout * 1000),
+            'empty_on_fresh_instance': self._watchmanstate.crawl_on_invalidate,
+        })
+    except Exception as ex:
+        _handleunavailable(self._ui, self._watchmanstate, ex)
+        self._watchmanclient.clearconnection()
+        return bail()
+    else:
+        # We need to propagate the last observed clock up so that we
+        # can use it for our next query
+        self._watchmanstate.setlastclock(result['clock'])
+        if result['is_fresh_instance']:
+            if self._watchmanstate.crawl_on_invalidate:
+                self._watchmanstate.invalidate()
+                return bail()
+            fresh_instance = True
+            # Ignore any prior notable files from the state info
+            notefiles = []
+
+    # For file paths that require normalization, keep our own foldmap so
+    # that case collisions can be reconciled.
+    if normalize:
+        foldmap = dict((normcase(k), k) for k in results)
+
+    switch_slashes = os.sep == '\\'
+    # The order of the results is, strictly speaking, undefined.
+    # For case changes on a case insensitive filesystem we may receive
+    # two entries, one with exists=True and another with exists=False.
+    # The exists=True entries in the same response should be interpreted
+    # as being happens-after the exists=False entries due to the way that
+    # watchman tracks files.  We use this property to reconcile deletes
+    # for name case changes.
+    for entry in result['files']:
+        fname = entry['name']
+        if switch_slashes:
+            fname = fname.replace('\\', '/')
+        if normalize:
+            normed = normcase(fname)
+            fname = normalize(fname, True, True)
+            foldmap[normed] = fname
+        fmode = entry['mode']
+        fexists = entry['exists']
+        kind = getkind(fmode)
+
+        if not fexists:
+            # if marked as deleted and we don't already have a change
+            # record, mark it as deleted.  If we already have an entry
+            # for fname then it was either part of walkexplicit or was
+            # an earlier result that was a case change
+            if fname not in results and fname in dmap and (
+                    matchalways or matchfn(fname)):
+                results[fname] = None
+        elif kind == dirkind:
+            if fname in dmap and (matchalways or matchfn(fname)):
+                results[fname] = None
+        elif kind == regkind or kind == lnkkind:
+            if fname in dmap:
+                if matchalways or matchfn(fname):
+                    results[fname] = entry
+            elif (matchalways or matchfn(fname)) and not ignore(fname):
+                results[fname] = entry
+        elif fname in dmap and (matchalways or matchfn(fname)):
+            results[fname] = None
+
+    # step 3: query notable files we don't already know about
+    # XXX try not to iterate over the entire dmap
+    if normalize:
+        # any notable files that have changed case will already be handled
+        # above, so just check membership in the foldmap
+        notefiles = set((normalize(f, True, True) for f in notefiles
+                         if normcase(f) not in foldmap))
+    visit = set((f for f in notefiles if (f not in results and matchfn(f)
+                                          and (f in dmap or not ignore(f)))))
+
+    if nonnormalset is not None and not fresh_instance:
+        if matchalways:
+            visit.update((f for f in nonnormalset if f not in results))
+            visit.update((f for f in copymap if f not in results))
+        else:
+            visit.update((f for f in nonnormalset if f not in results
+                          and matchfn(f)))
+            visit.update((f for f in copymap
+                          if f not in results and matchfn(f)))
+    else:
+        if matchalways:
+            visit.update((f for f, st in dmap.iteritems()
+                          if (f not in results and
+                              (st[2] < 0 or st[0] != 'n' or fresh_instance))))
+            visit.update((f for f in copymap if f not in results))
+        else:
+            visit.update((f for f, st in dmap.iteritems()
+                          if (f not in results and
+                              (st[2] < 0 or st[0] != 'n' or fresh_instance)
+                              and matchfn(f))))
+            visit.update((f for f in copymap
+                          if f not in results and matchfn(f)))
+
+    audit = pathutil.pathauditor(self._root).check
+    auditpass = [f for f in visit if audit(f)]
+    auditpass.sort()
+    auditfail = visit.difference(auditpass)
+    for f in auditfail:
+        results[f] = None
+
+    nf = iter(auditpass).next
+    for st in util.statfiles([join(f) for f in auditpass]):
+        f = nf()
+        if st or f in dmap:
+            results[f] = st
+
+    for s in subrepos:
+        del results[s]
+    del results['.hg']
+    return results
+
+def overridestatus(
+        orig, self, node1='.', node2=None, match=None, ignored=False,
+        clean=False, unknown=False, listsubrepos=False):
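+    '''Wrapper around localrepository.status.
+
+    After delegating to the original implementation it records the watchman
+    clock and the notable (non-clean) files when it is safe to do so.  In
+    paranoid mode it additionally re-runs status with watchman disabled and
+    compares the two result sets.'''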
+    listignored = ignored
+    listclean = clean
+    listunknown = unknown
+
+    def _cmpsets(l1, l2):
+        try:
+            if 'HGWATCHMAN_LOG_FILE' in os.environ:
+                fn = os.environ['HGWATCHMAN_LOG_FILE']
+                f = open(fn, 'wb')
+            else:
+                fn = 'watchmanfail.log'
+                f = self.opener(fn, 'wb')
+        except (IOError, OSError):
+            self.ui.warn(_('warning: unable to write to %s\n') % fn)
+            return
+
+        try:
+            for i, (s1, s2) in enumerate(zip(l1, l2)):
+                if set(s1) != set(s2):
+                    f.write('sets at position %d are unequal\n' % i)
+                    f.write('watchman returned: %s\n' % s1)
+                    f.write('stat returned: %s\n' % s2)
+        finally:
+            f.close()
+
+    if isinstance(node1, context.changectx):
+        ctx1 = node1
+    else:
+        ctx1 = self[node1]
+    if isinstance(node2, context.changectx):
+        ctx2 = node2
+    else:
+        ctx2 = self[node2]
+
+    working = ctx2.rev() is None
+    parentworking = working and ctx1 == self['.']
+    match = match or matchmod.always(self.root, self.getcwd())
+
+    # Maybe we can use this opportunity to update watchman's state.
+    # Mercurial uses workingcommitctx and/or memctx to represent the part of
+    # the workingctx that is to be committed. So don't update the state in
+    # that case.
+    # HG_PENDING is set in the environment when the dirstate is being updated
+    # in the middle of a transaction; we must not update our state in that
+    # case, or we risk forgetting about changes in the working copy.
+    updatestate = (parentworking and match.always() and
+                   not isinstance(ctx2, (context.workingcommitctx,
+                                         context.memctx)) and
+                   'HG_PENDING' not in os.environ)
+
+    try:
+        if self._watchmanstate.crawl_on_invalidate:
+            # Use a short timeout to query the current clock.  If that
+            # takes too long then we assume that the service will be slow
+            # to answer our query.
+            # crawl_on_invalidate indicates that we prefer to crawl the
+            # tree ourselves because we can ignore portions that watchman
+            # cannot and we tend to be faster in the warmer buffer cache
+            # cases.
+            self._watchmanclient.settimeout(0.1)
+        else:
+            # Give watchman more time to potentially complete its crawl
+            # and return the initial clock.  In this mode we assume that
+            # the filesystem will be slower than parsing a potentially
+            # very large watchman result set.
+            self._watchmanclient.settimeout(
+                self._watchmanstate.timeout + 0.1)
+        startclock = self._watchmanclient.getcurrentclock()
+    except Exception as ex:
+        self._watchmanclient.clearconnection()
+        _handleunavailable(self.ui, self._watchmanstate, ex)
+        # boo, watchman failed. bail
+        return orig(node1, node2, match, listignored, listclean,
+                    listunknown, listsubrepos)
+
+    if updatestate:
+        # We need info about unknown files. This may make things slower the
+        # first time, but whatever.
+        stateunknown = True
+    else:
+        stateunknown = listunknown
+
+    r = orig(node1, node2, match, listignored, listclean, stateunknown,
+             listsubrepos)
+    modified, added, removed, deleted, unknown, ignored, clean = r
+
+    if updatestate:
+        notefiles = modified + added + removed + deleted + unknown
+        self._watchmanstate.set(
+            self._watchmanstate.getlastclock() or startclock,
+            _hashignore(self.dirstate._ignore),
+            notefiles)
+
+    if not listunknown:
+        unknown = []
+
+    # don't do paranoid checks if we're not going to query watchman anyway
+    full = listclean or match.traversedir is not None
+    if self._watchmanstate.mode == 'paranoid' and not full:
+        # run status again and fall back to the old walk this time
+        self.dirstate._watchmandisable = True
+
+        # shut the UI up
+        quiet = self.ui.quiet
+        self.ui.quiet = True
+        fout, ferr = self.ui.fout, self.ui.ferr
+        self.ui.fout = self.ui.ferr = open(os.devnull, 'wb')
+
+        try:
+            rv2 = orig(
+                node1, node2, match, listignored, listclean, listunknown,
+                listsubrepos)
+        finally:
+            self.dirstate._watchmandisable = False
+            self.ui.quiet = quiet
+            self.ui.fout, self.ui.ferr = fout, ferr
+
+        # clean isn't tested since it's set to True above
+        _cmpsets([modified, added, removed, deleted, unknown, ignored, clean],
+                 rv2)
+        modified, added, removed, deleted, unknown, ignored, clean = rv2
+
+    return namestatus(modified, added, removed, deleted, unknown,
+                      ignored, clean)
+
+def makedirstate(cls):
+    class watchmandirstate(cls):
+        def _watchmaninit(self, watchmanstate, watchmanclient):
+            # _watchmandisable is used in paranoid mode
+            self._watchmandisable = False
+            self._watchmanstate = watchmanstate
+            self._watchmanclient = watchmanclient
+
+        def walk(self, *args, **kwargs):
+            orig = super(watchmandirstate, self).walk
+            if self._watchmandisable:
+                return orig(*args, **kwargs)
+            return overridewalk(orig, self, *args, **kwargs)
+
+        def rebuild(self, *args, **kwargs):
+            self._watchmanstate.invalidate()
+            return super(watchmandirstate, self).rebuild(*args, **kwargs)
+
+        def invalidate(self, *args, **kwargs):
+            self._watchmanstate.invalidate()
+            return super(watchmandirstate, self).invalidate(*args, **kwargs)
+
+    return watchmandirstate
+
+def wrapdirstate(orig, self):
+    ds = orig(self)
+    # only override the dirstate when watchman is available for the repo
+    if util.safehasattr(self, '_watchmanstate'):
+        ds.__class__ = makedirstate(ds.__class__)
+        ds._watchmaninit(self._watchmanstate, self._watchmanclient)
+    return ds
+
+def extsetup(ui):
+    wrapfilecache(localrepo.localrepository, 'dirstate', wrapdirstate)
+    if sys.platform == 'darwin':
+        # An assist for avoiding the dangling-symlink fsevents bug
+        extensions.wrapfunction(os, 'symlink', wrapsymlink)
+
+def wrapsymlink(orig, source, link_name):
+    ''' if we create a dangling symlink, also touch the parent dir
+    to encourage fsevents notifications to work more correctly '''
+    try:
+        return orig(source, link_name)
+    finally:
+        try:
+            os.utime(os.path.dirname(link_name), None)
+        except OSError:
+            pass
+
+def reposetup(ui, repo):
+    # We don't work with largefiles, inotify or eol
+    exts = extensions.enabled()
+    for ext in _blacklist:
+        if ext in exts:
+            return
+
+    if util.safehasattr(repo, 'dirstate'):
+        # We don't work with subrepos either. Note that we can get passed in
+        # e.g. a statichttprepo, which throws on trying to access the substate.
+        # XXX This sucks.
+        try:
+            # Accessing repo[None].substate can cause a dirstate parse, which
+            # is too slow. Instead, look directly for the .hgsubstate and
+            # .hgsub files.
+            if repo.wvfs.exists('.hgsubstate') or repo.wvfs.exists('.hgsub'):
+                return
+        except AttributeError:
+            return
+
+        watchmanstate = state.state(repo)
+        if watchmanstate.mode == 'off':
+            return
+
+        try:
+            watchmanclient = client.client(repo)
+        except Exception as ex:
+            _handleunavailable(ui, watchmanstate, ex)
+            return
+
+        repo._watchmanstate = watchmanstate
+        repo._watchmanclient = watchmanclient
+
+        # repo.dirstate was instantiated before repo._watchmanstate existed,
+        # so at this point it is not yet a watchmandirstate
+        repo.dirstate.__class__ = makedirstate(repo.dirstate.__class__)
+        # nuke the dirstate so that _watchmaninit and subsequent configuration
+        # changes take effect on it
+        del repo._filecache['dirstate']
+        delattr(repo.unfiltered(), 'dirstate')
+
+        class watchmanrepo(repo.__class__):
+            def status(self, *args, **kwargs):
+                orig = super(watchmanrepo, self).status
+                return overridestatus(orig, self, *args, **kwargs)
+
+        repo.__class__ = watchmanrepo
+
+def wrapfilecache(cls, propname, wrapper):
+    """Wraps a filecache property. These can't be wrapped using the normal
+    wrapfunction. This should eventually go into upstream Mercurial.
+    """
+    assert callable(wrapper)
+    for currcls in cls.__mro__:
+        if propname in currcls.__dict__:
+            origfn = currcls.__dict__[propname].func
+            assert callable(origfn)
+            def wrap(*args, **kwargs):
+                return wrapper(origfn, *args, **kwargs)
+            currcls.__dict__[propname].func = wrap
+            break
+
+    if currcls is object:
+        raise AttributeError(
+            _("type '%s' has no property '%s'") % (cls, propname))
diff --git a/hgext/hgwatchman/client.py b/hgext/hgwatchman/client.py
new file mode 100644
--- /dev/null
+++ b/hgext/hgwatchman/client.py
@@ -0,0 +1,109 @@
+# client.py - watchman client
+#
+# Copyright 2013-2016 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import getpass
+
+from mercurial import util
+
+from . import pywatchman
+
+class Unavailable(Exception):
+    def __init__(self, msg, warn=True, invalidate=False):
+        self.msg = msg
+        self.warn = warn
+        if self.msg == 'timed out waiting for response':
+            self.warn = False
+        self.invalidate = invalidate
+
+    def __str__(self):
+        if self.warn:
+            return 'warning: watchman unavailable: %s' % self.msg
+        else:
+            return 'watchman unavailable: %s' % self.msg
+
+class WatchmanNoRoot(Unavailable):
+    def __init__(self, root, msg):
+        self.root = root
+        super(WatchmanNoRoot, self).__init__(msg)
+
+class client(object):
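+    '''Thin wrapper around a pywatchman client for a single repository.
+
+    Tracks the repository root and the configured timeout, honours the
+    watchman.blacklistusers setting, and translates pywatchman errors into
+    Unavailable exceptions.'''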
+    def __init__(self, repo, timeout=1.0):
+        err = None
+        if not self._user:
+            err = "couldn't get user"
+            warn = True
+        if self._user in repo.ui.configlist('watchman', 'blacklistusers'):
+            err = 'user %s in blacklist' % self._user
+            warn = False
+
+        if err:
+            raise Unavailable(err, warn)
+
+        self._timeout = timeout
+        self._watchmanclient = None
+        self._root = repo.root
+        self._ui = repo.ui
+        self._firsttime = True
+
+    def settimeout(self, timeout):
+        self._timeout = timeout
+        if self._watchmanclient is not None:
+            self._watchmanclient.setTimeout(timeout)
+
+    def getcurrentclock(self):
+        result = self.command('clock')
+        if not util.safehasattr(result, 'clock'):
+            raise Unavailable('clock result is missing clock value',
+                              invalidate=True)
+        return result.clock
+
+    def clearconnection(self):
+        self._watchmanclient = None
+
+    def available(self):
+        return self._watchmanclient is not None or self._firsttime
+
+    @util.propertycache
+    def _user(self):
+        try:
+            return getpass.getuser()
+        except KeyError:
+            # couldn't figure out our user
+            return None
+
+    def _command(self, *args):
+        watchmanargs = (args[0], self._root) + args[1:]
+        try:
+            if self._watchmanclient is None:
+                self._firsttime = False
+                self._watchmanclient = pywatchman.client(
+                    timeout=self._timeout,
+                    useImmutableBser=True)
+            return self._watchmanclient.query(*watchmanargs)
+        except pywatchman.CommandError as ex:
+            if ex.msg.startswith('unable to resolve root'):
+                raise WatchmanNoRoot(self._root, ex.msg)
+            raise Unavailable(ex.msg)
+        except pywatchman.WatchmanError as ex:
+            raise Unavailable(str(ex))
+
+    def command(self, *args):
+        try:
+            try:
+                return self._command(*args)
+            except WatchmanNoRoot:
+                # this 'watch' command can also raise a WatchmanNoRoot if
+                # watchman refuses to accept this root
+                self._command('watch')
+                return self._command(*args)
+        except Unavailable:
+            # this is in an outer scope to catch Unavailable from any of the
+            # above _command calls
+            self._watchmanclient = None
+            raise
diff --git a/hgext/hgwatchman/pywatchman/__init__.py b/hgext/hgwatchman/pywatchman/__init__.py
new file mode 100644
--- /dev/null
+++ b/hgext/hgwatchman/pywatchman/__init__.py
@@ -0,0 +1,538 @@
+# Copyright 2014 Facebook, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+#  * Neither the name Facebook nor the names of its contributors may be used to
+#    endorse or promote products derived from this software without specific
+#    prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import errno
+import socket
+import subprocess
+
+# Sometimes it's really hard to get Python extensions to compile,
+# so fall back to a pure Python implementation.
+try:
+    import bser
+except ImportError, e:
+    import pybser as bser
+
+import capabilities
+
+# 2 bytes marker, 1 byte int size, 8 bytes int64 value
+sniff_len = 13
+
+
+class WatchmanError(Exception):
+    pass
+
+
+class SocketTimeout(WatchmanError):
+    """A specialized exception raised for socket timeouts during communication to/from watchman.
+       This makes it easier to implement non-blocking loops as callers can easily distinguish
+       between a routine timeout and an actual error condition.
+
+       Note that catching WatchmanError will also catch this as it is a super-class, so backwards
+       compatibility in exception handling is preserved.
+    """
+
+
+class CommandError(WatchmanError):
+    """error returned by watchman
+
+    self.msg is the message returned by watchman.
+    """
+
+    def __init__(self, msg, cmd=None):
+        self.msg = msg
+        self.cmd = cmd
+        super(CommandError, self).__init__('watchman command error: %s' % msg)
+
+    def setCommand(self, cmd):
+        self.cmd = cmd
+
+    def __str__(self):
+        if self.cmd:
+            return '%s, while executing %s' % (self.msg, self.cmd)
+        return self.msg
+
+
+class Transport(object):
+    """ communication transport to the watchman server """
+    buf = None
+
+    def close(self):
+        """ tear it down """
+        raise NotImplementedError()
+
+    def readBytes(self, size):
+        """ read size bytes """
+        raise NotImplementedError()
+
+    def write(self, buf):
+        """ write some data """
+        raise NotImplementedError()
+
+    def setTimeout(self, value):
+        pass
+
+    def readLine(self):
+        """ read a line
+        Maintains its own buffer, callers of the transport should not mix
+        calls to readBytes and readLine.
+        """
+        if self.buf is None:
+            self.buf = []
+
+        # Buffer may already have a line if we've received unilateral
+        # response(s) from the server
+        if len(self.buf) == 1 and "\n" in self.buf[0]:
+            (line, b) = self.buf[0].split("\n", 1)
+            self.buf = [b]
+            return line
+
+        while True:
+            b = self.readBytes(4096)
+            if "\n" in b:
+                result = ''.join(self.buf)
+                (line, b) = b.split("\n", 1)
+                self.buf = [b]
+                return result + line
+            self.buf.append(b)
+
+
+class Codec(object):
+    """ communication encoding for the watchman server """
+    transport = None
+
+    def __init__(self, transport):
+        self.transport = transport
+
+    def receive(self):
+        raise NotImplementedError()
+
+    def send(self, *args):
+        raise NotImplementedError()
+
+    def setTimeout(self, value):
+        self.transport.setTimeout(value)
+
+class UnixSocketTransport(Transport):
+    """ local unix domain socket transport """
+    sock = None
+
+    def __init__(self, sockpath, timeout):
+        self.sockpath = sockpath
+        self.timeout = timeout
+
+        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        try:
+            sock.settimeout(self.timeout)
+            sock.connect(self.sockpath)
+            self.sock = sock
+        except socket.error as e:
+            raise WatchmanError('unable to connect to %s: %s' %
+                                (self.sockpath, e))
+
+    def close(self):
+        self.sock.close()
+        self.sock = None
+
+    def setTimeout(self, value):
+        self.timeout = value
+        self.sock.settimeout(self.timeout)
+
+    def readBytes(self, size):
+        try:
+            buf = [self.sock.recv(size)]
+            if not buf[0]:
+                raise WatchmanError('empty watchman response')
+            return buf[0]
+        except socket.timeout:
+            raise SocketTimeout('timed out waiting for response')
+
+    def write(self, data):
+        try:
+            self.sock.sendall(data)
+        except socket.timeout:
+            raise SocketTimeout('timed out sending query command')
+
+
+class WindowsNamedPipeTransport(Transport):
+    """ connect to a named pipe """
+
+    def __init__(self, sockpath, timeout):
+        self.sockpath = sockpath
+        self.timeout = timeout
+        self.pipe = os.open(sockpath, os.O_RDWR | os.O_BINARY)
+
+    def close(self):
+        os.close(self.pipe)
+        self.pipe = None
+
+    def readBytes(self, size):
+        return os.read(self.pipe, size)
+
+    def write(self, data):
+        return os.write(self.pipe, data)
+
+
+class CLIProcessTransport(Transport):
+    """ open a pipe to the cli to talk to the service
+    This is intended to be used only in the test harness!
+
+    The CLI is an oddball because we only support JSON input
+    and cannot send multiple commands through the same instance,
+    so we spawn a new process for each command.
+
+    We disable server spawning for this implementation, again, because
+    it is intended to be used only in our test harness.  You really
+    should not need to use the CLI transport for anything real.
+
+    While the CLI can output in BSER, our Transport interface doesn't
+    support telling this instance that it should do so.  That effectively
+    limits this implementation to JSON input and output only at this time.
+
+    It is the responsibility of the caller to set the send and
+    receive codecs appropriately.
+    """
+    proc = None
+    closed = True
+
+    def __init__(self, sockpath, timeout):
+        self.sockpath = sockpath
+        self.timeout = timeout
+
+    def close(self):
+        if self.proc:
+            self.proc.kill()
+            self.proc = None
+
+    def _connect(self):
+        if self.proc:
+            return self.proc
+        args = [
+            'watchman',
+            '--sockname={}'.format(self.sockpath),
+            '--logfile=/BOGUS',
+            '--statefile=/BOGUS',
+            '--no-spawn',
+            '--no-local',
+            '--no-pretty',
+            '-j',
+        ]
+        self.proc = subprocess.Popen(args,
+                                     stdin=subprocess.PIPE,
+                                     stdout=subprocess.PIPE)
+        return self.proc
+
+    def readBytes(self, size):
+        self._connect()
+        res = self.proc.stdout.read(size)
+        if res == '':
+            raise WatchmanError('EOF on CLI process transport')
+        return res
+
+    def write(self, data):
+        if self.closed:
+            self.closed = False
+            self.proc = None
+        self._connect()
+        res = self.proc.stdin.write(data)
+        self.proc.stdin.close()
+        self.closed = True
+        return res
+
+
+class BserCodec(Codec):
+    """ use the BSER encoding.  This is the default, preferred codec """
+
+    def _loads(self, response):
+        return bser.loads(response)
+
+    def receive(self):
+        buf = [self.transport.readBytes(sniff_len)]
+        if not buf[0]:
+            raise WatchmanError('empty watchman response')
+
+        elen = bser.pdu_len(buf[0])
+
+        rlen = len(buf[0])
+        while elen > rlen:
+            buf.append(self.transport.readBytes(elen - rlen))
+            rlen += len(buf[-1])
+
+        response = ''.join(buf)
+        try:
+            res = self._loads(response)
+            return res
+        except ValueError as e:
+            raise WatchmanError('watchman response decode error: %s' % e)
+
+    def send(self, *args):
+        cmd = bser.dumps(*args)
+        self.transport.write(cmd)
+
+class ImmutableBserCodec(BserCodec):
+    """ use the BSER encoding, decoding values using the newer
+        immutable object support """
+
+    def _loads(self, response):
+        return bser.loads(response, False)
+
+
+class JsonCodec(Codec):
+    """ Use json codec.  This is here primarily for testing purposes """
+    json = None
+
+    def __init__(self, transport):
+        super(JsonCodec, self).__init__(transport)
+        # optional dep on json, only if JsonCodec is used
+        import json
+        self.json = json
+
+    def receive(self):
+        line = self.transport.readLine()
+        try:
+            return self.json.loads(line)
+        except Exception as e:
+            print(e, line)
+            raise
+
+    def send(self, *args):
+        cmd = self.json.dumps(*args)
+        self.transport.write(cmd + "\n")
+
+
+class client(object):
+    """ Handles the communication with the watchman service """
+    sockpath = None
+    transport = None
+    sendCodec = None
+    recvCodec = None
+    sendConn = None
+    recvConn = None
+    subs = {}  # Keyed by subscription name
+    logs = []  # When log level is raised
+    unilateral = ['log', 'subscription']
+    tport = None
+    useImmutableBser = None
+
+    def __init__(self, sockpath=None, timeout=1.0, transport=None,
+                 sendEncoding=None, recvEncoding=None, useImmutableBser=False):
+        self.sockpath = sockpath
+        self.timeout = timeout
+        self.useImmutableBser = useImmutableBser
+
+        transport = transport or os.getenv('WATCHMAN_TRANSPORT') or 'local'
+        if transport == 'local' and os.name == 'nt':
+            self.transport = WindowsNamedPipeTransport
+        elif transport == 'local':
+            self.transport = UnixSocketTransport
+        elif transport == 'cli':
+            self.transport = CLIProcessTransport
+            if sendEncoding is None:
+                sendEncoding = 'json'
+            if recvEncoding is None:
+                recvEncoding = sendEncoding
+        else:
+            raise WatchmanError('invalid transport %s' % transport)
+
+        sendEncoding = sendEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser'
+        recvEncoding = recvEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser'
+
+        self.recvCodec = self._parseEncoding(recvEncoding)
+        self.sendCodec = self._parseEncoding(sendEncoding)
+
+    def _parseEncoding(self, enc):
+        if enc == 'bser':
+            if self.useImmutableBser:
+                return ImmutableBserCodec
+            return BserCodec
+        elif enc == 'json':
+            return JsonCodec
+        else:
+            raise WatchmanError('invalid encoding %s' % enc)
+
+    def _hasprop(self, result, name):
+        if self.useImmutableBser:
+            return hasattr(result, name)
+        return name in result
+
+    def _resolvesockname(self):
+        # if invoked via a trigger, watchman will set this env var; we
+        # should use it unless explicitly set otherwise
+        path = os.getenv('WATCHMAN_SOCK')
+        if path:
+            return path
+
+        cmd = ['watchman', '--output-encoding=bser', 'get-sockname']
+        try:
+            p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                                 stderr=subprocess.PIPE,
+                                 close_fds=os.name != 'nt')
+        except OSError as e:
+            raise WatchmanError('"watchman" executable not in PATH (%s)' % e)
+
+        stdout, stderr = p.communicate()
+        exitcode = p.poll()
+
+        if exitcode:
+            raise WatchmanError("watchman exited with code %d" % exitcode)
+
+        result = bser.loads(stdout)
+        if 'error' in result:
+            raise WatchmanError('get-sockname error: %s' % result['error'])
+
+        return result['sockname']
+
+    def _connect(self):
+        """ establish transport connection """
+
+        if self.recvConn:
+            return
+
+        if self.sockpath is None:
+            self.sockpath = self._resolvesockname()
+
+        self.tport = self.transport(self.sockpath, self.timeout)
+        self.sendConn = self.sendCodec(self.tport)
+        self.recvConn = self.recvCodec(self.tport)
+
+    def __del__(self):
+        self.close()
+
+    def close(self):
+        if self.tport:
+            self.tport.close()
+            self.tport = None
+            self.recvConn = None
+            self.sendConn = None
+
+    def receive(self):
+        """ receive the next PDU from the watchman service
+
+        If the client has activated subscriptions or logs then
+        this PDU may be a unilateral PDU sent by the service to
+        inform the client of a log event or subscription change.
+
+        It may also simply be the response portion of a request
+        initiated by query.
+
+        There are clients in production that subscribe and call
+        this in a loop to retrieve all subscription responses,
+        so care should be taken when making changes here.
+        """
+
+        self._connect()
+        result = self.recvConn.receive()
+        if self._hasprop(result, 'error'):
+            raise CommandError(result['error'])
+
+        if self._hasprop(result, 'log'):
+            self.logs.append(result['log'])
+
+        if self._hasprop(result, 'subscription'):
+            sub = result['subscription']
+            if not (sub in self.subs):
+                self.subs[sub] = []
+            self.subs[sub].append(result)
+
+        return result
+
+    def isUnilateralResponse(self, res):
+        for k in self.unilateral:
+            if k in res:
+                return True
+        return False
+
+    def getLog(self, remove=True):
+        """ Retrieve buffered log data
+
+        If remove is true the data will be removed from the buffer.
+        Otherwise it will be left in the buffer
+        """
+        res = self.logs
+        if remove:
+            self.logs = []
+        return res
+
+    def getSubscription(self, name, remove=True):
+        """ Retrieve the data associated with a named subscription
+
+        If remove is True (the default), the subscription data is removed
+        from the buffer.  Otherwise the data is returned but left in
+        the buffer.
+
+        Returns None if there is no data associated with `name`
+        """
+
+        if not (name in self.subs):
+            return None
+        sub = self.subs[name]
+        if remove:
+            del self.subs[name]
+        return sub
+
+    def query(self, *args):
+        """ Send a query to the watchman service and return the response
+
+        This call will block until the response is returned.
+        If any unilateral responses are sent by the service in between
+        the request-response they will be buffered up in the client object
+        and NOT returned via this method.
+        """
+
+        self._connect()
+        try:
+            self.sendConn.send(args)
+
+            res = self.receive()
+            while self.isUnilateralResponse(res):
+                res = self.receive()
+            return res
+        except CommandError as ex:
+            ex.setCommand(args)
+            raise ex
+
+    def capabilityCheck(self, optional=None, required=None):
+        """ Perform a server capability check """
+        res = self.query('version', {
+            'optional': optional or [],
+            'required': required or []})
+
+        if not self._hasprop(res, 'capabilities'):
+            # Server doesn't support capabilities, so we need to
+            # synthesize the results based on the version
+            opts = {'optional': optional or [],
+                    'required': required or []}
+            capabilities.synthesize(res, opts)
+            if 'error' in res:
+                raise CommandError(res['error'])
+
+        return res
+
+    def setTimeout(self, value):
+        self.recvConn.setTimeout(value)
+        self.sendConn.setTimeout(value)
+
+# no-check-code -- this is a 3rd party library
diff --git a/hgext/hgwatchman/pywatchman/bser.c b/hgext/hgwatchman/pywatchman/bser.c
new file mode 100644
--- /dev/null
+++ b/hgext/hgwatchman/pywatchman/bser.c
@@ -0,0 +1,950 @@
+/*
+Copyright (c) 2013-2015, Facebook, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name Facebook nor the names of its contributors may be used to
+   endorse or promote products derived from this software without specific
+   prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <Python.h>
+#ifdef _MSC_VER
+#define inline __inline
+#include <stdint.h>
+#endif
+
+/* Return the smallest size int that can store the value */
+#define INT_SIZE(x) (((x) == ((int8_t)x))  ? 1 :    \
+                     ((x) == ((int16_t)x)) ? 2 :    \
+                     ((x) == ((int32_t)x)) ? 4 : 8)
+
+#define BSER_ARRAY     0x00
+#define BSER_OBJECT    0x01
+#define BSER_STRING    0x02
+#define BSER_INT8      0x03
+#define BSER_INT16     0x04
+#define BSER_INT32     0x05
+#define BSER_INT64     0x06
+#define BSER_REAL      0x07
+#define BSER_TRUE      0x08
+#define BSER_FALSE     0x09
+#define BSER_NULL      0x0a
+#define BSER_TEMPLATE  0x0b
+#define BSER_SKIP      0x0c
+
+// An immutable object representation of BSER_OBJECT.
+// Rather than build a hash table, key -> value are obtained
+// by walking the list of keys to determine the offset into
+// the values array.  The assumption is that the number of
+// array elements will be typically small (~6 for the top
+// level query result and typically 3-5 for the file entries)
+// so that the time overhead for this is small compared to
+// using a proper hash table.  Even with this simplistic
+// approach, this is still faster for the mercurial use case
+// as it helps to eliminate creating N other objects to
+// represent the stat information in the hgwatchman extension
+typedef struct {
+  PyObject_HEAD
+  PyObject *keys;   // tuple of field names
+  PyObject *values; // tuple of values
+} bserObject;
+
+static Py_ssize_t bserobj_tuple_length(PyObject *o) {
+  bserObject *obj = (bserObject*)o;
+
+  return PySequence_Length(obj->keys);
+}
+
+static PyObject *bserobj_tuple_item(PyObject *o, Py_ssize_t i) {
+  bserObject *obj = (bserObject*)o;
+
+  return PySequence_GetItem(obj->values, i);
+}
+
+static PySequenceMethods bserobj_sq = {
+  bserobj_tuple_length,      /* sq_length */
+  0,                         /* sq_concat */
+  0,                         /* sq_repeat */
+  bserobj_tuple_item,        /* sq_item */
+  0,                         /* sq_ass_item */
+  0,                         /* sq_contains */
+  0,                         /* sq_inplace_concat */
+  0                          /* sq_inplace_repeat */
+};
+
+static void bserobj_dealloc(PyObject *o) {
+  bserObject *obj = (bserObject*)o;
+
+  Py_CLEAR(obj->keys);
+  Py_CLEAR(obj->values);
+  PyObject_Del(o);
+}
+
+static PyObject *bserobj_getattrro(PyObject *o, PyObject *name) {
+  bserObject *obj = (bserObject*)o;
+  Py_ssize_t i, n;
+  const char *namestr;
+
+  if (PyIndex_Check(name)) {
+    i = PyNumber_AsSsize_t(name, PyExc_IndexError);
+    if (i == -1 && PyErr_Occurred()) {
+      return NULL;
+    }
+    return PySequence_GetItem(obj->values, i);
+  }
+
+  // hack^Wfeature to allow mercurial to use "st_size" to reference "size"
+  namestr = PyString_AsString(name);
+  if (!strncmp(namestr, "st_", 3)) {
+    namestr += 3;
+  }
+
+  n = PyTuple_GET_SIZE(obj->keys);
+  for (i = 0; i < n; i++) {
+    const char *item_name = NULL;
+    PyObject *key = PyTuple_GET_ITEM(obj->keys, i);
+
+    item_name = PyString_AsString(key);
+    if (!strcmp(item_name, namestr)) {
+      return PySequence_GetItem(obj->values, i);
+    }
+  }
+  PyErr_Format(PyExc_AttributeError,
+              "bserobject has no attribute '%.400s'", namestr);
+  return NULL;
+}
+
+static PyMappingMethods bserobj_map = {
+  bserobj_tuple_length,     /* mp_length */
+  bserobj_getattrro,        /* mp_subscript */
+  0                         /* mp_ass_subscript */
+};
+
+PyTypeObject bserObjectType = {
+  PyVarObject_HEAD_INIT(NULL, 0)
+  "bserobj_tuple",           /* tp_name */
+  sizeof(bserObject),        /* tp_basicsize */
+  0,                         /* tp_itemsize */
+  bserobj_dealloc,           /* tp_dealloc */
+  0,                         /* tp_print */
+  0,                         /* tp_getattr */
+  0,                         /* tp_setattr */
+  0,                         /* tp_compare */
+  0,                         /* tp_repr */
+  0,                         /* tp_as_number */
+  &bserobj_sq,               /* tp_as_sequence */
+  &bserobj_map,              /* tp_as_mapping */
+  0,                         /* tp_hash  */
+  0,                         /* tp_call */
+  0,                         /* tp_str */
+  bserobj_getattrro,         /* tp_getattro */
+  0,                         /* tp_setattro */
+  0,                         /* tp_as_buffer */
+  Py_TPFLAGS_DEFAULT,        /* tp_flags */
+  "bserobj tuple",           /* tp_doc */
+  0,                         /* tp_traverse */
+  0,                         /* tp_clear */
+  0,                         /* tp_richcompare */
+  0,                         /* tp_weaklistoffset */
+  0,                         /* tp_iter */
+  0,                         /* tp_iternext */
+  0,                         /* tp_methods */
+  0,                         /* tp_members */
+  0,                         /* tp_getset */
+  0,                         /* tp_base */
+  0,                         /* tp_dict */
+  0,                         /* tp_descr_get */
+  0,                         /* tp_descr_set */
+  0,                         /* tp_dictoffset */
+  0,                         /* tp_init */
+  0,                         /* tp_alloc */
+  0,                         /* tp_new */
+};
+
+
+static PyObject *bser_loads_recursive(const char **ptr, const char *end,
+    int mutable);
+
+static const char bser_true = BSER_TRUE;
+static const char bser_false = BSER_FALSE;
+static const char bser_null = BSER_NULL;
+static const char bser_string_hdr = BSER_STRING;
+static const char bser_array_hdr = BSER_ARRAY;
+static const char bser_object_hdr = BSER_OBJECT;
+
+static inline uint32_t next_power_2(uint32_t n)
+{
+  n |= (n >> 16);
+  n |= (n >> 8);
+  n |= (n >> 4);
+  n |= (n >> 2);
+  n |= (n >> 1);
+  return n + 1;
+}
+
+// A buffer we use for building up the serialized result
+struct bser_buffer {
+  char *buf;
+  int wpos, allocd;
+};
+typedef struct bser_buffer bser_t;
+
+static int bser_append(bser_t *bser, const char *data, uint32_t len)
+{
+  int newlen = next_power_2(bser->wpos + len);
+  if (newlen > bser->allocd) {
+    char *nbuf = realloc(bser->buf, newlen);
+    if (!nbuf) {
+      return 0;
+    }
+
+    bser->buf = nbuf;
+    bser->allocd = newlen;
+  }
+
+  memcpy(bser->buf + bser->wpos, data, len);
+  bser->wpos += len;
+  return 1;
+}
+
+static int bser_init(bser_t *bser)
+{
+  bser->allocd = 8192;
+  bser->wpos = 0;
+  bser->buf = malloc(bser->allocd);
+
+  if (!bser->buf) {
+    return 0;
+  }
+
+  // Leave room for the serialization header, which includes
+  // our overall length.  To make things simpler, we'll use an
+  // int32 for the header
+#define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00"
+  bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER)-1);
+
+  return 1;
+}
+
+static void bser_dtor(bser_t *bser)
+{
+  free(bser->buf);
+  bser->buf = NULL;
+}
+
+static int bser_long(bser_t *bser, int64_t val)
+{
+  int8_t i8;
+  int16_t i16;
+  int32_t i32;
+  int64_t i64;
+  char sz;
+  int size = INT_SIZE(val);
+  char *iptr;
+
+  switch (size) {
+    case 1:
+      sz = BSER_INT8;
+      i8 = (int8_t)val;
+      iptr = (char*)&i8;
+      break;
+    case 2:
+      sz = BSER_INT16;
+      i16 = (int16_t)val;
+      iptr = (char*)&i16;
+      break;
+    case 4:
+      sz = BSER_INT32;
+      i32 = (int32_t)val;
+      iptr = (char*)&i32;
+      break;
+    case 8:
+      sz = BSER_INT64;
+      i64 = (int64_t)val;
+      iptr = (char*)&i64;
+      break;
+    default:
+      PyErr_SetString(PyExc_RuntimeError,
+          "Cannot represent this long value!?");
+      return 0;
+  }
+
+  if (!bser_append(bser, &sz, sizeof(sz))) {
+    return 0;
+  }
+
+  return bser_append(bser, iptr, size);
+}
+
+static int bser_string(bser_t *bser, PyObject *sval)
+{
+  char *buf = NULL;
+  Py_ssize_t len;
+  int res;
+  PyObject *utf = NULL;
+
+  if (PyUnicode_Check(sval)) {
+    utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore");
+    sval = utf;
+  }
+
+  res = PyString_AsStringAndSize(sval, &buf, &len);
+  if (res == -1) {
+    res = 0;
+    goto out;
+  }
+
+  if (!bser_append(bser, &bser_string_hdr, sizeof(bser_string_hdr))) {
+    res = 0;
+    goto out;
+  }
+
+  if (!bser_long(bser, len)) {
+    res = 0;
+    goto out;
+  }
+
+  if (len > UINT32_MAX) {
+    PyErr_Format(PyExc_ValueError, "string too big");
+    res = 0;
+    goto out;
+  }
+
+  res = bser_append(bser, buf, (uint32_t)len);
+
+out:
+  if (utf) {
+    Py_DECREF(utf);
+  }
+
+  return res;
+}
+
+static int bser_recursive(bser_t *bser, PyObject *val)
+{
+  if (PyBool_Check(val)) {
+    if (val == Py_True) {
+      return bser_append(bser, &bser_true, sizeof(bser_true));
+    }
+    return bser_append(bser, &bser_false, sizeof(bser_false));
+  }
+
+  if (val == Py_None) {
+    return bser_append(bser, &bser_null, sizeof(bser_null));
+  }
+
+  if (PyInt_Check(val)) {
+    return bser_long(bser, PyInt_AS_LONG(val));
+  }
+
+  if (PyLong_Check(val)) {
+    return bser_long(bser, PyLong_AsLongLong(val));
+  }
+
+  if (PyString_Check(val) || PyUnicode_Check(val)) {
+    return bser_string(bser, val);
+  }
+
+  if (PyFloat_Check(val)) {
+    double dval = PyFloat_AS_DOUBLE(val);
+    char sz = BSER_REAL;
+
+    if (!bser_append(bser, &sz, sizeof(sz))) {
+      return 0;
+    }
+
+    return bser_append(bser, (char*)&dval, sizeof(dval));
+  }
+
+  if (PyList_Check(val)) {
+    Py_ssize_t i, len = PyList_GET_SIZE(val);
+
+    if (!bser_append(bser, &bser_array_hdr, sizeof(bser_array_hdr))) {
+      return 0;
+    }
+
+    if (!bser_long(bser, len)) {
+      return 0;
+    }
+
+    for (i = 0; i < len; i++) {
+      PyObject *ele = PyList_GET_ITEM(val, i);
+
+      if (!bser_recursive(bser, ele)) {
+        return 0;
+      }
+    }
+
+    return 1;
+  }
+
+  if (PyTuple_Check(val)) {
+    Py_ssize_t i, len = PyTuple_GET_SIZE(val);
+
+    if (!bser_append(bser, &bser_array_hdr, sizeof(bser_array_hdr))) {
+      return 0;
+    }
+
+    if (!bser_long(bser, len)) {
+      return 0;
+    }
+
+    for (i = 0; i < len; i++) {
+      PyObject *ele = PyTuple_GET_ITEM(val, i);
+
+      if (!bser_recursive(bser, ele)) {
+        return 0;
+      }
+    }
+
+    return 1;
+  }
+
+  if (PyMapping_Check(val)) {
+    Py_ssize_t len = PyMapping_Length(val);
+    Py_ssize_t pos = 0;
+    PyObject *key, *ele;
+
+    if (!bser_append(bser, &bser_object_hdr, sizeof(bser_object_hdr))) {
+      return 0;
+    }
+
+    if (!bser_long(bser, len)) {
+      return 0;
+    }
+
+    while (PyDict_Next(val, &pos, &key, &ele)) {
+      if (!bser_string(bser, key)) {
+        return 0;
+      }
+      if (!bser_recursive(bser, ele)) {
+        return 0;
+      }
+    }
+
+    return 1;
+  }
+
+  PyErr_SetString(PyExc_ValueError, "Unsupported value type");
+  return 0;
+}
+
+static PyObject *bser_dumps(PyObject *self, PyObject *args)
+{
+  PyObject *val = NULL, *res;
+  bser_t bser;
+  uint32_t len;
+
+  if (!PyArg_ParseTuple(args, "O", &val)) {
+    return NULL;
+  }
+
+  if (!bser_init(&bser)) {
+    return PyErr_NoMemory();
+  }
+
+  if (!bser_recursive(&bser, val)) {
+    bser_dtor(&bser);
+    if (errno == ENOMEM) {
+      return PyErr_NoMemory();
+    }
+    // otherwise, we've already set the error to something reasonable
+    return NULL;
+  }
+
+  // Now fill in the overall length
+  len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
+  memcpy(bser.buf + 3, &len, sizeof(len));
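+  // Note that this length covers only the payload that follows the 7-byte
+  // header; bser_pdu_len below adds the header size back when reporting the
+  // size of a complete PDU.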
+
+  res = PyString_FromStringAndSize(bser.buf, bser.wpos);
+  bser_dtor(&bser);
+
+  return res;
+}
+
+int bunser_int(const char **ptr, const char *end, int64_t *val)
+{
+  int needed;
+  const char *buf = *ptr;
+  int8_t i8;
+  int16_t i16;
+  int32_t i32;
+  int64_t i64;
+
+  switch (buf[0]) {
+    case BSER_INT8:
+      needed = 2;
+      break;
+    case BSER_INT16:
+      needed = 3;
+      break;
+    case BSER_INT32:
+      needed = 5;
+      break;
+    case BSER_INT64:
+      needed = 9;
+      break;
+    default:
+      PyErr_Format(PyExc_ValueError,
+          "invalid bser int encoding 0x%02x", buf[0]);
+      return 0;
+  }
+  if (end - buf < needed) {
+    PyErr_SetString(PyExc_ValueError, "input buffer too small for int encoding");
+    return 0;
+  }
+  *ptr = buf + needed;
+  switch (buf[0]) {
+    case BSER_INT8:
+      memcpy(&i8, buf + 1, sizeof(i8));
+      *val = i8;
+      return 1;
+    case BSER_INT16:
+      memcpy(&i16, buf + 1, sizeof(i16));
+      *val = i16;
+      return 1;
+    case BSER_INT32:
+      memcpy(&i32, buf + 1, sizeof(i32));
+      *val = i32;
+      return 1;
+    case BSER_INT64:
+      memcpy(&i64, buf + 1, sizeof(i64));
+      *val = i64;
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+static int bunser_string(const char **ptr, const char *end,
+    const char **start, int64_t *len)
+{
+  const char *buf = *ptr;
+
+  // skip string marker
+  buf++;
+  if (!bunser_int(&buf, end, len)) {
+    return 0;
+  }
+
+  if (buf + *len > end) {
+    PyErr_Format(PyExc_ValueError, "invalid string length in bser data");
+    return 0;
+  }
+
+  *ptr = buf + *len;
+  *start = buf;
+  return 1;
+}
+
+static PyObject *bunser_array(const char **ptr, const char *end, int mutable)
+{
+  const char *buf = *ptr;
+  int64_t nitems, i;
+  PyObject *res;
+
+  // skip array header
+  buf++;
+  if (!bunser_int(&buf, end, &nitems)) {
+    return 0;
+  }
+  *ptr = buf;
+
+  if (nitems > LONG_MAX) {
+    PyErr_Format(PyExc_ValueError, "too many items for python array");
+    return NULL;
+  }
+
+  if (mutable) {
+    res = PyList_New((Py_ssize_t)nitems);
+  } else {
+    res = PyTuple_New((Py_ssize_t)nitems);
+  }
+
+  for (i = 0; i < nitems; i++) {
+    PyObject *ele = bser_loads_recursive(ptr, end, mutable);
+
+    if (!ele) {
+      Py_DECREF(res);
+      return NULL;
+    }
+
+    if (mutable) {
+      PyList_SET_ITEM(res, i, ele);
+    } else {
+      PyTuple_SET_ITEM(res, i, ele);
+    }
+    // DECREF(ele) not required as SET_ITEM steals the ref
+  }
+
+  return res;
+}
+
+static PyObject *bunser_object(const char **ptr, const char *end,
+    int mutable)
+{
+  const char *buf = *ptr;
+  int64_t nitems, i;
+  PyObject *res;
+  bserObject *obj;
+
+  // skip array header
+  buf++;
+  if (!bunser_int(&buf, end, &nitems)) {
+    return 0;
+  }
+  *ptr = buf;
+
+  if (mutable) {
+    res = PyDict_New();
+  } else {
+    obj = PyObject_New(bserObject, &bserObjectType);
+    obj->keys = PyTuple_New((Py_ssize_t)nitems);
+    obj->values = PyTuple_New((Py_ssize_t)nitems);
+    res = (PyObject*)obj;
+  }
+
+  for (i = 0; i < nitems; i++) {
+    const char *keystr;
+    int64_t keylen;
+    PyObject *key;
+    PyObject *ele;
+
+    if (!bunser_string(ptr, end, &keystr, &keylen)) {
+      Py_DECREF(res);
+      return NULL;
+    }
+
+    if (keylen > LONG_MAX) {
+      PyErr_Format(PyExc_ValueError, "string too big for python");
+      Py_DECREF(res);
+      return NULL;
+    }
+
+    key = PyString_FromStringAndSize(keystr, (Py_ssize_t)keylen);
+    if (!key) {
+      Py_DECREF(res);
+      return NULL;
+    }
+
+    ele = bser_loads_recursive(ptr, end, mutable);
+
+    if (!ele) {
+      Py_DECREF(key);
+      Py_DECREF(res);
+      return NULL;
+    }
+
+    if (mutable) {
+      PyDict_SetItem(res, key, ele);
+      Py_DECREF(key);
+      Py_DECREF(ele);
+    } else {
+      /* PyTuple_SET_ITEM steals ele, key */
+      PyTuple_SET_ITEM(obj->values, i, ele);
+      PyTuple_SET_ITEM(obj->keys, i, key);
+    }
+  }
+
+  return res;
+}
+
+static PyObject *bunser_template(const char **ptr, const char *end,
+    int mutable)
+{
+  const char *buf = *ptr;
+  int64_t nitems, i;
+  PyObject *arrval;
+  PyObject *keys;
+  Py_ssize_t numkeys, keyidx;
+
+  if (buf[1] != BSER_ARRAY) {
+    PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE");
+    return NULL;
+  }
+
+  // skip header
+  buf++;
+  *ptr = buf;
+
+  // Load template keys
+  keys = bunser_array(ptr, end, mutable);
+  if (!keys) {
+    return NULL;
+  }
+
+  numkeys = PySequence_Length(keys);
+
+  // Load number of array elements
+  if (!bunser_int(ptr, end, &nitems)) {
+    Py_DECREF(keys);
+    return 0;
+  }
+
+  if (nitems > LONG_MAX) {
+    PyErr_Format(PyExc_ValueError, "Too many items for python");
+    Py_DECREF(keys);
+    return NULL;
+  }
+
+  arrval = PyList_New((Py_ssize_t)nitems);
+  if (!arrval) {
+    Py_DECREF(keys);
+    return NULL;
+  }
+
+  for (i = 0; i < nitems; i++) {
+    PyObject *dict = NULL;
+    bserObject *obj = NULL;
+
+    if (mutable) {
+      dict = PyDict_New();
+    } else {
+      obj = PyObject_New(bserObject, &bserObjectType);
+      if (obj) {
+        obj->keys = keys;
+        Py_INCREF(obj->keys);
+        obj->values = PyTuple_New(numkeys);
+      }
+      dict = (PyObject*)obj;
+    }
+    if (!dict) {
+fail:
+      Py_DECREF(keys);
+      Py_DECREF(arrval);
+      return NULL;
+    }
+
+    for (keyidx = 0; keyidx < numkeys; keyidx++) {
+      PyObject *key;
+      PyObject *ele;
+
+      if (**ptr == BSER_SKIP) {
+        *ptr = *ptr + 1;
+        ele = Py_None;
+        Py_INCREF(ele);
+      } else {
+        ele = bser_loads_recursive(ptr, end, mutable);
+      }
+
+      if (!ele) {
+        goto fail;
+      }
+
+      if (mutable) {
+        key = PyList_GET_ITEM(keys, keyidx);
+        PyDict_SetItem(dict, key, ele);
+        Py_DECREF(ele);
+      } else {
+        PyTuple_SET_ITEM(obj->values, keyidx, ele);
+        // DECREF(ele) not required as SET_ITEM steals the ref
+      }
+    }
+
+    PyList_SET_ITEM(arrval, i, dict);
+    // DECREF(obj) not required as SET_ITEM steals the ref
+  }
+
+  Py_DECREF(keys);
+
+  return arrval;
+}
+
+static PyObject *bser_loads_recursive(const char **ptr, const char *end,
+    int mutable)
+{
+  const char *buf = *ptr;
+
+  switch (buf[0]) {
+    case BSER_INT8:
+    case BSER_INT16:
+    case BSER_INT32:
+    case BSER_INT64:
+      {
+        int64_t ival;
+        if (!bunser_int(ptr, end, &ival)) {
+          return NULL;
+        }
+        if (ival < LONG_MIN || ival > LONG_MAX) {
+          return PyLong_FromLongLong(ival);
+        }
+        return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
+      }
+
+    case BSER_REAL:
+      {
+        double dval;
+        memcpy(&dval, buf + 1, sizeof(dval));
+        *ptr = buf + 1 + sizeof(double);
+        return PyFloat_FromDouble(dval);
+      }
+
+    case BSER_TRUE:
+      *ptr = buf + 1;
+      Py_INCREF(Py_True);
+      return Py_True;
+
+    case BSER_FALSE:
+      *ptr = buf + 1;
+      Py_INCREF(Py_False);
+      return Py_False;
+
+    case BSER_NULL:
+      *ptr = buf + 1;
+      Py_INCREF(Py_None);
+      return Py_None;
+
+    case BSER_STRING:
+      {
+        const char *start;
+        int64_t len;
+
+        if (!bunser_string(ptr, end, &start, &len)) {
+          return NULL;
+        }
+
+        if (len > LONG_MAX) {
+          PyErr_Format(PyExc_ValueError, "string too long for python");
+          return NULL;
+        }
+
+        return PyString_FromStringAndSize(start, (long)len);
+      }
+
+    case BSER_ARRAY:
+      return bunser_array(ptr, end, mutable);
+
+    case BSER_OBJECT:
+      return bunser_object(ptr, end, mutable);
+
+    case BSER_TEMPLATE:
+      return bunser_template(ptr, end, mutable);
+
+    default:
+      PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]);
+  }
+
+  return NULL;
+}
+
+// Expected use case is to read a packet from the socket and
+// then call bser.pdu_len on the packet.  It returns the total
+// length of the entire response that the peer is sending,
+// including the bytes already received.  This allows the client
+// to compute the data size it needs to read before it can
+// decode the data
+static PyObject *bser_pdu_len(PyObject *self, PyObject *args)
+{
+  const char *start = NULL;
+  const char *data = NULL;
+  int datalen = 0;
+  const char *end;
+  int64_t expected_len, total_len;
+
+  if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
+    return NULL;
+  }
+  data = start;
+  end = data + datalen;
+
+  // Validate the header and length
+  if (memcmp(data, EMPTY_HEADER, 2) != 0) {
+    PyErr_SetString(PyExc_ValueError, "invalid bser header");
+    return NULL;
+  }
+
+  data += 2;
+
+  // Expect an integer telling us how big the rest of the data
+  // should be
+  if (!bunser_int(&data, end, &expected_len)) {
+    return NULL;
+  }
+
+  total_len = expected_len + (data - start);
+  if (total_len > LONG_MAX) {
+    return PyLong_FromLongLong(total_len);
+  }
+  return PyInt_FromLong((long)total_len);
+}
+
+static PyObject *bser_loads(PyObject *self, PyObject *args)
+{
+  const char *data = NULL;
+  int datalen = 0;
+  const char *end;
+  int64_t expected_len;
+  int mutable = 1;
+  PyObject *mutable_obj = NULL;
+
+  if (!PyArg_ParseTuple(args, "s#|O:loads", &data, &datalen, &mutable_obj)) {
+    return NULL;
+  }
+  if (mutable_obj) {
+    mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
+  }
+
+  end = data + datalen;
+
+  // Validate the header and length
+  if (memcmp(data, EMPTY_HEADER, 2) != 0) {
+    PyErr_SetString(PyExc_ValueError, "invalid bser header");
+    return NULL;
+  }
+
+  data += 2;
+
+  // Expect an integer telling us how big the rest of the data
+  // should be
+  if (!bunser_int(&data, end, &expected_len)) {
+    return NULL;
+  }
+
+  // Verify
+  if (expected_len + data != end) {
+    PyErr_SetString(PyExc_ValueError, "bser data len != header len");
+    return NULL;
+  }
+
+  return bser_loads_recursive(&data, end, mutable);
+}
+
+static PyMethodDef bser_methods[] = {
+  {"loads",  bser_loads, METH_VARARGS, "Deserialize string."},
+  {"pdu_len", bser_pdu_len, METH_VARARGS, "Extract PDU length."},
+  {"dumps",  bser_dumps, METH_VARARGS, "Serialize string."},
+  {NULL, NULL, 0, NULL}
+};
+
+PyMODINIT_FUNC initbser(void)
+{
+  (void)Py_InitModule("bser", bser_methods);
+  PyType_Ready(&bserObjectType);
+}
+
+/* vim:ts=2:sw=2:et:
+ */
+
+// no-check-code
diff --git a/hgext/hgwatchman/pywatchman/capabilities.py b/hgext/hgwatchman/pywatchman/capabilities.py
new file mode 100644
--- /dev/null
+++ b/hgext/hgwatchman/pywatchman/capabilities.py
@@ -0,0 +1,69 @@
+# Copyright 2015 Facebook, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+#  * Neither the name Facebook nor the names of its contributors may be used to
+#    endorse or promote products derived from this software without specific
+#    prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import re
+
+def parse_version(vstr):
+    res = 0
+    for n in vstr.split('.'):
+        res = res * 1000
+        res = res + int(n)
+    return res
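+# For example, parse_version('3.1.1') == 3001001, so the version checks below
+# reduce to plain integer comparisons (assuming each dotted component stays
+# below 1000).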
+
+cap_versions = {
+    "cmd-watch-del-all": "3.1.1",
+    "cmd-watch-project": "3.1",
+    "relative_root": "3.3",
+    "term-dirname": "3.1",
+    "term-idirname": "3.1",
+    "wildmatch": "3.7",
+}
+
+def check(version, name):
+    if name in cap_versions:
+        return version >= parse_version(cap_versions[name])
+    return False
+
+def synthesize(vers, opts):
+    """ Synthesize a capability enabled version response
+        This is a very limited emulation for relatively recent feature sets
+    """
+    parsed_version = parse_version(vers['version'])
+    vers['capabilities'] = {}
+    for name in opts['optional']:
+        vers['capabilities'][name] = check(parsed_version, name)
+    failed = False
+    for name in opts['required']:
+        have = check(parsed_version, name)
+        vers['capabilities'][name] = have
+        if not have:
+            vers['error'] = 'client required capability `' + name + \
+                            '` is not supported by this server'
+    return vers
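+
+# Illustrative use (values made up): for vers = {'version': '3.2'} and
+# opts = {'required': ['cmd-watch-project'], 'optional': ['wildmatch']},
+# synthesize() sets vers['capabilities'] to
+# {'cmd-watch-project': True, 'wildmatch': False} and adds no 'error' key,
+# because the only required capability is available as of watchman 3.1.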
+
+# no-check-code
diff --git a/hgext/hgwatchman/pywatchman/pybser.py b/hgext/hgwatchman/pywatchman/pybser.py
new file mode 100644
--- /dev/null
+++ b/hgext/hgwatchman/pywatchman/pybser.py
@@ -0,0 +1,355 @@
+# Copyright 2015 Facebook, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+#  * Neither the name Facebook nor the names of its contributors may be used to
+#    endorse or promote products derived from this software without specific
+#    prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import collections
+import ctypes
+import struct
+
+BSER_ARRAY = '\x00'
+BSER_OBJECT = '\x01'
+BSER_STRING = '\x02'
+BSER_INT8 = '\x03'
+BSER_INT16 = '\x04'
+BSER_INT32 = '\x05'
+BSER_INT64 = '\x06'
+BSER_REAL = '\x07'
+BSER_TRUE = '\x08'
+BSER_FALSE = '\x09'
+BSER_NULL = '\x0a'
+BSER_TEMPLATE = '\x0b'
+BSER_SKIP = '\x0c'
+
+# Leave room for the serialization header, which includes
+# our overall length.  To make things simpler, we'll use an
+# int32 for the header
+EMPTY_HEADER = "\x00\x01\x05\x00\x00\x00\x00"
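+# Byte by byte: "\x00\x01" is the BSER magic, "\x05" is BSER_INT32 (announcing
+# a 4-byte length) and the four zero bytes are a placeholder that dumps()
+# later overwrites with the payload length.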
+
+
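+# A value is stored in the smallest of the four BSER integer widths that can
+# represent it; e.g. _int_size(127) == 1, _int_size(128) == 2 and
+# _int_size(2 ** 31) == 8.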
+def _int_size(x):
+    """Return the smallest size int that can store the value"""
+    if -0x80 <= x <= 0x7F:
+        return 1
+    elif -0x8000 <= x <= 0x7FFF:
+        return 2
+    elif -0x80000000 <= x <= 0x7FFFFFFF:
+        return 4
+    elif -0x8000000000000000L <= x <= 0x7FFFFFFFFFFFFFFFL:
+        return 8
+    else:
+        raise RuntimeError('Cannot represent value: ' + str(x))
+
+
+class _bser_buffer(object):
+
+    def __init__(self):
+        self.buf = ctypes.create_string_buffer(8192)
+        struct.pack_into(str(len(EMPTY_HEADER)) + 's', self.buf, 0, EMPTY_HEADER)
+        self.wpos = len(EMPTY_HEADER)
+
+    def ensure_size(self, size):
+        while ctypes.sizeof(self.buf) - self.wpos < size:
+            ctypes.resize(self.buf, ctypes.sizeof(self.buf) * 2)
+
+    def append_long(self, val):
+        size = _int_size(val)
+        to_write = size + 1
+        self.ensure_size(to_write)
+        if size == 1:
+            struct.pack_into('=cb', self.buf, self.wpos, BSER_INT8, val)
+        elif size == 2:
+            struct.pack_into('=ch', self.buf, self.wpos, BSER_INT16, val)
+        elif size == 4:
+            struct.pack_into('=ci', self.buf, self.wpos, BSER_INT32, val)
+        elif size == 8:
+            struct.pack_into('=cq', self.buf, self.wpos, BSER_INT64, val)
+        else:
+            raise RuntimeError('Cannot represent this long value')
+        self.wpos += to_write
+
+
+    def append_string(self, s):
+        if isinstance(s, unicode):
+            s = s.encode('utf-8')
+        s_len = len(s)
+        size = _int_size(s_len)
+        to_write = 2 + size + s_len
+        self.ensure_size(to_write)
+        if size == 1:
+            struct.pack_into('=ccb' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT8, s_len, s)
+        elif size == 2:
+            struct.pack_into('=cch' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT16, s_len, s)
+        elif size == 4:
+            struct.pack_into('=cci' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT32, s_len, s)
+        elif size == 8:
+            struct.pack_into('=ccq' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT64, s_len, s)
+        else:
+            raise RuntimeError('Cannot represent this string value')
+        self.wpos += to_write
+
+
+    def append_recursive(self, val):
+        if isinstance(val, bool):
+            needed = 1
+            self.ensure_size(needed)
+            if val:
+                to_encode = BSER_TRUE
+            else:
+                to_encode = BSER_FALSE
+            struct.pack_into('=c', self.buf, self.wpos, to_encode)
+            self.wpos += needed
+        elif val is None:
+            needed = 1
+            self.ensure_size(needed)
+            struct.pack_into('=c', self.buf, self.wpos, BSER_NULL)
+            self.wpos += needed
+        elif isinstance(val, (int, long)):
+            self.append_long(val)
+        elif isinstance(val, (str, unicode)):
+            self.append_string(val)
+        elif isinstance(val, float):
+            needed = 9
+            self.ensure_size(needed)
+            struct.pack_into('=cd', self.buf, self.wpos, BSER_REAL, val)
+            self.wpos += needed
+        elif isinstance(val, collections.Mapping) and isinstance(val, collections.Sized):
+            val_len = len(val)
+            size = _int_size(val_len)
+            needed = 2 + size
+            self.ensure_size(needed)
+            if size == 1:
+                struct.pack_into('=ccb', self.buf, self.wpos, BSER_OBJECT, BSER_INT8, val_len)
+            elif size == 2:
+                struct.pack_into('=cch', self.buf, self.wpos, BSER_OBJECT, BSER_INT16, val_len)
+            elif size == 4:
+                struct.pack_into('=cci', self.buf, self.wpos, BSER_OBJECT, BSER_INT32, val_len)
+            elif size == 8:
+                struct.pack_into('=ccq', self.buf, self.wpos, BSER_OBJECT, BSER_INT64, val_len)
+            else:
+                raise RuntimeError('Cannot represent this mapping value')
+            self.wpos += needed
+            for k, v in val.iteritems():
+                self.append_string(k)
+                self.append_recursive(v)
+        elif isinstance(val, collections.Iterable) and isinstance(val, collections.Sized):
+            val_len = len(val)
+            size = _int_size(val_len)
+            needed = 2 + size
+            self.ensure_size(needed)
+            if size == 1:
+                struct.pack_into('=ccb', self.buf, self.wpos, BSER_ARRAY, BSER_INT8, val_len)
+            elif size == 2:
+                struct.pack_into('=cch', self.buf, self.wpos, BSER_ARRAY, BSER_INT16, val_len)
+            elif size == 4:
+                struct.pack_into('=cci', self.buf, self.wpos, BSER_ARRAY, BSER_INT32, val_len)
+            elif size == 8:
+                struct.pack_into('=ccq', self.buf, self.wpos, BSER_ARRAY, BSER_INT64, val_len)
+            else:
+                raise RuntimeError('Cannot represent this sequence value')
+            self.wpos += needed
+            for v in val:
+                self.append_recursive(v)
+        else:
+            raise RuntimeError('Cannot represent unknown value type')
+
+
+def dumps(obj):
+    bser_buf = _bser_buffer()
+    bser_buf.append_recursive(obj)
+    # Now fill in the overall length
+    obj_len = bser_buf.wpos - len(EMPTY_HEADER)
+    struct.pack_into('=i', bser_buf.buf, 3, obj_len)
+    return bser_buf.buf.raw[:bser_buf.wpos]
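+
+# A worked example (byte values assume a little-endian host; BSER is only used
+# for local IPC, and both this module and bser.c write native byte order):
+#
+#   dumps({'foo': 123}) == ('\x00\x01\x05\x0b\x00\x00\x00'  # header, length 11
+#                           '\x01\x03\x01'                  # object, one key
+#                           '\x02\x03\x03foo'               # string key 'foo'
+#                           '\x03\x7b')                     # int8 value 123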
+
+
+def _bunser_int(buf, pos):
+    try:
+        int_type = buf[pos]
+    except IndexError:
+        raise ValueError('Invalid bser int encoding, pos out of range')
+    if int_type == BSER_INT8:
+        needed = 2
+        fmt = '=b'
+    elif int_type == BSER_INT16:
+        needed = 3
+        fmt = '=h'
+    elif int_type == BSER_INT32:
+        needed = 5
+        fmt = '=i'
+    elif int_type == BSER_INT64:
+        needed = 9
+        fmt = '=q'
+    else:
+        raise ValueError('Invalid bser int encoding 0x%02x' % ord(int_type))
+    int_val = struct.unpack_from(fmt, buf, pos + 1)[0]
+    return (int_val, pos + needed)
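+# For instance, _bunser_int('\x03\x7b', 0) == (123, 2): an int8 marker, the
+# value 123, and the read position advanced past the two consumed bytes.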
+
+
+def _bunser_string(buf, pos):
+    str_len, pos = _bunser_int(buf, pos + 1)
+    str_val = struct.unpack_from(str(str_len) + 's', buf, pos)[0]
+    return (str_val, pos + str_len)
+
+
+def _bunser_array(buf, pos, mutable=True):
+    arr_len, pos = _bunser_int(buf, pos + 1)
+    arr = []
+    for i in range(arr_len):
+        arr_item, pos = _bser_loads_recursive(buf, pos, mutable)
+        arr.append(arr_item)
+
+    if not mutable:
+        arr = tuple(arr)
+
+    return arr, pos
+
+
+# This is a quack-alike of the bserObjectType in bser.c.
+# It provides attribute (getattr) and item (getitem) access by both
+# index and name.
+class _BunserDict(object):
+    __slots__ = ('_keys', '_values')
+
+    def __init__(self, keys, values):
+        self._keys = keys
+        self._values = values
+
+    def __getattr__(self, name):
+        return self.__getitem__(name)
+
+    def __getitem__(self, key):
+        if isinstance(key, (int, long)):
+            return self._values[key]
+        elif key.startswith('st_'):
+            # hack^Wfeature to allow mercurial to use "st_size" to
+            # reference "size"
+            key = key[3:]
+        try:
+            return self._values[self._keys.index(key)]
+        except ValueError as ex:
+            raise KeyError('_BunserDict has no key %s' % key)
+
+    def __len__(self):
+        return len(self._keys)
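+
+    # For example, row = _BunserDict(('name', 'size'), ('foo', 12)) supports
+    # row.name == 'foo', row['size'] == 12 and, via the st_ aliasing above,
+    # row.st_size == 12.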
+
+def _bunser_object(buf, pos, mutable=True):
+    obj_len, pos = _bunser_int(buf, pos + 1)
+    if mutable:
+        obj = {}
+    else:
+        keys = []
+        vals = []
+
+    for i in range(obj_len):
+        key, pos = _bunser_string(buf, pos)
+        val, pos = _bser_loads_recursive(buf, pos, mutable)
+        if mutable:
+            obj[key] = val
+        else:
+            keys.append(key)
+            vals.append(val)
+
+    if not mutable:
+        obj = _BunserDict(keys, vals)
+
+    return obj, pos
+
+
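+# BSER templates are watchman's compact encoding for a list of similar
+# objects: the key names are sent once as an array, followed by the row count
+# and then one value per key per row, with BSER_SKIP marking keys that are
+# absent from a particular row (decoded here as None).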
+def _bunser_template(buf, pos, mutable=True):
+    if buf[pos + 1] != BSER_ARRAY:
+        raise RuntimeError('Expect ARRAY to follow TEMPLATE')
+    keys, pos = _bunser_array(buf, pos + 1)
+    nitems, pos = _bunser_int(buf, pos)
+    arr = []
+    for i in range(nitems):
+        if mutable:
+            obj = {}
+        else:
+            vals = []
+
+        for keyidx in range(len(keys)):
+            if buf[pos] == BSER_SKIP:
+                pos += 1
+                ele = None
+            else:
+                ele, pos = _bser_loads_recursive(buf, pos, mutable)
+
+            if mutable:
+                key = keys[keyidx]
+                obj[key] = ele
+            else:
+                vals.append(ele)
+
+        if not mutable:
+            obj = _BunserDict(keys, vals)
+
+        arr.append(obj)
+    return arr, pos
+
+
+def _bser_loads_recursive(buf, pos, mutable=True):
+    val_type = buf[pos]
+    if (val_type == BSER_INT8 or val_type == BSER_INT16 or
+        val_type == BSER_INT32 or val_type == BSER_INT64):
+        return _bunser_int(buf, pos)
+    elif val_type == BSER_REAL:
+        val = struct.unpack_from('=d', buf, pos + 1)[0]
+        return (val, pos + 9)
+    elif val_type == BSER_TRUE:
+        return (True, pos + 1)
+    elif val_type == BSER_FALSE:
+        return (False, pos + 1)
+    elif val_type == BSER_NULL:
+        return (None, pos + 1)
+    elif val_type == BSER_STRING:
+        return _bunser_string(buf, pos)
+    elif val_type == BSER_ARRAY:
+        return _bunser_array(buf, pos, mutable)
+    elif val_type == BSER_OBJECT:
+        return _bunser_object(buf, pos, mutable)
+    elif val_type == BSER_TEMPLATE:
+        return _bunser_template(buf, pos, mutable)
+    else:
+        raise RuntimeError('unhandled bser opcode 0x%02x' % (val_type,))
+
+
+def pdu_len(buf):
+    if buf[0:2] != EMPTY_HEADER[0:2]:
+        raise RuntimeError('Invalid BSER header')
+    expected_len, pos = _bunser_int(buf, 2)
+    return expected_len + pos
+
+
+def loads(buf, mutable=True):
+    if buf[0:2] != EMPTY_HEADER[0:2]:
+        raise RuntimeError('Invalid BSER header')
+    expected_len, pos = _bunser_int(buf, 2)
+    if len(buf) != expected_len + pos:
+        raise RuntimeError('bser data len != header len')
+    return _bser_loads_recursive(buf, pos, mutable)[0]
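+
+# Typical framing when reading a response (sketch only; `sock` is a
+# hypothetical connected watchman socket, and real code must loop on recv):
+#
+#   sniff = sock.recv(13)                        # enough for any PDU header
+#   total = pdu_len(sniff)                       # full size, header included
+#   buf = sniff + sock.recv(total - len(sniff))
+#   result = loads(buf)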
+
+# no-check-code
diff --git a/hgext/hgwatchman/state.py b/hgext/hgwatchman/state.py
new file mode 100644
--- /dev/null
+++ b/hgext/hgwatchman/state.py
@@ -0,0 +1,114 @@
+# state.py - hgwatchman persistent state
+#
+# Copyright 2013-2016 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import errno
+import os
+import socket
+import struct
+
+from mercurial import pathutil
+from mercurial.i18n import _
+
+_version = 4
+_versionformat = ">I"
+
+class state(object):
+    def __init__(self, repo):
+        self._opener = repo.opener
+        self._ui = repo.ui
+        self._rootdir = pathutil.normasprefix(repo.root)
+        self._lastclock = None
+
+        self.mode = self._ui.config('watchman', 'mode', default='on')
+        self.crawl_on_invalidate = self._ui.configbool(
+            'watchman', 'crawl_on_invalidate', True)
+        self.timeout = float(self._ui.config(
+            'watchman', 'timeout', default='2'))
+
+    def get(self):
+        try:
+            file = self._opener('watchman.state', 'rb')
+        except IOError as inst:
+            if inst.errno != errno.ENOENT:
+                raise
+            return None, None, None
+
+        versionbytes = file.read(4)
+        if len(versionbytes) < 4:
+            self._ui.log(
+                'watchman', 'watchman: state file only has %d bytes, '
+                'nuking state\n' % len(versionbytes))
+            self.invalidate()
+            return None, None, None
+        try:
+            diskversion = struct.unpack(_versionformat, versionbytes)[0]
+            if diskversion != _version:
+                # different version, nuke state and start over
+                self._ui.log('watchman', 'watchman: version switch from %d to '
+                             '%d, nuking state\n' % (diskversion, _version))
+                self.invalidate()
+                return None, None, None
+
+            state = file.read().split('\0')
+            # state = hostname\0clock\0ignorehash\0 + list of files, each
+            # followed by a \0
+            diskhostname = state[0]
+            hostname = socket.gethostname()
+            if diskhostname != hostname:
+                # file got moved to a different host
+                self._ui.log('watchman', 'watchman: stored hostname "%s" '
+                             'different from current "%s", nuking state\n' %
+                             (diskhostname, hostname))
+                self.invalidate()
+                return None, None, None
+
+            clock = state[1]
+            ignorehash = state[2]
+            # discard the value after the last \0
+            notefiles = state[3:-1]
+
+        finally:
+            file.close()
+
+        return clock, ignorehash, notefiles
+
+    def set(self, clock, ignorehash, notefiles):
+        if clock is None:
+            self.invalidate()
+            return
+
+        try:
+            file = self._opener('watchman.state', 'wb')
+        except (IOError, OSError):
+            self._ui.warn(_("warning: unable to write out watchman state\n"))
+            return
+
+        try:
+            file.write(struct.pack(_versionformat, _version))
+            file.write(socket.gethostname() + '\0')
+            file.write(clock + '\0')
+            file.write(ignorehash + '\0')
+            if notefiles:
+                file.write('\0'.join(notefiles))
+                file.write('\0')
+        finally:
+            file.close()
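+
+    # The resulting file layout is, with made-up values:
+    #
+    #   struct.pack('>I', 4) + 'myhost\0' + 'c:1455628807:123:1:45\0'
+    #   + '<hash of ignore rules>\0' + 'dir/a.txt\0' + 'b.c\0'
+    #
+    # get() above splits everything after the version field on '\0' and drops
+    # the empty trailing element, which is why notefiles is state[3:-1].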
+
+    def invalidate(self):
+        try:
+            os.unlink(os.path.join(self._rootdir, '.hg', 'watchman.state'))
+        except OSError as inst:
+            if inst.errno != errno.ENOENT:
+                raise
+
+    def setlastclock(self, clock):
+        self._lastclock = clock
+
+    def getlastclock(self):
+        return self._lastclock
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -564,6 +564,8 @@
     Extension('mercurial.osutil', ['mercurial/osutil.c'],
               extra_link_args=osutil_ldflags,
               depends=common_depends),
+    Extension('hgext.hgwatchman.pywatchman.bser',
+              ['hgext/hgwatchman/pywatchman/bser.c']),
     ]
 
 try:
diff --git a/tests/test-check-code.t b/tests/test-check-code.t
--- a/tests/test-check-code.t
+++ b/tests/test-check-code.t
@@ -8,6 +8,10 @@
 
   $ hg locate | sed 's-\\-/-g' |
   >   xargs "$check_code" --warnings --per-file=0 || false
+  Skipping hgext/hgwatchman/pywatchman/__init__.py it has no-che?k-code (glob)
+  Skipping hgext/hgwatchman/pywatchman/bser.c it has no-che?k-code (glob)
+  Skipping hgext/hgwatchman/pywatchman/capabilities.py it has no-che?k-code (glob)
+  Skipping hgext/hgwatchman/pywatchman/pybser.py it has no-che?k-code (glob)
   Skipping hgext/zeroconf/Zeroconf.py it has no-che?k-code (glob)
   Skipping i18n/polib.py it has no-che?k-code (glob)
   Skipping mercurial/httpclient/__init__.py it has no-che?k-code (glob)
diff --git a/tests/test-check-py3-compat.t b/tests/test-check-py3-compat.t
--- a/tests/test-check-py3-compat.t
+++ b/tests/test-check-py3-compat.t
@@ -56,6 +56,10 @@
   hgext/graphlog.py not using absolute_import
   hgext/hgcia.py not using absolute_import
   hgext/hgk.py not using absolute_import
+  hgext/hgwatchman/pywatchman/__init__.py not using absolute_import
+  hgext/hgwatchman/pywatchman/__init__.py requires print_function
+  hgext/hgwatchman/pywatchman/capabilities.py not using absolute_import
+  hgext/hgwatchman/pywatchman/pybser.py not using absolute_import
   hgext/highlight/__init__.py not using absolute_import
   hgext/highlight/highlight.py not using absolute_import
   hgext/histedit.py not using absolute_import


