[PATCH audit RFC] audit: add core audit module, extension using it, and a minimal test

Mike Edgar adgar at google.com
Wed Dec 2 17:38:42 UTC 2015


# HG changeset patch
# User Mike Edgar <adgar at google.com>
# Date 1448701927 18000
#      Sat Nov 28 04:12:07 2015 -0500
# Node ID 2a3144c7158e2f80a9ea7ce7da026c1492ffa9e6
# Parent  389b9907470c61bc502a4e78724aa9b336d81cf6
audit: add core audit module, extension using it, and a minimal test

This is the first piece of implementing audit trails/chain-of-custody tracking
in core Mercurial, providing minimal core functionality and an extension
for accessing that core functionality.

For more details on the audit trail design, see:

https://www.mercurial-scm.org/wiki/AuditTrailPlan

diff -r 389b9907470c -r 2a3144c7158e hgext/audit.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/audit.py	Sat Nov 28 04:12:07 2015 -0500
@@ -0,0 +1,138 @@
+# Copyright (C) 2015 - Mike Edgar <adgar at google.com>
+#
+# This extension enables direct manipulation of Mercurial's built-in audit trail
+# metadata.
+
+"""view and edit changeset audit trail"""
+
+from mercurial import audit as auditmod
+from mercurial import lock as lockmod
+from mercurial import cmdutil, context, error, filelog, obsolete, revlog
+from mercurial import scmutil, util
+from mercurial.i18n import _
+
+cmdtable = {}
+command = cmdutil.command(cmdtable)
+# Note for extension authors: ONLY specify testedwith = 'internal' for
+# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
+# be specifying the version(s) of Mercurial they are tested with, or
+# leave the attribute unspecified.
+testedwith = 'internal'
+
+ at command('audit',
+    [('r', 'rev', '', _('revision to audit'), _('REV'))],
+    _('-r REV'))
+def audit(ui, repo, rev='', **opts):
+    """directly read a revision's audit trail"""
+    rev, ctx = _getrev(repo, rev)
+    sigs = auditmod.extractsigs(ctx.extra())
+    for (num, sigkey, sigvalue) in sigs:
+        atts = auditmod.decodesig(sigvalue)
+        ui.status(_('signature %s\n%s\n%s\n') % (
+            num,
+            '-' * len('signature %s' % num),
+            ''.join('%s=%s\n' % (k, atts[k]) for k in sorted(atts))))
+
+ at command('debugaudit',
+    [('r', 'rev', '', _('revision to audit'), _('REV')),
+     ('u', 'user', '', _('recorded user in audit trail'), _('USER')),
+     ('d', 'date', '', _('recorded date in audit trail'), _('DATE')),
+     ('a', 'attest', [], _('attestation(s) to add (use \'key=value\')'),
+      _('ATTESTATION')),
+     ('', 'delete', [], _('attestation key(s) to delete'), _('KEY'))],
+    _('-r REV [-u USER] [-d DATE] [-a ATTESTATION]... [--delete KEY]...'))
+def debugaudit(ui, repo, rev='', user='', date='', attest=None, delete=None,
+               **opts):
+    """directly add or delete attestations in a changeset's audit trail"""
+    rev, ctx = _getrev(repo, rev)
+    attest = _checkattest(attest or [])
+    delete = _checkdelete(delete or [])
+    user = user or ui.username()
+    date = util.parsedate(date) if date else util.makedate()
+
+    # Compute the new extra dict for the changeset.
+    extra = ctx.extra().copy()
+    sigs = auditmod.extractsigs(extra)
+    atts = {}
+    if sigs:
+        atts = auditmod.decodesig(sigs[-1][2])
+
+    # Now, apply input options to atts
+    for a in attest:
+        k, eq, v = a.partition('=')
+        atts[k] = v
+    for d in delete:
+        del atts[d]
+    extra.update(auditmod.audit('debugaudit', ctx, user, date, atts))
+    if extra == ctx.extra():
+        ui.status(_('no changes\n'))
+        return 1
+
+    # Having computed the new extra dict, do an amend with the new extra.
+    def filectxfn(repo, memctx, path):
+        fctx = ctx[path]
+        copied = fctx.renamed()
+        if copied:
+            copied = copied[0]
+        return context.memfilectx(repo, path, fctx.data(),
+                                  islink=fctx.islink(), isexec=fctx.isexec(),
+                                  copied=copied, memctx=memctx)
+    new = context.memctx(repo,
+                         parents=[ctx.p1().node(), ctx.p2().node()],
+                         text=ctx.description(),
+                         files=ctx.files(),
+                         filectxfn=filectxfn,
+                         user=user,
+                         date=date,
+                         extra=extra)
+
+    wlock = lock = newid = None
+    try:
+        wlock = repo.wlock()
+        lock = repo.lock()
+        tr = repo.transaction('debugaudit')
+        try:
+            oldid = ctx.node()
+            newid = repo.commitctx(new)
+            if newid != oldid:
+                ps = [c.node() for c in repo.parents()]
+                if oldid in ps:
+                    repo.setparents(*[newid if p == oldid else p for p in ps])
+                if obsolete.isenabled(repo, obsolete.createmarkersopt):
+                    # mark the new changeset as successor of the rewritten one
+                    new = repo[newid]
+                    obs = [(ctx, (new,))]
+                    obsolete.createmarkers(repo, obs)
+            tr.close()
+        finally:
+            tr.release()
+    finally:
+        lockmod.release(lock, wlock)
+
+def _getrev(repo, rev):
+    if not rev:
+        raise error.Abort(_('must specify revision to censor'))
+
+    rev = scmutil.revsingle(repo, rev, rev).rev()
+    try:
+        return rev, repo[rev]
+    except KeyError:
+        raise error.Abort(_('invalid revision identifier %s') % rev)
+
+def _checkattest(attest):
+    for a in attest:
+        if a.find('=') < 0:
+            raise error.Abort(_('attestation missing equals sign: %s') % a)
+        elif a.find('=') == 0:
+            raise error.Abort(_('attestation missing key: %s') % a)
+        elif a[:a.find('=')] in auditmod.required_atts:
+            raise error.Abort(_('cannot set builtin attestation: %s') % a)
+    return attest
+
+def _checkdelete(delete):
+    for d in delete:
+        if '=' in d:
+            raise error.Abort(_('invalid key contains equals sign: %s') % d)
+        elif d in auditmod.required_atts:
+            raise error.Abort(_('cannot delete builtin attestation: %s') % d)
+    return delete
diff -r 389b9907470c -r 2a3144c7158e mercurial/audit.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/audit.py	Sat Nov 28 04:12:07 2015 -0500
@@ -0,0 +1,98 @@
+# audit.py - encoding changeset chain of custody in extra
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+"""
+This module serves to implement the data formatting machinery employed by other
+modules to produce and consume audit log data.
+"""
+
+import re
+
+from mercurial.node import hex
+from mercurial import error, ui, util
+from mercurial.i18n import _
+
+# hg.custodian: required attestation. The creator of an audit entry. RFC822.
+att_custodian = 'hg.custodian'
+# hg.date: required attestation. The date of the last change to the audit entry.
+# Same format as in changelogs (see changelog.py).
+att_date = 'hg.date'
+# hg.link: required attestation. For the Nth signature, link is the SHA of a
+# changeset with N-1 matching signatures. For sig0, hg.link is always nullid.
+# The linked changeset is only needed for full audit verification.
+att_source = 'hg.link'
+# Required builtin attestations, used for validation. Extensions might augment
+# this list to enforce local policy.
+required_atts = [att_custodian, att_date, att_source]
+
+# Valid characters in an attestation name.
+att_chars = set(
+    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-')
+
+sigkey_pat = re.compile('^sig\d+$')
+
+def enabled(ui, source):
+    """Check if a given audit log source has auditing enabled in config."""
+    return (ui.configbool('experimental', 'audit-all', False) or
+            ui.configbool('experimental', 'audit-' + source, False))
+
+def audit(source, ctx, user, date, atts):
+    if not enabled(ctx.repo().ui, source):
+        return {} # not enabled
+    if not atts:
+        atts = {}
+    atts = atts.copy()  # copy because we will add our own attestations later
+
+    # Signatures are linearly ordered from 1 (maybe 0)
+    sigs = extractsigs(ctx.extra())
+    if sigs:
+        oldsig = sigs[-1][1]
+        newsig = 'sig%d' % (sigs[-1][0]+1,)
+        lastuser = decodesig(sigs[-1][2])[att_custodian]
+    else:
+        oldsig = 'sig0'
+        newsig = 'sig1'
+        lastuser = ctx.user()
+
+    if user == lastuser and not atts:
+        return {} # custodian didn't change, no attestations. don't update.
+
+    if user == lastuser:
+        sig = oldsig
+    else:
+        sig = newsig
+    if user != lastuser or not sigs:
+        atts[att_source] = hex(ctx.node())
+
+    # if custodian user changed add new signature, else update old signature.
+    atts[att_custodian] = user
+    # unconditionally add hg.date attestation to timestamp the audit entry.
+    atts[att_date] = '%d %d' % date
+
+    verifyatts(atts)
+    return {sig: encodesig(atts)}
+
+def extractsigs(extra):
+    return sorted((int(k[3:]), k, v)
+                   for (k, v) in extra.iteritems() if sigkey_pat.match(k))
+
+def decodesig(b):
+    atts = {}
+    for attline in b.splitlines():
+        k, colon, v = attline.partition(':')
+        atts[k] = v
+    return atts
+
+def encodesig(atts):
+    return ''.join('%s:%s\n' % (k, v) for (k, v) in sorted(atts.iteritems()))
+
+def verifyatts(atts):
+    for att in required_atts:
+        if att not in atts:
+            raise error.Abort(_('audit attestations with no %s key') % att)
+    for att in atts.iterkeys():
+        if any(c not in att_chars for c in att):
+            badchars = ''.join(sorted(c for c in att if c not in att_chars))
+            raise error.Abort(_('attestation %s has invalid characters: %s') % (
+                att, '[%s]' % badchars))
diff -r 389b9907470c -r 2a3144c7158e tests/test-audit.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-audit.t	Sat Nov 28 04:12:07 2015 -0500
@@ -0,0 +1,244 @@
+  $ cat >> $HGRCPATH <<EOF
+  > [extensions]
+  > audit=
+  > rebase=
+  > [alias]
+  > debugaudit=debugaudit -d '0 0'
+  > [experimental]
+  > evolution=createmarkers
+  > audit-debugaudit=True
+  > EOF
+  $ USER1='Test User <test at example.net>'
+  $ USER2='Foo User <foo at example.org>'
+  $ HGUSER="$USER1"
+  $ export HGUSER
+
+Create repo and perform a rebase with a different user
+
+  $ hg init r
+  $ cd r
+  $ echo init > f
+  $ hg add f
+  $ hg ci -m init
+
+  $ echo v2 > f2
+  $ hg add f2
+  $ hg ci -m two
+  $ V2NODE=`hg id -q`
+  $ hg update .~1
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  $ echo v2b > f2b
+  $ hg add f2b
+  $ hg ci -m tobe
+  created new head
+  $ hg rebase -s $V2NODE -d .
+  rebasing 1:6e7218dcac9d "two"
+  $ hg id -r tip
+  8940c25c8bf0 tip
+
+Use debugaudit to add arbitrary attestations for testing
+
+  $ hg debugaudit -a tested=yes -u "$USER2" -r tip
+
+Look at new audit log in modified changeset
+
+  $ hg id -r tip
+  b56c8275b93c tip
+  $ hg audit -r tip
+  signature 1
+  -----------
+  hg.custodian=Foo User <foo at example.org>
+  hg.date=0 0
+  hg.link=8940c25c8bf083973a903d690b2d4a27495c2c65
+  tested=yes
+  
+
+Adding more attestations works with -a flag
+
+  $ hg debugaudit -a linted=yes -a tested.manual=yes -u "$USER2" -r tip
+  $ hg audit -r tip
+  signature 1
+  -----------
+  hg.custodian=Foo User <foo at example.org>
+  hg.date=0 0
+  hg.link=8940c25c8bf083973a903d690b2d4a27495c2c65
+  linted=yes
+  tested=yes
+  tested.manual=yes
+  
+
+Adding more attestations, updating old attestations works with -a flag
+
+  $ hg debugaudit -a linted=twice -u "$USER2" -r tip
+  $ hg audit -r tip
+  signature 1
+  -----------
+  hg.custodian=Foo User <foo at example.org>
+  hg.date=0 0
+  hg.link=8940c25c8bf083973a903d690b2d4a27495c2c65
+  linted=twice
+  tested=yes
+  tested.manual=yes
+  
+
+Removing attestation works with --delete flag
+
+  $ hg debugaudit --delete linted --delete tested -u "$USER2" -r tip
+  $ hg audit -r tip
+  signature 1
+  -----------
+  hg.custodian=Foo User <foo at example.org>
+  hg.date=0 0
+  hg.link=8940c25c8bf083973a903d690b2d4a27495c2c65
+  tested.manual=yes
+  
+
+Test changing attestations for the working copy parent
+
+  $ hg update -q tip
+  $ hg debugaudit -a tested=2 -u "$USER2" -r tip
+  $ hg audit -r tip
+  signature 1
+  -----------
+  hg.custodian=Foo User <foo at example.org>
+  hg.date=0 0
+  hg.link=8940c25c8bf083973a903d690b2d4a27495c2c65
+  tested=2
+  tested.manual=yes
+  
+
+Changing attestation with a different user starts a new audit entry
+
+  $ hg debugaudit -a linted=thrice -u "$USER1" -r tip
+  $ hg audit -r tip
+  signature 1
+  -----------
+  hg.custodian=Foo User <foo at example.org>
+  hg.date=0 0
+  hg.link=8940c25c8bf083973a903d690b2d4a27495c2c65
+  tested=2
+  tested.manual=yes
+  
+  signature 2
+  -----------
+  hg.custodian=Test User <test at example.net>
+  hg.date=0 0
+  hg.link=6a32f1897b5a9727722b7b7140b9912f29780fac
+  linted=thrice
+  tested=2
+  tested.manual=yes
+  
+
+Date flag works when updating an audit entry or creating a new one
+
+  $ hg debugaudit -u "$USER1" -d '1 0' -r tip
+  $ hg audit -r tip
+  signature 1
+  -----------
+  hg.custodian=Foo User <foo at example.org>
+  hg.date=0 0
+  hg.link=8940c25c8bf083973a903d690b2d4a27495c2c65
+  tested=2
+  tested.manual=yes
+  
+  signature 2
+  -----------
+  hg.custodian=Test User <test at example.net>
+  hg.date=1 0
+  hg.link=6a32f1897b5a9727722b7b7140b9912f29780fac
+  linted=thrice
+  tested=2
+  tested.manual=yes
+  
+  $ hg debugaudit -u "$USER2" -d '2 0' -r tip
+  $ hg audit -r tip
+  signature 1
+  -----------
+  hg.custodian=Foo User <foo at example.org>
+  hg.date=0 0
+  hg.link=8940c25c8bf083973a903d690b2d4a27495c2c65
+  tested=2
+  tested.manual=yes
+  
+  signature 2
+  -----------
+  hg.custodian=Test User <test at example.net>
+  hg.date=1 0
+  hg.link=6a32f1897b5a9727722b7b7140b9912f29780fac
+  linted=thrice
+  tested=2
+  tested.manual=yes
+  
+  signature 3
+  -----------
+  hg.custodian=Foo User <foo at example.org>
+  hg.date=2 0
+  hg.link=1eb1212029d6f2644deebc363b794402b3ea8570
+  linted=thrice
+  tested=2
+  tested.manual=yes
+  
+
+Test creating custom attestations by original changeset author, in audit entry 0
+
+  $ cd ..
+  $ hg init r2
+  $ cd r2
+  $ echo f > f
+  $ hg add f
+  $ hg ci -m 'init'
+  $ hg audit -r tip
+  $ hg debugaudit -a 'tested=yes' -r tip
+  $ hg audit -r tip
+  signature 0
+  -----------
+  hg.custodian=Test User <test at example.net>
+  hg.date=0 0
+  hg.link=8f4d710dd79cd833f8b9d3d4d4509135737abdea
+  tested=yes
+  
+
+Test disabled by config
+
+  $ cat >> $HGRCPATH <<EOF
+  > [experimental]
+  > audit-debugaudit=False
+  > EOF
+  $ hg debugaudit -a 'tested=repeatedly' -r tip
+  no changes
+  [1]
+
+  $ cat >> $HGRCPATH <<EOF
+  > [experimental]
+  > audit-all=True
+  > EOF
+  $ hg debugaudit -a 'tested=repeatedly' -r tip
+  $ hg audit -r tip
+  signature 0
+  -----------
+  hg.custodian=Test User <test at example.net>
+  hg.date=0 0
+  hg.link=8f4d710dd79cd833f8b9d3d4d4509135737abdea
+  tested=repeatedly
+  
+
+Test invalid attestation format, no changes scenario, etc.
+
+  $ hg debugaudit -a '=yes' -u "$USER2" -r tip
+  abort: attestation missing key: =yes
+  [255]
+  $ hg debugaudit -a 'testedisyes' -u "$USER2" -r tip
+  abort: attestation missing equals sign: testedisyes
+  [255]
+  $ hg debugaudit -a 'hg.custodian=lol' -u "$USER2" -r tip
+  abort: cannot set builtin attestation: hg.custodian=lol
+  [255]
+  $ hg debugaudit --delete 'tested=' -u "$USER2" -r tip
+  abort: invalid key contains equals sign: tested=
+  [255]
+  $ hg debugaudit --delete 'hg.date' -u "$USER2" -r tip
+  abort: cannot delete builtin attestation: hg.date
+  [255]
+  $ hg debugaudit -r tip -u "$USER1"
+  no changes
+  [1]
diff -r 389b9907470c -r 2a3144c7158e tests/test-help.t
--- a/tests/test-help.t	Sat Nov 28 04:11:57 2015 -0500
+++ b/tests/test-help.t	Sat Nov 28 04:12:07 2015 -0500
@@ -245,6 +245,7 @@
       disabled extensions:
   
        acl           hooks for controlling repository access
+       audit         view and edit changeset audit trail
        blackbox      log repository events to a blackbox for debugging
        bugzilla      hooks for integrating with the Bugzilla bug tracker
        censor        erase file content at a given revision


More information about the Mercurial-devel mailing list