# -*- coding: iso-8859-1 -*- # # Copyright (C) 2005 Edgewall Software # Copyright (C) 2005-2006 Christian Boos # All rights reserved. # # This software may be used and distributed according to the terms # of the GNU General Public License, incorporated herein by reference. # # Author: Christian Boos from __future__ import generators import os import time import posixpath import re from trac.core import * from trac.config import _TRUE_VALUES as TRUE from trac.util.text import shorten_line, to_unicode from trac.util.html import escape, html from trac.versioncontrol import Changeset, Node, Repository, \ IRepositoryConnector, \ NoSuchChangeset, NoSuchNode from trac.versioncontrol.web_ui import ChangesetModule, BrowserModule from trac.wiki import IWikiSyntaxProvider try: # The new `demandimport` mechanism doesn't play well with code relying # on the `ImportError` exception being caught. # OTOH, we can't disable `demandimport` because mercurial relies on it # (circular reference issue). So for now, we activate `demandimport` # before loading mercurial modules, and desactivate it afterwards. # # See http://www.selenic.com/mercurial/bts/issue605 try: from mercurial import demandimport demandimport.enable(); except ImportError: demandimport = None from mercurial import hg from mercurial.ui import ui from mercurial.repo import RepoError from mercurial.revlog import LookupError, revlog from mercurial.node import hex, short, nullid from mercurial.util import pathto try: # for most recent version (hg:chgset:731e739b8659 at 2006-11-15) from mercurial.cmdutil import walkchangerevs except ImportError: # for older version from mercurial.commands import walkchangerevs if demandimport: demandimport.disable(); has_mercurial = True try: # Detect and define alternative of revlog.linkrev() # whose 1st arg is changed at 2008/11/12. # The next line cause "LookupError: nul@: no node" on old # revlog.linkrev() which requires node string as 1st arg. # Otherwise, 1st arg is rev (integer). revlog(file, os.path.devnull).linkrev('') except LookupError: old_linkrev = True # 1st arg is node def linkrev(obj, node): return obj.linkrev(node) except TypeError, exc: old_linkrev = False # 1st arg is rev (int) def linkrev(obj, node): return obj.linkrev(obj.rev(node)) except ImportError: has_mercurial = False ui = object ### Components class MercurialConnector(Component): implements(IRepositoryConnector, IWikiSyntaxProvider) def get_supported_types(self): """Support for `repository_type = hg`""" global has_mercurial if has_mercurial: yield ("hg", 8) def get_repository(self, type, dir, authname): """Return a `MercurialRepository`""" options = {} for key, val in self.config.options(type): options[key] = val return MercurialRepository(dir, self.log, options) # IWikiSyntaxProvider methods def get_wiki_syntax(self): yield (r'[0-9a-f]{12,40}', lambda formatter, label, match: self._format_link(formatter, 'cset', label, label)) def get_link_resolvers(self): yield ('cset', self._format_link) yield ('chgset', self._format_link) yield ('branch', self._format_link) # go to the corresponding head yield ('tag', self._format_link) def _format_link(self, formatter, ns, rev, label): repos = self.env.get_repository() if ns == 'branch': for b, head in repos.get_branches(): if b == rev: rev = head break try: chgset = repos.get_changeset(rev) return html.a(label, class_="changeset", title=shorten_line(chgset.message), href=formatter.href.changeset(rev)) except NoSuchChangeset, e: return html.a(label, class_="missing changeset", title=to_unicode(e), rel="nofollow", href=formatter.href.changeset(rev)) ### Helpers class trac_ui(ui): def __init__(self, *args): ui.__init__(self, *args) self.setconfig('ui', 'interactive', 'off') def write(self, *args): pass def write_err(self, str): pass def readline(self): raise TracError('*** Mercurial ui.readline called ***') ### Version Control API class MercurialRepository(Repository): """Repository implementation based on the mercurial API. This wraps a hg.repository object. The revision navigation follows the branches, and defaults to the first parent/child in case there are many. The eventual other parents/children are listed as additional changeset properties. """ def __init__(self, path, log, options): self.ui = trac_ui() if isinstance(path, unicode): str_path = path.encode('utf-8') if not os.path.exists(str_path): str_path = path.encode('latin-1') path = str_path self.repo = hg.repository(ui=self.ui, path=path) self.path = self.repo.root self._show_rev = True if 'show_rev' in options and not options['show_rev'] in TRUE: self._show_rev = False self._node_fmt = 'node_format' in options \ and options['node_format'] # will default to 'short' if self.path is None: raise TracError(path + ' does not appear to ' \ 'contain a Mercurial repository.') Repository.__init__(self, 'hg:%s' % path, None, log) def hg_time(self, timeinfo): # [hg b47f96a178a3] introduced an API change: if isinstance(timeinfo, tuple): # Mercurial 0.8 time = timeinfo[0] else: # Mercurial 0.7 time = timeinfo.split()[0] return int(time) def hg_node(self, rev): try: if rev: m = re.match(r"(\d+):", rev) if m: rev = m.group(1) return self.repo.lookup(rev) return self.repo.changelog.tip() except (LookupError, RepoError): raise NoSuchChangeset(rev) def hg_display(self, n): nodestr = self._node_fmt == "hex" and hex(n) or short(n) if self._show_rev: return '%s:%s' % (self.repo.changelog.rev(n), nodestr) else: return nodestr def close(self): self.repo = None def normalize_path(self, path): """Remove leading "/", except for the root""" return path and path.strip('/') or '' def normalize_rev(self, rev): """Return the changelog node for the specified rev""" return self.hg_display(self.hg_node(rev)) def short_rev(self, rev): """Return the revision number for the specified rev""" return self.repo.changelog.rev(self.hg_node(rev)) def get_changeset(self, rev): return MercurialChangeset(self, self.hg_node(rev)) def get_changesets(self, start, stop): """Follow each head and parents in order to get all changesets FIXME: this should be handled by the repository cache as well. The code below is only an heuristic, and doesn't work in the general case. E.g. look at the mercurial repository timeline for 2006-10-18, you need to give ''38'' daysback in order to see the changesets from 2006-10-17... This is because of the following '''linear''' sequence of csets: - 3445:233c733e4af5 10/18/2006 9:08:36 AM mpm - 3446:0b450267cf47 9/10/2006 3:25:06 AM hopper - 3447:ef1032c223e7 9/10/2006 3:25:06 AM hopper - 3448:6ca49c5fe268 9/10/2006 3:25:07 AM hopper - 3449:c8686e3f0291 10/18/2006 9:14:26 AM hopper This is most probably because [3446:3448] correspond to old changesets that have been ''hg import''ed, with their original dates. """ log = self.repo.changelog seen = {nullid: 1} seeds = log.heads() while seeds: cn = seeds[0] del seeds[0] time = self.hg_time(log.read(cn)[2]) rev = log.rev(cn) if time < start: continue # assume no ancestor is younger and use next seed # (and that assumption is wrong for 3448 in the example above) elif time < stop: yield MercurialChangeset(self, cn) for p in log.parents(cn): if p not in seen: seen[p] = 1 seeds.append(p) def get_node(self, path, rev=None): return MercurialNode(self, self.normalize_path(path), self.hg_node(rev)) def get_tags(self, rev): for tag, n in self.repo.tagslist(): yield (tag, self.hg_display(n)) def get_branches(self, rev): heads = self.repo.changelog.heads() brinfo = self.repo.branchlookup(heads) for head in heads: rev = self.hg_display(head) if head in brinfo: branch = ' '.join(brinfo[head]) else: branch = rev yield (branch, rev) def get_oldest_rev(self): return self.hg_display(nullid) def get_youngest_rev(self): return self.hg_display(self.repo.changelog.tip()) def previous_rev(self, rev): n = self.hg_node(rev) log = self.repo.changelog parents = [self.hg_display(p) for p in log.parents(n) if p != nullid] parents.sort() return parents and parents[0] or None def next_rev(self, rev, path=''): # NOTE: path ignored for now n = self.hg_node(rev) log = self.repo.changelog children = [self.hg_display(c) for c in log.children(n)] children.sort() return children and children[0] or None def rev_older_than(self, rev1, rev2): log = self.repo.changelog return log.rev(self.hg_node(rev1)) < log.rev(self.hg_node(rev2)) # def get_path_history(self, path, rev=None, limit=None): # path = self.normalize_path(path) # rev = self.normalize_rev(rev) # expect_deletion = False # while rev: # if self.has_node(path, rev): # if expect_deletion: # # it was missing, now it's there again: # # rev+1 must be a delete # yield path, rev+1, Changeset.DELETE # newer = None # 'newer' is the previously seen history tuple # older = None # 'older' is the currently examined history tuple # for p, r in _get_history(path, 0, rev, limit): # older = (p, r, Changeset.ADD) # rev = self.previous_rev(r) # if newer: # if older[0] == path: # # still on the path: 'newer' was an edit # yield newer[0], newer[1], Changeset.EDIT # else: # # the path changed: 'newer' was a copy # rev = self.previous_rev(newer[1]) # # restart before the copy op # yield newer[0], newer[1], Changeset.COPY # older = (older[0], older[1], 'unknown') # break # newer = older # if older: # # either a real ADD or the source of a COPY # yield older # else: # expect_deletion = True # rev = self.previous_rev(rev) def sync(self): pass def get_changes(self, old_path, old_rev, new_path, new_rev, ignore_ancestry=1): """Generates changes corresponding to generalized diffs. Generator that yields change tuples (old_node, new_node, kind, change) for each node change between the two arbitrary (path,rev) pairs. The old_node is assumed to be None when the change is an ADD, the new_node is assumed to be None when the change is a DELETE. """ old_node = new_node = None if self.has_node(old_path, old_rev): old_node = self.get_node(old_path, old_rev) else: raise NoSuchNode(old_path, old_rev, 'The Base for Diff is invalid') if self.has_node(new_path, new_rev): new_node = self.get_node(new_path, new_rev) else: raise NoSuchNode(new_path, new_rev, 'The Target for Diff is invalid') # check kind, both should be same. if new_node.kind != old_node.kind: raise TracError('Diff mismatch: Base is a %s (%s in revision %s) ' 'and Target is a %s (%s in revision %s).' \ % (old_node.kind, old_path, old_rev, new_node.kind, new_path, new_rev)) # Correct change info from changelog(revlog) # Finding changes between two revs requires tracking back # several routes. if new_node.isdir: # TODO: Should we follow rename and copy? # As temporary workaround, simply compare entry names. changes = [] for path in new_node.manifest: if new_path and not path.startswith(new_path+'/'): continue op = old_path + path[len(new_path):] if op not in old_node.manifest: changes.append((path, None, new_node.subnode(path), Node.FILE, Changeset.ADD)) elif old_node.manifest[op] != new_node.manifest[path]: changes.append((path, old_node.subnode(op), new_node.subnode(path), Node.FILE, Changeset.EDIT)) # otherwise, not changed for path in old_node.manifest: if old_path and not path.startswith(old_path+'/'): continue np = new_path + path[len(old_path):] if np not in new_node.manifest: changes.append((path, old_node.subnode(np), None, Node.FILE, Changeset.DELETE)) for change in sorted(changes, key=lambda c: c[0]): yield(change[1], change[2], change[3], change[4]) else: if old_node.manifest[old_node.path] != \ new_node.manifest[new_node.path]: yield(old_node, new_node, Node.FILE, Changeset.EDIT) class MercurialNode(Node): """A path in the repository, at a given revision. It encapsulates the repository manifest for the given revision. As directories are not first-class citizens in Mercurial, retrieving revision information for directory is much slower than for files. """ def __init__(self, repos, path, n, manifest=None, mflags=None): self.repos = repos self.n = n log = repos.repo.changelog if not manifest: manifest_n = log.read(n)[0] # 0: manifest node manifest = repos.repo.manifest.read(manifest_n) if hasattr(repos.repo.manifest, 'readflags'): mflags = repos.repo.manifest.readflags(manifest_n) self.manifest = manifest self.mflags = mflags if isinstance(path, unicode): try: self._init_path(log, path.encode('utf-8')) except NoSuchNode: self._init_path(log, path.encode('latin-1')) # TODO: configurable charset for the repository, i.e. #3809 else: self._init_path(log, path) def _init_path(self, log, path): kind = None if path in self.manifest: # then it's a file kind = Node.FILE file_n = self.manifest[path] log_rev = linkrev(self.repos.repo.file(path), file_n) node = log.node(log_rev) else: # it will be a directory if there are matching entries dir = path and path+'/' or '' entries = {} newest = -1 for file in self.manifest.keys(): if file.startswith(dir): entry = file[len(dir):].split('/', 1)[0] entries[entry] = 1 if path: # small optimization: we skip this for root node file_n = self.manifest[file] log_rev = linkrev(self.repos.repo.file(file), file_n) newest = max(log_rev, newest) if entries: kind = Node.DIRECTORY self.entries = entries.keys() if newest >= 0: node = log.node(newest) else: # ... as it's the youngest anyway node = log.tip() if not kind: if log.tip() == nullid: # empty repository kind = Node.DIRECTORY self.entries = [] node = nullid else: raise NoSuchNode(path, self.repos.hg_display(self.n)) self.time = self.repos.hg_time(log.read(node)[2]) rev = self.repos.hg_display(node) Node.__init__(self, path, rev, kind) self.created_path = path self.created_rev = rev self.data = None def subnode(self, p, dirnode=None): """Return a node with the same revision information but another path""" return MercurialNode(self.repos, p, self.n, self.manifest, dirnode) def get_content(self): if self.isdir: return None self.pos = 0 # reset the read() return self # something that can be `read()` ... def read(self, size=None): if self.isdir: return TracError("Can't read from directory %s" % self.path) file_n = self.manifest[self.path] file = self.repos.repo.file(self.path) if self.data is None: self.data = file.read(file_n) self.pos = 0 if size: prev_pos = self.pos self.pos += size return self.data[prev_pos:self.pos] return self.data def get_entries(self): if self.isfile: return for entry in self.entries: if self.path: entry = posixpath.join(self.path, entry) yield MercurialNode(self.repos, entry, self.n, self.manifest, self.mflags) def get_history(self, limit=None): newer = None # 'newer' is the previously seen history tuple older = None # 'older' is the currently examined history tuple log = self.repos.repo.changelog # directory history if self.isdir: if not self.path: # special case for the root for r in xrange(log.rev(self.n), -1, -1): yield ('', self.repos.hg_display(log.node(r)), r and Changeset.EDIT or Changeset.ADD) return # Code compatibility for ''walkchangerevs'': # In Mercurial 0.7, it had 5 arguments, but # [hg 1d7d0c07e8f3] removed the 3rd argument ('cwd'). args = (self.repos.ui, self.repos.repo) if walkchangerevs.func_code.co_argcount == 5: args = args + (None,) args = args + (['path:%s' % self.path], {'rev': ['%s:0' % hex(self.n)]}) wcr = walkchangerevs(*args) matches = {} for st, rev, fns in wcr[0]: if st == 'window': matches.clear() elif st == 'add': matches[rev] = 1 elif st == 'iter': if matches[rev]: yield (self.path, self.repos.hg_display(log.node(rev)), Changeset.EDIT) return # file history file_n = self.manifest[self.path] file = self.repos.repo.file(self.path) # FIXME: COPY currently unsupported for file_rev in xrange(file.rev(file_n), -1, -1): rev = log.node(linkrev(file, file.node(file_rev))) older = (self.path, self.repos.hg_display(rev), Changeset.ADD) if newer: change = newer[0] == older[0] and Changeset.EDIT or \ Changeset.COPY newer = (newer[0], newer[1], change) yield newer newer = older if newer: yield newer def get_properties(self): if self.isfile: if self.mflags: # Mercurial upto 9.1 exe = self.mflags[self.path] elif hasattr(self.manifest, 'execf'): # 1.0.1 or before exe = self.manifest.execf(self.path) else: # after 1.0.1 exe = 'x' in self.manifest.flags(self.path) return exe and {'exe': '*'} or {} return {} # FIXME++: implement pset/pget/plist etc. in hg # (hm, extended changelog is about the *changelog*, not the manifest...) def get_content_length(self): if self.isdir: return None return len(self.read()) def get_content_type(self): if self.isdir: return None return '' def get_last_modified(self): return self.time class MercurialChangeset(Changeset): """A changeset in the repository. This wraps the corresponding information from the changelog. The files changes are obtained by comparing the current manifest to the parent manifest(s). """ def __init__(self, repos, n): log = repos.repo.changelog log_data = log.read(n) manifest, user, timeinfo, files, desc = log_data[:5] extra = {} if len(log_data) > 5: # extended changelog, since [hg 2f35961854fb] extra = log_data[5] time = repos.hg_time(timeinfo) Changeset.__init__(self, repos.hg_display(n), to_unicode(desc), user, time) self.repos = repos self.n = n self.manifest_n = manifest self.files = files self.parents = [repos.hg_display(p) for p in log.parents(n) \ if p != nullid] self.children = [repos.hg_display(c) for c in log.children(n)] self.tags = [t for t in repos.repo.nodetags(n)] self.extra = extra def get_properties(self): def changeset_links(csets): return ' '.join(['[cset:%s]' % cset for cset in csets]) if len(self.parents) > 1: yield ('Parents', changeset_links(self.parents), True, 'changeset') if len(self.children) > 1: yield ('Children', changeset_links(self.children), True, 'changeset') if len(self.tags): yield ('Tags', changeset_links(self.tags), True, 'changeset') for k, v in self.extra.iteritems(): yield (k, v, False, 'message') # TODO: Improve this API... def get_changes(self): repo = self.repos.repo log = repo.changelog parents = log.parents(self.n) manifest = repo.manifest.read(self.manifest_n) manifest1 = manifest2 = None if parents: man_node1 = log.read(parents[0])[0] manifest1 = repo.manifest.read(man_node1) if len(parents) > 1: man_node2 = log.read(parents[1])[0] manifest2 = repo.manifest.read(man_node2) deletions = {} def detect_delete(pmanifest, p): for f in pmanifest.keys(): if f not in manifest: deletions[f] = p if manifest1: detect_delete(manifest1, self.parents[0]) if manifest2: detect_delete(manifest2, self.parents[1]) renames = {} changes = [] for f in self.files: # 'added' and 'edited' files if f in deletions: # and since Mercurial > 0.7 [hg c6ffedc4f11b] continue # also 'deleted' files action = None # TODO: find a way to detect conflicts and show how they were solved if manifest1 and f in manifest1: action = Changeset.EDIT changes.append((f, Node.FILE, action, f, self.parents[0])) if manifest2 and f in manifest2: action = Changeset.EDIT changes.append((f, Node.FILE, action, f, self.parents[1])) if not action: rename_info = repo.file(f).renamed(manifest[f]) if rename_info: base_path = rename_info[0] linkedrev = linkrev(repo.file(base_path), rename_info[1]) base_rev = self.repos.hg_display(log.node(linkedrev)) if base_path in deletions: action = Changeset.MOVE renames[base_path] = f else: action = Changeset.COPY else: action = Changeset.ADD base_path = '' base_rev = None changes.append((f, Node.FILE, action, base_path, base_rev)) for f, p in deletions.items(): if f not in renames: changes.append((f, Node.FILE, Changeset.DELETE, f, p)) changes.sort() for change in changes: yield change