#!/usr/bin/env python # # rdiff-backup -- Mirror files while keeping incremental changes # Version 0.2 released August 3, 2001 # Copyright (C) 2001 Ben Escoto # # This program is licensed under the GNU General Public License (GPL). # See http://www.gnu.org/copyleft/gpl.html for details. # # Please send me mail if/when you find bugs or have any suggestions. """ rdiff-backup -- Mirror files while keeping incremental changes Table of Contents Classes: RBException Logger BasicFileOperations Path RemotePath PathTriple TimeRelated BackupPrepare Copier Incrementer RestorePrepare RestoreSequence Restorer Main PipeFile Connection Misc Functions Summary of execution: Main.Main() runs, parses the commandline stuff, etc., and then runs a copy of either BackupPrepare or RestorePrepare depending on the action taken. Incrementer/Copier or Restorer do most of the work. """ from __future__ import nested_scopes import os, stat, time, sys, shutil, tempfile, getopt, re, UserString import cPickle, types, popen2 blocksize = 16 * 1024 connection = None class RBException(Exception): pass class Logger: """Manage output and status messages In general, a message will not be transmitted unless it's verbosity is less than or equal to the set verbosity. Each message also has an error level, which is 0 for normal messages, 1 for warnings, and 2 for errors. """ def __init__(self): """Logger initializer self.verbosity determines how much output should be written self.logfile is the filename of the log file to write to self.logfp is the python file object form of logfile """ self.verbosity = None self.logfile = None self.logfp = None def __del__(self): """Close logfile if still open""" if self.logfp: self.logfp.close() def Log(self, errstring, verbosity, errlevel = 0): """Main logging function - record errstring""" if verbosity <= self.verbosity: logstring = str(self.getprefix(errlevel)) + errstring if connection and connection.server: sys.stderr.write(logstring + "\n") else: self.write_to_terminal(logstring, errlevel) if self.logfile: self.logfp.write(logstring + "\n") if connection and not connection.server: connection.rexec(write_to_logfile, logstring + "\n") def FatalError(self, errstring, exitlevel): """Exit program with exitlevel because of fatal error""" self.Log(errstring, 1, 2) sys.exit(exitlevel) def open_logfile(self, logfile): """Initialize logfile locally""" self.logfp = open(str(logfile), "a") self.logfile = logfile def getprefix(self, errlevel): """Return string which precedes log entries""" if self.verbosity < 9: prefix = "" else: prefix = time.asctime(time.localtime(time.time())) + " " if errlevel == 1: prefix = prefix + "Warning: " elif errlevel == 2: prefix = prefix + "ERROR: " return prefix def write_to_terminal(self, string, errlevel): """Display message either on stdout or stderr""" if errlevel > 0: sys.stderr.write(string+"\n") elif not connection or not connection.server: sys.stdout.write(string+"\n") L = Logger() class BasicFileOperations: """Provides functions to copy, make a diff, etc. All of these should work with a Path object instead of a string when referring to filenames. """ def rdiff(self, source, target, diff): """Produce a diff from source to target filenames called diff This is somewhat complicated because it tries to avoid moving files across the pipe as much as possible if there is a remote connection, and there are theoretically 8 cases to consider. In no case should either the source or target file have to be moved over. """ try: if isremote(source): sigfile = RemotePath(connection.reval(tempfile.mktemp)) connection.rexec(run_external, "rdiff signature %s %s" % (source.quote(), sigfile)) else: sigfile = Path(tempfile.mktemp()) self.run_external("rdiff signature %s %s" % (source.quote(), sigfile)) if isremote(sigfile) and not isremote(target): sigfile2 = Path(tempfile.mktemp()) sigfile.shallowcopy(sigfile2) elif not isremote(sigfile) and isremote(target): sigfile2 = RemotePath(connection.reval(tempfile.mktemp)) sigfile.shallowcopy(sigfile2) else: sigfile2 = sigfile if isremote(target) and isremote(diff): connection.rexec(run_external, "rdiff delta %s %s %s" % (sigfile2, target.quote(), diff.quote())) elif not isremote(target) and not isremote(diff): self.run_external("rdiff delta %s %s %s" % (sigfile2, target.quote(), diff.quote())) else: # diff file and target are on different ends if isremote(target): diff2 = RemotePath(connection.reval(tempfile.mktemp)) connection.rexec(run_external, "rdiff delta %s %s %s" % (sigfile2, target.quote(), diff2.quote())) diff2.shallowcopy(diff) diff2.delete() else: diff2 = Path(tempfile.mktemp()) self.run_external("rdiff delta %s %s %s" % (sigfile2, target.quote(), diff2.quote())) diff2.shallowcopy(diff) diff2.delete() except RBException: try: sigfile.delete() sigfile2.delete() except: pass L.FatalError(sys.exc_info()[1], 8) try: sigfile.delete() if sigfile != sigfile2: sigfile2.delete() except os.error: L.Log("Unable to remove " + sigfile, 3, 1) def patch(self, patchee, diff): """Use diff to modify patchee""" L.Log("Patching file %s with %s" % (patchee, diff), 7) def patchremote(patchee = patchee, diff = diff): outfile = RemotePath(connection.reval(tempfile.mktemp)) connection.rexec(run_external, "rdiff patch %s %s %s" % (patchee.quote(), diff.quote(), outfile)) patchee.delete() connection.rexec(shutil.copy, outfile, str(patchee)) outfile.delete() def patchlocal(patchee = patchee, diff = diff): outfile = Path(tempfile.mktemp()) self.run_external("rdiff patch %s %s %s" % (patchee.quote(), diff.quote(), outfile)) patchee.delete() shutil.copy(str(outfile), str(patchee)) outfile.delete() if isremote(diff) and isremote(patchee): patchremote() elif not isremote(diff) and not isremote(patchee): patchlocal() elif isremote(diff) and not isremote(patchee): diff2 = Path(tempfile.mktemp()) diff.shallowcopy(diff2) patchlocal(patchee, diff2) diff2.delete() else: assert not isremote(diff) and isremote(patchee) diff2 = RemotePath(connection.reval(tempfile.mktemp)) diff.shallowcopy(diff2) patchremote(patchee, diff2) diff2.delete() def rdiff_copy(self, source, target): """Like shallowcopy, but maybe use rdiff Just send over delta if both files are regular and on opposite sides. Otherwise use shallowcopy. """ if (source.isreg() and target.isreg() and ((isremote(source) and not isremote(target)) or (not isremote(source) and isremote(target)))): diff = Path(tempfile.mktemp()) self.rdiff(source, target, diff) self.patch(target, diff) diff.delete() target.updateattribs(source) else: source.shallowcopy(target) def mknodlike(self, source, target, type): """Use unix prog mknod and stat to make special file This depends on the commandline usage of mknod, and the format of the data returned by "stat -t". This at least works under linux, and hopefully other platforms too. """ L.Log("Using stat and mknod to make file %s. This may not work " "on all unix platforms." % target, 3, 1) statpipe = os.popen("stat -t %s" % source.quote()) statoutputlist = statpipe.read().split() statpipe.close() major, minor = map(lambda x: int(x, 16), (statoutputlist[9], statoutputlist[10])) self.run_external("mknod %s %s %d %d" % (target.quote(), type, major, minor)) def run_external(self, path): """Execute path as shell command""" L.Log("Running: " + path, 8) if os.system(str(path)): raise RBException("Problem running " + path) def copyfileobj(self, source, target): """Copy file object source to target file obj, then close both""" bufsize = blocksize while 1: inbuf = source.read(bufsize) if not inbuf: break target.write(inbuf) source.close() target.close() BFO = BasicFileOperations() class Path(UserString.UserString): """Wrapper around the name of a file and its stats This class caches lstat calls (in self.statblock), the time in seconds the filename indicates (self.filetime), and directory listings (self.listdir). """ def __init__(self, path): """Path initializer - call base class, set statted to None""" UserString.UserString.__init__(self, path) self.statted = None self.filetime = None self.dirlisting = None def setpath(self, path): """Update self.data with path, clear statblock""" self.data = path self.clearstats() def clearstats(self): """Clear any cached stats. Use when file is changed""" if self.statted: self.statted = None self.statblock = None def normalize(self): """Set self.data to canonical version This just means that redundant /'s will be removed, including the trailing one, even for directories. ".." components will be retained. """ newpath = "/".join(filter(lambda x: x and x != ".", self.data.split("/"))) if self.startswith("/"): self.setpath("/" + newpath) else: self.setpath(newpath) def quote(self): """Return quoted form of self for use with os.system()""" return '"%s"' % self.data.replace("\\", "\\\\").replace('"', '\\"') def lstat(self, override = None): """Return output of os.lstat and save results""" if not self.statted or override: try: self.statblock = os.lstat(self.data) except os.error: self.statblock = None self.statted = 1 return self.statblock def isdir(self): """True if path is of a directory""" return self.lstat() and stat.S_ISDIR(self.lstat()[stat.ST_MODE]) def isreg(self): """True if path is of a regular file""" return self.lstat() and stat.S_ISREG(self.lstat()[stat.ST_MODE]) def issym(self): """True if path is of a symlink""" return self.lstat() and stat.S_ISLNK(self.lstat()[stat.ST_MODE]) def getsize(self): """Return length of file in bytes, or None if file doesn't exist""" if self.lstat(): return self.lstat()[stat.ST_SIZE] else: return None def type(self): """Return string used to indicate type of file Currently we are using None if the file doesn't exist, "dir" for a directory, "sym" for a symlink, "reg" for a regular file, and "other" for everything else. """ if not self.lstat(): return None elif self.isreg(): return "reg" elif self.isdir(): return "dir" elif self.issym(): return "sym" else: return "other" def ctime(self): """Return last time (in seconds) of file change""" return max(self.lstat()[stat.ST_CTIME], self.lstat()[stat.ST_MTIME]) def listdir(self): """Cached version of os.listdir(self). Use only on directories""" if self.dirlisting is None: self.dirlisting = map(Path, os.listdir(self.data)) return self.dirlisting def open_for_reading(self): """Return file object opened for reading""" return open(self.data, "r") def open_for_writing(self, placeholder = None): """Return file object opened for writing""" return open(self.data, "w") def dirsplit(self): """Returns a tuple (dirname, basename) Basename is never '' unless self is root, so it is unlike os.path.basename. If path is just above root (so dirname is root), then dirname is ''. In all other cases dirname is not the empty string. Also, dirsplit depends on the format of self, so basename could be ".." and dirname could be a subdirectory. For an atomic relative path, dirname will be '.'. """ self.normalize() if self.data.find("/") == -1: return (Path("."), self) comps = self.data.split("/") return (Path("/".join(comps[:-1])), Path(comps[-1])) def append(self, newpath): """Create new Path by adding on new path""" result = self + "/" + newpath result.normalize() return result def delete(self): """Remove file (and any subdirectories if dir)""" if self.isdir(): shutil.rmtree(self.data) else: os.unlink(self.data) self.clearstats() def chmod(self, permissions): """Wrapper around os.chmod, to be overridden in RemoteFile""" os.chmod(self.data, permissions) def chown(self, uid, gid): os.chown(self.data, uid, gid) def utime(self, timepair): os.utime(self.data, timepair) def mkdir(self): """Like os.mkdir(self) but does string conversion""" os.mkdir(self.data) self.clearstats() def mkfifo(self): os.mkfifo(self.data) def readlink(self): return os.readlink(self.data) def symlink(self, destination): os.symlink(destination, self.data) def touch(self): touch(self.data) def shallowcopy(self, target): """Turn target path into a copy of self, including attributes Does not recurse down directories. So if target and self are both directories, the only effect will be the target's attributes being changed. """ if target.lstat() and not (target.isdir() and self.isdir()): target.delete() if not target.lstat(): if self.isreg(): BFO.copyfileobj(self.open_for_reading(), target.open_for_writing(self.getsize())) else: self.createfilelike(target) if target.lstat(1): # some files like sockets won't be created target.updateattribs(self) def deepcopy(self, target): """Like shallowcopy, but recur if directory""" L.Log("Deep copying %s to %s" % (self, target), 8) self.shallowcopy(target) if self.isdir(): for entry in self.listdir(): (self + "/" + entry).deepcopy(target + "/" + entry) def updateattribs(self, sourcepath): """Change file attributes of self to match source Only changes the chmoddable bits, uid/gid ownership, and timestamps, so self must already exist and be of right type. """ s = sourcepath.lstat() if not s: raise RBException("Trying to copy non-existent file %s" % sourcepath) elif sourcepath.issym(): return # symlinks have no valid attributes self.chmod(stat.S_IMODE(s[stat.ST_MODE])) self.chown(s[stat.ST_UID], s[stat.ST_GID]) self.utime((s[stat.ST_ATIME], s[stat.ST_MTIME])) self.clearstats() def createfilelike(self, targetpath): """Create new file targetpath of same type as self Returns true if some change is made (always except for sockets, which are ignored). """ if targetpath.lstat(): raise RBException("File %s already exists" % targetpath) mode = self.lstat()[stat.ST_MODE] if self.isdir(): targetpath.mkdir() elif self.isreg(): targetpath.touch() elif self.issym(): os.symlink(os.readlink(self.data), targetpath.data) elif stat.S_ISCHR(mode): BFO.mknodlike(self, targetpath, "c") elif stat.S_ISBLK(mode): BFO.mknodlike(self, targetpath, "b") elif stat.S_ISFIFO(mode): targetpath.mkfifo() elif stat.S_ISSOCK(mode): L.Log("Ignoring socket %s" % self, 3, 1) return None else: raise RBException("Unknown file type for %s" % self) targetpath.clearstats() return 1 def isincfile(self): """Return true if extension makes self look like incfile""" basecomps = self.dirsplit()[1].split(".") if len(basecomps) < 3: return None type = basecomps[-1] if (type != "snapshot" and type != "diff" and type != "missing" and type != "dir"): return None try: self.inc_time() except ValueError: return None return 1 def inc_time(self): """Return time in seconds indicated by inc file's filename This, like the other inc_ methods, should only be used if a filename passes self.isincfile(). """ if not self.filetime: timestr = self.split(".")[-2] date, timeofday = timestr.split("_") year, month, day = date.split("-") hour, minute, second = timeofday.split(":") self.filetime = time.mktime(map(int, (year, month, day, hour, minute, second, -1, -1, -1))) return self.filetime def inc_type(self): """Return the inctype of an incfile Currently this is just the final extension of the file, so "dir", "missing", "snapshot", or "diff". """ return self.split(".")[-1] def inc_basename(self): """Return name of incfile minus extension""" return Path(".".join(self.split(".")[:-2])) def inc_addextension(self, timestring, type): """Return new path of incfile with appropriate timestring and type""" return Path(".".join([self.data, timestring, type])) class RemotePath(Path): """Path on the other side of the connection The various methods that operate directly on the file are overridden here, to instead use the global "connection" to make the appropriate calls. """ def lstat(self, override = None): """Stat path on the other side of connection""" if not self.statted or override: try: self.statblock = connection.reval(os.lstat, self.data) except os.error: self.statblock = None self.statted = 1 return self.statblock def listdir(self): """Return list of RemotePaths representing paths in remote dir""" if self.dirlisting is None: self.dirlisting = map(RemotePath, connection.reval(os.listdir, self.data)) return self.dirlisting def open_for_reading(self): """Return PipeFile object opened for reading""" return connection.reval(open, "r", self.data) def open_for_writing(self, num_bytes_to_be_written): """Return PipeFile object opened for writing""" outfile = PipeFile(connection.outpipe, "w", num_bytes_to_be_written) connection.rexec(write_filename_from_fileobj, self.data, outfile) return outfile def delete(self): if self.isdir(): connection.rexec(shutil.rmtree, self.data) else: connection.rexec(os.unlink, self.data) self.clearstats() def chmod(self, permissions): connection.rexec(os.chmod, self.data, permissions) def chown(self, uid, gid): connection.rexec(os.chown, self.data, uid, gid) def utime(self, timepair): connection.rexec(os.utime, self.data, timepair) def mkdir(self): connection.rexec(os.mkdir, self.data) def mkfifo(self): connection.rexec(os.mkfifo, self.data) def readlink(self): return connection.reval(os.readlink, self.data) def symlink(self, destination): connection.rexec(os.symlink, destination, self.data) def touch(self): connection.rexec(touch, self.data) def shallowcopy(self, target): """Like path.shallowcopy, but optimize if both are remote""" if self.isreg() and target.isreg() and isremote(target): target.delete() connection.rexec(shutil.copy, self.data, target.data) target.updateattribs(self) else: __Path__.shallowcopy(self, target) def inc_addextension(self, timestring, type): """Return new path of incfile with appropriate timestring and type""" return RemotePath(".".join([self.data, timestring, type])) class PathTriple: """Hold six paths. They are newdir, newbase, olddir, oldbase, incdir, and incbase, where for each dir is like "/a/b" and base is like "c" so when combined as in newfull, oldfull, and incfull, they make "/a/b/c". new, old, and inc are used for the source dir, the dir that becomes a mirror, and the dir that holds the increments. For old and new base is the same of the actual files expected, while for inc, most of the files dealt with will have an extension tacked on to base first. """ def __init__(self, newdir, newbase, olddir, oldbase, incdir, incbase): self.newdir, self.newbase = newdir, newbase self.newfull = newdir.append(newbase) self.olddir, self.oldbase = olddir, oldbase self.oldfull = olddir.append(oldbase) self.incdir, self.incbase = incdir, incbase self.incfull = incdir.append(incbase) def append_base(self, basename): """Return new PathTriple moving fulls to dirs and add new base""" return PathTriple(self.newfull, basename, self.oldfull, basename, self.incfull, basename) def __str__(self): """used for logging""" return ("newfull = %s\noldfull = %s\nincfull = %s" % (self.newfull, self.oldfull, self.incfull)) class TimeRelated: """Functions and data which depend on the time""" def __init__(self, timeinseconds): """Initializer - set time self.previnctime is the time in seconds of the previous backup self.timestr is a user readable and sortable version of time """ self.time = timeinseconds self.previnctime = None self.timestr = self.timetostring(self.time) def setprevinctime(self, previnctime): """Set previnctime and prevtimestr from time in seconds""" self.previnctime = previnctime self.prevtimestr = self.timetostring(previnctime) def getfullname(self, prefix, type): """Return the full modified name of an update file""" return "%s.%s.%s" % (prefix, self.prevtimestr, type) def timetostring(self, timeinseconds): """Makes a user readable version of time in seconds""" return time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime(timeinseconds)) def recentlychanged(self, statblock): """Return true if file has changed since previous backup""" return (statblock[stat.ST_CTIME] > self.previnctime or statblock[stat.ST_MTIME] > self.previnctime) def recent_extension(self, filename): """Return true for files with recent extension True if time extension of filename indicates is newer than or equally as old as self.time. """ return stringtotime(string.split(filename, ".")[-2]) >= self.time class BackupPrepare: """Prepare for a Backup operation Most of the class variables are taken from the Main class, or the Incrementer and Restorer classes and mean the same thing. self.marker_time is used by self.get_marker_time and caches the time of last backup, and self.marker is the filename of the marker. """ def __init__(self, args, exclude, exclude_mirror): self.source = Path(args[0]) if connection: # if true, then we are operating remotely self.target = RemotePath(args[1]) else: self.target = Path(args[1]) self.exclude = exclude self.exclude_mirror = exclude_mirror self.rbdir = None self.TR = None self.marker_time = None self.marker = None def Start(self): """Start backing up""" L.Log("Starting backup", 6) self.check_arguments() self.prepare_rbdir() self.prepare_logfile() self.TR = TimeRelated(time.time()) self.add_misc_excludes() if self.get_marker_time(): self.incremental_backup() else: self.mirror_only() self.reset_time_marker() def check_arguments(self): """Make sure initial arguments are good Both source and target must be names of directories. Abort if target already exists but isn't a directory. """ self.source.normalize() self.target.normalize() if not self.source.isdir(): L.FatalError("Currently the head of the backup tree " "must be a directory. Sorry.", 4) if not self.target.lstat(): L.Log("Creating directory %s" % self.target, 4) self.target.mkdir() elif not self.target.isdir(): L.FatalError("Destination %s exists but is not a directory" % self.target, 5) def prepare_rbdir(self): """Create rdiff-backup-data dir if does not exist""" def rbdirerror(): L.FatalError("rdiff backup directory %s exists and is not a " "directory, or could not be created", 6) self.rbdir = self.target + "/rdiff-backup-data" if not self.rbdir.lstat(): try: self.rbdir.mkdir() except os.error: rbdirerror() elif not self.rbdir.isdir(): rbdirerror() def prepare_logfile(self): """Initialize logfile""" logfilename = self.rbdir + "/backup.log" if connection: connection.rexec(open_logfile, logfilename) else: open_logfile(logfilename) def add_misc_excludes(self): """Add commonsense excludes""" self.exclude_mirror.append(str(self.rbdir)) self.exclude.append("/proc") self.exclude.append(str(self.target)) def reset_time_marker(self): """Create file marking time of last backup, delete old one""" basename = self.rbdir + "/current_mirror" basename.inc_addextension(self.TR.timestr, "snapshot").touch() if self.marker: self.marker.delete() def get_marker_time(self): """Return time in seconds of marker, or 0 if none exists""" if self.marker_time is None: l = filter(lambda x: x.startswith("current_mirror"), self.rbdir.listdir()) if not l: self.marker_time = 0 elif len(l) > 1: L.FatalError("Two time marker (current_mirror.---) files " "found. Please delete all but one.", 8) else: self.marker = self.rbdir + "/" + l[0] self.marker_time = self.marker.inc_time() return self.marker_time def incremental_backup(self): """Do incremental backup from self.source to self.target""" L.Log("Starting incremental backup " + self.TR.timestr, 3) self.TR.setprevinctime(self.get_marker_time()) newcomps, oldcomps = self.source.dirsplit(), self.target.dirsplit() pt = PathTriple(newcomps[0], newcomps[1], oldcomps[0], oldcomps[1], self.rbdir, "increments") Incrementer(self.TR, self.exclude, self.exclude_mirror).Increment(pt) L.Log("Finished incremental backup %s at %s" % (self.TR.timestr, time.asctime()), 3) def mirror_only(self): """Mirror source to target, no incrementing""" L.Log("Copying %s to %s for initial mirror" % (self.source, self.target), 3) Copier(self.exclude, self.exclude_mirror).Copy(self.source, self.target) class Copier: """Copy files around This is a class so we can use excludes and other class variables to direct and accumulate data about file operations. self.exclude and self.exclude_mirror are lists of regular expressions indicating which files from the new and mirror directory to exclude. exclude_regexps and exclude_mirror_regexps are compiled versions of these. """ def __init__(self, exclude, exclude_mirror): self.exclude = exclude self.exclude_regexps = map(re.compile, exclude) self.exclude_mirror = exclude_mirror self.exclude_mirror_regexps = map(re.compile, exclude_mirror) L.Log("Excludes: " + str(self.exclude), 7) L.Log("Mirror excludes: " + str(self.exclude_mirror), 7) def Copy(self, sourcepath, targetpath): """Recursively copy sourcepath to targetpath""" L.Log("Deep copying %s to %s" % (sourcepath, targetpath), 8) if (self.isexcluded(sourcepath, self.exclude_regexps) or self.isexcluded(targetpath, self.exclude_mirror_regexps)): L.Log("Excluding file %s" % sourcepath, 5) return sourcepath.shallowcopy(targetpath) if sourcepath.isdir(): for entry in sourcepath.listdir(): self.Copy(sourcepath.append(entry), targetpath.append(entry)) def isexcluded(self, filename, exclude_regexps): """Return true if filename matches exclude list""" for regexp in exclude_regexps: m = regexp.match(str(filename)) if m and m.group() == filename: return 1 return None class Incrementer(Copier): """Namespace and data used by Increment and related functions This class uses the following variables: self.TR, a TimeRelated, holding the time of this backup and the the previous one. self.instructions, which tells how each file is to be backed up. The function indicated by the dictionary is run with a PathTriple as its argument. If the type isn't in the dictionary, use "other" as the default. """ def __init__(self, TR, exclude, exclude_mirror): Copier.__init__(self, exclude, exclude_mirror) self.TR = TR self.instructions = { "reg": { "reg": self.diff, None: self.missing, "other": self.snapshot }, "dir": { "dir": self.dir_increment, None: self.missing, "other": self.snapshot }, None: { "dir": self.dir_increment, "other": self.snapshot }, "other": { None: self.missing, "other": self.snapshot }} def instructions_get(self, newfiletype, oldfiletype): """Lookup up pair in instructions""" if self.instructions.has_key(newfiletype): d = self.instructions[newfiletype] else: d = self.instructions["other"] if d.has_key(oldfiletype): return d[oldfiletype] else: return d["other"] def Increment(self, pt): """Update new to old and old to inc in pathtriple pt Increment returns true if action was taken or false if nothing was to be done for the files. This is so directories can be flagged if their subdirectories change. """ L.Log("Increment on %s" % pt.newfull, 7) L.Log("pathtriple:\n%s" % pt, 8) if (self.isexcluded(pt.newfull, self.exclude_regexps) or self.isexcluded(pt.oldfull, self.exclude_mirror_regexps)): L.Log("Excluding file %s" % pt.newfull, 5) return None if self.shortcut_by_time(pt): return None changed = self.instructions_get(pt.newfull.type(), pt.oldfull.type())(pt) if changed: L.Log("Updating file %s" % pt.oldfull, 6) if pt.newfull.lstat(): pt.newfull.shallowcopy(pt.oldfull) else: pt.oldfull.delete() return changed def shortcut_by_time(self, pt): """Return true if possible to skip over file by looking at time""" return (pt.newfull.lstat() and not pt.newfull.isdir() and pt.newfull.ctime() < self.TR.previnctime) def diff(self, pt): """Create increment diff, update old""" diffname = self.getincname(pt.incfull, "diff") BFO.rdiff(pt.newfull, pt.oldfull, diffname) diffname.updateattribs(pt.oldfull) return 1 def missing(self, pt): """Create increment to note that pt.old was missing""" self.getincname(pt.incfull, "missing").touch() return 1 def snapshot(self, pt): """Copy old to inc This is used when the file changes types, the file is no longer in the old directory, or for some other reason a diff cannot be made. """ pt.oldfull.shallowcopy(self.getincname(pt.incfull, "snapshot")) return 1 def dir_increment(self, pt): """Increment directories - called when pt.old or pt.new is a dir""" if pt.newfull.isdir(): new_filelist = pt.newfull.listdir() else: new_filelist = [] if pt.oldfull.isdir(): old_filelist = pt.oldfull.listdir() else: old_filelist = [] if not pt.incfull.lstat(): pt.incfull.mkdir() changed = mapor(lambda fn: self.Increment(pt.append_base(fn)), listunion(new_filelist, old_filelist)) if changed: incpath = self.getincname(pt.incfull, "dir") incpath.touch() incpath.updateattribs(pt.oldfull) return changed def getincname(self, basepath, type): """Return name of new incfile based basepath with type type""" return basepath.inc_addextension(self.TR.prevtimestr, type) class RestorePrepare: """Prepare for Restore operation""" def __init__(self, args, exclude): self.args = args self.exclude = exclude self.oldfile = None def Start(self): """Start restoring""" L.Log("Starting restore", 6) self.set_incfile_and_target() TR = TimeRelated(self.incfile.inc_time()) self.exclude.append(".*/rdiff-backup-data") self.set_rbdir_and_oldfile() L.open_logfile(self.rbdir + "/restore.log") L.Log("Restoring %s to %s, going back to dataset %s." % (self.incfile, self.target, TR.timestr), 4) L.Log("Current time: %s" % time.asctime(), 4) newcomps = self.target.dirsplit() oldcomps = self.oldfile.dirsplit() inccomps = self.incfile.inc_basename().dirsplit() pt = PathTriple(newcomps[0], newcomps[1], oldcomps[0], oldcomps[1], inccomps[0], inccomps[1]) Restorer(TR, self.exclude).Restore(pt) L.Log("Finished Restore", 6) def set_incfile_and_target(self): """Make sure args are good and init incfile and target""" self.incfile = Path(self.args[0]) if len(self.args) == 2: self.target = Path(self.args[1]) else: self.target = self.incfile.inc_basename() self.incfile.normalize() self.target.normalize() if not self.incfile.isincfile(): L.FatalError("%s does not look like an increment file" % self.incfile, 3) if self.target.lstat(): L.FatalError("File %s seems to exist already. Termining " "restore. Use rdiff-backup\nwith two arguments " "to specify the restore target, as in:\n" "'rdiff-backup backupfile target'" % self.target, 7) def set_rbdir_and_oldfile(self): """Set self.rbdir, the backup data dir, and oldfile The idea here is to keep backing up on the path until we find something named "rdiff-backup-data". Then use that as a reference to calculate the oldfile. This could fail if the increment file is pointed to in a funny way, using symlinks or somesuch. """ fullincpath = Path(os.path.join(os.getcwd(), self.incfile)) dir = fullincpath while 1: dir, base = dir.dirsplit() if base == "rdiff-backup-data": break elif not dir or dir== ".": L.FatalError("Unable to find rdiff-backup-data directory", 9) self.rbdir = dir + "/" + base L.Log("Set rbdir to %s" % self.rbdir, 8) extension = fullincpath[len(self.rbdir + "/increments"):] self.oldfile = (dir + extension).inc_basename() L.Log("Set oldfile to %s" % self.oldfile, 8) class RestoreSequence: """Hold instructions on how to restore a file The main data structure here is a sequence of triples stored in self.seq. Each triple is (time in seconds, inctype, path) of an increment file. To use, set the mirror file name and target name with __init__, then call self.add for each relevant increment file. Then call restore and poof the target file will be made. """ def __init__(self, mirror_file, target): """RestoreSequence initializer""" self.mirror_file = mirror_file self.target = target self.seq = [] def path_to_tuple(self, path): """Given path return matching sequence element""" return (path.inc_time(), path.inc_type(), path) def append(self, path): """Add path (filename of increment file) to the sequence""" self.seq.append(self.path_to_tuple(path)) def set_seq_from_filelist(self, filelist): """Replace sequence using new list of filenames""" self.seq = map(self.path_to_tuple, filelist) def normalize(self): """Remove unnecessary elements in the sequence It turns out that, except in the case of diff increments, only the last increment will be necessary. The method here uses the fact that sorting self.seq will produce the increments in chronological order. Sequence ends up in reverse chronological order. """ self.seq.sort() for index in range(len(self.seq)): if self.seq[index][1] != "diff": self.seq = self.seq[:index + 1] break self.seq.reverse() def restore(self): """Create the file indicated by the sequence. Do not recur.""" self.normalize() L.Log("Restore Sequence = %s" % self.seq, 8) if not self.seq: self.copy_from_mirror() for entry in self.seq: if entry[1] == "missing": L.Log("%s means that file did not exist then." % entry[2], 7) elif entry[1] == "dir": self.dir(entry[2]) elif entry[1] == "snapshot": self.snapshot(entry[2]) elif entry[1] == "diff": self.diff(entry[2]) else: raise RBException("Unknown increment type " + entry[2]) def copy_from_mirror(self): """Copy over current version from mirror directory""" self.mirror_file.shallowcopy(self.target) self.target.clearstats() def snapshot(self, incfile): """Process snapshot increment""" L.Log("Restoring by copying %s to %s" % (incfile, self.target), 7) incfile.shallowcopy(self.target) self.target.clearstats() def dir(self, incfile): """Process dir increment""" L.Log("Creating directory %s" % self.target, 7) self.target.mkdir() self.target.updateattribs(incfile) def diff(self, incfile): """Process diff increment""" if not self.target.lstat(): self.copy_from_mirror() BFO.patch(self.target, incfile) self.target.updateattribs(incfile) class Restorer: """Wrapper around Restore and related functions self.TR.time should be the target time files are restored to. self.exclude is a list of directories that will be excluded from the restore, and exclude_regexps is a compiled version. """ def __init__(self, TR, exclude): """Restorer initializer""" self.TR = TR self.exclude = exclude self.exclude_regexps = map(re.compile, exclude) def Restore(self, pt): """Restore files indicated in pathtriple pt.incbase should not include the extension of the increment file. """ L.Log("Restoring %s to %s" % (pt.oldfull, pt.newfull), 7) L.Log("pathtriple = %s" % pt, 8) if self.isexcluded(pt.oldfull): L.Log("Not restoring file %s due to exclude." % pt.newfull, 5) return RS = RestoreSequence(pt.oldfull, pt.newfull) for entry in pt.incdir.listdir(): if self.is_incfile_relevant(pt.incbase, entry): RS.append(pt.incdir.append(entry)) RS.restore() if pt.newfull.isdir(): self.restore_dir_contents(pt) def restore_dir_contents(self, pt): """Recursively restore contents of directory pt""" if pt.incfull.isdir(): inc_dirlisting = map(lambda p: p.inc_basename(), filter(lambda p: p.isincfile(), pt.incfull.listdir())) else: inc_dirlisting = [] if pt.oldfull.isdir(): old_dirlisting = pt.oldfull.listdir() else: old_dirlisting = [] for entry in listunion(inc_dirlisting, old_dirlisting): self.Restore(pt.append_base(entry)) def is_incfile_relevant(self, incbase, path): """Return true if path is relevant incfile to restore to incbase Only checks the base and time as indicated in path. Path does not have to be an incfile; if not return false. """ return (path.isincfile() and path.inc_basename() == incbase and path.inc_time() >= self.TR.time) def isexcluded(self, filename): """Return true if filename matches exclude list""" for regexp in self.exclude_regexps: m = regexp.match(str(filename)) if m and m.group() == filename: return 1 return None class Main: """Used to hold variables for the initialization functions self.exclude and self.exclude_mirror are passed to BackupPrepare or RestorePrepare eventually. self.rbdir is the normalized path of the rdiff-backup-data directory (used for writing log files and for finding mirror files when restoring). self.args are the arguments (not options) passed on the commandline. self.action will eventually be "backup", "restore", or "server" self.remote_cmd is the command to be run locally that will open up a pipe to a remote copy of rdiff-backup in server mode. """ def __init__(self): self.exclude = [] self.exclude_mirror = [] self.rbdir = None self.args = None self.action = None self.remote_cmd = None def Main(self): self.parse_commandline(sys.argv[1:]) self.check_for_rdiff() self.set_action_from_args() if self.remote_cmd: ConnectionPrepare().StartClient(self.remote_cmd) os.umask(077) if self.action == "backup": if len(self.args) != 2: self.commandline_error("Backups require " "two command line arguments") BackupPrepare(self.args, self.exclude, self.exclude_mirror).Start() elif self.action == "server": ConnectionPrepare().StartServer(sys.stdin, sys.stdout) elif self.action == "testserver": pass else: assert self.action == "restore" RestorePrepare(self.args, self.exclude).Start() self.cleanup() def parse_commandline(self, commandline): """Use commandline to set variables or signal error""" verbosity = 4 try: optlist, self.args = getopt.getopt(sys.argv[1:], "bv:", ["exclude=", "exclude-mirror=", "server", "testserver", "remote-cmd="]) except getopt.error: self.commandline_error("Invalid commandline options") for opt, arg in optlist: if opt == "-b": self.action = "backup" elif opt == "--server": self.action = "server" elif opt == "--testserver": self.action = "testserver" elif opt == "--remote-cmd": self.remote_cmd = arg elif opt == "-v": verbosity = int(arg) elif opt == "--exclude": self.exclude.append(arg) elif opt == "--exclude-mirror": self.exclude_mirror.append(arg) L.verbosity = verbosity if len(self.args) == 1: self.action = "restore" elif len(self.args) == 0 and (self.action == "server" or self.action == "testserver"): pass elif len(self.args) > 2 or len(self.args) < 1: self.commandline_error("Wrong number of commandline arguments") def commandline_error(self, errstring): """Signal error with commandline and exit""" sys.stderr.write("ERROR: %s\n" % errstring) print self.usage() sys.exit(2) def usage(self): """Return string summarizing usage""" return """ This script can be used to backup files incrementally, storing them as complete copies of the original plus reverse diffs describing how to recover older versions of the files. Usage: rdiff-backup [options] source backupdir rdiff-backup [options] file-to-restore [destination] Options: -b Force backup mode even if first argument appears to be a backup file. -h Display this message and exit. -v[0-9] Specify verbosity level (0 is totally silent, 9 is noisiest). This affects logging as well as output to the terminal. --exclude regexp This option can be used multiple times to exclude files from being backed up. Each argument should be a regular expression. --server Enter server mode (not to be invoked directly) --remote-cmd command Use this option to backup files to a remote location. command should be a string that, when executed by the shell, sets up a pipe to rdiff-backup --server in the remote location. Examples: rdiff-backup foo bar Simplest case: back up directory foo to directory bar. rdiff-backup --exclude /home/bob/tmp /home/bob /mnt/backup Back files up from /home/bob to /mnt/backup, leaving increments in /mnt/backup/rdiff-backup-data. Do not back up directory /home/bob/tmp. rdiff-backup -v9 important-data.dir-3423.2001-02-14_23:23:23 temp Restore directory important-data as it was on Februrary 14, 2001, calling the new directory "temp". Keep lots of logging information. rdiff-backup --remote-cmd 'ssh bob@hostname.net /path/to/rdiff-backup --server' /some/local-dir /whatever/remote-dir Backup /some/local-dir on the local machine to /whatever/remote-dir on machine hostname.net. Use ssh and remote account 'bob' to open the connection. The remote copy of rdiff-backup is at /path/to/rdiff-backup. """ def check_for_rdiff(self): """Exit with error if problem with rdiff""" if os.system("rdiff --help >/dev/null"): L.FatalError(""" There was some problem running rdiff, a program this script requires. Make sure rdiff is executable and in the PATH. rdiff is currently part of the librsync package and can be downloaded from http://sourceforge.net/projects/rproxy/""") def set_action_from_args(self): """Try to determine action from form of arguments""" if not self.action: if Path(self.args[0]).isincfile(): self.action = "restore" else: self.action = "backup" if self.remote_cmd and self.action == "restore": L.FatalError("Sorry, remote restores not currently supported", 12) L.Log("Action = " + self.action, 8) def cleanup(self): """rdiff-backup's last function to run""" if connection: connection.close() class PipeFile: """Object that simulates some file methods and reads from a pipe Only read is supported right now. self.connection is the parent Connection object, passed so that close() can clear the busy bit. self.mode is None if Pipe is for reading, 1 if for writing. """ def __init__(self, pipe, mode, length): self.pipe = pipe if mode == "r": self.mode = None elif mode == "w": self.mode = 1 else: raise RBException("PipeFile: Unknown file opening type") self.length = length def read(self, bufsize = -1): """Read bufsize characters from file, or until end if bufsize = -1""" assert not self.mode if bufsize < 0: actually_read = self.length else: actually_read = min(bufsize, self.length) self.setlength(self.length - actually_read) return self.pipe.read(actually_read) def write(self, s): """Write string s into file""" assert self.mode and len(s) <= self.length self.pipe.write(s) self.setlength(self.length - len(s)) def close(self): """Read/write pipe enough to match length, and then flush""" if self.mode: self.write('!'*self.length) else: self.read(self.length) self.pipe.flush() def setlength(self, length): """Use to set self.length so self.connection.busy handled""" if not length: connection.busy = None self.length = length class ConnectionPrepare: """Set up the appropriate connection StartServer starts the server on the specified pipes, while StartClient starts the client by calling the specified command to open up a connection to the remote copy of rdiff-backup. """ def StartServer(self, inpipe, outpipe): """Start server operations""" global connection connection = Connection(inpipe, outpipe, 1) self.command_loop() def command_loop(self): """Keep reading and executing from the pipe""" while 1: connection.run_command() def StartClient(self, remote_cmd): """Start client operations by running remote_cmd""" global connection inpipe, outpipe = popen2.popen2(remote_cmd) connection = Connection(inpipe, outpipe, 0) class Connection: """Manage a pipe in each direction self.inpipe is a pipe suitable for reading, and self.output for writing. self.busy is set if pipe is currently occupied so that get/put methods won't work. self.server is true iff the current process is a server. """ def __init__(self, inpipe, outpipe, server): self.inpipe = inpipe self.outpipe = outpipe self.server = server self.busy = None def get(self): """Read an object from the pipe and return value""" assert not self.busy header_string = self.inpipe.read(8) format_string, length = header_string[0], self.s2l(header_string[1:]) if format_string == "p": result = cPickle.loads(self.inpipe.read(length)) if (type(result) == types.InstanceType and issubclass(result.__class__, Exception)): raise result else: return result elif format_string == "f": self.busy = 1 return PipeFile(self.inpipe, "r", length) else: assert format_string == "q" L.Log("Received quit command from remote end; quitting", 6) sys.exit(0) def put(self, obj, filelength = None): """Put as a file or object depending on obj type""" if self.isfile(obj): self.putfile(obj, filelength) else: self.putobj(obj) def putobj(self, obj): """Put an object into the pipe""" pickle = cPickle.dumps(obj, 1) header_string = "p" + self.l2s(len(pickle)) self.outpipe.write(header_string) self.outpipe.write(pickle) self.outpipe.flush() def put_file_prefix(self, filelength): """Just write the header string for a file to outpipe""" self.outpipe.write("f" + self.l2s(filelength)) def putfile(self, file, filelength = None): """Put a file into the pipe""" if not filelength: # must be dealing with pipefile filelength = file.length self.put_file_prefix(filelength) bytes_transferred = 0L while 1: inbuf = file.read(blocksize) self.outpipe.write(inbuf) len_inbuf = len(inbuf) bytes_transferred = bytes_transferred + len_inbuf if len_inbuf < blocksize: break self.outpipe.flush() assert bytes_transferred == filelength def get_pipefile(self, filelength): """Return a PipeFile object that is writable like a normal file""" self.put_file_prefix(filelength) return PipeFile(self.outpipe, "w", filelength) def close(self): """Tell the remote end to quit; close the pipes""" self.outpipe.write("q1234567") self.outpipe.close() self.inpipe.close() def s2l(self, s): """Convert string to long int""" assert len(s) == 7 l = 0L for i in range(7): l = l*256 + ord(s[i]) return l def l2s(self, l): """Convert long int to string""" s = "" for i in range(7): l, remainder = divmod(l, 256) s = chr(remainder) + s assert remainder == 0 return s def put_command(self, return_val, f, arglist): """Tell the remove side to execute f on arguments arglist If a pipefile is one of the arguments, just write the header and then exit (it must be the last argument). Then subsequent writes to the pipefile will send the data down the link and thus fill in the last argument. """ self.put((return_val, len(arglist), f)) for arg in arglist: if self.ispipefile(arg): self.put_file_prefix(arg.length) break else: self.put(arg) def reval(self, f, *args): """Wrapper around put_command - expect return val""" # L.Log("Evaluating function %s on arguments %s" % # (f, args), 8) self.put_command(1, f, args) return self.get() def rexec(self, f, *args): """Wrapper around put_command - don't read return val""" # L.Log("Executing command %s on arguments %s" % # (f, args), 8) self.put_command(0, f, args) def run_command(self): """Read a command and then run it on given arguments""" return_val, arity, func = self.get() args = [] for i in range(arity): args.append(self.get()) L.Log("Running %s on %s" % (func, args), 8) try: result = apply(func, args) except: result = sys.exc_info()[1] if return_val: self.put(result) return result def isfile(self, obj): """Return true if obj is like a file""" return type(obj) == types.FileType or self.ispipefile(obj) def ispipefile(self, obj): """Return true if obj is a pipefile""" return (type(obj) == types.InstanceType and issubclass(obj.__class__, PipeFile)) def listunion(a, b): """Make a new list composed of any element in either a or b""" d = {} for element in a + b: d[element]=1 return d.keys() def mapor(f, l): """Return true if f(e) true for e in l. Not short-circuiting.""" result = None for i in l: result = f(i) or result return result def isremote(path): """True if path is a RemotePath""" return issubclass(path.__class__, RemotePath) def write_filename_from_fileobj(filename, fileobj): """Like copy fileobj but target will be opened first This, like some of the commands below, has to be of global scope in order to be pickled correctly. Also, the fileobj is required by connection.put_command to be the last argument. """ BFO.copyfileobj(fileobj, open(filename, "w")) def touch(target): """Make a file with no length""" open(str(target), "w").close() def open_logfile(logfile): """Same as Log.open_logfile but for remote use - global for pickling""" L.open_logfile(logfile) def write_to_logfile(logstring): """Write errstring to logfile if it exists""" if L.logfile: L.logfp.write(logstring) def run_external(path): BFO.run_external(path) if globals().has_key("startclient"): ConnectionPrepare().StartClient("./rdiff-backup --server") elif __name__ == "__main__": Main().Main()