#!/usr/bin/env python

'''A script to help upgrade a FreeBSD ports installation, using, as far as
possible, precompiled packages. In particular it computes the INDEX for
installed ports and dependencies corresponding to the present state of ports,
sorted in dependency order.'''

# Author: Michel Talon
# Version 1.2
# Licence: BSD (revised)
# Date: January, February 2007
# Testing by Mike Clarke, Cyrille Szymanski, Jean-Jacques Dhenin, Thierry
# Thomas

import os, os.path, sys, re
from time import sleep, time, strftime, localtime
import thread, ftplib
from ftplib import FTP

###### Variables to be user customized ##############

# The next lines can be edited or copied to /usr/local/etc/pkgupgrade.conf
# where they will be sourced.
portdir = "/usr/ports/"    # can be replaced for non standard installs
pkgdir = "/var/db/pkg/"    # idem
# Choose here a good mirror instead
freebsd_server = "ftp.freebsd.org"
# The following two are performance knobs for Index generation. As an example,
# on my machine, a true biproc, Index generation takes 35s for nwork=1, 21.4s
# for nwork=2, 20.8s for nwork=3 and still 20.6s for nwork=10, for around 360
# ports. This shows that concurrency is very effective; the problem is more
# compute bound than IO bound. Each port takes around 0.06s and the delay
# helps worker threads not to be fired simultaneously, so as to spread the IO
# work among the threads. Note that FreeBSD uses HZ=1000, so delay is still
# ten times bigger than the scheduler delays, which are the smallest time
# scale accessible to thread activity.
nwork = 3      # Number of worker threads
delay = 0.01   # Small delay before firing a new job.
# The following is useful if we run the program several times, since INDEX.ftp
# is a very big download.
download_index = True   # Do we download INDEX.ftp ?
keep_index = True       # Do we keep it after download ?
# Do we want complete logging of the addition of dependencies - useful for
# debugging?
add_deps_log = False
# Using RELEASE packages is certainly more coherent, but some people want to
# have the most recent packages, which may perhaps have coherence problems.
pkg_release = True      # Do we use RELEASE or Latest packages?
# Some package versions are baroque, and the simplistic algorithm in the
# program gives a wrong answer. Forking pkg_version -t is guaranteed to give
# correct results, at the expense of a heavy performance hit.
pkg_cmp_version = False # Do we fork pkg_version -t to compare versions ?
# index_pristine helps cure the problem that some ports get package names
# depending on local settings, such as gimp-gnome-2.2.10_1,1 while
# official packages and INDEX get gimp-2.2.10_1,1. This hack comes
# from /usr/ports/Makefile.
index_pristine = True
# You can put precompiled packages here, they will be checked. Here depot
# has a relative path, the others have absolute paths.
pkg_repos = '/cdrom/packages/All:depot:/usr/ports/packages/All'
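# A minimal sketch of what /usr/local/etc/pkgupgrade.conf could contain; the
# file is plain Python sourced with execfile() below, so any of the variables
# of this section (and HOLD or COMPILE below) can be overridden there. The
# particular values shown here are only hypothetical examples.
#
#   freebsd_server = "ftp2.fr.freebsd.org"
#   nwork = 4
#   pkg_release = False
#   COMPILE = ['www/apache22']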
# Put here ports on hold. Preferably very big ports which are not supposed to
# break in the upgrade, nor to cause breakage to other programs.
HOLD = ['print/teTeX-texmf', 'java/diablo-jdk15', 'print/teTeX-base',
        'news/inn', 'java/java-tutorial', 'java/jdk14', 'math/scilab',
        'print/cm-super', 'print/acroread7', 'math/atlas',
        'x11-fonts/webfonts', 'lang/icc', 'multimedia/win32-codecs',
        'print/tex-texmflocal', 'editors/openoffice.org-2']
# There are cases, such as 6.2 -> 7.0, where it is better to remove
# all old packages without trying to be smart.
remove_everything = True
# Finally a list of origins for ports we want to compile, and not to install
# as binary packages. In particular they will not be downloaded. For this to
# be useful, appropriate options have to be set in /etc/make.conf
COMPILE = []

#####################################################

# Source the config file:
cfgfile = '/usr/local/etc/pkgupgrade.conf'
try :
    if os.access(cfgfile, os.F_OK) :
        execfile(cfgfile, globals())
except Exception :   # Any exception
    print "Config file is not in python syntax."

# A sanity check of the config (perhaps just sourced).
if portdir.find('/', -1) == -1 :
    print "portdir needs a trailing / "
    sys.exit()
if pkgdir.find('/', -1) == -1 :
    print "pkgdir needs a trailing / "
    sys.exit()

###### Some preparation ############################

index = open('INDEX.ports','w')       # Computed index of installed ports
upgradelog = open('UpgradeLog','w')   # Log of the various events

# We store relevant information about a port into an associative array,
# indexed by the "origin" of the port. The corresponding value is an array
# containing the various dependencies. So each key represents a node in the
# DAG of the dependency tree. The global top_rank is used to order
# topologically the nodes in the DAG; it starts at 2 and increases.
port2pkg = {}
top_rank = 2
# Some other relevant global objects
collection = []       # Collection of new origins
inst_pkgs = {}        # Old and new packages and their origins.
managed_ports = []    # Ordered subset which will be managed by the upgrade procedure
moved = {}            # Dictionary: old_orig -> new_orig.
last_valid_mov = {}   # Special case of removed ports, last valid origin

matchnumer = re.compile('(\d+)')         # picks pure numbers
matchalpha = re.compile('([a-zA-Z]+)')   # picks pure alphas
matchversion = re.compile('-([a-zA-Z0-9\.]+)(_\d+)?(,\d+)?$')
# This picks the components of package versioning. The regular expression
# selects the last '-' in the package name, followed by a number or a letter,
# typically rc for 'release candidate', pl for patchlevel, etc. and perhaps
# dot separated components which may have letters and numbers, such as in
# jpeg-6b, forming the package version. These are followed optionally by
# portrevision, separated by '_', and portepoch, separated by ',', which are
# numbers. Note we don't tolerate special characters such as ':' in the
# version number. We hope such aberrations have disappeared.

def find_pkg_dir(pkg_release) :
    "Computes the directory of packages on remote server."
    if pkg_release :
        pipe = os.popen("uname -m -r")
        uname = pipe.read()
        pipe.close()
        [rev_level, arch] = uname.split()
        rev_level = rev_level[0:3]
        pkg_dir = "/pub/FreeBSD/ports/" + arch + "/packages-" + rev_level + \
            "-release"
        return pkg_dir
    else :
        # In this case there is apparently no arch indication, so these
        # packages are only compiled for the i386 architecture.
        pkg_dir = "/pub/FreeBSD/ports/packages"
        # Normally there is the latest INDEX here, and the latest ports
        # under All.
        return pkg_dir

def retreive_index() :
    "Retreives INDEX from the freebsd server."
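    # As an illustration, on a hypothetical 6.2-RELEASE/i386 machine the
    # find_pkg_dir(True) call above evaluates to
    # "/pub/FreeBSD/ports/i386/packages-6.2-release", while
    # find_pkg_dir(False) gives "/pub/FreeBSD/ports/packages".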
    # This index corresponds to binary packages which can be downloaded from
    # the FreeBSD ftp server, and is in general different from the one we
    # compute below, which corresponds to packages we can compile from the
    # locally installed ports system. We need the first one to know what we
    # will install through pkg_add, and the second for ports we will
    # compile. In particular it is beneficial to download binary packages for
    # the build_deps of the ports we compile. There is some probability that
    # the ftp commands fail, hence some error checking is in order.
    err_message = "The ftp server " + freebsd_server + ''' cannot be contacted
or does not mirror the FreeBSD distribution which you need. Please check it
carefully, and if necessary edit the freebsd_server string.'''
    print "Downloading INDEX from ftp server " + freebsd_server + " in \
directory \n" + find_pkg_dir(pkg_release)
    index_ftp = open('INDEX.ftp','w')
    status_ftp = open('status.ftp','w')
    print >> status_ftp, "Preparing"
    status_ftp.close()
    ftp_handle = FTP(freebsd_server)
    ans = ftp_handle.login()
    if not ans[0:3] == '230' :
        print err_message
        sys.exit()
    # Passive mode is on by default, perhaps needs some tuning. The command to
    # do that is "ftp_handle.set_pasv(False)"
    try :
        ans = ftp_handle.cwd(find_pkg_dir(pkg_release))
    except ftplib.error_perm, resp :
        print resp
        print err_message
        ftp_handle.quit()
        sys.exit()
    # Finally retrieve INDEX
    status_ftp = open('status.ftp','w')   # To reset the content
    print >> status_ftp, "Downloading"
    status_ftp.close()
    ans = ftp_handle.retrbinary('RETR INDEX', index_ftp.write)
    if not ans[0:3] == '226' :
        print err_message
        ftp_handle.quit()
        sys.exit()
    ftp_handle.quit()
    # Signal that the download is finished
    status_ftp = open('status.ftp','w')   # To reset the content
    print >> status_ftp, "Finished"
    print "INDEX downloading finished."
    status_ftp.close()
    index_ftp.close()

# We prepare a shell command to run make -V in each port.
# This is to achieve maximum parallelism in the context of python threads.
# Motivation: see the "describe" target in bsd.port.mk.
# The www site is obtained by grepping the description file.
PORTSDIR = portdir.rstrip('/')   # portdir independence
if index_pristine :              # the "pristine" hack
    LOCALBASE = "/nonexistentlocal"
    X11BASE = "/nonexistentx"
else :
    LOCALBASE = "/usr/local"
    X11BASE = "/usr/X11R6"
makestring = '''/usr/bin/make \
PORTSDIR=''' + PORTSDIR + ''' \
LOCALBASE=''' + LOCALBASE + ''' \
X11BASE=''' + X11BASE + ''' \
-V PKGNAME \
-V PREFIX \
-V COMMENT \
-V DESCR \
-V MAINTAINER \
-V CATEGORIES \
-V EXTRACT_DEPENDS \
-V PATCH_DEPENDS \
-V FETCH_DEPENDS \
-V BUILD_DEPENDS \
-V RUN_DEPENDS \
-V LIB_DEPENDS '''

###### Now we are setup ##################

###### First some helper functions #######

def get_valid_categories() :
    "Hack to get a list of valid categories."
    # This works only in a port directory, so choose one.
    pipe = os.popen("cd " + portdir + "/ftp/wget && make \
-V VALID_CATEGORIES")
    valid = pipe.readline().split()
    pipe.close()
    return valid

valid_categories = get_valid_categories()   # Compute this only once

def check_cat(mylist, orig) :
    "Check that a category is valid."
    for entry in mylist:
        if entry not in valid_categories :
            mylist.remove(entry)
            print >> upgradelog, "Bad category " + entry + " in " + orig
    return mylist

def clean_path(path) :
    '''We strip the portdir prefix from dependency paths, since it needlessly
    uses space and introduces an explicit dependency on portdir. We also
    normalize paths which use .. to move in the ports tree.
    The script make_index in Tools does the same.'''
    if path.find("..") >= 0 :
        # Perhaps useful to keep track of them
        pipe = os.popen("realpath " + path)
        path = pipe.readline()
        pipe.close()
    # Experience shows some dependencies have a trailing / or \n
    return path.replace(portdir, '', 1).strip().rstrip('/')

def union(list1, list2) :
    "Set theoretic union of two lists."
    # Using sets would be faster, but this is insignificant.
    for elem in list2 :
        if elem not in list1 :
            list1.append(elem)

def get_website(description) :
    "Parse the description file to get the website."
    website = ""
    try:
        for line in file(portdir + description) :
            if not line.find('WWW:', 0, 5) :
                website = line.replace('WWW:', '').strip()
        return website
    except IOError :
        return ""
    # Returns the last occurrence of a WWW: line, or "";
    # the first occurrence has proven problematic.

##### Get the list of installed packages ##

# This is very easy since each package corresponds to a directory under
# pkgdir. However, if using portupgrade, an extraneous pkgdb.db appears. The
# origin of packages is then obtained by parsing +CONTENTS. We thus obtain a
# collection of origins for which we build an INDEX. In the course of doing
# that we will discover dependencies which are not mentioned and ports which
# have changed names, so we must alter the list afterwards.

# Fill the array collection and the dictionary inst_pkgs from the pkg database.
ori_string = '@comment ORIGIN:'
inst_pkgs_list = os.listdir(pkgdir)
try :
    inst_pkgs_list.remove('pkgdb.db')
except ValueError :
    pass
print "There are now %d packages installed." % len(inst_pkgs_list)
for pkg in inst_pkgs_list :
    for line in file(pkgdir + pkg + '/+CONTENTS') :
        if not line.find(ori_string, 0) :
            pkg_orig = line.replace(ori_string, '').strip()
            break
    collection.append(pkg_orig)
    inst_pkgs[pkg_orig] = [pkg, '','Build']
    # First field is the old pkg, second the newly computed one, third the
    # package downloaded.

def fill_moved() :
    "Fills the 'moved' dictionary from entries in 'MOVED'."
    mov_f = file(portdir + 'MOVED','r')
    lineno = 0
    mov1 = {}
    for line in mov_f :
        if line.find('#', 0, 1) == -1 :   # Discard comments at beginning
            fields = line.split('|')
            try:
                mov1[fields[0]] = [fields[1], lineno]
                lineno += 1
            except IndexError:
                pass   # Empty lines
    # Recursively extend this to a new dictionary. We follow MOVED recursively,
    # but only with increasing lineno. In particular this breaks loops, except
    # when a name becomes itself. We want to transform a name systematically
    # into its most recent version, so if a -> b -> a -> c we want a -> c,
    # b -> c. Hence there is no need to remember the a -> b transformation,
    # and we can use a dictionary storing m[a] = b, m[b] = a, m[a] = c without
    # worrying that the second assignment to m[a] overwrites the first.
    def follow_moved(orig) :
        "Follows an origin through MOVED."
        # The result is the most recent origin for a software, if the software
        # still exists, or '' if the port was marked 'Removed'.
        n_orig = orig
        o_lineno = -1
        while True :
            try :
                [m_orig, lineno] = mov1[n_orig]
                # Here lineno is the line where n_orig appears
                if m_orig == '' :        # removed port
                    # Fill last_valid_mov
                    last_valid_mov[orig] = n_orig
                    return ''
                elif m_orig == n_orig :  # protects against a -> a lines in MOVED
                    return n_orig
                else :                   # This is the recursive step
                    if lineno > o_lineno :   # Only move forward
                        n_orig = m_orig
                        o_lineno = lineno
                    else :               # Break out of recursion, normal exit.
                        return n_orig
            except KeyError :
                # Either orig does not appear in MOVED or n_orig has no
                # further move, normal exit here!
                return n_orig

    # Compute once and save in the "moved" dictionary. Note that moved.keys()
    # has all the names which appeared at least once in MOVED as origins of a
    # move. So it is in principle complete. Moreover, ports which have been
    # removed are caught in follow_moved() and added to last_valid_mov.
    for orig in mov1.keys() :
        moved[orig] = follow_moved(orig)

fill_moved()

# Check and update the origins of installed packages. There is a hairy point
# here: we chose to specify a given software by the most recent origin we can
# find, that is the last one appearing in MOVED, normally corresponding to
# the origin deduced from the ports tree.
def check_col(collection) :
    "Follows ports mentioned in MOVED."
    # No need to recurse here since "moved" has been recursively closed.
    removed = []
    for orig in collection :
        # Each origin must be tested for a possible move or deletion
        if orig in moved.keys() :
            n_orig = moved[orig]
            if n_orig == '' :         # Removed port
                removed.append(orig)
            else :                    # Moved port
                if n_orig != orig :   # Protects against null moves.
                    print >> upgradelog, "Port", orig, "has been moved to", n_orig
                    inst_pkgs[n_orig] = inst_pkgs.pop(orig)   # Replace key orig
                    collection[collection.index(orig)] = n_orig
        else :
            n_orig = orig   # This is because the next test is uniformly on n_orig
        # The MOVED collection may be incomplete; add a second check. In fact
        # if an obsolete port remains, it will crash our index generation.
        path = portdir + n_orig + '/Makefile'
        if not os.access(path, os.F_OK) and n_orig not in removed :
            removed.append(n_orig)
            print >> upgradelog, "Port", n_orig, "doesn't really exist."
    print >> upgradelog, "Following ports have been removed in check_col."
    for orig in removed :
        # Such ports may have been resurrected in a different form, such as
        # linux-flashplugin6 -> linux-flashplugin7 or gnu-libtool ->
        # libtool15. In principle these name changes should be mentioned in
        # MOVED, but in case they are not, manual intervention is necessary,
        # and we content ourselves with straight removal.
        # Later one may add a "guessing" procedure as in portupgrade, but
        # fixing collection here is futile since necessary dependencies will
        # be brought in later on, and most presumably such obsoleted ports
        # have been brought in as dependencies of a more important one.
        # Note that, up to now, these ports have kept their old origin, so we
        # update the origin using last_valid_mov.
        print >> upgradelog, orig
        try :
            n_orig = last_valid_mov[orig]
            inst_pkgs[n_orig] = inst_pkgs.pop(orig)   # Replace key orig
        except KeyError :
            # orig is in removed but not in last_valid_mov. Should not happen;
            # in this case don't change the origin.
            n_orig = orig
        inst_pkgs[n_orig][1] = 'Removed'   # Mark removal
        # It is essential to remove orig from collection or the Index
        # generation would crash. However this software may still exist at
        # RELEASE time, perhaps under a different name.
        collection.remove(orig)
    upgradelog.flush()
    # MOVED contains merged ports, which may now lead to duplicates in the
    # collection. We fix that by converting it to a set and back to a list.
    return list(set(collection))

##### Obtain the port information ########

def get_port_info(orig, collection) :
    "Obtains one port's information by running make -V."
    # The main aim of this function is to decipher the output of "make -V" for
    # a given port. But we also collect unsatisfied run_deps and all_deps in
    # the following arrays.
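    # As an illustration (the values are hypothetical), the single make
    # invocation built in makestring above prints one line per -V variable,
    # so the pipe below returns something like:
    #   buff[0]  PKGNAME          e.g. wget-1.10.2
    #   buff[1]  PREFIX           e.g. /nonexistentlocal when index_pristine
    #   buff[2]  COMMENT
    #   buff[3]  DESCR            e.g. /usr/ports/ftp/wget/pkg-descr
    #   buff[4]  MAINTAINER
    #   buff[5]  CATEGORIES
    #   buff[6]  EXTRACT_DEPENDS
    #   buff[7]  PATCH_DEPENDS    dependency entries have the form
    #   buff[8]  FETCH_DEPENDS    pkg:/usr/ports/origin, hence the
    #   buff[9]  BUILD_DEPENDS    deps.split(':')[1] below
    #   buff[10] RUN_DEPENDS
    #   buff[11] LIB_DEPENDS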
    missing = []
    miss_run = []
    # We intercept the calls to clean_path() for dependencies in order to
    # catch unsatisfied dependencies in the collection, and so we get a closed
    # set of ports under the dependency relation. Doing this now ensures that
    # the construction of the DAG tree afterwards will be complete. We take
    # the opportunity to satisfy run_deps of upgraded ports.
    def clean_path_close(path, if_run) :
        "Cleans path and collects unsatisfied dependencies."
        origin = clean_path(path)
        if origin not in collection :
            missing.append(origin)
            if add_deps_log :
                print >> upgradelog, "Adding dependency", origin, "for", orig
            if if_run :   # Only add run_deps and lib_deps here
                miss_run.append(origin)
        return origin

    pipe = os.popen("cd " + portdir + orig + " && " + makestring)
    sleep(delay)   # Yields to the other threads
    info = pipe.read()
    pipe.close()
    buff = info.split('\n')
    # Parse the contents of the buffer
    description = clean_path(buff[3])
    prefix = buff[1]
    # We must restore the fake prefix introduced by index_pristine = True
    if index_pristine :
        if prefix == '/nonexistentx' :
            prefix = '/usr/X11R6'
        if prefix == '/nonexistentlocal' :
            prefix = '/usr/local'
    # The disposition of the fields is as in the INDEX file.
    pkg_info = [buff[0],                            # 0 pkgname
                False,                              # 1 expanded?
                prefix,                             # 2 prefix
                buff[2],                            # 3 comment
                description,                        # 4 description
                buff[4],                            # 5 maintainer
                check_cat(buff[5].split(), orig),   # 6 categories
                [clean_path_close(deps.split(':')[1], False)
                 for deps in buff[9].split()],      # 7 build_deps
                [clean_path_close(deps.split(':')[1], True)
                 for deps in buff[10].split()],     # 8 run_deps
                get_website(description),           # 9 website
                [clean_path_close(deps.split(':')[1], False)
                 for deps in buff[6].split()],      # 10 extract_deps
                [clean_path_close(deps.split(':')[1], False)
                 for deps in buff[7].split()],      # 11 patch_deps
                [clean_path_close(deps.split(':')[1], False)
                 for deps in buff[8].split()],      # 12 fetch_deps
                0]                                  # 13 color
    # color is 0, 1, 2 for white, grey, black in the DFS algorithm.
    # Finally library dependencies
    lib_deps = [clean_path_close(deps.split(':')[1], True)
                for deps in buff[11].split()]
    union(pkg_info[7], lib_deps)   # build union lib
    union(pkg_info[8], lib_deps)   # run union lib
    if len(pkg_info[8]) == 0 :
        pkg_info[1] = True         # For recursive extension
    return pkg_info, missing, miss_run

def run_col(collection) :
    '''Main loop for treating a collection of ports. Work is done by nwork
    worker threads. Synchronization is provided by an instance of the Wsync
    class.'''
    # Creates a Wsync instance, with nwork threads, dowork = True
    worker = Wsync(nwork, collection)
    # The engine launches tokens and starts the worker threads. It runs in the
    # context of the main thread.
    delay_s = (delay - 0.003)/nwork
    sleep_table = [0.5, 0.2, 0.1, 0.1, 0.05]
    # We need to care about adding dependencies to the collection while the
    # scan is running, particularly when we are nearly finished!
    for orig in collection :
        worker.engine(orig)
        sleep(delay_s)   # Improves performance, spreads the load
        remaining = len(collection) - collection.index(orig) - 1
        if remaining < 5 or remaining < nwork :
            if remaining > 4 :
                remaining = 4
            # Sleep longer and longer at the end.
            # This should allow updating the collection completely.
            sleep(sleep_table[remaining])
    # Signal end of work to the threads. No need to protect, only the main
    # thread writes to it. If these last threads produce dependencies, we will
    # not treat them. Hope there are few.
    worker.dowork = False
    # Blocks until last thread dies
    worker.finish()

#################################################
#
#       Recursion and sorting in the DAG
#
#################################################

# KeyError occurs when the needed dependency doesn't exist in the port
# tree. For example a dependency to swig11 is recorded, but the port tree
# has swig13. It may also occur if we don't have a "complete" set of ports,
# that is closed under dependency. Then these KeyError should trigger
# extending the set of ports.

def recurse_exp(p2p) :
    '''Recursive expansion of run-deps. At the end we have the complete DAG
    of run time dependencies, we can topologically sort it in next step.'''
    def recurse(p2p, orig) :
        "The recursive step."
        for p_run in p2p[orig][8] :
            try:
                if not p2p[p_run][1] :   # expand = False
                    recurse(p2p, p_run)  # Recurse on non expanded subdeps.
                union(p2p[orig][8], p2p[p_run][8])
            except KeyError :            # Some p_run doesn't exist.
                print >> upgradelog, "KeyError in recurse", p_run
        p2p[orig][1] = True
    # Now do it
    for orig in p2p.keys() :             # recursive expansion of p2p
        if not p2p[orig][1] :            # orig unexpanded
            recurse(p2p, orig)

def emit_ordered(p2p, orig) :            # Helper function
    "Fixes expansions and writes index line."
    def convert(list0) :                 # Helper function
        "Convert a list of dependencies to a string of sorted package names."
        nlist = []
        for elem in list0 :
            try:
                nlist.append(p2p[elem][0])
            except KeyError :            # no elem in port tree
                print >> upgradelog, "KeyError in convert", elem
        nlist.sort()
        return " ".join(nlist)           # returns a string from list.
    # We profit of the visit to fix the other dependencies.
    try:
        # Missing dependencies? Rather safe than sorry.
        for p_build in p2p[orig][7] :
            union(p2p[orig][7], p2p[p_build][8])
        for p_ext in p2p[orig][10] :
            union(p2p[orig][10], p2p[p_ext][8])
        for p_pat in p2p[orig][11] :
            union(p2p[orig][11], p2p[p_pat][8])
        for p_fet in p2p[orig][12] :
            union(p2p[orig][12], p2p[p_fet][8])
    except KeyError, reason :            # Some p_build, etc. doesn't exist.
        print >> upgradelog, "KeyError in expand", reason
    line = "|".join([p2p[orig][0], portdir+orig, p2p[orig][2].rstrip('/'),
                     p2p[orig][3], portdir+p2p[orig][4], p2p[orig][5],
                     " ".join(p2p[orig][6]), convert(p2p[orig][7]),
                     convert(p2p[orig][8]), p2p[orig][9],
                     convert(p2p[orig][10]), convert(p2p[orig][11]),
                     convert(p2p[orig][12])])
    print >> index, line   # Fills the INDEX file, in sorted order.

# We visit each node of the DAG in depth first order, algorithm DFS in
# Cormen, Leiserson, Rivest and Stein "Introduction to algorithms."
# Each node visited has its other fields expanded.
# Algorithm is: from a node visit another one if it is white. Immediately
# paint it grey and recurse in depth. When returning from recursion, paint it
# black and emit it. In fact the nodes so emitted are topologically sorted.
# The theory also shows that loops in the dag are trivially detected.

def sort_dag(p2p) :
    "Expand run-deps in other fields, and topological sort of DAG."
    for orig in p2p.keys() :        # Find a new node to visit
        if p2p[orig][13] == 0 :     # White ?
            dfs_visit(p2p, orig)

def dfs_visit(p2p, orig) :
    "The DFS recursive step."
    # Visit in depth first order. The algorithm emits ordered nodes and
    # detects eventual cycles.
    global top_rank
    p2p[orig][13] = 1                     # Paint grey
    for end in p2p[orig][8] :             # End of arrow, run_dependency
        try:
            if p2p[end][13] == 0 :        # White ?
                dfs_visit(p2p, end)       # Iterate in depth
            else :
                if p2p[end][13] == 1 :    # Loop
                    print p2p[end][0], p2p[orig][0], "are looping."
        except KeyError :   # Key doesn't exist
            print >> upgradelog, "KeyError in dfs_visit", end
    # On return, paint black and emit the index line
    p2p[orig][13] = top_rank   # will be >= 2, orders nodes.
    top_rank += 1
    # Emit the black node. The DFS theory shows they will be sorted.
    emit_ordered(p2p, orig)

##############################################################################
#
#   Next is an implementation of threaded execution of "make -V", since
#   python code is not able to run concurrently on several processors, but
#   forked external programs can. To avoid the large overhead of the python
#   "threading" library, we use only the low level "thread" library, and
#   construct our own synchronization from that.
#
##############################################################################

class Wsync :
    '''Workers synchronize. Collects the threading stuff in a clean place.
    The engine works in conjunction with run_col().'''

    def __init__(self, nwork, collection) :
        "Variables needed by our synchronization procedure."
        self.njobs = 0     # Plays the role of a condition variable
        self.nthr = nwork
        self.token = 0
        self.dowork = True
        self.nwork = nwork # Number of worker threads
        self.count = 0
        self.collection = collection
        self.eng_l = thread.allocate_lock()    # To control the token engine loop
        self.tok_l = thread.allocate_lock()    # To protect njobs and token
        self.exit_l = thread.allocate_lock()   # To protect nthr at the end
        self.dag_l = thread.allocate_lock()    # To protect writing to the DAG
        self.eng_l.acquire()                   # Must be initialized to locked

    def update_col(self, orig, missing, miss_run) :
        "Adds the missing entries at the end of the collection."
        # It is bad to do that while the engine is running! So try locking it.
        # Wait until the following succeeds; this will block calls to
        # get_job() and engine()
        self.tok_l.acquire()
        for origin in missing :
            if origin not in self.collection :
                self.collection.append(origin)
        for origin in miss_run :
            # Add only miss_run origins to inst_pkgs, and only when they are
            # rundeps of a port in inst_pkgs, not rundeps of a builddep of
            # such a port.
            if inst_pkgs.has_key(orig) and not inst_pkgs.has_key(origin) :
                inst_pkgs[origin] = ['New', '','Build']
                # pkg_names will be filled at the end.
        self.tok_l.release()

    def get_job(self) :
        "Gets one token and makes use of it."
        self.tok_l.acquire()           # Blocks waiting for a token
        if self.njobs > 0 :            # General case
            orig = self.token          # Gets token
            self.njobs -= 1            # Has picked a token
            if self.eng_l.locked() :
                self.eng_l.release()   # Restart engine
            self.tok_l.release()       # Allows token production
            # Now do the real work
            pkg_info, missing, miss_run = get_port_info(orig, self.collection)
            self.dag_l.acquire()
            port2pkg[orig] = pkg_info  # Put in global storage
            self.dag_l.release()
            if len(missing) > 0 :      # Adds missing to coll.
                self.update_col(orig, missing, miss_run)
        else :                         # njobs = 0, strange!
            self.tok_l.release()       # Nothing to do
            sleep(0.001)               # Yields to another thread

    def work(self) :
        "Worker function, launching the make -V command."
        while self.dowork :            # Collection not empty
            # One should have njobs > 0 here, since threads are
            # launched after tokens
            self.get_job()
        # Cleanup at the end
        self.get_job()                 # Picks up a possible last job
        self.exit_l.acquire()          # Signals end of thread
        self.nthr -= 1
        self.exit_l.release()
        # And the thread dies.

    def finish(self) :
        "Main thread waits for the end of all worker threads."
        # This is a simplistic barrier implementation.
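        # A design note: the low level 'thread' module has no join(), so the
        # main thread simply spins here, repeatedly taking exit_l and checking
        # nthr, until every worker has decremented it to zero. The 'threading'
        # module's join() would achieve the same, at the cost of the overhead
        # mentioned in the banner above.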
        get_out = False
        while not get_out :
            self.exit_l.acquire()
            if self.nthr > 0 :
                self.exit_l.release()
            else :
                get_out = True   # Exits
                self.exit_l.release()

    def engine(self, orig) :
        '''Distributes tokens, here the parameter orig. When count is less
        than nwork, it also launches the worker threads.'''
        self.tok_l.acquire()
        self.token = orig
        self.njobs += 1
        self.tok_l.release()
        if self.count < self.nwork :   # Launch nwork threads
            thread.start_new_thread(self.work, ())
            self.count += 1
        self.eng_l.acquire()           # Blocks engine until token acquired

########### End threading engine #############################################

# Helper functions

def append_all_deps(orig) :
    "Appends all dependencies of a target before construction."
    bdeps = port2pkg[orig][7]          # Build dependencies
    union(bdeps, port2pkg[orig][10])   # Add extract dependencies
    union(bdeps, port2pkg[orig][11])   # Add patch dependencies
    union(bdeps, port2pkg[orig][12])   # Add fetch dependencies
    return bdeps

def pkg_compare(pkg1, pkg2) :
    '''Compares version numbers of packages corresponding to a port. Returns
    False if pkg1 is older than or equal to pkg2, and True if pkg1 is newer.'''
    # See the porter's handbook for the naming scheme of packages:
    # prefix portname suffix-portversion_portrevision,portepoch
    # We don't check that the package names of pkg1 and pkg2 are the same,
    # since we are here only concerned with the versioning information.
    # As long as the comparison function is not sufficiently robust, one
    # shortcut, although very time consuming (1000 forks take around 6s on a
    # powerful machine), is to fork an instance of pkg_version -t to provide a
    # definitive comparison. But our own procedure is 200 times faster.
    if pkg_cmp_version :
        pipe = os.popen('/usr/sbin/pkg_version -t ' + pkg1 + ' ' + pkg2)
        res = pipe.read().strip()
        pipe.close()
        # The result is either < or > or =
        # Return directly, bypassing everything else
        if res == '>' :
            return True
        else :
            return False

    def extract_version(pkg) :
        "Extracts the versioning components in a package name."
        match = matchversion.search(pkg)
        try :
            # Extract all raw versioning components in comps
            comps = match.groups()
        except AttributeError :   # There was no match in fact
            print >> upgradelog, "Package", pkg, "has no version information."
            return 'None', 0, 0
        portversion = comps[0]
        if comps[1] :
            portrevision = comps[1].lstrip('_')
        else :
            portrevision = 0
        if comps[2] :
            portepoch = comps[2].lstrip(',')
        else :
            portepoch = 0
        return portversion, portrevision, portepoch

    def vers_compnt_cmp(ver1, ver2) :
        '''Compares two version components, returns -1 if ver1 < ver2, 0 if
        ver1 = ver2 and +1 if ver1 > ver2.'''
        # This is extremely delicate. A first version exists in a perl script
        # by B. Mah; the actual FreeBSD version is in the file
        # src/usr.sbin/pkg_install/lib/version.c written by Oliver Eikemeier
        # and contains a considerable number of special cases, notably for
        # things like 'pl', 'alpha', 'beta', 'pre', 'rc', and so on. Even
        # this way it may produce extremely counter intuitive results, as
        # noted in the comments of that file. The best solution by far would
        # be to normalize the naming of packages in a consistent way and
        # enforce it. We content ourselves with following the simple strategy
        # of B. Mah, which we summarize:
        # - if ver = '' it is replaced by '0'.
        # - if ver1 and ver2 begin with a letter, do an alpha comparison
        # - if only one begins with a letter, it loses
        # - if something numeric remains, do a numeric compare
        # - if everything remains equal and we have letters at the right,
        #   strip all the left stuff and reinvoke vers_compnt_cmp on the
        #   remainder, which allows comparing 1.1p1 and 1.1p2

        # Avoid recursion loops, quick exit on equality
        if ver1 == ver2 :
            return 0
        # Normalize empty versions to be numeric 0. In particular it wins
        # over alpha, which is fine, see the example below.
        if ver1 == '' :
            ver1 = '0'
        if ver2 == '' :
            ver2 = '0'
        # The standard case is ver1 and ver2 numeric, where a numeric compare
        # is appropriate. We match numbers at the beginning of the strings.
        # This should catch almost all cases.
        head1 = matchnumer.match(ver1)
        head2 = matchnumer.match(ver2)
        if head1 != None and head2 != None :   # Both have numbers
            # Beware, num1 and num2 have to be converted to numeric or the
            # comparison is alpha and completely different! For example we
            # have '12' < '4' as alpha.
            num1 = int(head1.group(0))
            num2 = int(head2.group(0))
            if num1 < num2 :                   # Numeric compare
                return -1
            elif num1 > num2 :
                return +1
            else :
                # Numbers are equal, strip them and recurse.
                tail1 = matchnumer.sub('', ver1)
                tail2 = matchnumer.sub('', ver2)
                return vers_compnt_cmp(tail1, tail2)
        # Perhaps pl (patchlevel) should be special cased, because it should
        # increase rank. Probably 1.0pl1 > 1.0, while 1.0rc9 < 1.0
        if head1 != None and head2 == None :   # ver2 begins with alpha
            return +1                          # ver1 numeric wins
        if head1 == None and head2 != None :   # ver1 begins with alpha
            return -1                          # ver2 numeric wins
        # If we come here both ver1 and ver2 begin with alpha, so we pick the
        # alpha heads and do an alpha compare.
        head1 = matchalpha.match(ver1)
        head2 = matchalpha.match(ver2)
        if head1 != None and head2 != None :   # Both have alpha
            alp1 = head1.group(0)
            alp2 = head2.group(0)              # Pick these alphas
            if alp1 < alp2 :                   # Alpha compare
                return -1
            elif alp1 > alp2 :
                return +1
            else :
                # Alphas are equal, strip them and recurse.
                tail1 = matchalpha.sub('', ver1)
                tail2 = matchalpha.sub('', ver2)
                return vers_compnt_cmp(tail1, tail2)
        else :
            # Should not happen, emergency exit!
            if ver1 < ver2 :                   # Use builtin compare
                return -1
            elif ver1 > ver2 :
                return +1
            # Equality is ruled out at the beginning.
        # In two or three iterations we should be able to analyze any
        # situation. Example: compare 1.0beta1 and 1.0. Breaking on
        # components yields a comparison of 0beta1 and 0. After the first
        # recursion, the 0 is stripped and we compare beta1 and '', which is
        # promoted to 0 so wins. Hence 1.0 > 1.0beta1. Similarly
        # 1.0beta2 > 1.0beta1. This case requires two iterations: the first
        # one strips the 0, the second one strips beta.

    def ptv_cmp(ptv1, ptv2) :
        '''Compares two portversions. Returns 0 if they are equal, or at
        least one cannot be read, -1 if ptv1 < ptv2, +1 if ptv1 > ptv2.'''
        # This comparison function is presently insufficiently sophisticated
        # to cope with portversions which have fancy forms, e.g. some contain
        # rc for "release candidate".
        if ptv1 == 'None' or ptv2 == 'None' :
            return 0
        # Split the dot-separated components of portversion.
        vers_info1 = ptv1.split('.')
        vers_info2 = ptv2.split('.')
        shorter = len(vers_info1)
        if len(vers_info2) < shorter :
            shorter = len(vers_info2)
        for ind in range(shorter) :
            comparator = vers_compnt_cmp(vers_info1[ind], vers_info2[ind])
            if comparator :   # non vanishing, so decisive
                return comparator
        # If we come here, all successive components are the same up to the
        # shorter length.
        # Otherwise we would have returned at the first differing component,
        # running from left to right.
        if len(vers_info2) > shorter :
            return -1   # after all, ptv1 is smaller than ptv2
        elif len(vers_info1) > shorter :
            return 1
        else :
            # ptv1 and ptv2 are equal
            return 0

    ptv1, ptr1, pte1 = extract_version(pkg1)
    ptv2, ptr2, pte2 = extract_version(pkg2)
    # The comparison rules are as follows: if portepoch is present it
    # dominates the other information; its absence means its value is 0. With
    # the same portepoch, the other information allows ordering packages. The
    # next subdominant info is portversion, as provided by the upstream port
    # developer. Finally, with all else equal, it is the portrevision, as
    # given by the port maintainer, which gives the order. Note that
    # portversion is a string with embedded dots, which has to be ordered
    # first by the first numerical component, then the second, etc.
    if pte1 < pte2 :
        return False
    elif pte1 > pte2 :
        return True
    else :   # Same portepoch
        vers_cmp = ptv_cmp(ptv1, ptv2)
        if vers_cmp < 0 :
            return False
        elif vers_cmp > 0 :
            return True
        else :   # Same portversion
            if ptr1 <= ptr2 :
                return False
            else :
                return True

def run_analysis() :
    '''This drives the index build for the installed packages, as well as
    analyzing the dependency structure of ports and prebuilt binary packages,
    so as to prepare a good binary upgrade plan.'''
    # In parallel we download the INDEX of binary packages.
    if download_index == True :
        thread.start_new_thread(retreive_index, ())
    # and we compute the INDEX corresponding to our ports tree. Note that if
    # we are tracking the Latest packages, this is largely superfluous, except
    # for the ports which don't have corresponding binary packages.
    t_start = time()
    print "Collecting installed packages."
    upd_collect = check_col(collection)
    print "Building the updated index."
    run_col(upd_collect)
    # Run the dependency analysis
    print "Building and filling the dependency DAG."
    recurse_exp(port2pkg)   # expand run_deps
    sort_dag(port2pkg)      # Topologically sorts the DAG and prints INDEX
    # Here we wait, if necessary, for the end of the INDEX download. We do
    # this by examining the lock file 'status.ftp'.
    while download_index :
        status_ftp = open('status.ftp', 'r')
        ans = status_ftp.readline().strip()
        if ans == 'Finished' :
            status_ftp.close()
            os.unlink('status.ftp')
            break   # out of while
        else :
            sleep(1)
    # Now INDEX.ftp has the information needed to correctly fill inst_pkgs.
    # It takes too much time to parse INDEX.ftp twice, so we store its info
    # in a throwaway dictionary, which will be garbage collected at the end
    # of run_analysis(), but has a big memory cost. Even this operation is
    # costly, around 3 s. The dictionary is organized with keys which are the
    # most recent origins for a given software, or the last valid origin when
    # it was removed.
    bin_pkgs = {}
    f_handle = open('INDEX.ftp', 'r')
    for line in f_handle :
        fields = line.strip().split('|')
        orig = fields[1].replace(portdir, '', 1)
        # The origin must be brought to its most recent incarnation before
        # being able to compare it to what is in inst_pkgs. Fortunately
        # 'moved' has keys for all the successive names of a software, so it
        # may be used in particular for the names in INDEX.ftp.
        if orig in moved.keys() :
            n_orig = moved[orig]
        else :
            n_orig = orig
        # If n_orig = '', the port has been removed.
        if n_orig :   # Normal case
            bin_pkgs[n_orig] = fields[0]
        else :
            # We must accommodate the case of ports which have been removed
            # before package generation, or between package generation and the
            # present state of ports.
            # In the first case orig will not appear at all here, but in the
            # second, the port should be registered with the last valid
            # origin previous to removal.
            try :
                n_orig = last_valid_mov[orig]
            except KeyError :   # Should not happen
                n_orig = orig
            bin_pkgs[n_orig] = fields[0]
    f_handle.close()
    # No more need of INDEX.ftp
    if download_index == True and keep_index == False :
        os.unlink('INDEX.ftp')
    for orig in inst_pkgs.keys() :
        # Fill the correct inst_pkgs fields. In particular it is here that the
        # third field, which was 'Build', is replaced by a precompiled
        # package, if it exists.
        try :
            inst_pkgs[orig][1] = port2pkg[orig][0]
        except KeyError :   # orig has been removed from port2pkg
            inst_pkgs[orig][1] = 'Removed'
        try:
            inst_pkgs[orig][2] = bin_pkgs[orig]
        except KeyError :
            # Perhaps orig refers to a port which has been removed before
            # package generation, or to a new port which did not exist at
            # package generation time and has been brought in as a dependency.
            # In the first case one has inst_pkgs[orig][1] = 'Removed', while
            # in the second it should be marked 'Build'. Another possibility
            # is that package generation was either forbidden or failed for
            # this port, so it does not appear in the binary packages.
            if inst_pkgs[orig][1] == 'Removed' :
                inst_pkgs[orig][2] = 'Removed'
    # Hold packages management, very primitive: we only remove the ports which
    # are in the HOLD array, without any consideration for dependency
    # management. This is liable to cause missing dependencies if these
    # packages are really required by other ones, so it has to be used
    # sparingly. One may also add glob management, but this is dangerous.
    managed_ports = inst_pkgs.keys()
    for orig in HOLD :
        if orig in managed_ports :
            managed_ports.remove(orig)
    # There is no point upgrading a package to itself or lower! Hence we
    # remove the records where the third field is inferior or equal to the
    # first field, but only when the third field is not 'Build' or 'Removed'.
    # This constitutes the bulk of the set of ports we don't consider.
    # TODO: Removing these packages may be unwise. Indeed, suppose gettext has
    # been upgraded! All other ports depending on gettext certainly have to be
    # upgraded, even if their own version number has not changed at all.
    # So there is a point in upgrading a package to itself if one of its
    # dependencies has been upgraded.
    bad = []   # List of ports to be removed from managed_ports
    for orig in managed_ports :
        if not inst_pkgs[orig][2] == 'Build' and \
           not inst_pkgs[orig][2] == 'Removed' and \
           not inst_pkgs[orig][0] == 'New' and \
           not pkg_compare(inst_pkgs[orig][2], inst_pkgs[orig][0]) :
            bad.append(orig)
    # We also remove from managed_ports those ports which we have already
    # built with the ports system, so that the installed version is the same
    # as the most up to date version obtainable from ports, and which do not
    # exist in the ftp repository, hence are marked 'Build'. In this case we
    # don't want to remove and rebuild them. This doesn't apply to ports whose
    # first field is 'New'. These fairly exceptional ports are listed in
    # UpgradeLog later on, so we keep them in BAD.
    BAD = []
    for orig in managed_ports :
        if inst_pkgs[orig][2] == 'Build' and \
           not inst_pkgs[orig][0] == 'New' and \
           not pkg_compare(inst_pkgs[orig][1], inst_pkgs[orig][0]) :
            bad.append(orig)
            BAD.append(orig)
    # Remove the ports flagged in the two steps above, which are mutually
    # exclusive thanks to the first clause in the respective if. However do it
    # only if we have not set remove_everything.
    if not remove_everything :
        for orig in bad :
            managed_ports.remove(orig)
    # One should add build dependencies for the packages remaining to be built
    # here, and their run dependencies. Of course ports on HOLD, or ports
    # which will not finally be built because the installed version is
    # sufficient, don't need to acquire builddeps. Hence this has to be run
    # after the previous removals. We collect ports whose third field is
    # 'Build' and do a second pass, discovering the corresponding third field.
    # We stop there; the rest has to be built. This also applies to ports in
    # COMPILE.
    new_deps = []
    for orig in managed_ports :
        if inst_pkgs[orig][2] == 'Build' or orig in COMPILE :
            try :
                # This calls port2pkg[orig], which fails when orig has been
                # removed.
                deps = append_all_deps(orig)
            except KeyError :   # Should not happen
                print "Strange error when adding buildeps for", orig
            for origin in deps :
                # Don't add ports in HOLD, or bad, here:
                if origin not in inst_pkgs.keys() \
                   and origin not in HOLD \
                   and origin not in bad \
                   and origin not in new_deps :
                    new_deps.append(origin)
    # Finally add these build dependencies to managed_ports, when they have a
    # precompiled binary package. Note that orig is current, obtained by
    # port2pkg, but it is registered under the same current name in bin_pkgs
    # if it exists there.
    for orig in new_deps :
        try:
            inst_pkgs[orig] = ['NewBuild', port2pkg[orig][0], bin_pkgs[orig]]
            managed_ports.append(orig)
        except :
            # orig is not in bin_pkgs, do nothing
            pass
    # We now know the final length of managed_ports, and we have kept in BAD
    # the list of up to date ports, so let us log this information.
    print >> upgradelog, '''There are %d managed ports, after having removed
from the list the following up to date ports: ''' % len(managed_ports)
    print >> upgradelog, print_by_3(BAD)
    # Finally one should sort inst_pkgs in the same order as in INDEX.ports,
    # that is, topologically. This ensures that when one installs a package,
    # the dependencies are already there. In principle this should avoid using
    # the '-f' option of pkg_add, but there are possible pitfalls such as
    # packages marked hold, or packages to be built later on.
    def level(orig) :
        "Gets the order as in the DFS visit of the dependency DAG."
        try:
            level = port2pkg[orig][13]
        except KeyError :
            level = 0   # Port is to be removed
        return level
    # Presenting it this way solves an ordering bug. Why ?
    managed_ports.sort(cmp = lambda orig1, orig2 : level(orig1) - level(orig2))
    print "Printing the upgrade list in UpgradeLog"
    # We print only managed ports, and in sorted order.
    print >> upgradelog, "\n**************************\n"
    print >> upgradelog, "UPGRADE LIST."
    print >> upgradelog, \
        '%-25s ==> %-25s ==> %-25s' % ("Old installed", "Ports", "Binary pkgs")
    print >> upgradelog, "\n"
    for orig in managed_ports :
        print >> upgradelog, \
            '%-25s ==> %-25s ==> %-25s' % (inst_pkgs[orig][0],
                                           inst_pkgs[orig][1],
                                           inst_pkgs[orig][2])
    time_string = strftime("%M minutes %S seconds.",
                           localtime(time() - t_start))
    print "Total time spent in analysis: ", time_string
    return managed_ports

#################################################
#                 Second Part
#################################################

def run_prepare(managed_ports) :
    '''Prepares an upgrade script and copies precious stuff, while downloading
    all possible precompiled packages.'''
    print "Second phase, downloads and backups."
    # Prepare room for saving work. Old libs and config files are saved in
    # Backups, all packages are downloaded to Packages.
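    # For reference, the working directory then holds (as created by this
    # script): INDEX.ports and INDEX.ftp (the computed and downloaded
    # indexes), UpgradeLog and BackupLog (logs), the Backups/ and Packages/
    # directories, the transient status.* synchronization files, and the
    # generated UpgradeShell script.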
    if not os.path.isdir('Backups') :
        try :
            os.mkdir('Backups')
        except OSError :
            # Backups exists but is not a directory
            os.rename('Backups','Backups.BAK')
            os.mkdir('Backups')
    if not os.path.isdir('Packages') :
        try :
            os.mkdir('Packages')
        except OSError :
            # Packages exists but is not a directory
            os.rename('Packages','Packages.BAK')
            os.mkdir('Packages')
    # Prepare the list of packages to back up and download. Perform a full
    # backup for ports that have been removed and so cannot be recovered.
    # Such backups are prefixed by REM- for easy spotting.
    back_pkgs = []
    for orig in managed_ports :
        o_pkg = inst_pkgs[orig][0]
        if inst_pkgs[orig][2] == 'Removed' :
            os.system("/usr/sbin/pkg_create -b " + o_pkg + " Backups/REM-" +
                      o_pkg + ".tbz > /dev/null 2>&1")
        elif not ( o_pkg == 'New' or o_pkg == 'NewBuild' ) :
            back_pkgs.append(o_pkg)
    down_pkgs = to_have(managed_ports)
    # Launch two threads: one backs up libraries and config files, the other
    # downloads packages. The main thread waits for termination of the other
    # long running tasks, and then prepares a shell script UpgradeShell to
    # perform the upgrade.
    # So that the following reads don't crash at the beginning, and the status
    # files are clean:
    handle = open('status.backups', 'w')
    handle.close()
    handle = open('status.downloads', 'w')
    handle.close()
    t_start = time()
    thread.start_new_thread(do_backups, (back_pkgs,))
    thread.start_new_thread(do_downloads, (down_pkgs,))
    # If the main thread terminates, it kills the other threads, even if they
    # are not finished. So better ensure correct termination.
    wait_back_down(t_start)
    print "Writing upgrade shell script."
    # Now that the downloads are finished we check the exact content of the
    # Packages directory; everything not found there needs to be built
    # locally, and is listed in to_build.
    prebuilt, to_build = do_check_packages(managed_ports)
    # Finally we write the upgrade shell script.
    do_write_shell(managed_ports, prebuilt, to_build)
    print "All tasks completed."
    print '******************************************************************'

#########################################
#            Helper functions
#########################################

def to_have(managed_ports) :
    "Locate necessary packages or download them."
    repositories = pkg_repos.split(':')
    # Sanitize: we want a trailing / and a good leading indication
    for ind in range(len(repositories)) :
        if repositories[ind].find('/', -1) == -1 :
            repositories[ind] += '/'   # Append a /
        if repositories[ind].find('/', 0, 1) == -1 :
            # The path is relative to the working directory, prepend it
            repositories[ind] = os.getcwd() + '/' + repositories[ind]
    all_pkgs = {}
    bad = []
    # Run this only once!
    for repos in repositories :
        try :
            all_pkgs[repos] = os.listdir(repos)
        except OSError :   # Repository doesn't exist
            bad.append(repos)
    for repos in bad :
        repositories.remove(repos)
    # The shorter, the better.
    in_packages = os.listdir('Packages')
    down_pkgs = []
    # Don't download packages which we want to compile.
    for orig in managed_ports :
        if orig not in COMPILE :
            pkg = inst_pkgs[orig][2]
            # Don't search for Removed or to-be-Built packages
            if not ( pkg == 'Build' or pkg == 'Removed' ) :
                pkgtbz = pkg + '.tbz'
                found = False
                # Can we find pkg locally?
                # First in Packages, from a previous run
                if pkgtbz in in_packages :
                    found = True
                # Next search in the repositories
                if not found :
                    for repos in repositories :
                        if pkgtbz in all_pkgs[repos] :
                            found = True
                            # Mark the package in Packages by a symlink
                            os.symlink(repos + pkgtbz, 'Packages/' + pkgtbz)
                            break   # Stop at the first good repository
                if not found :
                    # Must be downloaded
                    down_pkgs.append(pkg)
    return down_pkgs

def do_backups(pkgs) :
    "Backs up ports in managed_ports, shared libraries and configs."
    status_bak = open('status.backups', 'w')   # Resets the status file
    print >> status_bak, "Beginning"
    status_bak.close()
    handle = open('BackupLog', 'w')            # Clean BackupLog
    handle.close()
    # Don't archive already archived packages. This saves time in case one
    # needs to run the program more than once.
    present = []
    # Unfortunate hack, a more direct for loop does not work
    for pkg in pkgs :
        if os.access('Backups/' + pkg + '.saved.tar.gz', os.R_OK) :
            present.append(pkg)
    for pkg in present :
        pkgs.remove(pkg)
    # Archive stuff in the Backups directory.
    archiver = "/usr/local/sbin/pkg_save.py -o Backups "
    PKGS = " ".join(pkgs)
    # In case the command line should be too long, xargs breaks the request
    # into manageable chunks.
    shell_command = "PKGS='" + PKGS + "' ;" + " "\
        "echo $PKGS | xargs " + archiver + " >> BackupLog"
    # pkg_save.py is verbose about what it archives. So it will be easy to
    # grep UpgradeLog for a missing shared library and discover in which
    # archive it belongs.
    os.system(shell_command)
    # This only returns when the shell_command terminates
    status_bak = open('status.backups', 'w')   # Reset status
    print >> status_bak, "End"
    status_bak.close()

def do_downloads(pkgs) :
    "Download packages known to exist on the ftp server."
    err_message = "The ftp server " + freebsd_server + ''' cannot be contacted
or does not mirror the FreeBSD distribution which you need. Please check it
carefully, and if necessary edit the freebsd_server string.'''
    status_dow = open('status.downloads', 'w')   # Resets the status file
    print >> status_dow, "Beginning"
    status_dow.close()
    pkg_all = find_pkg_dir(pkg_release) + '/All'

    def do_connect() :
        "Initiates the ftp connection."
        ftp_handle = FTP(freebsd_server)
        ans = ftp_handle.login()
        if not ans[0:3] == '230' :
            print err_message
            sys.exit()
        # Passive mode is on by default, perhaps needs some tuning. The
        # command to do that is "ftp_handle.set_pasv(False)"
        try :
            ans = ftp_handle.cwd(pkg_all)
        except ftplib.error_perm, resp :
            print >> upgradelog, resp
            print >> upgradelog, err_message
            ftp_handle.quit()
            sys.exit()
        return ftp_handle

    def do_retreive(pkg) :
        "Retreives one package."
        pack_handle = open('Packages/' + pkg + '.tbz', 'w')
        try :
            ans = ftp_handle.retrbinary('RETR ' + pkg + '.tbz',
                                        pack_handle.write)
            if ans[0:3] == '226' :
                print >> upgradelog, "Retreived", pkg
            else:
                print >> upgradelog, "Cannot retreive ", pkg
                os.unlink('Packages/' + pkg + '.tbz')
            return True
        except ftplib.error_perm, resp :
            # One should explore the various possible errors here!
            print >> upgradelog, resp
            print >> upgradelog, "Cannot retreive ", pkg
            os.unlink('Packages/' + pkg + '.tbz')
            return True
        # Some ftp servers are badly behaved and break the connection, which
        # leads to an exception and a hanging thread.
        except Exception :
            # I am not sure of the correct exception; it should be checked
            # with a real broken connection, perhaps ftplib.error_reply.
            # Anyway, this triggers resetting the connection.
            os.unlink('Packages/' + pkg + '.tbz')
            return False

    if len(pkgs) > 0 :   # No need to connect if nothing to do!
        ftp_handle = do_connect()   # This will be in scope for do_retreive()
        print >> upgradelog, "Retreiving packages from ", pkg_all
    for pkg in pkgs :
        # The side effect of the following is normally to download pkg
        if not do_retreive(pkg) :
            # The connection broke, restart it
            try :
                ftp_handle.quit()
            except Exception :
                # Tolerates anything here
                pass
            # The next do_connect has its own error checking
            ftp_handle = do_connect()   # A new one, supposedly better
            if not do_retreive(pkg) :   # Retries the same pkg
                # The connection broke twice, abort
                print "This ftp server doesn't like repeated downloads. Aborting"
                print "Restart the script with another ftp server."
                ftp_handle.quit()
                sys.exit()
        # By default continue to the next pkg
    # In principle all packages are now downloaded
    if len(pkgs) > 0 :
        ftp_handle.quit()
    status_dow = open('status.downloads', 'w')   # Resets the status file
    print >> status_dow, "End"
    status_dow.close()

def wait_back_down(t_start) :
    "Waits until backups and downloads are finished."
    bool_b = True
    bool_d = True
    while bool_b or bool_d :
        sleep(1)
        status_bak = open('status.backups', 'r')
        ans_b = status_bak.readline().strip()
        status_bak.close()
        status_dow = open('status.downloads', 'r')
        ans_d = status_dow.readline().strip()
        status_dow.close()
        if bool_b == True and ans_b == 'End' :
            time_string = strftime("%M minutes %S seconds.",
                                   localtime(time() - t_start))
            print "Total time spent in backups: ", time_string
            bool_b = False
        if bool_d == True and ans_d == 'End' :
            time_string = strftime("%M minutes %S seconds.",
                                   localtime(time() - t_start))
            print "Total time spent in downloads: ", time_string
            bool_d = False
        if (not bool_b) and (not bool_d) :
            os.unlink('status.backups')
            os.unlink('status.downloads')

def do_check_packages(managed_ports) :
    '''Checks the contents of Packages and returns the lists of prebuilt ports
    and of ports to build.'''
    # This also allows preloading Packages with some packages which will be
    # used here. Hence Packages should always be searched in to_have. Both
    # prebuilt and to_build are in good order, but compiled packages will be
    # built after installation of the precompiled ones, which somewhat
    # destroys the order. This is a necessary evil if we don't want failed
    # compiles to ruin the upgrade process, and a very limited evil if few
    # ports need compilation. The only harm done is incorrect entries in
    # +REQUIRED_BY, which is very easy to cure afterwards. Ports in COMPILE
    # are forcefully added to the compile list.
    to_build = []
    prebuilt = []
    present = os.listdir('Packages')
    for orig in managed_ports :
        # This comes in topological sort order
        if orig in COMPILE :
            to_build.append(orig)
        else :
            # Don't reinstall Removed packages!
            pkg = inst_pkgs[orig][2]
            if not pkg == 'Removed' :
                if pkg == 'Build' :
                    to_build.append(orig)
                else :
                    pkgtbz = pkg + '.tbz'
                    if pkgtbz in present :
                        prebuilt.append(pkgtbz)
                    else:
                        to_build.append(orig)
    return prebuilt, to_build   # Both are sorted

def print_by_3(array, escapenl = False) :
    '''Prints 3 elements of array per line. The result is a string enclosed
    in quotes, with white space separated elements, suitable for a shell
    script.'''
    p_string = ""
    sh_array = []   # short array
    index = 0
    len_array = len(array)

    def print_lte_3(sh_array, escapenl, end = False) :
        "Prints a single line with at most 3 elements."
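        # For illustration (with hypothetical package names), the enclosing
        # print_by_3(['a-1.0', 'b-2.0', 'c-3.0', 'd-4.0']) returns the string
        #   'a-1.0 b-2.0 c-3.0
        #   d-4.0'
        # i.e. a single-quoted, whitespace separated word list, three words
        # per line, ready to be assigned to a shell variable in UpgradeShell.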
        if end :
            eol = "'\n"
        else :
            if not escapenl :
                eol = " \n"
            else:
                eol = " \\\n"
        return " ".join(sh_array) + eol

    p_string += "'"   # beginning, print the opening '
    if len_array == 0 :
        p_string += "'\n"   # empty array, return an empty string
        return p_string     # get out
    for elem in array :     # array is not empty
        sh_array.append(elem)
        index += 1
        if index % 3 == 0 and index < len_array :   # print by 3
            p_string += print_lte_3(sh_array, escapenl, False)
            sh_array = []
        elif index >= len_array :   # last line, complete or incomplete
            p_string += print_lte_3(sh_array, escapenl, True)
    return p_string

def do_write_shell(managed_ports, prebuilt, to_build) :
    "Writes the final shell script."
    main_dir = os.getcwd()
    old_pkgs = []
    for orig in managed_ports :
        old_p = inst_pkgs[orig][0]
        if not ( old_p == 'New' or old_p == 'NewBuild' ) :
            old_pkgs.append(old_p)
    old_pkgs.reverse()   # Remove dependents before dependencies
    print "Will remove", len(old_pkgs), "old packages."
    print "Will install", len(prebuilt), "new binary packages."
    print "Will compile", len(to_build), "ports."
    shandle = open('UpgradeShell', 'w')
    print >> shandle, '''#! /bin/sh
# To trace execution
# set -x
# Protects the shell script against unintended interruptions, which would be
# very prejudicial. Use kill -9 to kill it deliberately.
trap "" 1 2 3 5 10 13 15
MAINDIR=''' + main_dir + '''
PORTDIR=''' + portdir + '''
OLDPKGS=''' + print_by_3(old_pkgs) + '''
PREBUILT=''' + print_by_3(prebuilt) + '''
TOBUILD=''' + print_by_3(to_build) + '''
# We want to write to UpgradeLog, go here:
cd $MAINDIR
echo "Beginning upgrade." >> UpgradeLog
/bin/date >> UpgradeLog
# Remove old stuff
echo " "
echo "Removing old packages."
echo " "
for pkg in $OLDPKGS
do
    echo $pkg
    /usr/sbin/pkg_delete -f $pkg > /dev/null 2>&1
done
echo " "
echo "Old packages removed."
echo " "
echo "Ports marked on hold are unmanaged!" >> UpgradeLog
echo "All managed ports removed." >> UpgradeLog
# Add all precompiled binaries. They are added in good dependency order, so
# in principle the "-f" flag should not be necessary.
echo " "
echo "Installing binary packages."
echo " "
cd $MAINDIR/Packages
for pkg in $PREBUILT
do
    echo $pkg
    # Some packages ask questions, be cautious here
    /usr/sbin/pkg_add -f $pkg | /usr/bin/grep ''' + r"'[[(][yYnN][a-z|]*[])]'" + ''' 2>&1
done
echo " "
echo "Binary packages installed."
echo " "
cd $MAINDIR
echo "All prebuilt packages installed." >> UpgradeLog
echo "Now building ports, may require interaction"
# Finally build the remaining ports. Here there is always a possibility of
# failure. We surely need to refine the script to cope with that.
for orig in $TOBUILD
do
    cd $PORTDIR$orig
    PRSG="Port $orig built and installed."
    /usr/bin/make all install clean || PRSG="Build failed for $orig ."
    cd $MAINDIR
    echo $PRSG >> UpgradeLog
done
echo "Upgrade finished." >> UpgradeLog
/bin/date >> UpgradeLog
echo "System upgraded."
'''
    shandle.close()
    os.chmod("UpgradeShell", 0755)

if __name__ == '__main__' :
    # The first thing to do should be to change to a clean directory, but we
    # don't do it for fear of losing info from a previous run.
    # Check that pkg_save.py exists and is executable.
    if not os.access('/usr/local/sbin/pkg_save.py', os.R_OK|os.X_OK) :
        print '''Cannot find pkg_save.py. Please download it from:
http://www.lpthe.jussieu.fr/~talon/pkg_save.py
check that it is readable and executable, and put it in /usr/local/sbin.'''
        sys.exit()
    starttime = time()
    managed_ports = run_analysis()
    # We don't use several arrays anymore, so we can present them to the
    # garbage collector.
    # The important info is kept in inst_pkgs and managed_ports. Arrays and
    # dictionaries local to run_analysis or to dependent functions are no
    # longer in scope, so they are garbage collected.
    port2pkg.clear()
    collection = []
    moved.clear()
    last_valid_mov.clear()
    run_prepare(managed_ports)
    upgradelog.close()
    time_string = strftime("%M minutes %S seconds.",
                           localtime(time() - starttime))
    print "Total time: ", time_string
    print '''You will find the generated index in INDEX.ports and the special
events encountered in UpgradeLog. The INDEX can be used to browse the package
contents with a tool such as "show_index.py". The directory Backups contains
backups of your installed shared libs and config files, and the directory
Packages contains downloaded packages or symlinks to packages residing
locally. If you run the present program again, files in these directories
will not be retrieved again, and will be used directly.
To upgrade, please use a system console running a standard shell (sh or csh)
and run the generated script UpgradeShell as root. It is recommended to first
review UpgradeShell, and take a look at UpgradeLog. A complete log of backups
is produced in BackupLog, which may help in recovering a missing file
afterwards. Ports whose compilation didn't succeed are mentioned in
UpgradeLog for further care.'''
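    # Typical use, as summarized in the message above: run this script in a
    # dedicated working directory, review UpgradeShell and UpgradeLog, then
    # run UpgradeShell as root from a system console (sh or csh).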