| author | Urja (ARMLFS builder) &lt;urja+armlfs@urja.dev&gt; | 2025-11-09 15:25:30 UTC |
| committer | Urja (ARMLFS builder) &lt;urja+armlfs@urja.dev&gt; | 2025-11-09 15:25:30 UTC |
| parent | f4165e61d024c062f248cd83b2d3286fa057537c |
| cleanup-old-pkg.py | +319 | -0 |
| srclist.sh | +71 | -0 |
diff --git a/cleanup-old-pkg.py b/cleanup-old-pkg.py
new file mode 100755
index 0000000..c65d468
--- /dev/null
+++ b/cleanup-old-pkg.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import subprocess
+from subprocess import DEVNULL, PIPE, STDOUT
+
+# Thin wrapper around a finished subprocess: truthy when the exit status
+# was 0, str() yields the captured stdout (if any), int() the return code.
+class Return:
+    def __init__(self, rv, out=None):
+        self.rv = rv
+        self.out = out
+    def __bool__(self):
+        return self.rv == 0
+    def __int__(self):
+        return self.rv
+    def __str__(self):
+        if self.out:
+            return self.out
+        return str(self.rv)
+
+def sub(*args, **kwargs):
+    c = subprocess.run(*args, **kwargs)
+    return Return(c.returncode, c.stdout)
+
+# Like sub(), but abort the whole script if the command fails.
+def subc(*args, **kwargs):
+    c = subprocess.run(*args, **kwargs)
+    if c.returncode != 0:
+        print("subprocess failed: ", args)
+        print("code:", c.returncode)
+        sys.exit(1)
+    return Return(c.returncode, c.stdout)
+
+# Read the "builddir = ..." field from a package's .BUILDINFO.
+def builddir(pkg):
+    cmd = [ "tar", "-xO", "--force-local", "--occurrence=1", "-f", pkg, ".BUILDINFO" ]
+    info = subc(cmd, capture_output=True, text=True)
+    pfx = "builddir = "
+    for L in str(info).splitlines():
+        if L.startswith(pfx):
+            return L[len(pfx):].strip()
+    return None
+
+
+# this is os.walk, simplified, but yielding DirEntries, and only for non-dirs,
+# one by one.
+def filewalk(top):
+    stack = [top]
+    while stack:
+        top = stack.pop()
+        dirs = []
+        with os.scandir(top) as entries:
+            for entry in entries:
+                if entry.is_dir():
+                    dirs.append(entry.name)
+                else:
+                    yield top, entry
+        for dirname in reversed(dirs):
+            new_path = os.path.join(top, dirname)
+            stack.append(new_path)
+
+def srclist(bdir):
+    cmd = [ "./srclist.sh", bdir ]
+    info = subc(cmd, capture_output=True, text=True)
+    files = []
+    for L in str(info).splitlines():
+        L = L.strip()
+        if len(L) and " " in L:
+            _,file = L.split(" ",maxsplit=1)
+            files.append(file)
+    return files
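+
+# Output format note (illustrative; the names are hypothetical): srclist.sh
+# prints one "<builddir-basename> <source-filename>" pair per line, e.g.
+#   foo-1.2 foo-1.2.tar.xz
+# srclist() splits on the first space only, so filenames containing spaces
+# still come through intact.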
+
+releasepkg = "/mnt/nfs/pkg"
+
+
+class Pkg:
+    def __init__(self, e, ver):
+        self.files = {}
+        self.files[ver] = [e]
+        self._builddir = None
+        self._builddirs = []
+        self.analyzed = False
+
+    def add(self, e, ver):
+        self.analyzed = False
+        if ver not in self.files.keys():
+            self.files[ver] = [e]
+        else:
+            self.files[ver].append(e)
+
+    def __len__(self):  # How many versions?
+        return len(self.files)
+
+    def _analyze(self):
+        """Compute things that need stat() of the files: size and age."""
+        self.sizes = {}
+        self.dates = {}
+        newest = 0
+        oldest = None
+        total_size = 0
+        for ver in self.files.keys():
+            vsiz = 0
+            vnew = 0
+            for e in self.files[ver]:
+                vsiz += e.stat().st_size
+                mt = e.stat().st_mtime
+                if mt > vnew:
+                    vnew = mt
+            self.sizes[ver] = vsiz
+            self.dates[ver] = vnew
+            total_size += vsiz
+            if not oldest or vnew < oldest:
+                oldest = vnew
+            if vnew > newest:
+                newest = vnew
+        self.total_size = total_size
+        self.newest = newest
+        self.oldest = oldest
+        self.analyzed = True
+
+    def size(self):
+        if not self.analyzed:
+            self._analyze()
+        return self.total_size
+
+    def age(self):
+        if not self.analyzed:
+            self._analyze()
+        return self.newest - self.oldest
+
+    def newest_ver(self):
+        if not self.analyzed:
+            self._analyze()
+        newest_ver = None
+        for ver,date in self.dates.items():
+            if date == self.newest:
+                newest_ver = ver
+                break
+        return newest_ver
+
+    def builddir(self):
+        if self._builddir:
+            return self._builddir
+        self._builddir = builddir(self.files[self.newest_ver()][0].path)
+        self._builddirs = [self._builddir]
+        return self._builddir
+
+    def builddirs(self):
+        if not self._builddir:
+            return [self.builddir()]
+        if len(self._builddirs) <= 1:
+            return [self._builddir]
+        return self._builddirs
+
+    def add_builddir(self, dir):
+        if len(self._builddirs) < 1:
+            self.builddir()
+        if dir not in self._builddirs:
+            self._builddirs.append(dir)
+
+    def merge(self, other):
+        for ver in other.files.keys():
+            for e in other.files[ver]:
+                self.add(e,ver)
+
+
+# pkg2's builddir matches pkg1 if it is already one of pkg1's builddirs, is
+# equal to pkg1's, or equals it once a single trailing "-suffix" is stripped
+# (the suffix rule is skipped when pkg1's dir contains "linux").
+def fuzzy_builddir_match(pkg1, pkg2):
+    dir2 = pkg2.builddir()
+    if dir2 in pkg1.builddirs():
+        return True
+    dir1 = pkg1.builddir()
+    if dir1 == dir2:
+        return True
+    if "-" in dir2 and "linux" not in dir1:
+        base,suffix = dir2.rsplit("-",maxsplit=1)
+        if dir1 == base:
+            #print("fuzzy match", dir1, dir2)
+            return True
+    return False
+
+pkgs = {}
+
+for root, entry in filewalk(releasepkg):
+    suffix = ".pkg.tar.xz"
+    dbgsuf = "-dbginfo"
+    if entry.name.endswith(suffix):
+        if not entry.is_file():
+            continue
+        _,arch,repo = root.rsplit("/",maxsplit=2)
+        # e.g. "foo-1.2-3-aarch64.pkg.tar.xz" -> ("foo", "1.2", "3", "aarch64")
+        pkg,ver,rel,pkgarch = entry.name[:-len(suffix)].rsplit("-",maxsplit=3)
+        pkgver = f"{ver}-{rel}"  # Simpler to reassemble than skip splitting there
+        dbg = False
+
+        if pkg.endswith(dbgsuf):
+            dbg = True
+            pkg = pkg[:-len(dbgsuf)]
+
+        #print(arch, repo, pkg, pkgver, pkgarch, dbg)
+
+        id = (repo, pkg)
+        if id not in pkgs.keys():
+            pkgs[id] = Pkg(entry, pkgver)
+        else:
+            pkgs[id].add(entry, pkgver)
+
+# Rules:
+keep_versions = 3
+keep_age = 6 * 30 * 24 * 60 * 60
+keep_size = 100 * 1024 * 1024
+# These mean:
+# - we always keep at least 3 versions.
+# - we always keep packages for at least 6 months.
+# - we always keep at least 100 MB of versions for a package.
+#   (NOTE: the size does count the dbginfo and all arches)
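+# In concrete terms: keep_age = 6*30*24*60*60 = 15552000 seconds (180 days),
+# and keep_size = 100*1024*1024 = 104857600 bytes (100 MiB).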
+
+candidates = {}
+for id, pkg in pkgs.items():
+    if len(pkg) <= keep_versions:
+        continue
+    candidates[id] = pkg
+
+# This process brings split packages back together,
+# and with fuzzy_builddir_match we also bring together version-synced packages
+# like firefox and firefox-i18n.
+merged = True
+while merged:
+    merged = False
+    for id, pkg in candidates.items():
+        for id2, other in candidates.items():
+            if id == id2:  # don't merge a package into itself...
+                continue
+            if id[0] != id2[0]:  # repo must match
+                continue
+            if pkg.newest_ver() == other.newest_ver() and fuzzy_builddir_match(pkg, other):
+                newid = tuple([id[0]] + list(id[1:]) + list(id2[1:]))
+                pkg.merge(other)
+                if other.builddir() not in pkg.builddirs():
+                    pkg.add_builddir(other.builddir())
+                candidates[newid] = pkg
+                del candidates[id]
+                del candidates[id2]
+                merged = True
+                break
+        if merged:
+            break
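+
+# Illustrative trace (hypothetical repo and paths): if ("extra", "firefox")
+# built in /build/firefox and ("extra", "firefox-i18n") built in
+# /build/firefox-i18n share their newest version, the suffix rule in
+# fuzzy_builddir_match merges them into the single candidate
+# ("extra", "firefox", "firefox-i18n"), so their old versions age out together.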
+
+finalists = {}
+for id, pkg in candidates.items():
+    if pkg.age() <= keep_age:
+        continue
+    if pkg.size() <= keep_size:
+        continue
+    finalists[id] = pkg
+    #print(id, len(pkg), pkg.age() / 86400, pkg.size() / (1024*1024))
+
+
+# Drop the oldest versions one at a time until some keep rule holds again.
+removals = {}
+for id, pkg in finalists.items():
+    versions = sorted(pkg.dates.items(), key=lambda e: e[1], reverse=True)
+    compliant = False
+    rmlist = []
+    while not compliant:
+        ver,date = versions.pop()
+        rmlist += pkg.files[ver]
+        del pkg.files[ver]
+        pkg._analyze()
+        if len(pkg) <= keep_versions:
+            compliant = True
+        if pkg.age() <= keep_age:
+            compliant = True
+        if pkg.size() <= keep_size:
+            compliant = True
+    rminfo = (pkg.builddirs(), ver, date, rmlist)
+    removals[id] = rminfo
+    #print(len(pkg), pkg.age() / 86400, pkg.size() / (1024*1024))
+    #print(removals[id])
+
+
+for id, info in removals.items():
+    # Do not clean up core sources, they're arranged differently.
+    repo = id[0]
+    if repo == "core":
+        continue
+    rmsrc = []
+    for dir in info[0]:
+        keepfiles = srclist(dir)
+        basedir = os.path.basename(dir)
+        srcarchives = f"/sources/archives/{repo}/{basedir}"
+        with os.scandir(srcarchives) as entries:
+            for entry in entries:
+                if entry.is_dir():
+                    continue
+                if entry.name in keepfiles:
+                    continue
+                # sources newer than the removal cutoff are kept
+                if entry.stat().st_mtime > info[2]:
+                    continue
+                rmsrc.append(entry)
+                info[3].append(entry)
+    #print(id, rmsrc)
+
+if len(removals):
+    print(f"Removing old packages/sources for {len(removals)} packages:")
+    for id, info in removals.items():
+        idstr = id[0] + ":" + ",".join(id[1:])
+        print(f"{len(info[3])} files for {idstr} - version {info[1]} and older")
+        for e in info[3]:
+            print(f"  {e.path}")
+    print("Press enter to continue, Ctrl-C to abort.")
+    _ = input()
+
+    for id, info in removals.items():
+        for e in info[3]:
+            if e.path.endswith(".pkg.tar.xz"):
+                sigfile = e.path + ".sig"
+                if os.path.exists(sigfile):
+                    os.unlink(sigfile)
+            os.unlink(e.path)
+    print("Done.")
+else:
+    print("Nothing to clean up.")
diff --git a/srclist.sh b/srclist.sh
new file mode 100755
index 0000000..532fa36
--- /dev/null
+++ b/srclist.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# extract the protocol from a source entry - return "local" for local sources
+get_protocol() {
+    if [[ $1 = *://* ]]; then
+        # strip leading filename
+        local proto="${1#*::}"
+        proto="${proto%%://*}"
+        # strip proto+uri://
+        printf "%s\n" "${proto%%+*}"
+    elif [[ $1 = *lp:* ]]; then
+        local proto="${1#*::}"
+        printf "%s\n" "${proto%%+lp:*}"
+    else
+        printf "%s\n" local
+    fi
+}
+
+# extract the filename from a source entry
+get_filename() {
+    local netfile=$1
+
+    # if a filename is specified, use it
+    if [[ $netfile = *::* ]]; then
+        printf "%s\n" "${netfile%%::*}"
+        return
+    fi
+
+    local proto=$(get_protocol "$netfile")
+
+    case $proto in
+        bzr|fossil|git|hg|svn)
+            filename=${netfile%%#*}
+            filename=${filename%%\?*}
+            filename=${filename%/}
+            filename=${filename##*/}
+            if [[ $proto = bzr ]]; then
+                filename=${filename#*lp:}
+            fi
+            if [[ $proto = fossil ]]; then
+                filename=$filename.fossil
+            fi
+            if [[ $proto = git ]]; then
+                filename=${filename%%.git*}
+            fi
+            ;;
+        *)
+            # if it is just a URL, we only keep the last component
+            filename="${netfile##*/}"
+            ;;
+    esac
+    printf "%s\n" "${filename}"
+}
+
+
+for arg; do
+    if [ ! -e "$arg/PKGBUILD" ]; then
+        continue
+    fi
+
+    (cd "$arg";
+    for src in $(su builder -c "makepkg --printsrcinfo" | grep "^ source = " | cut -d ' ' -f 3-); do
+        p=$(get_protocol "$src")
+        if [ "$p" = "local" ]; then
+            continue
+        fi
+        name=$(get_filename "$src")
+        echo "$(basename "$PWD")" "$name"
+    done
+    )
+done
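+
+# Example usage (illustrative paths): ./srclist.sh /build/foo /build/bar
+# For every argument directory containing a PKGBUILD, this prints
+# "<dir-basename> <filename>" for each of its non-local sources.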