#!/usr/bin/env python3
import os
import sys
import subprocess
from subprocess import DEVNULL, PIPE, STDOUT
class Return:
    """Wrapper around a finished subprocess.

    Truthy exactly when the exit status was 0, int() yields the raw
    exit status, and str() yields the captured stdout (falling back to
    the stringified exit status when no output was captured).
    """

    def __init__(self, rv, out=None):
        self.rv = rv
        self.out = out

    def __bool__(self):
        return not self.rv

    def __int__(self):
        return self.rv

    def __str__(self):
        return self.out if self.out else str(self.rv)
def sub(*args, **kwargs):
    """Run a command via subprocess.run and wrap the outcome in a Return."""
    completed = subprocess.run(*args, **kwargs)
    return Return(completed.returncode, completed.stdout)
def subc(*args, **kwargs):
    """Run a command like sub(), but abort the whole script on failure.

    On a non-zero exit status the failing command and its exit code are
    reported and the process exits with status 1; otherwise a Return
    wrapping the result is handed back.
    """
    c = subprocess.run(*args, **kwargs)
    if c.returncode != 0:
        # Diagnostics go to stderr so they cannot be confused with, or
        # mixed into, command output that a caller pipes or captures.
        print("subprocess failed: ", args, file=sys.stderr)
        print("code:", c.returncode, file=sys.stderr)
        sys.exit(1)
    return Return(c.returncode, c.stdout)
def builddir(pkg):
    """Read the "builddir = ..." value out of a package's .BUILDINFO.

    Extracts .BUILDINFO from the package tarball to stdout and scans it
    line by line; returns None when no builddir line is present.
    """
    tar_cmd = ["tar", "-xO", "--force-local", "--occurrence=1", "-f", pkg, ".BUILDINFO"]
    result = subc(tar_cmd, capture_output=True, text=True)
    prefix = "builddir = "
    for line in str(result).splitlines():
        if line.startswith(prefix):
            return line[len(prefix):].strip()
    return None
# this is os.walk, simplified, but yielding DirEntries, and only for non-dirs,
# one by one.
def filewalk(top):
    """Simplified os.walk yielding (dirpath, DirEntry) for non-directories.

    Performs a depth-first traversal of *top*, handing out each
    non-directory entry one by one.
    """
    pending = [top]
    while pending:
        current = pending.pop()
        subdirs = []
        with os.scandir(current) as it:
            for item in it:
                if item.is_dir():
                    subdirs.append(item.name)
                else:
                    yield current, item
        # Push in reverse so subdirectories are visited in scandir order.
        pending.extend(os.path.join(current, name) for name in reversed(subdirs))
def srclist(bdir):
    """Return the source file names ./srclist.sh reports for *bdir*.

    Each non-empty output line is expected to contain a prefix, a space,
    then the file name; the text after the first space is collected.
    """
    result = subc(["./srclist.sh", bdir], capture_output=True, text=True)
    names = []
    for raw in str(result).splitlines():
        stripped = raw.strip()
        if stripped and " " in stripped:
            names.append(stripped.split(" ", maxsplit=1)[1])
    return names
# Root of the released-package tree that the cleanup below operates on.
releasepkg="/mnt/nfs/pkg"
class Pkg:
    """All on-disk files belonging to one package, grouped by version.

    Size and age statistics require stat() calls, so they are computed
    lazily by _analyze() and cached until more files are added.
    """

    def __init__(self, e, ver):
        self.files = {ver: [e]}   # version string -> list of DirEntries
        self._builddir = None     # cached builddir of the newest version
        self._builddirs = []      # all known builddirs (grows via merges)
        self.analyzed = False     # whether cached stats are valid

    def add(self, e, ver):
        """Record another file for *ver*; invalidates cached statistics."""
        self.analyzed = False
        self.files.setdefault(ver, []).append(e)

    def __len__(self):
        # How many versions?
        return len(self.files)

    def _analyze(self):
        """Compute things that need stat() of the files: size and age."""
        self.sizes = {}   # version -> total bytes of that version's files
        self.dates = {}   # version -> newest mtime among that version's files
        newest = 0
        oldest = None
        grand_total = 0
        for ver, entries in self.files.items():
            vsize = sum(e.stat().st_size for e in entries)
            vdate = max((e.stat().st_mtime for e in entries), default=0)
            self.sizes[ver] = vsize
            self.dates[ver] = vdate
            grand_total += vsize
            if not oldest or vdate < oldest:
                oldest = vdate
            if vdate > newest:
                newest = vdate
        self.total_size = grand_total
        self.newest = newest
        self.oldest = oldest
        self.analyzed = True

    def size(self):
        """Total bytes across every version of this package."""
        if not self.analyzed:
            self._analyze()
        return self.total_size

    def age(self):
        """Seconds between the newest and the oldest version's timestamps."""
        if not self.analyzed:
            self._analyze()
        return self.newest - self.oldest

    def newest_ver(self):
        """Version string whose files carry the most recent mtime."""
        if not self.analyzed:
            self._analyze()
        for ver, date in self.dates.items():
            if date == self.newest:
                return ver
        return None

    def builddir(self):
        """Builddir of the newest version, read from .BUILDINFO once."""
        if self._builddir:
            return self._builddir
        self._builddir = builddir(self.files[self.newest_ver()][0].path)
        self._builddirs = [self._builddir]
        return self._builddir

    def builddirs(self):
        """Every known builddir, resolving the primary one if needed."""
        if not self._builddir:
            return [self.builddir()]
        if len(self._builddirs) <= 1:
            return [self._builddir]
        return self._builddirs

    def add_builddir(self, dir):
        """Remember an additional builddir (e.g. from a merged package)."""
        if len(self._builddirs) < 1:
            self.builddir()   # resolve the primary builddir first
        if dir not in self._builddirs:
            self._builddirs.append(dir)

    def merge(self, other):
        """Fold all of *other*'s files into this package."""
        for ver, entries in other.files.items():
            for e in entries:
                self.add(e, ver)
def fuzzy_builddir_match(pkg1, pkg2):
    """Decide whether two packages plausibly share one build tree.

    Matches when pkg2's builddir is already known to pkg1, when the two
    builddirs are identical, or when pkg2's dir looks like
    "<pkg1 dir>-<suffix>" (e.g. firefox vs. firefox-i18n).  The suffix
    heuristic is skipped for linux trees, where it does not hold.
    """
    dir2 = pkg2.builddir()
    if dir2 in pkg1.builddirs():
        return True
    dir1 = pkg1.builddir()
    if dir1 == dir2:
        return True
    if "-" in dir2 and "linux" not in dir1:
        stem = dir2.rsplit("-", maxsplit=1)[0]
        if stem == dir1:
            #print("fuzzy match", dir1, dir2)
            return True
    return False
# Index every package file under the release tree.  Keyed by
# (repo, pkgname); each Pkg collects all versions, arches and the
# matching dbginfo files of one package.
pkgs = {}
for root, entry in filewalk(releasepkg):
    suffix = ".pkg.tar.xz"
    dbgsuf = "-dbginfo"
    if entry.name.endswith(suffix):
        if not entry.is_file():
            continue
        # Path layout assumed to be .../<arch>/<repo>/<file> -- TODO confirm
        _,arch,repo = root.rsplit("/",maxsplit=2)
        # File names follow name-version-release-arch.pkg.tar.xz
        pkg,ver,rel,pkgarch = entry.name[:-len(suffix)].rsplit("-",maxsplit=3)
        pkgver = f"{ver}-{rel}" # Simpler to reassemble than skip splitting there
        dbg = False
        if pkg.endswith(dbgsuf):
            # Fold dbginfo packages into their parent package's entry.
            dbg = True
            pkg = pkg[:-len(dbgsuf)]
        #print(arch, repo, pkg, pkgver, pkgarch, dbg)
        id = (repo, pkg)
        if id not in pkgs.keys():
            pkgs[id] = Pkg(entry, pkgver)
        else:
            pkgs[id].add(entry, pkgver)
# Rules:
keep_versions = 3
keep_age = 6 * 30 * 24 * 60 * 60
keep_size = 100 * 1024 * 1024
# These mean:
# - we always keep atleast 3 versions.
# - we always keep packages for atleast 6 months
# - we always keep atleast 100 MB of versions for a package
#   (NOTE: the size does count the dbginfo and all arches)
# Only packages holding more than the version floor can lose anything,
# so everything else is filtered out up front (this also avoids the
# stat() cost of the age/size checks for them).
candidates = {pid: p for pid, p in pkgs.items() if len(p) > keep_versions}
# This process brings split packages back together
# and with fuzzy_builddir_match we also bring together version-synced packages
# like firefox and firefox-i18n
merged = True
while merged:
    merged = False
    for id, pkg in candidates.items():
        for id2, other in candidates.items():
            if id == id2: # do not merge myself into itself...
                continue
            if id[0] != id2[0]: # repo must match
                continue
            if pkg.newest_ver() == other.newest_ver() and fuzzy_builddir_match(pkg, other):
                # Merged key keeps the repo plus both name parts:
                # (repo, name1..., name2...)
                newid = tuple([id[0]] + list(id[1:]) + list(id2[1:]))
                pkg.merge(other)
                if other.builddir() not in pkg.builddirs():
                    pkg.add_builddir(other.builddir())
                candidates[newid] = pkg
                del candidates[id]
                del candidates[id2]
                merged = True
                # The dict was mutated while iterating: restart both
                # loops (outer while re-enters until a full pass makes
                # no further merges).
                break
        if merged:
            break
# A candidate only becomes a finalist once it exceeds *every* keep
# rule: version count (filtered above), age and total size.
finalists = {}
for fid, fpkg in candidates.items():
    if fpkg.age() > keep_age and fpkg.size() > keep_size:
        finalists[fid] = fpkg
        #print(fid, len(fpkg), fpkg.age() / 86400, fpkg.size() / (1024*1024))
# For every finalist, drop oldest versions until the package complies
# with the keep rules again.  rmlist collects the DirEntries to delete;
# after the loop, ver/date refer to the most recent removed version.
removals = {}
for id, pkg in finalists.items():
    # Sorted newest-first so .pop() hands versions out oldest-first.
    versions = sorted(pkg.dates.items(), key=lambda e: e[1], reverse=True)
    compliant = False
    rmlist = []
    while not compliant:
        ver,date = versions.pop()
        rmlist += pkg.files[ver]
        del pkg.files[ver]
        pkg._analyze() # recompute stats after dropping the version
        # Stop as soon as any single keep rule is satisfied again.
        # Terminates: the version count shrinks every iteration and the
        # finalists filter guaranteed len(pkg) > keep_versions at entry.
        if len(pkg) <= keep_versions:
            compliant = True
        if pkg.age() <= keep_age:
            compliant = True
        if pkg.size() <= keep_size:
            compliant = True
    rminfo = (pkg.builddirs(), ver, date, rmlist)
    removals[id] = rminfo
    #print(len(pkg), pkg.age() / 86400, pkg.size() / (1024*1024))
    #print(removals[id])
# Extend each removal with matching source archives: anything in the
# repo's source directory for a builddir that srclist.sh does not list
# as still needed, and that is not newer than the removal cutoff date.
for id, info in removals.items():
    # Do not cleanup core sources, they're arranged differently.
    repo = id[0]
    if repo == "core":
        continue
    rmsrc = []
    for dir in info[0]:
        keepfiles = srclist(dir)
        basedir = os.path.basename(dir)
        srcarchives = f"/sources/archives/{repo}/{basedir}"
        with os.scandir(srcarchives) as entries:
            for entry in entries:
                if entry.is_dir():
                    continue
                if entry.name in keepfiles:
                    continue
                # info[2] is the newest removed version's date; sources
                # younger than that belong to versions being kept.
                if entry.stat().st_mtime > info[2]:
                    continue
                rmsrc.append(entry) # only used by the debug print below
                info[3].append(entry)
    #print(id, rmsrc)
# Show the full removal list, wait for operator confirmation, then
# delete the files (and detached signatures of package files).
if len(removals):
    print(f"Removing old packages/sources for {len(removals)} packages:")
    for id, info in removals.items():
        idstr = id[0] + ":" + ",".join(id[1:])
        print(f"{len(info[3])} files for {idstr} - version {info[1]} and older")
        for e in info[3]:
            print(f" {e.path}")
    # Last chance to bail out before anything is actually deleted.
    print("Press enter to continue, Ctrl-C to abort.")
    _ = input()
    for id, info in removals.items():
        for e in info[3]:
            if e.path.endswith(".pkg.tar.xz"):
                # Package files carry a detached .sig; remove it too.
                sigfile = e.path + ".sig"
                if os.path.exists(sigfile):
                    os.unlink(sigfile)
            os.unlink(e.path)
    print("Done.")
else:
    print("Nothing to clean up.")