buildhistory: Add simplistic file move detection

We'd like to use buildhistory more during patch review; however, it's
proving hard, particularly where whole subtrees of files move,
such as during a kernel version upgrade, or where a software module
moves its include directory.

This adds file rename matching which covers our common case of library
moves, kernel upgrades and more.

A new test case is also added so that someone in the future can change
the code and test that the logic is still doing the expected things.

(From OE-Core rev: 791ce304f5e066759874beac0feef5ee62a1c255)

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Richard Purdie
2020-06-03 16:01:02 +01:00
parent 26ae42ded7
commit eb226b897f
4 changed files with 18539 additions and 3 deletions

View File

@@ -213,6 +213,7 @@ class FileChange:
changetype_perms = 'P'
changetype_ownergroup = 'O'
changetype_link = 'L'
changetype_move = 'M'
def __init__(self, path, changetype, oldvalue = None, newvalue = None):
self.path = path
@@ -251,10 +252,11 @@ class FileChange:
return '%s changed owner/group from %s to %s' % (self.path, self.oldvalue, self.newvalue)
elif self.changetype == self.changetype_link:
return '%s changed symlink target from %s to %s' % (self.path, self.oldvalue, self.newvalue)
elif self.changetype == self.changetype_move:
return '%s moved to %s' % (self.path, self.oldvalue)
else:
return '%s changed (unknown)' % self.path
def blob_to_dict(blob):
alines = [line for line in blob.data_stream.read().decode('utf-8').splitlines()]
adict = {}
@@ -281,11 +283,14 @@ def file_list_to_dict(lines):
adict[path] = splitv[0:3]
return adict
# Translation table that replaces every decimal digit with 'X'. Applying it
# to two paths that differ only in embedded numbers (for example version
# strings) yields the same translated string for both.
numeric_removal = str.maketrans('0123456789', 'X' * 10)
def compare_file_lists(alines, blines, compare_ownership=True):
adict = file_list_to_dict(alines)
bdict = file_list_to_dict(blines)
filechanges = []
additions = []
removals = []
for path, splitv in adict.items():
newsplitv = bdict.pop(path, None)
if newsplitv:
@@ -318,11 +323,65 @@ def compare_file_lists(alines, blines, compare_ownership=True):
if oldvalue != newvalue:
filechanges.append(FileChange(path, FileChange.changetype_link, oldvalue, newvalue))
else:
filechanges.append(FileChange(path, FileChange.changetype_remove))
removals.append(path)
# Whatever is left over has been added
for path in bdict:
filechanges.append(FileChange(path, FileChange.changetype_add))
additions.append(path)
# Rather than print additions and removals, it's nicer to print file 'moves'
# where names or paths are similar.
revmap_remove = {}
for removal in removals:
translated = removal.translate(numeric_removal)
if translated not in revmap_remove:
revmap_remove[translated] = []
revmap_remove[translated].append(removal)
#
# We want to detect renames of large trees of files like
# /lib/modules/5.4.40-yocto-standard to /lib/modules/5.4.43-yocto-standard
#
renames = {}
for addition in additions.copy():
if addition not in additions:
continue
translated = addition.translate(numeric_removal)
if translated in revmap_remove:
if len(revmap_remove[translated]) != 1:
continue
removal = revmap_remove[translated][0]
commondir = addition.split("/")
commondir2 = removal.split("/")
idx = None
for i in range(len(commondir)):
if commondir[i] != commondir2[i]:
idx = i
break
commondir = "/".join(commondir[:i+1])
commondir2 = "/".join(commondir2[:i+1])
# If the common parent is in one dict and not the other, it's likely a rename,
# so iterate through those files and process them as such
if commondir2 not in bdict and commondir not in adict:
if commondir not in renames:
renames[commondir] = commondir2
for addition2 in additions.copy():
if addition2.startswith(commondir):
removal2 = addition2.replace(commondir, commondir2)
if removal2 in removals:
additions.remove(addition2)
removals.remove(removal2)
continue
filechanges.append(FileChange(removal, FileChange.changetype_move, addition))
additions.remove(addition)
removals.remove(removal)
for rename in renames:
filechanges.append(FileChange(renames[rename], FileChange.changetype_move, rename))
for addition in additions:
filechanges.append(FileChange(addition, FileChange.changetype_add))
for removal in removals:
filechanges.append(FileChange(removal, FileChange.changetype_remove))
return filechanges

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -5,6 +5,7 @@
import os
from oeqa.selftest.case import OESelftestTestCase
import tempfile
import operator
from oeqa.utils.commands import get_bb_var
class TestBlobParsing(OESelftestTestCase):
@@ -97,3 +98,48 @@ class TestBlobParsing(OESelftestTestCase):
var_changes[x.fieldname] = (oldvalue, x.newvalue)
self.assertEqual(defaultmap, var_changes, "Defaults not set properly")
class TestFileListCompare(OESelftestTestCase):
    """Selftest for oe.buildhistory_analysis.compare_file_lists()."""

    def test_compare_file_lists(self):
        # A directory tree that changes location, such as
        # /lib/modules/5.4.40-yocto-standard -> /lib/modules/5.4.43-yocto-standard,
        # must be reported as a single move rather than as a long list of
        # individual additions and removals.
        from oe.buildhistory_analysis import compare_file_lists, FileChange

        def load_lines(name):
            # Read one of the reference file listings from the test files dir.
            with open(self.tc.files_dir + "/" + name, "r") as handle:
                return handle.readlines()

        before = load_lines("buildhistory_filelist1.txt")
        after = load_lines("buildhistory_filelist2.txt")

        expected = [
            '/lib/libcap.so.2 changed symlink target from libcap.so.2.33 to libcap.so.2.34',
            '/lib/libcap.so.2.33 moved to /lib/libcap.so.2.34',
            '/lib/modules/5.4.40-yocto-standard moved to /lib/modules/5.4.43-yocto-standard',
            '/lib/modules/5.4.43-yocto-standard/modules.builtin.alias.bin was added',
            '/usr/bin/gawk-5.0.1 moved to /usr/bin/gawk-5.1.0',
            '/usr/lib/libbtrfsutil.so changed symlink target from libbtrfsutil.so.1.1.1 to libbtrfsutil.so.1.2.0',
            '/usr/lib/libbtrfsutil.so.1 changed symlink target from libbtrfsutil.so.1.1.1 to libbtrfsutil.so.1.2.0',
            '/usr/lib/libbtrfsutil.so.1.1.1 moved to /usr/lib/libbtrfsutil.so.1.2.0',
            '/usr/lib/libkmod.so changed symlink target from libkmod.so.2.3.4 to libkmod.so.2.3.5',
            '/usr/lib/libkmod.so.2 changed symlink target from libkmod.so.2.3.4 to libkmod.so.2.3.5',
            '/usr/lib/libkmod.so.2.3.4 moved to /usr/lib/libkmod.so.2.3.5',
            '/usr/lib/libpixman-1.so.0 changed symlink target from libpixman-1.so.0.38.4 to libpixman-1.so.0.40.0',
            '/usr/lib/libpixman-1.so.0.38.4 moved to /usr/lib/libpixman-1.so.0.40.0',
            '/usr/lib/opkg/alternatives/rtcwake was added',
            '/usr/lib/python3.8/site-packages/PyGObject-3.34.0.egg-info moved to /usr/lib/python3.8/site-packages/PyGObject-3.36.1.egg-info',
            '/usr/lib/python3.8/site-packages/btrfsutil-1.1.1-py3.8.egg-info moved to /usr/lib/python3.8/site-packages/btrfsutil-1.2.0-py3.8.egg-info',
            '/usr/lib/python3.8/site-packages/pycairo-1.19.0.egg-info moved to /usr/lib/python3.8/site-packages/pycairo-1.19.1.egg-info',
            '/usr/sbin/rtcwake changed type from file to symlink',
            '/usr/sbin/rtcwake changed permissions from rwxr-xr-x to rwxrwxrwx',
            '/usr/sbin/rtcwake changed symlink target from None to /usr/sbin/rtcwake.util-linux',
            '/usr/sbin/rtcwake.util-linux was added'
        ]

        changes = compare_file_lists(before, after)
        by_path = operator.attrgetter("path")
        rendered = [str(change) for change in sorted(changes, key=by_path)]

        # Show the complete difference if the two lists disagree.
        self.maxDiff = None
        self.assertCountEqual(rendered, expected)