insane: improve license checksumming logic

Instead of opening files as bytes and battling decoding to UTF-8 which can throw
exceptions, open directly as strings and replace invalid codepoints.  This
handles licenses in encodings which are not UTF-8 but are based on ASCII much
better.

Also instead of extracting the license lines, writing them to a file, and then
hashing the file, hash the lines directly.

(From OE-Core rev: 63ef9d342277c4ba541b78cbb45ef181f071f495)

Signed-off-by: Ross Burton <ross.burton@intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Ross Burton
2019-03-21 12:30:42 +00:00
committed by Richard Purdie
parent 2187d06166
commit 24b9bcf043

View File

@@ -458,7 +458,6 @@ python populate_lic_qa_checksum() {
"""
Check for changes in the license files.
"""
import tempfile
sane = True
lic_files = d.getVar('LIC_FILES_CHKSUM') or ''
@@ -496,61 +495,45 @@ python populate_lic_qa_checksum() {
if (not beginline) and (not endline):
md5chksum = bb.utils.md5_file(srclicfile)
with open(srclicfile, 'rb') as f:
license = f.read()
with open(srclicfile, 'r', errors='replace') as f:
license = f.read().splitlines()
else:
fi = open(srclicfile, 'rb')
fo = tempfile.NamedTemporaryFile(mode='wb', prefix='poky.', suffix='.tmp', delete=False)
tmplicfile = fo.name;
lineno = 0
linesout = 0
license = []
for line in fi:
lineno += 1
if (lineno >= beginline):
if ((lineno <= endline) or not endline):
fo.write(line)
license.append(line)
linesout += 1
else:
break
fo.flush()
fo.close()
fi.close()
md5chksum = bb.utils.md5_file(tmplicfile)
license = b''.join(license)
os.unlink(tmplicfile)
with open(srclicfile, 'rb') as f:
import hashlib
lineno = 0
license = []
m = hashlib.md5()
for line in f:
lineno += 1
if (lineno >= beginline):
if ((lineno <= endline) or not endline):
m.update(line)
license.append(line.decode('utf-8', errors='replace').rstrip())
else:
break
md5chksum = m.hexdigest()
if recipemd5 == md5chksum:
bb.note (pn + ": md5 checksum matched for ", url)
else:
if recipemd5:
msg = pn + ": The LIC_FILES_CHKSUM does not match for " + url
msg = msg + "\n" + pn + ": The new md5 checksum is " + md5chksum
try:
license_lines = license.decode('utf-8').split('\n')
except:
# License text might not be valid UTF-8, in which
# case we don't know how to include it in our output
# and have to skip it.
pass
else:
max_lines = int(d.getVar('QA_MAX_LICENSE_LINES') or 20)
if not license_lines or license_lines[-1] != '':
# Ensure that our license text ends with a line break
# (will be added with join() below).
license_lines.append('')
remove = len(license_lines) - max_lines
if remove > 0:
start = max_lines // 2
end = start + remove - 1
del license_lines[start:end]
license_lines.insert(start, '...')
msg = msg + "\n" + pn + ": Here is the selected license text:" + \
"\n" + \
"{:v^70}".format(" beginline=%d " % beginline if beginline else "") + \
"\n" + "\n".join(license_lines) + \
"{:^^70}".format(" endline=%d " % endline if endline else "")
max_lines = int(d.getVar('QA_MAX_LICENSE_LINES') or 20)
if not license or license[-1] != '':
# Ensure that our license text ends with a line break
# (will be added with join() below).
license.append('')
remove = len(license) - max_lines
if remove > 0:
start = max_lines // 2
end = start + remove - 1
del license[start:end]
license.insert(start, '...')
msg = msg + "\n" + pn + ": Here is the selected license text:" + \
"\n" + \
"{:v^70}".format(" beginline=%d " % beginline if beginline else "") + \
"\n" + "\n".join(license) + \
"{:^^70}".format(" endline=%d " % endline if endline else "")
if beginline:
if endline:
srcfiledesc = "%s (lines %d through to %d)" % (srclicfile, beginline, endline)