bitbake: siggen: Clean up task reference formats

Currently siggen uses the format "<filename>.<taskname>" for referencing tasks
whilst runqueue uses "<filename>:<taskname>". This converts to use ":" as the
separator everywhere.

This is an API breaking change since the cache is affected, as are siginfo files
and any custom signature handlers such as those in OE-Core.

Ultimately this will let us clean up and the accessor functions from runqueue,
removing all the ".rsplit(".", 1)[0]" type code currently all over the place.
Once a standard is used everwhere we can update the code over time to be more
optimal.

(Bitbake rev: 07e539e1c566ca3434901e1a00335cb76c69d496)

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Richard Purdie
2019-07-29 17:28:20 +01:00
parent 40a5e193c4
commit 4fbb862cdc
5 changed files with 82 additions and 95 deletions

View File

@@ -220,7 +220,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
cachedata.hashfn[fn] = self.hashfilename
for task, taskhash in self.basetaskhashes.items():
identifier = '%s.%s' % (fn, task)
identifier = '%s:%s' % (fn, task)
cachedata.basetaskhash[identifier] = taskhash
cachedata.inherits[fn] = self.inherits

View File

@@ -422,7 +422,7 @@ def generate_dependency_hash(tasklist, gendeps, lookupcache, whitelist, fn):
var = lookupcache[dep]
if var is not None:
data = data + str(var)
k = fn + "." + task
k = fn + ":" + task
basehash[k] = hashlib.sha256(data.encode("utf-8")).hexdigest()
taskdeps[task] = alldeps

View File

@@ -1174,10 +1174,9 @@ class RunQueueData:
def prepare_task_hash(self, tid):
procdep = []
for dep in self.runtaskentries[tid].depends:
procdep.append(fn_from_tid(dep) + "." + taskname_from_tid(dep))
(mc, fn, taskname, taskfn) = split_tid_mcfn(tid)
self.runtaskentries[tid].hash = bb.parse.siggen.get_taskhash(taskfn, taskname, procdep, self.dataCaches[mc])
self.runtaskentries[tid].unihash = bb.parse.siggen.get_unihash(taskfn + "." + taskname)
procdep.append(dep)
self.runtaskentries[tid].hash = bb.parse.siggen.get_taskhash(tid, procdep, self.dataCaches[mc_from_tid(tid)])
self.runtaskentries[tid].unihash = bb.parse.siggen.get_unihash(tid)
def dump_data(self):
"""
@@ -1401,7 +1400,7 @@ class RunQueue:
sq_data['hashfn'][tid] = self.rqdata.dataCaches[mc].hashfn[taskfn]
sq_data['unihash'][tid] = self.rqdata.runtaskentries[tid].unihash
valid_ids = self.validate_hash(sq_data, data, siginfo, currentcount)
valid = self.validate_hash(sq_data, data, siginfo, currentcount)
return valid
@@ -2152,8 +2151,7 @@ class RunQueueExecute:
if unihash != self.rqdata.runtaskentries[tid].unihash:
logger.info("Task %s unihash changed to %s" % (tid, unihash))
self.rqdata.runtaskentries[tid].unihash = unihash
(mc, fn, taskname, taskfn) = split_tid_mcfn(tid)
bb.parse.siggen.set_unihash(taskfn + "." + taskname, unihash)
bb.parse.siggen.set_unihash(tid, unihash)
# Work out all tasks which depend on this one
total = set()
@@ -2177,12 +2175,11 @@ class RunQueueExecute:
continue
procdep = []
for dep in self.rqdata.runtaskentries[tid].depends:
procdep.append(fn_from_tid(dep) + "." + taskname_from_tid(dep))
(mc, fn, taskname, taskfn) = split_tid_mcfn(tid)
procdep.append(dep)
orighash = self.rqdata.runtaskentries[tid].hash
self.rqdata.runtaskentries[tid].hash = bb.parse.siggen.get_taskhash(taskfn, taskname, procdep, self.rqdata.dataCaches[mc])
self.rqdata.runtaskentries[tid].hash = bb.parse.siggen.get_taskhash(tid, procdep, self.rqdata.dataCaches[mc_from_tid(tid)])
origuni = self.rqdata.runtaskentries[tid].unihash
self.rqdata.runtaskentries[tid].unihash = bb.parse.siggen.get_unihash(taskfn + "." + taskname)
self.rqdata.runtaskentries[tid].unihash = bb.parse.siggen.get_unihash(tid)
logger.debug(1, "Task %s hash changes: %s->%s %s->%s" % (tid, orighash, self.rqdata.runtaskentries[tid].hash, origuni, self.rqdata.runtaskentries[tid].unihash))
next |= self.rqdata.runtaskentries[tid].revdeps
changed.add(tid)

View File

@@ -46,13 +46,12 @@ class SignatureGenerator(object):
def finalise(self, fn, d, varient):
return
def get_unihash(self, task):
return self.taskhash[task]
def get_unihash(self, tid):
return self.taskhash[tid]
def get_taskhash(self, fn, task, deps, dataCache):
k = fn + "." + task
self.taskhash[k] = hashlib.sha256(k.encode("utf-8")).hexdigest()
return self.taskhash[k]
def get_taskhash(self, tid, deps, dataCache):
self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
return self.taskhash[tid]
def writeout_file_checksum_cache(self):
"""Write/update the file checksum cache onto disk"""
@@ -106,7 +105,6 @@ class SignatureGeneratorBasic(SignatureGenerator):
self.taints = {}
self.gendeps = {}
self.lookupcache = {}
self.pkgnameextract = re.compile(r"(?P<fn>.*)\..*")
self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
self.taskwhitelist = None
self.init_rundepcheck(data)
@@ -135,16 +133,16 @@ class SignatureGeneratorBasic(SignatureGenerator):
taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)
for task in tasklist:
k = fn + "." + task
if not ignore_mismatch and k in self.basehash and self.basehash[k] != basehash[k]:
bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (k, self.basehash[k], basehash[k]))
tid = fn + ":" + task
if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
bb.error("The following commands may help:")
cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
# Make sure sigdata is dumped before run printdiff
bb.error("%s -Snone" % cmd)
bb.error("Then:")
bb.error("%s -Sprintdiff\n" % cmd)
self.basehash[k] = basehash[k]
self.basehash[tid] = basehash[tid]
self.taskdeps[fn] = taskdeps
self.gendeps[fn] = gendeps
@@ -171,7 +169,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
# self.dump_sigtask(fn, task, d.getVar("STAMP"), False)
for task in taskdeps:
d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + "." + task])
d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])
def rundep_check(self, fn, recipename, task, dep, depname, dataCache):
# Return True if we should keep the dependency, False to drop it
@@ -191,33 +189,26 @@ class SignatureGeneratorBasic(SignatureGenerator):
pass
return taint
def get_taskhash(self, fn, task, deps, dataCache):
def get_taskhash(self, tid, deps, dataCache):
mc = ''
if fn.startswith('mc:'):
mc = fn.split(':')[1]
k = fn + "." + task
(mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)
data = dataCache.basetaskhash[k]
self.basehash[k] = data
self.runtaskdeps[k] = []
self.file_checksum_values[k] = []
data = dataCache.basetaskhash[tid]
self.basehash[tid] = data
self.runtaskdeps[tid] = []
self.file_checksum_values[tid] = []
recipename = dataCache.pkg_fn[fn]
for dep in sorted(deps, key=clean_basepath):
pkgname = self.pkgnameextract.search(dep).group('fn')
if mc:
depmc = pkgname.split(':')[1]
if mc != depmc:
continue
if dep.startswith("mc:") and not mc:
(depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep)
if mc != depmc:
continue
depname = dataCache.pkg_fn[pkgname]
depname = dataCache.pkg_fn[depfn]
if not self.rundep_check(fn, recipename, task, dep, depname, dataCache):
continue
if dep not in self.taskhash:
bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
data = data + self.get_unihash(dep)
self.runtaskdeps[k].append(dep)
self.runtaskdeps[tid].append(dep)
if task in dataCache.file_checksums[fn]:
if self.checksum_cache:
@@ -225,7 +216,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
else:
checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
for (f,cs) in checksums:
self.file_checksum_values[k].append((f,cs))
self.file_checksum_values[tid].append((f,cs))
if cs:
data = data + cs
@@ -235,16 +226,16 @@ class SignatureGeneratorBasic(SignatureGenerator):
import uuid
taint = str(uuid.uuid4())
data = data + taint
self.taints[k] = "nostamp:" + taint
self.taints[tid] = "nostamp:" + taint
taint = self.read_taint(fn, task, dataCache.stamp[fn])
if taint:
data = data + taint
self.taints[k] = taint
logger.warning("%s is tainted from a forced run" % k)
self.taints[tid] = taint
logger.warning("%s is tainted from a forced run" % tid)
h = hashlib.sha256(data.encode("utf-8")).hexdigest()
self.taskhash[k] = h
self.taskhash[tid] = h
#d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
return h
@@ -262,15 +253,15 @@ class SignatureGeneratorBasic(SignatureGenerator):
def dump_sigtask(self, fn, task, stampbase, runtime):
k = fn + "." + task
tid = fn + ":" + task
referencestamp = stampbase
if isinstance(runtime, str) and runtime.startswith("customfile"):
sigfile = stampbase
referencestamp = runtime[11:]
elif runtime and k in self.taskhash:
sigfile = stampbase + "." + task + ".sigdata" + "." + self.taskhash[k]
elif runtime and tid in self.taskhash:
sigfile = stampbase + "." + task + ".sigdata" + "." + self.taskhash[tid]
else:
sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[k]
sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]
bb.utils.mkdirhier(os.path.dirname(sigfile))
@@ -279,7 +270,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
data['basewhitelist'] = self.basewhitelist
data['taskwhitelist'] = self.taskwhitelist
data['taskdeps'] = self.taskdeps[fn][task]
data['basehash'] = self.basehash[k]
data['basehash'] = self.basehash[tid]
data['gendeps'] = {}
data['varvals'] = {}
data['varvals'][task] = self.lookupcache[fn][task]
@@ -289,30 +280,30 @@ class SignatureGeneratorBasic(SignatureGenerator):
data['gendeps'][dep] = self.gendeps[fn][dep]
data['varvals'][dep] = self.lookupcache[fn][dep]
if runtime and k in self.taskhash:
data['runtaskdeps'] = self.runtaskdeps[k]
data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[k]]
if runtime and tid in self.taskhash:
data['runtaskdeps'] = self.runtaskdeps[tid]
data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
data['runtaskhashes'] = {}
for dep in data['runtaskdeps']:
data['runtaskhashes'][dep] = self.get_unihash(dep)
data['taskhash'] = self.taskhash[k]
data['taskhash'] = self.taskhash[tid]
taint = self.read_taint(fn, task, referencestamp)
if taint:
data['taint'] = taint
if runtime and k in self.taints:
if 'nostamp:' in self.taints[k]:
data['taint'] = self.taints[k]
if runtime and tid in self.taints:
if 'nostamp:' in self.taints[tid]:
data['taint'] = self.taints[tid]
computed_basehash = calc_basehash(data)
if computed_basehash != self.basehash[k]:
bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[k], k))
if runtime and k in self.taskhash:
if computed_basehash != self.basehash[tid]:
bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
if runtime and tid in self.taskhash:
computed_taskhash = calc_taskhash(data)
if computed_taskhash != self.taskhash[k]:
bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[k], k))
sigfile = sigfile.replace(self.taskhash[k], computed_taskhash)
if computed_taskhash != self.taskhash[tid]:
bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)
fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
try:
@@ -332,34 +323,33 @@ class SignatureGeneratorBasic(SignatureGenerator):
if fn in self.taskdeps:
for task in self.taskdeps[fn]:
tid = fn + ":" + task
(mc, _, _) = bb.runqueue.split_tid(tid)
k = fn + "." + task
if k not in self.taskhash:
mc = bb.runqueue.mc_from_tid(tid)
if tid not in self.taskhash:
continue
if dataCaches[mc].basetaskhash[k] != self.basehash[k]:
bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % k)
bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[k], self.basehash[k]))
if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
name = "basichash"
def get_stampfile_hash(self, task):
if task in self.taskhash:
return self.taskhash[task]
def get_stampfile_hash(self, tid):
if tid in self.taskhash:
return self.taskhash[tid]
# If task is not in basehash, then error
return self.basehash[task]
return self.basehash[tid]
def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
if taskname != "do_setscene" and taskname.endswith("_setscene"):
k = fn + "." + taskname[:-9]
tid = fn + ":" + taskname[:-9]
else:
k = fn + "." + taskname
tid = fn + ":" + taskname
if clean:
h = "*"
else:
h = self.get_stampfile_hash(k)
h = self.get_stampfile_hash(tid)
return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
@@ -378,32 +368,32 @@ class SignatureGeneratorUniHashMixIn(object):
self.server, self.method = data[:2]
super().set_taskdata(data[2:])
def __get_task_unihash_key(self, task):
# TODO: The key only *needs* to be the taskhash, the task is just
def __get_task_unihash_key(self, tid):
# TODO: The key only *needs* to be the taskhash, the tid is just
# convenient
return '%s:%s' % (task, self.taskhash[task])
return '%s:%s' % (tid, self.taskhash[tid])
def get_stampfile_hash(self, task):
if task in self.taskhash:
def get_stampfile_hash(self, tid):
if tid in self.taskhash:
# If a unique hash is reported, use it as the stampfile hash. This
# ensures that if a task won't be re-run if the taskhash changes,
# but it would result in the same output hash
unihash = self.unitaskhashes.get(self.__get_task_unihash_key(task), None)
unihash = self.unitaskhashes.get(self.__get_task_unihash_key(tid), None)
if unihash is not None:
return unihash
return super().get_stampfile_hash(task)
return super().get_stampfile_hash(tid)
def set_unihash(self, task, unihash):
self.unitaskhashes[self.__get_task_unihash_key(task)] = unihash
def set_unihash(self, tid, unihash):
self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash
def get_unihash(self, task):
def get_unihash(self, tid):
import urllib
import json
taskhash = self.taskhash[task]
taskhash = self.taskhash[tid]
key = self.__get_task_unihash_key(task)
key = self.__get_task_unihash_key(tid)
# TODO: This cache can grow unbounded. It probably only needs to keep
# for each task
@@ -428,7 +418,7 @@ class SignatureGeneratorUniHashMixIn(object):
try:
url = '%s/v1/equivalent?%s' % (self.server,
urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[task]}))
urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[tid]}))
request = urllib.request.Request(url)
response = urllib.request.urlopen(request)
@@ -441,9 +431,9 @@ class SignatureGeneratorUniHashMixIn(object):
# A unique hash equal to the taskhash is not very interesting,
# so it is reported it at debug level 2. If they differ, that
# is much more interesting, so it is reported at debug level 1
bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, task, self.server))
bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
else:
bb.debug(2, 'No reported unihash for %s:%s from %s' % (task, taskhash, self.server))
bb.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
except urllib.error.URLError as e:
bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
except (KeyError, json.JSONDecodeError) as e:
@@ -464,7 +454,7 @@ class SignatureGeneratorUniHashMixIn(object):
report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
tempdir = d.getVar('T')
fn = d.getVar('BB_FILENAME')
key = fn + '.do_' + task + ':' + taskhash
key = fn + ':do_' + task + ':' + taskhash
# Sanity checks
cache_unihash = self.unitaskhashes.get(key, None)

View File

@@ -466,7 +466,7 @@ class TaskHash(unittest.TestCase):
tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d)
taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, set(), "somefile")
bb.warn(str(lookupcache))
return basehash["somefile." + taskname]
return basehash["somefile:" + taskname]
d = bb.data.init()
d.setVar("__BBTASKS", ["mytask"])