bitbake: siggen: Split get_taskhash for performance

There are two operations happening in get_taskhash, the building of the
underlying data and the calculation of the hash.

Split these into two functions since the preparation part doesn't need
to rerun when unihash changes, only the calculation does.

This split allows significant performance improvements for hashequiv
in builds where many hashes are equivalent and many hashes are changing.

(Bitbake rev: 6a32af2808d748819f4af55c443578c8a63062b3)

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Richard Purdie
2019-12-13 16:07:28 +00:00
parent 73896a7a0e
commit 884463c8d1
2 changed files with 25 additions and 9 deletions

View File

@@ -1185,6 +1185,7 @@ class RunQueueData:
procdep = []
for dep in self.runtaskentries[tid].depends:
procdep.append(dep)
bb.parse.siggen.prep_taskhash(tid, procdep, self.dataCaches[mc_from_tid(tid)])
self.runtaskentries[tid].hash = bb.parse.siggen.get_taskhash(tid, procdep, self.dataCaches[mc_from_tid(tid)])
self.runtaskentries[tid].unihash = bb.parse.siggen.get_unihash(tid)

View File

@@ -52,6 +52,9 @@ class SignatureGenerator(object):
def get_unihash(self, tid):
return self.taskhash[tid]
def prep_taskhash(self, tid, deps, dataCache):
return
def get_taskhash(self, tid, deps, dataCache):
self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
return self.taskhash[tid]
@@ -198,12 +201,11 @@ class SignatureGeneratorBasic(SignatureGenerator):
pass
return taint
def get_taskhash(self, tid, deps, dataCache):
def prep_taskhash(self, tid, deps, dataCache):
(mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)
data = dataCache.basetaskhash[tid]
self.basehash[tid] = data
self.basehash[tid] = dataCache.basetaskhash[tid]
self.runtaskdeps[tid] = []
self.file_checksum_values[tid] = []
recipename = dataCache.pkg_fn[fn]
@@ -216,7 +218,6 @@ class SignatureGeneratorBasic(SignatureGenerator):
continue
if dep not in self.taskhash:
bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
data = data + self.get_unihash(dep)
self.runtaskdeps[tid].append(dep)
if task in dataCache.file_checksums[fn]:
@@ -226,27 +227,41 @@ class SignatureGeneratorBasic(SignatureGenerator):
checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
for (f,cs) in checksums:
self.file_checksum_values[tid].append((f,cs))
if cs:
data = data + cs
taskdep = dataCache.task_deps[fn]
if 'nostamp' in taskdep and task in taskdep['nostamp']:
# Nostamp tasks need an implicit taint so that they force any dependent tasks to run
if tid in self.taints and self.taints[tid].startswith("nostamp:"):
# Don't reset taint value upon every call
data = data + self.taints[tid][8:]
pass
else:
import uuid
taint = str(uuid.uuid4())
data = data + taint
self.taints[tid] = "nostamp:" + taint
taint = self.read_taint(fn, task, dataCache.stamp[fn])
if taint:
data = data + taint
self.taints[tid] = taint
logger.warning("%s is tainted from a forced run" % tid)
return
def get_taskhash(self, tid, deps, dataCache):
data = self.basehash[tid]
for dep in self.runtaskdeps[tid]:
data = data + self.get_unihash(dep)
for (f, cs) in self.file_checksum_values[tid]:
if cs:
data = data + cs
if tid in self.taints:
if self.taints[tid].startswith("nostamp:"):
data = data + self.taints[tid][8:]
else:
data = data + self.taints[tid]
h = hashlib.sha256(data.encode("utf-8")).hexdigest()
self.taskhash[tid] = h
#d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])