bitbake: runqueue: Avoid dumpsigs idle loop blocking

We're seeing some failures on hosts where slow "idle" loop iterations are
causing bitbake server timeouts. These seem to happen particularly in the
dump_signatures() function within runqueue.

That isn't entirely surprising since it creates a pool of threads to execute
work an at best can take around 10s to execture and return control backto the
main loop. On a slow system, it is understandable this can take longer,
particularly as these functions are creating large chunks of IO.

Since the work is being done in threads, we can launch them, return to idle
and check on the results periodically as they complete.

This should hopefully address some of the remaining timeout issues we see on
the autobuilder in oe-selftest sstate tests.

(Bitbake rev: e66f1b643b4b77404ba31f2704cda5af9bf00a57)

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Richard Purdie
2024-11-04 17:42:33 +00:00
parent 330720015a
commit 79f193ffa0

View File

@@ -128,6 +128,7 @@ class RunQueueStats:
# runQueue state machine
runQueuePrepare = 2
runQueueSceneInit = 3
runQueueDumpSigs = 4
runQueueRunning = 6
runQueueFailed = 7
runQueueCleanUp = 8
@@ -1588,14 +1589,19 @@ class RunQueue:
self.rqdata.init_progress_reporter.next_stage()
self.rqexe = RunQueueExecute(self)
dump = self.cooker.configuration.dump_signatures
if dump:
dumpsigs = self.cooker.configuration.dump_signatures
if dumpsigs:
self.rqdata.init_progress_reporter.finish()
if 'printdiff' in dump:
invalidtasks = self.print_diffscenetasks()
self.dump_signatures(dump)
if 'printdiff' in dump:
self.write_diffscenetasks(invalidtasks)
if 'printdiff' in dumpsigs:
self.invalidtasks_dump = self.print_diffscenetasks()
self.state = runQueueDumpSigs
if self.state is runQueueDumpSigs:
dumpsigs = self.cooker.configuration.dump_signatures
retval = self.dump_signatures(dumpsigs)
if retval is False:
if 'printdiff' in dumpsigs:
self.write_diffscenetasks(self.invalidtasks_dump)
self.state = runQueueComplete
if self.state is runQueueSceneInit:
@@ -1686,33 +1692,42 @@ class RunQueue:
bb.parse.siggen.dump_sigtask(taskfn, taskname, dataCaches[mc].stamp[taskfn], True)
def dump_signatures(self, options):
if bb.cooker.CookerFeatures.RECIPE_SIGGEN_INFO not in self.cooker.featureset:
bb.fatal("The dump signatures functionality needs the RECIPE_SIGGEN_INFO feature enabled")
if not hasattr(self, "dumpsigs_launched"):
if bb.cooker.CookerFeatures.RECIPE_SIGGEN_INFO not in self.cooker.featureset:
bb.fatal("The dump signatures functionality needs the RECIPE_SIGGEN_INFO feature enabled")
bb.note("Writing task signature files")
bb.note("Writing task signature files")
max_process = int(self.cfgData.getVar("BB_NUMBER_PARSE_THREADS") or os.cpu_count() or 1)
def chunkify(l, n):
return [l[i::n] for i in range(n)]
tids = chunkify(list(self.rqdata.runtaskentries), max_process)
# We cannot use the real multiprocessing.Pool easily due to some local data
# that can't be pickled. This is a cheap multi-process solution.
launched = []
while tids:
if len(launched) < max_process:
p = Process(target=self._rq_dump_sigtid, args=(tids.pop(), ))
max_process = int(self.cfgData.getVar("BB_NUMBER_PARSE_THREADS") or os.cpu_count() or 1)
def chunkify(l, n):
return [l[i::n] for i in range(n)]
dumpsigs_tids = chunkify(list(self.rqdata.runtaskentries), max_process)
# We cannot use the real multiprocessing.Pool easily due to some local data
# that can't be pickled. This is a cheap multi-process solution.
self.dumpsigs_launched = []
for tids in dumpsigs_tids:
p = Process(target=self._rq_dump_sigtid, args=(tids, ))
p.start()
launched.append(p)
for q in launched:
# The finished processes are joined when calling is_alive()
if not q.is_alive():
launched.remove(q)
for p in launched:
self.dumpsigs_launched.append(p)
return 1.0
for q in self.dumpsigs_launched:
# The finished processes are joined when calling is_alive()
if not q.is_alive():
self.dumpsigs_launched.remove(q)
if self.dumpsigs_launched:
return 1.0
for p in self.dumpsigs_launched:
p.join()
bb.parse.siggen.dump_sigs(self.rqdata.dataCaches, options)
return
return False
def print_diffscenetasks(self):
def get_root_invalid_tasks(task, taskdepends, valid, noexec, visited_invalid):