From 79f193ffa0394301242d25800a715b970c2a0b95 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Mon, 4 Nov 2024 17:42:33 +0000 Subject: [PATCH] bitbake: runqueue: Avoid dumpsigs idle loop blocking We're seeing some failures on hosts where slow "idle" loop iterations are causing bitbake server timeouts. These seem to happen particularly in the dump_signatures() function within runqueue. That isn't entirely surprising since it creates a pool of threads to execute work and at best can take around 10s to execute and return control back to the main loop. On a slow system, it is understandable this can take longer, particularly as these functions are creating large chunks of IO. Since the work is being done in threads, we can launch them, return to idle and check on the results periodically as they complete. This should hopefully address some of the remaining timeout issues we see on the autobuilder in oe-selftest sstate tests. (Bitbake rev: e66f1b643b4b77404ba31f2704cda5af9bf00a57) Signed-off-by: Richard Purdie --- bitbake/lib/bb/runqueue.py | 69 +++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py index 3462ed4457..1b5b58f352 100644 --- a/bitbake/lib/bb/runqueue.py +++ b/bitbake/lib/bb/runqueue.py @@ -128,6 +128,7 @@ class RunQueueStats: # runQueue state machine runQueuePrepare = 2 runQueueSceneInit = 3 +runQueueDumpSigs = 4 runQueueRunning = 6 runQueueFailed = 7 runQueueCleanUp = 8 @@ -1588,14 +1589,19 @@ class RunQueue: self.rqdata.init_progress_reporter.next_stage() self.rqexe = RunQueueExecute(self) - dump = self.cooker.configuration.dump_signatures - if dump: + dumpsigs = self.cooker.configuration.dump_signatures + if dumpsigs: self.rqdata.init_progress_reporter.finish() - if 'printdiff' in dump: - invalidtasks = self.print_diffscenetasks() - self.dump_signatures(dump) - if 'printdiff' in dump: - self.write_diffscenetasks(invalidtasks) + if 'printdiff' in 
dumpsigs: + self.invalidtasks_dump = self.print_diffscenetasks() + self.state = runQueueDumpSigs + + if self.state is runQueueDumpSigs: + dumpsigs = self.cooker.configuration.dump_signatures + retval = self.dump_signatures(dumpsigs) + if retval is False: + if 'printdiff' in dumpsigs: + self.write_diffscenetasks(self.invalidtasks_dump) self.state = runQueueComplete if self.state is runQueueSceneInit: @@ -1686,33 +1692,42 @@ class RunQueue: bb.parse.siggen.dump_sigtask(taskfn, taskname, dataCaches[mc].stamp[taskfn], True) def dump_signatures(self, options): - if bb.cooker.CookerFeatures.RECIPE_SIGGEN_INFO not in self.cooker.featureset: - bb.fatal("The dump signatures functionality needs the RECIPE_SIGGEN_INFO feature enabled") + if not hasattr(self, "dumpsigs_launched"): + if bb.cooker.CookerFeatures.RECIPE_SIGGEN_INFO not in self.cooker.featureset: + bb.fatal("The dump signatures functionality needs the RECIPE_SIGGEN_INFO feature enabled") - bb.note("Writing task signature files") + bb.note("Writing task signature files") - max_process = int(self.cfgData.getVar("BB_NUMBER_PARSE_THREADS") or os.cpu_count() or 1) - def chunkify(l, n): - return [l[i::n] for i in range(n)] - tids = chunkify(list(self.rqdata.runtaskentries), max_process) - # We cannot use the real multiprocessing.Pool easily due to some local data - # that can't be pickled. This is a cheap multi-process solution. - launched = [] - while tids: - if len(launched) < max_process: - p = Process(target=self._rq_dump_sigtid, args=(tids.pop(), )) + max_process = int(self.cfgData.getVar("BB_NUMBER_PARSE_THREADS") or os.cpu_count() or 1) + def chunkify(l, n): + return [l[i::n] for i in range(n)] + dumpsigs_tids = chunkify(list(self.rqdata.runtaskentries), max_process) + + # We cannot use the real multiprocessing.Pool easily due to some local data + # that can't be pickled. This is a cheap multi-process solution. 
+ self.dumpsigs_launched = [] + + for tids in dumpsigs_tids: + p = Process(target=self._rq_dump_sigtid, args=(tids, )) p.start() - launched.append(p) - for q in launched: - # The finished processes are joined when calling is_alive() - if not q.is_alive(): - launched.remove(q) - for p in launched: + self.dumpsigs_launched.append(p) + + return 1.0 + + for q in self.dumpsigs_launched: + # The finished processes are joined when calling is_alive() + if not q.is_alive(): + self.dumpsigs_launched.remove(q) + + if self.dumpsigs_launched: + return 1.0 + + for p in self.dumpsigs_launched: p.join() bb.parse.siggen.dump_sigs(self.rqdata.dataCaches, options) - return + return False def print_diffscenetasks(self): def get_root_invalid_tasks(task, taskdepends, valid, noexec, visited_invalid):