bitbake: cooker: Use shared counter for processing parser jobs

Instead of pre-partitioning which jobs will go to which parser
processes, pass the list of all jobs to all the parser processes
(efficiently via fork()), then use a shared counter of the next index
in the list that needs to be processed. This allows the parser
processes to run independently without needing to be fed by the
parent process, and load-balances them much better.
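
A standalone sketch of the pattern (a toy example, not the bitbake
code itself; worker() and the integer job list are invented for
illustration): each forked process claims the next index under the
counter's built-in lock, so a fast worker simply claims more indices
than a slow one.

    import ctypes
    import multiprocessing

    def worker(jobs, next_job_id, results):
        # Claim indices from the shared counter until the list is
        # exhausted; the parent never has to schedule anything.
        while True:
            with next_job_id.get_lock():
                if next_job_id.value >= len(jobs):
                    return
                i = next_job_id.value
                next_job_id.value += 1
            results.put((i, jobs[i] * 2))  # stand-in for parsing one recipe

    if __name__ == "__main__":
        jobs = list(range(100))  # inherited by the children at fork() time
        next_job_id = multiprocessing.Value(ctypes.c_int, 0)
        results = multiprocessing.Queue()
        procs = [multiprocessing.Process(target=worker,
                                         args=(jobs, next_job_id, results))
                 for _ in range(4)]
        for p in procs:
            p.start()
        collected = [results.get() for _ in jobs]  # one result per job
        for p in procs:
            p.join()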

(Bitbake rev: 373c4ddaf0e8128cc4f7d47aefa9860bd477a00f)

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Author:    Joshua Watt
Date:      2025-07-08 09:42:22 -06:00
Committer: Richard Purdie
Commit:    5adeefd63f (parent 51d825b367)

@@ -26,6 +26,7 @@ import json
 import pickle
 import codecs
 import hashserv
+import ctypes
 
 logger = logging.getLogger("BitBake")
 collectlog = logging.getLogger("BitBake.Collection")
@@ -1998,8 +1999,9 @@ class ParsingFailure(Exception):
         Exception.__init__(self, realexception, recipe)
 
 class Parser(multiprocessing.Process):
-    def __init__(self, jobs, results, quit, profile):
+    def __init__(self, jobs, next_job_id, results, quit, profile):
         self.jobs = jobs
+        self.next_job_id = next_job_id
         self.results = results
         self.quit = quit
         multiprocessing.Process.__init__(self)
@@ -2065,10 +2067,14 @@ class Parser(multiprocessing.Process):
                 break
 
             job = None
-            try:
-                job = self.jobs.pop()
-            except IndexError:
-                havejobs = False
+            if havejobs:
+                with self.next_job_id.get_lock():
+                    if self.next_job_id.value < len(self.jobs):
+                        job = self.jobs[self.next_job_id.value]
+                        self.next_job_id.value += 1
+                    else:
+                        havejobs = False
+
             if job:
                 result = self.parse(*job)
                 # Clear the siggen cache after parsing to control memory usage, its huge
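
The comparison and the increment deliberately sit inside a single
get_lock() section; splitting them would reopen the duplicate-work
race the counter exists to close. A hypothetical broken variant, shown
only to illustrate the hazard (not runnable guidance):

    # RACE if the lock is dropped: two workers can both read value 5
    # before either increments, so both parse jobs[5], the counter ends
    # up at 7, and jobs[6] is never parsed at all.
    if next_job_id.value < len(jobs):
        job = jobs[next_job_id.value]
        next_job_id.value += 1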
@@ -2134,13 +2140,13 @@ class CookerParser(object):
 
         self.bb_caches = bb.cache.MulticonfigCache(self.cfgbuilder, self.cfghash, cooker.caches_array)
         self.fromcache = set()
-        self.willparse = set()
+        self.willparse = []
         for mc in self.cooker.multiconfigs:
             for filename in self.mcfilelist[mc]:
                 appends = self.cooker.collections[mc].get_file_appends(filename)
                 layername = self.cooker.collections[mc].calc_bbfile_priority(filename)[2]
                 if not self.bb_caches[mc].cacheValid(filename, appends):
-                    self.willparse.add((mc, self.bb_caches[mc], filename, appends, layername))
+                    self.willparse.append((mc, self.bb_caches[mc], filename, appends, layername))
                 else:
                     self.fromcache.add((mc, self.bb_caches[mc], filename, appends, layername))
 
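
willparse switches from a set to a list because the workers now
address jobs by position: a set has no defined order and no indexing,
while a list gives every forked process the same stable index-to-job
mapping. A quick illustration (not from the patch):

    jobs = {("mc", "a.bb"), ("mc", "b.bb")}
    # jobs[0] would raise TypeError: 'set' object is not subscriptable
    jobs = [("mc", "a.bb"), ("mc", "b.bb")]
    jobs[0]  # ('mc', 'a.bb'), the same answer in every forked process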
@@ -2159,18 +2165,18 @@ class CookerParser(object):
     def start(self):
         self.results = self.load_cached()
         self.processes = []
         if self.toparse:
             bb.event.fire(bb.event.ParseStarted(self.toparse), self.cfgdata)
 
+            next_job_id = multiprocessing.Value(ctypes.c_int, 0)
+
             self.parser_quit = multiprocessing.Event()
             self.result_queue = multiprocessing.Queue()
 
-            def chunkify(lst,n):
-                return [lst[i::n] for i in range(n)]
-            self.jobs = chunkify(list(self.willparse), self.num_processes)
-
+            # Have to pass in willparse at fork time so all parsing processes have the unpickleable data
+            # then access it by index from the parse queue.
             for i in range(0, self.num_processes):
-                parser = Parser(self.jobs[i], self.result_queue, self.parser_quit, self.cooker.configuration.profile)
+                parser = Parser(self.willparse, next_job_id, self.result_queue, self.parser_quit, self.cooker.configuration.profile)
                 parser.start()
                 self.process_names.append(parser.name)
                 self.processes.append(parser)
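
The comment about passing willparse in at fork time points at the key
constraint: the job tuples carry cache objects the patch describes as
unpickleable, so they cannot travel through a multiprocessing.Queue,
but a child created with the fork start method inherits them directly.
A small demonstration of that difference, assuming Linux's fork start
method (Unpicklable and child() are made-up stand-ins):

    import multiprocessing
    import pickle

    class Unpicklable:
        # Stand-in for the cache objects carried in each job tuple.
        def __reduce__(self):
            raise TypeError("refusing to pickle")

    def child(jobs):
        # Works: under fork() the child inherits 'jobs' copy-on-write;
        # nothing is pickled on the way in.
        print("child sees", len(jobs), "jobs")

    if __name__ == "__main__":
        multiprocessing.set_start_method("fork")  # spawn would pickle args
        jobs = [Unpicklable() for _ in range(3)]

        p = multiprocessing.Process(target=child, args=(jobs,))
        p.start()
        p.join()                   # prints: child sees 3 jobs

        try:
            pickle.dumps(jobs[0])  # what queue transport would require
        except TypeError as err:
            print("queue transport would fail:", err)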