bitbake: cache: Allow compression of the data in SiggenRecipeInfo

The data in SiggenRecipeInfo is large and has a lot of duplication. The size
causes a few problems, impacting:

 - bitbake's overall memory usage
 - the amount of data sent over IPC between parsing processes and the server
 - the size of the cache files on disk
 - the size of "sigdata" hash information files on disk

The data consists of strings (some large) or frozenset lists of variables.
To reduce the impact we can:

a) deduplicate the data
b) pass references to the object on the second usage
   (e.g. over IPC or saving into pickle).

This patch does this for SiggenRecipeInfo mostly behind the scenes
but we do need a couple of reset points so that streamed data is written
correctly on the second usage.

(Bitbake rev: 9a2b13af483c20763d6559a823310954884f6ab1)

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Richard Purdie
2022-11-17 11:09:53 +00:00
parent 26f255da09
commit 7d010055e2
2 changed files with 76 additions and 0 deletions

View File

@@ -263,6 +263,80 @@ class SiggenRecipeInfo(RecipeInfoCommon):
cachedata.siggen_varvals[fn] = self.siggen_varvals
cachedata.siggen_taskdeps[fn] = self.siggen_taskdeps
# The siggen variable data is large and impacts:
# - bitbake's overall memory usage
# - the amount of data sent over IPC between parsing processes and the server
# - the size of the cache files on disk
# - the size of "sigdata" hash information files on disk
# The data consists of strings (some large) or frozenset lists of variables
# As such, we a) deduplicate the data here and b) pass references to the object at second
# access (e.g. over IPC or saving into pickle).
#
# All of the state below is shared at class level.

# value -> the first equal instance seen by _restore(), so equal values end up
# sharing one object. reset() does not clear this table.
store = {}
# value -> number handed out by _save() the first time that value was written
save_map = {}
# next number _save() will hand out; numbering starts at 1
save_count = 1
# pid -> {number -> value} for data read back by _restore(); the pid is the one
# recorded in the pickled state by __getstate__()
restore_map = {}
# pid -> next number _restore() will assign for that pid (each starts at 1)
restore_count = {}
@classmethod
def reset(cls):
    """
    Forget the numbering used for both saving and restoring.

    Needs to be called before starting new streamed data in a given process
    (e.g. writing out the cache again). Fresh containers are bound on cls
    rather than clearing the existing ones; the 'store' table is left alone.
    """
    cls.restore_map, cls.restore_count = {}, {}
    cls.save_map, cls.save_count = {}, 1
@classmethod
def _save(cls, deps):
    """
    Turn a {name: value} dict into a list of 3-tuples suitable for streaming.

    The first time a value is seen since the last reset() it is emitted in
    full as (name, value, None) and given the next number in cls.save_map.
    Later occurrences of an equal value are emitted as (name, None, number)
    so the value itself is only written once.

    A falsy deps (None or an empty dict) is returned unchanged.
    """
    if not deps:
        return deps
    encoded = []
    for name, value in deps.items():
        if value in cls.save_map:
            # Already written earlier in this stream: only send the number
            encoded.append((name, None, cls.save_map[value]))
            continue
        cls.save_map[value] = cls.save_count
        cls.save_count += 1
        encoded.append((name, value, None))
    return encoded
@classmethod
def _restore(cls, deps, pid):
    """
    Rebuild a {name: value} dict from the list of 3-tuples written by _save().

    deps is a list of (name, value, mapnum) entries. An entry with a mapnum
    refers back to a value already received from the same pid; any other
    entry carries the value itself, which is given the next number for
    that pid so later entries can refer to it.

    Values are looked up in cls.store first so that equal values share a
    single object.

    pid identifies the stream the data belongs to; numbering is tracked
    separately for each pid.

    A falsy deps (None or empty) is returned unchanged. A KeyError is raised
    if an entry refers to a number that has not been seen for this pid.
    """
    if not deps:
        return deps
    if pid not in cls.restore_map:
        cls.restore_map[pid] = {}
        cls.restore_count[pid] = 1
    seen = cls.restore_map[pid]
    ret = {}
    # Unpack in the same (name, value, mapnum) order that _save() emits;
    # swapping name and value here would invert the restored dict.
    for dep, fs, mapnum in deps:
        if mapnum:
            # Numbers start at 1, so any truthy mapnum is a back-reference
            ret[dep] = seen[mapnum]
            continue
        try:
            # Reuse an equal value if we already hold one
            fs = cls.store[fs]
        except KeyError:
            cls.store[fs] = fs
        seen[cls.restore_count[pid]] = fs
        cls.restore_count[pid] += 1
        ret[dep] = fs
    return ret
def __getstate__(self):
    """
    Return the pickled form of this object.

    Each of the three siggen dicts is passed through _save(), in a fixed
    order, and the pid of the current process is recorded under 'pid'.
    """
    state = {
        key: self._save(self.__dict__[key])
        for key in ("siggen_gendeps", "siggen_taskdeps", "siggen_varvals")
    }
    state['pid'] = os.getpid()
    return state
def __setstate__(self, state):
    """
    Rebuild this object from the dict produced by __getstate__().

    Each of the three siggen entries is decoded with _restore(), using the
    pid stored in the state, and assigned back as an attribute.
    """
    source_pid = state['pid']
    for attr in ("siggen_gendeps", "siggen_taskdeps", "siggen_varvals"):
        restored = self._restore(state[attr], source_pid)
        setattr(self, attr, restored)
def virtualfn2realfn(virtualfn):
"""
Convert a virtual file name to a real one + the associated subclass keyword
@@ -621,6 +695,7 @@ class Cache(object):
p.dump(info)
del self.depends_cache
SiggenRecipeInfo.reset()
@staticmethod
def mtime(cachefile):

View File

@@ -2263,6 +2263,7 @@ class CookerParser(object):
bb.codeparser.parser_cache_savemerge()
bb.cache.SiggenRecipeInfo.reset()
bb.fetch.fetcher_parse_done()
if self.cooker.configuration.profile:
profiles = []