mirror of
https://git.yoctoproject.org/poky
synced 2026-04-22 06:32:12 +02:00
oeqa/logparser: Fix performance issues with ptest log parsing
On the autobuilder a ptest log with 2.1 million lines took around 18 hours to process. This is clearly crazy. We can tweak the processing code to:

a) Stop repeatedly joining large strings together (append to a list instead)
b) Use one startswith expression instead of multiple re.search() operations

With this change, processing time drops from around 18 hours to around 12s.

[YOCTO #13696]

(From OE-Core rev: 01c9f40b979e36a53e789a6bedd89b6d9557dce3)

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
@@ -25,13 +25,20 @@ class PtestParser(object):
|
||||
section_regex['exitcode'] = re.compile(r"^ERROR: Exit status is (.+)")
|
||||
section_regex['timeout'] = re.compile(r"^TIMEOUT: .*/(.+)/ptest")
|
||||
|
||||
# Cache markers so we don't take the re.search() hit all the time.
|
||||
markers = ("PASSED", "FAILED", "SKIPPED", "BEGIN:", "END:", "DURATION:", "ERROR: Exit", "TIMEOUT:")
|
||||
|
||||
def newsection():
|
||||
return { 'name': "No-section", 'log': "" }
|
||||
return { 'name': "No-section", 'log': [] }
|
||||
|
||||
current_section = newsection()
|
||||
|
||||
with open(logfile, errors='replace') as f:
|
||||
for line in f:
|
||||
if not line.startswith(markers):
|
||||
current_section['log'].append(line)
|
||||
continue
|
||||
|
||||
result = section_regex['begin'].search(line)
|
||||
if result:
|
||||
current_section['name'] = result.group(1)
|
||||
@@ -61,7 +68,7 @@ class PtestParser(object):
|
||||
current_section[t] = result.group(1)
|
||||
continue
|
||||
|
||||
current_section['log'] = current_section['log'] + line
|
||||
current_section['log'].append(line)
|
||||
|
||||
for t in test_regex:
|
||||
result = test_regex[t].search(line)
|
||||
@@ -70,6 +77,11 @@ class PtestParser(object):
|
||||
self.results[current_section['name']] = {}
|
||||
self.results[current_section['name']][result.group(1).strip()] = t
|
||||
|
||||
# Python performance for repeatedly joining long strings is poor, do it all at once at the end.
|
||||
# For 2.1 million lines in a log this reduces 18 hours to 12s.
|
||||
for section in self.sections:
|
||||
self.sections[section]['log'] = "".join(self.sections[section]['log'])
|
||||
|
||||
return self.results, self.sections
|
||||
|
||||
# Log the results as files. The file name is the section name and the contents are the tests in that section.
|
||||
|
||||
Reference in New Issue
Block a user