spdx: add option to include only compiled sources

When SPDX_INCLUDE_COMPILED_SOURCES is enabled, only include the
source code files that are used during compilation.

It uses debugsource information generated during do_package.

This enables an external tool to use the SPDX information to disregard
vulnerabilities that only affect source files which are not compiled.

As an example, when used with the default config with linux-yocto, the SPDX
size is reduced from 156MB to 61MB.

Tested with bitbake world on oe-core.

(From OE-Core rev: c6a2f1fca76fae4c3ea471a0c63d0b453beea968)
Adapted to existing files for create-spdx-2.2

CC: Mathieu Dubois-Briand <mathieu.dubois-briand@bootlin.com>
CC: Joshua Watt <JPEWhacker@gmail.com>
(From OE-Core rev: a2866934e58fb377a73e87576c8594988a63ad1b)

Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
Signed-off-by: Steve Sakoman <steve@sakoman.com>
This commit is contained in:
Daniel Turull
2025-06-19 10:47:36 +02:00
committed by Steve Sakoman
parent 5b7a6dec85
commit 2366605a35
2 changed files with 54 additions and 0 deletions

View File

@@ -100,6 +100,9 @@ python() {
# Transform the license array to a dictionary
data["licenses"] = {l["licenseId"]: l for l in data["licenses"]}
d.setVar("SPDX_LICENSE_DATA", data)
if d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1":
d.setVar("SPDX_INCLUDE_SOURCES", "1")
}
def convert_license_to_spdx(lic, document, d, existing={}):
@@ -215,6 +218,11 @@ def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
spdx_files = []
file_counter = 1
check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
if check_compiled_sources:
compiled_sources, types = oe.spdx.get_compiled_sources(d)
bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
for subdir, dirs, files in os.walk(topdir):
dirs[:] = [d for d in dirs if d not in ignore_dirs]
if subdir == str(topdir):
@@ -225,6 +233,10 @@ def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
filename = str(filepath.relative_to(topdir))
if not filepath.is_symlink() and filepath.is_file():
# Check if file is compiled
if check_compiled_sources:
if not oe.spdx.is_compiled_source(filename, compiled_sources, types):
continue
spdx_file = oe.spdx.SPDXFile()
spdx_file.SPDXID = get_spdxid(file_counter)
for t in get_types(filepath):

View File

@@ -355,3 +355,45 @@ class SPDXDocument(SPDXObject):
if r.spdxDocument == namespace:
return r
return None
def is_compiled_source(filename, compiled_sources, types):
    """
    Check if the file is a compiled file

    filename is the path of the candidate file, compiled_sources is the
    collection of source paths known to be compiled and types is the
    collection of file extensions (without the leading dot) found among
    those compiled sources.

    Returns True when the file must be kept: either there is no compiled
    source information at all, the file is not of a compiled type, or the
    file is listed in compiled_sources. Returns False otherwise.
    """
    import os

    # If we don't have compiled source, we assume all are compiled.
    if not compiled_sources:
        return True

    # We return always true if the file type is not in the list of compiled files.
    # Some files in the source directory are not compiled, for example, Makefiles,
    # but also python .py file. We need to include them in the SPDX.
    basename = os.path.basename(filename)
    # Everything after the first dot, e.g. "bar.c" for "foo.bar.c". This is
    # the form in which get_compiled_sources() records the extensions.
    full_ext = basename.partition(".")[2]
    # Only the final suffix, e.g. "c" for "foo.bar.c". Without this check a
    # multi-dot file name of a compiled type would never be filtered out.
    last_ext = os.path.splitext(basename)[1][1:]
    if full_ext not in types and last_ext not in types:
        return True

    # Check that the file is in the list
    return filename in compiled_sources
def get_compiled_sources(d):
    """
    Get list of compiled sources from debug information and normalize the paths

    Reads the debug source information through
    oe.package.read_debugsources_info(d) and merges the per-entry source
    lists into a single set.
    NOTE(review): no path normalisation happens in this function itself; it
    is presumably done by read_debugsources_info - confirm.

    Returns a tuple (sources, types): sources is the set of all source
    paths and types is the set of non-empty file extensions (text after the
    first dot of the base name) found among them. When no debug source
    information is available, two empty lists are returned.
    """
    import itertools
    # Imported locally, like in is_compiled_source(), so the function does
    # not depend on "os" being available as a module-level name.
    import os

    import oe.package

    source_info = oe.package.read_debugsources_info(d)
    if not source_info:
        bb.debug(1, "Do not have debugsources.list. Skipping")
        return [], []

    # Sources are not split now in SPDX, so we aggregate them
    sources = set(itertools.chain.from_iterable(source_info.values()))

    # Check extensions of files
    types = set()
    for src in sources:
        ext = os.path.basename(src).partition(".")[2]
        # Files without an extension do not contribute a type; adding to a
        # set is idempotent, so no membership check is needed.
        if ext:
            types.add(ext)

    bb.debug(1, f"Num of sources: {len(sources)} and types: {len(types)} {str(types)}")
    return sources, types