python3: Clean up create manifest script

This patch intends to clean up the whole create_manifest script/task
for python3.

This is an effort to make the code more human friendly and facilitate
adoption. It not only cleans up the code but also improves the comments.
It should also be easier to upgrade the manifest after each python3
upgrade now; with these fixes the transition to python 3.7 should be
seamless.

It fixes a rather harmless bug where module dependencies were being
added twice to the core package, and adds tests and sqlite3-tests
as special packages since we want specific dependencies on those.
It also fixes a bug that happened on a few packages that
contained a directory with the same name as the module itself,
e.g. asyncio, where the script skipped checking that module for
dependencies.

Lastly, it improves the output: the script now errors out if a module is
found in more than one package, which is what usually happens when
python upstream introduces a new file that the current
manifest is unaware of. It is better to exit with an error because
otherwise the user wouldn't know that anything went wrong unless the output
of the script was checked manually.

(From OE-Core rev: 658042073cb58c58ac4db4ff13689d1ffd89b72e)

Signed-off-by: Alejandro Enedino Hernandez Samaniego <alejandr@xilinx.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Alejandro Enedino Hernandez Samaniego
2018-09-04 23:45:44 -07:00
committed by Richard Purdie
parent d3148222f0
commit f65c898ba6

View File

@@ -48,21 +48,21 @@ import os
pyversion = str(sys.argv[1])
# Hack to get native python search path (for folders), not fond of it but it works for now
pivot='recipe-sysroot-native'
pivot = 'recipe-sysroot-native'
for p in sys.path:
if pivot in p:
nativelibfolder=p[:p.find(pivot)+len(pivot)]
nativelibfolder = p[:p.find(pivot)+len(pivot)]
# Empty dict to hold the whole manifest
new_manifest = {}
# Check for repeated files, folders and wildcards
allfiles=[]
repeated=[]
wildcards=[]
allfiles = []
repeated = []
wildcards = []
hasfolders=[]
allfolders=[]
hasfolders = []
allfolders = []
def isFolder(value):
value = value.replace('${PYTHON_MAJMIN}',pyversion)
@@ -79,166 +79,204 @@ def isCached(item):
# Read existing JSON manifest
with open('python3-manifest.json') as manifest:
old_manifest=json.load(manifest)
old_manifest = json.load(manifest)
#
# First pass to get core-package functionality, because we base everything on the fact that core is actually working
# Not exactly the same so it should not be a function
#
print ('Getting dependencies for package: core')
# Special call to check for core package
# This special call gets the core dependencies and
# appends them to the old manifest so it doesn't hurt what it
# currently holds.
# This way when other packages check for dependencies
# on the new core package, they will still find them
# even when checking the old_manifest
output = subprocess.check_output([sys.executable, 'get_module_deps3.py', 'python-core-package']).decode('utf8')
for item in output.split():
item = item.replace(pyversion,'${PYTHON_MAJMIN}')
# We append it so it doesnt hurt what we currently have:
if isCached(item):
if item not in old_manifest['core']['cached']:
# We use the same data structure since its the one which will be used to check
# dependencies for other packages
old_manifest['core']['cached'].append(item)
for coredep in output.split():
coredep = coredep.replace(pyversion,'${PYTHON_MAJMIN}')
if isCached(coredep):
if coredep not in old_manifest['core']['cached']:
old_manifest['core']['cached'].append(coredep)
else:
if item not in old_manifest['core']['files']:
# We use the same data structure since its the one which will be used to check
# dependencies for other packages
old_manifest['core']['files'].append(item)
if coredep not in old_manifest['core']['files']:
old_manifest['core']['files'].append(coredep)
for value in old_manifest['core']['files']:
value = value.replace(pyversion,'${PYTHON_MAJMIN}')
# Ignore folders, since we don't import those, difficult to handle multilib
if isFolder(value):
# Pass it directly
if isCached(value):
if value not in old_manifest['core']['cached']:
old_manifest['core']['cached'].append(value)
# The second step is to loop through the existing files contained in the core package
# according to the old manifest, and identify whether they are modules or some other type
# of file that we can't import (directories, binaries, configs), in which case we
# can only assume they were added correctly (manually), so we ignore those and
# pass them to the manifest directly.
for filedep in old_manifest['core']['files']:
if isFolder(filedep):
if isCached(filedep):
if filedep not in old_manifest['core']['cached']:
old_manifest['core']['cached'].append(filedep)
else:
if value not in old_manifest['core']['files']:
old_manifest['core']['files'].append(value)
if filedep not in old_manifest['core']['files']:
old_manifest['core']['files'].append(filedep)
continue
# Ignore binaries, since we don't import those, assume it was added correctly (manually)
if '${bindir}' in value:
# Pass it directly
if value not in old_manifest['core']['files']:
old_manifest['core']['files'].append(value)
if '${bindir}' in filedep:
if filedep not in old_manifest['core']['files']:
old_manifest['core']['files'].append(filedep)
continue
# Ignore empty values
if value == '':
if filedep == '':
continue
if '${includedir}' in value:
if value not in old_manifest['core']['files']:
old_manifest['core']['files'].append(value)
if '${includedir}' in filedep:
if filedep not in old_manifest['core']['files']:
old_manifest['core']['files'].append(filedep)
continue
# Get module name , shouldnt be affected by libdir/bindir
value = os.path.splitext(os.path.basename(os.path.normpath(value)))[0]
# Launch separate task for each module for deterministic behavior
# Each module will only import what is necessary for it to work in specific
print ('Getting dependencies for module: %s' % value)
output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % value]).decode('utf8')
print ('The following dependencies were found for module %s:\n' % value)
# Get actual module name , shouldnt be affected by libdir/bindir, etc.
pymodule = os.path.splitext(os.path.basename(os.path.normpath(filedep)))[0]
# We now know that we are dealing with a python module, so we can import it
# and check what its dependencies are.
# We launch a separate task for each module for deterministic behavior.
# Each module will only import what is necessary for it specifically to work.
# The output of each task will contain each module's dependencies
print ('Getting dependencies for module: %s' % pymodule)
output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule]).decode('utf8')
print ('The following dependencies were found for module %s:\n' % pymodule)
print (output)
for item in output.split():
item = item.replace(pyversion,'${PYTHON_MAJMIN}')
# We append it so it doesnt hurt what we currently have:
if isCached(item):
if item not in old_manifest['core']['cached']:
# We use the same data structure since its the one which will be used to check
# dependencies for other packages
old_manifest['core']['cached'].append(item)
for pymodule_dep in output.split():
pymodule_dep = pymodule_dep.replace(pyversion,'${PYTHON_MAJMIN}')
if isCached(pymodule_dep):
if pymodule_dep not in old_manifest['core']['cached']:
old_manifest['core']['cached'].append(pymodule_dep)
else:
if item not in old_manifest['core']['files']:
# We use the same data structure since its the one which will be used to check
# dependencies for other packages
old_manifest['core']['files'].append(item)
if pymodule_dep not in old_manifest['core']['files']:
old_manifest['core']['files'].append(pymodule_dep)
# We check which packages include folders
for key in old_manifest:
for value in old_manifest[key]['files']:
# Ignore folders, since we don't import those, difficult to handle multilib
if isFolder(value):
print ('%s is a folder' % value)
if key not in hasfolders:
hasfolders.append(key)
if value not in allfolders:
allfolders.append(value)
# At this point we are done with the core package.
# The old_manifest dictionary is updated only for the core package because
# all others will use this as a base.
for key in old_manifest:
# To improve the script speed, we check which packages contain directories
# since we will be looping through (only) those later.
for pypkg in old_manifest:
for filedep in old_manifest[pypkg]['files']:
if isFolder(filedep):
print ('%s is a folder' % filedep)
if pypkg not in hasfolders:
hasfolders.append(pypkg)
if filedep not in allfolders:
allfolders.append(filedep)
# This is the main loop that will handle each package.
# It works in a similar fashion to the previous step, but
# we will now be updating a new dictionary that will eventually
# become the new manifest.
#
# The following loops through all packages in the manifest,
# through all files in each of them, and checks whether or not
# they are modules and can be imported.
# If they can be imported, then it checks for dependencies for
# each of them by launching a separate task.
# The output of that task is then parsed and the manifest is updated
# accordingly: either the module is added to FILES for the current package,
# or, if that module already belongs to another package, the current one
# will RDEPEND on it.
for pypkg in old_manifest:
# Use an empty dict as data structure to hold data for each package and fill it up
new_manifest[key]={}
new_manifest[key]['files']=[]
new_manifest[key]['rdepends']=[]
new_manifest[pypkg] = {}
new_manifest[pypkg]['files'] = []
new_manifest[pypkg]['rdepends'] = []
# All packages should depend on core
if key != 'core':
new_manifest[key]['rdepends'].append('core')
new_manifest[key]['cached']=[]
if pypkg != 'core':
new_manifest[pypkg]['rdepends'].append('core')
new_manifest[pypkg]['cached'] = []
else:
new_manifest[key]['cached']=old_manifest[key]['cached']
new_manifest[key]['summary']=old_manifest[key]['summary']
new_manifest[pypkg]['cached'] = old_manifest[pypkg]['cached']
new_manifest[pypkg]['summary'] = old_manifest[pypkg]['summary']
print('\n')
print('--------------------------')
print ('Handling package %s' % pypkg)
print('--------------------------')
# Handle special cases, we assume that when they were manually added
# to the manifest we knew what we were doing.
print('\n')
print('--------------------------')
print ('Handling package %s' % key)
print('--------------------------')
special_packages=['misc', 'modules', 'dev']
if key in special_packages or 'staticdev' in key:
print('Passing %s package directly' % key)
new_manifest[key]=old_manifest[key]
special_packages = ['misc', 'modules', 'dev', 'tests', 'sqlite3-tests']
if pypkg in special_packages or 'staticdev' in pypkg:
print('Passing %s package directly' % pypkg)
new_manifest[pypkg] = old_manifest[pypkg]
continue
for value in old_manifest[key]['files']:
# We already handled core on the first pass
if key == 'core':
new_manifest[key]['files'].append(value)
continue
# Ignore folders, since we don't import those, difficult to handle multilib
if isFolder(value):
# Pass folders directly
new_manifest[key]['files'].append(value)
# Ignore binaries, since we don't import those
if '${bindir}' in value:
# Pass it directly to the new manifest data structure
if value not in new_manifest[key]['files']:
new_manifest[key]['files'].append(value)
continue
# Ignore empty values
if value == '':
continue
if '${includedir}' in value:
if value not in new_manifest[key]['files']:
new_manifest[key]['files'].append(value)
for filedep in old_manifest[pypkg]['files']:
# We already handled core on the first pass, we can ignore it now
if pypkg == 'core':
if filedep not in new_manifest[pypkg]['files']:
new_manifest[pypkg]['files'].append(filedep)
continue
# Get module name , shouldnt be affected by libdir/bindir
# Handle/ignore what we cant import
if isFolder(filedep):
new_manifest[pypkg]['files'].append(filedep)
# Asyncio (and others) are both the package and the folder name, we should not skip those...
path,mod = os.path.split(filedep)
if mod != pypkg:
continue
if '${bindir}' in filedep:
if filedep not in new_manifest[pypkg]['files']:
new_manifest[pypkg]['files'].append(filedep)
continue
if filedep == '':
continue
if '${includedir}' in filedep:
if filedep not in new_manifest[pypkg]['files']:
new_manifest[pypkg]['files'].append(filedep)
continue
# Get actual module name , shouldnt be affected by libdir/bindir, etc.
# We need to check if the imported module comes from another (e.g. sqlite3.dump)
path,value = os.path.split(value)
path,pymodule = os.path.split(filedep)
path = os.path.basename(path)
value = os.path.splitext(os.path.basename(value))[0]
pymodule = os.path.splitext(os.path.basename(pymodule))[0]
# If this condition is met, it means we need to import it from another module
# or its the folder itself (e.g. unittest)
if path == key:
if value:
value = path + '.' + value
else:
value = path
if path == pypkg:
if pymodule:
pymodule = path + '.' + pymodule
else:
pymodule = path
# Launch separate task for each module for deterministic behavior
# Each module will only import what is necessary for it to work in specific
print ('\nGetting dependencies for module: %s' % value)
output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % value]).decode('utf8')
# We can print dependencies for debugging purposes
print ('The following dependencies were found for module %s:\n' % value)
# We now know that we are dealing with a python module, so we can import it
# and check what its dependencies are.
# We launch a separate task for each module for deterministic behavior.
# Each module will only import what is necessary for it specifically to work.
# The output of each task will contain each module's dependencies
print ('\nGetting dependencies for module: %s' % pymodule)
output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule]).decode('utf8')
print ('The following dependencies were found for module %s:\n' % pymodule)
print (output)
# Output will have all dependencies
reportFILES = []
reportRDEPS = []
for item in output.split():
for pymodule_dep in output.split():
# Warning: This first part is ugly
# One of the dependencies that was found, could be inside of one of the folders included by another package
@@ -258,22 +296,22 @@ for key in old_manifest:
# is folder_string inside path/folder1/folder2/filename?,
# Yes, it works, but we waste a couple of milliseconds.
item = item.replace(pyversion,'${PYTHON_MAJMIN}')
inFolders=False
pymodule_dep = pymodule_dep.replace(pyversion,'${PYTHON_MAJMIN}')
inFolders = False
for folder in allfolders:
if folder in item:
if folder in pymodule_dep:
inFolders = True # Did we find a folder?
folderFound = False # Second flag to break inner for
# Loop only through packages which contain folders
for keyfolder in hasfolders:
for pypkg_with_folder in hasfolders:
if (folderFound == False):
#print('Checking folder %s on package %s' % (item,keyfolder))
for file_folder in old_manifest[keyfolder]['files'] or file_folder in old_manifest[keyfolder]['cached']:
if file_folder==folder:
print ('%s folder found in %s' % (folder, keyfolder))
# print('Checking folder %s on package %s' % (pymodule_dep,pypkg_with_folder))
for folder_dep in old_manifest[pypkg_with_folder]['files'] or folder_dep in old_manifest[pypkg_with_folder]['cached']:
if folder_dep == folder:
print ('%s folder found in %s' % (folder, pypkg_with_folder))
folderFound = True
if keyfolder not in new_manifest[key]['rdepends'] and keyfolder != key:
new_manifest[key]['rdepends'].append(keyfolder)
if pypkg_with_folder not in new_manifest[pypkg]['rdepends'] and pypkg_with_folder != pypkg:
new_manifest[pypkg]['rdepends'].append(pypkg_with_folder)
else:
break
@@ -282,81 +320,95 @@ for key in old_manifest:
continue
# We might already have it on the dictionary since it could depend on a (previously checked) module
if item not in new_manifest[key]['files'] and item not in new_manifest[key]['cached']:
# No directories beyond this point
# We might already have this module on the dictionary since it could depend on a (previously checked) module
if pymodule_dep not in new_manifest[pypkg]['files'] and pymodule_dep not in new_manifest[pypkg]['cached']:
# Handle core as a special package, we already did it so we pass it to NEW data structure directly
if key=='core':
print('Adding %s to %s FILES' % (item, key))
if item.endswith('*'):
wildcards.append(item)
if isCached(item):
new_manifest[key]['cached'].append(item)
else:
new_manifest[key]['files'].append(item)
# Check for repeated files
if item not in allfiles:
allfiles.append(item)
else:
repeated.append(item)
if pypkg == 'core':
print('Adding %s to %s FILES' % (pymodule_dep, pypkg))
if pymodule_dep.endswith('*'):
wildcards.append(pymodule_dep)
if isCached(pymodule_dep):
new_manifest[pypkg]['cached'].append(pymodule_dep)
else:
new_manifest[pypkg]['files'].append(pymodule_dep)
# Check for repeated files
if pymodule_dep not in allfiles:
allfiles.append(pymodule_dep)
else:
if pymodule_dep not in repeated:
repeated.append(pymodule_dep)
else:
# Check if this dependency is already contained on another package, so we add it
# Last step: Figure out if this belongs to FILES or RDEPENDS
# We check if this module is already contained on another package, so we add that one
# as an RDEPENDS, or if its not, it means it should be contained on the current
# package, so we should add it to FILES
for newkey in old_manifest:
# package, and we should add it to FILES
for possible_rdep in old_manifest:
# Debug
#print('Checking %s ' % item + ' in %s' % newkey)
if item in old_manifest[newkey]['files'] or item in old_manifest[newkey]['cached']:
# Since were nesting, we need to check its not the same key
if(newkey!=key):
if newkey not in new_manifest[key]['rdepends']:
# Add it to the new manifest data struct
reportRDEPS.append('Adding %s to %s RDEPENDS, because it contains %s\n' % (newkey, key, item))
new_manifest[key]['rdepends'].append(newkey)
break
# print('Checking %s ' % pymodule_dep + ' in %s' % possible_rdep)
if pymodule_dep in old_manifest[possible_rdep]['files'] or pymodule_dep in old_manifest[possible_rdep]['cached']:
# Since were nesting, we need to check its not the same pypkg
if(possible_rdep != pypkg):
if possible_rdep not in new_manifest[pypkg]['rdepends']:
# Add it to the new manifest data struct as RDEPENDS since it contains something this module needs
reportRDEPS.append('Adding %s to %s RDEPENDS, because it contains %s\n' % (possible_rdep, pypkg, pymodule_dep))
new_manifest[pypkg]['rdepends'].append(possible_rdep)
break
else:
# A module shouldn't contain itself (${libdir}/python3/sqlite3 shouldnt be on sqlite3 files)
if os.path.basename(item) != key:
reportFILES.append(('Adding %s to %s FILES\n' % (item, key)))
# Since it wasnt found on another package, its not an RDEP, so add it to FILES for this package
if isCached(item):
new_manifest[key]['cached'].append(item)
else:
new_manifest[key]['files'].append(item)
if item.endswith('*'):
wildcards.append(item)
if item not in allfiles:
allfiles.append(item)
# Since this module wasnt found on another package, it is not an RDEP,
# so we add it to FILES for this package.
# A module shouldn't contain itself (${libdir}/python3/sqlite3 shouldnt be on sqlite3 files)
if os.path.basename(pymodule_dep) != pypkg:
reportFILES.append(('Adding %s to %s FILES\n' % (pymodule_dep, pypkg)))
if isCached(pymodule_dep):
new_manifest[pypkg]['cached'].append(pymodule_dep)
else:
repeated.append(item)
new_manifest[pypkg]['files'].append(pymodule_dep)
if pymodule_dep.endswith('*'):
wildcards.append(pymodule_dep)
if pymodule_dep not in allfiles:
allfiles.append(pymodule_dep)
else:
if pymodule_dep not in repeated:
repeated.append(pymodule_dep)
print('\n')
print('#################################')
print('Summary for module %s' % value)
print('FILES found for module %s:' % value)
print('Summary for module %s' % pymodule)
print('FILES found for module %s:' % pymodule)
print(''.join(reportFILES))
print('RDEPENDS found for module %s:' % value)
print('RDEPENDS found for module %s:' % pymodule)
print(''.join(reportRDEPS))
print('#################################')
print ('The following files are repeated (contained in more than one package), please check which package should get it:')
print (repeated)
print('The following files contain wildcards, please check they are necessary')
print('The following FILES contain wildcards, please check if they are necessary')
print(wildcards)
print('The following files contain folders, please check they are necessary')
print('The following FILES contain folders, please check if they are necessary')
print(hasfolders)
# Sort it just so it looks nicer
for key in new_manifest:
new_manifest[key]['files'].sort()
new_manifest[key]['cached'].sort()
new_manifest[key]['rdepends'].sort()
for pypkg in new_manifest:
new_manifest[pypkg]['files'].sort()
new_manifest[pypkg]['cached'].sort()
new_manifest[pypkg]['rdepends'].sort()
# Create the manifest from the data structure that was built
with open('python3-manifest.json.new','w') as outfile:
json.dump(new_manifest,outfile,sort_keys=True, indent=4)
outfile.write('\n')
if (repeated):
error_msg = '\n\nERROR:\n'
error_msg += 'The following files are repeated (contained in more than one package),\n'
error_msg += 'this is likely to happen when new files are introduced after an upgrade,\n'
error_msg += 'please check which package should get it,\n modify the manifest accordingly and re-run the create_manifest task:\n'
error_msg += '\n'.join(repeated)
error_msg += '\n'
sys.exit(error_msg)