Files
poky/bitbake/lib/bb/fetch2/gcp.py
Etienne Cordonnier 4bca6762e5 bitbake: gcp.py: remove slow calls to gsutil stat
The changes of 1ab1d36c0af6fc58a974106b61ff4d37da6cb229 added calls to "gsutil stat" to avoid unhandled exceptions, however:
- In the case of checkstatus(), this is redundant with the call to self.gcp_client.bucket(ud.host).blob(path).exists(), which already returns True/False and does not throw an exception if the file does not exist (see the sketch below).
- The call to "gsutil stat" is also much slower than calling exists() through the Python client, so the call to exists() should not be replaced with a call to "gsutil stat".
- I think the intent of calling check_network_access() in checkstatus() was to error out when network access is disabled. We can instead change the string "gsutil stat" to something else to make the code more readable.
- Add a try/except block in download() instead of the extra call to gsutil.

[RP: Tweak to avoid import until needed so google module isn't required for everyone]
(Bitbake rev: 59df5390381792aba4f3f5185000adf5109267fb)

Signed-off-by: Etienne Cordonnier <ecordonnier@snap.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Signed-off-by: Steve Sakoman <steve@sakoman.com>
2025-06-11 08:17:34 -07:00
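
For context, a minimal sketch of the google-cloud-storage client behaviour the commit relies on (bucket and object names are placeholders; assumes the client is already authenticated):

from google.cloud import storage
from google.api_core.exceptions import NotFound

client = storage.Client(project=None)
blob = client.bucket("example-bucket").blob("path/to/object")

# exists() returns True/False and does not raise for a missing object,
# so no separate "gsutil stat" probe is needed in checkstatus().
print(blob.exists())

# download_to_filename() raises NotFound for a missing object, which
# download() now catches instead of probing with gsutil first.
try:
    blob.download_to_filename("/tmp/object")
except NotFound as exc:
    print(f"object missing: {exc}")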

103 lines
3.4 KiB
Python

"""
BitBake 'Fetch' implementation for Google Cloup Platform Storage.
Class for fetching files from Google Cloud Storage using the
Google Cloud Storage Python Client. The GCS Python Client must
be correctly installed, configured and authenticated prior to use.
Additionally, gsutil must also be installed.
"""
# Copyright (C) 2023, Snap Inc.
#
# Based in part on bb.fetch2.s3:
# Copyright (C) 2017 Andre McCurdy
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig
import os
import bb
import urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger

class GCP(FetchMethod):
    """
    Class to fetch urls via GCP's Python API.
    """
    def __init__(self):
        self.gcp_client = None

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with GCP.
        """
        return ud.type in ['gs']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
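
    # NOTE: storage.Client(project=None) resolves its credentials (and the
    # project) from the environment via Application Default Credentials, e.g.
    # a service account key pointed to by GOOGLE_APPLICATION_CREDENTIALS or
    # credentials set up with "gcloud auth application-default login".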
    def get_gcp_client(self):
        from google.cloud import storage
        self.gcp_client = storage.Client(project=None)

    def download(self, ud, d):
        """
        Fetch urls using the GCP API.
        Assumes localpath was called first.
        """
        from google.api_core.exceptions import NotFound
        logger.debug2(f"Trying to download gs://{ud.host}{ud.path} to {ud.localpath}")
        if self.gcp_client is None:
            self.get_gcp_client()
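
        # The second argument to check_network_access() is only an informational
        # label used in error messages and logs when network access is denied;
        # it no longer refers to an external gsutil invocation.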
        bb.fetch2.check_network_access(d, "blob.download_to_filename", f"gs://{ud.host}{ud.path}")

        # Path sometimes has leading slash, so strip it
        path = ud.path.lstrip("/")
        blob = self.gcp_client.bucket(ud.host).blob(path)
        try:
            blob.download_to_filename(ud.localpath)
        except NotFound:
            raise FetchError("The GCP API threw a NotFound exception")

        # Additional sanity checks copied from the wget class (although there
        # are no known issues which mean these are required, treat the GCP API
        # tool with a little healthy suspicion).
        if not os.path.exists(ud.localpath):
            raise FetchError(f"The GCP API returned success for gs://{ud.host}{ud.path} but {ud.localpath} doesn't exist?!")

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError(f"The downloaded file for gs://{ud.host}{ud.path} resulted in a zero size file?! Deleting and failing since this isn't right.")

        return True

    def checkstatus(self, fetch, ud, d):
        """
        Check the status of a URL.
        """
        logger.debug2(f"Checking status of gs://{ud.host}{ud.path}")
        if self.gcp_client is None:
            self.get_gcp_client()

        bb.fetch2.check_network_access(d, "gcp_client.bucket(ud.host).blob(path).exists()", f"gs://{ud.host}{ud.path}")

        # Path sometimes has leading slash, so strip it
        path = ud.path.lstrip("/")
        if not self.gcp_client.bucket(ud.host).blob(path).exists():
            raise FetchError(f"The GCP API reported that gs://{ud.host}{ud.path} does not exist")
        else:
            return True
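
For reference, a minimal, untested sketch of driving this fetcher from code that already has a BitBake datastore "d" available (for example a task or a bb.tests.fetch-style test); the bucket and object names are placeholders:

import bb.fetch2

def fetch_from_gcs(d):
    # DL_DIR controls where the fetched blob is stored locally.
    d.setVar("DL_DIR", "/tmp/downloads")
    uri = "gs://example-bucket/path/to/file.tar.gz"  # hypothetical object
    fetcher = bb.fetch2.Fetch([uri], d)
    fetcher.checkstatus()   # ends up in GCP.checkstatus() -> blob.exists()
    fetcher.download()      # ends up in GCP.download() -> blob.download_to_filename()
    return fetcher.localpath(uri)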