diff --git a/bitbake/doc/bitbake-user-manual/bitbake-user-manual-fetching.rst b/bitbake/doc/bitbake-user-manual/bitbake-user-manual-fetching.rst
index c061bd70ea..f5723d6767 100644
--- a/bitbake/doc/bitbake-user-manual/bitbake-user-manual-fetching.rst
+++ b/bitbake/doc/bitbake-user-manual/bitbake-user-manual-fetching.rst
@@ -688,6 +688,40 @@ Here is an example URL::
 It can also be used when setting mirrors definitions using the
 :term:`PREMIRRORS` variable.
 
+.. _gcp-fetcher:
+
+GCP Fetcher (``gs://``)
+-----------------------
+
+This submodule fetches data from a
+`Google Cloud Storage Bucket <https://cloud.google.com/storage>`__.
+It uses the `Google Cloud Storage Python Client <https://cloud.google.com/python/docs/reference/storage/latest>`__
+to check the status of objects in the bucket and download them.
+The use of the Python client makes it substantially faster than using command
+line tools such as gsutil.
+
+The fetcher requires the Google Cloud Storage Python Client to be installed, along
+with the gsutil tool.
+
+The fetcher requires that the machine has valid credentials for accessing the
+chosen bucket. Instructions for authentication can be found in the
+`Google Cloud documentation <https://cloud.google.com/docs/authentication>`__.
+
+The fetcher can be used for fetching sstate artifacts from a GCS bucket by
+specifying the :term:`SSTATE_MIRRORS` variable as shown below::
+
+   SSTATE_MIRRORS ?= "\
+       file://.* gs://<bucket name>/PATH \
+   "
+
+The fetcher can also be used in recipes::
+
+   SRC_URI = "gs://<bucket name>/<foo>/<bar>"
+
+However, the checksum of the file should also be provided::
+
+   SRC_URI[sha256sum] = "<sha256 sum>"
+
 .. _crate-fetcher:
 
 Crate Fetcher (``crate://``)
@@ -791,6 +825,8 @@ Fetch submodules also exist for the following:
 
 -  OSC (``osc://``)
 
+-  S3 (``s3://``)
+
 -  Secure FTP (``sftp://``)
 
 -  Secure Shell (``ssh://``)
diff --git a/bitbake/lib/bb/fetch2/__init__.py b/bitbake/lib/bb/fetch2/__init__.py
index 2428a26fa6..e4c1d20627 100644
--- a/bitbake/lib/bb/fetch2/__init__.py
+++ b/bitbake/lib/bb/fetch2/__init__.py
@@ -1290,7 +1290,7 @@ class FetchData(object):
 
         if checksum_name in self.parm:
             checksum_expected = self.parm[checksum_name]
-        elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3", "az", "crate"]:
+        elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3", "az", "crate", "gs"]:
             checksum_expected = None
         else:
             checksum_expected = d.getVarFlag("SRC_URI", checksum_name)
@@ -1976,6 +1976,7 @@ from . import npm
 from . import npmsw
 from . import az
 from . import crate
+from . import gcp
 
 methods.append(local.Local())
 methods.append(wget.Wget())
@@ -1997,3 +1998,4 @@ methods.append(npm.Npm())
 methods.append(npmsw.NpmShrinkWrap())
 methods.append(az.Az())
 methods.append(crate.Crate())
+methods.append(gcp.GCP())
diff --git a/bitbake/lib/bb/fetch2/gcp.py b/bitbake/lib/bb/fetch2/gcp.py
new file mode 100644
index 0000000000..f42c81fda8
--- /dev/null
+++ b/bitbake/lib/bb/fetch2/gcp.py
@@ -0,0 +1,98 @@
+"""
+BitBake 'Fetch' implementation for Google Cloud Platform Storage.
+
+Class for fetching files from Google Cloud Storage using the
+Google Cloud Storage Python Client. The GCS Python Client must
+be correctly installed, configured and authenticated prior to use.
+Additionally, gsutil must also be installed.
+
+"""
+
+# Copyright (C) 2023, Snap Inc.
+#
+# Based in part on bb.fetch2.s3:
+# Copyright (C) 2017 Andre McCurdy
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Based on functions from the base bb module, Copyright 2003 Holger Schurig
+
+import os
+import bb
+import urllib.parse, urllib.error
+from bb.fetch2 import FetchMethod
+from bb.fetch2 import FetchError
+from bb.fetch2 import logger
+
+class GCP(FetchMethod):
+    """
+    Class to fetch urls via GCP's Python API.
+    """
+    def __init__(self):
+        self.gcp_client = None
+
+    def supports(self, ud, d):
+        """
+        Check to see if a given url can be fetched with GCP.
+        """
+        return ud.type in ['gs']
+
+    def recommends_checksum(self, urldata):
+        return True
+
+    def urldata_init(self, ud, d):
+        if 'downloadfilename' in ud.parm:
+            ud.basename = ud.parm['downloadfilename']
+        else:
+            ud.basename = os.path.basename(ud.path)
+
+        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
+
+    def get_gcp_client(self):
+        from google.cloud import storage
+        self.gcp_client = storage.Client(project=None)
+
+    def download(self, ud, d):
+        """
+        Fetch urls using the GCP API.
+        Assumes localpath was called first.
+        """
+        logger.debug2(f"Trying to download gs://{ud.host}{ud.path} to {ud.localpath}")
+        if self.gcp_client is None:
+            self.get_gcp_client()
+
+        bb.fetch2.check_network_access(d, "gsutil stat", ud.url)
+
+        # Path sometimes has a leading slash, so strip it
+        path = ud.path.lstrip("/")
+        blob = self.gcp_client.bucket(ud.host).blob(path)
+        blob.download_to_filename(ud.localpath)
+
+        # Additional sanity checks copied from the wget class (although there
+        # are no known issues which mean these are required, treat the GCP API
+        # with a little healthy suspicion).
+        if not os.path.exists(ud.localpath):
+            raise FetchError(f"The GCP API returned success for gs://{ud.host}{ud.path} but {ud.localpath} doesn't exist?!")
+
+        if os.path.getsize(ud.localpath) == 0:
+            os.remove(ud.localpath)
+            raise FetchError(f"The downloaded file for gs://{ud.host}{ud.path} resulted in a zero size file?! Deleting and failing since this isn't right.")
+
+        return True
+
+    def checkstatus(self, fetch, ud, d):
+        """
+        Check the status of a URL.
+        """
+        logger.debug2(f"Checking status of gs://{ud.host}{ud.path}")
+        if self.gcp_client is None:
+            self.get_gcp_client()
+
+        bb.fetch2.check_network_access(d, "gsutil stat", ud.url)
+
+        # Path sometimes has a leading slash, so strip it
+        path = ud.path.lstrip("/")
+        if not self.gcp_client.bucket(ud.host).blob(path).exists():
+            raise FetchError(f"The GCP API reported that gs://{ud.host}{ud.path} does not exist")
+        else:
+            return True
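
As a quick way to sanity-check credentials and bucket access outside of
BitBake, the client calls the new fetcher relies on can be exercised directly.
A minimal sketch, assuming the ``google-cloud-storage`` package is installed
and Application Default Credentials are configured; the bucket name, object
path, and destination file below are placeholders::

    from google.cloud import storage

    # project=None lets the client infer the project from the environment,
    # mirroring get_gcp_client() in bb/fetch2/gcp.py.
    client = storage.Client(project=None)

    # bucket() and blob() only build local handles; no network request yet.
    bucket = client.bucket("example-bucket")
    blob = bucket.blob("path/to/object.tar.gz")

    # exists() issues a metadata request, as checkstatus() does above;
    # download_to_filename() matches the call made in download().
    if blob.exists():
        blob.download_to_filename("/tmp/object.tar.gz")
    else:
        print("object not found")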