From bae86fe618075f28b708d2c89d247937a75226fc Mon Sep 17 00:00:00 2001
From: Stefan Herbrechtsmeier
Date: Fri, 7 Mar 2025 10:43:20 +0100
Subject: [PATCH] bitbake: fetch2: Partial revert decodeurl() to not use the URI class

This partially reverts commit a5d569c94700f04b8193c6bccae5af619931b00f
which changes decodeurl() to use the URI class to parse the URL instead
of doing it itself.

While reusing code is generally a good idea, using
urllib.parse.urlparse() (which the URI class does) to parse the regular
expression "URLs" that are used in PREMIRRORS and MIRRORS does not work.
A regular expression URL containing https?://... would be silently
ignored, while a URL using a negative lookahead such as
git://(?!internal\.git\.server).*/.* would result in a cryptic error:

  Exception: re.error: missing ), unterminated subpattern at position 0

The problem is that urllib.parse.urlparse() treats the ? as the start of
URL parameters and thus stops parsing whatever part of the URL it was
parsing.

Restore the old function and use it in the PREMIRRORS and MIRRORS code.

(Bitbake rev: f8a7712754e6d0199a0d227fca288307b935368d)

Signed-off-by: Peter Kjellerstedt
Signed-off-by: Stefan Herbrechtsmeier
Signed-off-by: Richard Purdie
---
 bitbake/lib/bb/fetch2/__init__.py | 54 +++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/bitbake/lib/bb/fetch2/__init__.py b/bitbake/lib/bb/fetch2/__init__.py
index 93fe012ec3..5aa67accc3 100644
--- a/bitbake/lib/bb/fetch2/__init__.py
+++ b/bitbake/lib/bb/fetch2/__init__.py
@@ -357,6 +357,54 @@ def decodeurl(url):
     path = uri.path if uri.path else "/"
     return uri.scheme, uri.hostport, path, uri.username, uri.password, uri.params
 
+def decodemirrorurl(url):
+    """Decodes a mirror URL into the tokens (scheme, network location, path,
+    user, password, parameters).
+    """
+    m = re.compile('(?P<type>[^:]*)://((?P<user>[^/;]+)@)?(?P<location>[^;]+)(;(?P<parm>.*))?').match(url)
+    if not m:
+        raise MalformedUrl(url)
+
+    type = m.group('type')
+    location = m.group('location')
+    if not location:
+        raise MalformedUrl(url)
+    user = m.group('user')
+    parm = m.group('parm')
+
+    locidx = location.find('/')
+    if locidx != -1 and type.lower() != 'file':
+        host = location[:locidx]
+        path = location[locidx:]
+    elif type.lower() == 'file':
+        host = ""
+        path = location
+        if user:
+            path = user + '@' + path
+            user = ""
+    else:
+        host = location
+        path = "/"
+    if user:
+        m = re.compile('(?P<user>[^:]+)(:?(?P<pswd>.*))').match(user)
+        if m:
+            user = m.group('user')
+            pswd = m.group('pswd')
+    else:
+        user = ''
+        pswd = ''
+
+    p = collections.OrderedDict()
+    if parm:
+        for s in parm.split(';'):
+            if s:
+                if not '=' in s:
+                    raise MalformedUrl(url, "The URL: '%s' is invalid: parameter %s does not specify a value (missing '=')" % (url, s))
+                s1, s2 = s.split('=', 1)
+                p[s1] = s2
+
+    return type, host, urllib.parse.unquote(path), user, pswd, p
+
 def encodeurl(decoded):
     """Encodes a URL from tokens (scheme, network location, path,
     user, password, parameters).
@@ -391,9 +439,9 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None):
     if not ud.url or not uri_find or not uri_replace:
         logger.error("uri_replace: passed an undefined value, not replacing")
         return None
-    uri_decoded = list(decodeurl(ud.url))
-    uri_find_decoded = list(decodeurl(uri_find))
-    uri_replace_decoded = list(decodeurl(uri_replace))
+    uri_decoded = list(decodemirrorurl(ud.url))
+    uri_find_decoded = list(decodemirrorurl(uri_find))
+    uri_replace_decoded = list(decodemirrorurl(uri_replace))
     logger.debug2("For url %s comparing %s to %s" % (uri_decoded, uri_find_decoded, uri_replace_decoded))
     result_decoded = ['', '', '', '', '', {}]
     # 0 - type, 1 - host, 2 - path, 3 - user, 4- pswd, 5 - params