bitbake: fetch2: Partially revert decodeurl() to not use the URI class

This partially reverts commit a5d569c94700f04b8193c6bccae5af619931b00f
which changes decodeurl() to use the URI class to parse the URL instead
of doing it itself. While reusing code is generally a good idea, using
urllib.parse.urlparse() (which the URI class does) to parse the regular
expression "URLs" that are used in PREMIRRORS and MIRRORS does not work.
A regular expression URL containing https?://... would be silently
ignored, while a URL using a negative lookahead such as
git://(?!internal\.git\.server).*/.* would result in a cryptic error:

  Exception: re.error: missing ), unterminated subpattern at position 0

The problem is that urllib.parse.urlparse() treats the ? as the start of
URL parameters and thus stops parsing whatever part of the URL it was
parsing. Restore the old function and use it in the PREMIRRORS and
MIRRORS code.

(Bitbake rev: f8a7712754e6d0199a0d227fca288307b935368d)

Signed-off-by: Peter Kjellerstedt <peter.kjellerstedt@axis.com>
Signed-off-by: Stefan Herbrechtsmeier <stefan.herbrechtsmeier@weidmueller.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Stefan Herbrechtsmeier
2025-03-07 10:43:20 +01:00
committed by Richard Purdie
parent 5323603048
commit bae86fe618

View File

@@ -357,6 +357,54 @@ def decodeurl(url):
path = uri.path if uri.path else "/"
return uri.scheme, uri.hostport, path, uri.username, uri.password, uri.params
def decodemirrorurl(url):
    """Decodes a mirror URL into the tokens (scheme, network location, path,
    user, password, parameters).

    The URL is split with a hand-written regular expression instead of
    urllib.parse so that characters which are special in a standard URL
    (e.g. '?') are kept verbatim. This allows regular expression "URLs"
    such as 'https?://.*/.*' to be decoded.

    For 'file' URLs the host is always empty and everything after '://'
    (including anything that looked like user information) is the path.

    Returns a tuple (scheme, host, path, user, password, parameters) where
    path has been percent-decoded and parameters is an OrderedDict built
    from the ';key=value' suffixes, in the order they appear.

    Raises MalformedUrl if the URL cannot be split or if a parameter has
    no '='.
    """
    m = re.match(r'(?P<type>[^:]*)://((?P<user>[^/;]+)@)?(?P<location>[^;]+)(;(?P<parm>.*))?', url)
    if not m:
        raise MalformedUrl(url)

    scheme = m.group('type')
    location = m.group('location')
    if not location:
        raise MalformedUrl(url)
    user = m.group('user')
    parm = m.group('parm')

    locidx = location.find('/')
    if scheme.lower() == 'file':
        # Local paths have no host; a 'user@' prefix is part of the path.
        host = ""
        path = location
        if user:
            path = user + '@' + path
            user = ""
    elif locidx != -1:
        host = location[:locidx]
        path = location[locidx:]
    else:
        host = location
        path = "/"

    if user:
        # Split on the first ':' only, so the password may itself contain
        # ':'. Unlike a regex requiring a non-empty user name, this also
        # copes with user information such as ':secret' instead of leaving
        # the password unassigned.
        user, _, pswd = user.partition(':')
    else:
        user = ''
        pswd = ''

    p = collections.OrderedDict()
    if parm:
        for s in parm.split(';'):
            # Empty entries (e.g. from ';;' or a trailing ';') are ignored.
            if not s:
                continue
            if '=' not in s:
                raise MalformedUrl(url, "The URL: '%s' is invalid: parameter %s does not specify a value (missing '=')" % (url, s))
            s1, s2 = s.split('=', 1)
            p[s1] = s2

    return scheme, host, urllib.parse.unquote(path), user, pswd, p
def encodeurl(decoded):
"""Encodes a URL from tokens (scheme, network location, path,
user, password, parameters).
@@ -391,9 +439,9 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None):
if not ud.url or not uri_find or not uri_replace:
logger.error("uri_replace: passed an undefined value, not replacing")
return None
uri_decoded = list(decodeurl(ud.url))
uri_find_decoded = list(decodeurl(uri_find))
uri_replace_decoded = list(decodeurl(uri_replace))
uri_decoded = list(decodemirrorurl(ud.url))
uri_find_decoded = list(decodemirrorurl(uri_find))
uri_replace_decoded = list(decodemirrorurl(uri_replace))
logger.debug2("For url %s comparing %s to %s" % (uri_decoded, uri_find_decoded, uri_replace_decoded))
result_decoded = ['', '', '', '', '', {}]
# 0 - type, 1 - host, 2 - path, 3 - user, 4- pswd, 5 - params