mirror of https://git.yoctoproject.org/poky (synced 2026-02-05 16:28:43 +01:00)
python3: Upgrade 3.10.14 -> 3.10.15
Includes security fixes for CVE-2024-4030, CVE-2024-7592, CVE-2024-4032,
CVE-2024-8088, CVE-2024-6232, CVE-2024-6923, CVE-2023-27043 and other bug
fixes.

Removed the patches below, as their fixes are included in the 3.10.15
upgrade:
1. CVE-2023-27043.patch
2. CVE-2024-6232.patch
3. CVE-2024-7592.patch
4. CVE-2024-8088.patch

Release Notes:
https://www.python.org/downloads/release/python-31015/

(From OE-Core rev: e64ead97ae3d680f97bf85422f777cd77ae7c434)

Signed-off-by: Divya Chellam <divya.chellam@windriver.com>
Signed-off-by: Steve Sakoman <steve@sakoman.com>
committed by Steve Sakoman
parent 8190d9c754
commit 487e8cdf1d
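
Not part of the commit itself, but a quick way to confirm on a built image that the upgraded interpreter really carries these fixes is to probe it directly; the removed CVE-2023-27043.patch below recommends checking email.utils.supports_strict_parsing for exactly this purpose. A minimal sketch, assuming a Python 3.10.15 target:

    import sys
    import email.utils

    print(sys.version)  # expect 3.10.15 after this upgrade

    # The reworked CVE-2023-27043 fix advertises itself through this attribute;
    # getattr() keeps the check safe on older interpreters that predate it.
    if getattr(email.utils, 'supports_strict_parsing', False):
        print("strict address parsing is available")
    else:
        print("strict address parsing is NOT available")
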
@@ -1,510 +0,0 @@
From 2a9273a0e4466e2f057f9ce6fe98cd8ce570331b Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Fri, 6 Sep 2024 13:14:22 +0200
Subject: [PATCH] [3.10] [CVE-2023-27043] gh-102988: Reject malformed addresses
 in email.parseaddr() (GH-111116) (#123768)

Detect email address parsing errors and return empty tuple to
indicate the parsing error (old API). Add an optional 'strict'
parameter to getaddresses() and parseaddr() functions. Patch by
Thomas Dwyer.

(cherry picked from commit 4a153a1d3b18803a684cd1bcc2cdf3ede3dbae19)

Co-authored-by: Victor Stinner <vstinner@python.org>
Co-Authored-By: Thomas Dwyer <github@tomd.tel>

Upstream-Status: Backport [https://github.com/python/cpython/commit/2a9273a0e4466e2f057f9ce6fe98cd8ce570331b]
CVE: CVE-2023-27043
Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com>
---
 Doc/library/email.utils.rst | 19 +-
 Lib/email/utils.py | 151 ++++++++++++-
 Lib/test/test_email/test_email.py | 204 +++++++++++++++++-
 ...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 +
 4 files changed, 361 insertions(+), 21 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst

diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
|
||||
index 0e266b6..65948fb 100644
|
||||
--- a/Doc/library/email.utils.rst
|
||||
+++ b/Doc/library/email.utils.rst
|
||||
@@ -60,13 +60,18 @@ of the new API.
|
||||
begins with angle brackets, they are stripped off.
|
||||
|
||||
|
||||
-.. function:: parseaddr(address)
|
||||
+.. function:: parseaddr(address, *, strict=True)
|
||||
|
||||
Parse address -- which should be the value of some address-containing field such
|
||||
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
|
||||
*email address* parts. Returns a tuple of that information, unless the parse
|
||||
fails, in which case a 2-tuple of ``('', '')`` is returned.
|
||||
|
||||
+ If *strict* is true, use a strict parser which rejects malformed inputs.
|
||||
+
|
||||
+ .. versionchanged:: 3.10.15
|
||||
+ Add *strict* optional parameter and reject malformed inputs by default.
|
||||
+
|
||||
|
||||
.. function:: formataddr(pair, charset='utf-8')
|
||||
|
||||
@@ -84,12 +89,15 @@ of the new API.
|
||||
Added the *charset* option.
|
||||
|
||||
|
||||
-.. function:: getaddresses(fieldvalues)
|
||||
+.. function:: getaddresses(fieldvalues, *, strict=True)
|
||||
|
||||
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
|
||||
*fieldvalues* is a sequence of header field values as might be returned by
|
||||
- :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
|
||||
- example that gets all the recipients of a message::
|
||||
+ :meth:`Message.get_all <email.message.Message.get_all>`.
|
||||
+
|
||||
+ If *strict* is true, use a strict parser which rejects malformed inputs.
|
||||
+
|
||||
+ Here's a simple example that gets all the recipients of a message::
|
||||
|
||||
from email.utils import getaddresses
|
||||
|
||||
@@ -99,6 +107,9 @@ of the new API.
|
||||
resent_ccs = msg.get_all('resent-cc', [])
|
||||
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
|
||||
|
||||
+ .. versionchanged:: 3.10.15
|
||||
+ Add *strict* optional parameter and reject malformed inputs by default.
|
||||
+
|
||||
|
||||
.. function:: parsedate(date)
|
||||
|
||||
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
|
||||
index cfdfeb3..9522341 100644
|
||||
--- a/Lib/email/utils.py
|
||||
+++ b/Lib/email/utils.py
|
||||
@@ -48,6 +48,7 @@ TICK = "'"
|
||||
specialsre = re.compile(r'[][\\()<>@,:;".]')
|
||||
escapesre = re.compile(r'[\\"]')
|
||||
|
||||
+
|
||||
def _has_surrogates(s):
|
||||
"""Return True if s contains surrogate-escaped binary data."""
|
||||
# This check is based on the fact that unless there are surrogates, utf8
|
||||
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
|
||||
return address
|
||||
|
||||
|
||||
+def _iter_escaped_chars(addr):
|
||||
+ pos = 0
|
||||
+ escape = False
|
||||
+ for pos, ch in enumerate(addr):
|
||||
+ if escape:
|
||||
+ yield (pos, '\\' + ch)
|
||||
+ escape = False
|
||||
+ elif ch == '\\':
|
||||
+ escape = True
|
||||
+ else:
|
||||
+ yield (pos, ch)
|
||||
+ if escape:
|
||||
+ yield (pos, '\\')
|
||||
+
|
||||
+
|
||||
+def _strip_quoted_realnames(addr):
|
||||
+ """Strip real names between quotes."""
|
||||
+ if '"' not in addr:
|
||||
+ # Fast path
|
||||
+ return addr
|
||||
+
|
||||
+ start = 0
|
||||
+ open_pos = None
|
||||
+ result = []
|
||||
+ for pos, ch in _iter_escaped_chars(addr):
|
||||
+ if ch == '"':
|
||||
+ if open_pos is None:
|
||||
+ open_pos = pos
|
||||
+ else:
|
||||
+ if start != open_pos:
|
||||
+ result.append(addr[start:open_pos])
|
||||
+ start = pos + 1
|
||||
+ open_pos = None
|
||||
+
|
||||
+ if start < len(addr):
|
||||
+ result.append(addr[start:])
|
||||
+
|
||||
+ return ''.join(result)
|
||||
|
||||
-def getaddresses(fieldvalues):
|
||||
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
|
||||
- all = COMMASPACE.join(str(v) for v in fieldvalues)
|
||||
- a = _AddressList(all)
|
||||
- return a.addresslist
|
||||
+
|
||||
+supports_strict_parsing = True
|
||||
+
|
||||
+def getaddresses(fieldvalues, *, strict=True):
|
||||
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
|
||||
+
|
||||
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
|
||||
+ its place.
|
||||
+
|
||||
+ If strict is true, use a strict parser which rejects malformed inputs.
|
||||
+ """
|
||||
+
|
||||
+ # If strict is true, if the resulting list of parsed addresses is greater
|
||||
+ # than the number of fieldvalues in the input list, a parsing error has
|
||||
+ # occurred and consequently a list containing a single empty 2-tuple [('',
|
||||
+ # '')] is returned in its place. This is done to avoid invalid output.
|
||||
+ #
|
||||
+ # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
|
||||
+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
|
||||
+ # Safe output: [('', '')]
|
||||
+
|
||||
+ if not strict:
|
||||
+ all = COMMASPACE.join(str(v) for v in fieldvalues)
|
||||
+ a = _AddressList(all)
|
||||
+ return a.addresslist
|
||||
+
|
||||
+ fieldvalues = [str(v) for v in fieldvalues]
|
||||
+ fieldvalues = _pre_parse_validation(fieldvalues)
|
||||
+ addr = COMMASPACE.join(fieldvalues)
|
||||
+ a = _AddressList(addr)
|
||||
+ result = _post_parse_validation(a.addresslist)
|
||||
+
|
||||
+ # Treat output as invalid if the number of addresses is not equal to the
|
||||
+ # expected number of addresses.
|
||||
+ n = 0
|
||||
+ for v in fieldvalues:
|
||||
+ # When a comma is used in the Real Name part it is not a deliminator.
|
||||
+ # So strip those out before counting the commas.
|
||||
+ v = _strip_quoted_realnames(v)
|
||||
+ # Expected number of addresses: 1 + number of commas
|
||||
+ n += 1 + v.count(',')
|
||||
+ if len(result) != n:
|
||||
+ return [('', '')]
|
||||
+
|
||||
+ return result
|
||||
+
|
||||
+
|
||||
+def _check_parenthesis(addr):
|
||||
+ # Ignore parenthesis in quoted real names.
|
||||
+ addr = _strip_quoted_realnames(addr)
|
||||
+
|
||||
+ opens = 0
|
||||
+ for pos, ch in _iter_escaped_chars(addr):
|
||||
+ if ch == '(':
|
||||
+ opens += 1
|
||||
+ elif ch == ')':
|
||||
+ opens -= 1
|
||||
+ if opens < 0:
|
||||
+ return False
|
||||
+ return (opens == 0)
|
||||
+
|
||||
+
|
||||
+def _pre_parse_validation(email_header_fields):
|
||||
+ accepted_values = []
|
||||
+ for v in email_header_fields:
|
||||
+ if not _check_parenthesis(v):
|
||||
+ v = "('', '')"
|
||||
+ accepted_values.append(v)
|
||||
+
|
||||
+ return accepted_values
|
||||
+
|
||||
+
|
||||
+def _post_parse_validation(parsed_email_header_tuples):
|
||||
+ accepted_values = []
|
||||
+ # The parser would have parsed a correctly formatted domain-literal
|
||||
+ # The existence of an [ after parsing indicates a parsing failure
|
||||
+ for v in parsed_email_header_tuples:
|
||||
+ if '[' in v[1]:
|
||||
+ v = ('', '')
|
||||
+ accepted_values.append(v)
|
||||
+
|
||||
+ return accepted_values
|
||||
|
||||
|
||||
def _format_timetuple_and_zone(timetuple, zone):
|
||||
@@ -205,16 +321,33 @@ def parsedate_to_datetime(data):
|
||||
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
|
||||
|
||||
|
||||
-def parseaddr(addr):
|
||||
+def parseaddr(addr, *, strict=True):
|
||||
"""
|
||||
Parse addr into its constituent realname and email address parts.
|
||||
|
||||
Return a tuple of realname and email address, unless the parse fails, in
|
||||
which case return a 2-tuple of ('', '').
|
||||
+
|
||||
+ If strict is True, use a strict parser which rejects malformed inputs.
|
||||
"""
|
||||
- addrs = _AddressList(addr).addresslist
|
||||
- if not addrs:
|
||||
- return '', ''
|
||||
+ if not strict:
|
||||
+ addrs = _AddressList(addr).addresslist
|
||||
+ if not addrs:
|
||||
+ return ('', '')
|
||||
+ return addrs[0]
|
||||
+
|
||||
+ if isinstance(addr, list):
|
||||
+ addr = addr[0]
|
||||
+
|
||||
+ if not isinstance(addr, str):
|
||||
+ return ('', '')
|
||||
+
|
||||
+ addr = _pre_parse_validation([addr])[0]
|
||||
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
|
||||
+
|
||||
+ if not addrs or len(addrs) > 1:
|
||||
+ return ('', '')
|
||||
+
|
||||
return addrs[0]
|
||||
|
||||
|
||||
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
|
||||
index 8b16cca..5b19bb3 100644
|
||||
--- a/Lib/test/test_email/test_email.py
|
||||
+++ b/Lib/test/test_email/test_email.py
|
||||
@@ -16,6 +16,7 @@ from unittest.mock import patch
|
||||
|
||||
import email
|
||||
import email.policy
|
||||
+import email.utils
|
||||
|
||||
from email.charset import Charset
|
||||
from email.generator import Generator, DecodedGenerator, BytesGenerator
|
||||
@@ -3288,15 +3289,154 @@ Foo
|
||||
[('Al Person', 'aperson@dom.ain'),
|
||||
('Bud Person', 'bperson@dom.ain')])
|
||||
|
||||
+ def test_getaddresses_comma_in_name(self):
|
||||
+ """GH-106669 regression test."""
|
||||
+ self.assertEqual(
|
||||
+ utils.getaddresses(
|
||||
+ [
|
||||
+ '"Bud, Person" <bperson@dom.ain>',
|
||||
+ 'aperson@dom.ain (Al Person)',
|
||||
+ '"Mariusz Felisiak" <to@example.com>',
|
||||
+ ]
|
||||
+ ),
|
||||
+ [
|
||||
+ ('Bud, Person', 'bperson@dom.ain'),
|
||||
+ ('Al Person', 'aperson@dom.ain'),
|
||||
+ ('Mariusz Felisiak', 'to@example.com'),
|
||||
+ ],
|
||||
+ )
|
||||
+
|
||||
+ def test_parsing_errors(self):
|
||||
+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
|
||||
+ alice = 'alice@example.org'
|
||||
+ bob = 'bob@example.com'
|
||||
+ empty = ('', '')
|
||||
+
|
||||
+ # Test utils.getaddresses() and utils.parseaddr() on malformed email
|
||||
+ # addresses: default behavior (strict=True) rejects malformed address,
|
||||
+ # and strict=False which tolerates malformed address.
|
||||
+ for invalid_separator, expected_non_strict in (
|
||||
+ ('(', [(f'<{bob}>', alice)]),
|
||||
+ (')', [('', alice), empty, ('', bob)]),
|
||||
+ ('<', [('', alice), empty, ('', bob), empty]),
|
||||
+ ('>', [('', alice), empty, ('', bob)]),
|
||||
+ ('[', [('', f'{alice}[<{bob}>]')]),
|
||||
+ (']', [('', alice), empty, ('', bob)]),
|
||||
+ ('@', [empty, empty, ('', bob)]),
|
||||
+ (';', [('', alice), empty, ('', bob)]),
|
||||
+ (':', [('', alice), ('', bob)]),
|
||||
+ ('.', [('', alice + '.'), ('', bob)]),
|
||||
+ ('"', [('', alice), ('', f'<{bob}>')]),
|
||||
+ ):
|
||||
+ address = f'{alice}{invalid_separator}<{bob}>'
|
||||
+ with self.subTest(address=address):
|
||||
+ self.assertEqual(utils.getaddresses([address]),
|
||||
+ [empty])
|
||||
+ self.assertEqual(utils.getaddresses([address], strict=False),
|
||||
+ expected_non_strict)
|
||||
+
|
||||
+ self.assertEqual(utils.parseaddr([address]),
|
||||
+ empty)
|
||||
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
||||
+ ('', address))
|
||||
+
|
||||
+ # Comma (',') is treated differently depending on strict parameter.
|
||||
+ # Comma without quotes.
|
||||
+ address = f'{alice},<{bob}>'
|
||||
+ self.assertEqual(utils.getaddresses([address]),
|
||||
+ [('', alice), ('', bob)])
|
||||
+ self.assertEqual(utils.getaddresses([address], strict=False),
|
||||
+ [('', alice), ('', bob)])
|
||||
+ self.assertEqual(utils.parseaddr([address]),
|
||||
+ empty)
|
||||
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
||||
+ ('', address))
|
||||
+
|
||||
+ # Real name between quotes containing comma.
|
||||
+ address = '"Alice, alice@example.org" <bob@example.com>'
|
||||
+ expected_strict = ('Alice, alice@example.org', 'bob@example.com')
|
||||
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
|
||||
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
|
||||
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
|
||||
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
||||
+ ('', address))
|
||||
+
|
||||
+ # Valid parenthesis in comments.
|
||||
+ address = 'alice@example.org (Alice)'
|
||||
+ expected_strict = ('Alice', 'alice@example.org')
|
||||
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
|
||||
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
|
||||
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
|
||||
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
||||
+ ('', address))
|
||||
+
|
||||
+ # Invalid parenthesis in comments.
|
||||
+ address = 'alice@example.org )Alice('
|
||||
+ self.assertEqual(utils.getaddresses([address]), [empty])
|
||||
+ self.assertEqual(utils.getaddresses([address], strict=False),
|
||||
+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
|
||||
+ self.assertEqual(utils.parseaddr([address]), empty)
|
||||
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
||||
+ ('', address))
|
||||
+
|
||||
+ # Two addresses with quotes separated by comma.
|
||||
+ address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
|
||||
+ self.assertEqual(utils.getaddresses([address]),
|
||||
+ [('Jane Doe', 'jane@example.net'),
|
||||
+ ('John Doe', 'john@example.net')])
|
||||
+ self.assertEqual(utils.getaddresses([address], strict=False),
|
||||
+ [('Jane Doe', 'jane@example.net'),
|
||||
+ ('John Doe', 'john@example.net')])
|
||||
+ self.assertEqual(utils.parseaddr([address]), empty)
|
||||
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
||||
+ ('', address))
|
||||
+
|
||||
+ # Test email.utils.supports_strict_parsing attribute
|
||||
+ self.assertEqual(email.utils.supports_strict_parsing, True)
|
||||
+
|
||||
def test_getaddresses_nasty(self):
|
||||
- eq = self.assertEqual
|
||||
- eq(utils.getaddresses(['foo: ;']), [('', '')])
|
||||
- eq(utils.getaddresses(
|
||||
- ['[]*-- =~$']),
|
||||
- [('', ''), ('', ''), ('', '*--')])
|
||||
- eq(utils.getaddresses(
|
||||
- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
|
||||
- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
|
||||
+ for addresses, expected in (
|
||||
+ (['"Sürname, Firstname" <to@example.com>'],
|
||||
+ [('Sürname, Firstname', 'to@example.com')]),
|
||||
+
|
||||
+ (['foo: ;'],
|
||||
+ [('', '')]),
|
||||
+
|
||||
+ (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
|
||||
+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
|
||||
+
|
||||
+ ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
|
||||
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
|
||||
+
|
||||
+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
|
||||
+ [('', '')]),
|
||||
+
|
||||
+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
|
||||
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
|
||||
+
|
||||
+ (['John Doe <jdoe@machine(comment). example>'],
|
||||
+ [('John Doe (comment)', 'jdoe@machine.example')]),
|
||||
+
|
||||
+ (['"Mary Smith: Personal Account" <smith@home.example>'],
|
||||
+ [('Mary Smith: Personal Account', 'smith@home.example')]),
|
||||
+
|
||||
+ (['Undisclosed recipients:;'],
|
||||
+ [('', '')]),
|
||||
+
|
||||
+ ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
|
||||
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
|
||||
+ ):
|
||||
+ with self.subTest(addresses=addresses):
|
||||
+ self.assertEqual(utils.getaddresses(addresses),
|
||||
+ expected)
|
||||
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
|
||||
+ expected)
|
||||
+
|
||||
+ addresses = ['[]*-- =~$']
|
||||
+ self.assertEqual(utils.getaddresses(addresses),
|
||||
+ [('', '')])
|
||||
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
|
||||
+ [('', ''), ('', ''), ('', '*--')])
|
||||
|
||||
def test_getaddresses_embedded_comment(self):
|
||||
"""Test proper handling of a nested comment"""
|
||||
@@ -3485,6 +3625,54 @@ multipart/report
|
||||
m = cls(*constructor, policy=email.policy.default)
|
||||
self.assertIs(m.policy, email.policy.default)
|
||||
|
||||
+ def test_iter_escaped_chars(self):
|
||||
+ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
|
||||
+ [(0, 'a'),
|
||||
+ (2, '\\\\'),
|
||||
+ (3, 'b'),
|
||||
+ (5, '\\"'),
|
||||
+ (6, 'c'),
|
||||
+ (8, '\\\\'),
|
||||
+ (9, '"'),
|
||||
+ (10, 'd')])
|
||||
+ self.assertEqual(list(utils._iter_escaped_chars('a\\')),
|
||||
+ [(0, 'a'), (1, '\\')])
|
||||
+
|
||||
+ def test_strip_quoted_realnames(self):
|
||||
+ def check(addr, expected):
|
||||
+ self.assertEqual(utils._strip_quoted_realnames(addr), expected)
|
||||
+
|
||||
+ check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
|
||||
+ ' <jane@example.net>, <john@example.net>')
|
||||
+ check(r'"Jane \"Doe\"." <jane@example.net>',
|
||||
+ ' <jane@example.net>')
|
||||
+
|
||||
+ # special cases
|
||||
+ check(r'before"name"after', 'beforeafter')
|
||||
+ check(r'before"name"', 'before')
|
||||
+ check(r'b"name"', 'b') # single char
|
||||
+ check(r'"name"after', 'after')
|
||||
+ check(r'"name"a', 'a') # single char
|
||||
+ check(r'"name"', '')
|
||||
+
|
||||
+ # no change
|
||||
+ for addr in (
|
||||
+ 'Jane Doe <jane@example.net>, John Doe <john@example.net>',
|
||||
+ 'lone " quote',
|
||||
+ ):
|
||||
+ self.assertEqual(utils._strip_quoted_realnames(addr), addr)
|
||||
+
|
||||
+
|
||||
+ def test_check_parenthesis(self):
|
||||
+ addr = 'alice@example.net'
|
||||
+ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)'))
|
||||
+ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice('))
|
||||
+ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))'))
|
||||
+ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)'))
|
||||
+
|
||||
+ # Ignore real name between quotes
|
||||
+ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}'))
|
||||
+
|
||||
|
||||
# Test the iterator/generators
|
||||
class TestIterators(TestEmailBase):
|
||||
diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
|
||||
new file mode 100644
|
||||
index 0000000..3d0e9e4
|
||||
--- /dev/null
|
||||
+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
|
||||
@@ -0,0 +1,8 @@
|
||||
+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now
|
||||
+return ``('', '')`` 2-tuples in more situations where invalid email
|
||||
+addresses are encountered instead of potentially inaccurate values. Add
|
||||
+optional *strict* parameter to these two functions: use ``strict=False`` to
|
||||
+get the old behavior, accept malformed inputs.
|
||||
+``getattr(email.utils, 'supports_strict_parsing', False)`` can be use to check
|
||||
+if the *strict* paramater is available. Patch by Thomas Dwyer and Victor
|
||||
+Stinner to improve the CVE-2023-27043 fix.
|
||||
--
|
||||
2.25.1
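
To illustrate the behaviour this removed patch backported (and which 3.10.15 now ships), here is a small sketch of the strict parameter described above. The malformed input and its safe output come straight from the docstring and tests in the patch; the non-strict results are printed rather than asserted, since they reflect the old, tolerant parser:

    from email.utils import getaddresses, parseaddr

    malformed = 'alice@example.com <bob@example.com>'  # two addresses in one field

    # Default in 3.10.15 (strict=True): the whole field is rejected as unparsable.
    print(getaddresses([malformed]))   # [('', '')]
    print(parseaddr(malformed))        # ('', '')

    # The old, tolerant behaviour is still reachable via strict=False.
    print(getaddresses([malformed], strict=False))
    print(parseaddr(malformed, strict=False))

    # Well-formed input, including a quoted real name containing a comma, still parses.
    print(getaddresses(['"Bud, Person" <bperson@dom.ain>']))  # [('Bud, Person', 'bperson@dom.ain')]
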
|
||||
|
||||
@@ -1,251 +0,0 @@
From 3a22dc1079be5a75750d24dc6992956e7b84b5a0 Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth@python.org>
Date: Tue, 3 Sep 2024 10:07:53 -0500
Subject: [PATCH 2/2] [3.10] gh-121285: Remove backtracking when parsing
 tarfile headers (GH-121286) (#123640)

* Remove backtracking when parsing tarfile headers
* Rewrite PAX header parsing to be stricter
* Optimize parsing of GNU extended sparse headers v0.0

(cherry picked from commit 34ddb64d088dd7ccc321f6103d23153256caa5d4)

Upstream-Status: Backport from https://github.com/python/cpython/commit/743acbe872485dc18df4d8ab2dc7895187f062c4
CVE: CVE-2024-6232

Co-authored-by: Kirill Podoprigora <kirill.bast9@mail.ru>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com>
---
 Lib/tarfile.py | 105 +++++++++++-------
 Lib/test/test_tarfile.py | 42 +++++++
 ...-07-02-13-39-20.gh-issue-121285.hrl-yI.rst | 2 +
 3 files changed, 111 insertions(+), 38 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst

diff --git a/Lib/tarfile.py b/Lib/tarfile.py
|
||||
index 495349f08f9..3ab6811d633 100755
|
||||
--- a/Lib/tarfile.py
|
||||
+++ b/Lib/tarfile.py
|
||||
@@ -841,6 +841,9 @@ def data_filter(member, dest_path):
|
||||
# Sentinel for replace() defaults, meaning "don't change the attribute"
|
||||
_KEEP = object()
|
||||
|
||||
+# Header length is digits followed by a space.
|
||||
+_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ")
|
||||
+
|
||||
class TarInfo(object):
|
||||
"""Informational class which holds the details about an
|
||||
archive member given by a tar header block.
|
||||
@@ -1410,41 +1413,59 @@ def _proc_pax(self, tarfile):
|
||||
else:
|
||||
pax_headers = tarfile.pax_headers.copy()
|
||||
|
||||
- # Check if the pax header contains a hdrcharset field. This tells us
|
||||
- # the encoding of the path, linkpath, uname and gname fields. Normally,
|
||||
- # these fields are UTF-8 encoded but since POSIX.1-2008 tar
|
||||
- # implementations are allowed to store them as raw binary strings if
|
||||
- # the translation to UTF-8 fails.
|
||||
- match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
|
||||
- if match is not None:
|
||||
- pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
|
||||
-
|
||||
- # For the time being, we don't care about anything other than "BINARY".
|
||||
- # The only other value that is currently allowed by the standard is
|
||||
- # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
|
||||
- hdrcharset = pax_headers.get("hdrcharset")
|
||||
- if hdrcharset == "BINARY":
|
||||
- encoding = tarfile.encoding
|
||||
- else:
|
||||
- encoding = "utf-8"
|
||||
-
|
||||
# Parse pax header information. A record looks like that:
|
||||
# "%d %s=%s\n" % (length, keyword, value). length is the size
|
||||
# of the complete record including the length field itself and
|
||||
- # the newline. keyword and value are both UTF-8 encoded strings.
|
||||
- regex = re.compile(br"(\d+) ([^=]+)=")
|
||||
+ # the newline.
|
||||
pos = 0
|
||||
- while True:
|
||||
- match = regex.match(buf, pos)
|
||||
- if not match:
|
||||
- break
|
||||
+ encoding = None
|
||||
+ raw_headers = []
|
||||
+ while len(buf) > pos and buf[pos] != 0x00:
|
||||
+ if not (match := _header_length_prefix_re.match(buf, pos)):
|
||||
+ raise InvalidHeaderError("invalid header")
|
||||
+ try:
|
||||
+ length = int(match.group(1))
|
||||
+ except ValueError:
|
||||
+ raise InvalidHeaderError("invalid header")
|
||||
+ # Headers must be at least 5 bytes, shortest being '5 x=\n'.
|
||||
+ # Value is allowed to be empty.
|
||||
+ if length < 5:
|
||||
+ raise InvalidHeaderError("invalid header")
|
||||
+ if pos + length > len(buf):
|
||||
+ raise InvalidHeaderError("invalid header")
|
||||
|
||||
- length, keyword = match.groups()
|
||||
- length = int(length)
|
||||
- if length == 0:
|
||||
+ header_value_end_offset = match.start(1) + length - 1 # Last byte of the header
|
||||
+ keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
|
||||
+ raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")
|
||||
+
|
||||
+ # Check the framing of the header. The last character must be '\n' (0x0A)
|
||||
+ if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
|
||||
raise InvalidHeaderError("invalid header")
|
||||
- value = buf[match.end(2) + 1:match.start(1) + length - 1]
|
||||
+ raw_headers.append((length, raw_keyword, raw_value))
|
||||
+
|
||||
+ # Check if the pax header contains a hdrcharset field. This tells us
|
||||
+ # the encoding of the path, linkpath, uname and gname fields. Normally,
|
||||
+ # these fields are UTF-8 encoded but since POSIX.1-2008 tar
|
||||
+ # implementations are allowed to store them as raw binary strings if
|
||||
+ # the translation to UTF-8 fails. For the time being, we don't care about
|
||||
+ # anything other than "BINARY". The only other value that is currently
|
||||
+ # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
|
||||
+ # Note that we only follow the initial 'hdrcharset' setting to preserve
|
||||
+ # the initial behavior of the 'tarfile' module.
|
||||
+ if raw_keyword == b"hdrcharset" and encoding is None:
|
||||
+ if raw_value == b"BINARY":
|
||||
+ encoding = tarfile.encoding
|
||||
+ else: # This branch ensures only the first 'hdrcharset' header is used.
|
||||
+ encoding = "utf-8"
|
||||
+
|
||||
+ pos += length
|
||||
|
||||
+ # If no explicit hdrcharset is set, we use UTF-8 as a default.
|
||||
+ if encoding is None:
|
||||
+ encoding = "utf-8"
|
||||
+
|
||||
+ # After parsing the raw headers we can decode them to text.
|
||||
+ for length, raw_keyword, raw_value in raw_headers:
|
||||
# Normally, we could just use "utf-8" as the encoding and "strict"
|
||||
# as the error handler, but we better not take the risk. For
|
||||
# example, GNU tar <= 1.23 is known to store filenames it cannot
|
||||
@@ -1452,17 +1473,16 @@ def _proc_pax(self, tarfile):
|
||||
# hdrcharset=BINARY header).
|
||||
# We first try the strict standard encoding, and if that fails we
|
||||
# fall back on the user's encoding and error handler.
|
||||
- keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
|
||||
+ keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
|
||||
tarfile.errors)
|
||||
if keyword in PAX_NAME_FIELDS:
|
||||
- value = self._decode_pax_field(value, encoding, tarfile.encoding,
|
||||
+ value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
|
||||
tarfile.errors)
|
||||
else:
|
||||
- value = self._decode_pax_field(value, "utf-8", "utf-8",
|
||||
+ value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
|
||||
tarfile.errors)
|
||||
|
||||
pax_headers[keyword] = value
|
||||
- pos += length
|
||||
|
||||
# Fetch the next header.
|
||||
try:
|
||||
@@ -1477,7 +1497,7 @@ def _proc_pax(self, tarfile):
|
||||
|
||||
elif "GNU.sparse.size" in pax_headers:
|
||||
# GNU extended sparse format version 0.0.
|
||||
- self._proc_gnusparse_00(next, pax_headers, buf)
|
||||
+ self._proc_gnusparse_00(next, raw_headers)
|
||||
|
||||
elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
|
||||
# GNU extended sparse format version 1.0.
|
||||
@@ -1499,15 +1519,24 @@ def _proc_pax(self, tarfile):
|
||||
|
||||
return next
|
||||
|
||||
- def _proc_gnusparse_00(self, next, pax_headers, buf):
|
||||
+ def _proc_gnusparse_00(self, next, raw_headers):
|
||||
"""Process a GNU tar extended sparse header, version 0.0.
|
||||
"""
|
||||
offsets = []
|
||||
- for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
|
||||
- offsets.append(int(match.group(1)))
|
||||
numbytes = []
|
||||
- for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
|
||||
- numbytes.append(int(match.group(1)))
|
||||
+ for _, keyword, value in raw_headers:
|
||||
+ if keyword == b"GNU.sparse.offset":
|
||||
+ try:
|
||||
+ offsets.append(int(value.decode()))
|
||||
+ except ValueError:
|
||||
+ raise InvalidHeaderError("invalid header")
|
||||
+
|
||||
+ elif keyword == b"GNU.sparse.numbytes":
|
||||
+ try:
|
||||
+ numbytes.append(int(value.decode()))
|
||||
+ except ValueError:
|
||||
+ raise InvalidHeaderError("invalid header")
|
||||
+
|
||||
next.sparse = list(zip(offsets, numbytes))
|
||||
|
||||
def _proc_gnusparse_01(self, next, pax_headers):
|
||||
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
|
||||
index cfc13bccb20..007c3e94acb 100644
|
||||
--- a/Lib/test/test_tarfile.py
|
||||
+++ b/Lib/test/test_tarfile.py
|
||||
@@ -1139,6 +1139,48 @@ def test_pax_number_fields(self):
|
||||
finally:
|
||||
tar.close()
|
||||
|
||||
+ def test_pax_header_bad_formats(self):
|
||||
+ # The fields from the pax header have priority over the
|
||||
+ # TarInfo.
|
||||
+ pax_header_replacements = (
|
||||
+ b" foo=bar\n",
|
||||
+ b"0 \n",
|
||||
+ b"1 \n",
|
||||
+ b"2 \n",
|
||||
+ b"3 =\n",
|
||||
+ b"4 =a\n",
|
||||
+ b"1000000 foo=bar\n",
|
||||
+ b"0 foo=bar\n",
|
||||
+ b"-12 foo=bar\n",
|
||||
+ b"000000000000000000000000036 foo=bar\n",
|
||||
+ )
|
||||
+ pax_headers = {"foo": "bar"}
|
||||
+
|
||||
+ for replacement in pax_header_replacements:
|
||||
+ with self.subTest(header=replacement):
|
||||
+ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
|
||||
+ encoding="iso8859-1")
|
||||
+ try:
|
||||
+ t = tarfile.TarInfo()
|
||||
+ t.name = "pax" # non-ASCII
|
||||
+ t.uid = 1
|
||||
+ t.pax_headers = pax_headers
|
||||
+ tar.addfile(t)
|
||||
+ finally:
|
||||
+ tar.close()
|
||||
+
|
||||
+ with open(tmpname, "rb") as f:
|
||||
+ data = f.read()
|
||||
+ self.assertIn(b"11 foo=bar\n", data)
|
||||
+ data = data.replace(b"11 foo=bar\n", replacement)
|
||||
+
|
||||
+ with open(tmpname, "wb") as f:
|
||||
+ f.truncate()
|
||||
+ f.write(data)
|
||||
+
|
||||
+ with self.assertRaisesRegex(tarfile.ReadError, r"method tar: ReadError\('invalid header'\)"):
|
||||
+ tarfile.open(tmpname, encoding="iso8859-1")
|
||||
+
|
||||
|
||||
class WriteTestBase(TarTest):
|
||||
# Put all write tests in here that are supposed to be tested
|
||||
diff --git a/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst
|
||||
new file mode 100644
|
||||
index 00000000000..81f918bfe2b
|
||||
--- /dev/null
|
||||
+++ b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst
|
||||
@@ -0,0 +1,2 @@
|
||||
+Remove backtracking from tarfile header parsing for ``hdrcharset``, PAX, and
|
||||
+GNU sparse headers.
|
||||
--
|
||||
2.46.0
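
As a rough illustration of the stricter PAX parsing above, the sketch below (not part of the patch, modelled on its test_pax_header_bad_formats) builds a PAX archive in memory, corrupts the "%d %s=%s\n" length prefix, and shows that reopening the archive is rejected:

    import io
    import tarfile

    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode="w", format=tarfile.PAX_FORMAT,
                      encoding="iso8859-1") as tar:
        info = tarfile.TarInfo("pax")
        info.pax_headers = {"foo": "bar"}   # forces an extended (PAX) header record
        tar.addfile(info)

    data = buf.getvalue()
    assert b"11 foo=bar\n" in data          # well-formed record: "<length> <keyword>=<value>\n"
    tampered = data.replace(b"11 foo=bar\n", b"0 foo=bar\n")  # bogus length prefix

    try:
        tarfile.open(fileobj=io.BytesIO(tampered), encoding="iso8859-1")
    except tarfile.ReadError as exc:
        print("rejected:", exc)             # the malformed record is reported as an invalid header
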
|
||||
|
||||
@@ -1,140 +0,0 @@
From 3c15b8437f57fe1027171b34af88bf791cf1868c Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
 <31488909+miss-islington@users.noreply.github.com>
Date: Wed, 4 Sep 2024 17:50:36 +0200
Subject: [PATCH 1/2] [3.10] gh-123067: Fix quadratic complexity in parsing
 "-quoted cookie values with backslashes (GH-123075) (#123106)

This fixes CVE-2024-7592.
(cherry picked from commit 44e458357fca05ca0ae2658d62c8c595b048b5ef)

Upstream-Status: Backport from https://github.com/python/cpython/commit/b2f11ca7667e4d57c71c1c88b255115f16042d9a
CVE: CVE-2024-7592

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com>
---
 Lib/http/cookies.py | 34 ++++-------------
 Lib/test/test_http_cookies.py | 38 +++++++++++++++++++
 ...-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst | 1 +
 3 files changed, 47 insertions(+), 26 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst

diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py
|
||||
index 35ac2dc6ae2..2c1f021d0ab 100644
|
||||
--- a/Lib/http/cookies.py
|
||||
+++ b/Lib/http/cookies.py
|
||||
@@ -184,8 +184,13 @@ def _quote(str):
|
||||
return '"' + str.translate(_Translator) + '"'
|
||||
|
||||
|
||||
-_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
|
||||
-_QuotePatt = re.compile(r"[\\].")
|
||||
+_unquote_sub = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))').sub
|
||||
+
|
||||
+def _unquote_replace(m):
|
||||
+ if m[1]:
|
||||
+ return chr(int(m[1], 8))
|
||||
+ else:
|
||||
+ return m[2]
|
||||
|
||||
def _unquote(str):
|
||||
# If there aren't any doublequotes,
|
||||
@@ -205,30 +210,7 @@ def _unquote(str):
|
||||
# \012 --> \n
|
||||
# \" --> "
|
||||
#
|
||||
- i = 0
|
||||
- n = len(str)
|
||||
- res = []
|
||||
- while 0 <= i < n:
|
||||
- o_match = _OctalPatt.search(str, i)
|
||||
- q_match = _QuotePatt.search(str, i)
|
||||
- if not o_match and not q_match: # Neither matched
|
||||
- res.append(str[i:])
|
||||
- break
|
||||
- # else:
|
||||
- j = k = -1
|
||||
- if o_match:
|
||||
- j = o_match.start(0)
|
||||
- if q_match:
|
||||
- k = q_match.start(0)
|
||||
- if q_match and (not o_match or k < j): # QuotePatt matched
|
||||
- res.append(str[i:k])
|
||||
- res.append(str[k+1])
|
||||
- i = k + 2
|
||||
- else: # OctalPatt matched
|
||||
- res.append(str[i:j])
|
||||
- res.append(chr(int(str[j+1:j+4], 8)))
|
||||
- i = j + 4
|
||||
- return _nulljoin(res)
|
||||
+ return _unquote_sub(_unquote_replace, str)
|
||||
|
||||
# The _getdate() routine is used to set the expiration time in the cookie's HTTP
|
||||
# header. By default, _getdate() returns the current time in the appropriate
|
||||
diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py
|
||||
index 6072c7e15e9..644e75cd5b7 100644
|
||||
--- a/Lib/test/test_http_cookies.py
|
||||
+++ b/Lib/test/test_http_cookies.py
|
||||
@@ -5,6 +5,7 @@
|
||||
import unittest
|
||||
from http import cookies
|
||||
import pickle
|
||||
+from test import support
|
||||
|
||||
|
||||
class CookieTests(unittest.TestCase):
|
||||
@@ -58,6 +59,43 @@ def test_basic(self):
|
||||
for k, v in sorted(case['dict'].items()):
|
||||
self.assertEqual(C[k].value, v)
|
||||
|
||||
+ def test_unquote(self):
|
||||
+ cases = [
|
||||
+ (r'a="b=\""', 'b="'),
|
||||
+ (r'a="b=\\"', 'b=\\'),
|
||||
+ (r'a="b=\="', 'b=='),
|
||||
+ (r'a="b=\n"', 'b=n'),
|
||||
+ (r'a="b=\042"', 'b="'),
|
||||
+ (r'a="b=\134"', 'b=\\'),
|
||||
+ (r'a="b=\377"', 'b=\xff'),
|
||||
+ (r'a="b=\400"', 'b=400'),
|
||||
+ (r'a="b=\42"', 'b=42'),
|
||||
+ (r'a="b=\\042"', 'b=\\042'),
|
||||
+ (r'a="b=\\134"', 'b=\\134'),
|
||||
+ (r'a="b=\\\""', 'b=\\"'),
|
||||
+ (r'a="b=\\\042"', 'b=\\"'),
|
||||
+ (r'a="b=\134\""', 'b=\\"'),
|
||||
+ (r'a="b=\134\042"', 'b=\\"'),
|
||||
+ ]
|
||||
+ for encoded, decoded in cases:
|
||||
+ with self.subTest(encoded):
|
||||
+ C = cookies.SimpleCookie()
|
||||
+ C.load(encoded)
|
||||
+ self.assertEqual(C['a'].value, decoded)
|
||||
+
|
||||
+ @support.requires_resource('cpu')
|
||||
+ def test_unquote_large(self):
|
||||
+ n = 10**6
|
||||
+ for encoded in r'\\', r'\134':
|
||||
+ with self.subTest(encoded):
|
||||
+ data = 'a="b=' + encoded*n + ';"'
|
||||
+ C = cookies.SimpleCookie()
|
||||
+ C.load(data)
|
||||
+ value = C['a'].value
|
||||
+ self.assertEqual(value[:3], 'b=\\')
|
||||
+ self.assertEqual(value[-2:], '\\;')
|
||||
+ self.assertEqual(len(value), n + 3)
|
||||
+
|
||||
def test_load(self):
|
||||
C = cookies.SimpleCookie()
|
||||
C.load('Customer="WILE_E_COYOTE"; Version=1; Path=/acme')
|
||||
diff --git a/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst
|
||||
new file mode 100644
|
||||
index 00000000000..6a234561fe3
|
||||
--- /dev/null
|
||||
+++ b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst
|
||||
@@ -0,0 +1 @@
|
||||
+Fix quadratic complexity in parsing ``"``-quoted cookie values with backslashes by :mod:`http.cookies`.
|
||||
--
|
||||
2.46.0
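
For context on the cookie change above, a short sketch (taken from the test cases in the patch, not from the patch's own code) of the unquoting behaviour that is preserved: octal and backslash escapes inside a quoted value decode as before, while the single-pass regex substitution avoids the old quadratic blow-up on long runs of backslashes:

    from http import cookies

    c = cookies.SimpleCookie()
    c.load(r'a="b=\042"')        # \042 is the octal escape for '"'
    print(c['a'].value)          # b="

    c = cookies.SimpleCookie()
    c.load(r'a="b=\\"')          # escaped backslash
    print(c['a'].value)          # b=\

    # Before the fix, a value such as 'a="b=' + r'\134' * 10**6 + ';"' took
    # quadratic time to unquote; the new single re.sub() pass does not.
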
|
||||
|
||||
@@ -1,124 +0,0 @@
From e0264a61119d551658d9445af38323ba94fc16db Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs" <jaraco@jaraco.com>
Date: Thu, 22 Aug 2024 19:24:33 -0400
Subject: [PATCH] CVE-2024-8088: Sanitize names in zipfile.Path. (GH-122906)

Upstream-Status: Backport from https://github.com/python/cpython/commit/e0264a61119d551658d9445af38323ba94fc16db
CVE: CVE-2024-8088

Signed-off-by: Rohini Sangam <rsangam@mvista.com>
---
 Lib/test/test_zipfile.py | 17 ++++++
 Lib/zipfile.py | 61 ++++++++++++++++++-
 2 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
|
||||
index 32c0170..a60dc11 100644
|
||||
--- a/Lib/test/test_zipfile.py
|
||||
+++ b/Lib/test/test_zipfile.py
|
||||
@@ -3280,6 +3280,23 @@ with zipfile.ZipFile(io.BytesIO(), "w") as zf:
|
||||
zipfile.Path(zf)
|
||||
zf.extractall(source_path.parent)
|
||||
|
||||
+ def test_malformed_paths(self):
|
||||
+ """
|
||||
+ Path should handle malformed paths.
|
||||
+ """
|
||||
+ data = io.BytesIO()
|
||||
+ zf = zipfile.ZipFile(data, "w")
|
||||
+ zf.writestr("/one-slash.txt", b"content")
|
||||
+ zf.writestr("//two-slash.txt", b"content")
|
||||
+ zf.writestr("../parent.txt", b"content")
|
||||
+ zf.filename = ''
|
||||
+ root = zipfile.Path(zf)
|
||||
+ assert list(map(str, root.iterdir())) == [
|
||||
+ 'one-slash.txt',
|
||||
+ 'two-slash.txt',
|
||||
+ 'parent.txt',
|
||||
+ ]
|
||||
+
|
||||
|
||||
class StripExtraTests(unittest.TestCase):
|
||||
# Note: all of the "z" characters are technically invalid, but up
|
||||
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
|
||||
index 7d18bc2..cbac8d9 100644
|
||||
--- a/Lib/zipfile.py
|
||||
+++ b/Lib/zipfile.py
|
||||
@@ -9,6 +9,7 @@ import io
|
||||
import itertools
|
||||
import os
|
||||
import posixpath
|
||||
+import re
|
||||
import shutil
|
||||
import stat
|
||||
import struct
|
||||
@@ -2182,7 +2183,65 @@ def _difference(minuend, subtrahend):
|
||||
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
|
||||
|
||||
|
||||
-class CompleteDirs(ZipFile):
|
||||
+class SanitizedNames:
|
||||
+ """
|
||||
+ ZipFile mix-in to ensure names are sanitized.
|
||||
+ """
|
||||
+
|
||||
+ def namelist(self):
|
||||
+ return list(map(self._sanitize, super().namelist()))
|
||||
+
|
||||
+ @staticmethod
|
||||
+ def _sanitize(name):
|
||||
+ r"""
|
||||
+ Ensure a relative path with posix separators and no dot names.
|
||||
+ Modeled after
|
||||
+ https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
|
||||
+ but provides consistent cross-platform behavior.
|
||||
+ >>> san = SanitizedNames._sanitize
|
||||
+ >>> san('/foo/bar')
|
||||
+ 'foo/bar'
|
||||
+ >>> san('//foo.txt')
|
||||
+ 'foo.txt'
|
||||
+ >>> san('foo/.././bar.txt')
|
||||
+ 'foo/bar.txt'
|
||||
+ >>> san('foo../.bar.txt')
|
||||
+ 'foo../.bar.txt'
|
||||
+ >>> san('\\foo\\bar.txt')
|
||||
+ 'foo/bar.txt'
|
||||
+ >>> san('D:\\foo.txt')
|
||||
+ 'D/foo.txt'
|
||||
+ >>> san('\\\\server\\share\\file.txt')
|
||||
+ 'server/share/file.txt'
|
||||
+ >>> san('\\\\?\\GLOBALROOT\\Volume3')
|
||||
+ '?/GLOBALROOT/Volume3'
|
||||
+ >>> san('\\\\.\\PhysicalDrive1\\root')
|
||||
+ 'PhysicalDrive1/root'
|
||||
+ Retain any trailing slash.
|
||||
+ >>> san('abc/')
|
||||
+ 'abc/'
|
||||
+ Raises a ValueError if the result is empty.
|
||||
+ >>> san('../..')
|
||||
+ Traceback (most recent call last):
|
||||
+ ...
|
||||
+ ValueError: Empty filename
|
||||
+ """
|
||||
+
|
||||
+ def allowed(part):
|
||||
+ return part and part not in {'..', '.'}
|
||||
+
|
||||
+ # Remove the drive letter.
|
||||
+ # Don't use ntpath.splitdrive, because that also strips UNC paths
|
||||
+ bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
|
||||
+ clean = bare.replace('\\', '/')
|
||||
+ parts = clean.split('/')
|
||||
+ joined = '/'.join(filter(allowed, parts))
|
||||
+ if not joined:
|
||||
+ raise ValueError("Empty filename")
|
||||
+ return joined + '/' * name.endswith('/')
|
||||
+
|
||||
+
|
||||
+class CompleteDirs(SanitizedNames, ZipFile):
|
||||
"""
|
||||
A ZipFile subclass that ensures that implied directories
|
||||
are always included in the namelist.
|
||||
--
|
||||
2.35.7
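
Finally, a small sketch (modelled on test_malformed_paths above, not part of the patch) showing the sanitized names zipfile.Path reports for hostile member names once this fix is in place:

    import io
    import zipfile

    data = io.BytesIO()
    with zipfile.ZipFile(data, "w") as zf:
        zf.writestr("/one-slash.txt", b"content")
        zf.writestr("//two-slash.txt", b"content")
        zf.writestr("../parent.txt", b"content")

    zf = zipfile.ZipFile(data)      # reopen for reading
    zf.filename = ''                # as in the test, so str() shows bare member names
    root = zipfile.Path(zf)
    print([str(p) for p in root.iterdir()])
    # With the sanitizer: ['one-slash.txt', 'two-slash.txt', 'parent.txt']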