From f98c90f071228ed73df997807298e1df4f790c33 Mon Sep 17 00:00:00 2001 From: Martin Blech <78768+martinblech@users.noreply.github.com> Date: Mon, 8 Sep 2025 11:18:33 -0700 Subject: [PATCH] Enhance unparse() XML name validation with stricter rules and tests Extend existing validation (previously only for "<" and ">") to also reject element, attribute, and xmlns prefix names that are non-string, start with "?" or "!", or contain "/", spaces, tabs, or newlines. Update _emit and namespace handling to use _validate_name. Add tests covering these new invalid name cases. CVE: CVE-2025-9375 Upstream-Status: Backport https://github.com/martinblech/xmltodict/commit/f98c90f071228ed73df997807298e1df4f790c33 https://git.launchpad.net/ubuntu/+source/python-xmltodict/commit/?id=e8110a20e00d80db31d5fc9f8f4577328385d6b6 Signed-off-by: Saravanan xml = unparse({"a": {"@attr": "12", "#text": "x"}}, full_document=False) # The generated XML should contain escaped '<' and '>' within the attribute value self.assertIn('attr="1<middle>2"', xml) + + def test_rejects_tag_name_starting_with_question(self): + with self.assertRaises(ValueError): + unparse({"?pi": "data"}, full_document=False) + + def test_rejects_tag_name_starting_with_bang(self): + with self.assertRaises(ValueError): + unparse({"!decl": "data"}, full_document=False) + + def test_rejects_attribute_name_starting_with_question(self): + with self.assertRaises(ValueError): + unparse({"a": {"@?weird": "x"}}, full_document=False) + + def test_rejects_attribute_name_starting_with_bang(self): + with self.assertRaises(ValueError): + unparse({"a": {"@!weird": "x"}}, full_document=False) + + def test_rejects_xmlns_prefix_starting_with_question_or_bang(self): + with self.assertRaises(ValueError): + unparse({"a": {"@xmlns": {"?p": "http://e/"}}}, full_document=False) + with self.assertRaises(ValueError): + unparse({"a": {"@xmlns": {"!p": "http://e/"}}}, full_document=False) + + def test_rejects_non_string_names(self): + class Weird: + def __str__(self): + return "bad>name" + + # Non-string element key + with self.assertRaises(ValueError): + unparse({Weird(): "x"}, full_document=False) + # Non-string attribute key + with self.assertRaises(ValueError): + unparse({"a": {Weird(): "x"}}, full_document=False) + + def test_rejects_tag_name_with_slash(self): + with self.assertRaises(ValueError): + unparse({"bad/name": "x"}, full_document=False) + + def test_rejects_tag_name_with_whitespace(self): + for name in ["bad name", "bad\tname", "bad\nname"]: + with self.assertRaises(ValueError): + unparse({name: "x"}, full_document=False) + + def test_rejects_attribute_name_with_slash(self): + with self.assertRaises(ValueError): + unparse({"a": {"@bad/name": "x"}}, full_document=False) + + def test_rejects_attribute_name_with_whitespace(self): + for name in ["@bad name", "@bad\tname", "@bad\nname"]: + with self.assertRaises(ValueError): + unparse({"a": {name: "x"}}, full_document=False) + + def test_rejects_xmlns_prefix_with_slash_or_whitespace(self): + # Slash + with self.assertRaises(ValueError): + unparse({"a": {"@xmlns": {"bad/prefix": "http://e/"}}}, full_document=False) + # Whitespace + with self.assertRaises(ValueError): + unparse({"a": {"@xmlns": {"bad prefix": "http://e/"}}}, full_document=False) Index: python-xmltodict-0.13.0/xmltodict.py =================================================================== --- python-xmltodict-0.13.0.orig/xmltodict.py +++ python-xmltodict-0.13.0/xmltodict.py @@ -387,7 +387,42 @@ def _has_angle_brackets(value): return isinstance(value, str) and ("<" in value or ">" in value) +def _has_invalid_name_chars(value): + """Return True if value (a str) contains any disallowed name characters. + + Disallowed: '<', '>', '/', or any whitespace character. + Non-string values return False. + """ + if not isinstance(value, str): + return False + if "<" in value or ">" in value or "/" in value: + return True + # Check for any whitespace (spaces, tabs, newlines, etc.) + return any(ch.isspace() for ch in value) + + +def _validate_name(value, kind): + """Validate an element/attribute name for XML safety. + + Raises ValueError with a specific reason when invalid. + + kind: 'element' or 'attribute' (used in error messages) + """ + if not isinstance(value, str): + raise ValueError(f"{kind} name must be a string") + if value.startswith("?") or value.startswith("!"): + raise ValueError(f'Invalid {kind} name: cannot start with "?" or "!"') + if "<" in value or ">" in value: + raise ValueError(f'Invalid {kind} name: "<" or ">" not allowed') + if "/" in value: + raise ValueError(f'Invalid {kind} name: "/" not allowed') + if any(ch.isspace() for ch in value): + raise ValueError(f"Invalid {kind} name: whitespace not allowed") + + def _process_namespace(name, namespaces, ns_sep=':', attr_prefix='@'): + if not isinstance(name, str): + return name if not namespaces: return name try: @@ -421,8 +456,7 @@ def _emit(key, value, content_handler, return key, value = result # Minimal validation to avoid breaking out of tag context - if _has_angle_brackets(key): - raise ValueError('Invalid element name: "<" or ">" not allowed') + _validate_name(key, "element") if (not hasattr(value, '__iter__') or isinstance(value, _basestring) or isinstance(value, dict)): @@ -451,23 +485,19 @@ def _emit(key, value, content_handler, if ik == cdata_key: cdata = iv continue - if ik.startswith(attr_prefix): + if isinstance(ik, str) and ik.startswith(attr_prefix): ik = _process_namespace(ik, namespaces, namespace_separator, attr_prefix) if ik == '@xmlns' and isinstance(iv, dict): for k, v in iv.items(): - if _has_angle_brackets(k): - raise ValueError( - 'Invalid attribute name: "<" or ">" not allowed' - ) + _validate_name(k, "attribute") attr = 'xmlns{}'.format(':{}'.format(k) if k else '') attrs[attr] = _unicode(v) continue if not isinstance(iv, _unicode): iv = _unicode(iv) attr_name = ik[len(attr_prefix) :] - if _has_angle_brackets(attr_name): - raise ValueError('Invalid attribute name: "<" or ">" not allowed') + _validate_name(attr_name, "attribute") attrs[attr_name] = iv continue children.append((ik, iv))