diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py
index 0557af3954..f8ec0865a7 100644
--- a/django/core/serializers/xml_serializer.py
+++ b/django/core/serializers/xml_serializer.py
@@ -3,7 +3,8 @@ XML serializer.
 """
 
 import json
-from xml.dom import pulldom
+from contextlib import contextmanager
+from xml.dom import minidom, pulldom
 from xml.sax import handler
 from xml.sax.expatreader import ExpatParser as _ExpatParser
 
@@ -15,6 +16,25 @@ from django.db import DEFAULT_DB_ALIAS, models
 from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
 
 
+@contextmanager
+def fast_cache_clearing():
+    """Workaround for performance issues in minidom document checks.
+
+    Speeds up repeated DOM operations by skipping unnecessary full traversal
+    of the DOM tree.
+    """
+    module_helper_was_lambda = False
+    if original_fn := getattr(minidom, "_in_document", None):
+        module_helper_was_lambda = original_fn.__name__ == "<lambda>"
+        if not module_helper_was_lambda:
+            minidom._in_document = lambda node: bool(node.ownerDocument)
+    try:
+        yield
+    finally:
+        if original_fn and not module_helper_was_lambda:
+            minidom._in_document = original_fn
+
+
 class Serializer(base.Serializer):
     """Serialize a QuerySet to XML."""
 
@@ -210,7 +230,8 @@ class Deserializer(base.Deserializer):
     def __next__(self):
         for event, node in self.event_stream:
             if event == "START_ELEMENT" and node.nodeName == "object":
-                self.event_stream.expandNode(node)
+                with fast_cache_clearing():
+                    self.event_stream.expandNode(node)
                 return self._handle_object(node)
         raise StopIteration
 
@@ -397,20 +418,26 @@ class Deserializer(base.Deserializer):
 
 def getInnerText(node):
     """Get all the inner text of a DOM node (recursively)."""
+    inner_text_list = getInnerTextList(node)
+    return "".join(inner_text_list)
+
+
+def getInnerTextList(node):
+    """Return a list of the inner texts of a DOM node (recursively)."""
     # inspired by
     # https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
-    inner_text = []
+    result = []
     for child in node.childNodes:
         if (
             child.nodeType == child.TEXT_NODE
             or child.nodeType == child.CDATA_SECTION_NODE
         ):
-            inner_text.append(child.data)
+            result.append(child.data)
         elif child.nodeType == child.ELEMENT_NODE:
-            inner_text.extend(getInnerText(child))
+            result.extend(getInnerTextList(child))
         else:
             pass
-    return "".join(inner_text)
+    return result
 
 
 # Below code based on Christian Heimes' defusedxml
diff --git a/docs/releases/4.2.27.txt b/docs/releases/4.2.27.txt
index e95dc63f74..b843f6a443 100644
--- a/docs/releases/4.2.27.txt
+++ b/docs/releases/4.2.27.txt
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
 ``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
 PostgreSQL.
 
+CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
+=================================================================================
+
+:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
+denial-of-service attack due to quadratic time complexity when deserializing
+crafted documents containing many nested invalid elements. The internal helper
+``django.core.serializers.xml_serializer.getInnerText()`` previously
+accumulated inner text inefficiently during recursion. It now collects text per
+element, avoiding excessive resource usage.
+
 Bugfixes
 ========
 
diff --git a/docs/releases/5.1.15.txt b/docs/releases/5.1.15.txt
index f55623ea96..63ff22732e 100644
--- a/docs/releases/5.1.15.txt
+++ b/docs/releases/5.1.15.txt
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
 ``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
 PostgreSQL.
 
+CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
+=================================================================================
+
+:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
+denial-of-service attack due to quadratic time complexity when deserializing
+crafted documents containing many nested invalid elements. The internal helper
+``django.core.serializers.xml_serializer.getInnerText()`` previously
+accumulated inner text inefficiently during recursion. It now collects text per
+element, avoiding excessive resource usage.
+
 Bugfixes
 ========
 
diff --git a/docs/releases/5.2.9.txt b/docs/releases/5.2.9.txt
index 08c298999a..ba235d05c6 100644
--- a/docs/releases/5.2.9.txt
+++ b/docs/releases/5.2.9.txt
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
 ``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
 PostgreSQL.
 
+CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
+=================================================================================
+
+:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
+denial-of-service attack due to quadratic time complexity when deserializing
+crafted documents containing many nested invalid elements. The internal helper
+``django.core.serializers.xml_serializer.getInnerText()`` previously
+accumulated inner text inefficiently during recursion. It now collects text per
+element, avoiding excessive resource usage.
+
 Bugfixes
 ========
 
diff --git a/docs/topics/serialization.txt b/docs/topics/serialization.txt
index f0ac0811be..2b28f5e15a 100644
--- a/docs/topics/serialization.txt
+++ b/docs/topics/serialization.txt
@@ -173,6 +173,8 @@ Identifier Information
 .. _jsonl: https://jsonlines.org/
 .. _PyYAML: https://pyyaml.org/
 
+.. _serialization-formats-xml:
+
 XML
 ---
 
diff --git a/tests/serializers/test_deserialization.py b/tests/serializers/test_deserialization.py
index 0bbb46b7ce..a718a99038 100644
--- a/tests/serializers/test_deserialization.py
+++ b/tests/serializers/test_deserialization.py
@@ -1,11 +1,15 @@
 import json
+import time
 import unittest
 
 from django.core.serializers.base import DeserializationError, DeserializedObject
 from django.core.serializers.json import Deserializer as JsonDeserializer
 from django.core.serializers.jsonl import Deserializer as JsonlDeserializer
 from django.core.serializers.python import Deserializer
+from django.core.serializers.xml_serializer import Deserializer as XMLDeserializer
+from django.db import models
 from django.test import SimpleTestCase
+from django.test.utils import garbage_collect
 
 from .models import Author
 
@@ -133,3 +137,53 @@ class TestDeserializer(SimpleTestCase):
 
         self.assertEqual(first_item.object, self.jane)
         self.assertEqual(second_item.object, self.joe)
+
+    def test_crafted_xml_performance(self):
+        """The time to process invalid inputs is not quadratic."""
+
+        def build_crafted_xml(depth, leaf_text_len):
+            nested_open = "<nested>" * depth
+            nested_close = "</nested>" * depth
+            leaf = "x" * leaf_text_len
+            field_content = f"{nested_open}{leaf}{nested_close}"
+            return f"""
+               <django-objects version="1.0">
+                  <object model="contenttypes.contenttype" pk="1">
+                     <field name="app_label">{field_content}</field>
+                     <field name="model">m</field>
+                  </object>
+               </django-objects>
+            """
+
+        def deserialize(crafted_xml):
+            iterator = XMLDeserializer(crafted_xml)
+            garbage_collect()
+
+            start_time = time.perf_counter()
+            result = list(iterator)
+            end_time = time.perf_counter()
+
+            self.assertEqual(len(result), 1)
+            self.assertIsInstance(result[0].object, models.Model)
+            return end_time - start_time
+
+        def assertFactor(label, params, factor=2):
+            factors = []
+            prev_time = None
+            for depth, length in params:
+                crafted_xml = build_crafted_xml(depth, length)
+                elapsed = deserialize(crafted_xml)
+                if prev_time is not None:
+                    factors.append(elapsed / prev_time)
+                prev_time = elapsed
+
+            with self.subTest(label):
+                # Assert based on the average factor to reduce test flakiness.
+                self.assertLessEqual(sum(factors) / len(factors), factor)
+
+        assertFactor(
+            "varying depth, varying length",
+            [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
+            2,
+        )
+        assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)