Fixed CVE-2025-64460 -- Corrected quadratic inner text accumulation in XML serializer.

Previously, `getInnerText()` recursively used `list.extend()` on strings,
which added each character from child nodes as a separate list element.
On deeply nested XML content, this caused the overall deserialization
work to grow quadratically with input size, potentially allowing
disproportionate CPU consumption for crafted XML.

The fix separates collection of inner texts from joining them, so that
each subtree is joined only once, reducing the complexity to linear in
the size of the input. These changes also include a mitigation for an
xml.dom.minidom performance issue.

Thanks Seokchan Yoon (https://ch4n3.kr/) for the report.

Co-authored-by: Jacob Walls <jacobtylerwalls@gmail.com>
Co-authored-by: Natalia <124304+nessita@users.noreply.github.com>
This commit is contained in:
Shai Berger
2025-10-11 21:42:56 +03:00
committed by Natalia
parent 5b90ca1e75
commit 50efb718b3
6 changed files with 119 additions and 6 deletions

View File

@@ -3,7 +3,8 @@ XML serializer.
"""
import json
from xml.dom import pulldom
from contextlib import contextmanager
from xml.dom import minidom, pulldom
from xml.sax import handler
from xml.sax.expatreader import ExpatParser as _ExpatParser
@@ -15,6 +16,25 @@ from django.db import DEFAULT_DB_ALIAS, models
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
@contextmanager
def fast_cache_clearing():
    """Workaround for performance issues in minidom document checks.

    While the context is active, ``minidom._in_document`` is replaced by a
    constant-time check of ``node.ownerDocument``, which speeds up repeated
    DOM operations by skipping unnecessary full traversal of the DOM tree.
    The original helper is put back on exit, including when the body raises.

    Nothing is patched (and nothing is restored) when:

    * ``minidom`` has no ``_in_document`` helper, or
    * the helper is already a lambda -- presumably because an enclosing
      ``fast_cache_clearing()`` block installed it; leaving it alone lets
      the outermost block own the restore.
    """
    original_fn = getattr(minidom, "_in_document", None)
    should_patch = (
        original_fn is not None and original_fn.__name__ != "<lambda>"
    )
    if should_patch:
        minidom._in_document = lambda node: bool(node.ownerDocument)
    try:
        yield
    finally:
        if should_patch:
            minidom._in_document = original_fn
class Serializer(base.Serializer):
"""Serialize a QuerySet to XML."""
@@ -210,7 +230,8 @@ class Deserializer(base.Deserializer):
def __next__(self):
    """Return the next deserialized object from the XML event stream.

    Events are consumed from ``self.event_stream`` until the start of an
    ``<object>`` element is found; that element is expanded into a full DOM
    subtree and handed to ``self._handle_object()``.

    Raises ``StopIteration`` once the stream holds no further ``<object>``
    elements.
    """
    for event, node in self.event_stream:
        if event == "START_ELEMENT" and node.nodeName == "object":
            # Expand the node exactly once, and only under the minidom
            # workaround, so the expansion benefits from the cheaper
            # document check.
            with fast_cache_clearing():
                self.event_stream.expandNode(node)
            return self._handle_object(node)
    raise StopIteration
@@ -397,20 +418,26 @@ class Deserializer(base.Deserializer):
def getInnerText(node):
    """Get all the inner text of a DOM node (recursively).

    The text fragments are gathered by ``getInnerTextList()`` and joined a
    single time here, rather than joining at every level of the recursion.
    """
    inner_text_list = getInnerTextList(node)
    return "".join(inner_text_list)
def getInnerTextList(node):
    """Return a list of the inner texts of a DOM node (recursively).

    Text and CDATA children contribute their ``data`` as one list item each,
    element children contribute the items of their own subtree in document
    order, and every other node type is ignored. The fragments are returned
    unjoined so that the caller can join them once.
    """
    # inspired by
    # https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
    result = []
    for child in node.childNodes:
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
            result.append(child.data)
        elif child.nodeType == child.ELEMENT_NODE:
            # Extend with the child's *list* of fragments. Extending with a
            # joined string would add one list item per character.
            result.extend(getInnerTextList(child))
    return result
# Below code based on Christian Heimes' defusedxml

View File

@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
PostgreSQL.
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
=================================================================================
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
denial-of-service attack due to quadratic time complexity when deserializing
crafted documents containing many nested invalid elements. The internal helper
``django.core.serializers.xml_serializer.getInnerText()`` previously
accumulated inner text inefficiently during recursion. It now collects text per
element, avoiding excessive resource usage.
Bugfixes
========

View File

@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
PostgreSQL.
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
=================================================================================
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
denial-of-service attack due to quadratic time complexity when deserializing
crafted documents containing many nested invalid elements. The internal helper
``django.core.serializers.xml_serializer.getInnerText()`` previously
accumulated inner text inefficiently during recursion. It now collects text per
element, avoiding excessive resource usage.
Bugfixes
========

View File

@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
PostgreSQL.
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
=================================================================================
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
denial-of-service attack due to quadratic time complexity when deserializing
crafted documents containing many nested invalid elements. The internal helper
``django.core.serializers.xml_serializer.getInnerText()`` previously
accumulated inner text inefficiently during recursion. It now collects text per
element, avoiding excessive resource usage.
Bugfixes
========

View File

@@ -173,6 +173,8 @@ Identifier Information
.. _jsonl: https://jsonlines.org/
.. _PyYAML: https://pyyaml.org/
.. _serialization-formats-xml:
XML
---

View File

@@ -1,11 +1,15 @@
import json
import time
import unittest
from django.core.serializers.base import DeserializationError, DeserializedObject
from django.core.serializers.json import Deserializer as JsonDeserializer
from django.core.serializers.jsonl import Deserializer as JsonlDeserializer
from django.core.serializers.python import Deserializer
from django.core.serializers.xml_serializer import Deserializer as XMLDeserializer
from django.db import models
from django.test import SimpleTestCase
from django.test.utils import garbage_collect
from .models import Author
@@ -133,3 +137,53 @@ class TestDeserializer(SimpleTestCase):
self.assertEqual(first_item.object, self.jane)
self.assertEqual(second_item.object, self.joe)
def test_crafted_xml_performance(self):
    """The time to process invalid inputs is not quadratic."""

    def build_crafted_xml(depth, leaf_text_len):
        # One field whose content is `depth` nested <nested> elements
        # wrapped around a run of `leaf_text_len` "x" characters.
        nested_open = "<nested>" * depth
        nested_close = "</nested>" * depth
        leaf = "x" * leaf_text_len
        field_content = f"{nested_open}{leaf}{nested_close}"
        return f"""
<django-objects version="1.0">
<object model="contenttypes.contenttype" pk="1">
<field name="app_label">{field_content}</field>
<field name="model">m</field>
</object>
</django-objects>
"""

    def deserialize(crafted_xml):
        # Return the wall-clock seconds spent consuming the deserializer.
        iterator = XMLDeserializer(crafted_xml)
        # Collect garbage before starting the clock -- presumably so that
        # pending collections do not land inside the measured interval.
        garbage_collect()

        start_time = time.perf_counter()
        result = list(iterator)
        end_time = time.perf_counter()

        self.assertEqual(len(result), 1)
        self.assertIsInstance(result[0].object, models.Model)
        return end_time - start_time

    def assertFactor(label, params, factor=2):
        # Time each (depth, length) pair in order and record how much
        # slower every run is than the one before it.
        factors = []
        prev_time = None
        for depth, length in params:
            crafted_xml = build_crafted_xml(depth, length)
            elapsed = deserialize(crafted_xml)
            if prev_time is not None:
                factors.append(elapsed / prev_time)
            prev_time = elapsed

        with self.subTest(label):
            # Assert based on the average factor to reduce test flakiness.
            self.assertLessEqual(sum(factors) / len(factors), factor)

    assertFactor(
        "varying depth, varying length",
        [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
        2,
    )
    assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)