mirror of
https://github.com/django/django.git
synced 2026-02-09 02:49:25 +08:00
Fixed CVE-2025-64460 -- Corrected quadratic inner text accumulation in XML serializer.
Previously, `getInnerText()` recursively used `list.extend()` on strings, which added each character from child nodes as a separate list element. On deeply nested XML content, this caused the overall deserialization work to grow quadratically with input size, potentially allowing disproportionate CPU consumption for crafted XML. The fix separates collection of inner texts from joining them, so that each subtree is joined only once, reducing the complexity to linear in the size of the input. These changes also include a mitigation for an xml.dom.minidom performance issue. Thanks Seokchan Yoon (https://ch4n3.kr/) for the report. Co-authored-by: Jacob Walls <jacobtylerwalls@gmail.com> Co-authored-by: Natalia <124304+nessita@users.noreply.github.com>
This commit is contained in:
@@ -3,7 +3,8 @@ XML serializer.
|
||||
"""
|
||||
|
||||
import json
|
||||
from xml.dom import pulldom
|
||||
from contextlib import contextmanager
|
||||
from xml.dom import minidom, pulldom
|
||||
from xml.sax import handler
|
||||
from xml.sax.expatreader import ExpatParser as _ExpatParser
|
||||
|
||||
@@ -15,6 +16,25 @@ from django.db import DEFAULT_DB_ALIAS, models
|
||||
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
|
||||
|
||||
|
||||
@contextmanager
def fast_cache_clearing():
    """Workaround for performance issues in minidom document checks.

    Speeds up repeated DOM operations by skipping unnecessary full traversal
    of the DOM tree.

    While the context is active, the private ``minidom._in_document`` helper
    (when it exists and is not already a lambda) is replaced by a check of
    ``node.ownerDocument``. The original helper is put back on exit, even if
    the managed block raises.
    """
    module_helper_was_lambda = False
    # ``_in_document`` is a private minidom helper, so look it up defensively.
    if original_fn := getattr(minidom, "_in_document", None):
        # NOTE(review): a helper named "<lambda>" is presumably a replacement
        # that is already installed (e.g. by an enclosing use of this context
        # manager); in that case leave it alone and do not restore on exit.
        module_helper_was_lambda = original_fn.__name__ == "<lambda>"
        if not module_helper_was_lambda:
            minidom._in_document = lambda node: bool(node.ownerDocument)
    try:
        yield
    finally:
        # Only undo the patch if this invocation was the one that applied it.
        if original_fn and not module_helper_was_lambda:
            minidom._in_document = original_fn
|
||||
|
||||
|
||||
class Serializer(base.Serializer):
|
||||
"""Serialize a QuerySet to XML."""
|
||||
|
||||
@@ -210,7 +230,8 @@ class Deserializer(base.Deserializer):
|
||||
def __next__(self):
    """Return the result of handling the next ``<object>`` element.

    Events are consumed from ``self.event_stream`` until the start of an
    element named ``object`` is seen. That node is expanded once and passed
    to ``self._handle_object()``, whose result is returned.

    Raises StopIteration when the event stream is exhausted.
    """
    for event, node in self.event_stream:
        if event == "START_ELEMENT" and node.nodeName == "object":
            # Expand the node exactly once, inside the minidom workaround.
            with fast_cache_clearing():
                self.event_stream.expandNode(node)
            return self._handle_object(node)
    raise StopIteration
|
||||
|
||||
@@ -397,20 +418,26 @@ class Deserializer(base.Deserializer):
|
||||
|
||||
def getInnerText(node):
    """Get all the inner text of a DOM node (recursively).

    The text pieces are gathered by ``getInnerTextList()`` and joined into a
    single string once, here.
    """
    inner_text_list = getInnerTextList(node)
    return "".join(inner_text_list)
|
||||
|
||||
|
||||
def getInnerTextList(node):
    """Return a list of the inner texts of a DOM node (recursively).

    Each text or CDATA section child contributes its ``data`` as one list
    item. Each element child contributes the items of its own subtree, in
    child order. Children of any other node type are ignored.
    """
    # inspired by
    # https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
    result = []
    for child in node.childNodes:
        if (
            child.nodeType == child.TEXT_NODE
            or child.nodeType == child.CDATA_SECTION_NODE
        ):
            result.append(child.data)
        elif child.nodeType == child.ELEMENT_NODE:
            # Extend with the child's list of strings. Extending with an
            # already-joined string would add one item per character.
            result.extend(getInnerTextList(child))
    return result
|
||||
|
||||
|
||||
# Below code based on Christian Heimes' defusedxml
|
||||
|
||||
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
|
||||
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
|
||||
PostgreSQL.
|
||||
|
||||
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
|
||||
=================================================================================
|
||||
|
||||
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
|
||||
denial-of-service attack due to quadratic time complexity when deserializing
|
||||
crafted documents containing many nested invalid elements. The internal helper
|
||||
``django.core.serializers.xml_serializer.getInnerText()`` previously
|
||||
accumulated inner text inefficiently during recursion. It now collects text per
|
||||
element, avoiding excessive resource usage.
|
||||
|
||||
Bugfixes
|
||||
========
|
||||
|
||||
|
||||
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
|
||||
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
|
||||
PostgreSQL.
|
||||
|
||||
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
|
||||
=================================================================================
|
||||
|
||||
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
|
||||
denial-of-service attack due to quadratic time complexity when deserializing
|
||||
crafted documents containing many nested invalid elements. The internal helper
|
||||
``django.core.serializers.xml_serializer.getInnerText()`` previously
|
||||
accumulated inner text inefficiently during recursion. It now collects text per
|
||||
element, avoiding excessive resource usage.
|
||||
|
||||
Bugfixes
|
||||
========
|
||||
|
||||
|
||||
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
|
||||
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
|
||||
PostgreSQL.
|
||||
|
||||
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
|
||||
=================================================================================
|
||||
|
||||
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
|
||||
denial-of-service attack due to quadratic time complexity when deserializing
|
||||
crafted documents containing many nested invalid elements. The internal helper
|
||||
``django.core.serializers.xml_serializer.getInnerText()`` previously
|
||||
accumulated inner text inefficiently during recursion. It now collects text per
|
||||
element, avoiding excessive resource usage.
|
||||
|
||||
Bugfixes
|
||||
========
|
||||
|
||||
|
||||
@@ -173,6 +173,8 @@ Identifier Information
|
||||
.. _jsonl: https://jsonlines.org/
|
||||
.. _PyYAML: https://pyyaml.org/
|
||||
|
||||
.. _serialization-formats-xml:
|
||||
|
||||
XML
|
||||
---
|
||||
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
import json
|
||||
import time
|
||||
import unittest
|
||||
|
||||
from django.core.serializers.base import DeserializationError, DeserializedObject
|
||||
from django.core.serializers.json import Deserializer as JsonDeserializer
|
||||
from django.core.serializers.jsonl import Deserializer as JsonlDeserializer
|
||||
from django.core.serializers.python import Deserializer
|
||||
from django.core.serializers.xml_serializer import Deserializer as XMLDeserializer
|
||||
from django.db import models
|
||||
from django.test import SimpleTestCase
|
||||
from django.test.utils import garbage_collect
|
||||
|
||||
from .models import Author
|
||||
|
||||
@@ -133,3 +137,53 @@ class TestDeserializer(SimpleTestCase):
|
||||
|
||||
self.assertEqual(first_item.object, self.jane)
|
||||
self.assertEqual(second_item.object, self.joe)
|
||||
|
||||
def test_crafted_xml_performance(self):
    """The time to process invalid inputs is not quadratic."""

    def build_crafted_xml(depth, leaf_text_len):
        # One field whose content is ``depth`` nested <nested> elements
        # wrapped around ``leaf_text_len`` characters of text.
        nested_open = "<nested>" * depth
        nested_close = "</nested>" * depth
        leaf = "x" * leaf_text_len
        field_content = f"{nested_open}{leaf}{nested_close}"
        return f"""
            <django-objects version="1.0">
                <object model="contenttypes.contenttype" pk="1">
                    <field name="app_label">{field_content}</field>
                    <field name="model">m</field>
                </object>
            </django-objects>
        """

    def deserialize(crafted_xml):
        # Return the seconds spent consuming the deserializer.
        iterator = XMLDeserializer(crafted_xml)
        # Collect garbage before the timed section starts.
        garbage_collect()

        start_time = time.perf_counter()
        result = list(iterator)
        end_time = time.perf_counter()

        self.assertEqual(len(result), 1)
        self.assertIsInstance(result[0].object, models.Model)
        return end_time - start_time

    def assertFactor(label, params, factor=2):
        # Time each (depth, length) pair in order and record how much slower
        # each run is than the one before it.
        factors = []
        prev_time = None
        for depth, length in params:
            crafted_xml = build_crafted_xml(depth, length)
            elapsed = deserialize(crafted_xml)
            if prev_time is not None:
                factors.append(elapsed / prev_time)
            prev_time = elapsed

        with self.subTest(label):
            # Assert based on the average factor to reduce test flakiness.
            self.assertLessEqual(sum(factors) / len(factors), factor)

    assertFactor(
        "varying depth, varying length",
        [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
        2,
    )
    assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)
|
||||
|
||||
Reference in New Issue
Block a user