Refs #36769 -- Raised SuspiciousOperation for unexpected nested tags in XML Deserializer.

Thanks Shai Berger and Natalia Bidart for reviews.
This commit is contained in:
Jacob Walls
2026-01-07 16:23:48 -05:00
parent a25158f5cc
commit 73c5e94521
7 changed files with 60 additions and 60 deletions

View File

@@ -10,7 +10,7 @@ from xml.sax.expatreader import ExpatParser as _ExpatParser
from django.apps import apps
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.core.exceptions import ObjectDoesNotExist, SuspiciousOperation
from django.core.serializers import base
from django.db import DEFAULT_DB_ALIAS, models
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
@@ -411,6 +411,8 @@ class Deserializer(base.Deserializer):
try:
for c in node.getElementsByTagName("object"):
values.append(m2m_convert(c))
except SuspiciousOperation:
raise
except Exception as e:
if isinstance(e, ObjectDoesNotExist) and self.handle_forward_references:
return base.DEFER_FIELD
@@ -440,6 +442,8 @@ class Deserializer(base.Deserializer):
def check_element_type(element):
if element.childNodes:
raise SuspiciousOperation(f"Unexpected element: {element.tagName!r}")
return element.nodeType in (element.TEXT_NODE, element.CDATA_SECTION_NODE)

View File

@@ -303,6 +303,10 @@ Serialization
``()``. This ensures primary keys are serialized when using
:option:`dumpdata --natural-primary`.
* The XML deserializer now raises
:exc:`~django.core.exceptions.SuspiciousOperation` when it encounters
unexpected nested tags.
Signals
~~~~~~~

View File

@@ -198,7 +198,8 @@ lowercase name of the model ("session") separated by a dot.
Each field of the object is serialized as a ``<field>``-element sporting the
fields "type" and "name". The text content of the element represents the value
that should be stored.
that should be stored. (If the element contains child tags,
:exc:`~django.core.exceptions.SuspiciousOperation` is raised.)
Foreign keys and other relational fields are treated a little bit differently:
@@ -237,6 +238,11 @@ This example links the given user with the permission models with PKs 46 and
XHTML, XML and Control Codes
<https://www.w3.org/International/questions/qa-controls>`_.
.. versionchanged:: 6.1
:exc:`~django.core.exceptions.SuspiciousOperation` is raised when
unexpected nested tags are found.
.. _serialization-formats-json:
JSON

View File

@@ -3,6 +3,7 @@
<object pk="1" model="fixtures.person">
<field type="CharField" name="name">
<!-- This <em> is unexpected & invalid -->
Django <em>pony</em>
</field>
</object>

View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<django-objects version="1.0">
<object pk="1" model="fixtures.book">
<field type="CharField" name="name">Music for all ages</field>
<field to="fixtures.person" name="authors" rel="ManyToManyRel">
<object>
<!-- This <em> is unexpected & invalid -->
<natural>Artist formerly known as <em>Prince</em></natural>
</object>
</field>
</object>
</django-objects>

View File

@@ -10,6 +10,7 @@ from unittest import mock
from django.apps import apps
from django.contrib.sites.models import Site
from django.core import management
from django.core.exceptions import SuspiciousOperation
from django.core.files.temp import NamedTemporaryFile
from django.core.management import CommandError
from django.core.management.commands.dumpdata import ProxyModelWarning
@@ -23,7 +24,6 @@ from .models import (
CircularA,
CircularB,
NaturalKeyThing,
Person,
PrimaryKeyUUIDModel,
ProxySpy,
Spy,
@@ -522,12 +522,13 @@ class FixtureLoadingTests(DumpDataAssertMixin, TestCase):
)
def test_deeply_nested_elements(self):
"""Text inside deeply-nested tags is skipped."""
management.call_command(
"loaddata", "invalid_deeply_nested_elements.xml", verbosity=0
)
person = Person.objects.get(pk=1)
self.assertEqual(person.name, "Django") # not "Django pony"
"""Text inside deeply-nested tags raises SuspiciousOperation."""
for file in [
"invalid_deeply_nested_elements.xml",
"invalid_deeply_nested_elements_natural_key.xml",
]:
with self.subTest(file=file), self.assertRaises(SuspiciousOperation):
management.call_command("loaddata", file, verbosity=0)
def test_dumpdata_with_excludes(self):
# Load fixture1 which has a site, two articles, and a category

View File

@@ -1,15 +1,14 @@
import json
import time
import textwrap
import unittest
from django.core.exceptions import SuspiciousOperation
from django.core.serializers.base import DeserializationError, DeserializedObject
from django.core.serializers.json import Deserializer as JsonDeserializer
from django.core.serializers.jsonl import Deserializer as JsonlDeserializer
from django.core.serializers.python import Deserializer
from django.core.serializers.xml_serializer import Deserializer as XMLDeserializer
from django.db import models
from django.test import SimpleTestCase
from django.test.utils import garbage_collect
from .models import Author
@@ -138,52 +137,23 @@ class TestDeserializer(SimpleTestCase):
self.assertEqual(first_item.object, self.jane)
self.assertEqual(second_item.object, self.joe)
def test_crafted_xml_performance(self):
"""The time to process invalid inputs is not quadratic."""
def build_crafted_xml(depth, leaf_text_len):
nested_open = "<nested>" * depth
nested_close = "</nested>" * depth
leaf = "x" * leaf_text_len
field_content = f"{nested_open}{leaf}{nested_close}"
return f"""
<django-objects version="1.0">
<object model="contenttypes.contenttype" pk="1">
<field name="app_label">{field_content}</field>
<field name="model">m</field>
</object>
</django-objects>
"""
def deserialize(crafted_xml):
iterator = XMLDeserializer(crafted_xml)
garbage_collect()
start_time = time.perf_counter()
result = list(iterator)
end_time = time.perf_counter()
self.assertEqual(len(result), 1)
self.assertIsInstance(result[0].object, models.Model)
return end_time - start_time
def assertFactor(label, params, factor=2):
factors = []
prev_time = None
for depth, length in params:
crafted_xml = build_crafted_xml(depth, length)
elapsed = deserialize(crafted_xml)
if prev_time is not None:
factors.append(elapsed / prev_time)
prev_time = elapsed
with self.subTest(label):
# Assert based on the average factor to reduce test flakiness.
self.assertLessEqual(sum(factors) / len(factors), factor)
assertFactor(
"varying depth, varying length",
[(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
2,
def test_crafted_xml_rejected(self):
depth = 100
leaf_text_len = 1000
nested_open = "<nested>" * depth
nested_close = "</nested>" * depth
leaf = "x" * leaf_text_len
field_content = f"{nested_open}{leaf}{nested_close}"
crafted_xml = textwrap.dedent(
f"""
<django-objects version="1.0">
<object model="contenttypes.contenttype" pk="1">
<field name="app_label">{field_content}</field>
<field name="model">m</field>
</object>
</django-objects>"""
)
assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)
msg = "Unexpected element: 'nested'"
with self.assertRaisesMessage(SuspiciousOperation, msg):
list(XMLDeserializer(crafted_xml))