From 73c5e94521c5b97e27cd2fe2d5b5c2e65f402755 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 7 Jan 2026 16:23:48 -0500 Subject: [PATCH] Refs #36769 -- Raised SuspiciousOperation for unexpected nested tags in XML Deserializer. Thanks Shai Berger and Natalia Bidart for reviews. --- django/core/serializers/xml_serializer.py | 6 +- docs/releases/6.1.txt | 4 ++ docs/topics/serialization.txt | 8 ++- .../invalid_deeply_nested_elements.xml | 1 + ...lid_deeply_nested_elements_natural_key.xml | 14 ++++ tests/fixtures/tests.py | 15 ++-- tests/serializers/test_deserialization.py | 72 ++++++------------- 7 files changed, 60 insertions(+), 60 deletions(-) create mode 100644 tests/fixtures/fixtures/invalid_deeply_nested_elements_natural_key.xml diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py index e159180e17..d8ffbdf00a 100644 --- a/django/core/serializers/xml_serializer.py +++ b/django/core/serializers/xml_serializer.py @@ -10,7 +10,7 @@ from xml.sax.expatreader import ExpatParser as _ExpatParser from django.apps import apps from django.conf import settings -from django.core.exceptions import ObjectDoesNotExist +from django.core.exceptions import ObjectDoesNotExist, SuspiciousOperation from django.core.serializers import base from django.db import DEFAULT_DB_ALIAS, models from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError @@ -411,6 +411,8 @@ class Deserializer(base.Deserializer): try: for c in node.getElementsByTagName("object"): values.append(m2m_convert(c)) + except SuspiciousOperation: + raise except Exception as e: if isinstance(e, ObjectDoesNotExist) and self.handle_forward_references: return base.DEFER_FIELD @@ -440,6 +442,8 @@ class Deserializer(base.Deserializer): def check_element_type(element): + if element.childNodes: + raise SuspiciousOperation(f"Unexpected element: {element.tagName!r}") return element.nodeType in (element.TEXT_NODE, element.CDATA_SECTION_NODE) diff --git a/docs/releases/6.1.txt b/docs/releases/6.1.txt index b5b47a11d8..57129bfb66 100644 --- a/docs/releases/6.1.txt +++ b/docs/releases/6.1.txt @@ -303,6 +303,10 @@ Serialization ``()``. This ensures primary keys are serialized when using :option:`dumpdata --natural-primary`. +* The XML deserializer now raises + :exc:`~django.core.exceptions.SuspiciousOperation` when it encounters + unexpected nested tags. + Signals ~~~~~~~ diff --git a/docs/topics/serialization.txt b/docs/topics/serialization.txt index f801df0561..1a129072e0 100644 --- a/docs/topics/serialization.txt +++ b/docs/topics/serialization.txt @@ -198,7 +198,8 @@ lowercase name of the model ("session") separated by a dot. Each field of the object is serialized as a ````-element sporting the fields "type" and "name". The text content of the element represents the value -that should be stored. +that should be stored. (If the element contains child tags, +:exc:`~django.core.exceptions.SuspiciousOperation` is raised.) Foreign keys and other relational fields are treated a little bit differently: @@ -237,6 +238,11 @@ This example links the given user with the permission models with PKs 46 and XHTML, XML and Control Codes `_. +.. versionchanged:: 6.1 + + :exc:`~django.core.exceptions.SuspiciousOperation` is raised when + unexpected nested tags are found. + .. _serialization-formats-json: JSON diff --git a/tests/fixtures/fixtures/invalid_deeply_nested_elements.xml b/tests/fixtures/fixtures/invalid_deeply_nested_elements.xml index 2ed39c3d36..e3ed67f320 100644 --- a/tests/fixtures/fixtures/invalid_deeply_nested_elements.xml +++ b/tests/fixtures/fixtures/invalid_deeply_nested_elements.xml @@ -3,6 +3,7 @@ + Django pony diff --git a/tests/fixtures/fixtures/invalid_deeply_nested_elements_natural_key.xml b/tests/fixtures/fixtures/invalid_deeply_nested_elements_natural_key.xml new file mode 100644 index 0000000000..7433fad6c7 --- /dev/null +++ b/tests/fixtures/fixtures/invalid_deeply_nested_elements_natural_key.xml @@ -0,0 +1,14 @@ + + + + + Music for all ages + + + + Artist formerly known as Prince + + + + + diff --git a/tests/fixtures/tests.py b/tests/fixtures/tests.py index 7511569d21..dd03343027 100644 --- a/tests/fixtures/tests.py +++ b/tests/fixtures/tests.py @@ -10,6 +10,7 @@ from unittest import mock from django.apps import apps from django.contrib.sites.models import Site from django.core import management +from django.core.exceptions import SuspiciousOperation from django.core.files.temp import NamedTemporaryFile from django.core.management import CommandError from django.core.management.commands.dumpdata import ProxyModelWarning @@ -23,7 +24,6 @@ from .models import ( CircularA, CircularB, NaturalKeyThing, - Person, PrimaryKeyUUIDModel, ProxySpy, Spy, @@ -522,12 +522,13 @@ class FixtureLoadingTests(DumpDataAssertMixin, TestCase): ) def test_deeply_nested_elements(self): - """Text inside deeply-nested tags is skipped.""" - management.call_command( - "loaddata", "invalid_deeply_nested_elements.xml", verbosity=0 - ) - person = Person.objects.get(pk=1) - self.assertEqual(person.name, "Django") # not "Django pony" + """Text inside deeply-nested tags raises SuspiciousOperation.""" + for file in [ + "invalid_deeply_nested_elements.xml", + "invalid_deeply_nested_elements_natural_key.xml", + ]: + with self.subTest(file=file), self.assertRaises(SuspiciousOperation): + management.call_command("loaddata", file, verbosity=0) def test_dumpdata_with_excludes(self): # Load fixture1 which has a site, two articles, and a category diff --git a/tests/serializers/test_deserialization.py b/tests/serializers/test_deserialization.py index a718a99038..b6a0818c42 100644 --- a/tests/serializers/test_deserialization.py +++ b/tests/serializers/test_deserialization.py @@ -1,15 +1,14 @@ import json -import time +import textwrap import unittest +from django.core.exceptions import SuspiciousOperation from django.core.serializers.base import DeserializationError, DeserializedObject from django.core.serializers.json import Deserializer as JsonDeserializer from django.core.serializers.jsonl import Deserializer as JsonlDeserializer from django.core.serializers.python import Deserializer from django.core.serializers.xml_serializer import Deserializer as XMLDeserializer -from django.db import models from django.test import SimpleTestCase -from django.test.utils import garbage_collect from .models import Author @@ -138,52 +137,23 @@ class TestDeserializer(SimpleTestCase): self.assertEqual(first_item.object, self.jane) self.assertEqual(second_item.object, self.joe) - def test_crafted_xml_performance(self): - """The time to process invalid inputs is not quadratic.""" - - def build_crafted_xml(depth, leaf_text_len): - nested_open = "" * depth - nested_close = "" * depth - leaf = "x" * leaf_text_len - field_content = f"{nested_open}{leaf}{nested_close}" - return f""" - - - {field_content} - m - - - """ - - def deserialize(crafted_xml): - iterator = XMLDeserializer(crafted_xml) - garbage_collect() - - start_time = time.perf_counter() - result = list(iterator) - end_time = time.perf_counter() - - self.assertEqual(len(result), 1) - self.assertIsInstance(result[0].object, models.Model) - return end_time - start_time - - def assertFactor(label, params, factor=2): - factors = [] - prev_time = None - for depth, length in params: - crafted_xml = build_crafted_xml(depth, length) - elapsed = deserialize(crafted_xml) - if prev_time is not None: - factors.append(elapsed / prev_time) - prev_time = elapsed - - with self.subTest(label): - # Assert based on the average factor to reduce test flakiness. - self.assertLessEqual(sum(factors) / len(factors), factor) - - assertFactor( - "varying depth, varying length", - [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)], - 2, + def test_crafted_xml_rejected(self): + depth = 100 + leaf_text_len = 1000 + nested_open = "" * depth + nested_close = "" * depth + leaf = "x" * leaf_text_len + field_content = f"{nested_open}{leaf}{nested_close}" + crafted_xml = textwrap.dedent( + f""" + + + {field_content} + m + + """ ) - assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2) + + msg = "Unexpected element: 'nested'" + with self.assertRaisesMessage(SuspiciousOperation, msg): + list(XMLDeserializer(crafted_xml))