gh-90949: expose Expat API to tune exponential expansion protections by picnixz · Pull Request #139368 · python/cpython · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 63 additions & 5 deletions Doc/library/pyexpat.rst
12 changes: 10 additions & 2 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -558,10 +558,18 @@ xml.parsers.expat

* Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold`
and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification`
to :ref:`xmlparser <xmlparser-objects>` objects to prevent use of
disproportional amounts of dynamic memory from within an Expat parser.
to :ref:`xmlparser <xmlparser-objects>` objects to tune protections against
disproportional amounts of dynamic memory usage from within an Expat parser.
(Contributed by Bénédikt Tran in :gh:`90949`.)

* Add :meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionActivationThreshold`

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not quite sure how to go about backporting this; I think we'll need to remove it from here?

@picnixz picnixz May 25, 2026

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, you can drop this part in older branches, but we still want the versionadded:: next directly in the docs. The What's New entry can be omitted. You can give the explicit Python version in which it was added, though (3.14.x and not just 3.14); I don't know whether :: next handles that.

and :meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionMaximumAmplification`
to :ref:`xmlparser <xmlparser-objects>` objects to tune protections against
`billion laughs`_ attacks.
(Contributed by Bénédikt Tran in :gh:`90949`.)

.. _billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack


zlib
----
Expand Down
5 changes: 5 additions & 0 deletions Include/pyexpat.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ struct PyExpat_CAPI
XML_Parser parser, unsigned long long activationThresholdBytes);
XML_Bool (*SetAllocTrackerMaximumAmplification)(
XML_Parser parser, float maxAmplificationFactor);
/* might be NULL for expat < 2.4.0 */
XML_Bool (*SetBillionLaughsAttackProtectionActivationThreshold)(
XML_Parser parser, unsigned long long activationThresholdBytes);
XML_Bool (*SetBillionLaughsAttackProtectionMaximumAmplification)(
XML_Parser parser, float maxAmplificationFactor);
/* always add new stuff to the end! */
};

58 changes: 58 additions & 0 deletions Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -958,6 +958,64 @@ def test_set_maximum_amplification__fail_for_subparser(self):
self.assert_root_parser_failure(setter, 123.45)


@unittest.skipIf(expat.version_info < (2, 4, 0), "requires Expat >= 2.4.0")
class ExpansionProtectionTest(AttackProtectionTestBase, unittest.TestCase):
    """Tests for the billion laughs attack protection tuning methods.

    Exercises SetBillionLaughsAttackProtectionActivationThreshold() and
    SetBillionLaughsAttackProtectionMaximumAmplification() on parsers
    created by expat.ParserCreate().
    """

    def assert_rejected(self, func, /, *args, **kwargs):
        """Check that func(*args, **kwargs) hits the amplification limit.

        The call must raise expat.ExpatError with the message reported
        when the input amplification factor limit is breached.
        """
        msg = (
            r"limit on input amplification factor \(from DTD and entities\) "
            r"breached: line \d+, column \d+"
        )
        self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs)

    def set_activation_threshold(self, parser, threshold):
        """Set the protection activation threshold of *parser*."""
        return parser.SetBillionLaughsAttackProtectionActivationThreshold(threshold)

    def set_maximum_amplification(self, parser, max_factor):
        """Set the maximum allowed amplification factor of *parser*."""
        return parser.SetBillionLaughsAttackProtectionMaximumAmplification(max_factor)

    def test_set_activation_threshold__threshold_reached(self):
        parser = expat.ParserCreate()
        # Choose a threshold expected to be always reached.
        self.set_activation_threshold(parser, 3)
        # Check that the threshold is reached by choosing a small factor
        # and a payload whose peak amplification factor exceeds it.
        self.assertIsNone(self.set_maximum_amplification(parser, 1.0))
        payload = self.exponential_expansion_payload(ncols=10, nrows=4)
        self.assert_rejected(parser.Parse, payload, True)

    def test_set_activation_threshold__threshold_not_reached(self):
        parser = expat.ParserCreate()
        # Choose a threshold expected to be never reached.
        self.set_activation_threshold(parser, 10**5)
        # Check that the threshold is not reached: even with a small factor
        # and a payload whose peak amplification factor exceeds it, parsing
        # is expected to succeed.
        self.assertIsNone(self.set_maximum_amplification(parser, 1.0))
        payload = self.exponential_expansion_payload(ncols=10, nrows=4)
        self.assertIsNotNone(parser.Parse(payload, True))

    def test_set_maximum_amplification__amplification_exceeded(self):
        parser = expat.ParserCreate()
        # Use a zero activation threshold so that the maximum
        # amplification factor is unconditionally enforced.
        self.set_activation_threshold(parser, 0)
        # Choose a max amplification factor expected to always be exceeded.
        self.assertIsNone(self.set_maximum_amplification(parser, 1.0))
        # Craft a payload for which the peak amplification factor is > 1.0.
        payload = self.exponential_expansion_payload(ncols=1, nrows=2)
        self.assert_rejected(parser.Parse, payload, True)

    def test_set_maximum_amplification__amplification_not_exceeded(self):
        parser = expat.ParserCreate()
        # Use a zero activation threshold so that the maximum
        # amplification factor is unconditionally enforced.
        self.set_activation_threshold(parser, 0)
        # Choose a max amplification factor expected to never be exceeded.
        self.assertIsNone(self.set_maximum_amplification(parser, 1e4))
        # Craft a payload for which the peak amplification factor is < 1e4.
        payload = self.exponential_expansion_payload(ncols=1, nrows=2)
        self.assertIsNotNone(parser.Parse(payload, True))


@unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2")
class MemoryProtectionTest(AttackProtectionTestBase, unittest.TestCase):

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold`
and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification`
to :ref:`xmlparser <xmlparser-objects>` objects to prevent use of
disproportional amounts of dynamic memory from within an Expat parser.
to :ref:`xmlparser <xmlparser-objects>` objects to tune protections against
disproportional amounts of dynamic memory usage from within an Expat parser.
Patch by Bénédikt Tran.
150 changes: 147 additions & 3 deletions Modules/clinic/pyexpat.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading