From dccf01ad30da25285cffd370d46c8e10844ff08d Mon Sep 17 00:00:00 2001 From: Julien Castiaux Date: Wed, 26 Feb 2020 11:02:01 +0100 Subject: [PATCH 1/2] bpo-39757: Use IDNA to encode domain part in email address While b64/qp can be used to encode most non-ASCII headers, the domain part of an email address must conform to IDNA ([rfc5890], [rfc5891]) and thus must be encoded using the Punycode algorithm ([rfc3492]). [SMTPUTF8][rfc6531] is no exception. [rfc5890]: https://tools.ietf.org/html/rfc5890 [rfc5891]: https://tools.ietf.org/html/rfc5891 [rfc3492]: https://tools.ietf.org/html/rfc3492 [rfc6531]: https://tools.ietf.org/html/rfc6531#section-3.2 --- Lib/email/_header_value_parser.py | 4 +++- Lib/test/test_email/test_generator.py | 32 +++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 9c55ef7fb453be..d57129f4abc8a1 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -497,8 +497,10 @@ class Domain(TokenList): @property def domain(self): - return ''.join(super().value.split()) + return ''.join(super().value.split()).encode('idna').decode('ascii') + def __str__(self): + return super().__str__().encode('idna').decode('ascii') class DotAtom(TokenList): token_type = 'dot-atom' diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py index 89e7edeb63a892..64a07a0082dfd0 100644 --- a/Lib/test/test_email/test_generator.py +++ b/Lib/test/test_email/test_generator.py @@ -271,6 +271,38 @@ def test_cte_type_7bit_transforms_8bit_cte(self): g.flatten(msg) self.assertEqual(s.getvalue(), expected) + def test_unicode_domain_transforms_idna(self): + msg = EmailMessage() + msg['From'] = "john@examplé.com" + msg['To'] = "Joe " + + expected = textwrap.dedent("""\ + From: john@xn--exampl-gva.com + To: Joe + + """).encode('ascii').replace(b'\n', b'\r\n') + + s = io.BytesIO() + g = BytesGenerator(s, policy=policy.SMTP) 
+ g.flatten(msg) + self.assertEqual(s.getvalue(), expected) + + def test_unicode_domain_transforms_idna_smtputf8_policy(self): + msg = EmailMessage() + msg['From'] = "john@examplé.com" + msg['To'] = "Joe " + + expected = textwrap.dedent("""\ + From: john@xn--exampl-gva.com + To: Joe + + """).encode('utf-8').replace(b'\n', b'\r\n') + + s = io.BytesIO() + g = BytesGenerator(s, policy=policy.SMTPUTF8) + g.flatten(msg) + self.assertEqual(s.getvalue(), expected) + def test_smtputf8_policy(self): msg = EmailMessage() msg['From'] = "Páolo " From 12d60ed29a8f7645e3b78ac644f9abee87688b96 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 27 Feb 2020 11:31:13 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2020-02-27-11-31-12.bpo-39757.FC-lgv.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2020-02-27-11-31-12.bpo-39757.FC-lgv.rst diff --git a/Misc/NEWS.d/next/Library/2020-02-27-11-31-12.bpo-39757.FC-lgv.rst b/Misc/NEWS.d/next/Library/2020-02-27-11-31-12.bpo-39757.FC-lgv.rst new file mode 100644 index 00000000000000..aea025915b27fe --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-02-27-11-31-12.bpo-39757.FC-lgv.rst @@ -0,0 +1 @@ +Add IDNA support for the domain part of email addresses in email.message.EmailMessage: non-ASCII domains are now encoded with the ``idna`` codec. \ No newline at end of file