def get_header_unicode(header: str) -> str:
    """
    Convert a header to unicode
    Should be used to handle headers like From:, To:, CC:, Subject:

    The header may be made of several parts (plain text mixed with one or
    more RFC 2047 encoded-words, possibly in different charsets); every part
    is decoded on its own and the results are concatenated in order.

    Return "" when header is None. Undecodable bytes never raise: they are
    replaced by U+FFFD as a last resort.
    """
    if header is None:
        return ""

    parts = []
    for chunk, charset in decode_header(header):
        if isinstance(chunk, str):
            # decode_header hands the value back as str when there is nothing
            # to decode: keep it as is
            parts.append(chunk)
            continue

        if charset is None:
            # plain text located next to an encoded-word comes back as bytes
            # without any charset and is not guaranteed to be valid UTF-8:
            # a strict decode here would raise UnicodeDecodeError
            parts.append(chunk.decode("utf-8", errors="replace"))
            continue

        try:
            decoded = chunk.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # LookupError: charset is unknown
            # UnicodeDecodeError: the bytes don't match the announced charset
            LOG.w("Cannot decode %s with %s, try utf-8", chunk, charset)
            try:
                decoded = chunk.decode("utf-8")
            except UnicodeDecodeError:
                LOG.w("Cannot UTF-8 decode %s", chunk)
                decoded = chunk.decode("utf-8", errors="replace")
        parts.append(decoded)

    return "".join(parts)
def test_get_header_unicode():
    """get_header_unicode handles plain headers and headers mixing an encoded-word with plain text."""
    expectations = (
        # plain ASCII header: returned unchanged
        ("ab@cd.com", "ab@cd.com"),
        # base64 UTF-8 encoded-word followed by an unencoded part
        ("=?utf-8?B?w6nDqQ==?=@example.com", "éé@example.com"),
    )
    for raw_header, expected in expectations:
        assert get_header_unicode(raw_header) == expected