Fix get_header_unicode: handle the case where the header contains several parts

This commit is contained in:
Son Nguyen Kim 2021-09-09 11:47:01 +02:00
parent 493a5daa45
commit defd7b159d
2 changed files with 28 additions and 13 deletions

View file

@ -652,24 +652,33 @@ def get_spam_from_header(spam_status_header, max_score=None) -> (bool, str):
def get_header_unicode(header: str) -> str:
    """
    Convert a header to unicode
    Should be used to handle headers like From:, To:, CC:, Subject:

    A header can be made of several parts, each with its own charset, for
    example "=?utf-8?B?w6nDqQ==?=@example.com". Every part returned by
    decode_header is decoded on its own and the results are concatenated.

    Return "" when header is None. Bytes that cannot be decoded are replaced
    by U+FFFD instead of raising UnicodeDecodeError.
    """
    if header is None:
        return ""

    decoded_parts = []
    for to_decoded_str, charset in decode_header(header):
        if not isinstance(to_decoded_str, bytes):
            # header without any encoded word: decode_header hands it back as str
            decoded_parts.append(to_decoded_str)
            continue

        if charset is None:
            # unencoded part next to an encoded one: bytes without charset.
            # It may hold non UTF-8 bytes, so decoding must not raise here
            decoded_parts.append(to_decoded_str.decode("utf-8", errors="replace"))
            continue

        try:
            decoded_str = to_decoded_str.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # charset is unknown or does not match the actual bytes
            LOG.w("Cannot decode %s with %s, try utf-8", to_decoded_str, charset)
            try:
                decoded_str = to_decoded_str.decode("utf-8")
            except UnicodeDecodeError:
                LOG.w("Cannot UTF-8 decode %s", to_decoded_str)
                decoded_str = to_decoded_str.decode("utf-8", errors="replace")
        decoded_parts.append(decoded_str)

    return "".join(decoded_parts)
def parseaddr_unicode(addr) -> (str, str):

View file

@ -29,6 +29,7 @@ from app.email_utils import (
parse_id_from_bounce,
get_queue_id,
should_ignore_bounce,
get_header_unicode,
)
from app.extensions import db
from app.models import User, CustomDomain, Alias, Contact, EmailLog, IgnoreBounceSender
@ -748,3 +749,8 @@ def test_should_ignore_bounce(flask_client):
IgnoreBounceSender.create(mail_from="to-ignore@example.com")
assert should_ignore_bounce("to-ignore@example.com")
def test_get_header_unicode():
    """Check get_header_unicode on a plain header and on a partly encoded one."""
    expected_by_header = {
        # nothing encoded: the header must come back unchanged
        "ab@cd.com": "ab@cd.com",
        # base64 UTF-8 encoded part followed by a plain part
        "=?utf-8?B?w6nDqQ==?=@example.com": "éé@example.com",
    }
    for raw_header, expected in expected_by_header.items():
        assert get_header_unicode(raw_header) == expected