From 41478a57157400be95c13b786fc22193fbe0a827 Mon Sep 17 00:00:00 2001
From: Son Nguyen Kim
Date: Fri, 10 Sep 2021 17:26:14 +0200
Subject: [PATCH] replace parseaddr_unicode by parse_full_address

---
 app/email_utils.py        | 60 +++++++++++----------------------------
 tests/test_email_utils.py | 20 +++++--------
 tests/test_models.py      |  6 ++--
 3 files changed, 27 insertions(+), 59 deletions(-)

diff --git a/app/email_utils.py b/app/email_utils.py
index a4e8af5c..d0fd4073 100644
--- a/app/email_utils.py
+++ b/app/email_utils.py
@@ -7,12 +7,11 @@ import random
 import re
 import time
 from copy import deepcopy
-from email.errors import HeaderParseError
 from email.header import decode_header, Header
 from email.message import Message
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
-from email.utils import make_msgid, formatdate, parseaddr
+from email.utils import make_msgid, formatdate
 from smtplib import SMTP, SMTPServerDisconnected
 from typing import Tuple, List, Optional, Union
 
@@ -20,6 +19,8 @@ import arrow
 import dkim
 import spf
 from email_validator import validate_email, EmailNotValidError
+from flanker.addresslib import address
+from flanker.addresslib.address import EmailAddress
 from jinja2 import Environment, FileSystemLoader
 from sqlalchemy import func
 
@@ -681,47 +682,6 @@ def get_header_unicode(header: Union[str, Header]) -> str:
     return ret
 
 
-def parseaddr_unicode(addr) -> (str, str):
-    """Like parseaddr() but return name in unicode instead of in RFC 2047 format
-    Should be used instead of parseaddr()
-    '=?UTF-8?B?TmjGoW4gTmd1eeG7hW4=?= <abcd@gmail.com>' -> ('Nhơn Nguyễn', "abcd@gmail.com")
-    """
-    # sometimes linebreaks are present in addr
-    addr = addr.replace("\n", "").strip()
-    name, email = parseaddr(addr)
-    # email can have whitespace so we can't remove whitespace here
-    email = email.strip().lower()
-    if name:
-        name = name.strip()
-        try:
-            decoded_string, charset = decode_header(name)[0]
-        except HeaderParseError:  # fail in case
-            LOG.w("Can't decode name %s", name)
-        else:
-            if charset is not None:
-                try:
-                    name = decoded_string.decode(charset)
-                except UnicodeDecodeError:
-                    LOG.w("Cannot decode addr name %s", name)
-                    name = ""
-                except LookupError:  # charset is unknown
-                    LOG.w(
-                        "Cannot decode %s with %s, use utf-8", decoded_string, charset
-                    )
-                    try:
-                        name = decoded_string.decode("utf-8")
-                    except UnicodeDecodeError:
-                        LOG.w("utf-8 not work on %s", decoded_string)
-                        name = ""
-
-            else:
-                name = decoded_string
-
-    if type(name) == bytes:
-        name = name.decode()
-    return name, email
-
-
 def copy(msg: Message) -> Message:
     """return a copy of message"""
     try:
@@ -1257,3 +1217,17 @@ def should_ignore_bounce(mail_from: str) -> bool:
         return True
 
     return False
+
+
+def parse_full_address(full_address) -> (str, str):
+    """
+    parse the email address full format and return the display name and address
+    For ex: ab <cd@xy.com> -> (ab, cd@xy.com)
+    '=?UTF-8?B?TmjGoW4gTmd1eeG7hW4=?= <abcd@gmail.com>' -> ('Nhơn Nguyễn', "abcd@gmail.com")
+
+    If the parsing fails, raise ValueError
+    """
+    full_address: EmailAddress = address.parse(full_address)
+    if full_address is None:
+        raise ValueError
+    return full_address.display_name, full_address.address
diff --git a/tests/test_email_utils.py b/tests/test_email_utils.py
index 5e1e7528..3395bcca 100644
--- a/tests/test_email_utils.py
+++ b/tests/test_email_utils.py
@@ -10,7 +10,6 @@ from app.email_utils import (
     email_can_be_used_as_mailbox,
     delete_header,
     add_or_replace_header,
-    parseaddr_unicode,
     send_email_with_rate_control,
     copy,
     get_spam_from_header,
@@ -30,6 +29,7 @@ from app.email_utils import (
     get_queue_id,
     should_ignore_bounce,
     get_header_unicode,
+    parse_full_address,
 )
 from app.extensions import db
 from app.models import User, CustomDomain, Alias, Contact, EmailLog, IgnoreBounceSender
@@ -100,43 +100,37 @@ def test_add_or_replace_header():
     assert msg._headers == [("H", "new")]
 
 
-def test_parseaddr_unicode():
+def test_parse_full_address():
     # only email
-    assert parseaddr_unicode("abcd@gmail.com") == (
+    assert parse_full_address("abcd@gmail.com") == (
         "",
         "abcd@gmail.com",
     )
 
     # ascii address
-    assert parseaddr_unicode("First Last <abcd@gmail.com>") == (
+    assert parse_full_address("First Last <abcd@gmail.com>") == (
         "First Last",
         "abcd@gmail.com",
     )
 
     # Handle quote
-    assert parseaddr_unicode('"First Last" <abcd@gmail.com>') == (
+    assert parse_full_address('"First Last" <abcd@gmail.com>') == (
         "First Last",
         "abcd@gmail.com",
     )
 
     # UTF-8 charset
-    assert parseaddr_unicode("=?UTF-8?B?TmjGoW4gTmd1eeG7hW4=?= <abcd@gmail.com>") == (
+    assert parse_full_address("=?UTF-8?B?TmjGoW4gTmd1eeG7hW4=?= <abcd@gmail.com>") == (
         "Nhơn Nguyễn",
         "abcd@gmail.com",
     )
 
     # iso-8859-1 charset
-    assert parseaddr_unicode("=?iso-8859-1?q?p=F6stal?= <abcd@gmail.com>") == (
+    assert parse_full_address("=?iso-8859-1?q?p=F6stal?= <abcd@gmail.com>") == (
         "pöstal",
         "abcd@gmail.com",
     )
 
-    # when a name can't be decoded, return an empty string
-    assert parseaddr_unicode("=?UTF-8?B?Cec���?= <test@example.com>") == (
-        "",
-        "test@example.com",
-    )
-
 
 def test_send_email_with_rate_control(flask_client):
     user = User.create(
diff --git a/tests/test_models.py b/tests/test_models.py
index 76e621ce..48aa0af5 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -3,7 +3,7 @@ from uuid import UUID
 import pytest
 
 from app.config import EMAIL_DOMAIN, MAX_NB_EMAIL_FREE_PLAN
-from app.email_utils import parseaddr_unicode
+from app.email_utils import parse_full_address
 from app.extensions import db
 from app.models import (
     generate_email,
@@ -159,8 +159,8 @@ def test_new_addr(flask_client):
         == "=?utf-8?q?Nh=C6=A1n_Nguy=E1=BB=85n_-_abcd_at_example=2Ecom?= <rep@sl>"
     )
 
-    # sanity check for parseaddr_unicode
-    assert parseaddr_unicode(c1.new_addr()) == (
+    # sanity check
+    assert parse_full_address(c1.new_addr()) == (
         "Nhơn Nguyễn - abcd at example.com",
         "rep@sl",
     )