feat: use the Message-ID header to process each message only once

This commit is contained in:
Leon
2025-07-16 18:38:06 +02:00
parent d47e975574
commit 9512601124
8 changed files with 120 additions and 14 deletions

View File

@@ -70,7 +70,7 @@ def test_process_emails(mock_imap, db_session: Session):
mock_mail.search.return_value = ("OK", [b"1"])
# Mock email content
mock_msg_bytes = b"From: newsletter@example.com\nSubject: Test Subject\n\nTest Body"
mock_msg_bytes = b"From: newsletter@example.com\nSubject: Test Subject\nMessage-ID: <test@test.com>\n\nTest Body"
mock_mail.fetch.return_value = ("OK", [(None, mock_msg_bytes)])
process_emails(db_session)
@@ -151,7 +151,7 @@ def test_process_emails_auto_add_sender(mock_imap, db_session: Session):
mock_mail = MagicMock()
mock_imap.return_value = mock_mail
mock_mail.search.return_value = ("OK", [b"1"])
mock_msg_bytes = b"From: New Sender <new@example.com>\nSubject: New Email\n\nHello"
mock_msg_bytes = b"From: New Sender <new@example.com>\nSubject: New Email\nMessage-ID: <new@new.com>\n\nHello"
mock_mail.fetch.return_value = ("OK", [(None, mock_msg_bytes)])
process_emails(db_session)
@@ -192,10 +192,55 @@ def test_process_emails_no_move_or_read(mock_imap, db_session: Session):
mock_mail = MagicMock()
mock_imap.return_value = mock_mail
mock_mail.search.return_value = ("OK", [b"1"])
mock_msg_bytes = b"From: newsletter@example.com\nSubject: Test Subject\n\nTest Body"
mock_msg_bytes = b"From: newsletter@example.com\nSubject: Test Subject\nMessage-ID: <test@test.com>\n\nTest Body"
mock_mail.fetch.return_value = ("OK", [(None, mock_msg_bytes)])
process_emails(db_session)
mock_mail.store.assert_not_called()
mock_mail.copy.assert_not_called()
@patch("app.services.email_processor.imaplib.IMAP4_SSL")
def test_process_emails_avoids_duplicates(mock_imap, db_session: Session):
    """Check that an email whose Message-ID is already stored adds no entry.

    An entry with a known Message-ID is saved first; the mocked mailbox then
    serves a single email carrying that same Message-ID. After running
    ``process_emails`` the newsletter must still hold only the original entry.
    """
    from app.crud.entries import create_entry, get_entries_by_newsletter
    from app.schemas.entries import EntryCreate

    # Persist IMAP settings for the run.
    create_or_update_settings(
        db_session,
        SettingsCreate(
            imap_server="imap.test.com",
            imap_username="test@test.com",
            imap_password="password",
        ),
    )

    # Newsletter whose sender address matches the mocked email's From header.
    stored_newsletter = create_newsletter(
        db_session,
        NewsletterCreate(
            name="Test Newsletter", sender_emails=["newsletter@example.com"]
        ),
    )

    # Seed the entry that already carries the Message-ID used below.
    preexisting = EntryCreate(
        subject="Existing Subject",
        body="Existing Body",
        message_id="<existing@message.com>",
    )
    create_entry(db_session, preexisting, stored_newsletter.id)

    # Mocked IMAP connection: one search hit, and the fetched message reuses
    # the Message-ID of the entry seeded above.
    imap_conn = MagicMock()
    mock_imap.return_value = imap_conn
    imap_conn.search.return_value = ("OK", [b"1"])
    raw_email = b"From: newsletter@example.com\nSubject: Test Subject\nMessage-ID: <existing@message.com>\n\nTest Body"
    imap_conn.fetch.return_value = ("OK", [(None, raw_email)])

    process_emails(db_session)

    # Only the seeded entry may exist afterwards.
    found = get_entries_by_newsletter(db_session, stored_newsletter.id)
    assert len(found) == 1
    assert found[0].subject == "Existing Subject"

View File

@@ -148,7 +148,11 @@ def test_create_entry(db_session: Session):
name="Test Newsletter 5", sender_emails=[unique_email]
)
newsletter = create_newsletter(db_session, newsletter_data)
entry_data = EntryCreate(subject="Test Subject", body="Test Body")
entry_data = EntryCreate(
subject="Test Subject",
body="Test Body",
message_id=f"<{uuid.uuid4()}@test.com>",
)
entry = create_entry(db_session, entry_data, newsletter.id)
assert entry.subject == "Test Subject"
assert entry.newsletter_id == newsletter.id
@@ -162,10 +166,18 @@ def test_get_entries_by_newsletter(db_session: Session):
)
newsletter = create_newsletter(db_session, newsletter_data)
create_entry(
db_session, EntryCreate(subject="Entry 1", body="Body 1"), newsletter.id
db_session,
EntryCreate(
subject="Entry 1", body="Body 1", message_id=f"<{uuid.uuid4()}@test.com>"
),
newsletter.id,
)
create_entry(
db_session, EntryCreate(subject="Entry 2", body="Body 2"), newsletter.id
db_session,
EntryCreate(
subject="Entry 2", body="Body 2", message_id=f"<{uuid.uuid4()}@test.com>"
),
newsletter.id,
)
entries = get_entries_by_newsletter(db_session, newsletter.id)
assert len(entries) == 2
@@ -183,9 +195,14 @@ def test_update_newsletter(db_session: Session):
from app.schemas.newsletters import NewsletterUpdate
updated_email = f"updated_sender_{uuid.uuid4()}@test.com"
updated_newsletter_data = NewsletterUpdate(name="Updated Newsletter", sender_emails=[updated_email])
updated_newsletter_data = NewsletterUpdate(
name="Updated Newsletter", sender_emails=[updated_email]
)
from app.crud.newsletters import update_newsletter
updated_newsletter = update_newsletter(db_session, newsletter.id, updated_newsletter_data)
updated_newsletter = update_newsletter(
db_session, newsletter.id, updated_newsletter_data
)
assert updated_newsletter.name == "Updated Newsletter"
assert len(updated_newsletter.senders) == 1
@@ -201,6 +218,7 @@ def test_delete_newsletter(db_session: Session):
newsletter = create_newsletter(db_session, newsletter_data)
from app.crud.newsletters import delete_newsletter
deleted_newsletter = delete_newsletter(db_session, newsletter.id)
assert deleted_newsletter.id == newsletter.id
@@ -208,4 +226,5 @@ def test_delete_newsletter(db_session: Session):
# Verify it's actually deleted
from app.crud.newsletters import get_newsletter
assert get_newsletter(db_session, newsletter.id) is None

View File

@@ -164,9 +164,17 @@ def test_get_newsletter_feed(client: TestClient):
newsletter_id = create_response.json()["id"]
# Add some entries to the newsletter
entry_data_1 = {"subject": "Test Entry 1", "body": "<p>Content 1</p>"}
entry_data_1 = {
"subject": "Test Entry 1",
"body": "<p>Content 1</p>",
"message_id": f"<entry1_{uuid.uuid4()}@test.com>",
}
client.post(f"/newsletters/{newsletter_id}/entries", json=entry_data_1)
entry_data_2 = {"subject": "Test Entry 2", "body": "<p>Content 2</p>"}
entry_data_2 = {
"subject": "Test Entry 2",
"body": "<p>Content 2</p>",
"message_id": f"<entry2_{uuid.uuid4()}@test.com>",
}
client.post(f"/newsletters/{newsletter_id}/entries", json=entry_data_2)
response = client.get(f"/feeds/{newsletter_id}")

View File

@@ -1,3 +1,5 @@
import uuid
from sqlalchemy.orm import Session
from app.crud.entries import create_entry
@@ -17,12 +19,16 @@ def test_generate_feed(db_session: Session):
# Create entries for the newsletter
entry1_data = EntryCreate(
subject="First Entry", body="<p>This is the first entry.</p>"
subject="First Entry",
body="<p>This is the first entry.</p>",
message_id=f"<{uuid.uuid4()}@test.com>",
)
create_entry(db_session, entry1_data, newsletter.id)
entry2_data = EntryCreate(
subject="Second Entry", body="<p>This is the second entry.</p>"
subject="Second Entry",
body="<p>This is the second entry.</p>",
message_id=f"<{uuid.uuid4()}@test.com>",
)
create_entry(db_session, entry2_data, newsletter.id)