use received_at for ingesting emails and feed generation (#22)

* use received_at for ingesting emails and feed generation

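* fix test: get_entries_by_newsletter now orders entries by received_at descending, so the first result is expected to be "Entry 2"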
This commit is contained in:
Matt
2025-10-21 14:45:14 -04:00
committed by GitHub
parent 54285a2ab8
commit 35fdb8b615
5 changed files with 18 additions and 3 deletions

View File

@@ -29,9 +29,16 @@ def get_entries_by_newsletter(
logger.debug(
f"Querying entries for newsletter_id={newsletter_id}, skip={skip}, limit={limit}"
)
query = db.query(Entry).filter(Entry.newsletter_id == newsletter_id).offset(skip)
query = (
db.query(Entry)
.order_by(Entry.received_at.desc())
.filter(Entry.newsletter_id == newsletter_id)
.offset(skip)
)
if limit is not None:
query = query.limit(limit)
return query.all()

View File

@@ -9,6 +9,7 @@ class EntryBase(BaseModel):
subject: str
body: str
message_id: str
received_at: datetime.datetime | None = None
class EntryCreate(EntryBase):

View File

@@ -199,6 +199,8 @@ def _process_single_email(
subject = str(make_header(decode_header(msg["Subject"])))
body = _get_email_body(msg)
date_str = msg["Date"]
received_at = email.utils.parsedate_to_datetime(date_str) if date_str else None
if newsletter.extract_content:
cleaned_data = _extract_and_clean_html(body)
@@ -206,7 +208,9 @@ def _process_single_email(
# so we only override the body.
body = cleaned_data["body"]
entry_schema = EntryCreate(subject=subject, body=body, message_id=message_id)
entry_schema = EntryCreate(
subject=subject, body=body, message_id=message_id, received_at=received_at
)
new_entry = create_entry(db, entry_schema, newsletter.id)
if not new_entry:

View File

@@ -41,11 +41,14 @@ def _add_entries_to_feed(
else entry.subject
)
fe.content(entry.body, type="html")
if entry.received_at.tzinfo is None:
timezone_aware_received_at = entry.received_at.replace(tzinfo=tz.tzutc())
fe.published(timezone_aware_received_at)
fe.updated(timezone_aware_received_at)
else:
fe.published(entry.received_at)
fe.updated(entry.received_at)
def generate_feed(db: Session, feed_identifier: str):

View File

@@ -251,7 +251,7 @@ def test_get_entries_by_newsletter(db_session: Session):
)
entries = get_entries_by_newsletter(db_session, newsletter.id)
assert len(entries) == 2
assert entries[0].subject == "Entry 1"
assert entries[0].subject == "Entry 2"
def test_update_newsletter(db_session: Session):