mirror of
https://github.com/khoaliber/LetterFeed.git
synced 2026-03-09 05:39:13 +00:00
feat: text content extraction
This commit is contained in:
@@ -50,7 +50,9 @@ def get_newsletters(db: Session, skip: int = 0, limit: int = 100):
|
||||
def create_newsletter(db: Session, newsletter: NewsletterCreate):
|
||||
"""Create a new newsletter."""
|
||||
logger.info(f"Creating new newsletter with name '{newsletter.name}'")
|
||||
db_newsletter = Newsletter(name=newsletter.name)
|
||||
db_newsletter = Newsletter(
|
||||
name=newsletter.name, extract_content=newsletter.extract_content
|
||||
)
|
||||
db.add(db_newsletter)
|
||||
db.commit()
|
||||
db.refresh(db_newsletter)
|
||||
|
||||
@@ -12,6 +12,7 @@ class Newsletter(Base):
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
name = Column(String)
|
||||
is_active = Column(Boolean, default=True)
|
||||
extract_content = Column(Boolean, default=False)
|
||||
|
||||
senders = relationship(
|
||||
"Sender", back_populates="newsletter", cascade="all, delete-orphan"
|
||||
|
||||
@@ -28,6 +28,7 @@ class NewsletterBase(BaseModel):
|
||||
"""Base schema for a newsletter."""
|
||||
|
||||
name: str
|
||||
extract_content: bool = False
|
||||
|
||||
|
||||
class NewsletterCreate(NewsletterBase):
|
||||
|
||||
@@ -2,6 +2,7 @@ import email
|
||||
import imaplib
|
||||
from email.header import decode_header, make_header
|
||||
|
||||
import trafilatura
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.logging import get_logger
|
||||
@@ -114,6 +115,10 @@ def process_emails(db: Session):
|
||||
pass
|
||||
|
||||
final_body = html or body
|
||||
if newsletter.extract_content:
|
||||
extracted_body = trafilatura.extract(final_body)
|
||||
if extracted_body:
|
||||
final_body = extracted_body
|
||||
|
||||
entry = EntryCreate(
|
||||
subject=subject, body=final_body, message_id=message_id
|
||||
|
||||
Reference in New Issue
Block a user