From 834092a3ecdb2b5dd3fa787ca134be2773973b5b Mon Sep 17 00:00:00 2001 From: dimon Date: Wed, 3 Jun 2026 20:47:46 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=208=20major=20features:=20trafilatura?= =?UTF-8?q?,=20digest,=20ntfy=20actions,=20templates,=20FTS5=20search,=20b?= =?UTF-8?q?ackup/restore,=20proxy,=20RSS=20reader?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Full article extraction via trafilatura (fetch_full_article) - Digest mode with configurable period (digest_enabled, digest_period_hours) - ntfy Actions buttons (Open article, Open feed) - Notification templates with {title}, {body}, {link}, {source}, {image_url} - FTS5 full-text search in notification history - Database backup/restore (download/upload .db) - HTTP/SOCKS proxy for RSS feed fetching (proxy_url setting) - Built-in RSS reader tab with categories, unread counts, article detail view - Auto-category 'Общее' for feeds without a category - Article storage (Article table) for reader - DigestEntry model for pending digest entries Co-Authored-By: Claude Opus 4.8 (1M context) --- app/checker.py | 209 ++++++++++++++++++++- app/database.py | 38 +++- app/delivery.py | 90 +++++++-- app/main.py | 394 ++++++++++++++++++++++++++++++++++++++- app/models.py | 60 ++++++ app/ntfy.py | 13 ++ app/scheduler.py | 89 +++++++++ app/schemas.py | 35 ++++ app/static/app.js | 255 ++++++++++++++++++++++++- app/static/i18n.js | 108 +++++++++++ app/static/style.css | 51 +++++ app/templates/index.html | 115 +++++++++++- requirements.txt | 1 + 13 files changed, 1414 insertions(+), 44 deletions(-) diff --git a/app/checker.py b/app/checker.py index 18a6ca2..1b9dacc 100644 --- a/app/checker.py +++ b/app/checker.py @@ -2,30 +2,38 @@ from __future__ import annotations import asyncio +import json import logging import re from datetime import datetime, timezone from html import unescape +import httpx +from io import StringIO + import feedparser from sqlmodel import Session, select from 
. import delivery from .database import engine, get_settings from .delivery import Message -from .models import Feed, Notification, SeenEntry +from .models import Article, DigestEntry, Feed, Notification, SeenEntry log = logging.getLogger("checker") _TAG_RE = re.compile(r"<[^>]+>") _IMG_RE = re.compile(r'<img[^>]+src=["\']([^"\']+)["\']', re.IGNORECASE) +_VIDEO_RE = re.compile( + r'<(?:video|iframe|source|embed)[^>]*src=["\']([^"\']+)["\']', re.IGNORECASE +) +_ENC_VIDEO_RE = re.compile(r'<(?:video|iframe|source|embed)[^>]*>', re.IGNORECASE) def _strip_html(text: str, limit: int = 1500) -> str: text = unescape(_TAG_RE.sub(" ", text or "")) text = re.sub(r"[ \t]+", " ", text) text = re.sub(r"\n\s*\n\s*\n+", "\n\n", text).strip() - if len(text) > limit: + if limit and len(text) > limit: text = text[:limit].rsplit(" ", 1)[0] + " …" return text @@ -57,6 +65,60 @@ def _extract_image(entry) -> str: return match.group(1) if match else "" +def _extract_all_images(entry) -> list[str]: + """Extract ALL image URLs from a feed entry (deduplicated, order preserved).""" + urls: list[str] = [] + + # media_content / media_thumbnail + for key in ("media_content", "media_thumbnail"): + media = entry.get(key) + if media and isinstance(media, list): + for item in media: + url = item.get("url") + if url: + urls.append(url) + + # enclosure links with image/ type + for link in entry.get("links", []): + if link.get("rel") == "enclosure" and str(link.get("type", "")).startswith("image"): + href = link.get("href", "") + if href: + urls.append(href) + + # <img> tags in HTML body + html = entry.get("summary") or entry.get("description") or "" + if not html: + content = entry.get("content") + if content and isinstance(content, list): + html = content[0].get("value", "") + urls.extend(_IMG_RE.findall(html or "")) + + # deduplicate preserving order + return list(dict.fromkeys(urls)) + + +def _extract_videos(entry) -> list[str]: + """Extract video/multimedia URLs from a feed entry.""" + urls: list[str] = [] + 
+ # enclosure links with video/ type + for link in entry.get("links", []): + if link.get("rel") == "enclosure" and str(link.get("type", "")).startswith("video"): + href = link.get("href", "") + if href: + urls.append(href) + + #