Initial commit: AegisSight-Monitor (OSINT-Monitoringsystem)
Dieser Commit ist enthalten in:
527
src/routers/sources.py
Normale Datei
527
src/routers/sources.py
Normale Datei
@@ -0,0 +1,527 @@
|
||||
"""Sources-Router: Quellenverwaltung (Multi-Tenant)."""
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest
|
||||
from auth import get_current_user
|
||||
from database import db_dependency, refresh_source_counts
|
||||
from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name
|
||||
import aiosqlite
|
||||
|
||||
logger = logging.getLogger("osint.sources")
|
||||
|
||||
router = APIRouter(prefix="/api/sources", tags=["sources"])
|
||||
|
||||
SOURCE_UPDATE_COLUMNS = {"name", "url", "domain", "source_type", "category", "status", "notes"}
|
||||
|
||||
|
||||
def _check_source_ownership(source: dict, username: str):
|
||||
"""Prueft ob der Nutzer die Quelle bearbeiten/loeschen darf."""
|
||||
added_by = source.get("added_by", "")
|
||||
if added_by == "system":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="System-Quellen koennen nicht veraendert werden",
|
||||
)
|
||||
if added_by and added_by != username:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Nur der Ersteller kann diese Quelle bearbeiten",
|
||||
)
|
||||
|
||||
|
||||
@router.get("", response_model=list[SourceResponse])
|
||||
async def list_sources(
|
||||
source_type: str = None,
|
||||
category: str = None,
|
||||
source_status: str = None,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Alle Quellen auflisten (global + org-spezifisch)."""
|
||||
tenant_id = current_user.get("tenant_id")
|
||||
|
||||
# Global (tenant_id=NULL) + eigene Org
|
||||
query = "SELECT * FROM sources WHERE (tenant_id IS NULL OR tenant_id = ?)"
|
||||
params = [tenant_id]
|
||||
|
||||
if source_type:
|
||||
query += " AND source_type = ?"
|
||||
params.append(source_type)
|
||||
if category:
|
||||
query += " AND category = ?"
|
||||
params.append(category)
|
||||
if source_status:
|
||||
query += " AND status = ?"
|
||||
params.append(source_status)
|
||||
|
||||
query += " ORDER BY source_type, category, name"
|
||||
cursor = await db.execute(query, params)
|
||||
rows = await cursor.fetchall()
|
||||
return [dict(row) for row in rows]
|
||||
|
||||
|
||||
@router.get("/stats")
|
||||
async def get_source_stats(
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Aggregierte Quellen-Statistiken (global + eigene Org)."""
|
||||
tenant_id = current_user.get("tenant_id")
|
||||
|
||||
cursor = await db.execute("""
|
||||
SELECT
|
||||
source_type,
|
||||
COUNT(*) as count,
|
||||
SUM(article_count) as total_articles
|
||||
FROM sources
|
||||
WHERE status = 'active' AND (tenant_id IS NULL OR tenant_id = ?)
|
||||
GROUP BY source_type
|
||||
""", (tenant_id,))
|
||||
rows = await cursor.fetchall()
|
||||
|
||||
stats = {
|
||||
"rss_feed": {"count": 0, "articles": 0},
|
||||
"web_source": {"count": 0, "articles": 0},
|
||||
"excluded": {"count": 0, "articles": 0},
|
||||
}
|
||||
for row in rows:
|
||||
st = row["source_type"]
|
||||
if st in stats:
|
||||
stats[st]["count"] = row["count"]
|
||||
stats[st]["articles"] = row["total_articles"] or 0
|
||||
|
||||
cursor = await db.execute(
|
||||
"SELECT COUNT(*) as cnt FROM articles WHERE tenant_id = ?",
|
||||
(tenant_id,),
|
||||
)
|
||||
total_row = await cursor.fetchone()
|
||||
|
||||
return {
|
||||
"by_type": stats,
|
||||
"total_sources": sum(s["count"] for s in stats.values()),
|
||||
"total_articles": total_row["cnt"],
|
||||
}
|
||||
|
||||
|
||||
@router.post("/discover", response_model=DiscoverResponse)
|
||||
async def discover_source_endpoint(
|
||||
data: DiscoverRequest,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
):
|
||||
"""RSS-Feed, Name, Kategorie und Domain einer URL automatisch erkennen."""
|
||||
try:
|
||||
result = await discover_source(data.url)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Discovery fehlgeschlagen: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail="Discovery fehlgeschlagen")
|
||||
|
||||
|
||||
@router.post("/discover-multi", response_model=DiscoverMultiResponse)
|
||||
async def discover_multi_endpoint(
|
||||
data: DiscoverRequest,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Findet ALLE RSS-Feeds einer Domain, bewertet sie mit Claude und legt relevante als Quellen an."""
|
||||
tenant_id = current_user.get("tenant_id")
|
||||
try:
|
||||
multi = await discover_all_feeds(data.url)
|
||||
domain = multi["domain"]
|
||||
category = multi["category"]
|
||||
|
||||
if not multi["feeds"]:
|
||||
single = await discover_source(data.url)
|
||||
sources = []
|
||||
if single.get("rss_url"):
|
||||
cursor = await db.execute(
|
||||
"SELECT id FROM sources WHERE url = ?", (single["rss_url"],)
|
||||
)
|
||||
existing = await cursor.fetchone()
|
||||
if not existing:
|
||||
cursor = await db.execute(
|
||||
"""INSERT INTO sources (name, url, domain, source_type, category, status, added_by, tenant_id)
|
||||
VALUES (?, ?, ?, ?, ?, 'active', ?, ?)""",
|
||||
(single["name"], single["rss_url"], single["domain"],
|
||||
single["source_type"], single["category"], current_user["username"], tenant_id),
|
||||
)
|
||||
await db.commit()
|
||||
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (cursor.lastrowid,))
|
||||
row = await cursor.fetchone()
|
||||
sources.append(dict(row))
|
||||
|
||||
return DiscoverMultiResponse(
|
||||
domain=single.get("domain", domain),
|
||||
category=single.get("category", category),
|
||||
added_count=len(sources),
|
||||
skipped_count=1 if not sources and single.get("rss_url") else 0,
|
||||
total_found=1 if single.get("rss_url") else 0,
|
||||
sources=sources,
|
||||
fallback_single=True,
|
||||
)
|
||||
|
||||
relevant_feeds = await evaluate_feeds_with_claude(domain, multi["feeds"])
|
||||
|
||||
cursor = await db.execute("SELECT url FROM sources WHERE url IS NOT NULL")
|
||||
existing_urls = {row["url"] for row in await cursor.fetchall()}
|
||||
|
||||
new_ids = []
|
||||
skipped = 0
|
||||
for feed in relevant_feeds:
|
||||
if feed["url"] in existing_urls:
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
cursor = await db.execute(
|
||||
"""INSERT INTO sources (name, url, domain, source_type, category, status, added_by, tenant_id)
|
||||
VALUES (?, ?, ?, 'rss_feed', ?, 'active', ?, ?)""",
|
||||
(feed["name"], feed["url"], domain, category, current_user["username"], tenant_id),
|
||||
)
|
||||
new_ids.append(cursor.lastrowid)
|
||||
existing_urls.add(feed["url"])
|
||||
|
||||
cursor = await db.execute(
|
||||
"SELECT id FROM sources WHERE LOWER(domain) = ? AND source_type = 'web_source'",
|
||||
(domain.lower(),),
|
||||
)
|
||||
if not await cursor.fetchone():
|
||||
cursor = await db.execute(
|
||||
"""INSERT INTO sources (name, url, domain, source_type, category, status, added_by, tenant_id)
|
||||
VALUES (?, ?, ?, 'web_source', ?, 'active', ?, ?)""",
|
||||
(domain_to_display_name(domain), f"https://{domain}", domain, category, current_user["username"], tenant_id),
|
||||
)
|
||||
new_ids.append(cursor.lastrowid)
|
||||
|
||||
await db.commit()
|
||||
|
||||
added_sources = []
|
||||
if new_ids:
|
||||
placeholders = ",".join("?" for _ in new_ids)
|
||||
cursor = await db.execute(
|
||||
f"SELECT * FROM sources WHERE id IN ({placeholders}) ORDER BY id",
|
||||
new_ids,
|
||||
)
|
||||
added_sources = [dict(row) for row in await cursor.fetchall()]
|
||||
|
||||
return DiscoverMultiResponse(
|
||||
domain=domain,
|
||||
category=category,
|
||||
added_count=len(added_sources),
|
||||
skipped_count=skipped,
|
||||
total_found=len(multi["feeds"]),
|
||||
sources=added_sources,
|
||||
fallback_single=False,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Multi-Discovery fehlgeschlagen: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail="Multi-Discovery fehlgeschlagen")
|
||||
|
||||
|
||||
@router.post("/rediscover-existing")
|
||||
async def rediscover_existing_endpoint(
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Einmalige Migration: Bestehende RSS-Quellen nach zusaetzlichen Feeds durchsuchen."""
|
||||
tenant_id = current_user.get("tenant_id")
|
||||
try:
|
||||
cursor = await db.execute(
|
||||
"SELECT * FROM sources WHERE source_type = 'rss_feed' AND status = 'active' AND (tenant_id IS NULL OR tenant_id = ?)",
|
||||
(tenant_id,),
|
||||
)
|
||||
existing_sources = [dict(row) for row in await cursor.fetchall()]
|
||||
|
||||
domains = defaultdict(list)
|
||||
for src in existing_sources:
|
||||
if src["domain"]:
|
||||
domains[src["domain"]].append(src)
|
||||
|
||||
cursor = await db.execute("SELECT url FROM sources WHERE url IS NOT NULL")
|
||||
existing_urls = {row["url"] for row in await cursor.fetchall()}
|
||||
|
||||
domains_processed = 0
|
||||
feeds_added = 0
|
||||
feeds_skipped = 0
|
||||
|
||||
for domain, sources in domains.items():
|
||||
domains_processed += 1
|
||||
base_url = f"https://{domain}"
|
||||
|
||||
try:
|
||||
multi = await discover_all_feeds(base_url)
|
||||
if not multi["feeds"]:
|
||||
continue
|
||||
|
||||
relevant_feeds = await evaluate_feeds_with_claude(domain, multi["feeds"])
|
||||
category = _detect_category(domain)
|
||||
|
||||
for feed in relevant_feeds:
|
||||
if feed["url"] in existing_urls:
|
||||
feeds_skipped += 1
|
||||
continue
|
||||
|
||||
await db.execute(
|
||||
"""INSERT INTO sources (name, url, domain, source_type, category, status, added_by, tenant_id)
|
||||
VALUES (?, ?, ?, 'rss_feed', ?, 'active', ?, ?)""",
|
||||
(feed["name"], feed["url"], domain, category, current_user["username"], tenant_id),
|
||||
)
|
||||
existing_urls.add(feed["url"])
|
||||
feeds_added += 1
|
||||
|
||||
await db.commit()
|
||||
except Exception as e:
|
||||
logger.warning(f"Rediscovery fuer {domain} fehlgeschlagen: {e}")
|
||||
continue
|
||||
|
||||
return {
|
||||
"domains_processed": domains_processed,
|
||||
"feeds_added": feeds_added,
|
||||
"feeds_skipped": feeds_skipped,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Rediscovery fehlgeschlagen: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail="Rediscovery fehlgeschlagen")
|
||||
|
||||
|
||||
@router.post("/block-domain")
|
||||
async def block_domain(
|
||||
data: DomainActionRequest,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Domain sperren: Alle Feeds deaktivieren + excluded-Eintrag anlegen."""
|
||||
tenant_id = current_user.get("tenant_id")
|
||||
domain = data.domain.lower().strip()
|
||||
username = current_user["username"]
|
||||
|
||||
cursor = await db.execute(
|
||||
"SELECT added_by FROM sources WHERE LOWER(domain) = ? AND source_type != 'excluded' AND status = 'active' AND (tenant_id IS NULL OR tenant_id = ?)",
|
||||
(domain, tenant_id),
|
||||
)
|
||||
affected = await cursor.fetchall()
|
||||
for row in affected:
|
||||
ab = row["added_by"] or ""
|
||||
if ab != "system" and ab != username and ab != "":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Domain enthaelt Quellen anderer Nutzer",
|
||||
)
|
||||
|
||||
cursor = await db.execute(
|
||||
"UPDATE sources SET status = 'inactive' WHERE LOWER(domain) = ? AND source_type != 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)",
|
||||
(domain, tenant_id),
|
||||
)
|
||||
feeds_deactivated = cursor.rowcount
|
||||
|
||||
cursor = await db.execute(
|
||||
"SELECT id FROM sources WHERE LOWER(domain) = ? AND source_type = 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)",
|
||||
(domain, tenant_id),
|
||||
)
|
||||
existing = await cursor.fetchone()
|
||||
|
||||
if existing:
|
||||
excluded_id = existing["id"]
|
||||
if data.notes:
|
||||
await db.execute(
|
||||
"UPDATE sources SET notes = ? WHERE id = ?",
|
||||
(data.notes, excluded_id),
|
||||
)
|
||||
else:
|
||||
cursor = await db.execute(
|
||||
"""INSERT INTO sources (name, url, domain, source_type, category, status, notes, added_by, tenant_id)
|
||||
VALUES (?, NULL, ?, 'excluded', 'sonstige', 'active', ?, ?, ?)""",
|
||||
(domain, domain, data.notes, current_user["username"], tenant_id),
|
||||
)
|
||||
excluded_id = cursor.lastrowid
|
||||
|
||||
await db.commit()
|
||||
|
||||
return {
|
||||
"domain": domain,
|
||||
"feeds_deactivated": feeds_deactivated,
|
||||
"excluded_id": excluded_id,
|
||||
}
|
||||
|
||||
|
||||
@router.post("/unblock-domain")
|
||||
async def unblock_domain(
|
||||
data: DomainActionRequest,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Domain entsperren: excluded-Eintrag loeschen + Feeds reaktivieren."""
|
||||
tenant_id = current_user.get("tenant_id")
|
||||
domain = data.domain.lower().strip()
|
||||
|
||||
cursor = await db.execute(
|
||||
"SELECT COUNT(*) as cnt FROM sources WHERE LOWER(domain) = ? AND source_type != 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)",
|
||||
(domain, tenant_id),
|
||||
)
|
||||
row = await cursor.fetchone()
|
||||
has_feeds = row["cnt"] > 0
|
||||
|
||||
if has_feeds:
|
||||
await db.execute(
|
||||
"DELETE FROM sources WHERE LOWER(domain) = ? AND source_type = 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)",
|
||||
(domain, tenant_id),
|
||||
)
|
||||
cursor = await db.execute(
|
||||
"UPDATE sources SET status = 'active' WHERE LOWER(domain) = ? AND source_type != 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)",
|
||||
(domain, tenant_id),
|
||||
)
|
||||
feeds_reactivated = cursor.rowcount
|
||||
else:
|
||||
await db.execute(
|
||||
"""UPDATE sources SET source_type = 'web_source', status = 'active', notes = 'Entsperrt'
|
||||
WHERE LOWER(domain) = ? AND source_type = 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)""",
|
||||
(domain, tenant_id),
|
||||
)
|
||||
feeds_reactivated = 0
|
||||
|
||||
await db.commit()
|
||||
|
||||
return {
|
||||
"domain": domain,
|
||||
"feeds_reactivated": feeds_reactivated,
|
||||
}
|
||||
|
||||
|
||||
@router.delete("/domain/{domain}")
|
||||
async def delete_domain(
|
||||
domain: str,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Alle Quellen einer Domain loeschen (nur org-eigene, nicht globale)."""
|
||||
tenant_id = current_user.get("tenant_id")
|
||||
domain_lower = domain.lower().strip()
|
||||
|
||||
cursor = await db.execute(
|
||||
"SELECT * FROM sources WHERE LOWER(domain) = ? AND tenant_id = ?",
|
||||
(domain_lower, tenant_id),
|
||||
)
|
||||
rows = await cursor.fetchall()
|
||||
|
||||
if not rows:
|
||||
raise HTTPException(status_code=404, detail="Keine Quellen fuer diese Domain gefunden")
|
||||
|
||||
username = current_user["username"]
|
||||
for row in rows:
|
||||
source = dict(row)
|
||||
if source["added_by"] == "system":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Domain enthaelt System-Quellen, die nicht geloescht werden koennen",
|
||||
)
|
||||
if source["added_by"] and source["added_by"] != username:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Domain enthaelt Quellen anderer Nutzer",
|
||||
)
|
||||
|
||||
await db.execute(
|
||||
"DELETE FROM sources WHERE LOWER(domain) = ? AND tenant_id = ?",
|
||||
(domain_lower, tenant_id),
|
||||
)
|
||||
await db.commit()
|
||||
|
||||
return {
|
||||
"domain": domain_lower,
|
||||
"deleted_count": len(rows),
|
||||
}
|
||||
|
||||
|
||||
@router.post("", response_model=SourceResponse, status_code=status.HTTP_201_CREATED)
|
||||
async def create_source(
|
||||
data: SourceCreate,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Neue Quelle hinzufuegen (org-spezifisch)."""
|
||||
tenant_id = current_user.get("tenant_id")
|
||||
cursor = await db.execute(
|
||||
"""INSERT INTO sources (name, url, domain, source_type, category, status, notes, added_by, tenant_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
data.name,
|
||||
data.url,
|
||||
data.domain,
|
||||
data.source_type,
|
||||
data.category,
|
||||
data.status,
|
||||
data.notes,
|
||||
current_user["username"],
|
||||
tenant_id,
|
||||
),
|
||||
)
|
||||
await db.commit()
|
||||
|
||||
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (cursor.lastrowid,))
|
||||
row = await cursor.fetchone()
|
||||
return dict(row)
|
||||
|
||||
|
||||
@router.put("/{source_id}", response_model=SourceResponse)
|
||||
async def update_source(
|
||||
source_id: int,
|
||||
data: SourceUpdate,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Quelle bearbeiten."""
|
||||
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
|
||||
row = await cursor.fetchone()
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
|
||||
|
||||
_check_source_ownership(dict(row), current_user["username"])
|
||||
|
||||
updates = {}
|
||||
for field, value in data.model_dump(exclude_none=True).items():
|
||||
if field not in SOURCE_UPDATE_COLUMNS:
|
||||
continue
|
||||
updates[field] = value
|
||||
|
||||
if not updates:
|
||||
return dict(row)
|
||||
|
||||
set_clause = ", ".join(f"{k} = ?" for k in updates)
|
||||
values = list(updates.values()) + [source_id]
|
||||
|
||||
await db.execute(f"UPDATE sources SET {set_clause} WHERE id = ?", values)
|
||||
await db.commit()
|
||||
|
||||
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
|
||||
row = await cursor.fetchone()
|
||||
return dict(row)
|
||||
|
||||
|
||||
@router.delete("/{source_id}", status_code=status.HTTP_204_NO_CONTENT)
|
||||
async def delete_source(
|
||||
source_id: int,
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Quelle loeschen."""
|
||||
cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
|
||||
row = await cursor.fetchone()
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Quelle nicht gefunden")
|
||||
|
||||
_check_source_ownership(dict(row), current_user["username"])
|
||||
|
||||
await db.execute("DELETE FROM sources WHERE id = ?", (source_id,))
|
||||
await db.commit()
|
||||
|
||||
|
||||
@router.post("/refresh-counts")
|
||||
async def trigger_refresh_counts(
|
||||
current_user: dict = Depends(get_current_user),
|
||||
db: aiosqlite.Connection = Depends(db_dependency),
|
||||
):
|
||||
"""Artikelzaehler fuer alle Quellen neu berechnen."""
|
||||
await refresh_source_counts(db)
|
||||
return {"status": "ok"}
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren