Initial commit: AegisSight-Monitor (OSINT-Monitoringsystem)

Dieser Commit ist enthalten in:
claude-dev
2026-03-04 17:53:18 +01:00
Commit 8312d24912
51 geänderte Dateien mit 19355 neuen und 0 gelöschten Zeilen

527
src/routers/sources.py Normale Datei
Datei anzeigen

@@ -0,0 +1,527 @@
"""Sources-Router: Quellenverwaltung (Multi-Tenant)."""
import logging
from collections import defaultdict
from fastapi import APIRouter, Depends, HTTPException, status
from models import SourceCreate, SourceUpdate, SourceResponse, DiscoverRequest, DiscoverResponse, DiscoverMultiResponse, DomainActionRequest
from auth import get_current_user
from database import db_dependency, refresh_source_counts
from source_rules import discover_source, discover_all_feeds, evaluate_feeds_with_claude, _extract_domain, _detect_category, domain_to_display_name
import aiosqlite
# Module-level logger; records are emitted under the "osint.sources" name.
logger = logging.getLogger("osint.sources")
# Every endpoint registered on this router is served below /api/sources.
router = APIRouter(prefix="/api/sources", tags=["sources"])
# Whitelist of column names that update_source accepts; only these names are
# ever interpolated into the SET clause of its UPDATE statement.
SOURCE_UPDATE_COLUMNS = {"name", "url", "domain", "source_type", "category", "status", "notes"}
def _check_source_ownership(source: dict, username: str):
"""Prueft ob der Nutzer die Quelle bearbeiten/loeschen darf."""
added_by = source.get("added_by", "")
if added_by == "system":
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="System-Quellen koennen nicht veraendert werden",
)
if added_by and added_by != username:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Nur der Ersteller kann diese Quelle bearbeiten",
)
@router.get("", response_model=list[SourceResponse])
async def list_sources(
    source_type: str = None,
    category: str = None,
    source_status: str = None,
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """List all sources visible to the caller (global plus own organisation).

    Args:
        source_type: Optional exact-match filter on ``source_type``.
        category: Optional exact-match filter on ``category``.
        source_status: Optional exact-match filter on ``status``.

    Returns:
        The matching rows as dicts, ordered by source type, category and name.
    """
    # Global rows carry tenant_id NULL; everything else must match the caller.
    sql = "SELECT * FROM sources WHERE (tenant_id IS NULL OR tenant_id = ?)"
    args = [current_user.get("tenant_id")]

    # Each optional filter contributes its SQL fragment only when a value was given.
    optional_filters = (
        (" AND source_type = ?", source_type),
        (" AND category = ?", category),
        (" AND status = ?", source_status),
    )
    for fragment, wanted in optional_filters:
        if wanted:
            sql += fragment
            args.append(wanted)

    sql += " ORDER BY source_type, category, name"
    cursor = await db.execute(sql, args)
    return [dict(record) for record in await cursor.fetchall()]
@router.get("/stats")
async def get_source_stats(
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Return aggregated source statistics (global plus own organisation).

    Returns:
        A dict with ``by_type`` (count and article sum per source type for
        active sources), ``total_sources`` (sum of those counts) and
        ``total_articles`` (number of rows in ``articles`` for the caller's
        tenant).
    """
    tenant_id = current_user.get("tenant_id")

    # Start every known type at zero so absent types still show up in the result.
    stats = {
        kind: {"count": 0, "articles": 0}
        for kind in ("rss_feed", "web_source", "excluded")
    }

    cursor = await db.execute("""
        SELECT
        source_type,
        COUNT(*) as count,
        SUM(article_count) as total_articles
        FROM sources
        WHERE status = 'active' AND (tenant_id IS NULL OR tenant_id = ?)
        GROUP BY source_type
        """, (tenant_id,))
    for record in await cursor.fetchall():
        bucket = stats.get(record["source_type"])
        if bucket is None:
            # Source types outside the three known buckets are ignored.
            continue
        bucket["count"] = record["count"]
        # SUM() yields NULL when every article_count is NULL.
        bucket["articles"] = record["total_articles"] or 0

    cursor = await db.execute(
        "SELECT COUNT(*) as cnt FROM articles WHERE tenant_id = ?",
        (tenant_id,),
    )
    article_row = await cursor.fetchone()

    return {
        "by_type": stats,
        "total_sources": sum(bucket["count"] for bucket in stats.values()),
        "total_articles": article_row["cnt"],
    }
@router.post("/discover", response_model=DiscoverResponse)
async def discover_source_endpoint(
    data: DiscoverRequest,
    current_user: dict = Depends(get_current_user),
):
    """Auto-detect RSS feed, name, category and domain for a URL.

    Returns:
        Whatever ``discover_source`` produces for ``data.url``.

    Raises:
        HTTPException: 500 if discovery raises any exception.
    """
    try:
        return await discover_source(data.url)
    except Exception as exc:
        logger.error(f"Discovery fehlgeschlagen: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail="Discovery fehlgeschlagen")
@router.post("/discover-multi", response_model=DiscoverMultiResponse)
async def discover_multi_endpoint(
    data: DiscoverRequest,
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Find all RSS feeds of a domain, rate them and store the relevant ones.

    If ``discover_all_feeds`` finds no feeds, the endpoint falls back to
    ``discover_source`` and stores at most that single feed. Otherwise the
    feeds selected by ``evaluate_feeds_with_claude`` are inserted as
    ``rss_feed`` sources, plus one ``web_source`` row for the domain if none
    exists yet.

    Returns:
        A ``DiscoverMultiResponse`` describing added and skipped sources.

    Raises:
        HTTPException: 500 on any unexpected failure; an ``HTTPException``
            raised while processing is passed through unchanged.
    """
    tenant_id = current_user.get("tenant_id")
    try:
        multi = await discover_all_feeds(data.url)
        domain = multi["domain"]
        category = multi["category"]

        if not multi["feeds"]:
            # Fallback: no feed list found, try single-source discovery.
            single = await discover_source(data.url)
            sources = []
            rss_url = single.get("rss_url")
            if rss_url:
                cursor = await db.execute(
                    "SELECT id FROM sources WHERE url = ?", (rss_url,)
                )
                existing = await cursor.fetchone()
                if not existing:
                    cursor = await db.execute(
                        """INSERT INTO sources (name, url, domain, source_type, category, status, added_by, tenant_id)
                        VALUES (?, ?, ?, ?, ?, 'active', ?, ?)""",
                        (single["name"], rss_url, single["domain"],
                         single["source_type"], single["category"], current_user["username"], tenant_id),
                    )
                    await db.commit()
                    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (cursor.lastrowid,))
                    row = await cursor.fetchone()
                    sources.append(dict(row))
            return DiscoverMultiResponse(
                domain=single.get("domain", domain),
                category=single.get("category", category),
                added_count=len(sources),
                # A found feed that was not added must already have existed.
                skipped_count=1 if not sources and rss_url else 0,
                total_found=1 if rss_url else 0,
                sources=sources,
                fallback_single=True,
            )

        relevant_feeds = await evaluate_feeds_with_claude(domain, multi["feeds"])

        # NOTE(review): this de-duplication looks at URLs of all tenants,
        # unlike the tenant-scoped queries elsewhere in this module - confirm
        # that a global uniqueness of feed URLs is intended.
        cursor = await db.execute("SELECT url FROM sources WHERE url IS NOT NULL")
        existing_urls = {row["url"] for row in await cursor.fetchall()}

        new_ids = []
        skipped = 0
        for feed in relevant_feeds:
            if feed["url"] in existing_urls:
                skipped += 1
                continue
            cursor = await db.execute(
                """INSERT INTO sources (name, url, domain, source_type, category, status, added_by, tenant_id)
                VALUES (?, ?, ?, 'rss_feed', ?, 'active', ?, ?)""",
                (feed["name"], feed["url"], domain, category, current_user["username"], tenant_id),
            )
            new_ids.append(cursor.lastrowid)
            # Guards against the same URL appearing twice in relevant_feeds.
            existing_urls.add(feed["url"])

        # Make sure the domain also has a web_source entry.
        cursor = await db.execute(
            "SELECT id FROM sources WHERE LOWER(domain) = ? AND source_type = 'web_source'",
            (domain.lower(),),
        )
        if not await cursor.fetchone():
            cursor = await db.execute(
                """INSERT INTO sources (name, url, domain, source_type, category, status, added_by, tenant_id)
                VALUES (?, ?, ?, 'web_source', ?, 'active', ?, ?)""",
                (domain_to_display_name(domain), f"https://{domain}", domain, category, current_user["username"], tenant_id),
            )
            new_ids.append(cursor.lastrowid)

        await db.commit()

        added_sources = []
        if new_ids:
            # Only "?" placeholders are interpolated; the ids are bound parameters.
            placeholders = ",".join("?" for _ in new_ids)
            cursor = await db.execute(
                f"SELECT * FROM sources WHERE id IN ({placeholders}) ORDER BY id",
                new_ids,
            )
            added_sources = [dict(row) for row in await cursor.fetchall()]

        return DiscoverMultiResponse(
            domain=domain,
            category=category,
            added_count=len(added_sources),
            skipped_count=skipped,
            total_found=len(multi["feeds"]),
            sources=added_sources,
            fallback_single=False,
        )
    except HTTPException:
        # Keep deliberate HTTP errors intact, but do not leave a
        # half-finished batch of inserts pending on the connection.
        await db.rollback()
        raise
    except Exception as e:
        logger.error(f"Multi-Discovery fehlgeschlagen: {e}", exc_info=True)
        # Discard inserts made before the failure so they cannot be committed
        # later as an incomplete batch.
        await db.rollback()
        raise HTTPException(status_code=500, detail="Multi-Discovery fehlgeschlagen")
@router.post("/rediscover-existing")
async def rediscover_existing_endpoint(
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """One-off migration: scan existing RSS sources for additional feeds.

    For every distinct domain of the caller's active ``rss_feed`` sources
    (global plus own organisation), all feeds are discovered, rated, and the
    relevant ones not yet stored are inserted. Each domain is committed on its
    own; a failing domain is rolled back, logged and skipped.

    Returns:
        A dict with ``domains_processed``, ``feeds_added`` (only feeds whose
        insert was committed) and ``feeds_skipped``.

    Raises:
        HTTPException: 500 if the run fails outside the per-domain handling.
    """
    tenant_id = current_user.get("tenant_id")
    try:
        cursor = await db.execute(
            "SELECT * FROM sources WHERE source_type = 'rss_feed' AND status = 'active' AND (tenant_id IS NULL OR tenant_id = ?)",
            (tenant_id,),
        )
        existing_sources = [dict(row) for row in await cursor.fetchall()]

        # Distinct non-empty domains in first-seen order.
        domains = list(
            dict.fromkeys(src["domain"] for src in existing_sources if src["domain"])
        )

        cursor = await db.execute("SELECT url FROM sources WHERE url IS NOT NULL")
        existing_urls = {row["url"] for row in await cursor.fetchall()}

        domains_processed = 0
        feeds_added = 0
        feeds_skipped = 0

        for domain in domains:
            domains_processed += 1
            # URLs inserted for this domain but not yet committed.
            pending_urls = set()
            try:
                multi = await discover_all_feeds(f"https://{domain}")
                if not multi["feeds"]:
                    continue
                relevant_feeds = await evaluate_feeds_with_claude(domain, multi["feeds"])
                category = _detect_category(domain)
                for feed in relevant_feeds:
                    url = feed["url"]
                    if url in existing_urls or url in pending_urls:
                        feeds_skipped += 1
                        continue
                    await db.execute(
                        """INSERT INTO sources (name, url, domain, source_type, category, status, added_by, tenant_id)
                        VALUES (?, ?, ?, 'rss_feed', ?, 'active', ?, ?)""",
                        (feed["name"], url, domain, category, current_user["username"], tenant_id),
                    )
                    pending_urls.add(url)
                await db.commit()
            except Exception as e:
                # Undo this domain's partial inserts so they are neither
                # committed together with a later domain nor reported as added.
                await db.rollback()
                logger.warning(f"Rediscovery fuer {domain} fehlgeschlagen: {e}")
                continue

            # Only committed feeds are counted and remembered.
            existing_urls |= pending_urls
            feeds_added += len(pending_urls)

        return {
            "domains_processed": domains_processed,
            "feeds_added": feeds_added,
            "feeds_skipped": feeds_skipped,
        }
    except Exception as e:
        logger.error(f"Rediscovery fehlgeschlagen: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Rediscovery fehlgeschlagen")
@router.post("/block-domain")
async def block_domain(
    data: DomainActionRequest,
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Block a domain: deactivate its active feeds and record an excluded entry.

    Args:
        data: Request carrying the ``domain`` to block and optional ``notes``.

    Returns:
        A dict with the normalised ``domain``, the number of
        ``feeds_deactivated`` and the ``excluded_id`` of the exclusion row.

    Raises:
        HTTPException: 403 if an active source of the domain was added by a
            different (non-system) user.
    """
    tenant_id = current_user.get("tenant_id")
    domain = data.domain.lower().strip()
    username = current_user["username"]

    cursor = await db.execute(
        "SELECT added_by FROM sources WHERE LOWER(domain) = ? AND source_type != 'excluded' AND status = 'active' AND (tenant_id IS NULL OR tenant_id = ?)",
        (domain, tenant_id),
    )
    affected = await cursor.fetchall()
    for row in affected:
        creator = row["added_by"] or ""
        # System sources, ownerless sources and the caller's own sources may
        # be deactivated; anything else belongs to another user.
        if creator not in ("system", username, ""):
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Domain enthaelt Quellen anderer Nutzer",
            )

    # NOTE(review): the tenant filter also matches global rows (tenant_id
    # NULL), so this deactivates them too - confirm that this is intended.
    # Restricting to status = 'active' keeps the update on exactly the rows
    # vetted above and makes rowcount the number of feeds actually deactivated.
    cursor = await db.execute(
        "UPDATE sources SET status = 'inactive' WHERE LOWER(domain) = ? AND source_type != 'excluded' AND status = 'active' AND (tenant_id IS NULL OR tenant_id = ?)",
        (domain, tenant_id),
    )
    feeds_deactivated = cursor.rowcount

    cursor = await db.execute(
        "SELECT id FROM sources WHERE LOWER(domain) = ? AND source_type = 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)",
        (domain, tenant_id),
    )
    existing = await cursor.fetchone()
    if existing:
        # Reuse the existing exclusion; only overwrite notes when new ones were sent.
        excluded_id = existing["id"]
        if data.notes:
            await db.execute(
                "UPDATE sources SET notes = ? WHERE id = ?",
                (data.notes, excluded_id),
            )
    else:
        cursor = await db.execute(
            """INSERT INTO sources (name, url, domain, source_type, category, status, notes, added_by, tenant_id)
            VALUES (?, NULL, ?, 'excluded', 'sonstige', 'active', ?, ?, ?)""",
            (domain, domain, data.notes, username, tenant_id),
        )
        excluded_id = cursor.lastrowid

    await db.commit()
    return {
        "domain": domain,
        "feeds_deactivated": feeds_deactivated,
        "excluded_id": excluded_id,
    }
@router.post("/unblock-domain")
async def unblock_domain(
    data: DomainActionRequest,
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Unblock a domain: drop its excluded entry and reactivate its feeds.

    If the domain has no non-excluded sources, the excluded entry is instead
    converted into an active ``web_source``.

    Returns:
        A dict with the normalised ``domain`` and ``feeds_reactivated``.
    """
    domain = data.domain.lower().strip()
    # Every statement below binds the same (domain, tenant) pair.
    scope = (domain, current_user.get("tenant_id"))

    count_cursor = await db.execute(
        "SELECT COUNT(*) as cnt FROM sources WHERE LOWER(domain) = ? AND source_type != 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)",
        scope,
    )
    feed_total = (await count_cursor.fetchone())["cnt"]

    if feed_total <= 0:
        # Nothing to reactivate: turn the exclusion itself into a web source.
        await db.execute(
            """UPDATE sources SET source_type = 'web_source', status = 'active', notes = 'Entsperrt'
            WHERE LOWER(domain) = ? AND source_type = 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)""",
            scope,
        )
        feeds_reactivated = 0
    else:
        await db.execute(
            "DELETE FROM sources WHERE LOWER(domain) = ? AND source_type = 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)",
            scope,
        )
        update_cursor = await db.execute(
            "UPDATE sources SET status = 'active' WHERE LOWER(domain) = ? AND source_type != 'excluded' AND (tenant_id IS NULL OR tenant_id = ?)",
            scope,
        )
        feeds_reactivated = update_cursor.rowcount

    await db.commit()
    return {
        "domain": domain,
        "feeds_reactivated": feeds_reactivated,
    }
@router.delete("/domain/{domain}")
async def delete_domain(
    domain: str,
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Delete all sources of a domain (organisation-owned only, never global).

    Returns:
        A dict with the normalised ``domain`` and the ``deleted_count``.

    Raises:
        HTTPException: 404 if the caller's tenant has no sources for the
            domain; 403 if one of them was added by ``"system"`` or by a
            different user.
    """
    tenant_id = current_user.get("tenant_id")
    domain_lower = domain.lower().strip()
    scope = (domain_lower, tenant_id)

    cursor = await db.execute(
        "SELECT * FROM sources WHERE LOWER(domain) = ? AND tenant_id = ?",
        scope,
    )
    matches = await cursor.fetchall()
    if not matches:
        raise HTTPException(status_code=404, detail="Keine Quellen fuer diese Domain gefunden")

    username = current_user["username"]
    # Rows are vetted in order; the first offending row decides which error is raised.
    for record in matches:
        creator = record["added_by"]
        if creator == "system":
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Domain enthaelt System-Quellen, die nicht geloescht werden koennen",
            )
        if creator and creator != username:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Domain enthaelt Quellen anderer Nutzer",
            )

    await db.execute(
        "DELETE FROM sources WHERE LOWER(domain) = ? AND tenant_id = ?",
        scope,
    )
    await db.commit()

    return {
        "domain": domain_lower,
        "deleted_count": len(matches),
    }
@router.post("", response_model=SourceResponse, status_code=status.HTTP_201_CREATED)
async def create_source(
    data: SourceCreate,
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Add a new source for the caller's organisation.

    The row is stored with the caller's username as ``added_by`` and the
    caller's tenant as ``tenant_id``.

    Returns:
        The freshly inserted row as a dict.
    """
    # Order matches the column list of the INSERT statement.
    new_row = (
        data.name,
        data.url,
        data.domain,
        data.source_type,
        data.category,
        data.status,
        data.notes,
        current_user["username"],
        current_user.get("tenant_id"),
    )
    insert_cursor = await db.execute(
        """INSERT INTO sources (name, url, domain, source_type, category, status, notes, added_by, tenant_id)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        new_row,
    )
    await db.commit()

    # Read the row back so defaults filled in by the database are included.
    select_cursor = await db.execute(
        "SELECT * FROM sources WHERE id = ?", (insert_cursor.lastrowid,)
    )
    created = await select_cursor.fetchone()
    return dict(created)
@router.put("/{source_id}", response_model=SourceResponse)
async def update_source(
    source_id: int,
    data: SourceUpdate,
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Edit a source that is visible to the caller.

    Only fields that are set (not ``None``) and listed in
    ``SOURCE_UPDATE_COLUMNS`` are written.

    Returns:
        The updated row as a dict, or the unchanged row when the request
        contains no updatable field.

    Raises:
        HTTPException: 404 if no source with this id is visible to the
            caller's tenant; 403 if the ownership check rejects the caller.
    """
    tenant_id = current_user.get("tenant_id")
    # Same visibility rule as list_sources: global rows plus the caller's own
    # tenant. Sources of other tenants are reported as not found.
    cursor = await db.execute(
        "SELECT * FROM sources WHERE id = ? AND (tenant_id IS NULL OR tenant_id = ?)",
        (source_id, tenant_id),
    )
    row = await cursor.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Quelle nicht gefunden")

    _check_source_ownership(dict(row), current_user["username"])

    updates = {
        field: value
        for field, value in data.model_dump(exclude_none=True).items()
        if field in SOURCE_UPDATE_COLUMNS
    }
    if not updates:
        return dict(row)

    # Column names come from the whitelist above, values are bound parameters.
    set_clause = ", ".join(f"{k} = ?" for k in updates)
    values = list(updates.values()) + [source_id]
    await db.execute(f"UPDATE sources SET {set_clause} WHERE id = ?", values)
    await db.commit()

    cursor = await db.execute("SELECT * FROM sources WHERE id = ?", (source_id,))
    row = await cursor.fetchone()
    return dict(row)
@router.delete("/{source_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_source(
    source_id: int,
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Delete a source that is visible to the caller.

    Raises:
        HTTPException: 404 if no source with this id is visible to the
            caller's tenant; 403 if the ownership check rejects the caller.
    """
    tenant_id = current_user.get("tenant_id")
    # Same visibility rule as list_sources: global rows plus the caller's own
    # tenant. Sources of other tenants are reported as not found.
    cursor = await db.execute(
        "SELECT * FROM sources WHERE id = ? AND (tenant_id IS NULL OR tenant_id = ?)",
        (source_id, tenant_id),
    )
    row = await cursor.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Quelle nicht gefunden")

    _check_source_ownership(dict(row), current_user["username"])

    await db.execute(
        "DELETE FROM sources WHERE id = ? AND (tenant_id IS NULL OR tenant_id = ?)",
        (source_id, tenant_id),
    )
    await db.commit()
@router.post("/refresh-counts")
async def trigger_refresh_counts(
    current_user: dict = Depends(get_current_user),
    db: aiosqlite.Connection = Depends(db_dependency),
):
    """Recompute the article counters of all sources.

    Delegates to ``refresh_source_counts`` and reports success.

    Returns:
        ``{"status": "ok"}`` once the refresh call has returned.
    """
    await refresh_source_counts(db)
    outcome = {"status": "ok"}
    return outcome