diff --git a/src/main.py b/src/main.py index e744133..3aaa28c 100644 --- a/src/main.py +++ b/src/main.py @@ -246,7 +246,14 @@ async def cleanup_expired(): ) logger.info(f"Lage {incident['id']} archiviert (Aufbewahrung abgelaufen)") - # Verwaiste running-Einträge bereinigen (> 15 Minuten ohne Abschluss) + # Verwaiste running-Einträge bereinigen. + # Pruefen auf Pipeline-Fortschritt: legitime Long-Runner (z.B. Translator + # nach summary fuer jp_demo mit 200+ Artikeln ~20 Min) duerfen nicht + # vorzeitig gekillt werden. Ein Refresh gilt als verwaist, wenn entweder + # (a) seit ORPHAN_IDLE_LIMIT Min kein Pipeline-Step Fortschritt zeigte, + # oder (b) das harte Limit ORPHAN_HARD_LIMIT Min ueberschritten wurde. + ORPHAN_IDLE_LIMIT = 30 + ORPHAN_HARD_LIMIT = 90 cursor = await db.execute( "SELECT id, incident_id, started_at FROM refresh_log WHERE status = 'running'" ) @@ -258,12 +265,46 @@ async def cleanup_expired(): else: started = started.astimezone(TIMEZONE) age_minutes = (now - started).total_seconds() / 60 - if age_minutes >= 15: + if age_minutes < ORPHAN_IDLE_LIMIT: + continue + + # Letzter Pipeline-Step-Fortschritt (Start ODER Ende) + prog_cursor = await db.execute( + """SELECT MAX(COALESCE(completed_at, started_at)) AS last_activity + FROM refresh_pipeline_steps WHERE refresh_log_id = ?""", + (orphan["id"],), + ) + prog_row = await prog_cursor.fetchone() + last_activity_str = prog_row["last_activity"] if prog_row else None + + is_orphan = False + reason = None + if age_minutes >= ORPHAN_HARD_LIMIT: + is_orphan = True + reason = f"Verwaist (>{int(age_minutes)} Min, hartes Limit {ORPHAN_HARD_LIMIT} Min)" + elif last_activity_str: + last_activity = datetime.fromisoformat(last_activity_str) + if last_activity.tzinfo is None: + last_activity = last_activity.replace(tzinfo=TIMEZONE) + else: + last_activity = last_activity.astimezone(TIMEZONE) + idle_minutes = (now - last_activity).total_seconds() / 60 + if idle_minutes >= ORPHAN_IDLE_LIMIT: + is_orphan = True + reason = ( + f"Verwaist (kein Pipeline-Fortschritt seit {int(idle_minutes)} Min, " + f"gesamt {int(age_minutes)} Min)" + ) + else: + is_orphan = True + reason = f"Verwaist (keine Pipeline-Schritte nach {int(age_minutes)} Min)" + + if is_orphan: await db.execute( "UPDATE refresh_log SET status = 'error', completed_at = ?, error_message = ? WHERE id = ?", - (now.strftime('%Y-%m-%d %H:%M:%S'), f"Verwaist (>{int(age_minutes)} Min ohne Abschluss, automatisch bereinigt)", orphan["id"]), + (now.strftime('%Y-%m-%d %H:%M:%S'), reason, orphan["id"]), ) - logger.warning(f"Verwaisten Refresh #{orphan['id']} für Lage {orphan['incident_id']} bereinigt ({int(age_minutes)} Min)") + logger.warning(f"Verwaisten Refresh #{orphan['id']} fuer Lage {orphan['incident_id']} bereinigt: {reason}") # Alte Notifications bereinigen (> 7 Tage) await db.execute("DELETE FROM notifications WHERE created_at < datetime('now', '-7 days')")