from __future__ import annotations import hashlib import sqlite3 from contextlib import contextmanager from datetime import UTC, datetime from pathlib import Path from typing import Iterator TERMINAL_STATUSES = {"uploaded", "duplicate"} def utc_now() -> str: return datetime.now(UTC).isoformat(timespec="seconds") def sha256_file(path: Path) -> str: digest = hashlib.sha256() with path.open("rb") as handle: for chunk in iter(lambda: handle.read(1024 * 1024), b""): digest.update(chunk) return digest.hexdigest() class ActivityStore: def __init__(self, db_path: Path) -> None: self.db_path = db_path def initialize(self) -> None: self.db_path.parent.mkdir(parents=True, exist_ok=True) with self._connect() as conn: conn.execute( """ CREATE TABLE IF NOT EXISTS activities ( source_ref TEXT PRIMARY KEY, title TEXT, source_url TEXT, raw_path TEXT, converted_path TEXT, raw_sha256 TEXT, converted_sha256 TEXT, status TEXT NOT NULL, error_message TEXT, attempts INTEGER NOT NULL DEFAULT 0, patched_field_count INTEGER, garmin_activity_id TEXT, first_seen_at TEXT NOT NULL, updated_at TEXT NOT NULL, uploaded_at TEXT ) """ ) conn.execute( "CREATE INDEX IF NOT EXISTS idx_activities_raw_sha ON activities(raw_sha256)" ) conn.execute( """ CREATE INDEX IF NOT EXISTS idx_activities_converted_sha ON activities(converted_sha256) """ ) def is_terminal_source(self, source_ref: str) -> bool: row = self._fetch_one( "SELECT status FROM activities WHERE source_ref = ?", (source_ref,) ) return bool(row and row["status"] in TERMINAL_STATUSES) def is_uploaded_hash(self, file_hash: str) -> bool: row = self._fetch_one( """ SELECT source_ref FROM activities WHERE status IN ('uploaded', 'duplicate') AND (raw_sha256 = ? OR converted_sha256 = ?) LIMIT 1 """, (file_hash, file_hash), ) return row is not None def record_downloaded( self, source_ref: str, title: str, source_url: str | None, raw_path: Path, raw_sha256: str, ) -> None: now = utc_now() with self._connect() as conn: conn.execute( """ INSERT INTO activities ( source_ref, title, source_url, raw_path, raw_sha256, status, first_seen_at, updated_at ) VALUES (?, ?, ?, ?, ?, 'downloaded', ?, ?) ON CONFLICT(source_ref) DO UPDATE SET title = excluded.title, source_url = excluded.source_url, raw_path = excluded.raw_path, raw_sha256 = excluded.raw_sha256, status = CASE WHEN activities.status IN ('uploaded', 'duplicate') THEN activities.status ELSE 'downloaded' END, error_message = NULL, updated_at = excluded.updated_at """, ( source_ref, title, source_url, str(raw_path), raw_sha256, now, now, ), ) def mark_converted( self, source_ref: str, converted_path: Path, converted_sha256: str, patched_field_count: int, ) -> None: self._execute( """ UPDATE activities SET converted_path = ?, converted_sha256 = ?, patched_field_count = ?, status = CASE WHEN status IN ('uploaded', 'duplicate') THEN status ELSE 'converted' END, error_message = NULL, updated_at = ? WHERE source_ref = ? """, ( str(converted_path), converted_sha256, patched_field_count, utc_now(), source_ref, ), ) def mark_uploaded(self, source_ref: str, garmin_activity_id: str | None) -> None: now = utc_now() self._execute( """ UPDATE activities SET status = 'uploaded', garmin_activity_id = ?, error_message = NULL, uploaded_at = ?, updated_at = ? WHERE source_ref = ? """, (garmin_activity_id, now, now, source_ref), ) def mark_duplicate(self, source_ref: str, message: str) -> None: now = utc_now() self._execute( """ UPDATE activities SET status = 'duplicate', error_message = ?, uploaded_at = COALESCE(uploaded_at, ?), updated_at = ? WHERE source_ref = ? """, (message, now, now, source_ref), ) def mark_failed(self, source_ref: str, message: str) -> None: self._execute( """ UPDATE activities SET status = 'failed', error_message = ?, attempts = attempts + 1, updated_at = ? WHERE source_ref = ? """, (message[:1000], utc_now(), source_ref), ) def get_status(self, source_ref: str) -> str | None: row = self._fetch_one( "SELECT status FROM activities WHERE source_ref = ?", (source_ref,) ) return row["status"] if row else None @contextmanager def _connect(self) -> Iterator[sqlite3.Connection]: conn = sqlite3.connect(self.db_path) conn.row_factory = sqlite3.Row try: yield conn conn.commit() finally: conn.close() def _execute(self, sql: str, params: tuple[object, ...]) -> None: with self._connect() as conn: conn.execute(sql, params) def _fetch_one( self, sql: str, params: tuple[object, ...] ) -> sqlite3.Row | None: with self._connect() as conn: return conn.execute(sql, params).fetchone()