This commit is contained in:
Bastian Wagner
2026-05-05 19:26:43 +02:00
commit 8d07939527
29 changed files with 2646 additions and 0 deletions

View File

@@ -0,0 +1,219 @@
from __future__ import annotations
import hashlib
import sqlite3
from contextlib import contextmanager
from datetime import UTC, datetime
from pathlib import Path
from typing import Iterator
TERMINAL_STATUSES = {"uploaded", "duplicate"}
def utc_now() -> str:
return datetime.now(UTC).isoformat(timespec="seconds")
def sha256_file(path: Path) -> str:
digest = hashlib.sha256()
with path.open("rb") as handle:
for chunk in iter(lambda: handle.read(1024 * 1024), b""):
digest.update(chunk)
return digest.hexdigest()
class ActivityStore:
def __init__(self, db_path: Path) -> None:
self.db_path = db_path
def initialize(self) -> None:
self.db_path.parent.mkdir(parents=True, exist_ok=True)
with self._connect() as conn:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS activities (
source_ref TEXT PRIMARY KEY,
title TEXT,
source_url TEXT,
raw_path TEXT,
converted_path TEXT,
raw_sha256 TEXT,
converted_sha256 TEXT,
status TEXT NOT NULL,
error_message TEXT,
attempts INTEGER NOT NULL DEFAULT 0,
patched_field_count INTEGER,
garmin_activity_id TEXT,
first_seen_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
uploaded_at TEXT
)
"""
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_activities_raw_sha ON activities(raw_sha256)"
)
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_activities_converted_sha
ON activities(converted_sha256)
"""
)
def is_terminal_source(self, source_ref: str) -> bool:
row = self._fetch_one(
"SELECT status FROM activities WHERE source_ref = ?", (source_ref,)
)
return bool(row and row["status"] in TERMINAL_STATUSES)
def is_uploaded_hash(self, file_hash: str) -> bool:
row = self._fetch_one(
"""
SELECT source_ref FROM activities
WHERE status IN ('uploaded', 'duplicate')
AND (raw_sha256 = ? OR converted_sha256 = ?)
LIMIT 1
""",
(file_hash, file_hash),
)
return row is not None
def record_downloaded(
self,
source_ref: str,
title: str,
source_url: str | None,
raw_path: Path,
raw_sha256: str,
) -> None:
now = utc_now()
with self._connect() as conn:
conn.execute(
"""
INSERT INTO activities (
source_ref, title, source_url, raw_path, raw_sha256, status,
first_seen_at, updated_at
)
VALUES (?, ?, ?, ?, ?, 'downloaded', ?, ?)
ON CONFLICT(source_ref) DO UPDATE SET
title = excluded.title,
source_url = excluded.source_url,
raw_path = excluded.raw_path,
raw_sha256 = excluded.raw_sha256,
status = CASE
WHEN activities.status IN ('uploaded', 'duplicate')
THEN activities.status
ELSE 'downloaded'
END,
error_message = NULL,
updated_at = excluded.updated_at
""",
(
source_ref,
title,
source_url,
str(raw_path),
raw_sha256,
now,
now,
),
)
def mark_converted(
self,
source_ref: str,
converted_path: Path,
converted_sha256: str,
patched_field_count: int,
) -> None:
self._execute(
"""
UPDATE activities
SET converted_path = ?,
converted_sha256 = ?,
patched_field_count = ?,
status = CASE
WHEN status IN ('uploaded', 'duplicate') THEN status
ELSE 'converted'
END,
error_message = NULL,
updated_at = ?
WHERE source_ref = ?
""",
(
str(converted_path),
converted_sha256,
patched_field_count,
utc_now(),
source_ref,
),
)
def mark_uploaded(self, source_ref: str, garmin_activity_id: str | None) -> None:
now = utc_now()
self._execute(
"""
UPDATE activities
SET status = 'uploaded',
garmin_activity_id = ?,
error_message = NULL,
uploaded_at = ?,
updated_at = ?
WHERE source_ref = ?
""",
(garmin_activity_id, now, now, source_ref),
)
def mark_duplicate(self, source_ref: str, message: str) -> None:
now = utc_now()
self._execute(
"""
UPDATE activities
SET status = 'duplicate',
error_message = ?,
uploaded_at = COALESCE(uploaded_at, ?),
updated_at = ?
WHERE source_ref = ?
""",
(message, now, now, source_ref),
)
def mark_failed(self, source_ref: str, message: str) -> None:
self._execute(
"""
UPDATE activities
SET status = 'failed',
error_message = ?,
attempts = attempts + 1,
updated_at = ?
WHERE source_ref = ?
""",
(message[:1000], utc_now(), source_ref),
)
def get_status(self, source_ref: str) -> str | None:
row = self._fetch_one(
"SELECT status FROM activities WHERE source_ref = ?", (source_ref,)
)
return row["status"] if row else None
@contextmanager
def _connect(self) -> Iterator[sqlite3.Connection]:
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
try:
yield conn
conn.commit()
finally:
conn.close()
def _execute(self, sql: str, params: tuple[object, ...]) -> None:
with self._connect() as conn:
conn.execute(sql, params)
def _fetch_one(
self, sql: str, params: tuple[object, ...]
) -> sqlite3.Row | None:
with self._connect() as conn:
return conn.execute(sql, params).fetchone()