220 lines
6.7 KiB
Python
220 lines
6.7 KiB
Python
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import sqlite3
|
|
from contextlib import contextmanager
|
|
from datetime import UTC, datetime
|
|
from pathlib import Path
|
|
from typing import Iterator
|
|
|
|
# Statuses that ActivityStore.is_terminal_source treats as terminal: it returns
# True for a row whose status is a member of this set.
TERMINAL_STATUSES = {"uploaded", "duplicate"}
|
|
|
|
|
|
def utc_now() -> str:
|
|
return datetime.now(UTC).isoformat(timespec="seconds")
|
|
|
|
|
|
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in 1 MiB pieces so that large files are never loaded
    into memory in one go.
    """
    block_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        # read() yields b"" at end of file, which ends the loop.
        while block := stream.read(block_size):
            hasher.update(block)
    return hasher.hexdigest()
|
|
|
|
|
|
class ActivityStore:
|
|
def __init__(self, db_path: Path) -> None:
|
|
self.db_path = db_path
|
|
|
|
def initialize(self) -> None:
|
|
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
with self._connect() as conn:
|
|
conn.execute(
|
|
"""
|
|
CREATE TABLE IF NOT EXISTS activities (
|
|
source_ref TEXT PRIMARY KEY,
|
|
title TEXT,
|
|
source_url TEXT,
|
|
raw_path TEXT,
|
|
converted_path TEXT,
|
|
raw_sha256 TEXT,
|
|
converted_sha256 TEXT,
|
|
status TEXT NOT NULL,
|
|
error_message TEXT,
|
|
attempts INTEGER NOT NULL DEFAULT 0,
|
|
patched_field_count INTEGER,
|
|
garmin_activity_id TEXT,
|
|
first_seen_at TEXT NOT NULL,
|
|
updated_at TEXT NOT NULL,
|
|
uploaded_at TEXT
|
|
)
|
|
"""
|
|
)
|
|
conn.execute(
|
|
"CREATE INDEX IF NOT EXISTS idx_activities_raw_sha ON activities(raw_sha256)"
|
|
)
|
|
conn.execute(
|
|
"""
|
|
CREATE INDEX IF NOT EXISTS idx_activities_converted_sha
|
|
ON activities(converted_sha256)
|
|
"""
|
|
)
|
|
|
|
def is_terminal_source(self, source_ref: str) -> bool:
|
|
row = self._fetch_one(
|
|
"SELECT status FROM activities WHERE source_ref = ?", (source_ref,)
|
|
)
|
|
return bool(row and row["status"] in TERMINAL_STATUSES)
|
|
|
|
def is_uploaded_hash(self, file_hash: str) -> bool:
|
|
row = self._fetch_one(
|
|
"""
|
|
SELECT source_ref FROM activities
|
|
WHERE status IN ('uploaded', 'duplicate')
|
|
AND (raw_sha256 = ? OR converted_sha256 = ?)
|
|
LIMIT 1
|
|
""",
|
|
(file_hash, file_hash),
|
|
)
|
|
return row is not None
|
|
|
|
def record_downloaded(
|
|
self,
|
|
source_ref: str,
|
|
title: str,
|
|
source_url: str | None,
|
|
raw_path: Path,
|
|
raw_sha256: str,
|
|
) -> None:
|
|
now = utc_now()
|
|
with self._connect() as conn:
|
|
conn.execute(
|
|
"""
|
|
INSERT INTO activities (
|
|
source_ref, title, source_url, raw_path, raw_sha256, status,
|
|
first_seen_at, updated_at
|
|
)
|
|
VALUES (?, ?, ?, ?, ?, 'downloaded', ?, ?)
|
|
ON CONFLICT(source_ref) DO UPDATE SET
|
|
title = excluded.title,
|
|
source_url = excluded.source_url,
|
|
raw_path = excluded.raw_path,
|
|
raw_sha256 = excluded.raw_sha256,
|
|
status = CASE
|
|
WHEN activities.status IN ('uploaded', 'duplicate')
|
|
THEN activities.status
|
|
ELSE 'downloaded'
|
|
END,
|
|
error_message = NULL,
|
|
updated_at = excluded.updated_at
|
|
""",
|
|
(
|
|
source_ref,
|
|
title,
|
|
source_url,
|
|
str(raw_path),
|
|
raw_sha256,
|
|
now,
|
|
now,
|
|
),
|
|
)
|
|
|
|
def mark_converted(
|
|
self,
|
|
source_ref: str,
|
|
converted_path: Path,
|
|
converted_sha256: str,
|
|
patched_field_count: int,
|
|
) -> None:
|
|
self._execute(
|
|
"""
|
|
UPDATE activities
|
|
SET converted_path = ?,
|
|
converted_sha256 = ?,
|
|
patched_field_count = ?,
|
|
status = CASE
|
|
WHEN status IN ('uploaded', 'duplicate') THEN status
|
|
ELSE 'converted'
|
|
END,
|
|
error_message = NULL,
|
|
updated_at = ?
|
|
WHERE source_ref = ?
|
|
""",
|
|
(
|
|
str(converted_path),
|
|
converted_sha256,
|
|
patched_field_count,
|
|
utc_now(),
|
|
source_ref,
|
|
),
|
|
)
|
|
|
|
def mark_uploaded(self, source_ref: str, garmin_activity_id: str | None) -> None:
|
|
now = utc_now()
|
|
self._execute(
|
|
"""
|
|
UPDATE activities
|
|
SET status = 'uploaded',
|
|
garmin_activity_id = ?,
|
|
error_message = NULL,
|
|
uploaded_at = ?,
|
|
updated_at = ?
|
|
WHERE source_ref = ?
|
|
""",
|
|
(garmin_activity_id, now, now, source_ref),
|
|
)
|
|
|
|
def mark_duplicate(self, source_ref: str, message: str) -> None:
|
|
now = utc_now()
|
|
self._execute(
|
|
"""
|
|
UPDATE activities
|
|
SET status = 'duplicate',
|
|
error_message = ?,
|
|
uploaded_at = COALESCE(uploaded_at, ?),
|
|
updated_at = ?
|
|
WHERE source_ref = ?
|
|
""",
|
|
(message, now, now, source_ref),
|
|
)
|
|
|
|
def mark_failed(self, source_ref: str, message: str) -> None:
|
|
self._execute(
|
|
"""
|
|
UPDATE activities
|
|
SET status = 'failed',
|
|
error_message = ?,
|
|
attempts = attempts + 1,
|
|
updated_at = ?
|
|
WHERE source_ref = ?
|
|
""",
|
|
(message[:1000], utc_now(), source_ref),
|
|
)
|
|
|
|
def get_status(self, source_ref: str) -> str | None:
|
|
row = self._fetch_one(
|
|
"SELECT status FROM activities WHERE source_ref = ?", (source_ref,)
|
|
)
|
|
return row["status"] if row else None
|
|
|
|
@contextmanager
|
|
def _connect(self) -> Iterator[sqlite3.Connection]:
|
|
conn = sqlite3.connect(self.db_path)
|
|
conn.row_factory = sqlite3.Row
|
|
try:
|
|
yield conn
|
|
conn.commit()
|
|
finally:
|
|
conn.close()
|
|
|
|
def _execute(self, sql: str, params: tuple[object, ...]) -> None:
|
|
with self._connect() as conn:
|
|
conn.execute(sql, params)
|
|
|
|
def _fetch_one(
|
|
self, sql: str, params: tuple[object, ...]
|
|
) -> sqlite3.Row | None:
|
|
with self._connect() as conn:
|
|
return conn.execute(sql, params).fetchone()
|
|
|