about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- fnme/data.py 34
1 files changed, 22 insertions, 12 deletions
diff --git a/fnme/data.py b/fnme/data.py
index 11497ce..6d85f43 100644
--- a/fnme/data.py
+++ b/fnme/data.py
@@ -1,3 +1,4 @@
+import csv
from pathlib import Path
import pandas as pd
@@ -14,20 +15,29 @@ def get_latest_data() -> tuple[pd.DataFrame, str | None]:
cache_dir.mkdir(parents=True, exist_ok=True)
- remote_last_modified = requests.head(
- ENDPOINT, headers=HEADERS, timeout=10
- ).headers.get("Last-Modified")
-
cached_last_modified = (
timestamp_path.read_text() if timestamp_path.exists() else None
)
- if not csv_path.exists() or remote_last_modified != cached_last_modified:
- response = requests.get(ENDPOINT, headers=HEADERS, timeout=10)
- response.raise_for_status()
- last_modified = response.headers.get("Last-Modified")
- csv_path.write_text(response.text, encoding="utf-8")
- timestamp_path.write_text(last_modified or "")
- return pd.read_csv(csv_path), last_modified
+ conditional_headers = {
+ **HEADERS,
+ **(
+ {"If-Modified-Since": cached_last_modified}
+ if cached_last_modified and csv_path.exists()
+ else {}
+ ),
+ }
+
+ response = requests.get(ENDPOINT, headers=conditional_headers, timeout=10)
+ response.raise_for_status()
+
+ if response.status_code == 304:
+ print(f"[*] Using cached data. Last modified: {cached_last_modified}")
+ return pd.read_csv(csv_path), cached_last_modified
+
+ print("[!] Cache is stale. Refreshing.")
- return pd.read_csv(csv_path), cached_last_modified
+ last_modified = response.headers.get("Last-Modified")
+ csv_path.write_text(response.text, encoding="utf-8")
+ timestamp_path.write_text(last_modified or "")
+ return pd.read_csv(csv_path), last_modified
git.ajschof.me — hosted by ajschofield — powered by cgit