From cc3a9b54e32113c4e7f3616d7e6350e432b1aa1c Mon Sep 17 00:00:00 2001
From: Alex Schofield
Date: Mon, 4 May 2026 23:08:14 +0100
Subject: add logic to cache data download in get_latest_data()

---
 fnme/data.py   | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
 pyproject.toml |  3 ++-
 uv.lock        | 11 +++++++++++
 3 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/fnme/data.py b/fnme/data.py
index bdec6df..17f30c5 100644
--- a/fnme/data.py
+++ b/fnme/data.py
@@ -1,13 +1,51 @@
-from io import StringIO
-from typing import Optional
-
 import pandas as pd
 import requests
+from platformdirs import user_cache_path
 
 from fnme.constants import ENDPOINT, HEADERS
 
 
-def get_latest_data() -> tuple[pd.DataFrame, Optional[str]]:
-    response = requests.get(ENDPOINT, headers=HEADERS, timeout=10)
-    response.raise_for_status()
-    return pd.read_csv(StringIO(response.text)), response.headers.get("Last-Modified")
+def get_latest_data() -> tuple[pd.DataFrame, str | None]:
+    """Return the latest data and the server's ``Last-Modified`` value.
+
+    The CSV is cached in the per-user cache directory and is downloaded
+    again only when the ``Last-Modified`` header reported by the server
+    differs from the value saved alongside the cached copy.
+
+    Raises:
+        requests.RequestException: If either request cannot be completed
+            or the download returns an error status.
+    """
+    cache_dir = user_cache_path(appname="fnme", appauthor=False)
+    csv_path = cache_dir / "latest_data.csv"
+    timestamp_path = cache_dir / "timestamp.txt"
+
+    cache_dir.mkdir(parents=True, exist_ok=True)
+
+    # requests.head() does not follow redirects unless asked to, whereas
+    # requests.get() does; follow them so both requests see the same headers.
+    remote_last_modified = requests.head(
+        ENDPOINT, headers=HEADERS, timeout=10, allow_redirects=True
+    ).headers.get("Last-Modified")
+
+    cached_last_modified = (
+        timestamp_path.read_text(encoding="utf-8")
+        if timestamp_path.exists()
+        else None
+    )
+
+    if csv_path.exists() and remote_last_modified == cached_last_modified:
+        return pd.read_csv(csv_path), cached_last_modified
+
+    response = requests.get(ENDPOINT, headers=HEADERS, timeout=10)
+    response.raise_for_status()
+    last_modified = response.headers.get("Last-Modified")
+
+    # Write to a temporary file and rename it into place so an interrupted
+    # download cannot leave a truncated CSV behind a still-valid timestamp.
+    # newline="" stops Windows from rewriting the payload's line endings.
+    tmp_path = csv_path.with_suffix(".tmp")
+    tmp_path.write_text(response.text, encoding="utf-8", newline="")
+    tmp_path.replace(csv_path)
+    timestamp_path.write_text(last_modified or "", encoding="utf-8")
+    return pd.read_csv(csv_path), last_modified
diff --git a/pyproject.toml b/pyproject.toml
index fcf426d..feeed1f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,8 @@ dependencies = [
"pandas>=3.0.2", "requests>=2.33.1", "numpy>=2.4.4", - "tabulate>=0.10.0" + "tabulate>=0.10.0", + "platformdirs>=4.9.6", ] [project.scripts] diff --git a/uv.lock b/uv.lock index 67e5ad6..0f922e3 100644 --- a/uv.lock +++ b/uv.lock @@ -116,6 +116,7 @@ dependencies = [ { name = "geopy" }, { name = "numpy" }, { name = "pandas" }, + { name = "platformdirs" }, { name = "requests" }, { name = "tabulate" }, ] @@ -125,6 +126,7 @@ requires-dist = [ { name = "geopy", specifier = ">=2.4.1" }, { name = "numpy", specifier = ">=2.4.4" }, { name = "pandas", specifier = ">=3.0.2" }, + { name = "platformdirs", specifier = ">=4.9.6" }, { name = "requests", specifier = ">=2.33.1" }, { name = "tabulate", specifier = ">=0.10.0" }, ] @@ -298,6 +300,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/2b/f8434233fab2bd66a02ec014febe4e5adced20e2693e0e90a07d118ed30e/pandas-3.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:5371b72c2d4d415d08765f32d689217a43227484e81b2305b52076e328f6f482", size = 9455341, upload-time = "2026-03-31T06:48:28.418Z" }, ] +[[package]] +name = "platformdirs" +version = "4.9.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9f/4a/0883b8e3802965322523f0b200ecf33d31f10991d0401162f4b23c698b42/platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a", size = 29400, upload-time = "2026-04-09T00:04:10.812Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348, upload-time = "2026-04-09T00:04:09.463Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" -- cgit v1.2.3