about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Alex Schofield <git@ajschof.me> 2026-04-27 22:23:09 +0100
committer Alex Schofield <git@ajschof.me> 2026-04-27 22:23:09 +0100
commit fae55f02b2b4739fdbbee7c660927639781a3fb3 (patch)
tree f47bfb2bfee2cd2be41893fad493290483838c51
parent f896458629f0ed7ac1d64775710980856d8a7c64 (diff)
download fuelnearme-fae55f02b2b4739fdbbee7c660927639781a3fb3.tar.gz
fuelnearme-fae55f02b2b4739fdbbee7c660927639781a3fb3.zip
vectorise filter_df using haversine and bounding box pre-filter
-rw-r--r-- main.py 70
1 files changed, 44 insertions, 26 deletions
diff --git a/main.py b/main.py
index b6c245a..f875fe6 100644
--- a/main.py
+++ b/main.py
@@ -1,8 +1,10 @@
import argparse
+import math
import sys
from io import StringIO
from typing import Any, Dict, List, Optional, Tuple
+import numpy as np
import pandas as pd
import requests
from geopy.distance import geodesic
@@ -49,32 +51,48 @@ def get_latest_data() -> tuple[pd.DataFrame, Optional[str]]:
# Mean Earth radius in miles for the spherical (haversine) distance model.
_EARTH_RADIUS_MILES = 3958.8


def _haversine_miles(
    origin: Tuple[float, float], lat2: np.ndarray, lon2: np.ndarray
) -> np.ndarray:
    """Return great-circle distances in miles from *origin* to each point.

    ``origin`` is ``(latitude, longitude)`` in degrees; ``lat2``/``lon2`` are
    arrays of latitudes/longitudes in degrees.
    """
    lat1, lon1 = np.radians(origin[0]), np.radians(origin[1])
    lat2, lon2 = np.radians(lat2), np.radians(lon2)
    a = (
        np.sin((lat2 - lat1) / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    )
    # Float error can push `a` marginally outside [0, 1]; arcsin would then
    # yield NaN and the station would silently drop out of the results.
    return _EARTH_RADIUS_MILES * 2 * np.arcsin(np.sqrt(np.clip(a, 0.0, 1.0)))


def _bounding_box_mask(
    dframe: pd.DataFrame, loc: Tuple[float, float], radius: float
) -> pd.Series:
    """Return a boolean mask of rows inside a box that encloses the search circle.

    The box is a cheap pre-filter only: it must never exclude a point that the
    exact haversine test would accept.
    """
    lat, lon = loc
    # Tiny relative padding so rounding never trims a point on the circle edge.
    angular = radius / _EARTH_RADIUS_MILES * (1 + 1e-9)
    deg_lat = math.degrees(angular)
    mask = dframe["forecourts.location.latitude"].between(
        lat - deg_lat, lat + deg_lat
    )

    cos_lat = math.cos(math.radians(lat))
    ratio = math.sin(angular) / cos_lat if cos_lat > 0 else math.inf
    if angular >= math.pi / 2 or ratio >= 1:
        # The circle covers a pole, so every longitude is reachable.
        return mask

    # Exact longitude half-width of a spherical cap; the naive
    # radius / (69 * cos(lat)) under-estimates it away from the equator.
    deg_lon = math.degrees(math.asin(ratio))
    if lon - deg_lon < -180 or lon + deg_lon > 180:
        # The box would wrap across the antimeridian; a single `between`
        # cannot express that, so fall back to the latitude band alone.
        return mask
    return mask & dframe["forecourts.location.longitude"].between(
        lon - deg_lon, lon + deg_lon
    )


def _pence_to_pounds(col: pd.Series) -> pd.Series:
    """Convert a pence column to pounds (2 dp), using "N/A" for missing prices."""
    # Cast to object first so mixing floats with the "N/A" marker is explicit
    # rather than relying on implicit dtype upcasting inside `where`.
    pounds = (col / 100).round(2).astype(object)
    return pounds.where(col.notna(), "N/A")


def filter_df(
    dframe: pd.DataFrame, arguments: argparse.Namespace, loc: Tuple[float, float]
) -> List[Dict[str, Any]]:
    """Return the stations in *dframe* that lie within ``arguments.radius`` miles.

    Args:
        dframe: Station table with the columns ``forecourts.trading_name``,
            ``forecourts.location.latitude``, ``forecourts.location.longitude``
            and ``forecourts.fuel_price.{E5,E10,B7S}`` (prices are divided by
            100 for output).
        arguments: Parsed CLI arguments; only ``radius`` (miles) is read.
        loc: ``(latitude, longitude)`` of the search centre, in degrees.

    Returns:
        One dict per matching station, in the frame's row order, with keys
        ``station_name``, ``distance`` (miles, 1 dp), ``e5_price``,
        ``e10_price`` and ``diesel_price`` (2 dp, or ``"N/A"`` when the
        source price is missing).
    """
    radius = arguments.radius
    if radius <= 0:
        # Nothing can be strictly closer than a non-positive radius.
        return []

    nearby = dframe[_bounding_box_mask(dframe, loc, radius)]
    exact = _haversine_miles(
        loc,
        nearby["forecourts.location.latitude"].to_numpy(dtype=float),
        nearby["forecourts.location.longitude"].to_numpy(dtype=float),
    )

    # Compare the *unrounded* distance with the radius; rounding first would
    # drop stations just inside the edge (4.97 -> 5.0 with radius 5) and admit
    # ones just outside it.
    within = exact < radius
    nearby = nearby[within]

    nearby = nearby.assign(
        distance=exact[within].round(1),
        e5_price=_pence_to_pounds(nearby["forecourts.fuel_price.E5"]),
        e10_price=_pence_to_pounds(nearby["forecourts.fuel_price.E10"]),
        diesel_price=_pence_to_pounds(nearby["forecourts.fuel_price.B7S"]),
    )

    return nearby.rename(columns={"forecourts.trading_name": "station_name"})[
        ["station_name", "distance", "e5_price", "e10_price", "diesel_price"]
    ].to_dict(orient="records")
def sort_stations(stations: list[dict], sort: str) -> list[dict]:
git.ajschof.me — hosted by ajschofield — powered by cgit