diff options
| author | Alex Schofield <git@ajschof.me> | 2026-04-27 22:23:09 +0100 |
|---|---|---|
| committer | Alex Schofield <git@ajschof.me> | 2026-04-27 22:23:09 +0100 |
| commit | fae55f02b2b4739fdbbee7c660927639781a3fb3 (patch) | |
| tree | f47bfb2bfee2cd2be41893fad493290483838c51 | |
| parent | f896458629f0ed7ac1d64775710980856d8a7c64 (diff) | |
| download | fuelnearme-fae55f02b2b4739fdbbee7c660927639781a3fb3.tar.gz fuelnearme-fae55f02b2b4739fdbbee7c660927639781a3fb3.zip | |
vectorise filter_df using haversine and bounding box pre-filter
| -rw-r--r-- | main.py | 70 |
1 file changed, 44 insertions, 26 deletions
```diff
@@ -1,8 +1,10 @@
 import argparse
+import math
 import sys
 from io import StringIO
 from typing import Any, Dict, List, Optional, Tuple
 
+import numpy as np
 import pandas as pd
 import requests
 from geopy.distance import geodesic
@@ -49,32 +51,48 @@ def get_latest_data() -> tuple[pd.DataFrame, Optional[str]]:
 def filter_df(
     dframe: pd.DataFrame, arguments: argparse.Namespace, loc: Tuple[float, float]
 ) -> List[Dict[str, Any]]:
-    near_stations = []
-    for station, latitude, longitude, e5_price, e10_price, diesel_price in zip(
-        dframe["forecourts.trading_name"],
-        dframe["forecourts.location.latitude"],
-        dframe["forecourts.location.longitude"],
-        dframe["forecourts.fuel_price.E5"],
-        dframe["forecourts.fuel_price.E10"],
-        dframe["forecourts.fuel_price.B7S"],
-    ):
-        distance_from_current_location = geodesic((latitude, longitude), loc).miles
-        if distance_from_current_location < arguments.radius:
-            station_dict = {
-                "station_name": station,
-                "distance": round(distance_from_current_location, 1),
-                "e5_price": round(e5_price / 100, 2)
-                if not pd.isna(e5_price)
-                else "N/A",
-                "e10_price": round(e10_price / 100, 2)
-                if not pd.isna(e10_price)
-                else "N/A",
-                "diesel_price": round(diesel_price / 100, 2)
-                if not pd.isna(diesel_price)
-                else "N/A",
-            }
-            near_stations.append(station_dict)
-    return near_stations
+
+    def bounding_box() -> pd.DataFrame:
+        lat, lon = loc
+        deg_lat = arguments.radius / 69.0
+        deg_lon = arguments.radius / (69.0 * math.cos(math.radians(lat)))
+        return dframe[
+            dframe["forecourts.location.latitude"].between(lat - deg_lat, lat + deg_lat)
+            & dframe["forecourts.location.longitude"].between(
+                lon - deg_lon, lon + deg_lon
+            )
+        ]
+
+    def haversine_miles(lat2: np.ndarray, lon2: np.ndarray) -> np.ndarray:
+        R = 3958.8
+        lat1, lon1 = np.radians(loc[0]), np.radians(loc[1])
+        lat2, lon2 = np.radians(lat2), np.radians(lon2)
+        dlat = lat2 - lat1
+        dlon = lon2 - lon1
+        a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
+        return R * 2 * np.arcsin(np.sqrt(a))
+
+    def pence_to_pounds(col: pd.Series) -> pd.Series:
+        return (col / 100).round(2).where(col.notna(), other="N/A")
+
+    df = bounding_box().copy()
+
+    df["distance"] = haversine_miles(
+        df["forecourts.location.latitude"].to_numpy(),
+        df["forecourts.location.longitude"].to_numpy(),
+    ).round(1)
+
+    df = df[df["distance"] < arguments.radius]
+
+    df = df.assign(
+        e5_price=pence_to_pounds(df["forecourts.fuel_price.E5"]),
+        e10_price=pence_to_pounds(df["forecourts.fuel_price.E10"]),
+        diesel_price=pence_to_pounds(df["forecourts.fuel_price.B7S"]),
+    )
+
+    return df.rename(columns={"forecourts.trading_name": "station_name"})[
+        ["station_name", "distance", "e5_price", "e10_price", "diesel_price"]
+    ].to_dict(orient="records")
 
 
 def sort_stations(stations: list[dict], sort: str) -> list[dict]:
```
