From 5efb2c25ea5b9773b4019267baaf0e819ba46094 Mon Sep 17 00:00:00 2001 From: Malin Date: Sun, 22 Mar 2026 21:01:03 +0100 Subject: [PATCH] feat: add rows_processed/rows_skipped diagnostics to health + refresh endpoints Helps diagnose whether the product cap is from EAN filtering or a downstream limit. health and refresh now return: product_count, rows_processed, rows_skipped. Co-Authored-By: Claude Sonnet 4.6 --- main.py | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/main.py b/main.py index 3bbb1eb..f49738b 100644 --- a/main.py +++ b/main.py @@ -39,8 +39,12 @@ def safe_float(value, default): return float(str(value).strip()) except (ValueError, TypeError): return default + + products_cache = [] last_refresh = None +rows_processed = 0 +rows_skipped = 0 cache_lock = threading.Lock() api_key_header = APIKeyHeader(name="X-API-Key") @@ -52,18 +56,19 @@ def verify_key(key: str = Security(api_key_header)): def download_and_parse(): - global products_cache, last_refresh + global products_cache, last_refresh, rows_processed, rows_skipped resp = requests.get(EXCEL_URL, timeout=60) resp.raise_for_status() - # read_only=True would stop at the sheet's declared dimension attribute, silently - # missing any rows MTZ added beyond the original range. Since the file is already - # in memory (BytesIO), read_only gives no I/O benefit and data_only=True suffices. + # read_only=True would stop at the sheet's declared dimension attribute, + # silently missing any rows added beyond the original range. 
wb = load_workbook(BytesIO(resp.content), data_only=True) ws = wb.active rows = list(ws.iter_rows(min_row=6, values_only=True)) parsed = [] + skipped = 0 for row in rows: - if row[1] is None: # col B (index 1) = item_code + if row[1] is None: # col B (index 1) = item_code — empty row + skipped += 1 continue ean_raw = row[3] # col D if ean_raw is None: @@ -75,6 +80,7 @@ def download_and_parse(): # Skip products with blank or non-numeric EAN codes if not ean or not ean.isdigit(): + skipped += 1 continue brand_raw = row[9] # col J @@ -93,6 +99,8 @@ def download_and_parse(): ) with cache_lock: products_cache = parsed + rows_processed = len(rows) + rows_skipped = skipped last_refresh = time.time() wb.close() @@ -115,11 +123,14 @@ def startup(): @app.get("/api/health") def health(): - return { - "status": "ok", - "product_count": len(products_cache), - "last_refresh": last_refresh, - } + with cache_lock: + return { + "status": "ok", + "product_count": len(products_cache), + "rows_processed": rows_processed, + "rows_skipped": rows_skipped, + "last_refresh": last_refresh, + } @app.get("/api/products", dependencies=[Depends(verify_key)]) @@ -131,4 +142,10 @@ def get_products(): @app.post("/api/refresh", dependencies=[Depends(verify_key)]) def refresh(): download_and_parse() - return {"status": "ok", "product_count": len(products_cache)} + with cache_lock: + return { + "status": "ok", + "product_count": len(products_cache), + "rows_processed": rows_processed, + "rows_skipped": rows_skipped, + }