From 3b167cd39658e18084da3a59a8303f3638fdf73f Mon Sep 17 00:00:00 2001 From: Malin Date: Sun, 22 Mar 2026 20:51:08 +0100 Subject: [PATCH] fix: drop read_only=True to avoid openpyxl dimension truncation openpyxl in read_only mode stops iterating at the sheet's cached `dimension` attribute in the XML. If MTZ extended the Excel sheet beyond the original row range, those rows were silently ignored (hence always ~4000 products regardless of the real count). Removing read_only=True forces openpyxl to read all actual data rows. The file is already in BytesIO so there is no I/O penalty. Co-Authored-By: Claude Sonnet 4.6 --- main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index b779fda..3bbb1eb 100644 --- a/main.py +++ b/main.py @@ -55,7 +55,10 @@ def download_and_parse(): global products_cache, last_refresh resp = requests.get(EXCEL_URL, timeout=60) resp.raise_for_status() - wb = load_workbook(BytesIO(resp.content), read_only=True, data_only=True) + # read_only=True would stop at the sheet's declared dimension attribute, silently + # missing any rows MTZ added beyond the original range. Since the file is already + # in memory (BytesIO), read_only gives no I/O benefit and data_only=True suffices. + wb = load_workbook(BytesIO(resp.content), data_only=True) ws = wb.active rows = list(ws.iter_rows(min_row=6, values_only=True)) parsed = []