This commit is contained in:
David Rice
2026-04-30 12:39:48 +01:00
parent bc2791e6fa
commit 70b2b6acc3
6 changed files with 431 additions and 67 deletions

View File

@@ -168,6 +168,7 @@ def _find_tables(indexed_rows: list[tuple[int, tuple]]) -> list[TableLocation]:
"""
Scan a list of (1-based-row-num, row-values-tuple) pairs for sub-tables
that have both a 'Manufacturer' and 'MPN' header column.
Handles multiple tables side-by-side on the same header row.
"""
tables: list[TableLocation] = []
i = 0
@@ -175,46 +176,59 @@ def _find_tables(indexed_rows: list[tuple[int, tuple]]) -> list[TableLocation]:
row_num, row = indexed_rows[i]
row_str = [_cell(v) for v in row]
mfr_col_0 = next((c for c, v in enumerate(row_str) if v.lower() == "manufacturer"), None)
mpn_col_0 = next((c for c, v in enumerate(row_str) if v.lower() == "mpn"), None)
mfr_cols = [c for c, v in enumerate(row_str) if v.lower() == "manufacturer"]
mpn_cols = [c for c, v in enumerate(row_str) if v.lower() == "mpn"]
if mfr_col_0 is None or mpn_col_0 is None:
if not mfr_cols or not mpn_cols:
i += 1
continue
# Found a header row; consume data rows below it
data: list[tuple[int, str, str]] = []
j = i + 1
empty_streak = 0
while j < len(indexed_rows):
dr_num, dr = indexed_rows[j]
mfr = _cell(dr[mfr_col_0] if mfr_col_0 < len(dr) else None)
mpn = _cell(dr[mpn_col_0] if mpn_col_0 < len(dr) else None)
if not mfr and not mpn:
empty_streak += 1
if empty_streak >= 3:
break
j += 1
continue
empty_streak = 0
# Another header row signals the end of this table
if mfr.lower() == "manufacturer" and mpn.lower() == "mpn":
# Pair each mfr_col with its nearest unpaired mpn_col
pairs: list[tuple[int, int]] = []
used_mpn: set[int] = set()
for mfr_col_0 in mfr_cols:
available = [c for c in mpn_cols if c not in used_mpn]
if not available:
break
best_mpn = min(available, key=lambda c: abs(c - mfr_col_0))
pairs.append((mfr_col_0, best_mpn))
used_mpn.add(best_mpn)
if mpn and not _skip(mpn):
data.append((dr_num, mfr, mpn))
j += 1
max_j = i + 1
for mfr_col_0, mpn_col_0 in pairs:
data: list[tuple[int, str, str]] = []
j = i + 1
empty_streak = 0
while j < len(indexed_rows):
dr_num, dr = indexed_rows[j]
mfr = _cell(dr[mfr_col_0] if mfr_col_0 < len(dr) else None)
mpn = _cell(dr[mpn_col_0] if mpn_col_0 < len(dr) else None)
tables.append(TableLocation(
sheet_name="", # filled by caller
header_row=row_num,
mfr_col=mfr_col_0 + 1, # convert to 1-based
mpn_col=mpn_col_0 + 1,
data=data,
))
i = j # jump past the table we just consumed
if not mfr and not mpn:
empty_streak += 1
if empty_streak >= 3:
break
j += 1
continue
empty_streak = 0
if mfr.lower() == "manufacturer" and mpn.lower() == "mpn":
break
if mpn and not _skip(mpn):
data.append((dr_num, mfr, mpn))
j += 1
max_j = max(max_j, j)
tables.append(TableLocation(
sheet_name="",
header_row=row_num,
mfr_col=mfr_col_0 + 1, # convert to 1-based
mpn_col=mpn_col_0 + 1,
data=data,
))
i = max_j
return tables