Updates
This commit is contained in:
@@ -168,6 +168,7 @@ def _find_tables(indexed_rows: list[tuple[int, tuple]]) -> list[TableLocation]:
|
||||
"""
|
||||
Scan a list of (1-based-row-num, row-values-tuple) pairs for sub-tables
|
||||
that have both a 'Manufacturer' and 'MPN' header column.
|
||||
Handles multiple tables side-by-side on the same header row.
|
||||
"""
|
||||
tables: list[TableLocation] = []
|
||||
i = 0
|
||||
@@ -175,46 +176,59 @@ def _find_tables(indexed_rows: list[tuple[int, tuple]]) -> list[TableLocation]:
|
||||
row_num, row = indexed_rows[i]
|
||||
row_str = [_cell(v) for v in row]
|
||||
|
||||
mfr_col_0 = next((c for c, v in enumerate(row_str) if v.lower() == "manufacturer"), None)
|
||||
mpn_col_0 = next((c for c, v in enumerate(row_str) if v.lower() == "mpn"), None)
|
||||
mfr_cols = [c for c, v in enumerate(row_str) if v.lower() == "manufacturer"]
|
||||
mpn_cols = [c for c, v in enumerate(row_str) if v.lower() == "mpn"]
|
||||
|
||||
if mfr_col_0 is None or mpn_col_0 is None:
|
||||
if not mfr_cols or not mpn_cols:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Found a header row – consume data rows below it
|
||||
data: list[tuple[int, str, str]] = []
|
||||
j = i + 1
|
||||
empty_streak = 0
|
||||
while j < len(indexed_rows):
|
||||
dr_num, dr = indexed_rows[j]
|
||||
mfr = _cell(dr[mfr_col_0] if mfr_col_0 < len(dr) else None)
|
||||
mpn = _cell(dr[mpn_col_0] if mpn_col_0 < len(dr) else None)
|
||||
|
||||
if not mfr and not mpn:
|
||||
empty_streak += 1
|
||||
if empty_streak >= 3:
|
||||
break
|
||||
j += 1
|
||||
continue
|
||||
empty_streak = 0
|
||||
|
||||
# Another header row signals the end of this table
|
||||
if mfr.lower() == "manufacturer" and mpn.lower() == "mpn":
|
||||
# Pair each mfr_col with its nearest unpaired mpn_col
|
||||
pairs: list[tuple[int, int]] = []
|
||||
used_mpn: set[int] = set()
|
||||
for mfr_col_0 in mfr_cols:
|
||||
available = [c for c in mpn_cols if c not in used_mpn]
|
||||
if not available:
|
||||
break
|
||||
best_mpn = min(available, key=lambda c: abs(c - mfr_col_0))
|
||||
pairs.append((mfr_col_0, best_mpn))
|
||||
used_mpn.add(best_mpn)
|
||||
|
||||
if mpn and not _skip(mpn):
|
||||
data.append((dr_num, mfr, mpn))
|
||||
j += 1
|
||||
max_j = i + 1
|
||||
for mfr_col_0, mpn_col_0 in pairs:
|
||||
data: list[tuple[int, str, str]] = []
|
||||
j = i + 1
|
||||
empty_streak = 0
|
||||
while j < len(indexed_rows):
|
||||
dr_num, dr = indexed_rows[j]
|
||||
mfr = _cell(dr[mfr_col_0] if mfr_col_0 < len(dr) else None)
|
||||
mpn = _cell(dr[mpn_col_0] if mpn_col_0 < len(dr) else None)
|
||||
|
||||
tables.append(TableLocation(
|
||||
sheet_name="", # filled by caller
|
||||
header_row=row_num,
|
||||
mfr_col=mfr_col_0 + 1, # convert to 1-based
|
||||
mpn_col=mpn_col_0 + 1,
|
||||
data=data,
|
||||
))
|
||||
i = j # jump past the table we just consumed
|
||||
if not mfr and not mpn:
|
||||
empty_streak += 1
|
||||
if empty_streak >= 3:
|
||||
break
|
||||
j += 1
|
||||
continue
|
||||
empty_streak = 0
|
||||
|
||||
if mfr.lower() == "manufacturer" and mpn.lower() == "mpn":
|
||||
break
|
||||
|
||||
if mpn and not _skip(mpn):
|
||||
data.append((dr_num, mfr, mpn))
|
||||
j += 1
|
||||
|
||||
max_j = max(max_j, j)
|
||||
tables.append(TableLocation(
|
||||
sheet_name="",
|
||||
header_row=row_num,
|
||||
mfr_col=mfr_col_0 + 1, # convert to 1-based
|
||||
mpn_col=mpn_col_0 + 1,
|
||||
data=data,
|
||||
))
|
||||
|
||||
i = max_j
|
||||
return tables
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user