This commit is contained in:
David Rice
2026-05-05 15:02:49 +01:00
parent 82171742d2
commit 1ebd78f538
4 changed files with 181 additions and 105 deletions

28
inspect_seout.py Normal file
View File

@@ -0,0 +1,28 @@
#!/usr/bin/env python3
"""Quick diagnostic for the Silicon Expert export (OCTO/seout.xlsx).

For every sheet in the workbook this prints:
  * the sheet name and its total row count,
  * every non-empty row among the first 15 rows (enough to see past any
    metadata block that precedes the real header), and
  * the first row anywhere in the sheet that looks like a header, i.e. one
    with a cell whose text contains "part", "mfg", "price" or "avg".

Exits with status 1 if the workbook does not exist.
"""
from pathlib import Path

import openpyxl

# Number of leading rows dumped per sheet.
PREVIEW_ROWS = 15
# Lower-case fragments that mark a row as a probable header row.
HEADER_HINTS = ("part", "mfg", "price", "avg")

path = Path("OCTO") / "seout.xlsx"
if not path.exists():
    print(f"NOT FOUND: {path}")
    raise SystemExit(1)

wb = openpyxl.load_workbook(path, data_only=True, read_only=True)
try:
    for sheet in wb.sheetnames:
        ws = wb[sheet]
        # Materialised once because the rows are walked twice below and the
        # total count is reported in the banner.
        rows = list(ws.iter_rows(values_only=True))
        print(f"\n=== Sheet: {sheet!r} ({len(rows)} rows) ===")

        # Print the first rows so we can see past the metadata; rows whose
        # cells are all None are skipped to keep the output readable.
        for row_no, row in enumerate(rows[:PREVIEW_ROWS], start=1):
            if any(v is not None for v in row):
                print(f" row {row_no}: {list(row)}")

        # Also report the first row that looks like a header.
        print("\n --- Searching for header row ---")
        for row_no, row in enumerate(rows, start=1):
            cells = [str(v).lower() for v in row if v is not None]
            if any(hint in cell for cell in cells for hint in HEADER_HINTS):
                print(f" row {row_no}: {list(row)}")
                break
finally:
    # Close the workbook even if reading a sheet raised, so the file handle
    # held open by read-only mode is always released.
    wb.close()

View File

@@ -1,19 +1,21 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Octo Fill Octo Fill Silicon Expert
========= ==========================
Reads the Octopart export (OCTO/octo.xlsx) and fills the Reads the Silicon Expert export (OCTO/seout.xlsx) and fills the
"Unit Cost EUR @1000" column in every component table across every "Unit Cost EUR @1000" column in every component table across every
sheet/tab of every BoM file in the BoM/ folder. sheet/tab of every BoM file in the BoM/ folder.
Silicon Expert column mapping:
Manufacturer → UPLOADED MFG
MPN → UPLOADED PART
Price → BUDGETARY PRICES column, parses "Min X & Avg Y" → uses Avg (EUR)
Matching strategy: Matching strategy:
1. Exact match on both Original Manufacturer + Original Part (preferred) 1. Exact match on both Uploaded Mfg + Uploaded Part (preferred)
2. Fallback: match on Original Part alone (handles slight manufacturer 2. Fallback: match on Uploaded Part alone
name differences between BoM and Octopart)
Where a part appears more than once in octo.xlsx (multiple distributor
offers), the lowest price is used.
Where a part appears more than once, the lowest price is used.
Cells that already contain a value are left untouched. Cells that already contain a value are left untouched.
Usage: Usage:
@@ -27,6 +29,7 @@ import logging
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
import re
import openpyxl import openpyxl
from openpyxl.cell.cell import MergedCell from openpyxl.cell.cell import MergedCell
@@ -41,6 +44,7 @@ _SFP.__init__ = _sfp_patched
BOM_DIR = Path("BoM") BOM_DIR = Path("BoM")
OCTO_DIR = Path("OCTO") OCTO_DIR = Path("OCTO")
SEOUT_FILE = OCTO_DIR / "seout.xlsx"
COST_HEADER = "Unit Cost EUR @1000" COST_HEADER = "Unit Cost EUR @1000"
SKIP_MPNS = { SKIP_MPNS = {
@@ -57,12 +61,28 @@ logging.basicConfig(
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# ── Load Octopart data ───────────────────────────────────────────────────────── # ── Load Silicon Expert data ───────────────────────────────────────────────────
def _find_col(headers: dict[str, int], substring: str) -> Optional[int]:
"""Return the index of the first header whose name contains substring (case-insensitive)."""
for name, idx in headers.items():
if substring.lower() in name.lower():
return idx
return None
def load_seout(path: Path) -> tuple[dict[tuple[str, str], float], dict[str, float]]:
"""
Load seout.xlsx into lookup maps.
exact_map (mfg_lower, part_lower) → lowest unit price (EUR)
mpn_map part_lower → lowest unit price (EUR) [fallback]
"""
exact_map: dict[tuple[str, str], float] = {}
mpn_map: dict[str, float] = {}
def _load_single(path: Path, exact_map: dict, mpn_map: dict) -> int:
"""Load one Octopart xlsx into the shared maps. Returns number of entries added."""
wb = openpyxl.load_workbook(path, data_only=True, read_only=True) wb = openpyxl.load_workbook(path, data_only=True, read_only=True)
added = 0 added = 0
avg_col_name = None
for sheet_name in wb.sheetnames: for sheet_name in wb.sheetnames:
ws = wb[sheet_name] ws = wb[sheet_name]
@@ -72,30 +92,54 @@ def _load_single(path: Path, exact_map: dict, mpn_map: dict) -> int:
row = list(row) row = list(row)
if headers is None: if headers is None:
row_lower = [str(v).strip().lower() if v is not None else "" for v in row] row_lower = [str(v).strip().lower() if v is not None else "" for v in row]
if "original part" in row_lower and "original manufacturer" in row_lower: has_part = any("uploaded part" in v for v in row_lower)
headers = {str(row[i]).strip(): i for i in range(len(row)) if row[i] is not None} has_mfg = any("uploaded mfg" in v for v in row_lower)
if has_part and has_mfg:
headers = {
str(row[i]).strip(): i
for i in range(len(row))
if row[i] is not None
}
log.info(f" Sheet '{sheet_name}' headers: {list(headers.keys())}")
for h in headers:
if "budgetary" in h.lower() or "price" in h.lower():
avg_col_name = h
break
continue continue
if not any(row): if not any(row):
continue continue
mpn_col = _find_col(headers, "original part") mpn_col = _find_col(headers, "uploaded part")
mfr_col = _find_col(headers, "original manufacturer") mfr_col = _find_col(headers, "uploaded mfg")
price_col = _find_col(headers, "unit price") # Silicon Expert stores prices as "Min X & Avg Y" in a BUDGETARY PRICES column
price_col = _find_col(headers, "budgetary") or _find_col(headers, "price")
if mpn_col is None or price_col is None: if mpn_col is None or price_col is None:
continue continue
mpn = str(row[mpn_col]).strip() if mpn_col < len(row) and row[mpn_col] is not None else "" mpn = (
mfr = str(row[mfr_col]).strip() if mfr_col is not None and mfr_col < len(row) and row[mfr_col] is not None else "" str(row[mpn_col]).strip()
price_raw = row[price_col] if price_col < len(row) else None if mpn_col < len(row) and row[mpn_col] is not None
else ""
)
mfr = (
str(row[mfr_col]).strip()
if mfr_col is not None and mfr_col < len(row) and row[mfr_col] is not None
else ""
)
price_raw = str(row[price_col]).strip() if price_col < len(row) and row[price_col] is not None else ""
if not mpn or mpn.lower() in SKIP_MPNS: if not mpn or mpn.lower() in SKIP_MPNS:
continue continue
# Parse "Min 0.818 & Avg 1.3225562077" → extract the Avg value
avg_match = re.search(r'Avg\s+([\d.]+)', price_raw, re.IGNORECASE)
if not avg_match:
continue
try: try:
price = float(price_raw) price = float(avg_match.group(1))
except (TypeError, ValueError): except ValueError:
continue continue
if price <= 0: if price <= 0:
@@ -111,39 +155,19 @@ def _load_single(path: Path, exact_map: dict, mpn_map: dict) -> int:
mpn_map[mpn_k] = price mpn_map[mpn_k] = price
wb.close() wb.close()
return added if not exact_map:
log.warning(
f"No entries loaded from {path.name}. "
def load_octo(octo_dir: Path) -> tuple[dict[tuple[str, str], float], dict[str, float]]: "Check that the file has columns containing 'Uploaded Part', 'Uploaded Mfg', "
""" "Expected a 'BUDGETARY PRICES' column with values like 'Min X & Avg Y'."
Reads every .xlsx file in octo_dir into shared lookup maps. )
exact_map (manufacturer_lower, mpn_lower) → lowest unit price log.info(
mpn_map mpn_lower → lowest unit price (fallback) f"Silicon Expert ({path.name}): {len(exact_map)} unique (mfg, part) entries "
""" f"— avg price column: '{avg_col_name}'"
files = sorted(octo_dir.glob("*.xlsx")) )
if not files:
log.error(f"No .xlsx files found in {octo_dir}/")
sys.exit(1)
exact_map: dict[tuple[str, str], float] = {}
mpn_map: dict[str, float] = {}
for f in files:
added = _load_single(f, exact_map, mpn_map)
log.info(f" {f.name}: {added} entries loaded")
log.info(f"Octopart total: {len(exact_map)} unique (manufacturer, part) entries")
return exact_map, mpn_map return exact_map, mpn_map
def _find_col(headers: dict[str, int], prefix: str) -> Optional[int]:
"""Case-insensitive prefix match on header names."""
for name, idx in headers.items():
if name.lower().startswith(prefix.lower()):
return idx
return None
# ── BoM table finding ────────────────────────────────────────────────────────── # ── BoM table finding ──────────────────────────────────────────────────────────
def _cell(value) -> str: def _cell(value) -> str:
@@ -153,8 +177,7 @@ def _cell(value) -> str:
def _find_tables(indexed_rows: list[tuple[int, tuple]]): def _find_tables(indexed_rows: list[tuple[int, tuple]]):
""" """
Yields TableInfo dicts per component table found. Yields TableInfo dicts per component table found.
Handles multiple tables side-by-side on the same row by finding ALL Handles multiple tables side-by-side on the same row.
Manufacturer+MPN column pairs in a header row, not just the first.
Includes 'start_col' so the cost-column search stays within each table. Includes 'start_col' so the cost-column search stays within each table.
""" """
i = 0 i = 0
@@ -162,7 +185,6 @@ def _find_tables(indexed_rows: list[tuple[int, tuple]]):
row_num, row = indexed_rows[i] row_num, row = indexed_rows[i]
row_str = [_cell(v) for v in row] row_str = [_cell(v) for v in row]
# All column positions that are "manufacturer" or "mpn"
mfr_cols = [c for c, v in enumerate(row_str) if v.lower() == "manufacturer"] mfr_cols = [c for c, v in enumerate(row_str) if v.lower() == "manufacturer"]
mpn_cols = [c for c, v in enumerate(row_str) if v.lower() == "mpn"] mpn_cols = [c for c, v in enumerate(row_str) if v.lower() == "mpn"]
@@ -170,7 +192,6 @@ def _find_tables(indexed_rows: list[tuple[int, tuple]]):
i += 1 i += 1
continue continue
# Pair each mfr_col with its nearest unpaired mpn_col
pairs: list[tuple[int, int]] = [] pairs: list[tuple[int, int]] = []
used_mpn: set[int] = set() used_mpn: set[int] = set()
for mfr_col in mfr_cols: for mfr_col in mfr_cols:
@@ -182,6 +203,8 @@ def _find_tables(indexed_rows: list[tuple[int, tuple]]):
used_mpn.add(best_mpn) used_mpn.add(best_mpn)
max_j = i + 1 max_j = i + 1
new_header_j = None # earliest row where a same-column header reappeared
for mfr_col, mpn_col in pairs: for mfr_col, mpn_col in pairs:
data: list[tuple[int, str, str]] = [] data: list[tuple[int, str, str]] = []
j = i + 1 j = i + 1
@@ -199,10 +222,11 @@ def _find_tables(indexed_rows: list[tuple[int, tuple]]):
continue continue
empty_streak = 0 empty_streak = 0
# Detect a new table header anywhere in the row (handles sub-tables # Same-column header detected — record it but let other pairs
# at different column positions than the current table) # continue reading past it so their data isn't truncated.
dr_str_lower = [_cell(v).lower() for v in dr] if mfr.lower() == "manufacturer" and mpn.lower() == "mpn":
if "manufacturer" in dr_str_lower and "mpn" in dr_str_lower: if new_header_j is None or j < new_header_j:
new_header_j = j
break break
if mpn and mpn.lower() not in SKIP_MPNS: if mpn and mpn.lower() not in SKIP_MPNS:
@@ -212,13 +236,16 @@ def _find_tables(indexed_rows: list[tuple[int, tuple]]):
max_j = max(max_j, j) max_j = max(max_j, j)
yield { yield {
"header_row": row_num, "header_row": row_num,
"mfr_col": mfr_col + 1, # 1-based "mfr_col": mfr_col + 1,
"mpn_col": mpn_col + 1, "mpn_col": mpn_col + 1,
"start_col": min(mfr_col, mpn_col) + 1, # leftmost col of this table "start_col": min(mfr_col, mpn_col) + 1,
"data": data, "data": data,
} }
i = max_j # Rewind to the earliest sub-table header so the outer loop can pick it
# up, while still allowing wider tables (other columns) to have yielded
# their full data above.
i = new_header_j if new_header_j is not None else max_j
# ── Write back to BoM files ──────────────────────────────────────────────────── # ── Write back to BoM files ────────────────────────────────────────────────────
@@ -240,8 +267,8 @@ def fill_boms(
for f in files: for f in files:
log.info(f"Processing {f.name}") log.info(f"Processing {f.name}")
try: try:
# data_only gives us resolved cell values (not formula strings) for # data_only resolves formula cells (e.g. =UPPER("Mfr")) to their values
# table/part detection; the writable wb is used for reading/writing prices. # for detection; the writable wb is used for writing prices.
wb_ro = openpyxl.load_workbook(f, data_only=True, read_only=True) wb_ro = openpyxl.load_workbook(f, data_only=True, read_only=True)
wb = openpyxl.load_workbook(f) wb = openpyxl.load_workbook(f)
except Exception as exc: except Exception as exc:
@@ -255,6 +282,16 @@ def fill_boms(
for i, row in enumerate(wb_ro[sheet_name].iter_rows(values_only=True), start=1) for i, row in enumerate(wb_ro[sheet_name].iter_rows(values_only=True), start=1)
] ]
# Reuse the same cost column for all stacked tables at the same
# start_col on this sheet, so a second sub-table doesn't create a
# new column one position to the right.
sheet_cost_cols: dict[int, int] = {}
KNOWN_COST_HEADERS = {
COST_HEADER.lower(),
"unit cost 1000x data",
}
for table in _find_tables(indexed): for table in _find_tables(indexed):
header_row = table["header_row"] header_row = table["header_row"]
data_rows = [r for r, _, _ in table["data"]] data_rows = [r for r, _, _ in table["data"]]
@@ -267,24 +304,18 @@ def fill_boms(
f"table at col {table['start_col']}, {len(table['data'])} parts{row_range}" f"table at col {table['start_col']}, {len(table['data'])} parts{row_range}"
) )
# Find or create the cost column. if table["start_col"] in sheet_cost_cols:
# Accept either of the two known column names (the primary # Stacked table — reuse the cost column found/created by the
# COST_HEADER or the name used by the earlier write-back script). # first table at this column position on this sheet.
KNOWN_COST_HEADERS = { cost_col = sheet_cost_cols[table["start_col"]]
COST_HEADER.lower(), else:
"unit cost 1000x data",
}
cost_col = None cost_col = None
last_used = table["start_col"] last_used = table["start_col"]
max_col = ws.max_column or 1 max_col = ws.max_column or 1
# Search only within this table's column range (from its
# leftmost column rightward) so side-by-side tables don't
# steal each other's cost column.
for c in range(table["start_col"], max_col + 1): for c in range(table["start_col"], max_col + 1):
val = ws.cell(header_row, c).value val = ws.cell(header_row, c).value
if val is not None: if val is not None:
val_str = str(val).strip() val_str = str(val).strip()
# Don't count formula placeholders as "used" columns
if not val_str.startswith("="): if not val_str.startswith("="):
last_used = c last_used = c
if val_str.lower() in KNOWN_COST_HEADERS: if val_str.lower() in KNOWN_COST_HEADERS:
@@ -297,6 +328,11 @@ def fill_boms(
cost_col += 1 cost_col += 1
ws.cell(header_row, cost_col).value = COST_HEADER ws.cell(header_row, cost_col).value = COST_HEADER
sheet_cost_cols[table["start_col"]] = cost_col
log.info(f" Cost column: {cost_col} ('{ws.cell(header_row, cost_col).value}')")
tbl_filled = tbl_skipped = tbl_missing = 0
for row_num, mfr, mpn in table["data"]: for row_num, mfr, mpn in table["data"]:
cell = ws.cell(row_num, cost_col) cell = ws.cell(row_num, cost_col)
if isinstance(cell, MergedCell): if isinstance(cell, MergedCell):
@@ -309,11 +345,11 @@ def fill_boms(
or (isinstance(existing, (int, float)) and existing == 0) or (isinstance(existing, (int, float)) and existing == 0)
) )
if not is_empty and not is_formula: if not is_empty and not is_formula:
log.info(f" Skip row {row_num} [{mpn}]: cell already has {repr(existing)}") log.debug(f" Skip row {row_num} [{mpn}]: cell already has {repr(existing)}")
total_skipped += 1 total_skipped += 1
tbl_skipped += 1
continue continue
# Look up price: exact match first, then MPN-only fallback
price = exact_map.get((mfr.lower(), mpn.lower())) price = exact_map.get((mfr.lower(), mpn.lower()))
if price is None: if price is None:
price = mpn_map.get(mpn.lower()) price = mpn_map.get(mpn.lower())
@@ -322,10 +358,18 @@ def fill_boms(
if price is not None: if price is not None:
cell.value = price cell.value = price
cell.number_format = "0.000000"
total_filled += 1 total_filled += 1
tbl_filled += 1
else: else:
total_missing += 1 total_missing += 1
log.info(f" No match in Octopart: [{mfr}] [{mpn}]") tbl_missing += 1
log.debug(f" No match: [{mfr}] [{mpn}]")
log.info(
f" → filled {tbl_filled}, skipped {tbl_skipped}, "
f"no match {tbl_missing}"
)
wb_ro.close() wb_ro.close()
try: try:
@@ -339,7 +383,7 @@ def fill_boms(
log.info( log.info(
f"Done filled: {total_filled}, " f"Done filled: {total_filled}, "
f"already populated (skipped): {total_skipped}, " f"already populated (skipped): {total_skipped}, "
f"no match in Octopart: {total_missing}" f"no match in Silicon Expert: {total_missing}"
) )
@@ -351,5 +395,9 @@ if __name__ == "__main__":
log.error(f"Not found: {p}") log.error(f"Not found: {p}")
sys.exit(1) sys.exit(1)
exact_map, mpn_map = load_octo(OCTO_DIR) if not SEOUT_FILE.exists():
log.error(f"Silicon Expert export not found: {SEOUT_FILE}")
sys.exit(1)
exact_map, mpn_map = load_seout(SEOUT_FILE)
fill_boms(BOM_DIR, exact_map, mpn_map) fill_boms(BOM_DIR, exact_map, mpn_map)