#!/usr/bin/env python3
"""
BoM Extractor
=============
Reads every .xlsx / .xlsm file from the BoM/ folder, extracts all
(Manufacturer, MPN) pairs from every table in every sheet, deduplicates,
and writes the result to bom_parts.xlsx.

Usage:
    python bom_extract.py
"""

from __future__ import annotations

import logging
import sys
from pathlib import Path

# NOTE: the third-party Excel stack (openpyxl, pandas) is imported inside
# extract() / write() so that the pure parsing helpers in this module can be
# imported and unit-tested without those packages being installed.

BOM_DIR = Path("BoM")
OUTPUT_FILE = Path("bom_parts.xlsx")

# MPN placeholders (compared lower-cased) that do not identify a real part.
SKIP_MPNS = {
    "", "tbd", "n/a", "na", "-", "--", "---", "?", "none",
    "null", "nan", "xxx", "x", "dnf", "dnp", "do not fit",
    "do not populate",
}

# File suffixes (lower-cased) treated as BoM workbooks.
_WORKBOOK_SUFFIXES = frozenset({".xlsx", ".xlsm"})

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s  %(levelname)-8s  %(message)s",
    datefmt="%H:%M:%S",
)
log = logging.getLogger(__name__)


def _cell(value) -> str:
    """Return a cell value as stripped text; ``None`` becomes ``""``.

    Integral floats are rendered without the trailing ``.0`` so that a
    numeric part number read back as ``7400.0`` is reported as ``"7400"``.
    """
    if value is None:
        return ""
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def _header_columns(row: tuple) -> tuple[int, int] | None:
    """Return ``(manufacturer_col, mpn_col)`` if *row* is a table header.

    A header row contains both a cell reading "manufacturer" and a cell
    reading "mpn" (case-insensitive, surrounding whitespace ignored). The
    first matching column wins for each label.
    """
    labels = [_cell(value).lower() for value in row]
    try:
        return labels.index("manufacturer"), labels.index("mpn")
    except ValueError:
        return None


def _find_tables(
    indexed_rows: list[tuple[int, tuple]],
    *,
    max_blank_rows: int = 3,
) -> list[tuple[str, str]]:
    """Return all (manufacturer, mpn) pairs found across every table in the row list.

    Args:
        indexed_rows: ``(row_number, row_values)`` pairs in sheet order. The
            row number is carried along by the caller but not used here.
        max_blank_rows: Number of consecutive rows with both the manufacturer
            and MPN cells empty that terminates the current table.

    Returns:
        The pairs in the order encountered. Rows whose MPN is empty or is a
        placeholder listed in ``SKIP_MPNS`` are omitted. Duplicates are kept;
        de-duplication is the caller's job.
    """
    parts: list[tuple[str, str]] = []
    total = len(indexed_rows)
    i = 0
    while i < total:
        columns = _header_columns(indexed_rows[i][1])
        if columns is None:
            i += 1
            continue
        mfr_col, mpn_col = columns

        blank_run = 0
        j = i + 1
        while j < total:
            row = indexed_rows[j][1]
            # Rows may be shorter than the header row; treat missing cells
            # as empty rather than raising IndexError.
            mfr = _cell(row[mfr_col]) if mfr_col < len(row) else ""
            mpn = _cell(row[mpn_col]) if mpn_col < len(row) else ""

            if not mfr and not mpn:
                blank_run += 1
                if blank_run >= max_blank_rows:
                    break
            else:
                blank_run = 0
                if mfr.lower() == "manufacturer" and mpn.lower() == "mpn":
                    # A repeated header starts a new table; leave j on it so
                    # the outer loop re-detects its columns.
                    break
                if mpn and mpn.lower() not in SKIP_MPNS:
                    parts.append((mfr, mpn))
            j += 1

        i = j
    return parts


def _add_unique(
    found: list[tuple[str, str]],
    seen: set[tuple[str, str]],
    parts: list[tuple[str, str]],
) -> int:
    """Append the not-yet-seen pairs of *found* to *parts*; return how many.

    Pairs are compared case-insensitively. ``seen`` is updated as each pair
    is accepted, so duplicates *within* ``found`` are dropped as well as
    duplicates of earlier sheets/files.
    """
    added = 0
    for mfr, mpn in found:
        key = (mfr.lower(), mpn.lower())
        if key in seen:
            continue
        seen.add(key)
        parts.append((mfr, mpn))
        added += 1
    return added


def _workbook_paths(bom_dir: Path) -> list[Path]:
    """Return the sorted workbook files directly inside *bom_dir*.

    Directories and Excel's ``~$name.xlsx`` lock files (created while a
    workbook is open) are ignored; they are not readable workbooks.
    """
    return sorted(
        path
        for path in bom_dir.iterdir()
        if path.is_file()
        and path.suffix.lower() in _WORKBOOK_SUFFIXES
        and not path.name.startswith("~$")
    )


def extract(bom_dir: Path) -> list[tuple[str, str]]:
    """Collect the unique (manufacturer, mpn) pairs from every workbook in *bom_dir*.

    Args:
        bom_dir: Directory containing the ``.xlsx`` / ``.xlsm`` BoM files.

    Returns:
        Unique pairs (case-insensitive) in first-seen order.

    Raises:
        SystemExit: With code 1 if *bom_dir* holds no workbook files.

    A workbook that cannot be read is logged and skipped; parts gathered
    from its sheets before the failure are kept.
    """
    files = _workbook_paths(bom_dir)
    if not files:
        log.error("No .xlsx/.xlsm files found in %s/", bom_dir)
        sys.exit(1)

    import openpyxl  # deferred: see note at the top of the module

    seen: set[tuple[str, str]] = set()
    parts: list[tuple[str, str]] = []

    for path in files:
        log.info("Reading %s", path.name)
        try:
            wb = openpyxl.load_workbook(path, data_only=True, read_only=True)
            try:
                # wb.worksheets skips chartsheets, which have no rows to
                # iterate and would otherwise abort the whole workbook.
                for ws in wb.worksheets:
                    indexed = list(
                        enumerate(ws.iter_rows(values_only=True), start=1)
                    )
                    found = _find_tables(indexed)
                    added = _add_unique(found, seen, parts)
                    if found:
                        log.info(
                            "  Sheet '%s': %d rows, %d new unique",
                            ws.title, len(found), added,
                        )
            finally:
                # Read-only workbooks keep the file handle open until closed.
                wb.close()
        except Exception as exc:  # best effort: one bad file must not stop the run
            log.error("  Failed to read %s: %s", path.name, exc)

    log.info("Total unique parts: %d", len(parts))
    return parts


def write(parts: list[tuple[str, str]], output: Path) -> None:
    """Write *parts* to *output* as a sorted, auto-sized single-sheet workbook.

    Args:
        parts: (manufacturer, mpn) pairs to write.
        output: Destination ``.xlsx`` path; overwritten if it exists.
    """
    import pandas as pd  # deferred: see note at the top of the module

    df = pd.DataFrame(parts, columns=["Manufacturer", "MPN"])
    df = df.sort_values(["Manufacturer", "MPN"], ignore_index=True)

    with pd.ExcelWriter(output, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Parts")
        ws = writer.sheets["Parts"]
        for col in ws.columns:
            width = max(
                len("" if cell.value is None else str(cell.value))
                for cell in col
            )
            # Pad a little and cap so one long value cannot blow up the sheet.
            ws.column_dimensions[col[0].column_letter].width = min(width + 3, 60)

    log.info("Written → %s  (%d unique parts)", output, len(parts))


def main() -> int:
    """Run the extraction for ``BOM_DIR``; return the process exit code."""
    if not BOM_DIR.is_dir():
        log.error("BoM directory '%s' not found.", BOM_DIR)
        return 1

    write(extract(BOM_DIR), OUTPUT_FILE)
    return 0


if __name__ == "__main__":
    sys.exit(main())
z#!nc*LX&ToNd_^Ajw4={6?>xnFIV3iBv`)Mxwc3%>)pN*Z|Bx)J7E_!Gh7jz9mXF0 zP54dOlQzfTSmb*XrJ0uX2qI9KuhoE&a; zj#f@@O--Ghm><6$kB{<9E2mjaSMhsf*kgfrz1=j(7WDCOcR4fa zxW8&RISD-{u@zk8Yi+#tyj}zPZhhVEFU{@`m-p{tPC_fs8#p`onSAb?E1*)}1Q)$; z9M7)SR{UlcI($9OH|{3-m>XuV$GWq#1sCV;4=&Co<{LE3k9W5O*-7p%s{C#+HYyE# z+b&n?+vW{?<`-K0vIGsj*^vk?`g&ZrpPM@SU0;{hsNC6DmwC*2v<{#4&Dvp@JZ~b| zIB;{jx4+lCIop=wDw$PmI5^*zv0XS2y!&S3a=n$%Qc*M3E$FAar$w^Ev328oTGOXv z=p}e{J<@ICa=tavw|GXPpSfn9w`l9vad|S)-gNEiaJ^Dg9iwq^w?EOnav$C9(^17{ zo_FpiuzMhIKb>rz_l=~(`~IHu-m{szKP1^u^mbZlajZj6X}_r6Cb*DZG6nzp$$Zfox^-~{w*V5jD6njOFBHu$#NqE&n#;xw((iYbw=nP)d` zn7-+`A_i4XUGgcf2Sak(-BbGc(mpmw_pzjzQ@@Nvs+_8mIi|^fXX3E?GFp79fk`9% z@-17NzUsso1ZgItc5t=ul9@@+=c}UPSIP9!3bwBPtsmCMB(q16ANsTC*u)&#cyXeb zm?D*$<3*T<6q4i|?=mt%sWwCtA~sgZ8Jh6kGl(A$U?!t6A9UH!l{fwl#vU8^Y|qWr z+K{Olur!Fx)V9=yUVx4|71Zr@AdQV@J8fTZ!|7$meeO?)V~ZGi9-d%f=sp;dK1{NZ zE{{-xrhj*FsBgiDDmu{S!tRcn*wBqGVck_^A>c6}BS)z{Z?wCFhv8RJVJPc9)lo^u8P98h`f z>BaaeuGj4O`_~+N_~o_o6lyQ9CQR&_elIw^KvPsiEIiMBt>w<6ZIRv!PNGqf;%MW{ z9PFMr3D|1eR>jQHN0psw+o=UBgwppFo>x0n`ioAieg3`R$}{I`zc%fBe8r*b0sufA z&z#dEaN(Kz)8az#uJr0#`Zt4nFAtXI`>J)L@1Ckun$3TV3-rnvYddB?)=;a6-!}fl zFBOoy?m{)pWR+hfOE)hk>EDRJ5qRFAiSN&~x$JUkfshT{F$LJoq)@rebXRRD2?ptvHufGiA^6wk)+>M$Fni&WyD_Hz0zFOyxKxJl#Us7Z%*R zO)2Rl4Tqz1kJS&MW7lbVTN3fb(vgM8R=%Pxc5Wi7cFM?7VK79erCC+wqgwmQu@_lk zzjclLOPitzzzuV4#YrnNXBAvszDpBMezNELWv)qBA|nZ$bk7#)S;cMB=UejdpG&Nz z-0r0?b7CNJ|8k_T9v?JmlQtoTE@^rS7xp_xoLYa%vYpJc)z&NHEFJNN5!ek;M3{B! 
z3U{7Q*3*+gFXA-QM(qul8XZPv6rXYz@HnVTBSX`#M>HVl9BQazSuyb)qn^KDF8yh6 zKVES^NcMYXIQDn)gr_GSYLC@EsCWt z&LxD~*H*k_&8WE+4v*A5{jT)m)6k+rnTtpkN$j(e2MWuY>CtnhOp+1c@-adEpP02+ zgnD6VO;ZY&{9xJ$YxuYX%va>iQi3=3r&?I(16P)Kqg14o(-_J}CY??;+U^}OkwKgy zAmG&LJPRTu1GU{B8{T(DeZ_IKu6Cx&r)`28JznthsJj2vdvx?q=gNxL{aWW;5Mf{6 z`Pop=9wKLS%)xvLVhe^X5s4!D9f~W-9G5`5o5Ay4fkp>s@3Hhs|8e@jNeakHrwS-nesaJP!!h_j?lm({jvZ;>T_Y|Ah)tMU$vRzA*+q zo#!>~Rjo5WX;h-qPwEFZqkv)c zl$IaH{EX>A;I~#&Z_~oJmcxBI+BZ!#N$Y+@l>Ce=S+}{qkG{U|1Z6|if4YnhIOhLg z)_Tv(ybu+>XPw{Us^g@zO1iad@{yb=5$E@XMW@k+826)@65Wk7tTpoD`kR(0l~{`6 zU_Las=YA8egZkg*-bm%T7(bQPQP9G3G^tZ6>ddJtbuBAnUt!Y9n3}`HSBVuZd%~A! zU-LdNXm%`opjPc2PN0{8B(h^<9m~?z+b_sAj$}!Y-Sl#m*zU4mv~QC1>ZPjGBBnQr ziqSPsE!;f^iwG5YxJR{39!6X0t-c%0a(2wG>+ha$5|}@)Nf0d;e@LD(MK)NP7@b@8 zYm&{U`4c?vP!&_WJ@=()CWD^Dd3qgY>evcfoSsbFWe2|7q{Nw5dwqBF?g&WL`VMNc zyz|mjvvnC&;2zg1@-DK}C9s%#6>S&U^b)wovx=#UET1os2R7g%MzRDKyt~OrNwNpG zVz8(oQrMlL6O^K#J8jE1{ z5S@e6h2+@Nmb8KyMr2d~75%lE+JOGygbc#nWs4MQ7)k+vK%~%EbV*W2C?hsH8DX^0 zF1VDM2$qk>{8VBf!i7xin*O>v3`cd2NI?meij)eM6A}A!<_c06fzck6A~pyq4BzM! 
zN}&{l^cLym~Lk1kK-P@@$?NI;h{a%d+K`#0i{ z!Nl|DGDOJNbVN@!M)~YiG%giWOq-0=6lNH0(fj&=!J%CXz+i6nZ|QRSw5G|XrqeA5 zbZw2?cyRmh#y~FzN@ypS>QITaX|bxdB9$AYic%hrGxYUGS{$(tRZCsUo3uDRIaOO- z%Bi$Czje82z&QI*Wr$zbI*MeR#0SzlC>mZgd4!1YZ=`gV)9iE3>-26MrR)?%42vkN zl}{UFQ8*TJ-WM;XCgxpd7(R~#)U(wzSFGy3zf3pCuO@m@Jl1##kKBkWBD{(e!NM23 zN>|Gh7}$?pZ|IKv<+X{KiU(a7pAm6L(I|ts`7UO|lQXUkE%vLV(!_4afB;Bf+WRsh?*-HdlJlc544zF9#WR2~%nI0PO7Bks+ zvZWjGc~`Kf^1*SlLg|j}WyPvuope4}mK6eGv@X@M6)zdGbz$<0V>8DL!!cq-AphLm zzcD|EOy;_yR!n#n6Yp#>M+647uqUuRo6#@(Mt?6a)C!(uO>gG*mauEBPA}$R6`2t` zK8IZv*NpjDHfzzwh3fC>_@*!KWbE`x#pq+r}CT@}`OwN;Lvb#L(_c3*LcXsh!da6Fc|uJO?K(5TBknFP-{T zur2Qp9OSlB02MJO1Qv4HDFcAe2cV9MXb7~NKeT|dCDnEE#Cdo~MyS;+6MtDUJh~AL zlgiM=?`W|pGEGgG z4f@K7mAtY=P)d)uVv+pH|GHw=rP*^wp<8(2uS;mWtPId-knq9$176J|%1 z;%iorGAk{D+K534vS2 zCNAAdBfkDDE4#fPc@lAEOk#X6pOq z?!@aUuZo#Sy2-@JVz$vW7*t(-RyX|ReF~DraU*^kiQ^$(xcdjkBi>o5sUh8P-NOnR z)90LsBA;O!u~fLjigDss zU2~q$Lu%JYDlfB4D5_q_tmM?$4?-93utJ!0!~@kTmq!aRV zRoUkOzTcA3CHq?Lh-_Z=h54qOw&){$K!aAP52wJ^_Je^H);dr#tp9p=s!^dWu3zv1eIhNJVu8P4l=1lyU6-n`&WItz0FHPy zUvcb{$D#jqTg;R>AIrsqb!~BzER%?nq`iGd+)MenuWML``MREY(sg5j>B>74qsowo zQ0AQU?I_-&+UxdgH{(HKnF=ZFzL6z}I6Qc~8#pxK=R5*qMHtLwG2lN@Sbw*Nupz)J zQ!)VH1p@pLsX(dj{CzrMbNp5D8}%0#DkleP8^-(sxgC$6m9gK;0ko?3H(i61~c0I(l`x(yeY z@By6JaD}lyXxyE%hiIXKjpe4Bmq?EvO>yWJ#=#_&(&`)Ogftdm!vdBJaS}lnQ|}O2 z#;1V<44q1qT?z$!#U(-)`QtPZdRNbb_xu3peE@m@FqbR1u|tbRj{%JH?o~~@mqsfF zSWE;r>k=XOcP`Gdb(j#mqx9=>+ z4;-9m;TMnsH>0)yrTB2UoP~ges;FTLskZ2$qpYN!Z<+xsd;I?lApB>*I3A>pb)E=4 z?#rJJU`fR67hp4SGVr~vE`8e|xihFl(fxkK8^_hB!{%`=Q* zAG;@&#@YAqD>hVMJ^7*iB*-m`wCwjHR{qHY!P-v}P`n~pr8gedU>wDvhfz4vLu3Q- zfKN1Eat`Z1(mfLfeJII+v?tJUj0KNECCM9sFg8Wn+h3Wa;M`$s8A10A{N-!HG(LFc zb)*84R$#j(dWdWRYJ@i>uF3ux3jwQKJtP~0tAXHGX$Cm?zaL2KE)f+xuC0;4v1P)R z?YL1f`KP&Yf?6ih4_BMAPN3mUgEvJ9d`4v9RVB zqve`_qn>~&Xm?Lu(l8!<@MM-VxzKdG(fH9h>wAWw{0(y&HGp}A-AM;q!R%=?+5H$C zwyEAXXpzCOGExWM6z33!IOGJh>8gjrbZPT`nhWK>2hIz3ewHkEjHlKM$tS?xX5e)) zjl2vuke_Ka!%S<22zZ~%8gYSD1|QPG{_1Z?F-ag83s3;Ye-fzq2ymYSHa-GkKsS(A 
z@xV?vP?bjt1L`@DAGi#FEoBJx$^%h5LjwA9A9h_J8eH5p8QVQ^!lM|lm!V_@M8G}e zR}#x(x(SI@eoQxbBH{rL>BbOD{Ny1Y1(8Lwm@S3=X(IfmX`J>k1EI$KGd7G5`5^-# zisG=6O40R=e1J42Jf0jV9H`L9hs6B=!T~_n9fhQEN8DvHS6q)qw+3XBKvuICzp=$C z8Z`_<5*_D=Py?dqlB80G^d`aQL3q+{(HVLL*j>kTufKE7YLKP9YPd7662I3h5%10wB6mNEIm^ z0Pi}53ciLMgrCN;r?gb^G0gb4RjkV1I7j!}b@P$fxS2suN^KPN#R#4?N^CXZt2MiA9U z@ZJa#U6rRU+x-&4-}M!>`|~C%F7SRK_5=ef-xOdRycc|6sQ)zPq$ytDcg37U#DHnc zWK%roE*M&{j0sGq226JgraqZN28^KXCS`Gz3vPD?2l5j1#2QDXi^BXGJOTmNFVb#M ze+#G*eOe0w+bkf)ydErrfogYkJn9V$lM$>L4pbDynK9{ zvufsdaw)LfeCu<+E!S#~$g|*gv(kMcc;;qckU3B@ws>%M-SRDF zC3H4@(e_~e$?nh1^Oly|6TzLu~YYm|9(p~{^UqP3l;>DLZV!afhjDOS-bue~1Cz(&p4OPJ&wPE@9YU8OD1FCnzn`Eby(h!wF6gW9+Ks5uc)v4J_nVAs?~C>Mpie&A z5<^-${`n@&v)0F|tRJM+Hl>LZ-3G?wW>QQ<<7;2=Hq)ZL{fT($T{H|0+^A2C4V-Hd8fsDt9)_A@7@db^P{}u~ zoXBPBRSR#QCS_PeER`(x7kCGrT>YGCWeEOwAb={qaKZ zwtZd`8wsQPNmw((1qwB)Q;sj{&J)u3V7{{`_s)D$vfxH5!!LI&qX36Ikqq23I{|yn zA70P9Ob`n~?_RGOrNZ$$xeuxCrXQvt9YlYm&^piwbeml3X?13#IY9q(?iqHz&>4ky z>A>G_f;R?%rJsLS9Xlg;I)TmIN-NtYS0*+)EAE#a`d;}RjvwcT{_x-SWS!e+XXgLj zY!uq@#T-C|6)oT>#-Gjh@JiU&&d%y#yG6y$Ds;1;h8R!{!j%!jQWm;!$`Mh*W9Q7b z6t`&0E)O1|?`|n;Q&T&1C+#gxZY*YzXx6_vnIsB|v7k=cQ@z$wrkpbJXJ2~0!g`TR zm9(---Z%@VIGk{W zn!^zhydfGHdG8=q)$|bgpRlWrv7*)EeDVnPr{g4YZ@^0E z7HV#8Pg~HM*l+DXJXlyp>24%@{Z&!V8}8pBez~*KTn>cx8VED+^8KM+>kpm&XP_Uc zRTuXOJ%|NW%#Ugi(J)6P6gjoIO8rI%K7q`Z}T{{-4|Mw3l2`-$iyqDq62)^W{ zOUGo1)tVKmSeo5~a@6zG61ifWY75;uUljHP!?aq`ydqt-@C>y#=`Z186R&Z<3EjN% zwLaRKY50=n=fAYlB}Nce@KzNsRu*Y&m!s3Ono^rWd_IZoyB_ru-N`U_Rw=Jc`RwBp z`V+0ZFZ{kq=^nmaHjFSu<)zI3b}~Ig1WE`vI0g5oC{0Jy&eqw~*4aSS!`{?M?}1C@ z37rbCtdI}`(+T>Y=$?m$noP@T+!wHQnWls@3KRJD9$qyOv2P(P?iLuCmmPjr4K)pV z5*1f?Sezx;12G_L-pG(wrgA>0j7e{yuaa<0-b|SV+Y4q2_!m&7<|hk_j+moZk5EgU z&Y@7BB@(EyRF!0%1;dppQ*Zn#$5ceBAGpo=ZUC>x68UQGayUT%L9J7|$Ne|`(&$C= z7nRm-ON;Q<)Uc>P0niW=>COoOjjqb9XGHWawK3L6Q;x$NnGC8YGf$I$GCBqDekmO- zlFj^G^lr71S(WPHL8~Z(s9PqADZ?@|s)yM747)5yXb*9-)sl}t#)}=1{R+xHzC z-rqZV`+3K%Iu?Ic?Y#BoA@DyE{3ZyaBntzJ1N#3La00A({P71O{Qs77{w@7?<>Vh( 
z5Xc|q%m0-APaWmo;(yl{{#SefnErpMHvC(`-(@EMQBWU%2h^tm3jWuklfSk6eGT}J z7Jry_@<%QIy%hW{`givJuc!#szjFECf`4bj{|eGm|6j}hlNDdlI#