jedec parse

a1ive · a1ive · commit bf4e4490bf62 · 2025-08-04T21:07:02.000+08:00
diff --git a/.gitignore b/.gitignore
@@ -385,4 +385,6 @@ FodyWeavers.xsd
 
 # JetBrains Rider
 .idea/
-*.sln.iml
+*.sln.iml
+
+*.pdf
diff --git a/libnw/jep106.ids b/libnw/jep106.ids
@@ -1,6 +1,6 @@
 
 # JEP106BM
-# Version: 2025.07.17
+# Version: 2025.06.12
 1
 	1 AMD
 	2 AMI
@@ -70,7 +70,7 @@
 	66 Macronix
 	67 Xerox
 	68 Plus Logic
-	69 SanDisk (Western Digital)
+	69 SanDisk Technologies Inc
 	70 Elan Circuit Tech.
 	71 European Silicon Str.
 	72 Apple Computer
@@ -151,7 +151,7 @@
 	20 Smart Modular
 	21 Hughes Aircraft
 	22 Lanstar Semiconductor
-	23 Marvell (Qlogic)
+	23 Qlogic
 	24 Kingston
 	25 Music Semi
 	26 Ericsson Components
@@ -458,7 +458,7 @@
 	73 Ritek Corp
 	74 empowerTel Networks
 	75 Hypertec
-	76 Marvell (Cavium Networks)
+	76 Cavium Networks
 	77 PLX Technology
 	78 Massana Design
 	79 Intrinsity
@@ -487,7 +487,7 @@
 	102 Layer N Networks
 	103 MtekVision (Atsana)
 	104 Allegro Networks
-	105 Marvell
+	105 Marvell Semiconductors
 	106 Netergy Microelectronic
 	107 NVIDIA
 	108 Internet Machines
@@ -560,7 +560,7 @@
 	48 OCZ
 	49 Emuzed
 	50 LOGIC Devices
-	51 Marvell (Inphi)
+	51 Inphi Corporation
 	52 Quake Technologies
 	53 Vixel
 	54 SolusTek
@@ -750,7 +750,7 @@
 	111 MetaRAM
 	112 Axel Electronics Co Ltd
 	113 Tilera Corporation
-	114 Marvell (Aquantia)
+	114 Aquantia
 	115 Vivace Semiconductor
 	116 Redpine Signals
 	117 Octalica
@@ -971,7 +971,7 @@
 	78 Mustang
 	79 Orca Systems
 	80 Passif Semiconductor
-	81 GigaDevice Semiconductor (Beijing) Inc
+	81 GigaDevice Semiconductor (Beijing)
 	82 Memphis Electronic
 	83 Beckhoff Automation GmbH
 	84 Harmony Semiconductor Corp
@@ -1142,7 +1142,7 @@
 	122 AltoBeam
 	123 Wave Computing
 	124 Beijing TrustNet Technology Co Ltd
-	125 Marvell (Innovium)
+	125 Innovium Inc
 	126 Starsway Technology Limited
 10
 	1 Weltronics Co LTD
@@ -1375,7 +1375,7 @@
 	101 Esperanto Technologies
 	102 JinSheng Electronic (Shenzhen) Co Ltd
 	103 Shenzhen Shi Bolunshuai Technology
-	104 Shanghai Rei Zuan Information Tech
+	104 Shanghai Ruixuan Information Tech
 	105 Fraunhofer IIS
 	106 Kandou Bus SA
 	107 Acer
@@ -1623,7 +1623,7 @@
 	95 Sitrus Technology
 	96 AnHui Conner Storage Co Ltd
 	97 Rochester Electronics
-	98 Wuxi Smart Memories Technologies Co Ltd
+	98 Wuxi Smart Memories Technologies Co
 	99 Star Memory
 	100 Agile Memory Technology Co Ltd
 	101 MEJEC
@@ -1658,9 +1658,9 @@
 	3 Shenzhen Feisrike Technology Co Ltd
 	4 Shenzhen Sunhome Electronics Co Ltd
 	5 Global Mixed-mode Technology Inc
-	6 Shenzhen Weien Electronics Co. Ltd.
+	6 Shenzhen Weien Electronics Co Ltd.
 	7 Shenzhen Cooyes Technology Co Ltd
-	8 ShenZhen ChaoYing ZhiNeng
+	8 ShenZhen ChaoYing ZhiNeng Technology
 	9 E-Rockic Technology Company Limited
 	10 Aerospace Science Memory Shenzhen
 	11 Shenzhen Quanji Technology Co Ltd
@@ -1804,7 +1804,7 @@
 	22 Chiplego Technology (Shanghai) Co Ltd
 	23 StoreSkill
 	24 Shenzhen Astou Technology Company
-	25 Guangdong LeafFive Technology Limited
+	25 Guangdong LeapFive Technology Limited
 	26 Jin JuQuan
 	27 Huaxuan Technology (Shenzhen) Co Ltd
 	28 Gigastone Corporation
@@ -1872,12 +1872,12 @@
 	90 SSCT
 	91 Sichuan Heentai Semiconductor Co Ltd
 	92 Zhejiang University
-	93 Guangzhou ShinGroup
+	93 www.shingroup.cn
 	94 Suzhou Nano Mchip Technology Company
 	95 Feature Integration Technology Inc
 	96 d-Matrix
 	97 Golden Memory
-	98 MACHENIKE
+	98 Qingdao Thunderobot Technology Co Ltd
 	99 Shenzhen Tianxiang Chuangxin Technology
 	100 HYPHY USA
 	101 Valkyrie
@@ -1899,12 +1899,12 @@
 	117 HOGE Technology Co Ltd
 	118 United Micro Technology (Shenzhen) Co
 	119 Fabric of Truth Inc
-	120 Epitech
+	120 Elpitech
 	121 Elitestek
 	122 Cornelis Networks Inc
 	123 WingSemi Technologies Co Ltd
 	124 ForwardEdge ASIC
-	125 Beijing Future Imprint Technology Co Ltd
+	125 Beijing Future Signet Technology Co Ltd
 	126 Fine Made Microelectronics Group Co Ltd
 16
 	1 Changxin Memory Technology (Shanghai)
@@ -1923,16 +1923,16 @@
 	14 Shenzhen Ranshuo Technology Co Limited
 	15 ScaleFlux
 	16 XC Memory
-	17 Guangzhou Beimu Technology Co., Ltd
+	17 Guangzhou Beimu Technology Co Ltd
 	18 Rays Semiconductor Nanjing Co Ltd
 	19 Milli-Centi Intelligence Technology Jiangsu
-	20 Zilia Technologioes
+	20 Zilia Technologies
 	21 Incore Semiconductors
 	22 Kinetic Technologies
 	23 Nanjing Houmo Technology Co Ltd
 	24 Suzhou Yige Technology Co Ltd
 	25 Shenzhen Techwinsemi Technology Co Ltd
-	26 Pure Array Technology (Shanghai) Co. Ltd
+	26 Pure Array Technology (Shanghai) Co Ltd
 	27 Shenzhen Techwinsemi Technology Udstore
 	28 RISE MODE
 	29 NEWREESTAR
diff --git a/libnw/version.h b/libnw/version.h
@@ -8,7 +8,7 @@
 #define NWINFO_MAJOR_VERSION 1
 #define NWINFO_MINOR_VERSION 4
 #define NWINFO_MICRO_VERSION 2
-#define NWINFO_BUILD_VERSION 0
+#define NWINFO_BUILD_VERSION 1
 
 #define NWINFO_VERSION      NWINFO_MAJOR_VERSION,NWINFO_MINOR_VERSION,NWINFO_MICRO_VERSION,NWINFO_BUILD_VERSION
 #define NWINFO_VERSION_STR  QUOTE(NWINFO_MAJOR_VERSION.NWINFO_MINOR_VERSION.NWINFO_MICRO_VERSION.NWINFO_BUILD_VERSION)
diff --git a/parse_jep106.py b/parse_jep106.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import re
+import datetime
+
+# PyMuPDF is imported as fitz
+try:
+    import fitz
+except ImportError:
+    print("FATAL: PyMuPDF library not found.")
+    print("--> Please install it by running: pip install pymupdf")
+    sys.exit(1)
+
+# Zero-based index of the first page scanned for manufacturer entries.
+START_PAGE_INDEX = 5
+# Zero-based index of the page searched for the "complete as of" date sentence.
+DATE_SEARCH_PAGE_INDEX = 4
+
+def extract_document_name(doc):
+    """Extract the document identifier (e.g., JEP106BM) from the PDF.
+
+    The metadata title is checked first; if it holds no identifier, the text
+    of the first page is scanned instead.
+
+    Args:
+        doc: An opened document exposing ``metadata`` and
+            ``load_page(0).get_text("text")``.
+
+    Returns:
+        The upper-cased identifier, or the default ``"JEP106BM"`` when neither
+        source yields a match.
+    """
+    print("[INFO] Extracting document name...")
+    name_pattern = re.compile(r'(JEP106[A-Z]{2})', re.IGNORECASE)
+
+    # Attempt to find in metadata first. The metadata dict, or its 'title'
+    # value, can be None (see the PyMuPDF Document.metadata docs), so fall
+    # back to an empty string instead of handing None to the regex.
+    meta_title = (doc.metadata or {}).get('title') or ''
+    match = name_pattern.search(meta_title)
+    if match:
+        name = match.group(1).upper()
+        print("  |-- Found in metadata: '{}'".format(name))
+        return name
+
+    # Fallback to scanning the cover page. Any failure here is only a
+    # warning because a default name is still available below.
+    try:
+        print("  |-- Not in metadata, scanning cover page...")
+        cover_page_text = doc.load_page(0).get_text("text")
+        match = name_pattern.search(cover_page_text)
+        if match:
+            name = match.group(1).upper()
+            print("  |-- Found on cover page: '{}'".format(name))
+            return name
+    except Exception as e:
+        print("  |-- [WARN] Error scanning cover page: {}".format(e))
+
+    print("  |-- [WARN] Could not find name. Using default 'JEP106BM'.")
+    return "JEP106BM"
+
+def extract_document_date(doc):
+    """Find the document's effective date on a fixed page of the PDF.
+
+    The page at ``DATE_SEARCH_PAGE_INDEX`` is searched for the sentence
+    "The present list is complete as of <Month D, YYYY>.".
+
+    Args:
+        doc: An opened document exposing ``load_page(n).get_text("text")``.
+
+    Returns:
+        The date formatted as ``YYYY.MM.DD``. When the sentence is missing or
+        cannot be parsed, a warning is printed and today's date is returned.
+    """
+    print("[INFO] Extracting document date from page {}...".format(DATE_SEARCH_PAGE_INDEX + 1))
+    try:
+        page = doc.load_page(DATE_SEARCH_PAGE_INDEX)
+        # Extracted text keeps the page's line breaks, so the sentence (or the
+        # date inside it) may be wrapped across lines. Collapse all whitespace
+        # first so the pattern and strptime see a single-spaced string.
+        text = re.sub(r'\s+', ' ', page.get_text("text"))
+
+        match = re.search(r'The present list is complete as of\s+(.*?)\.', text, re.IGNORECASE)
+        if match:
+            date_str = match.group(1).strip()
+            dt_obj = datetime.datetime.strptime(date_str, "%B %d, %Y")
+            formatted_date = dt_obj.strftime("%Y.%m.%d")
+            print("  |-- Found and parsed date: '{}'".format(formatted_date))
+            return formatted_date
+    except Exception as e:
+        # Best effort: page loading or date parsing problems only warn.
+        print("  |-- [WARN] Failed to parse date: {}".format(e))
+
+    print("  |-- [WARN] Could not extract date. Using current date as fallback.")
+    return datetime.date.today().strftime("%Y.%m.%d")
+
+def clean_manufacturer_name(raw_name):
+    """Return a tidy manufacturer name from a raw table-row fragment.
+
+    Trailing bit columns (plus an optional two-digit hex code) are removed,
+    runs of whitespace are collapsed to single spaces, and typographic quotes
+    and dashes are mapped to their ASCII counterparts.
+    """
+    # Typographic punctuation -> ASCII equivalents.
+    ascii_punctuation = str.maketrans({
+        '\u2018': "'",  # Left Single Quotation Mark
+        '\u2019': "'",  # Right Single Quotation Mark
+        '\u201c': '"',  # Left Double Quotation Mark
+        '\u201d': '"',  # Right Double Quotation Mark
+        '\u2013': '-',  # En Dash
+        '\u2014': '-',  # Em Dash
+    })
+
+    # Drop a tail such as " 1 1 0 0 0 1 1 1 C7".
+    without_table_data = re.sub(r'(\s+[01])+(\s+[0-9A-Fa-f]{2})?$', '', raw_name)
+
+    # Squeeze every whitespace run down to one space.
+    single_spaced = re.sub(r'\s+', ' ', without_table_data)
+
+    return single_spaced.translate(ascii_punctuation).strip()
+
+def _collect_jep106_lines(doc):
+    """Build the output lines for an opened JEP106 PDF document.
+
+    Returns:
+        A tuple ``(output_lines, manufacturer_count, current_bank)`` where
+        ``output_lines`` holds the header, bank numbers and tab-indented
+        ``"<id> <name>"`` entries.
+    """
+    output_lines = []
+    output_lines.append('')
+
+    # Header generation
+    output_lines.append("# {}".format(extract_document_name(doc)))
+    output_lines.append("# Version: {}".format(extract_document_date(doc)))
+    print("[OK] File header generated.")
+
+    current_bank = 0
+    manufacturer_count = 0
+    line_pattern = re.compile(r'^(\d{1,3})\s+(.*)')
+    print("\n--- Starting Parsing ---")
+
+    for page_num in range(START_PAGE_INDEX, len(doc)):
+        print("\n[PAGE {}/{}]".format(page_num + 1, len(doc)))
+        page = doc.load_page(page_num)
+        text = page.get_text("text")
+
+        # Check for the start of the appendix to stop parsing.
+        if "Annex A (informative) Name Changes" in text:
+            print("  [STOP] Detected start of Annex A. Terminating main content parsing.")
+            break
+
+        lines = text.split('\n')
+
+        if page_num == START_PAGE_INDEX and current_bank == 0:
+            current_bank = 1
+            output_lines.append(str(current_bank))
+            print("  -> Initialized to Bank {}.".format(current_bank))
+
+        # Check for bank switch text before parsing lines to correctly associate all entries
+        if "The following numbers are all in bank" in text:
+            current_bank += 1
+            output_lines.append(str(current_bank))
+            print("  -> Detected switch to Bank {}.".format(current_bank))
+
+        i = 0
+        while i < len(lines):
+            line_stripped = lines[i].strip()
+
+            # Immediately prepare for the next iteration.
+            i += 1
+
+            if not line_stripped:
+                continue
+
+            match = line_pattern.match(line_stripped)
+            if not match:
+                continue
+
+            id_code, raw_name = match.groups()
+
+            # Look ahead to the next line for a possible continuation.
+            # A continuation line is a non-empty line that does NOT start with another ID.
+            if i < len(lines):
+                next_line_stripped = lines[i].strip()
+                if next_line_stripped and not line_pattern.match(next_line_stripped):
+                    # It's a continuation. Append it to the raw name and
+                    # advance past the consumed line.
+                    raw_name = "{} {}".format(raw_name, next_line_stripped)
+                    i += 1
+
+            # Skip entries that are not actual manufacturers
+            if "Continuation Code" in raw_name:
+                print("  [SKIP] 'Continuation Code' entry.")
+                continue
+
+            final_name = clean_manufacturer_name(raw_name)
+            output_lines.append("\t{} {}".format(id_code, final_name))
+            manufacturer_count += 1
+
+            print("  [OK]   ID: {:<4} Name: {}".format(id_code, final_name))
+
+    return output_lines, manufacturer_count, current_bank
+
+def parse_jep106_pdf(input_path, output_path):
+    """Parse a JEP106 PDF and write the manufacturer list as an .ids file.
+
+    The output starts with an empty line and two ``#`` header lines (document
+    name and version date), followed by bank numbers on their own lines and
+    tab-indented ``"<id> <name>"`` entries. Parsing starts at
+    ``START_PAGE_INDEX`` and stops at the page containing the Annex A heading.
+
+    Args:
+        input_path: Path of the PDF to read.
+        output_path: Path of the ASCII text file to write.
+
+    Returns:
+        None. Failures to open the PDF or to write the output are reported on
+        stdout rather than raised; errors while reading pages propagate.
+    """
+    print("--- JEP106 Parser Started ---")
+    print("[INFO] Input PDF: {}".format(input_path))
+    print("[INFO] Output file: {}".format(output_path))
+
+    try:
+        doc = fitz.open(input_path)
+        print("[OK] PDF file opened successfully ({} pages).".format(len(doc)))
+    except Exception as e:
+        print("FATAL: Failed to open or read the PDF file '{}'.".format(input_path))
+        print("--> Error: {}".format(e))
+        return
+
+    # Release the document even if page parsing raises.
+    try:
+        output_lines, manufacturer_count, current_bank = _collect_jep106_lines(doc)
+    finally:
+        doc.close()
+
+    print("\n--- Parsing Complete ---")
+    print("[INFO] Total manufacturers found: {}".format(manufacturer_count))
+    print("[INFO] Total banks processed: {}".format(current_bank))
+    print("[INFO] Writing {} final lines to '{}'...".format(len(output_lines), output_path))
+
+    try:
+        payload = '\n'.join(output_lines) + '\n'
+        # Validate the encoding before open() truncates the target, so a stray
+        # non-ASCII character cannot destroy an existing output file.
+        payload.encode('ascii')
+        with open(output_path, 'w', encoding='ascii', errors='strict') as f:
+            f.write(payload)
+        print("\n--- SUCCESS ---")
+        print("Generated file: '{}'".format(output_path))
+    except Exception as e:
+        print("\n--- FATAL ERROR ---")
+        print("Failed to write to the output file '{}'.".format(output_path))
+        print("--> Character Encoding Error or I/O issue.")
+        print("--> Detailed Error: {}".format(e))
+
+# Command-line entry point: requires exactly two arguments (the input PDF path
+# and the output .ids path); otherwise prints usage and exits with status 1.
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        print("Usage: python {} <input_pdf_file> <output_ids_file>".format(sys.argv[0]))
+        sys.exit(1)
+
+    # argv[1] is the input PDF, argv[2] the output file.
+    parse_jep106_pdf(sys.argv[1], sys.argv[2])