Skip to content

Commit 35bf06a

Browse files
Merge pull request #173 from dynamsoft-docs/preview
Preview
2 parents 60a3d03 + 94586f1 commit 35bf06a

File tree

2 files changed

+151
-0
lines changed

2 files changed

+151
-0
lines changed

.github/workflows/called-workflow-build-sync-production.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,15 @@ jobs:
4747
run: |
4848
curl -X POST -H "${{ secrets.WEBHOOK_USER }}" -H "${{ secrets.WEBHOOK_TOKEN }}" -H "Content-Type: application/json" -d "[\"/${{ inputs.doc-url }}/*\"]" ${{ secrets.WEBHOOK_URL }}
4949
50+
- name: Update Sitemap
51+
if: startsWith(inputs.doc-url, 'barcode-reader') ||
52+
startsWith(inputs.doc-url, 'capture-vision') ||
53+
startsWith(inputs.doc-url, 'label-recognition') ||
54+
startsWith(inputs.doc-url, 'document-normalizer') ||
55+
startsWith(inputs.doc-url, 'code-parser') ||
56+
startsWith(inputs.doc-url, 'camera-enhancer')
57+
run: |
58+
cd ${{ runner.temp }}/DocHome/assets/scripts/GenerateSitemap
59+
python GenerateSitemapByMenuTree.py --product dcv,dbr
60+
curl -T ${{ runner.temp }}/DocHome/assets/scripts/GenerateSitemap/barcode-reader/docs/menu-tree-sitemap.xml ftp://${{ secrets.FTP_DYNAMSOFT_LOCAL_SERVER }}/www.dynamsoft.com/barcode-reader/docs/ --user ${{ secrets.FTP_DYNAMSOFT_LOCAL_USER }}:${{ secrets.FTP_DYNAMSOFT_LOCAL_PASSWORD }}
61+
curl -T ${{ runner.temp }}/DocHome/assets/scripts/GenerateSitemap/capture-vision/docs/menu-tree-sitemap.xml ftp://${{ secrets.FTP_DYNAMSOFT_LOCAL_SERVER }}/www.dynamsoft.com/capture-vision/docs/ --user ${{ secrets.FTP_DYNAMSOFT_LOCAL_USER }}:${{ secrets.FTP_DYNAMSOFT_LOCAL_PASSWORD }}
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
# -*- coding: utf-8 -*-
2+
from urllib.parse import urljoin, urlparse, parse_qs, urlencode, urlunparse
3+
import requests
4+
from bs4 import BeautifulSoup
5+
import xml.etree.ElementTree as ET
6+
import argparse
7+
import os
8+
9+
# python GenerateSitemapByMenuTree.py --product dbr
10+
# python GenerateSitemapByMenuTree.py --product dcv
11+
# python GenerateSitemapByMenuTree.py --product dcv,dbr --baseuri https://url
12+
13+
# Command-line interface: which product sitemaps to build and which site to crawl.
# Fix: the original description/help strings were wrong ("power shell param",
# "you product", "you base uri"); argument names, types and defaults unchanged.
paramParser = argparse.ArgumentParser(description="menu-tree sitemap generator")
paramParser.add_argument("--product", type=str, required=True,
                         help="comma-separated product codes (dbr, dcv)")
paramParser.add_argument("--baseuri", type=str, default="https://www.dynamsoft.com",
                         help="base URI of the documentation site")
args = paramParser.parse_args()

WebBaseURI = args.baseuri
products = args.product.split(",")

# Menu-tree entry pages (Hide_Tree_Page.html) keyed by product code.
productsUrls = {}
for product in products:
    if product == "dbr":
        productsUrls[product] = [
            f"{WebBaseURI}/barcode-reader/docs/core/Hide_Tree_Page.html",
            f"{WebBaseURI}/barcode-reader/docs/server/Hide_Tree_Page.html",
            f"{WebBaseURI}/barcode-reader/docs/mobile/Hide_Tree_Page.html",
            f"{WebBaseURI}/barcode-reader/docs/web/Hide_Tree_Page.html",
        ]
    if product == "dcv":
        productsUrls[product] = [
            f"{WebBaseURI}/capture-vision/docs/core/Hide_Tree_Page.html",
            f"{WebBaseURI}/capture-vision/docs/server/Hide_Tree_Page.html",
            f"{WebBaseURI}/capture-vision/docs/mobile/Hide_Tree_Page.html",
            f"{WebBaseURI}/capture-vision/docs/web/Hide_Tree_Page.html",
        ]
39+
def extract_links(url, repo_type):
    """Fetch *url* and collect documentation links from its latest-version menu tree.

    Scans the direct <li> children of the element with id
    "version_tree_latest_version"; anchors carrying the "refreshLink" class are
    skipped.  Each link is stripped of any existing query string, resolved
    against ``WebBaseURI`` and tagged with ``?lang=<lang>``, where <lang> is the
    <li>'s ``lang`` attribute or, when absent/empty, *repo_type*.

    Returns a list of absolute URLs; on any request failure the error is
    printed and an empty list is returned.
    """
    try:
        response = requests.get(url, timeout=100)
        response.raise_for_status()
    except requests.RequestException as err:
        print(f"request error: {url} -> {err}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    version_tree = soup.find(id="version_tree_latest_version")
    if not version_tree:
        print(f"not find id='version_tree_latest_version': {url}")
        return []

    collected = []
    # recursive=False: only the tree's top-level <li> entries are wanted.
    for li_item in version_tree.find_all('li', {}, False):
        lang = li_item.get('lang') or repo_type
        for anchor in li_item.find_all('a', href=True):
            if 'refreshLink' in anchor.get('class', []):
                continue
            bare_href = anchor['href'].split("?")[0]
            collected.append(urljoin(urljoin(WebBaseURI, bare_href), f"?lang={lang}"))
    return collected
63+
64+
def merge_urls(url_list):
    """Collapse URLs that share scheme+host+path into one URL per path.

    Query-parameter values for the same key are unioned, sorted and joined
    with commas, e.g. ``p?lang=a`` + ``p?lang=b`` -> ``p?lang=a,b``.
    Output order follows first appearance of each base URL.
    """
    params_by_base = {}

    for raw_url in url_list:
        parts = urlparse(raw_url)
        base = urlunparse((parts.scheme, parts.netloc, parts.path, '', '', ''))
        bucket = params_by_base.setdefault(base, {})
        for key, values in parse_qs(parts.query).items():
            bucket.setdefault(key, set()).update(values)

    merged = []
    for base, params in params_by_base.items():
        query = '&'.join(f"{k}={','.join(sorted(v))}" for k, v in params.items())
        merged.append(f"{base}?{query}" if query else base)
    return merged
89+
90+
def is_docs_link(x):
    """Return True when *x* points inside a /docs/ section."""
    return x.find("/docs/") != -1
92+
93+
def get_repo_type(url):
    """Map a docs URL to its repository-type keyword.

    Note the web section maps to "javascript".  Returns None when the URL
    matches no known docs section (same as the original fall-through).
    """
    section_map = (
        ("docs/server/", "server"),
        ("docs/mobile/", "mobile"),
        ("docs/web/", "javascript"),
        ("docs/core/", "core"),
    )
    for marker, repo_type in section_map:
        if marker in url:
            return repo_type
    return None
102+
103+
def get_directory_by_product(product):
    """Return the sitemap output directory for a product code ("dbr"/"dcv").

    Unknown product codes yield None, matching the original fall-through.
    """
    return {
        "dbr": "barcode-reader/docs",
        "dcv": "capture-vision/docs",
    }.get(product)
108+
109+
def write_xml_file(directory, filename, xml_element):
    """Serialize *xml_element* to *directory*/*filename* as UTF-8 XML with declaration.

    Creates *directory* (including parents) when it does not exist.
    """
    # exist_ok replaces the racy os.path.exists() check-then-create.
    os.makedirs(directory, exist_ok=True)
    filepath = os.path.join(directory, filename)
    tree = ET.ElementTree(xml_element)
    tree.write(filepath, encoding="utf-8", xml_declaration=True)
    # Report the actual output name rather than a hard-coded filename.
    print(f"{filename} is generated.")
118+
119+
120+
# Driver: build one menu-tree sitemap per requested product.
for product_code, tree_urls in productsUrls.items():
    collected_links = []
    for tree_url in tree_urls:
        page_links = [link
                      for link in extract_links(tree_url, get_repo_type(tree_url))
                      if is_docs_link(link)]
        print(f"{tree_url}: {len(page_links)} links")
        collected_links.extend(page_links)
    collected_links = merge_urls(collected_links)

    unique_links = list(set(collected_links))

    # generate sitemap.xml
    urlset = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
    for link in unique_links:
        loc = ET.SubElement(ET.SubElement(urlset, "url"), "loc")
        loc.text = link

    write_xml_file(get_directory_by_product(product_code), "menu-tree-sitemap.xml", urlset)
139+

0 commit comments

Comments
 (0)