Skip to content

Commit 6942115

Browse files
authored
Merge pull request #4 from aUsernameWoW/feature/add_url_hyperlink_in_bibliography
Feature/add url hyperlink in bibliography
2 parents 40ade07 + a567abd commit 6942115

File tree

4 files changed

+134
-4
lines changed

4 files changed

+134
-4
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,10 @@ if __name__ == '__main__':
8282
# 改为仅句首单词的首字母大写时,你可以给出一个专有名词列表,noterools 会检测其中的专有名词,防止这些名词被错误设置为小写
8383
# word_list = ["UNet", "US", "China", "WRF"]
8484
# add_format_title_hook(word, lower_all_words=True, word_list=word_list)
85+
86+
# 为参考文献目录表中出现的网址添加超链接
87+
# add_url_hyperlink_hook(word)
88+
89+
# 自定义超链接的颜色以及是否添加下划线 (参数可选)
90+
# add_url_hyperlink_hook(word, color=16711680, no_under_line=False)
8591
```

README_EN.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,10 @@ if __name__ == '__main__':
8484
# When changing titles to sentence case, you can provide a list of proper nouns; noterools will detect them and keep them from being lowercased.
8585
# word_list = ["UNet", "US", "China", "WRF"]
8686
# add_format_title_hook(word, lower_all_words=True, word_list=word_list)
87+
88+
# Add hyperlinks to the URLs that appear in the bibliography
89+
# add_url_hyperlink_hook(word)
90+
91+
# Customize the hyperlink color and whether the underline is removed (both parameters are optional)
92+
# add_url_hyperlink_hook(word, color=16711680, no_under_line=False)
8793
```

noterools/bibliography.py

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
import re
23
from typing import Optional
34

@@ -6,7 +7,7 @@
67
from .csl import GetCSLJsonHook, add_get_csl_json_hook
78
from .error import AddHyperlinkError, HookNotRegisteredError, ParamsError
89
from .hook import ExtensionHookBase, HOOKTYPE, HookBase
9-
from .utils import logger
10+
from .utils import logger, find_urls
1011
from .word import Word
1112
from .zotero import zotero_check_initialized, zotero_query_pages
1213

@@ -620,5 +621,87 @@ def add_format_title_hook(word: Word, upper_first_char=False, upper_all_words=Fa
620621
return bib_format_title_hook
621622

622623

623-
__all__ = ["BibLoopHook", "BibBookmarkHook", "BibUpdateDashSymbolHook", "BibFormatTitleHook", "add_bib_loop_hook", "add_bib_bookmark_hook", "add_update_dash_symbol_hook",
624-
"add_format_title_hook"]
624+
class BibURLHyperlinkHook(ExtensionHookBase):
    """
    Extension hook that turns the URLs found in a bibliography entry into hyperlinks.
    """

    def __init__(self, color: Optional[int] = None, no_under_line=False):
        """
        Initialize the URL hyperlink hook.

        :param color: Font color to apply to the linked URLs. Defaults to None (keep original color).
        :type color: int
        :param no_under_line: Whether to remove the underline of the hyperlinks. Defaults to False.
        :type no_under_line: bool
        """
        super().__init__(name="BibURLHyperlinkHook")
        self.color = color
        self.no_under_line = no_under_line

    def on_iterate(self, word: Word, word_range):
        """
        Find the URLs in one bibliography entry and add a hyperlink to each of them.

        :param word: Word object.
        :type word: Word
        :param word_range: Range object of the bibliography entry.
        :type word_range: object
        """
        bib_text = word_range.Text

        # Each item is (start_pos, end_pos, url), offsets being relative to bib_text
        url_positions = find_urls(bib_text)

        if not url_positions:
            return

        logger.debug(f"Found {len(url_positions)} URLs in bibliography entry")

        text_length = len(bib_text)

        # Process URLs in reverse order to maintain position integrity
        for start_pos, end_pos, url in reversed(url_positions):
            # Work on a duplicate so the range of the whole entry is left untouched
            url_range = word_range.Duplicate

            # Shrink the duplicate from both sides until it covers only the URL text.
            # Unit=1 is passed as the move unit (presumably wdCharacter -- confirm against WdUnits).
            url_range.MoveStart(Unit=1, Count=start_pos)
            url_range.MoveEnd(Unit=1, Count=-(text_length - end_pos))

            # find_urls also returns bare "doi.org/..." matches that carry no scheme.
            # Prefix one so the link target is an absolute web address rather than
            # something that could be resolved as a relative path.
            if url.startswith(("http://", "https://")):
                address = url
            else:
                address = f"https://{url}"

            try:
                word.add_hyperlink(address, url_range, no_under_line=self.no_under_line)

                # Set color if specified (after adding hyperlink)
                if self.color is not None:
                    url_range.Font.Color = self.color

                logger.debug(f"Added hyperlink for URL: {url}")

            except AddHyperlinkError:
                # Best effort: a failure on one URL must not stop the remaining ones
                logger.warning(f"Failed to add hyperlink for URL: {url}")
682+
683+
684+
def add_url_hyperlink_hook(word: Word, color: int = None, no_under_line=False) -> BibURLHyperlinkHook:
    """
    Create a ``BibURLHyperlinkHook`` and attach it to the bibliography loop hook of ``word``.

    :param word: ``noterools.word.Word`` object.
    :type word: Word
    :param color: Font color to apply to the linked URLs. Defaults to None (keep original color).
    :type color: int
    :param no_under_line: Whether to remove the underline of the hyperlinks. Defaults to False.
    :type no_under_line: bool
    :return: The ``BibURLHyperlinkHook`` instance that was registered.
    :rtype: BibURLHyperlinkHook
    """
    # Registered up front in case it's needed for future functionality
    add_get_csl_json_hook(word)

    hook = BibURLHyperlinkHook(color=color, no_under_line=no_under_line)
    add_bib_loop_hook(word).set_hook(hook)

    return hook
703+
704+
705+
__all__ = ["BibLoopHook", "BibBookmarkHook", "BibUpdateDashSymbolHook", "BibFormatTitleHook",
706+
"BibURLHyperlinkHook", "add_bib_loop_hook", "add_bib_bookmark_hook",
707+
"add_update_dash_symbol_hook", "add_format_title_hook", "add_url_hyperlink_hook"]

noterools/utils.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
import logging
23
import re
34

@@ -41,4 +42,38 @@ def get_year_list(text: str) -> list[str]:
4142
return re.findall(pattern, text)
4243

4344

44-
__all__ = ["logger", "replace_invalid_char", "get_year_list"]
45+
def find_urls(text: str) -> list[tuple[int, int, str]]:
    """
    Find URLs and bare ``doi.org/...`` links in text and return their positions and values.

    Trailing punctuation that belongs to the surrounding sentence is not part of the result.
    A closing bracket is treated as trailing punctuation only when it has no matching opening
    bracket inside the URL, so ``https://en.wikipedia.org/wiki/Python_(programming_language)``
    is kept intact while the ``)`` of ``(see https://example.com/foo)`` is dropped.

    :param text: The text to search
    :type text: str
    :return: List of tuples (start_pos, end_pos, url), where ``text[start_pos:end_pos] == url``
    :rtype: list[tuple[int, int, str]]
    """
    # Pattern to match common URL formats; it may still swallow trailing punctuation,
    # which is trimmed below
    url_pattern = r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&//=]*))'

    # Pattern to match DOI links written without a scheme
    doi_pattern = r'(doi\.org/[0-9a-zA-Z./\-_]+)'

    # Combine patterns
    combined_pattern = f"{url_pattern}|{doi_pattern}"

    # Characters that are always sentence punctuation when they end a match
    trailing_punctuation = '.,:;"\''
    # Closing brackets are only trimmed when unbalanced within the URL itself
    bracket_pairs = {")": "(", "]": "[", "}": "{"}

    urls = []
    for match in re.finditer(combined_pattern, text):
        start, end = match.span()
        url = match.group(0)

        while url:
            last_char = url[-1]
            if last_char in trailing_punctuation:
                strip_last = True
            elif last_char in bracket_pairs:
                strip_last = url.count(last_char) > url.count(bracket_pairs[last_char])
            else:
                strip_last = False

            if not strip_last:
                break

            url = url[:-1]
            end -= 1

        if url:  # Only add if URL is not empty after processing
            urls.append((start, end, url))

    return urls
77+
78+
79+
__all__ = ["logger", "replace_invalid_char", "get_year_list", "find_urls"]

0 commit comments

Comments
 (0)