med/medg.py at main · litlnemo/med · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import html
import os
import re
import webbrowser
from pathlib import Path
from tkinter import filedialog

import PySimpleGUI as sg
import requests
from bs4 import BeautifulSoup

"""
med.py: A script for searching the Middle English Dictionary,
   designed for use by those researching Medieval names.

   v. 1.0.1, 14 September 2013

   v. 1.0.2, 30 September 2013:
       - added loop, so you can search over and over again
       - custom icon
       - program now closes after you cancel search
       - pages slightly prettier.

   v. 2.0, 8 August 2024:
       - updated for Python 3
       - Pashua removed as it is no longer supported
       - this means that for now this is command line only, sorry
       - no loop at the moment
       - cannot open pages with full open results... for now
       - last version without GUI dialog box

    v. 2.0G, 10 Aug 2024:
       - PySimpleGUI added

    v. 2.1, 11 Aug 2024
       - Standalone Mac app with Platypus
       - changed deprecated "soup.find(text)" to "soup.find(string)"

    v. 2.2G 12 Aug 2024
       - different searches added -- "definition and notes" and "modern English word equivalent"
       - from this point version numbers are intended to match the standalone app

    v. 2.2.3 17 Aug 2024
       - fixed visual stuff on the html results page
       - now users can choose where to save the page
"""

# Address of the MED search endpoint. The query string is passed separately
# (via ``params=``) so the search term is always percent-encoded.
SEARCH_URL = "https://quod.lib.umich.edu/m/middle-english-dictionary/dictionary"
RESULTS_FILENAME = "medresults.html"
REQUEST_TIMEOUT = 30  # seconds; keeps the GUI from hanging on a dead connection

# Radio-button key -> value of the site's "search_field" query parameter.
SEARCH_FIELDS = {
    "hnf": "hnf",            # headword and forms
    "dnn": "notes_and_def",  # definition and notes
    "mdne": "oed",           # modern English word equivalent
}

# Entry identifier embedded in result links, e.g. "MED27997".
_ENTRY_ID = re.compile(r"MED\d+")

PAGE_HEAD = (
    "<html>\n"
    '<head><title>Middle English Dictionary open search results</title><base href="http://quod.lib.umich.edu" /><meta charset="UTF-8">\n'
    "<style>li:nth-of-type(odd) { background-color: #fdf0b4; }</style>\n"
    "</head>\n"
    '<body style="font-family: helvetica, arial, sans-serif; margin:0 auto; max-width: 700px; " >\n'
)
PAGE_FOOT = (
    "</ol>\n"
    '<p style="font-size: small; border-top: 1px solid black; margin-top: 10px; padding-top: 15px;">Page generated by MED Open Search. Software &copy; Wendi Dunlap, 2013, 2024. The MED and all search results &copy; University of Michigan. </p></body>\n'
    "</html>"
)


def build_search_params(values):
    """Return the query parameters for the search selected in the dialog.

    ``values`` is the dict returned by ``window.read()``: it holds the text
    under ``"searchword"`` and one boolean per radio button. If neither of
    the first two radios is selected the "modern English" search is used,
    matching the original if/elif/else chain.
    """
    field = next(
        (name for key, name in SEARCH_FIELDS.items() if values.get(key)),
        "oed",
    )
    return {"utf8": "✓", "search_field": field, "q": values["searchword"]}


def canonical_entry_url(href):
    """Return the direct dictionary URL for the MED id found in ``href``.

    ``href`` is returned unchanged when it contains no MED id, so the link
    still resolves through the page's ``<base href>``.
    """
    found = _ENTRY_ID.search(href)
    return f"{SEARCH_URL}/{found.group(0)}" if found else href


def collect_result_links(soup):
    """Return the HTML of each result link in ``soup`` as a list of strings.

    Each result heading is expected to be an ``<h3>`` containing an ``<a>``;
    headings without a link are skipped.
    """
    links = []
    headings = soup.find_all("h3", class_="index_title document-title-heading col-md-12")
    for heading in headings:
        link = heading.find("a")
        if link is None:
            continue

        # Drop the site's own result counter while the heading is still
        # intact; the <ol> on the generated page numbers the entries.
        counter = heading.find("span", class_="document-counter")
        if counter is not None:
            counter.decompose()

        href = link.get("href")
        if href:
            link["href"] = canonical_entry_url(href)

        links.append(str(link))
        print("Added " + str(len(links)) + " to page")
    return links


def render_results_page(search_term, link_items):
    """Return the complete results page as one HTML string.

    ``search_term`` is user input and is HTML-escaped before being embedded.
    ``link_items`` are ready-made ``<a>`` elements; an empty list produces a
    single "no results" entry.
    """
    parts = [
        PAGE_HEAD,
        '<h1 style="text-align: center;" width="100%">Middle English Dictionary open search results for the word <span style="font-style: italic">"'
        + html.escape(str(search_term))
        + '"</span>:</h1>\n',
        "<ol>\n",
    ]
    if link_items:
        parts.extend(
            '<li style="padding: 10px 0 10px 5px"><b>\n' + item + "</b></li>\n"
            for item in link_items
        )
    else:
        parts.append("<li>\nSorry, no results!\n</li>\n")
    parts.append(PAGE_FOOT)
    return "".join(parts)


def main():
    """Run the search dialog: fetch results, write the page, open it."""
    # Ask user where to save file.
    # NOTE(review): the Tk manual lists "message" as a macOS-only option for
    # the directory chooser -- confirm before running on other platforms.
    dirname = filedialog.askdirectory(title="Choose a folder for your results", message="Choose a folder for your results")
    if not dirname:
        # The dialog was cancelled: there is nowhere to save the results.
        return
    results_path = Path(dirname).expanduser() / RESULTS_FILENAME

    # Search window contents
    sg.theme('SystemDefaultForReal')
    layout = [
        [sg.Text('Enter search word: '), sg.InputText(key='searchword')],
        [[sg.Radio('Headword & forms ', group_id=1, key='hnf', default=True), sg.Radio('Definition & notes ', key='dnn', group_id=1), sg.Radio('Modern English word equivalent', key='mdne', group_id=1)]],
        [sg.Push(), sg.Button('Submit', visible=False, bind_return_key=True)],
    ]
    window = sg.Window('MED Open Search', layout)

    try:
        # Event loop: one search per submitted form until the window closes.
        while True:
            event, values = window.read()
            if event == sg.WIN_CLOSED or event == 'Cancel':
                break

            try:
                response = requests.get(
                    SEARCH_URL,
                    params=build_search_params(values),
                    timeout=REQUEST_TIMEOUT,
                )
                response.raise_for_status()
            except requests.RequestException as err:
                sg.popup_error(f"Could not reach the Middle English Dictionary:\n{err}")
                continue

            soup = BeautifulSoup(response.text, "html.parser")
            links = collect_result_links(soup)
            if not links:
                print("Error! No results.")

            try:
                # write_text closes the file before the browser is asked to
                # open it, so the page is never read half-written.
                results_path.write_text(
                    render_results_page(values["searchword"], links),
                    encoding="utf-8",
                )
            except OSError as err:
                sg.popup_error(f"Could not write {results_path}:\n{err}")
                continue

            # as_uri() percent-encodes spaces and other special characters.
            webbrowser.open_new(results_path.resolve().as_uri())
    finally:
        window.close()


if __name__ == "__main__":
    main()