# MailmanArchiveScraper-example.cfg
# (from the pbabinca/mailman-archive-scraper repository, master branch)
#
# Settings for the MailmanArchiveScraper script.
#
# Copy the MailmanArchiveScraper-example.cfg, rename it MailmanArchiveScraper.cfg
# and put it in the same directory as the script itself.
#
# The minimum settings you need to set are:
# 1. domain -- The domain name that your Mailman pages are on.
# 2. list_name -- Name of your mailing list.
# 3. email and password -- Required if your Mailman archive is password protected.
# 4. publish_dir -- The path to the local directory the files should be republished to.
# 5. publish_url -- If you're publishing the messages to a website.
#
# If you want an RSS feed to be created you'll also need to fill in the settings in the [RSS] section.


###############################################################################
[Mailman]
# Settings associated with the remote Mailman server.


# The domain name that your Mailman pages are on.
# eg, in http://lists.example.com/mailman/listinfo/list-name
# it's the 'lists.example.com' part.
domain =

# The protocol used to fetch the pages.
# http is the usual choice: if that's what you want, leave the next line
# commented out. Uncomment it only to use something other than http:
# protocol = https

# Name of your mailing list.
# This can be found in the URL of the list info page.
# eg, in http://lists.example.com/mailman/listinfo/list-name
# it's the 'list-name' part.
list_name =


# The email address and password you use to log in to the private Mailman pages.
# If the email is left blank, we assume the list is public and the archives require no login.
# NOTE: the password is kept in this file as plain text, so restrict who can read the file.
email =
password =


###############################################################################
[Conversion]
# Settings about how to filter the pages before saving copies.


# Remove email addresses or not?
# 1 = yes, 0 = no.
filter_email_addresses = 0


# We'll replace all occurrences of the main list info URL
# eg, http://lists.example.com/mailman/listinfo/list-name
# or, http://lists.example.com/pipermail/list-name
# with this string. If you don't want it changed, put the main list info URL here.
list_info_url =


# Should quoted text be stripped out of messages?
# If so, how many levels?
# 0 - Leave all quoted text as found.
# 1 - Remove all text except the first level of quotes (>)
# 2 - Remove all text except the first two levels (> and >>)
# etc.
strip_quotes = 0


# A list of search/replace pairs, one per line, with each pair's search and
# replace strings separated by '//'.
# Search strings are case-insensitive.
# This happens before filter_email_addresses is done.
# Lines after the first should be indented with a tab. eg:
# search_replace = Old//New
#	Something Old//Something New
#	Ancient//Modern
search_replace =


# The path to a file containing HTML. The contents will be inserted in the <head> of every page.
# eg, use this to insert links to stylesheets, Google Analytics javascript, etc.
head_html =


###############################################################################
[RSS]
# Settings related to the generated RSS file.
# You can leave all of these blank if you don't want an RSS feed generated.


# The local path where the RSS file should be generated.
# eg, /Users/phil/Sites/lists/html/list-name/rss.xml
rss_file =


# The number of recent messages to be included in the RSS file.
items_for_rss = 7


# The title of the RSS feed.
rss_title =


# The description of the RSS feed (usually one sentence).
rss_description =


###############################################################################
[Local]
# Settings about the local archive.


# The absolute path to a local directory in which all the list's HTML files will be stored.
# If it doesn't exist the script will try to create it.
# eg, /Users/phil/Sites/examplesite/html/list-name/
publish_dir =


# The full web address to the directory referred to by publish_dir.
# eg, http://www.example.com/list-name/
# If you're not publishing these on the web, leave this blank.
publish_url =


# How many hours back should we look for new messages when the script runs?
# Set it to more than however often the script is run via cron.
# eg, if you run the script every hour, set this to maybe 4ish, to allow for times the script might fail.
# Set to 0 to fetch ALL messages. Every single page in the archive. All of them.
hours_to_go_back = 6


# Should we output extra data while the script is running?
# Either 1 or 0 (for yes or no).
# Probably set it to 1 if you're running on the command line.
# Set to 0 if running via cron - it'll print something only if something goes wrong.
verbose = 1