-
Notifications
You must be signed in to change notification settings - Fork 38
Open
Description
Downloading messages fails for groups that have a "+" in their name, such as sfnet.harrastus.audio+video: the very first request is answered with "400 Bad Request" (see the output below).
anon@anon ~/google-groups % ./crawler.sh -sh
#!/usr/bin/env bash
# Crawler settings. A non-empty value already present in the environment is
# kept; otherwise the default written here is assigned. All five variables
# are exported so that child processes see them.
: "${_ORG:=}"
: "${_GROUP:=sfnet.harrastus.audio+video}"
: "${_D_OUTPUT:=./sfnet.harrastus.audio+video/}"
: "${_USER_AGENT:=Mozilla/5.0 (X11; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0}"
# The default value is the single wget flag "-4".
: "${_WGET_OPTIONS:=-4}"
export _ORG _GROUP _D_OUTPUT _USER_AGENT _WGET_OPTIONS
# Default post-download hook: ignores its arguments, prints nothing and
# returns success. __wget__ invokes it as: __wget_hook <output-file> <url>.
# NOTE(review): presumably a placeholder that users redefine — confirm.
__wget_hook() {
  true
}
# Fetch a URL into a file once, then run the post-download hook.
# Globals:   _USER_AGENT (read), _WGET_OPTIONS (read)
# Arguments: $1 - path of the output file
#            $2 - URL to download
# Returns:   0 without doing anything when $1 is already a regular file;
#            otherwise the exit status of __wget_hook.
__wget__() {
  local __wget_out="$1"
  local __wget_url="$2"

  # An existing file counts as already downloaded: no wget, no hook.
  if [[ -f "$__wget_out" ]]; then
    return 0
  fi

  # _WGET_OPTIONS is left unquoted on purpose so that it is split into
  # separate wget arguments.
  # shellcheck disable=SC2086
  wget --user-agent="$_USER_AGENT" $_WGET_OPTIONS "$__wget_url" -O "$__wget_out"

  # wget's exit status is not checked here; the hook follows it directly.
  __wget_hook "$__wget_out" "$__wget_url"
}
mkdir: created directory './sfnet.harrastus.audio+video'
mkdir: created directory './sfnet.harrastus.audio+video//threads/'
mkdir: created directory './sfnet.harrastus.audio+video//msgs/'
mkdir: created directory './sfnet.harrastus.audio+video//mbox/'
:: Downloading all topics (thread) pages...
:: Creating './sfnet.harrastus.audio+video//threads/t.0' with 'categories/sfnet.harrastus.audio+video'
:: Fetching data from 'https://groups.google.com/forum/?_escaped_fragment_=categories/sfnet.harrastus.audio+video'...
--2019-07-11 09:42:47-- https://groups.google.com/forum/?_escaped_fragment_=categories/sfnet.harrastus.audio+video
Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'
Resolving groups.google.com (groups.google.com)... 64.233.165.102, 64.233.165.113, 64.233.165.101, ...
Connecting to groups.google.com (groups.google.com)|64.233.165.102|:443... connected.
HTTP request sent, awaiting response... 400 Bad Request
2019-07-11 09:42:48 ERROR 400: Bad Request.
:: Downloading list of all messages...
:: Downloading all raw messages...
Reactions are currently unavailable