Skip to content

Commit 9cb5eca

Browse files
authored
Merge pull request #513 from rusq/fixes
Brush up Convert and Transform APIs + doc
2 parents 6eb47dc + 85cf7a8 commit 9cb5eca

File tree

35 files changed

+316
-172
lines changed

35 files changed

+316
-172
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# "New" Command
2+
3+
Creates a new API configuration file containing default values. You will need
4+
to specify the filename, for example:
5+
6+
slackdump config new myconfig.toml
7+
8+
If the extension is omitted, ".toml" is automatically appended to the name of
9+
the file.
10+
11+
Configuration file contains the following groups of settings:
12+
- File download concurrency and retries;
13+
- Rate limits;
14+
- Batch sizes per request;
15+
16+
### Slack Rate Limits
17+
Slack imposes rate limits on API calls. The default values are set to the
18+
maximum allowed by Slack. If you want to change the rate limits, you can do so
19+
in the configuration file.
20+
21+
Slack API has four tiers of [rate limits][1], with Tier 1 being the most
22+
restrictive and Tier 4 being the least restrictive. The rate limits are
23+
measured in requests per minute and enforced on a (token,method) pair.
24+
25+
Let's look at the example configuration file:
26+
27+
```toml
28+
# File download settings
29+
workers = 4
30+
download_retries = 3
31+
32+
# Rate limits
33+
[tier_2]
34+
boost = 20
35+
burst = 3
36+
retries = 20
37+
38+
[tier_3]
39+
boost = 60
40+
burst = 5
41+
retries = 3
42+
43+
[tier_4]
44+
boost = 10
45+
burst = 7
46+
retries = 3
47+
48+
# Batch size settings
49+
[per_request]
50+
conversations = 100
51+
channels = 100
52+
replies = 200
53+
```
54+
55+
The base Tier values are hardcoded in the application, but the configuration
56+
file allows you to tweak the "boost" and "burst" values for each tier.
57+
58+
The "boost" value is the number of requests that slackdump will make *on top*
59+
of the base rate limit. **For example**: "Slack Web API Tier 2" has a base limit
60+
of 20+ requests per minute, but if "boost" is set to 20, Slackdump will make 40
61+
requests per minute.
62+
63+
The "burst" value is the number of requests that slackdump will make *in
64+
addition* to the base rate limit and "boost". It is passed directly as an
65+
argument to the [rate limiting library][2].
66+
67+
"Retries" is the number of times Slackdump will retry the request if it fails
68+
with a **recoverable** error. Recoverable errors are:
69+
- Rate limit exceeded;
70+
- Unexpected network disconnect;
71+
- Network error (timeout, connection refused, etc.);
72+
- HTTP errors: 408, 500, 502 - 599.
73+
74+
If Slackdump receives a recoverable error, it will do one of the following:
75+
- If it is a rate limit error, it will wait for the specified amount of time
76+
and retry the request;
77+
- For network related errors, it will use the exponential backoff algorithm to
78+
wait and retry the request up to a limit of 5 minutes.
79+
- For other types of recoverable errors, it will use the cubic backoff, capped
80+
at 5 minutes as well.
81+
82+
83+
[1]: https://api.slack.com/apis/rate-limits
84+
[2]: https://pkg.go.dev/golang.org/x/time/rate#NewLimiter

cmd/slackdump/internal/apiconfig/new.go

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package apiconfig
22

33
import (
44
"context"
5+
_ "embed"
56
"errors"
67
"fmt"
78
"os"
@@ -14,20 +15,13 @@ import (
1415
"github.com/rusq/slackdump/v3/internal/network"
1516
)
1617

17-
var CmdConfigNew = &base.Command{
18-
UsageLine: "slackdump config new",
19-
Short: "creates a new API config with the default values",
20-
Long: `
21-
# "New" Command
22-
23-
Creates a new API configuration file containing default values. You will need
24-
to specify the filename, for example:
18+
//go:embed assets/config_new.md
19+
var configNewMD string
2520

26-
slackdump config new myconfig.toml
27-
28-
If the extension is omitted, ".toml" is automatically appended to the name of
29-
the file.
30-
`,
21+
var CmdConfigNew = &base.Command{
22+
UsageLine: "slackdump config new",
23+
Short: "creates a new API config with the default values",
24+
Long: configNewMD,
3125
FlagMask: cfg.OmitAll,
3226
PrintFlags: true,
3327
}
Lines changed: 63 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,71 @@
11
# Archive Command
22

3-
The `archive` command saves your Slack workspace as a directory of files. By
3+
The `archive` command saves your Slack workspace as a SQLite database. By
44
default, it archives the entire workspace that your user can access. You can
55
customize the archive to include specific channels, groups, or direct messages
6-
by providing their URLs or IDs.
6+
by providing their URLs or IDs on the command line or in the Wizard.
7+
8+
The database is located in `slackdump.sqlite` in the output directory.
9+
10+
Alternatively, you can use the `-legacy` flag to archive into chunk file format, if
11+
you experience problems with the database. Note that the `-legacy` flag is
12+
temporary for the transition period, and will be removed in v3.2.0.
13+
<!-- TODO: remove above paragraph once -legacy is deleted -->
714

815
The benefits of using "Archive" over "Export" and "Dump" are:
9-
- it is well documented (see `slackdump help chunk`);
16+
- it is faster, because it does not need to convert the data to export or dump
17+
formats;
18+
- the database can be easily queried with SQL using the SQLite CLI or SQLite
19+
Browser;
20+
- the archiving can be "resumed" by `resume` command, meaning that you can
21+
continue where you left off in case of previous failure or incremental
22+
backups;
1023
- it can be converted to other formats, including the native Slack Export
1124
format (see `slackdump help convert`);
12-
- it is easier to parse with tools like `jq` or `grep`;
1325
- it is more convenient to build your own tools around it;
1426
- it is used internally by Slackdump to generate Slack Export and dump files,
1527
so it can be seen as "master" format for the data;
16-
- It is natively supported by `slackdump view` command, everything else uses
17-
an adapter.
1828

1929
## Features
2030

21-
### Default Behaviour
22-
- Archives the full workspace accessible to your user.
31+
## Database Archive Contents
32+
33+
The archive contains the following files:
34+
35+
- **`slackdump.sqlite`**: The SQLite database file containing all the data
36+
from the workspace.
37+
- **`__uploads`**: A directory containing files attached to messages that were
38+
downloaded, if the file download is enabled.
39+
- **`__avatars`**: A directory containing user avatars that were downloaded,
40+
if the avatar download is enabled.
41+
42+
Sometimes you might see `slackdump.sqlite-shm` and `slackdump.sqlite-wal` files
43+
in the output directory. These are temporary files created by SQLite for
44+
performance reasons. They are not necessary for the archive, unless Slackdump
45+
was interrupted or crashed. You can safely delete them if you are sure that the
46+
archive is complete.
2347

24-
### Optional Customization
25-
- Specify channels, groups, or DMs to archive by providing their URLs or IDs.
48+
### Database Structure
2649

27-
### Output Format
28-
- The archive uses the **"Chunk" format**, which can be:
29-
- Viewed using the `view` command.
30-
- Converted to other formats, including the native Slack Export format.
50+
The database contains the following tables:
51+
- **CHANNEL**: Contains all channels in the workspace.
52+
- **CHANNEL_USER**: Contains the mapping between channels and users.
53+
- **CHUNK**: Contains the "chunk" metadata, including the chunk type, number
54+
of records retrieved and the SESSION ID.
55+
- **FILE**: Contains all discovered file metadata from messages.
56+
- **MESSAGE**: Contains all messages and thread messages from the workspace.
57+
- **SEARCH_FILE**: Contains search results for files.
58+
- **SEARCH_MESSAGE**: Contains search results for messages.
59+
- **SESSION**: Contains the session information, including the start and end
60+
time of the period.
61+
- **S_USER**: Contains all users in the workspace.
62+
- **WORKSPACE**: Contains the workspace information, including the workspace ID
63+
and name.
3164

32-
## Archive Contents
65+
There are also additional views, starting with `V_`, which are used by Slackdump
66+
during the archiving process; they should not be removed or modified.
67+
68+
## Legacy Archive Contents
3369

3470
The archive behaves like the Slackdump export feature. A successful run
3571
output includes:
@@ -49,8 +85,17 @@ output includes:
4985
- Note: The `archive` command does not create ZIP files, but you can manually
5086
compress the output directory into a ZIP file if needed.
5187

52-
## What is the Chunk Format?
88+
For details on this format, run: `slackdump help chunk`
89+
90+
## Migrating from v3.x
91+
If you're using Chunk files in your tooling, you can convert the database to the
92+
chunk format using the `convert` command. For example:
93+
```bash
94+
slackdump convert -f chunk ./slackdump_20211231_123456
95+
```
5396

54-
The Chunk format is a specific structure used for archiving data. For details
55-
on this format, run: `slackdump help chunk`
97+
or
5698

99+
```bash
100+
slackdump convert -f chunk ./slackdump_20211231_123456/slackdump.sqlite
101+
```

cmd/slackdump/internal/archive/assets/search.md

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@ Slack workspace based on specified query terms. This command supports searching
55
for messages, files, or both, and outputs the results in a directory.
66

77
### Subcommands
8-
- **`slackdump search messages`**: Searches and records messages matching the
9-
given query.
10-
- **`slackdump search files`**: Searches and records files matching the given
8+
- **`slackdump search messages <query>`**: Searches and records messages
9+
matching the given query.
10+
- **`slackdump search files <query>`**: Searches and records files matching the given
1111
query.
12-
- **`slackdump search all`**: Searches and records both messages and files
12+
- **`slackdump search all <query>`**: Searches and records both messages and files
1313
matching the query.
1414

1515
### Flags
16-
- **`--no-channel-users`**: Skips retrieving user data for channels, making the
16+
- **`-no-channel-users`**: Skips retrieving user data for channels, making the
1717
process approximately 2.5x faster.
1818

1919
### Requirements
@@ -41,16 +41,14 @@ slackdump search all "project updates"
4141
```
4242

4343
### Faster Searches
44-
To speed up searches, add the `--no-channel-users` flag:
44+
To speed up searches, add the `-no-channel-users` flag:
4545

4646
```bash
4747
slackdump search messages -no-channel-users "status update"
4848
```
4949

5050

51-
## Output Directory
52-
The search command outputs results to the specified directory. The directory
53-
contains:
54-
55-
- **`search.jsonl.gz`**: A list of messages matching the query.
56-
- directory with saved files (if files are included in the search).
51+
## Output
52+
The search command outputs results to the database in the output directory.
53+
See `slackdump help archive` for details on the database structure and
54+
contents.
Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,39 @@
11
# Convert Command
22

3-
Convert Slackdump Archive format to other supported formats.
3+
Converts between different Slackdump supported formats.
44

5-
By default it converts a directory with Slackdump archive to a ZIP-archive or
6-
directory in Slack Export format.
5+
## Usage
6+
```bash
7+
slackdump convert [-f format] [-o output] <input>
8+
```
79

8-
Reference:
9-
- Slackdump archive "chunk" format: `slackdump help chunk`
10+
Where format is one of the following:
11+
- `chunk` **Chunk format**: JSON.GZ files with metadata, output is a directory.
12+
- `database` **SQLite database**: SQLite database format used by Slackdump, output is a directory.
13+
- `dump` **Dump**: JSON files where each channel is a large JSON object. Output is a directory or a zip file.
14+
- `export` **Slack Export**: The native Slack export format. Output is a directory or a zip file.
15+
16+
By default Slackdump converts to Slack Export format and writes to a ZIP file
17+
output.
18+
19+
If any files were saved in the source location, they will be copied to the target directory or ZIP file, unless
20+
`-files=false` is specified.
21+
22+
To copy avatars, use `-avatars` flag. By default, avatars are not copied.
23+
24+
## Example
25+
26+
Convert Slack Export to database format:
27+
```bash
28+
slackdump convert -f database -o MyArchive/ slack_export.zip
29+
```
30+
31+
Convert from database format to Slack Export format:
32+
```bash
33+
slackdump convert -f export -o my_archive.zip slackdump_20211231_150405/
34+
```
35+
Note that there is no need to specify the "slackdump.sqlite" file; Slackdump
36+
will automatically find it in the directory.
37+
38+
See also:
1039
- `slackdump help archive`

cmd/slackdump/internal/dump/dump.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ func dumpv3(ctx context.Context, sess client.Slack, fsa fsadapter.FS, p dumppara
204204
opts = append(opts, transform.DumpWithPipeline(subproc.PathUpdateFunc))
205205
}
206206

207-
tf, err := transform.NewDumpConverter(fsa, src, opts...)
207+
tf, err := transform.NewDump(fsa, src, opts...)
208208
if err != nil {
209209
return fmt.Errorf("failed to create transform: %w", err)
210210
}
@@ -301,7 +301,7 @@ func dumpv31(ctx context.Context, client client.Slack, fsa fsadapter.FS, p dumpp
301301
opts = append(opts, transform.DumpWithPipeline(subproc.PathUpdateFunc))
302302
}
303303

304-
tf, err := transform.NewDumpConverter(fsa, src, opts...)
304+
tf, err := transform.NewDump(fsa, src, opts...)
305305
if err != nil {
306306
return fmt.Errorf("failed to create transform: %w", err)
307307
}

cmd/slackdump/internal/format/format.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ func runFormat(ctx context.Context, cmd *base.Command, args []string) error {
5959
return err
6060
} else {
6161
var ok bool
62-
formatterInit, ok := format.Converters[convType]
62+
formatterInit, ok := convType.FormatFunc()
6363
if !ok {
6464
base.SetExitStatus(base.SInvalidParameters)
6565
return errors.New("unknown converter type")

cmd/slackdump/internal/list/common.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ func saveData(ctx context.Context, data any, filename string, typ format.Type, u
149149
// users by ID.
150150
func fmtPrint(ctx context.Context, w io.Writer, a any, typ format.Type, u []slack.User, bare bool) error {
151151
// get the converter
152-
initFn, ok := format.Converters[typ]
152+
initFn, ok := typ.FormatFunc()
153153
if !ok {
154154
return fmt.Errorf("unknown converter type: %s", typ)
155155
}

internal/chunk/backend/directory/conversations.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,8 @@ func (cv *Conversations) finalise(ctx context.Context, id chunk.FileID) error {
187187
return err
188188
}
189189
if cv.tf != nil {
190-
return cv.tf.Transform(ctx, id)
190+
channelID, threadTS := id.Split()
191+
return cv.tf.Transform(ctx, channelID, threadTS)
191192
}
192193
return nil
193194
}

internal/chunk/backend/directory/conversations_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ func TestConversations_finalise(t *testing.T) {
490490
expectFn: func(mt *Mocktracker, mtf *mock_chunk.MockTransformer) {
491491
mt.EXPECT().RefCount(chunk.FileID("fileID")).Return(0)
492492
mt.EXPECT().Unregister(chunk.FileID("fileID")).Return(nil)
493-
mtf.EXPECT().Transform(gomock.Any(), chunk.FileID("fileID")).Return(nil)
493+
mtf.EXPECT().Transform(gomock.Any(), "fileID", "").Return(nil)
494494
},
495495
wantErr: false,
496496
},
@@ -538,7 +538,7 @@ func TestConversations_finalise(t *testing.T) {
538538
expectFn: func(mt *Mocktracker, mtf *mock_chunk.MockTransformer) {
539539
mt.EXPECT().RefCount(chunk.FileID("fileID")).Return(0)
540540
mt.EXPECT().Unregister(chunk.FileID("fileID")).Return(nil)
541-
mtf.EXPECT().Transform(gomock.Any(), chunk.FileID("fileID")).Return(errors.New("transform error"))
541+
mtf.EXPECT().Transform(gomock.Any(), "fileID", "").Return(errors.New("transform error"))
542542
},
543543
wantErr: true,
544544
},

0 commit comments

Comments
 (0)