Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/en/configuration/guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,16 @@ KTOOLBOX_JOB__COUNT=10
# without making a new sub directory to store them
KTOOLBOX_JOB__POST_STRUCTURE__ATTACHMENTS=./

# Customize the attachments directory name using post properties
# This will create folders like "Title_of_Post" instead of "attachments"
KTOOLBOX_JOB__POST_STRUCTURE__ATTACHMENTS_DIRNAME_FORMAT={title}

# Rename attachments in numerical order, e.g. `1.png`, `2.png`, ...
KTOOLBOX_JOB__SEQUENTIAL_FILENAME=True

# Preserve original filename while using sequential numbering, e.g. `1_OriginalFileName.png`, `2_OriginalFileName.png`, ...
KTOOLBOX_JOB__SEQUENTIAL_FILENAME_INDENTATION=True

# Customize the filename format by inserting an empty `{}` to represent the basic filename.
# Similar to `post_dirname_format`, you can use some of the properties in `Post`.
# For example: `{title}_{}` > `HelloWorld_b4b41de2-8736-480d-b5c3-ebf0d917561b`, etc.
Expand Down
65 changes: 65 additions & 0 deletions docs/en/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,37 @@ KTOOLBOX_JOB__POST_STRUCTURE__ATTACHMENTS=./

`./` means attachments will be downloaded directly into the post directory.

!!! info "Notice"
For more information, please visit [Configuration-Guide](configuration/guide.md) page.

## How to customize the attachments folder name?

You can build the attachments folder name from post properties such as the title or ID by setting the `attachments_dirname_format` configuration.

Set the configuration by `prod.env` dotenv file or system environment variables:
```dotenv
# Use post title as attachments folder name
KTOOLBOX_JOB__POST_STRUCTURE__ATTACHMENTS_DIRNAME_FORMAT={title}

# Use post ID and title combined
KTOOLBOX_JOB__POST_STRUCTURE__ATTACHMENTS_DIRNAME_FORMAT={id}_{title}

# Use published date and title
KTOOLBOX_JOB__POST_STRUCTURE__ATTACHMENTS_DIRNAME_FORMAT={published}_{title}
```

This will create folder structures like:
```
Post_Directory/
├─ content.txt
├─ post.json
└─ Title_of_Post/ # Instead of "attachments"
   ├─ 1.jpg
   └─ 2.png
```

Available properties: `{id}`, `{user}`, `{service}`, `{title}`, `{added}`, `{published}`, `{edited}`

!!! info "Notice"
For more information, please visit [Configuration-Guide](configuration/guide.md) page.

Expand Down Expand Up @@ -82,6 +113,40 @@ KTOOLBOX_JOB__SEQUENTIAL_FILENAME=True
KTOOLBOX_JOB__POST_DIRNAME_FORMAT=[{published}]{id}
```

## How to preserve original filenames while using sequential numbering?

When using sequential filename mode, you can preserve the original filename while adding sequential numbers as a prefix by enabling `sequential_filename_indentation`.

Set the configuration by `prod.env` dotenv file or system environment variables:
```dotenv
# Enable sequential filename numbering
KTOOLBOX_JOB__SEQUENTIAL_FILENAME=True

# Preserve original filename with sequential prefix, e.g. `1_OriginalFileName.png`, `2_OriginalFileName.png`, ...
KTOOLBOX_JOB__SEQUENTIAL_FILENAME_INDENTATION=True
```

This will create filenames like:
- Without indentation: `1.png`, `2.jpg`, `3.gif`
- With indentation: `1_OriginalImage.png`, `2_PhotoFile.jpg`, `3_Animation.gif`

!!! info "Notice"
This feature works together with `sequential_filename_excludes` to exclude certain file types from sequential numbering.

## Filename too long

In some cases, the filename or the post directory name can be too long and cause the download to fail.
To solve this issue, you can enable **sequential filename** or use a **custom post directory name**.

Set the configuration by `prod.env` dotenv file or system environment variables:
```dotenv
# Rename attachments in numerical order, e.g. `1.png`, `2.png`, ...
KTOOLBOX_JOB__SEQUENTIAL_FILENAME=True

# Set the post directory name to its release/publish date and ID, e.g. `[2024-1-1]11223344`
KTOOLBOX_JOB__POST_DIRNAME_FORMAT=[{published}]{id}
```

## How to Configure a Proxy?

You can set the `HTTPS_PROXY`, `HTTP_PROXY`, and `ALL_PROXY` environment variables to achieve this.
Expand Down
15 changes: 12 additions & 3 deletions ktoolbox/action/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
from ktoolbox._enum import PostFileTypeEnum, DataStorageNameEnum
from ktoolbox.action import ActionRet, fetch_creator_posts, FetchInterruptError
from ktoolbox.action.utils import generate_post_path_name, filter_posts_by_date, generate_filename, \
filter_posts_by_keywords, filter_posts_by_keywords_exclude, generate_grouped_post_path, extract_content_images
filter_posts_by_keywords, filter_posts_by_keywords_exclude, generate_grouped_post_path, extract_content_images, \
generate_attachments_dirname
from ktoolbox.api.model import Post, Attachment, Revision
from ktoolbox.api.posts import get_post_revisions as get_post_revisions_api, get_post as get_post_api
from ktoolbox.configuration import config
Expand Down Expand Up @@ -42,7 +43,9 @@ async def create_job_from_post(

# Load ``PostStructureConfiguration``
if post_dir:
attachments_path = post_path / config.job.post_structure.attachments # attachments
# Generate attachments directory name using the format if specified
attachments_dirname = generate_attachments_dirname(post)
attachments_path = post_path / attachments_dirname # attachments
attachments_path.mkdir(exist_ok=True)
content_path = post_path / config.job.post_structure.content # content
content_path.parent.mkdir(exist_ok=True)
Expand Down Expand Up @@ -84,7 +87,13 @@ async def create_job_from_post(
should_use_sequential = (config.job.sequential_filename and
file_path_obj.suffix.lower() not in config.job.sequential_filename_excludes)
if should_use_sequential:
basic_filename = f"{sequential_counter}{file_path_obj.suffix}"
if config.job.sequential_filename_indentation:
# Use sequential number with original filename: 1_OriginalFileName.png
basic_filename_without_ext = file_path_obj.stem
basic_filename = f"{sequential_counter}_{basic_filename_without_ext}{file_path_obj.suffix}"
else:
# Use only sequential number: 1.png
basic_filename = f"{sequential_counter}{file_path_obj.suffix}"
sequential_counter += 1
else:
basic_filename = file_path_obj.name
Expand Down
20 changes: 20 additions & 0 deletions ktoolbox/action/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
__all__ = [
"generate_post_path_name",
"generate_filename",
"generate_attachments_dirname",
"generate_year_dirname",
"generate_month_dirname",
"generate_grouped_post_path",
Expand Down Expand Up @@ -63,6 +64,25 @@ def generate_post_path_name(post: Post) -> str:
exit(1)


def generate_attachments_dirname(post: Post) -> str:
    """Generate the directory name for a post's attachments.

    Fills ``config.job.post_structure.attachments_dirname_format`` with the
    post's ``id``, ``user``, ``service``, ``title``, ``added``, ``published``
    and ``edited`` values, then passes the result through ``sanitize_filename``.
    Date fields are rendered with ``TIME_FORMAT``; a missing date becomes an
    empty string.

    :param post: Post whose properties are substituted into the format string
    :return: The sanitized attachments directory name
    :raises SystemExit: With exit code ``1``, after logging an error, when the \
    format string references an unknown key, uses a positional field such as \
    ``{}``, or is otherwise malformed
    """
    # NOTE(review): ``create_job_from_post`` now calls this instead of reading
    # ``post_structure.attachments`` — confirm that setups relying on
    # ``attachments=./`` still download straight into the post directory.
    dirname_format = config.job.post_structure.attachments_dirname_format
    try:
        dirname = dirname_format.format(
            id=post.id,
            user=post.user,
            service=post.service,
            title=post.title,
            added=post.added.strftime(TIME_FORMAT) if post.added else "",
            published=post.published.strftime(TIME_FORMAT) if post.published else "",
            edited=post.edited.strftime(TIME_FORMAT) if post.edited else ""
        )
    except KeyError as e:
        logger.error(f"`PostStructureConfiguration.attachments_dirname_format` contains invalid key: {e}")
        # ``exit()`` is injected by the ``site`` module and is not guaranteed to
        # exist; raising ``SystemExit`` directly has the same effect everywhere.
        raise SystemExit(1) from e
    except (IndexError, ValueError) as e:
        # ``str.format`` raises IndexError for positional fields (``{}``, ``{0}``)
        # and ValueError for unbalanced braces or an unsupported format spec.
        logger.error(f"`PostStructureConfiguration.attachments_dirname_format` is not a valid format string: {e}")
        raise SystemExit(1) from e
    return sanitize_filename(dirname)


def generate_year_dirname(post: Post) -> str:
"""Generate year directory name for post grouping."""
# Use published date, fall back to added date
Expand Down
12 changes: 11 additions & 1 deletion ktoolbox/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ class PostStructureConfiguration(BaseModel):
└─ ...
```

- Available properties for ``file``
- Available properties for ``file`` and ``attachments_dirname_format``

| Property | Type |
|---------------|--------|
Expand All @@ -139,6 +139,12 @@ class PostStructureConfiguration(BaseModel):
| ``edited`` | Date |

:ivar attachments: Sub path of attachment directory
:ivar attachments_dirname_format: Customize the attachments directory name format, you can use some of the \
[properties][ktoolbox.configuration.JobConfiguration] in ``Post``. \
e.g. ``{title}`` could result in the dirname ``Title_of_Post``, \
``{user}_{title}`` could result in a dirname like ``234234_Title_of_Post``. \
Meanwhile, you can also use the formatting feature of the Python Format Specification Mini-Language, for example: \
``{title:.6}`` could shorten the title length to 6 characters like ``HiEveryoneThisIsALongTitle`` to ``HiEver``
:ivar content: Sub path of post content file
:ivar external_links: Sub path of external links file (for cloud storage links found in content)
:ivar file: The format of the post `file` filename (`file` is not `attachment`, each post has only one `file`, usually the cover image) \
Expand All @@ -152,6 +158,7 @@ class PostStructureConfiguration(BaseModel):
:ivar revisions: Sub path of revisions directory
"""
attachments: Path = Path("attachments")
attachments_dirname_format: str = "attachments"
content: Path = Path("content.txt")
external_links: Path = Path("external_links.txt")
file: str = "{id}_{}"
Expand Down Expand Up @@ -197,6 +204,8 @@ class JobConfiguration(BaseModel):
:ivar mix_posts: Save all files from different posts at same path in creator directory. \
It would not create any post directory, and ``CreatorIndices`` would not be recorded.
:ivar sequential_filename: Rename attachments in numerical order, e.g. ``1.png``, ``2.png``, ...
:ivar sequential_filename_indentation: When enabled with ``sequential_filename``, preserve the original filename \
while adding sequential numbers as prefix, e.g. ``1_OriginalFileName.png``, ``2_OriginalFileName.png``, ...
:ivar sequential_filename_excludes: File extensions to exclude from sequential naming when ``sequential_filename`` is enabled. \
Files with these extensions will keep their original names. e.g. ``[".psd", ".zip", ".mp4"]``
:ivar filename_format: Customize the filename format by inserting an empty ``{}`` to represent the basic filename.
Expand Down Expand Up @@ -235,6 +244,7 @@ class JobConfiguration(BaseModel):
post_structure: PostStructureConfiguration = PostStructureConfiguration()
mix_posts: bool = False
sequential_filename: bool = False
sequential_filename_indentation: bool = False
sequential_filename_excludes: Set[str] = Field(default_factory=set)
filename_format: str = "{}"
# noinspection PyDataclass
Expand Down
169 changes: 169 additions & 0 deletions tests/ktoolbox/test_attachments_dirname_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import tempfile
from datetime import datetime
from pathlib import Path

import pytest

from ktoolbox._enum import PostFileTypeEnum
from ktoolbox.action.job import create_job_from_post
from ktoolbox.api.model import Post, Attachment, File
from ktoolbox.configuration import config, JobConfiguration, PostStructureConfiguration


@pytest.fixture
def mock_post():
    """Build a ``Post`` with two attachments and a cover file for the tests."""
    # All three date fields share the same value.
    timestamp = datetime(2024, 1, 1)
    attachments = [
        Attachment(name="image1.jpg", path="/data/12/34/image1.jpg"),
        Attachment(name="image2.png", path="/data/56/78/image2.png"),
    ]
    cover = File(name="cover.jpg", path="/data/kl/mn/cover.jpg")
    return Post(
        id="test_post_123",
        user="test_user",
        service="patreon",
        title="Test Post Title",
        content="Test content",
        published=timestamp,
        added=timestamp,
        edited=timestamp,
        attachments=attachments,
        file=cover,
    )


class TestAttachmentsDirnameFormat:
    """Test attachments directory naming format functionality.

    Each test runs against a fresh ``JobConfiguration``; the configuration that
    was active beforehand is restored afterwards so changes made here (for
    example ``mix_posts = True``) do not leak into other test modules.
    """

    def setup_method(self):
        """Swap in a default job configuration, remembering the previous one."""
        self._original_job = config.job
        config.job = JobConfiguration()
        config.job.post_structure = PostStructureConfiguration()

    def teardown_method(self):
        """Restore the job configuration that was active before the test."""
        config.job = self._original_job

    @staticmethod
    async def _attachment_jobs(post, post_path, **kwargs):
        """Create jobs for ``post`` and return only the attachment jobs.

        Asserts that at least one attachment job exists, so the per-job
        assertions in the tests cannot pass vacuously on an empty list.
        Extra keyword arguments are forwarded to ``create_job_from_post``.
        """
        jobs = await create_job_from_post(post, post_path, dump_post_data=False, **kwargs)
        attachment_jobs = [job for job in jobs if job.type == PostFileTypeEnum.Attachment]
        assert attachment_jobs, "expected at least one attachment job"
        return attachment_jobs

    @pytest.mark.asyncio
    async def test_default_attachments_dirname(self, mock_post):
        """Test that default attachments directory name is used when format is default."""
        config.job.post_structure.attachments_dirname_format = "attachments"

        with tempfile.TemporaryDirectory() as temp_dir:
            post_path = Path(temp_dir) / "test_post"
            attachment_jobs = await self._attachment_jobs(mock_post, post_path)

            # Check that all attachment jobs use the default attachments directory
            for job in attachment_jobs:
                assert job.path.name == "attachments"

    @pytest.mark.asyncio
    async def test_custom_attachments_dirname_with_title(self, mock_post):
        """Test custom attachments directory naming using post title."""
        config.job.post_structure.attachments_dirname_format = "{title}"

        with tempfile.TemporaryDirectory() as temp_dir:
            post_path = Path(temp_dir) / "test_post"
            attachment_jobs = await self._attachment_jobs(mock_post, post_path)

            # Check that all attachment jobs use the custom directory name
            for job in attachment_jobs:
                assert job.path.name == "Test Post Title"

    @pytest.mark.asyncio
    async def test_custom_attachments_dirname_with_id(self, mock_post):
        """Test custom attachments directory naming using post id."""
        config.job.post_structure.attachments_dirname_format = "{id}"

        with tempfile.TemporaryDirectory() as temp_dir:
            post_path = Path(temp_dir) / "test_post"
            attachment_jobs = await self._attachment_jobs(mock_post, post_path)

            # Check that all attachment jobs use the custom directory name
            for job in attachment_jobs:
                assert job.path.name == "test_post_123"

    @pytest.mark.asyncio
    async def test_custom_attachments_dirname_with_combined_format(self, mock_post):
        """Test custom attachments directory naming using combined format."""
        config.job.post_structure.attachments_dirname_format = "{id}_{title}"

        with tempfile.TemporaryDirectory() as temp_dir:
            post_path = Path(temp_dir) / "test_post"
            attachment_jobs = await self._attachment_jobs(mock_post, post_path)

            # Check that all attachment jobs use the custom directory name
            for job in attachment_jobs:
                assert job.path.name == "test_post_123_Test Post Title"

    @pytest.mark.asyncio
    async def test_custom_attachments_dirname_with_date_format(self, mock_post):
        """Test custom attachments directory naming using date formatting."""
        config.job.post_structure.attachments_dirname_format = "{published}_{title}"

        with tempfile.TemporaryDirectory() as temp_dir:
            post_path = Path(temp_dir) / "test_post"
            attachment_jobs = await self._attachment_jobs(mock_post, post_path)

            # Check that all attachment jobs use the custom directory name with formatted date
            for job in attachment_jobs:
                assert job.path.name == "2024-01-01_Test Post Title"

    @pytest.mark.asyncio
    async def test_custom_attachments_dirname_with_format_specification(self, mock_post):
        """Test custom attachments directory naming using Python format specification."""
        config.job.post_structure.attachments_dirname_format = "{title:.6}"

        with tempfile.TemporaryDirectory() as temp_dir:
            post_path = Path(temp_dir) / "test_post"
            attachment_jobs = await self._attachment_jobs(mock_post, post_path)

            # Check that all attachment jobs use the truncated title
            for job in attachment_jobs:
                assert job.path.name == "Test P"  # First 6 characters of "Test Post Title"

    @pytest.mark.asyncio
    async def test_attachments_dirname_format_when_mix_posts_enabled(self, mock_post):
        """Test that attachments dirname format doesn't affect behavior when mix_posts is enabled."""
        config.job.mix_posts = True
        config.job.post_structure.attachments_dirname_format = "{title}"

        with tempfile.TemporaryDirectory() as temp_dir:
            post_path = Path(temp_dir) / "test_post"
            attachment_jobs = await self._attachment_jobs(mock_post, post_path, post_dir=False)

            # When mix_posts is enabled and post_dir=False, attachments go directly to post_path
            for job in attachment_jobs:
                assert job.path == post_path

    @pytest.mark.asyncio
    async def test_attachments_path_creation(self, mock_post):
        """Test that the custom attachments directory is actually created."""
        config.job.post_structure.attachments_dirname_format = "{title}"

        with tempfile.TemporaryDirectory() as temp_dir:
            post_path = Path(temp_dir) / "test_post"
            await create_job_from_post(mock_post, post_path, dump_post_data=False)

            # Check that the custom directory was created
            expected_attachments_path = post_path / "Test Post Title"
            assert expected_attachments_path.exists()
            assert expected_attachments_path.is_dir()
Loading