Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,6 @@ OPT_TAG_GITHUB=true # Tag Link with "GitHub"
OPT_TAG_GITHUBSTARS=true # Tag Link with "GitHub Stars"
OPT_TAG_LANGUAGE=false # Tag Link with Language of repo (e.g. Python or JavaScript)
OPT_TAG_USERNAME=false # Tag GitHub username
OPT_TAG_CUSTOM=false # Add custom tags, separated by commas (e.g. tag1,tag2)
OPT_TAG_CUSTOM=false # Add custom tags, separated by commas (e.g. tag1,tag2)
OPT_DELETE_DUPLICATE=false # Delete existing duplicate links from Collection (COLLECTION_ID)
DEBUG=false # Enable/Disable Debug Mode
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ ENV OPT_TAG_GITHUBSTARS=true
ENV OPT_TAG_LANGUAGE=false
ENV OPT_TAG_USERNAME=false
ENV OPT_TAG_CUSTOM=false
ENV OPT_DELETE_DUPLICATE=false
ENV DEBUG=false

WORKDIR /app

Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
OPT_TAG_GITHUBSTARS=true
OPT_TAG_LANGUAGE=false
OPT_TAG_USERNAME=false
OPT_DELETE_DUPLICATE=false
DEBUG=false
```

## Usage
Expand Down Expand Up @@ -75,6 +77,8 @@ uv run starwarden.py -id YOUR_COLLECTION_ID
| OPT_TAG_LANGUAGE | false | Tag Link with Language of repo (e.g. Python or JavaScript) |
| OPT_TAG_USERNAME | false | Tag GitHub username |
| OPT_TAG_CUSTOM | | Add custom tags, separated by commas (e.g. tag1,tag2) |
| OPT_DELETE_DUPLICATE | false | Delete existing duplicate links from Collection (COLLECTION_ID) |
| DEBUG | false | Enable/Disable debug mode |

## Unsupervised Updates

Expand All @@ -97,6 +101,8 @@ For automated, unsupervised updates, you can use Docker with the provided docker
OPT_TAG_GITHUBSTARS=true
OPT_TAG_LANGUAGE=false
OPT_TAG_USERNAME=false
OPT_DELETE_DUPLICATE=false
DEBUG=false
```

3. Use the following `docker-compose.yml` file:
Expand Down
2 changes: 2 additions & 0 deletions starwarden/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ def load_env():
"github_username": os.getenv("GITHUB_USERNAME"),
"linkwarden_url": os.getenv("LINKWARDEN_URL"),
"linkwarden_token": os.getenv("LINKWARDEN_TOKEN"),
"debug": os.getenv("DEBUG", "false").lower() in ("true", "1"),
"opt_tag": os.getenv("OPT_TAG", "false").lower() in ("true", "1"),
"opt_tag_github": os.getenv("OPT_TAG_GITHUB", "false").lower() in ("true", "1"),
"opt_tag_githubStars": os.getenv("OPT_TAG_GITHUBSTARS", "false").lower() in ("true", "1"),
"opt_tag_language": os.getenv("OPT_TAG_LANGUAGE", "false").lower() in ("true", "1"),
"opt_tag_username": os.getenv("OPT_TAG_USERNAME", "false").lower() in ("true", "1"),
"opt_tag_custom": os.getenv("OPT_TAG_CUSTOM", ""),
"opt_delete_duplicate": os.getenv("OPT_DELETE_DUPLICATE", "false").lower() in ("true", "1"),
"APPRISE_URLS": os.getenv("APPRISE_URLS"),
"DOCKERIZED": os.getenv("DOCKERIZED", "false").lower() in ("true", "1"),
}
Expand Down
126 changes: 109 additions & 17 deletions starwarden/linkwarden_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,45 +10,86 @@
LINK_EXISTS_GLOBALLY = object()


def get_existing_links(linkwarden_url, linkwarden_token, collection_id, delete_duplicate=False):
    """Yield the URLs of all links in a Linkwarden collection, paginating via the search API.

    Args:
        linkwarden_url: Base URL of the Linkwarden instance (trailing slash tolerated).
        linkwarden_token: API bearer token.
        collection_id: Id of the collection whose links are enumerated.
        delete_duplicate: When True, links whose URL was already seen earlier in the
            scan are remembered and deleted in batches after pagination finishes.

    Yields:
        str: Each distinct link URL in the collection (duplicates are never yielded).
    """
    url = f"{linkwarden_url.rstrip('/')}/api/v1/search"
    headers = {
        "Authorization": f"Bearer {linkwarden_token}",
        "Content-Type": "application/json",
    }
    cursor = None  # nextCursor from the previous page; None means "first page"
    seen_urls = set()
    duplicate_link_ids = []
    total_links_processed = 0

    while True:
        try:
            logger.debug(f"Fetching links from cursor {cursor} for collection {collection_id}")
            params = {"collectionId": collection_id, "sort": 1}
            if cursor is not None:
                params["cursor"] = cursor

            response = requests.get(
                url,
                params=params,
                headers=headers,
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
            links = data.get("data", {}).get("links", [])
            next_cursor = data.get("data", {}).get("nextCursor")

            logger.debug(f"Fetched {len(links)} links from cursor {cursor}")

            if not links:
                # Defensive stop: an empty page that still carried a cursor
                # would otherwise keep the loop spinning forever.
                logger.info(f"No links returned from cursor {cursor}. Stopping pagination.")
                break

            total_links_processed += len(links)

            for link in links:
                link_url = link["url"]
                link_id = link["id"]

                if link_url in seen_urls:
                    # Duplicate URL: queue its id for optional deletion and
                    # never yield it to the caller.
                    logger.debug(f"Found duplicate link: {link_url} (ID: {link_id})")
                    duplicate_link_ids.append(link_id)
                    continue

                seen_urls.add(link_url)
                yield link_url

            if next_cursor is None:
                logger.info("Reached end of pagination (no nextCursor in response)")
                break
            cursor = next_cursor
            logger.debug(f"Advancing to next cursor: {cursor}")

        except requests.RequestException as e:
            logger.error(f"Error fetching links from cursor {cursor}: {str(e)}")
            if hasattr(e, "response") and e.response is not None:
                logger.error(f"Response status code: {e.response.status_code}")
                logger.error(f"Response content: {e.response.text}")
            break

    # Handle duplicate deletion if requested, batching ids to bound payload size.
    if delete_duplicate and duplicate_link_ids:
        logger.info(f"Found {len(duplicate_link_ids)} duplicate links to delete: {duplicate_link_ids}")

        batch_size = 100
        total_deleted = 0

        for i in range(0, len(duplicate_link_ids), batch_size):
            batch = duplicate_link_ids[i:i + batch_size]
            logger.debug(f"Deleting batch {i//batch_size + 1}: {len(batch)} links")

            deleted_count = delete_links(linkwarden_url, linkwarden_token, batch)
            if deleted_count is not None:
                total_deleted += deleted_count
            else:
                logger.error(f"Failed to delete batch {i//batch_size + 1}")

        logger.info(f"Successfully deleted {total_deleted} duplicate links out of {len(duplicate_link_ids)} found")
    logger.info(f"Processed {total_links_processed} total links in collection {collection_id}")


def get_collections(linkwarden_url, linkwarden_token):
Expand Down Expand Up @@ -161,3 +202,54 @@ def upload_link(linkwarden_url, linkwarden_token, collection_id, repo, tags):
logger.error(f"Response content: {e.response.text}")

return None


def delete_links(linkwarden_url, linkwarden_token, link_ids):
    """Delete a batch of links from Linkwarden by id.

    Args:
        linkwarden_url: Base URL of the Linkwarden instance (trailing slash tolerated).
        linkwarden_token: API bearer token.
        link_ids: List of link ids to delete.

    Returns:
        int: The number of links the server reports as deleted (0 for an empty batch).
        None: On authentication failure, timeout, or any other request error.
    """
    if not link_ids:
        # Nothing to delete — skip the HTTP round-trip entirely.
        logger.debug("delete_links called with an empty id list; nothing to do")
        return 0

    url = f"{linkwarden_url.rstrip('/')}/api/v1/links"
    headers = {
        "Authorization": f"Bearer {linkwarden_token}",
        "Content-Type": "application/json",
    }

    request_data = {
        "linkIds": link_ids
    }

    logger.debug(f"Attempting to delete {len(link_ids)} links: {link_ids}")

    try:
        response = requests.delete(
            url,
            headers=headers,
            json=request_data,
            timeout=30,
        )

        logger.debug(f"Delete response status code: {response.status_code}")
        logger.debug(f"Delete response content: {response.text}")

        # 401 is handled before raise_for_status so the caller gets a clear log line.
        if response.status_code == 401:
            logger.error("Unauthorized: Invalid or expired token for delete operation")
            return None

        response.raise_for_status()
        response_json = response.json()

        # Server reports how many links it actually removed.
        deleted_count = response_json.get("response", {}).get("count", 0)

        logger.info(f"Successfully deleted {deleted_count} links out of {len(link_ids)} requested")

        if deleted_count != len(link_ids):
            logger.warning(f"Expected to delete {len(link_ids)} links, but only {deleted_count} were deleted")

        return deleted_count

    except Timeout:
        logger.error("Request timed out while deleting links")
        return None
    except requests.RequestException as e:
        logger.error(f"Error deleting links from Linkwarden: {str(e)}")
        if hasattr(e, "response") and e.response is not None:
            logger.error(f"Response status code: {e.response.status_code}")
            logger.error(f"Response content: {e.response.text}")
        return None
Loading