Skip to content

Commit 0c8a076

Browse files
authored
Merge pull request #30 from Linusp/dev
Release 0.7.0
2 parents 349a1b2 + 1ffebae commit 0c8a076

File tree

5 files changed

+57
-46
lines changed

5 files changed

+57
-46
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
# CHANGELOG
22

3+
## v0.7.0
4+
5+
Removed
6+
7+
- Removed `InoreaderClient.get_stream_contents`.
8+
9+
Changed
10+
11+
- Added param `n` to `InoreaderClient.fetch_articles` to reduce the number of API calls, thanks to [tosborne-slalom](https://github.com/tosborne-slalom)
12+
- Supported `--batch-size` option in commands `fetch-articles`/`fetch-unread`/`fetch-starred`
13+
314
## v0.6.0
415

516
Publish to pypi!

codespell-ignore-words.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ot

inoreader/client.py

Lines changed: 26 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -143,52 +143,42 @@ def get_subscription_list(self):
143143
for item in response["subscriptions"]:
144144
yield Subscription.from_json(item)
145145

146-
def get_stream_contents(self, stream_id, c="", limit=None):
147-
fetched_count = 0
148-
stop = False
149-
while not stop:
150-
articles, c = self.__get_stream_contents(stream_id, c)
151-
for a in articles:
152-
try:
153-
yield Article.from_json(a)
154-
fetched_count += 1
155-
except Exception as e:
156-
print(e)
157-
continue
158-
if limit and fetched_count >= limit:
159-
stop = True
160-
break
161-
if c is None:
162-
break
163-
164-
def __get_stream_contents(self, stream_id, continuation=""):
146+
def __get_stream_contents(
147+
self, stream_id=None, n=50, r=None, ot=None, xt=None, it=None, c=None
148+
):
149+
"""reference: https://www.inoreader.com/developers/stream-contents"""
165150
self.check_token()
166151

167-
url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH + quote_plus(stream_id))
168-
params = {"n": 50, "r": "", "c": continuation, "output": "json"} # default 20, max 1000
152+
url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH)
153+
if stream_id:
154+
url = urljoin(url, quote_plus(stream_id))
155+
156+
params = {"n": n, "r": r, "ot": ot, "xt": xt, "it": it, "c": c}
157+
params = {arg: val for arg, val in params.items() if val is not None}
169158
response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies))
170159
if "continuation" in response:
171160
return response["items"], response["continuation"]
172161
else:
173162
return response["items"], None
174163

175-
def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, limit=None, n=50):
164+
def fetch_articles(
165+
self, stream_id=None, folder=None, tags=None, unread=True, starred=False, limit=None, n=50
166+
):
176167
self.check_token()
177168

178-
url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH)
179-
if folder:
180-
url = urljoin(url, quote_plus(self.GENERAL_TAG_TEMPLATE.format(folder)))
169+
if not stream_id and folder:
170+
stream_id = self.GENERAL_TAG_TEMPLATE.format(folder)
181171

182-
params = {"n": n, "c": str(uuid4())}
172+
params = {"stream_id": stream_id, "n": n, "c": str(uuid4())}
183173
if unread:
184174
params["xt"] = self.READ_TAG
185175

186176
if starred:
187177
params["it"] = self.STARRED_TAG
188178

189179
fetched_count = 0
190-
response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies))
191-
for data in response["items"]:
180+
items, continuation = self.__get_stream_contents(**params)
181+
for data in items:
192182
categories = {
193183
category.split("/")[-1]
194184
for category in data.get("categories", [])
@@ -202,13 +192,10 @@ def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, lim
202192
if limit and fetched_count >= limit:
203193
break
204194

205-
continuation = response.get("continuation")
206195
while continuation and (not limit or fetched_count < limit):
207196
params["c"] = continuation
208-
response = self.parse_response(
209-
self.session.post(url, params=params, proxies=self.proxies)
210-
)
211-
for data in response["items"]:
197+
items, continuation = self.__get_stream_contents(**params)
198+
for data in items:
212199
categories = {
213200
category.split("/")[-1]
214201
for category in data.get("categories", [])
@@ -221,14 +208,14 @@ def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, lim
221208
if limit and fetched_count >= limit:
222209
break
223210

224-
continuation = response.get("continuation")
225-
226-
def fetch_unread(self, folder=None, tags=None, limit=None):
227-
for article in self.fetch_articles(folder=folder, tags=tags, unread=True):
211+
def fetch_unread(self, folder=None, tags=None, limit=None, n=None):
212+
for article in self.fetch_articles(folder=folder, tags=tags, unread=True, n=n):
228213
yield article
229214

230-
def fetch_starred(self, folder=None, tags=None, limit=None):
231-
for article in self.fetch_articles(folder=folder, tags=tags, unread=False, starred=True):
215+
def fetch_starred(self, folder=None, tags=None, limit=None, n=None):
216+
for article in self.fetch_articles(
217+
folder=folder, tags=tags, unread=False, starred=True, n=n
218+
):
232219
yield article
233220

234221
def add_general_label(self, articles, label):

inoreader/main.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,9 @@ def list_tags():
200200
@main.command("fetch-unread")
201201
@click.option("-f", "--folder", required=True, help="Folder which articles belong to")
202202
@click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma")
203+
@click.option(
204+
"--batch-size", type=int, default=50, help="Maximum number of articles per API request"
205+
)
203206
@click.option("-o", "--outfile", required=True, help="Filename to save articles")
204207
@click.option(
205208
"--out-format",
@@ -208,14 +211,14 @@ def list_tags():
208211
help="Format of output file, default: json",
209212
)
210213
@catch_error
211-
def fetch_unread(folder, tags, outfile, out_format):
214+
def fetch_unread(folder, tags, batch_size, outfile, out_format):
212215
"""Fetch unread articles"""
213216
client = get_client()
214217

215218
tag_list = [] if not tags else tags.split(",")
216219
fout = codecs.open(outfile, mode="w", encoding="utf-8")
217220
writer = csv.writer(fout, delimiter=",") if out_format == "csv" else None
218-
for idx, article in enumerate(client.fetch_unread(folder=folder, tags=tag_list)):
221+
for idx, article in enumerate(client.fetch_unread(folder=folder, tags=tag_list, n=batch_size)):
219222
if idx > 0 and (idx % 10) == 0:
220223
LOGGER.info("fetched %d articles", idx)
221224
title = article.title
@@ -391,6 +394,10 @@ def get_subscriptions(outfile, folder, out_format):
391394

392395
@main.command("fetch-articles")
393396
@click.option("-i", "--stream-id", required=True, help="Stream ID which you want to fetch")
397+
@click.option(
398+
"--batch-size", type=int, default=50, help="Maximum number of articles per API request"
399+
)
400+
@click.option("--only-unread", is_flag=True, help="Fetch unread articles only")
394401
@click.option("-o", "--outfile", required=True, help="Filename to save results")
395402
@click.option(
396403
"--out-format",
@@ -399,7 +406,7 @@ def get_subscriptions(outfile, folder, out_format):
399406
help="Format of output, default: json",
400407
)
401408
@catch_error
402-
def fetch_articles(outfile, stream_id, out_format):
409+
def fetch_articles(outfile, stream_id, batch_size, only_unread, out_format):
403410
"""Fetch articles by stream id"""
404411
client = get_client()
405412

@@ -409,7 +416,9 @@ def fetch_articles(outfile, stream_id, out_format):
409416
writer = csv.DictWriter(fout, ["title", "content"], delimiter=",", quoting=csv.QUOTE_ALL)
410417
writer.writeheader()
411418

412-
for idx, article in enumerate(client.get_stream_contents(stream_id)):
419+
for idx, article in enumerate(
420+
client.fetch_articles(stream_id=stream_id, n=batch_size, unread=only_unread)
421+
):
413422
if idx > 0 and (idx % 10) == 0:
414423
LOGGER.info("fetched %d articles", idx)
415424

@@ -469,6 +478,9 @@ def dedupe(folder, thresh):
469478
@main.command("fetch-starred")
470479
@click.option("-f", "--folder", help="Folder which articles belong to")
471480
@click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma")
481+
@click.option(
482+
"--batch-size", type=int, default=50, help="Maximum number of articles per API request"
483+
)
472484
@click.option(
473485
"-o", "--outfile", help="Filename to save articles, required when output format is `csv`"
474486
)
@@ -484,7 +496,7 @@ def dedupe(folder, thresh):
484496
help="Format of output file, default: json",
485497
)
486498
@catch_error
487-
def fetch_starred(folder, tags, outfile, outdir, limit, save_image, out_format):
499+
def fetch_starred(folder, tags, batch_size, outfile, outdir, limit, save_image, out_format):
488500
"""Fetch starred articles"""
489501
client = get_client()
490502

@@ -506,7 +518,7 @@ def fetch_starred(folder, tags, outfile, outdir, limit, save_image, out_format):
506518
tag_list = [] if not tags else tags.split(",")
507519
url_to_image = {}
508520
fetched_count = 0
509-
for article in client.fetch_starred(folder=folder, tags=tag_list, limit=limit):
521+
for article in client.fetch_starred(folder=folder, tags=tag_list, limit=limit, n=batch_size):
510522
if limit and fetched_count >= limit:
511523
break
512524

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "python-inoreader"
3-
version = "0.6.0"
3+
version = "0.7.0"
44
description = "Python wrapper of Inoreader API"
55
authors = [
66
{name = "Linusp", email = "[email protected]"},

0 commit comments

Comments
 (0)