diff --git a/.gitignore b/.gitignore
index 45d553be4..93ddeafd0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ __pycache__
.mypy_cache_test
.env
.venv*
+.idea
+
diff --git a/libs/community/extended_testing_deps.txt b/libs/community/extended_testing_deps.txt
index 6ad4f43e8..2dd58f9e7 100644
--- a/libs/community/extended_testing_deps.txt
+++ b/libs/community/extended_testing_deps.txt
@@ -59,8 +59,8 @@ openapi-pydantic>=0.3.2,<0.4
oracle-ads>=2.9.1,<3
oracledb>=2.2.0,<3
pandas>=2.0.1,<3
-pdfminer-six==20231228
-pdfplumber>=0.11
+pdfminer-six>=20250324
+pdfplumber>=0.11.6
pgvector>=0.1.6,<0.2
playwright>=1.48.0,<2
praw>=7.7.1,<8
diff --git a/libs/community/langchain_community/document_loaders/parsers/pdf.py b/libs/community/langchain_community/document_loaders/parsers/pdf.py
index 6b3a0a065..fa151861e 100644
--- a/libs/community/langchain_community/document_loaders/parsers/pdf.py
+++ b/libs/community/langchain_community/document_loaders/parsers/pdf.py
@@ -129,6 +129,7 @@ def _validate_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
The standard keys are:
- source
+ - page (if mode='page')
- total_page
- creationdate
- creator
@@ -1386,97 +1387,534 @@ def _extract_images_from_page(self, page: pypdfium2._helpers.page.PdfPage) -> st
class PDFPlumberParser(BaseBlobParser):
- """Parse `PDF` with `PDFPlumber`."""
+ """Parse a blob from a PDF using `pdfplumber` library.
+
+ This class provides methods to parse a blob from a PDF document, supporting various
+ configurations such as handling password-protected PDFs, extracting images, and
+ defining the extraction mode.
+ It integrates the 'pdfplumber' library for PDF processing and offers synchronous
+ blob parsing.
+
+ Examples:
+ Setup:
+
+ .. code-block:: bash
+
+ pip install -U langchain-community pdfplumber
+
+ Load a blob from a PDF file:
+
+ .. code-block:: python
+
+ from langchain_core.documents.base import Blob
+
+ blob = Blob.from_path("./example_data/layout-parser-paper.pdf")
+
+ Instantiate the parser:
+
+ .. code-block:: python
+
+ from langchain_community.document_loaders.parsers import PDFPlumberParser
+
+ parser = PDFPlumberParser(
+ # password=None,
+ mode="single",
+ pages_delimiter="\n\f",
+ # extract_tables="markdown",
+ metadata_format="standard",
+ )
+
+ Lazily parse the blob:
+
+ .. code-block:: python
+
+ docs = []
+ docs_lazy = parser.lazy_parse(blob)
+
+ for doc in docs_lazy:
+ docs.append(doc)
+ print(docs[0].page_content[:100])
+ print(docs[0].metadata)
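+
+ Extract images as text (a sketch; assumes the optional OCR dependencies
+ for `RapidOCRBlobParser`, e.g. `rapidocr-onnxruntime`, are installed):
+
+ .. code-block:: python
+
+     from langchain_community.document_loaders.parsers import PDFPlumberParser
+     from langchain_community.document_loaders.parsers.images import (
+         RapidOCRBlobParser,
+     )
+
+     parser = PDFPlumberParser(
+         mode="page",
+         images_parser=RapidOCRBlobParser(),
+         images_inner_format="markdown-img",
+     )
+     docs = parser.parse(blob)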
+ """
def __init__(
self,
text_kwargs: Optional[Mapping[str, Any]] = None,
dedupe: bool = False,
extract_images: bool = False,
+ *,
+ password: Optional[str] = None,
+ mode: Literal["single", "page"] = "page",
+ pages_delimiter: str = _DEFAULT_PAGES_DELIMITER,
+ images_parser: Optional[BaseImageBlobParser] = None,
+ images_inner_format: Literal["text", "markdown-img", "html-img"] = "text",
+ extract_tables: Optional[Literal["csv", "markdown", "html"]] = None,
+ extract_tables_settings: Optional[dict[str, Any]] = None,
+ metadata_format: Literal["legacy", "standard"] = "legacy",
) -> None:
"""Initialize the parser.
Args:
+ password: Optional password for opening encrypted PDFs.
+ mode: The extraction mode, either "single" for the entire document or "page"
+ for page-wise extraction.
+ pages_delimiter: A string delimiter to separate pages in single-mode
+ extraction.
+ extract_images: Whether to extract images from the PDF.
+ images_parser: Optional image blob parser.
+ images_inner_format: The format for the parsed output.
+ - "text" = return the content as is
+ - "markdown-img" = wrap the content into an image markdown link, with
+ the link pointing to (`![body](#)`)
+ - "html-img" = wrap the content as the `alt` text of an `<img>` tag,
+ with the link pointing to (`<img alt="{body}" src="#"/>`)
+ extract_tables: Whether to extract tables from the PDF in a specific
+ format, such as "csv", "markdown" or "html".
text_kwargs: Keyword arguments to pass to ``pdfplumber.Page.extract_text()``
- dedupe: Avoiding the error of duplicate characters if `dedupe=True`.
+ dedupe: Avoid duplicate characters if `dedupe=True`.
+ extract_tables_settings: Optional dictionary of settings for customizing
+ table extraction.
+ metadata_format: With 'legacy', use CamelCase metadata keys;
+ with 'standard', use lowercase keys.
+
+ Returns:
+ This method does not directly return data. Use the `parse` or `lazy_parse`
+ methods to retrieve parsed documents with content and metadata.
+
+ Raises:
+ ValueError: If the `mode` is not "single" or "page".
+ ValueError: If the `extract_tables` is not "csv", "markdown" or "html".
+ """
+ super().__init__()
+ if mode not in ["single", "page"]:
+ raise ValueError("mode must be single or page")
+ if extract_tables and extract_tables not in ["csv", "markdown", "html"]:
+ raise ValueError("mode must be csv, markdown or html")
+ if extract_images and not images_parser:
+ images_parser = RapidOCRBlobParser()
+ self.password = password
+ self.extract_images = extract_images
+ self.images_parser = images_parser
+ self.images_inner_format = images_inner_format
+ self.mode = mode
+ self.pages_delimiter = pages_delimiter
+ self.dedupe = dedupe
+ self.text_kwargs = text_kwargs or {}
+ self.extract_tables = extract_tables
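+ # Default pdfplumber table-detection settings: locate cells from ruled
+ # lines, with tolerances to snap nearly-aligned rulings and intersections.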
+ self.extract_tables_settings = extract_tables_settings or {
+ "vertical_strategy": "lines",
+ "horizontal_strategy": "lines",
+ "snap_y_tolerance": 5,
+ "intersection_x_tolerance": 15,
+ }
+ if metadata_format == "legacy":
+ warnings.warn(
+ "The default value 'legacy' use some CamelCase keys. "
+ "It's will be deprecated in the next major version."
+ )
+
+ self.metadata_format = metadata_format
+
+ def _validate_metadata(self, metadata: dict[str, Any]) -> dict[str, Any]:
+ if self.metadata_format == "legacy":
+ return metadata
+ else:
+ return _validate_metadata(metadata)
+
+ def lazy_parse(self, blob: Blob) -> Iterator[Document]:
+ """Lazily parse the blob.
+
+ Args:
+ blob: The blob to parse.
+
+ Raises:
+ ImportError: If the `pdfplumber` package is not found.
+
+ Yields:
+ An iterator over the parsed documents.
"""
try:
- import PIL # noqa:F401
+ import pdfplumber
except ImportError:
raise ImportError(
- "pillow package not found, please install it with `pip install pillow`"
+ "pdfplumber package not found, please install it "
+ "with `pip install pdfplumber`"
)
- self.text_kwargs = text_kwargs or {}
- self.dedupe = dedupe
- self.extract_images = extract_images
-
- def lazy_parse(self, blob: Blob) -> Iterator[Document]:
- """Lazily parse the blob."""
- import pdfplumber
with blob.as_bytes_io() as file_path:
- doc = pdfplumber.open(file_path) # open document
-
- yield from [
- Document(
- page_content=self._process_page_content(page)
- + "\n"
- + self._extract_images_from_page(page),
- metadata=dict(
- {
+ doc = pdfplumber.open(file_path, password=self.password) # open document
+ from pdfplumber.utils import geometry
+
+ contents = []
+ # The legacy version uses CreationDate, Creator, etc.
+ # The new 'standard' version uses lowercase keys.
+ if self.metadata_format == "legacy":
+ doc_metadata = (
+ {
+ "producer": "PDFPlumber",
+ "creator": "PDFPlumber",
+ "creationdate": "",
+ }
+ | doc.metadata # Add parser metadata
+ | { # with more keys
+ "source": blob.source,
+ "file_path": blob.source,
+ "total_pages": len(doc.pages),
+ }
+ )
+ else:
+ doc_metadata = _purge_metadata(
+ (
+ doc.metadata # Add parser metadata
+ | { # with more keys
"source": blob.source,
"file_path": blob.source,
- "page": page.page_number - 1,
"total_pages": len(doc.pages),
- },
- **{
- k: doc.metadata[k]
- for k in doc.metadata
- if type(doc.metadata[k]) in [str, int]
- },
- ),
+ }
+ )
+ )
+
+ for page in doc.pages:
+ tables_bbox: list[tuple[float, float, float, float]] = (
+ self._extract_tables_bbox_from_page(page)
+ )
+ tables_content = self._extract_tables_from_page(page)
+ images_bbox = [geometry.obj_to_bbox(image) for image in page.images]
+ image_from_page = self._extract_images_from_page(page)
+ page_text = []
+ extras = []
+ for content in self._split_page_content(
+ page,
+ tables_bbox,
+ tables_content,
+ images_bbox,
+ image_from_page,
+ ):
+ if isinstance(content, str): # Text
+ page_text.append(content)
+ elif isinstance(content, list): # Table
+ page_text.append(_JOIN_TABLES + self._convert_table(content))
+ else: # Image
+ if self.images_parser:
+ try:
+ from PIL import Image as Img
+
+ Img.fromarray(content) # Check if image is valid
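+ # Hand the raw array to the image parser as a NumPy-serialized
+ # (.npy) blob rather than re-encoding it as an image file.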
+ image_bytes = io.BytesIO()
+ np.save(image_bytes, content)
+ blob = Blob.from_data(
+ image_bytes.getvalue(),
+ mime_type="application/x-npy",
+ )
+ text_from_image = next(
+ self.images_parser.lazy_parse(blob)
+ ).page_content
+ extras.append(
+ _format_inner_image(
+ blob, text_from_image, self.images_inner_format
+ )
+ )
+ except (TypeError, EOFError):
+ pass
+
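+ # Merge the image-derived text (extras) into the accumulated page text.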
+ all_text = _merge_text_and_extras(extras, "".join(page_text).strip())
+
+ if self.mode == "page":
+ # For legacy compatibility, add the last '\n'
+ if not all_text.endswith("\n"):
+ all_text += "\n"
+ yield Document(
+ page_content=all_text,
+ metadata=self._validate_metadata(
+ doc_metadata
+ | {
+ "page": page.page_number - 1,
+ }
+ ),
+ )
+ else:
+ contents.append(all_text)
+ if self.mode == "single":
+ yield Document(
+ page_content=self.pages_delimiter.join(contents),
+ metadata=self._validate_metadata(doc_metadata),
)
- for page in doc.pages
- ]
def _process_page_content(self, page: pdfplumber.page.Page) -> str:
- """Process the page content based on dedupe."""
+ """Process the page content based on dedupe.
+
+ Args:
+ page: The PDF page to process.
+
+ Returns:
+ The extracted text from the page.
+ """
if self.dedupe:
return page.dedupe_chars().extract_text(**self.text_kwargs)
return page.extract_text(**self.text_kwargs)
- def _extract_images_from_page(self, page: pdfplumber.page.Page) -> str:
- """Extract images from page and get the text with RapidOCR."""
+ def _split_page_content(
+ self,
+ page: pdfplumber.page.Page,
+ tables_bbox: list[tuple[float, float, float, float]],
+ tables_content: list[list[list[Any]]],
+ images_bbox: list[tuple[float, float, float, float]],
+ images_content: list[np.ndarray],
+ **kwargs: Any,
+ ) -> Iterator[Union[str, list[list[str]], np.ndarray]]:
+ """Split the page content into text, tables, and images.
+
+ Args:
+ page: The PDF page to process.
+ tables_bbox: Bounding boxes of tables on the page.
+ tables_content: Content of tables on the page.
+ images_bbox: Bounding boxes of images on the page.
+ images_content: Content of images on the page.
+ **kwargs: Additional keyword arguments.
+
+ Yields:
+ An iterator over the split content (text, tables, images).
+ """
+ from pdfplumber.utils import (
+ geometry,
+ text,
+ )
+
+ # Iterate over words. If a word is in a table,
+ # yield the accumulated text, and the table.
+ # If the word is in a previously seen table, ignore it.
+ # Finish with the accumulated text.
+ kwargs.update(
+ {
+ "keep_blank_chars": True,
+ # "use_text_flow": True,
+ "presorted": True,
+ "layout_bbox": kwargs.get("layout_bbox") or page.cropbox,
+ }
+ )
+ chars = page.dedupe_chars().objects["char"] if self.dedupe else page.chars
+
+ extractor = text.WordExtractor(
+ **{k: kwargs[k] for k in text.WORD_EXTRACTOR_KWARGS if k in kwargs}
+ )
+ wordmap = extractor.extract_wordmap(chars)
+ extract_wordmaps: list[Any] = []
+ used_arrays = [False] * len(tables_bbox)
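+ # used_arrays[i] becomes True once table i has been yielded; later words
+ # inside that table's bbox are skipped instead of re-emitted.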
+ for word, o in wordmap.tuples:
+ is_table = False
+ word_bbox = geometry.obj_to_bbox(word)
+ for i, table_bbox in enumerate(tables_bbox):
+ if geometry.get_bbox_overlap(word_bbox, table_bbox):
+ # Found a word in a table
+ is_table = True
+ if not used_arrays[i]:
+ # First time a word is seen in this table
+ # Yield the previous part
+ if extract_wordmaps:
+ new_wordmap = text.WordMap(tuples=extract_wordmaps)
+ new_textmap = new_wordmap.to_textmap(
+ **{
+ k: kwargs[k]
+ for k in text.TEXTMAP_KWARGS
+ if k in kwargs
+ }
+ )
+ yield new_textmap.to_string()
+ extract_wordmaps.clear()
+ # And yield the table
+ used_arrays[i] = True
+ yield tables_content[i]
+ break
+ if not is_table:
+ extract_wordmaps.append((word, o))
+ if extract_wordmaps:
+ new_wordmap = text.WordMap(tuples=extract_wordmaps)
+ new_textmap = new_wordmap.to_textmap(
+ **{k: kwargs[k] for k in text.TEXTMAP_KWARGS if k in kwargs}
+ )
+ yield new_textmap.to_string()
+ # Add images
+ for content in images_content:
+ yield content
+
+ def _extract_images_from_page(self, page: pdfplumber.page.Page) -> list[np.ndarray]:
+ """Extract images from a PDF page.
+
+ Args:
+ page: The PDF page to extract images from.
+
+ Returns:
+ A list of extracted images as numpy arrays.
+ """
from PIL import Image
- if not self.extract_images:
- return ""
+ if not self.images_parser:
+ return []
images = []
for img in page.images:
- if img["stream"]["Filter"].name in _PDF_FILTER_WITHOUT_LOSS:
- if img["stream"]["BitsPerComponent"] == 1:
- images.append(
- np.array(
- Image.frombytes(
- "1",
- (img["stream"]["Width"], img["stream"]["Height"]),
- img["stream"].get_data(),
- ).convert("L")
- )
- )
- else:
+ if "Filter" in img["stream"]:
+ if img["stream"]["Filter"].name in _PDF_FILTER_WITHOUT_LOSS:
images.append(
np.frombuffer(img["stream"].get_data(), dtype=np.uint8).reshape(
img["stream"]["Height"], img["stream"]["Width"], -1
)
)
- elif img["stream"]["Filter"].name in _PDF_FILTER_WITH_LOSS:
- images.append(img["stream"].get_data())
- else:
- warnings.warn("Unknown PDF Filter!")
+ elif img["stream"]["Filter"].name in _PDF_FILTER_WITH_LOSS:
+ buf = np.frombuffer(img["stream"].get_data(), dtype=np.uint8)
+ images.append(np.array(Image.open(io.BytesIO(buf.tobytes()))))
+ else:
+ logger.warning("Unknown PDF Filter!")
+
+ return images
+
+ def _extract_tables_bbox_from_page(
+ self,
+ page: pdfplumber.page.Page,
+ ) -> list[tuple[float, float, float, float]]:
+ """Extract bounding boxes of tables from a PDF page.
+
+ Args:
+ page: The PDF page to extract table bounding boxes from.
+
+ Returns:
+ A list of bounding boxes for tables on the page.
+ """
+ if not self.extract_tables:
+ return []
+ from pdfplumber.table import TableSettings
- return extract_from_images_with_rapidocr(images)
+ table_settings = self.extract_tables_settings
+ tset = TableSettings.resolve(table_settings)
+ return [table.bbox for table in page.find_tables(tset)]
+
+ def _extract_tables_from_page(
+ self,
+ page: pdfplumber.page.Page,
+ ) -> list[list[list[Any]]]:
+ """Extract tables from a PDF page.
+
+ Args:
+ page: The PDF page to extract tables from.
+
+ Returns:
+ A list of tables, where each table is a list of rows, and each row is a
+ list of cell values.
+ """
+ if not self.extract_tables:
+ return []
+ table_settings = self.extract_tables_settings
+ tables_list = page.extract_tables(table_settings)
+ return tables_list
+
+ def _convert_table(self, table: list[list[str]]) -> str:
+ """Convert a table to the specified format.
+
+ Args:
+ table: The table to convert.
+
+ Returns:
+ The table content as a string in the specified format.
+ """
+ fmt = self.extract_tables
+ if fmt is None:
+ return ""
+ if fmt == "markdown":
+ return self._convert_table_to_markdown(table)
+ elif fmt == "html":
+ return self._convert_table_to_html(table)
+ elif fmt == "csv":
+ return self._convert_table_to_csv(table)
+ else:
+ raise ValueError(f"Unknown table format: {fmt}")
+
+ def _convert_table_to_csv(self, table: list[list[str]]) -> str:
+ """Convert a table to CSV format.
+
+ Args:
+ table: The table to convert.
+
+ Returns:
+ The table content as a string in CSV format.
+ Newlines ("\n") in cell values are replaced with spaces.
+ """
+ if not table:
+ return ""
+
+ output = ["\n\n"]
+
+ # iterate over detail rows
+ for row in table:
+ line = ""
+ for cell in row:
+ # output None cells with empty string
+ cell = "" if cell is None else cell.replace("\n", " ")
+ line += cell + ","
+ output.append(line)
+ return "\n".join(output) + "\n\n"
+
+ def _convert_table_to_html(self, table: list[list[str]]) -> str:
+ """
+ Convert a table to a string in HTML format.
+
+ Args:
+ table: The table to convert.
+
+ Returns:
+ The table content as a string in HTML format.
+ """
+ if not table:
+ return ""
+ output = "
| " + cell + " | " + line += "