From 378cc4940101789606d52b8316f7f7c45ecf1a10 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Wed, 13 Aug 2025 22:12:40 -0700 Subject: [PATCH 01/43] Oracle Docs Server Signed-off-by: Gerald Venzl --- .gitignore | 5 + src/oracle-db-mcp-server/main.py | 309 ++++++++++++++++++++++++ src/oracle-db-mcp-server/pyproject.toml | 11 + 3 files changed, 325 insertions(+) create mode 100644 src/oracle-db-mcp-server/main.py create mode 100644 src/oracle-db-mcp-server/pyproject.toml diff --git a/.gitignore b/.gitignore index d6820d8..2bf27c1 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,8 @@ env/ env.bak/ venv.bak/ +index +.vscode +uv.lock + +.DS_Store diff --git a/src/oracle-db-mcp-server/main.py b/src/oracle-db-mcp-server/main.py new file mode 100644 index 0000000..6b9e916 --- /dev/null +++ b/src/oracle-db-mcp-server/main.py @@ -0,0 +1,309 @@ +# +# Since: August 2025 +# Author: Gerald Venzl +# Name: main.py +# Description: The Oracle Database Documentation MCP Server +# +# Copyright 2025 Oracle Corporation and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import hashlib +import html2text +import logging +from mcp.server.fastmcp import Context, FastMCP +import os +from pathlib import Path +from pydantic import Field +import shutil +from whoosh.index import create_in, open_dir +from whoosh.qparser import QueryParser +from whoosh.fields import Schema, TEXT +import zipfile +from mcp.server.fastmcp import FastMCP + + +INDEX = None +INDEX_DIR = Path("index") +INDEX_CHECKSUM_FILE = Path(INDEX_DIR / "index.checksum") +INDEX_SCHEMA = Schema(content=TEXT(stored=True)) +ZIP_TEMP_OUTPUT = "zip_temp" + +logger = logging.getLogger(__name__) + +mcp = FastMCP( + "oracle-doc", + instructions=""" + # Oracle Database Documentation MCP Server. + + This server is used to search the Oracle Database documentation for information. + It can be used to find information about SQL syntax, PL/SQL, database concepts, best practices, examples and many more. + It is also used to search the offical Oracle Database documentation for additional information on a particular feature, its use cases, restrictions or interoperability with other features. + The tool should be used to augment any existing knowledge or to find information that is not available in the current context. + The server is desinged to search the Oracle Database documentation for search phrases and will return a list of results. + + You can use the following tools to search the documentation: + - search: Search the documentation for a query string or search phrase. + + The search tool takes a search query as input and returns a list of results. + The results are returned as a list of strings containing relevant information. + + ## Best Practices + + - Use the search tool to search for phrases or query strings. + - Use the search tool to search for specific topics or features. + - Always use the search tool to search for additional and official information for Oracle Database features. + - If the search tool returns no results, try to rephrase the query. 
+ - If the search tool returns too few results, increase the max_results limit. + - If the search tool returns too many results, reduce the max_results limit. + - If the search tool returns results that are not relevant, try to refine the query. + """, + dependencies=[ + "whoosh>=2.7.4", + "pydantic>=2.10.6", + ] +) + + +@mcp.tool() +def search( + ctx: Context, + search_query: str = Field(description="The serach phrase to search for."), + max_results: int = Field(description="The maximum number of results to return.", default=20, gt=0), + ) -> list[str]: + """Search for information about how to use Oracle Database for a query string and return a list of results. + + Args: + search_query: The search phrase to search for. + max_results: The maximum number of results to return, defaults to 20. + + Usage: + search(search_query="create table syntax") + search(search_query="alter a parameter", max_results=13) + search(search_query="database user concept", max_results=20 + search(search_query="data use case domains best practices", max_results=15) + search(search_query="external table definition", max_results=100) + + Returns: + A list of results. + Each result a string in markdown format with the most relevant serach topic. + + """ + logger.info(f"query={search_query!r}") + return search_index(INDEX, search_query, max_results) + + +# Function to search the index +def search_index(index, query_str, limit=10) -> list[str]: + """ + Search the index for the query string and return matching sections with context. + Returns a list of (id, content, score) tuples. + """ + results = [] + with index.searcher() as searcher: + query = QueryParser("content", index.schema).parse(query_str) + hits = searcher.search(query, limit=limit) + for hit in hits: + #results.append((hit['id'], hit['content'], hit.score)) + results.append(hit['content']) + return results + + +def maintain_index(location: Path) -> None: + """Create or update the index for the oracle-doc. 
+ This function checks if the index needs to be created or updated based on the + contents of the provided location, which can be a directory or a zip file. + + Args: + location (Path): The path to the documentation directory or zip file. + + Returns: + None + """ + logger.debug("Creating or updating index for oracle-doc.") + # Logic to create or update the index goes here + + global INDEX + # Get the old index checksum, if it exists + index_checksum = "N/A" + # If the checksum file exists, read the checksum + if INDEX_CHECKSUM_FILE.exists(): + with INDEX_CHECKSUM_FILE.open("r") as f: + index_checksum = f.read().strip() + + # Only directories and zip files are currently supported + if location.is_file() and not location.suffix == '.zip': + logger.error(f"Unsupported file type: {location}. Must be a zip file or directory.") + return + + # Calculate the checksum of the input directory or zip file + logger.debug(f"Calculating checksum for location: {location}") + input_checksum = shasum_directory(location) + + # See whether checksum matches the old index checksum + if input_checksum == index_checksum: + logger.info("Index is up to date. 
No changes needed.") + INDEX = open_dir(INDEX_DIR) + return + + else: + logger.info("Checksum has changed, updating index.") + logger.debug(f"Old index checksum: {index_checksum}, New input checksum: {input_checksum}") + + # Extract the zip file to a temporary directory + if location.is_file() and location.suffix == '.zip': + + # Check if temp output directory exists and remove it + zip_output = Path(ZIP_TEMP_OUTPUT) + if zip_output.exists(): + logger.debug(f"Removing existing zip output directory: {zip_output}") + shutil.rmtree(zip_output) + + logger.debug(f"Creating zip output directory: {zip_output}") + zip_output.mkdir() + with zipfile.ZipFile(location, 'r') as zip_ref: + logger.debug(f"Extracting zip file {location} to {zip_output}") + zip_ref.extractall(ZIP_TEMP_OUTPUT) + + logger.debug(f"Done creating zip output directory: {zip_output}") + # Set the location to the extracted output directory + location = zip_output + + logger.debug("Indexing all html files in the directory...") + update_index(location) + + # Write the new checksum to the checksum file + with INDEX_CHECKSUM_FILE.open("w") as f: + logger.debug(f"Writing new checksum {input_checksum} to {INDEX_CHECKSUM_FILE}") + f.write(input_checksum) + + + # Delete temporary zip output directory if it exists + if Path(ZIP_TEMP_OUTPUT).exists(): + logger.debug(f"Removing temporary zip output directory: {zip_output}") + shutil.rmtree(zip_output) + + +def update_index(location: Path) -> None: + """Update the index with all HTML files in the directory. + + Args: + location (Path): The path to the documentation directory. 
+ Returns: + None""" + + global INDEX + + logger.info("Creating/updating index...") + + if not INDEX_DIR.exists(): + logger.debug(f"Creating index directory: {INDEX_DIR}") + os.makedirs(INDEX_DIR) + + INDEX = create_in(INDEX_DIR, INDEX_SCHEMA) + writer = INDEX.writer() + + for ext in ("*.html", "*.htm"): + for file in location.rglob(ext): + logger.debug(f"Indexing file: {file}") + content = convert_to_markdown(file) + writer.add_document(content=content) + + logger.debug("Committing changes to the index.") + writer.commit() + logger.info("Indexing complete.") + + +def shasum_directory(directory: Path) -> str: + """Calculate the SHA256 checksum of all files in a directory.""" + sha256 = hashlib.sha256() + for file in sorted(directory.rglob("*")): + if file.is_file(): + # Include relative path for uniqueness + sha256.update(str(file.relative_to(directory)).encode()) + with file.open("rb") as f: + while chunk := f.read(8192): + sha256.update(chunk) + return sha256.hexdigest() + + +def convert_to_markdown(file: Path) -> str: + """Convert an HTML file to Markdown format. + + Args: + file (Path): The path to the HTML file. + + Returns: + str: The converted Markdown content. 
+ """ + # Placeholder for conversion logic + logger.debug(f"Converting {file} to Markdown format.") + + # Initialize html2text converter + converter = html2text.HTML2Text() + + # Configure the converter + converter.ignore_links = False # Keep links in the output + converter.body_width = 0 # Disable line wrapping (optional) + converter.bypass_tables = False # Converts tables to Markdown + + with file.open("r", encoding="utf-8") as f: + html = f.read() + + # Convert HTML to Markdown + return converter.handle(html) + + +def main(): + """Main entrypoint for the Oracle Documentation MCP server.""" + + # Set up logging + ch = logging.StreamHandler() + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + logger.addHandler(ch) + + # Parse command line arguments + parser = argparse.ArgumentParser(description="Oracle Documentation MCP Server.") + parser.add_argument("--input", type=str, help="Path to the documentation input directory.") + parser.add_argument("--port", type=int, default=8000, help="Port to serve the MCP server on.") + parser.add_argument("-mcp", "--mcp", action="store_true", help="Run the MCP server.") + parser.add_argument("--log-level", type=str, default="INFO", help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") + args = parser.parse_args() + + + # Set log level + logger.setLevel(getattr(logging, args.log_level.upper(), logging.INFO)) + + if args.input: + input_path = Path(args.input) + + if not input_path.exists(): + logger.error(f"Input location {args.input} does not exist.") + return + + maintain_index(input_path) + + if not Path(INDEX_DIR).exists(): + logger.error(f"Index directory {INDEX_DIR} does not exist. 
Please run the server with a valid input directory.") + return + + if args.mcp: + logger.info("Serving MCP server for Oracle documentation.") + mcp.run() + + +if __name__ == "__main__": + main() diff --git a/src/oracle-db-mcp-server/pyproject.toml b/src/oracle-db-mcp-server/pyproject.toml new file mode 100644 index 0000000..bde01ed --- /dev/null +++ b/src/oracle-db-mcp-server/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "oracle-doc" +version = "0.1.0" +description = "Oracle Database Documentation MCP Server" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "html2text>=2025.4.15", + "mcp>=1.12.3", + "whoosh>=2.7.4", +] From 18028ee97d0ce6fc242571abd64a44afce03687d Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Thu, 14 Aug 2025 17:10:00 -0700 Subject: [PATCH 02/43] Indexing complete Signed-off-by: Gerald Venzl --- src/oracle-db-mcp-server/Dockerfile | 39 ++++++++ .../{main.py => oracle-db-doc-mcp-server.py} | 90 ++++++++++--------- src/oracle-db-mcp-server/pyproject.toml | 11 --- src/oracle-db-mcp-server/requirements.txt | 3 + 4 files changed, 91 insertions(+), 52 deletions(-) create mode 100644 src/oracle-db-mcp-server/Dockerfile rename src/oracle-db-mcp-server/{main.py => oracle-db-doc-mcp-server.py} (80%) delete mode 100644 src/oracle-db-mcp-server/pyproject.toml create mode 100644 src/oracle-db-mcp-server/requirements.txt diff --git a/src/oracle-db-mcp-server/Dockerfile b/src/oracle-db-mcp-server/Dockerfile new file mode 100644 index 0000000..bc33fd8 --- /dev/null +++ b/src/oracle-db-mcp-server/Dockerfile @@ -0,0 +1,39 @@ +# +# Since: August 2025 +# Author: Gerald Venzl +# Name: Dockerfile +# Description: Dockerfile to build Docker image +# +# Copyright 2025 Oracle Corporation and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +FROM alpine + +COPY oracle-db-doc-mcp-server.py requirements.txt . + +RUN apk --update --no-cache add python3 py3-pip curl && \ + pip install -r requirements.txt --break-system-packages && \ + rm requirements.txt && \ + mkdir /input && \ + curl -L -o /input/db23.zip https://docs.oracle.com/en/database/oracle/oracle-database/23/zip/oracle-database_23.zip && \ + python3 oracle-db-doc-mcp-server.py --log-level DEBUG --doc /input/db23.zip && \ + rm -r /input && \ + apk del curl && \ + rm -rf /var/cache/apk/* /tmp/* + +LABEL org.opencontainers.image.source=https://github.com/oracle/mcp +LABEL org.opencontainers.image.description="Oracle Database Documentation MCP Server" +LABEL org.opencontainers.image.licenses=Apache-2.0 + +ENTRYPOINT [ "python3", "oracle-db-doc-mcp-server.py", "-mcp" ] diff --git a/src/oracle-db-mcp-server/main.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py similarity index 80% rename from src/oracle-db-mcp-server/main.py rename to src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index 6b9e916..c443eeb 100644 --- a/src/oracle-db-mcp-server/main.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -1,22 +1,22 @@ # -# Since: August 2025 -# Author: Gerald Venzl -# Name: main.py -# Description: The Oracle Database Documentation MCP Server +# Since: August 2025 +# Author: Gerald Venzl +# Name: main.py +# Description: The Oracle Database Documentation MCP Server # -# Copyright 2025 Oracle Corporation and/or its affiliates. +# Copyright 2025 Oracle Corporation and/or its affiliates. 
# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import hashlib @@ -70,6 +70,8 @@ - If the search tool returns results that are not relevant, try to refine the query. """, dependencies=[ + "html2text>=2025.4.15", + "mcp>=1.12.3", "whoosh>=2.7.4", "pydantic>=2.10.6", ] @@ -101,40 +103,49 @@ def search( """ logger.info(f"query={search_query!r}") - return search_index(INDEX, search_query, max_results) + return search_index(search_query, max_results) # Function to search the index -def search_index(index, query_str, limit=10) -> list[str]: +def search_index(query_str, limit=10) -> list[str]: """ Search the index for the query string and return matching sections with context. - Returns a list of (id, content, score) tuples. + Returns a list of content. 
""" results = [] - with index.searcher() as searcher: - query = QueryParser("content", index.schema).parse(query_str) + with INDEX.searcher() as searcher: + query = QueryParser("content", INDEX.schema).parse(query_str) hits = searcher.search(query, limit=limit) for hit in hits: - #results.append((hit['id'], hit['content'], hit.score)) results.append(hit['content']) return results -def maintain_index(location: Path) -> None: - """Create or update the index for the oracle-doc. +def maintain_index(path: str) -> None: + """Creates or updates the index and opens it for the oracle-doc. This function checks if the index needs to be created or updated based on the contents of the provided location, which can be a directory or a zip file. Args: - location (Path): The path to the documentation directory or zip file. + path (str): The path to the documentation directory or zip file. Returns: None """ - logger.debug("Creating or updating index for oracle-doc.") + global INDEX + logger.info("Maintaining index...") # Logic to create or update the index goes here - global INDEX + # If no path was provided but index exists, open the index. 
+ if path is None and INDEX_DIR.exists(): + INDEX = open_dir(INDEX_DIR) + return + + location = Path(path) + if not location.exists(): + logger.error(f"Provided path does not exist: {location}") + return + # Get the old index checksum, if it exists index_checksum = "N/A" # If the checksum file exists, read the checksum @@ -148,8 +159,8 @@ def maintain_index(location: Path) -> None: return # Calculate the checksum of the input directory or zip file - logger.debug(f"Calculating checksum for location: {location}") input_checksum = shasum_directory(location) + logger.debug(f"Checksum is {input_checksum} for location '{location}'") # See whether checksum matches the old index checksum if input_checksum == index_checksum: @@ -181,6 +192,7 @@ def maintain_index(location: Path) -> None: location = zip_output logger.debug("Indexing all html files in the directory...") + # Also opens the index update_index(location) # Write the new checksum to the checksum file @@ -201,11 +213,12 @@ def update_index(location: Path) -> None: Args: location (Path): The path to the documentation directory. 
Returns: - None""" + None + """ global INDEX - logger.info("Creating/updating index...") + logger.debug("Updating index...") if not INDEX_DIR.exists(): logger.debug(f"Creating index directory: {INDEX_DIR}") @@ -214,12 +227,15 @@ def update_index(location: Path) -> None: INDEX = create_in(INDEX_DIR, INDEX_SCHEMA) writer = INDEX.writer() + files_indexes = 0 for ext in ("*.html", "*.htm"): for file in location.rglob(ext): logger.debug(f"Indexing file: {file}") content = convert_to_markdown(file) writer.add_document(content=content) + files_indexes += 1 + logger.info(f"Indexed {files_indexes} html files from '{location}'.") logger.debug("Committing changes to the index.") writer.commit() logger.info("Indexing complete.") @@ -277,27 +293,19 @@ def main(): # Parse command line arguments parser = argparse.ArgumentParser(description="Oracle Documentation MCP Server.") - parser.add_argument("--input", type=str, help="Path to the documentation input directory.") + parser.add_argument("--doc", type=str, help="Path to the documentation input directory.") parser.add_argument("--port", type=int, default=8000, help="Port to serve the MCP server on.") parser.add_argument("-mcp", "--mcp", action="store_true", help="Run the MCP server.") - parser.add_argument("--log-level", type=str, default="INFO", help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") + parser.add_argument("--log-level", type=str, default="ERROR", help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") args = parser.parse_args() - # Set log level logger.setLevel(getattr(logging, args.log_level.upper(), logging.INFO)) - if args.input: - input_path = Path(args.input) - - if not input_path.exists(): - logger.error(f"Input location {args.input} does not exist.") - return - - maintain_index(input_path) + maintain_index(args.doc) - if not Path(INDEX_DIR).exists(): - logger.error(f"Index directory {INDEX_DIR} does not exist. 
Please run the server with a valid input directory.") + if INDEX is None: + logger.error(f"Index does not exist. Please run the server with a valid doc directory to index.") return if args.mcp: diff --git a/src/oracle-db-mcp-server/pyproject.toml b/src/oracle-db-mcp-server/pyproject.toml deleted file mode 100644 index bde01ed..0000000 --- a/src/oracle-db-mcp-server/pyproject.toml +++ /dev/null @@ -1,11 +0,0 @@ -[project] -name = "oracle-doc" -version = "0.1.0" -description = "Oracle Database Documentation MCP Server" -readme = "README.md" -requires-python = ">=3.11" -dependencies = [ - "html2text>=2025.4.15", - "mcp>=1.12.3", - "whoosh>=2.7.4", -] diff --git a/src/oracle-db-mcp-server/requirements.txt b/src/oracle-db-mcp-server/requirements.txt new file mode 100644 index 0000000..29cf58d --- /dev/null +++ b/src/oracle-db-mcp-server/requirements.txt @@ -0,0 +1,3 @@ +html2text >= 2025.4.15 +mcp >= 1.12.3 +whoosh >= 2.7.4 From 90f493a674a0176e3b85cb19b86d16c0f34a16d6 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 15 Aug 2025 13:03:21 -0700 Subject: [PATCH 03/43] Update parameters and defaults Signed-off-by: Gerald Venzl --- .../oracle-db-doc-mcp-server.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index c443eeb..e76f2c2 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -41,6 +41,7 @@ ZIP_TEMP_OUTPUT = "zip_temp" logger = logging.getLogger(__name__) +logging.basicConfig(filename='oracle-db-doc.log', filemode='w', level=logging.ERROR) mcp = FastMCP( "oracle-doc", @@ -292,15 +293,15 @@ def main(): logger.addHandler(ch) # Parse command line arguments - parser = argparse.ArgumentParser(description="Oracle Documentation MCP Server.") - parser.add_argument("--doc", type=str, help="Path to the documentation input directory.") - 
parser.add_argument("--port", type=int, default=8000, help="Port to serve the MCP server on.") - parser.add_argument("-mcp", "--mcp", action="store_true", help="Run the MCP server.") - parser.add_argument("--log-level", type=str, default="ERROR", help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") + parser = argparse.ArgumentParser(description="Oracle Database Documentation MCP Server.") + parser.add_argument("-doc", type=str, help="Path to the documentation input zip file or extracted directory.") + parser.add_argument("-port", type=int, default=8000, help="Port to serve the MCP server on.") + parser.add_argument("-mcp", action="store_true", help="Run the MCP server.") + parser.add_argument("-log-level", type=str, default="ERROR", help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") args = parser.parse_args() # Set log level - logger.setLevel(getattr(logging, args.log_level.upper(), logging.INFO)) + logger.setLevel(getattr(logging, args.log_level.upper(), logging.ERROR)) maintain_index(args.doc) From 55745b50e7cd839cceff3a99110bb1ba094890c6 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 15 Aug 2025 14:23:26 -0700 Subject: [PATCH 04/43] Update gitignore Signed-off-by: Gerald Venzl --- .gitignore | 1 - src/oracle-db-mcp-server/.gitignore | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 src/oracle-db-mcp-server/.gitignore diff --git a/.gitignore b/.gitignore index 2bf27c1..5ec6871 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,6 @@ env/ env.bak/ venv.bak/ -index .vscode uv.lock diff --git a/src/oracle-db-mcp-server/.gitignore b/src/oracle-db-mcp-server/.gitignore new file mode 100644 index 0000000..da28d38 --- /dev/null +++ b/src/oracle-db-mcp-server/.gitignore @@ -0,0 +1,2 @@ +*.log +index From 5016aebd34c4742e131064a92239d5cfa9dc9633 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 15 Aug 2025 14:29:27 -0700 Subject: [PATCH 05/43] Add ReadMe Signed-off-by: Gerald Venzl --- 
src/oracle-db-mcp-server/README.md | 95 ++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/oracle-db-mcp-server/README.md diff --git a/src/oracle-db-mcp-server/README.md b/src/oracle-db-mcp-server/README.md new file mode 100644 index 0000000..3017b32 --- /dev/null +++ b/src/oracle-db-mcp-server/README.md @@ -0,0 +1,95 @@ +# Oracle Database Documentation MCP Server + +A Python-based MCP (Model Context Protocol) server that provides tools for searching the official Oracle Database documentation. + +## Features + +- **Search** + - Serach the documentation by keywords and phrases + +## Prerequisites + +- Python 3.x +- Downloaded [Oracle Database Documentation zip file](https://docs.oracle.com/en/database/oracle/oracle-database/23/zip/oracle-database_23.zip) + +## Installation + +```console +git clone https://github.com/oracle/mcp.git + +cd mcp/src/oracle-db-mcp-server + +python3 -m venv .venv + +source .venv/bin/activate + +python3 -m pip install -r requirements.txt +``` + +## Usage + +The MCP server has two modes, one to create or maintain the documentation index and one to run the MCP server. Both modes can be combined. + +```console +usage: oracle-db-doc-mcp-server.py [-h] [-doc DOC] [-mcp] [-log-level LOG_LEVEL] + +Oracle Database Documentation MCP Server. + +options: + -h, --help show this help message and exit + -doc DOC Path to the documentation input zip file or extracted directory. + -mcp Run the MCP server. + -log-level LOG_LEVEL Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). +``` + +### Index creation/maintenance + +To create or maintain the index, point the `-doc` parameter to either the Oracle Database Documentation zip file or an **already extracted** location of the Oracle Documentation. +The index creation will take several minutes to complete. +A checksum of the index is kept so that subsequent executions of the program will only reindex content that has changed. 
+ +```console +python3 oracle-db-doc-mcp-server.py -doc ~/Downloads/oracle-database_23.zip +``` + +### Run MCP Server + +To run just the MCP server, provide the `-mcp` parameter. The index will have to exist. + +```console +python3 oracle-db-doc-mcp-server.py -mcp +``` + +### Combining index creation/maintenance and MCP server mode + +You can combine the index maintainenance and MCP server mode into one command, for example: + +```console +python3 oracle-db-doc-mcp-server.py -mcp -doc ~/Downloads/oracle-database_23.zip +``` + +### VSCode integration + +Replace the `<>` placeholders with the paths to the MCP server installation and Oracle Database Documentation zip file. + +``` +{ + "servers": { + "oracle-db-doc": { + "type": "stdio", + "command": "/.venv/bin/python3", + "args": [ "oracle-db-doc-mcp-server.py", "-doc", "", "-mcp" ] + } + } +} +``` + +## Tools + +### search + +Searches the documentation for key words and key phrases + +```python +search(search_query: str, max_results: int) -> list[str]: +``` From 14268f828b74fe808696c07f2d9fe56df258f399 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Mon, 18 Aug 2025 14:49:37 -0700 Subject: [PATCH 06/43] Remove context and port Signed-off-by: Gerald Venzl --- src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index e76f2c2..b8f8f7e 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -22,7 +22,7 @@ import hashlib import html2text import logging -from mcp.server.fastmcp import Context, FastMCP +from mcp.server.fastmcp import FastMCP import os from pathlib import Path from pydantic import Field @@ -81,7 +81,6 @@ @mcp.tool() def search( - ctx: Context, search_query: str = Field(description="The serach phrase to search for."), max_results: int = Field(description="The 
maximum number of results to return.", default=20, gt=0), ) -> list[str]: @@ -295,7 +294,6 @@ def main(): # Parse command line arguments parser = argparse.ArgumentParser(description="Oracle Database Documentation MCP Server.") parser.add_argument("-doc", type=str, help="Path to the documentation input zip file or extracted directory.") - parser.add_argument("-port", type=int, default=8000, help="Port to serve the MCP server on.") parser.add_argument("-mcp", action="store_true", help="Run the MCP server.") parser.add_argument("-log-level", type=str, default="ERROR", help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") args = parser.parse_args() From 50b9d624eec9b171450ace3e870b08ffcfd88228 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Tue, 19 Aug 2025 10:45:05 -0700 Subject: [PATCH 07/43] Update to use pocketsearch Signed-off-by: Gerald Venzl --- src/oracle-db-mcp-server/.gitignore | 2 +- .../oracle-db-doc-mcp-server.py | 101 +++++++++--------- src/oracle-db-mcp-server/requirements.txt | 2 +- 3 files changed, 54 insertions(+), 51 deletions(-) diff --git a/src/oracle-db-mcp-server/.gitignore b/src/oracle-db-mcp-server/.gitignore index da28d38..992244a 100644 --- a/src/oracle-db-mcp-server/.gitignore +++ b/src/oracle-db-mcp-server/.gitignore @@ -1,2 +1,2 @@ *.log -index +index* diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index b8f8f7e..4e3e5af 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -23,26 +23,27 @@ import html2text import logging from mcp.server.fastmcp import FastMCP -import os from pathlib import Path -from pydantic import Field import shutil -from whoosh.index import create_in, open_dir -from whoosh.qparser import QueryParser -from whoosh.fields import Schema, TEXT +from pocketsearch import Schema, Text, PocketSearch, PocketWriter import zipfile from mcp.server.fastmcp import FastMCP 
- INDEX = None -INDEX_DIR = Path("index") -INDEX_CHECKSUM_FILE = Path(INDEX_DIR / "index.checksum") -INDEX_SCHEMA = Schema(content=TEXT(stored=True)) +INDEX_NAME = Path("index.db") +INDEX_CHECKSUM_FILE = Path("index.checksum") ZIP_TEMP_OUTPUT = "zip_temp" logger = logging.getLogger(__name__) logging.basicConfig(filename='oracle-db-doc.log', filemode='w', level=logging.ERROR) + +# Class for index structure +class IndexSchema(Schema): + entry_name = Text(is_id_field=True) + entry = Text(index=True) + + mcp = FastMCP( "oracle-doc", instructions=""" @@ -50,9 +51,9 @@ This server is used to search the Oracle Database documentation for information. It can be used to find information about SQL syntax, PL/SQL, database concepts, best practices, examples and many more. - It is also used to search the offical Oracle Database documentation for additional information on a particular feature, its use cases, restrictions or interoperability with other features. + It is also used to search the official Oracle Database documentation for additional information on a particular feature, its use cases, restrictions or interoperability with other features. The tool should be used to augment any existing knowledge or to find information that is not available in the current context. - The server is desinged to search the Oracle Database documentation for search phrases and will return a list of results. + The server is designed to search the Oracle Database documentation for search phrases and will return a list of results. You can use the following tools to search the documentation: - search: Search the documentation for a query string or search phrase. 
@@ -73,17 +74,16 @@ dependencies=[ "html2text>=2025.4.15", "mcp>=1.12.3", - "whoosh>=2.7.4", - "pydantic>=2.10.6", + "pocketsearch>=0.40.0", ] ) @mcp.tool() def search( - search_query: str = Field(description="The serach phrase to search for."), - max_results: int = Field(description="The maximum number of results to return.", default=20, gt=0), - ) -> list[str]: + search_query: str, + max_results: int = 10, +) -> list[str]: """Search for information about how to use Oracle Database for a query string and return a list of results. Args: @@ -93,31 +93,39 @@ def search( Usage: search(search_query="create table syntax") search(search_query="alter a parameter", max_results=13) - search(search_query="database user concept", max_results=20 + search(search_query="database user concept", max_results=20) search(search_query="data use case domains best practices", max_results=15) search(search_query="external table definition", max_results=100) Returns: A list of results. - Each result a string in markdown format with the most relevant serach topic. + Each result a string in Markdown format with the most relevant search topic. """ logger.info(f"query={search_query!r}") return search_index(search_query, max_results) +def open_index() -> None: + global INDEX + logger.debug("Opening index file.") + INDEX = PocketSearch(db_name=INDEX_NAME, schema=IndexSchema) + + # Function to search the index -def search_index(query_str, limit=10) -> list[str]: +def search_index(query_str: str, limit: int = 10) -> list[str]: """ Search the index for the query string and return matching sections with context. Returns a list of content. 
""" results = [] - with INDEX.searcher() as searcher: - query = QueryParser("content", INDEX.schema).parse(query_str) - hits = searcher.search(query, limit=limit) - for hit in hits: - results.append(hit['content']) + hits = INDEX.search(entry=query_str) + finds = 0 + for hit in hits: + results.append(hit.entry) + finds += 1 + if finds >= limit: + break return results @@ -137,8 +145,8 @@ def maintain_index(path: str) -> None: # Logic to create or update the index goes here # If no path was provided but index exists, open the index. - if path is None and INDEX_DIR.exists(): - INDEX = open_dir(INDEX_DIR) + if path is None and INDEX_NAME.exists(): + open_index() return location = Path(path) @@ -159,13 +167,14 @@ def maintain_index(path: str) -> None: return # Calculate the checksum of the input directory or zip file + logger.debug(f"Calculating checksum for location: {location}") input_checksum = shasum_directory(location) logger.debug(f"Checksum is {input_checksum} for location '{location}'") # See whether checksum matches the old index checksum if input_checksum == index_checksum: - logger.info("Index is up to date. 
No changes needed.") - INDEX = open_dir(INDEX_DIR) + logger.info("Index is up to date, no changes needed.") + open_index() return else: @@ -192,15 +201,15 @@ def maintain_index(path: str) -> None: location = zip_output logger.debug("Indexing all html files in the directory...") - # Also opens the index + update_index(location) + open_index() # Write the new checksum to the checksum file with INDEX_CHECKSUM_FILE.open("w") as f: logger.debug(f"Writing new checksum {input_checksum} to {INDEX_CHECKSUM_FILE}") f.write(input_checksum) - # Delete temporary zip output directory if it exists if Path(ZIP_TEMP_OUTPUT).exists(): logger.debug(f"Removing temporary zip output directory: {zip_output}") @@ -220,25 +229,18 @@ def update_index(location: Path) -> None: logger.debug("Updating index...") - if not INDEX_DIR.exists(): - logger.debug(f"Creating index directory: {INDEX_DIR}") - os.makedirs(INDEX_DIR) - - INDEX = create_in(INDEX_DIR, INDEX_SCHEMA) - writer = INDEX.writer() + with PocketWriter(db_name=INDEX_NAME, schema=IndexSchema) as writer: - files_indexes = 0 - for ext in ("*.html", "*.htm"): - for file in location.rglob(ext): - logger.debug(f"Indexing file: {file}") - content = convert_to_markdown(file) - writer.add_document(content=content) - files_indexes += 1 + files_indexes = 0 + for ext in ("*.html", "*.htm"): + for file in location.rglob(ext): + logger.debug(f"Indexing file: {file}") + markdown_content = convert_to_markdown(file) + writer.insert_or_update(entry_name=str(file.relative_to(location)), entry=markdown_content) + files_indexes += 1 - logger.info(f"Indexed {files_indexes} html files from '{location}'.") - logger.debug("Committing changes to the index.") - writer.commit() - logger.info("Indexing complete.") + logger.info(f"Indexed {files_indexes} html files from '{location}'.") + logger.info("Indexing complete.") def shasum_directory(directory: Path) -> str: @@ -272,7 +274,7 @@ def convert_to_markdown(file: Path) -> str: # Configure the converter 
converter.ignore_links = False # Keep links in the output converter.body_width = 0 # Disable line wrapping (optional) - converter.bypass_tables = False # Converts tables to Markdown + converter.bypass_tables = False # Converts tables to Markdown with file.open("r", encoding="utf-8") as f: html = f.read() @@ -295,7 +297,8 @@ def main(): parser = argparse.ArgumentParser(description="Oracle Database Documentation MCP Server.") parser.add_argument("-doc", type=str, help="Path to the documentation input zip file or extracted directory.") parser.add_argument("-mcp", action="store_true", help="Run the MCP server.") - parser.add_argument("-log-level", type=str, default="ERROR", help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") + parser.add_argument("-log-level", type=str, default="ERROR", + help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") args = parser.parse_args() # Set log level diff --git a/src/oracle-db-mcp-server/requirements.txt b/src/oracle-db-mcp-server/requirements.txt index 29cf58d..f7259d4 100644 --- a/src/oracle-db-mcp-server/requirements.txt +++ b/src/oracle-db-mcp-server/requirements.txt @@ -1,3 +1,3 @@ html2text >= 2025.4.15 mcp >= 1.12.3 -whoosh >= 2.7.4 +pocketsearch >= 0.40.0 From 27c2c3a27534d9881225c3087da7c09da7837616 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Tue, 19 Aug 2025 13:10:07 -0700 Subject: [PATCH 08/43] Update markdown library Signed-off-by: Gerald Venzl --- .../oracle-db-doc-mcp-server.py | 22 +++++++++---------- src/oracle-db-mcp-server/requirements.txt | 2 +- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index 4e3e5af..5dcac33 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -20,7 +20,7 @@ import argparse import hashlib -import html2text +import markdownify as md import logging from 
mcp.server.fastmcp import FastMCP from pathlib import Path @@ -72,7 +72,7 @@ class IndexSchema(Schema): - If the search tool returns results that are not relevant, try to refine the query. """, dependencies=[ - "html2text>=2025.4.15", + "markdownify>=1.2.0", "mcp>=1.12.3", "pocketsearch>=0.40.0", ] @@ -240,7 +240,13 @@ def update_index(location: Path) -> None: files_indexes += 1 logger.info(f"Indexed {files_indexes} html files from '{location}'.") - logger.info("Indexing complete.") + + # Optimize index for query performance + index = PocketSearch(db_name=INDEX_NAME, schema=IndexSchema, writeable=True) + logger.debug("Optimizing index...") + index.optimize() + + logger.info("Indexing complete.") def shasum_directory(directory: Path) -> str: @@ -268,19 +274,11 @@ def convert_to_markdown(file: Path) -> str: # Placeholder for conversion logic logger.debug(f"Converting {file} to Markdown format.") - # Initialize html2text converter - converter = html2text.HTML2Text() - - # Configure the converter - converter.ignore_links = False # Keep links in the output - converter.body_width = 0 # Disable line wrapping (optional) - converter.bypass_tables = False # Converts tables to Markdown - with file.open("r", encoding="utf-8") as f: html = f.read() # Convert HTML to Markdown - return converter.handle(html) + return md.markdownify(html) def main(): diff --git a/src/oracle-db-mcp-server/requirements.txt b/src/oracle-db-mcp-server/requirements.txt index f7259d4..8b09506 100644 --- a/src/oracle-db-mcp-server/requirements.txt +++ b/src/oracle-db-mcp-server/requirements.txt @@ -1,3 +1,3 @@ -html2text >= 2025.4.15 +markdownify >= 1.2.0 mcp >= 1.12.3 pocketsearch >= 0.40.0 From a94af7bda47f413ccec03d9aab2ec47af2c5b49c Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Wed, 20 Aug 2025 09:50:32 -0700 Subject: [PATCH 09/43] Update dependencies to FastMCP Signed-off-by: Gerald Venzl --- src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py | 5 ++--- src/oracle-db-mcp-server/requirements.txt 
| 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index 5dcac33..71d940a 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -22,12 +22,11 @@ import hashlib import markdownify as md import logging -from mcp.server.fastmcp import FastMCP +from fastmcp import FastMCP from pathlib import Path import shutil from pocketsearch import Schema, Text, PocketSearch, PocketWriter import zipfile -from mcp.server.fastmcp import FastMCP INDEX = None INDEX_NAME = Path("index.db") @@ -73,7 +72,7 @@ class IndexSchema(Schema): """, dependencies=[ "markdownify>=1.2.0", - "mcp>=1.12.3", + "fastmcp>=2.11.3", "pocketsearch>=0.40.0", ] ) diff --git a/src/oracle-db-mcp-server/requirements.txt b/src/oracle-db-mcp-server/requirements.txt index 8b09506..b87ec94 100644 --- a/src/oracle-db-mcp-server/requirements.txt +++ b/src/oracle-db-mcp-server/requirements.txt @@ -1,3 +1,3 @@ markdownify >= 1.2.0 -mcp >= 1.12.3 +fastmcp >= 2.11.3 pocketsearch >= 0.40.0 From 50ab36859409ce1352ee1efa75bd059eeec45059 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Wed, 20 Aug 2025 15:08:30 -0700 Subject: [PATCH 10/43] Write index into home directory Signed-off-by: Gerald Venzl --- .../oracle-db-doc-mcp-server.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index 71d940a..3fdcd85 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -23,19 +23,18 @@ import markdownify as md import logging from fastmcp import FastMCP -from pathlib import Path +from pathlib import PurePath, Path import shutil from pocketsearch import Schema, Text, PocketSearch, PocketWriter import zipfile INDEX = None -INDEX_NAME 
= Path("index.db") -INDEX_CHECKSUM_FILE = Path("index.checksum") -ZIP_TEMP_OUTPUT = "zip_temp" +HOME_DIR = Path.home().joinpath(PurePath(".oracle/oracle-db-mcp-server")) +INDEX_NAME = HOME_DIR.joinpath(PurePath("index.db")) +INDEX_CHECKSUM_FILE = HOME_DIR.joinpath(PurePath("index.checksum")) +ZIP_TEMP_OUTPUT = HOME_DIR.joinpath("zip_temp") logger = logging.getLogger(__name__) -logging.basicConfig(filename='oracle-db-doc.log', filemode='w', level=logging.ERROR) - # Class for index structure class IndexSchema(Schema): @@ -298,7 +297,11 @@ def main(): help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") args = parser.parse_args() + if not HOME_DIR.exists(): + HOME_DIR.mkdir(parents=True) + # Set log level + logging.basicConfig(filename=HOME_DIR.joinpath(Path('oracle-db-doc.log')), filemode='w', level=logging.ERROR) logger.setLevel(getattr(logging, args.log_level.upper(), logging.ERROR)) maintain_index(args.doc) From 3c2042c2936a0e6660f5a65b20cd77cb265cb15c Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Wed, 20 Aug 2025 15:09:58 -0700 Subject: [PATCH 11/43] Update ReadMe Signed-off-by: Gerald Venzl --- src/oracle-db-mcp-server/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/oracle-db-mcp-server/README.md b/src/oracle-db-mcp-server/README.md index 3017b32..bd0ca29 100644 --- a/src/oracle-db-mcp-server/README.md +++ b/src/oracle-db-mcp-server/README.md @@ -30,6 +30,8 @@ python3 -m pip install -r requirements.txt The MCP server has two modes, one to create or maintain the documentation index and one to run the MCP server. Both modes can be combined. +The server will create a new folder under `$HOME/.oracle/oracle-db-mcp-server` to store the index and the server log file. 
+ ```console usage: oracle-db-doc-mcp-server.py [-h] [-doc DOC] [-mcp] [-log-level LOG_LEVEL] From 3f9d77a88e83930c577ed30e4eb9e18f65df9c40 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Tue, 2 Sep 2025 09:38:49 -0700 Subject: [PATCH 12/43] Chunk documents by header, support HTTP Signed-off-by: Gerald Venzl --- .../oracle-db-doc-mcp-server.py | 243 ++++++++++++------ 1 file changed, 166 insertions(+), 77 deletions(-) diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index 3fdcd85..6e628f6 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -20,27 +20,33 @@ import argparse import hashlib -import markdownify as md -import logging from fastmcp import FastMCP +import logging +import markdownify as md from pathlib import PurePath, Path +from pocketsearch import PocketSearch, PocketWriter +import re import shutil -from pocketsearch import Schema, Text, PocketSearch, PocketWriter import zipfile -INDEX = None +# Working home directory HOME_DIR = Path.home().joinpath(PurePath(".oracle/oracle-db-mcp-server")) -INDEX_NAME = HOME_DIR.joinpath(PurePath("index.db")) -INDEX_CHECKSUM_FILE = HOME_DIR.joinpath(PurePath("index.checksum")) + +# Index +INDEX = None +INDEX_FILE = HOME_DIR.joinpath(PurePath("index.db")) +INDEX_VERSION="1.0.0" +INDEX_VERSION_FILE = HOME_DIR.joinpath(PurePath("index.version")) +CONTENT_CHECKSUM_FILE = HOME_DIR.joinpath(PurePath("content.checksum")) + +# Resources folder +RESOURCES_DIR = HOME_DIR.joinpath(PurePath("resources")) + +# Temp directory for zip file extraction ZIP_TEMP_OUTPUT = HOME_DIR.joinpath("zip_temp") logger = logging.getLogger(__name__) -# Class for index structure -class IndexSchema(Schema): - entry_name = Text(is_id_field=True) - entry = Text(index=True) - mcp = FastMCP( "oracle-doc", @@ -86,7 +92,7 @@ def search( Args: search_query: The search phrase to search for. 
- max_results: The maximum number of results to return, defaults to 20. + max_results: The maximum number of results to return, defaults to 10. Usage: search(search_query="create table syntax") @@ -94,7 +100,6 @@ def search( search(search_query="database user concept", max_results=20) search(search_query="data use case domains best practices", max_results=15) search(search_query="external table definition", max_results=100) - Returns: A list of results. Each result a string in Markdown format with the most relevant search topic. @@ -104,12 +109,6 @@ def search( return search_index(search_query, max_results) -def open_index() -> None: - global INDEX - logger.debug("Opening index file.") - INDEX = PocketSearch(db_name=INDEX_NAME, schema=IndexSchema) - - # Function to search the index def search_index(query_str: str, limit: int = 10) -> list[str]: """ @@ -117,18 +116,18 @@ def search_index(query_str: str, limit: int = 10) -> list[str]: Returns a list of content. """ results = [] - hits = INDEX.search(entry=query_str) + hits = INDEX.search(text=query_str) finds = 0 for hit in hits: - results.append(hit.entry) + results.append(hit.text) finds += 1 if finds >= limit: break return results -def maintain_index(path: str) -> None: - """Creates or updates the index and opens it for the oracle-doc. +def maintain_content(path: str) -> None: + """Maintains the content for the MCP server. This function checks if the index needs to be created or updated based on the contents of the provided location, which can be a directory or a zip file. @@ -142,22 +141,16 @@ def maintain_index(path: str) -> None: logger.info("Maintaining index...") # Logic to create or update the index goes here - # If no path was provided but index exists, open the index. 
- if path is None and INDEX_NAME.exists(): - open_index() - return - location = Path(path) if not location.exists(): logger.error(f"Provided path does not exist: {location}") return # Get the old index checksum, if it exists - index_checksum = "N/A" - # If the checksum file exists, read the checksum - if INDEX_CHECKSUM_FILE.exists(): - with INDEX_CHECKSUM_FILE.open("r") as f: - index_checksum = f.read().strip() + content_checksum = get_file_content(CONTENT_CHECKSUM_FILE) + + # Get the old index version, if it exists + index_version = get_file_content(INDEX_VERSION_FILE) # Only directories and zip files are currently supported if location.is_file() and not location.suffix == '.zip': @@ -169,16 +162,22 @@ def maintain_index(path: str) -> None: input_checksum = shasum_directory(location) logger.debug(f"Checksum is {input_checksum} for location '{location}'") - # See whether checksum matches the old index checksum - if input_checksum == index_checksum: + # See whether checksum matches the old index checksum and the index has not changed + if input_checksum == content_checksum and index_version == INDEX_VERSION: logger.info("Index is up to date, no changes needed.") - open_index() return - + # Data has changed, re-index else: - logger.info("Checksum has changed, updating index.") - logger.debug(f"Old index checksum: {index_checksum}, New input checksum: {input_checksum}") + if input_checksum != content_checksum: + logger.info("Checksum has changed.") + logger.debug(f"Old index checksum: {content_checksum}, New input checksum: {input_checksum}") + + if index_version != INDEX_VERSION: + logger.info("Index version has changed.") + logger.debug(f"Old index version: {index_version}, New index version: {INDEX_VERSION}") + INDEX_FILE.unlink(missing_ok=True) + logger.info("Recreating index...") # Extract the zip file to a temporary directory if location.is_file() and location.suffix == '.zip': @@ -200,13 +199,16 @@ def maintain_index(path: str) -> None: logger.debug("Indexing 
all html files in the directory...") - update_index(location) - open_index() + update_content(location) # Write the new checksum to the checksum file - with INDEX_CHECKSUM_FILE.open("w") as f: - logger.debug(f"Writing new checksum {input_checksum} to {INDEX_CHECKSUM_FILE}") - f.write(input_checksum) + logger.debug(f"Writing new checksum {input_checksum} to {CONTENT_CHECKSUM_FILE}") + write_file_content(CONTENT_CHECKSUM_FILE, input_checksum) + + if index_version != INDEX_VERSION: + # Write index version to version file + logger.debug(f"Writing index version {INDEX_VERSION} to {INDEX_VERSION_FILE}") + write_file_content(INDEX_VERSION_FILE, INDEX_VERSION) # Delete temporary zip output directory if it exists if Path(ZIP_TEMP_OUTPUT).exists(): @@ -214,37 +216,56 @@ def maintain_index(path: str) -> None: shutil.rmtree(zip_output) -def update_index(location: Path) -> None: - """Update the index with all HTML files in the directory. +def update_content(location: Path) -> None: + """Updates the stored content with the source provided. Args: location (Path): The path to the documentation directory. 
Returns: None """ + logger.debug("Updating content") - global INDEX + files_processed = 0 + for file in location.rglob("*"): + process_file(file) + files_processed += 1 + logger.info(f"Processed {files_processed} files from '{location}'.") - logger.debug("Updating index...") + logger.debug("Optimizing index...") + optimize_index() + logger.debug("Index optimized") - with PocketWriter(db_name=INDEX_NAME, schema=IndexSchema) as writer: - files_indexes = 0 - for ext in ("*.html", "*.htm"): - for file in location.rglob(ext): - logger.debug(f"Indexing file: {file}") - markdown_content = convert_to_markdown(file) - writer.insert_or_update(entry_name=str(file.relative_to(location)), entry=markdown_content) - files_indexes += 1 +def process_file(file: Path) -> None: + """Process the file.""" + # Only index html file + if file.suffix == ".html" or file.suffix == ".htm": + name = file.stem.lower() + # Ignore ReadMes, table of contents, indexes + if name not in ("readme", "toc", "index"): + content_chunks = convert_to_markdown_chunks(file) + update_index(content_chunks) - logger.info(f"Indexed {files_indexes} html files from '{location}'.") - # Optimize index for query performance - index = PocketSearch(db_name=INDEX_NAME, schema=IndexSchema, writeable=True) - logger.debug("Optimizing index...") - index.optimize() +def optimize_index() -> None: + """Optimizes index.""" + ps = PocketSearch(db_name=INDEX_FILE, writeable=True) + ps.optimize() - logger.info("Indexing complete.") + +def update_index(content: list[str]) -> None: + """Update the index with content. + + Args: + content list[str]: The list of HTML content to index. 
+ Returns: + None + """ + global INDEX + with PocketWriter(db_name=INDEX_FILE) as writer: + for segment in content: + writer.insert(text=segment) def shasum_directory(directory: Path) -> str: @@ -260,7 +281,7 @@ def shasum_directory(directory: Path) -> str: return sha256.hexdigest() -def convert_to_markdown(file: Path) -> str: +def convert_to_markdown_chunks(file: Path) -> list[str]: """Convert an HTML file to Markdown format. Args: @@ -269,14 +290,80 @@ def convert_to_markdown(file: Path) -> str: Returns: str: The converted Markdown content. """ - # Placeholder for conversion logic logger.debug(f"Converting {file} to Markdown format.") with file.open("r", encoding="utf-8") as f: html = f.read() # Convert HTML to Markdown - return md.markdownify(html) + markdown = remove_markdown_urls(md.markdownify(html)) + pattern = r'(^#{1,6}\s+[^\n]*\n?)(.*?)(?=(?:^#{1,6}\s+|\Z))' + + # Find all matches with re.MULTILINE and re.DOTALL flags + matches = re.finditer(pattern, markdown, re.MULTILINE | re.DOTALL) + + # Create sections list + sections = [] + for match in matches: + # Get heading without the leading "### " + heading = re.sub("^#{1,6}\\s+", "", match.group(1).strip()) + # Get content without URLs within them + content = match.group(2).strip() + sections.append(heading + "\n\n" + content) + + if len(sections) == 0: + return [markdown] + else: + return sections + + +def remove_markdown_urls(text): + # Regex pattern to match Markdown links [text](url) + pattern = r'\[([^\]]*)\]\([^\)]*\)' + # Replace the entire link with just the link text (group 1) + return re.sub(pattern, r'\1', text) + + +def build_folder_structure() -> None: + """Builds the home directory structure.""" + if not RESOURCES_DIR.exists(): + RESOURCES_DIR.mkdir(parents=True) + + +def get_file_content(path: str) -> str: + """Reads the content of a file and returns it or 'N/A' if the file does not exist. + + Args: + file (Path): The path to the file. 
+ """ + if Path(path).exists(): + with Path(path).open("r") as f: + return f.read().strip() + else: + return "N/A" + + +def write_file_content(path: str, content: str) -> None: + """Writes the content to a file.""" + with Path(path).open("w") as f: + f.write(content) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Oracle Database Documentation MCP Server.") + parser.add_argument("-doc", type=str, + help="Path to the documentation input zip file or extracted directory.") + parser.add_argument("-mcp", action="store_true", help="Run the MCP server.") + parser.add_argument("-log-level", type=str, default="ERROR", + help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") + parser.add_argument("-mode", choices=["stdio", "http"], default="stdio") + parser.add_argument("-host", type=str, default="0.0.0.0", + help="The IP address that the MCP server is reachable at.") + parser.add_argument("-port", type=int, default="8000", + help="The port that the MCP server is reachable at.") + args = parser.parse_args() + + return args def main(): @@ -290,29 +377,31 @@ def main(): logger.addHandler(ch) # Parse command line arguments - parser = argparse.ArgumentParser(description="Oracle Database Documentation MCP Server.") - parser.add_argument("-doc", type=str, help="Path to the documentation input zip file or extracted directory.") - parser.add_argument("-mcp", action="store_true", help="Run the MCP server.") - parser.add_argument("-log-level", type=str, default="ERROR", - help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") - args = parser.parse_args() + args = parse_args() - if not HOME_DIR.exists(): - HOME_DIR.mkdir(parents=True) + build_folder_structure() # Set log level logging.basicConfig(filename=HOME_DIR.joinpath(Path('oracle-db-doc.log')), filemode='w', level=logging.ERROR) logger.setLevel(getattr(logging, args.log_level.upper(), logging.ERROR)) - maintain_index(args.doc) + if args.doc: + 
maintain_content(args.doc) - if INDEX is None: - logger.error(f"Index does not exist. Please run the server with a valid doc directory to index.") + if not INDEX_FILE.exists(): + logger.error(f"Index does not exist. Please create the index first pointing to a valid doc directory to index.") return + global INDEX + logger.debug("Opening index file.") + INDEX = PocketSearch(db_name=INDEX_FILE) + if args.mcp: logger.info("Serving MCP server for Oracle documentation.") - mcp.run() + if args.mode == "stdio": + mcp.run(transport="stdio") + elif args.mode == "http": + mcp.run(transport="http", host=args.host, port=args.port) if __name__ == "__main__": From 62a494acc7a5cfa3c205e47e66d9476fe072964e Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 10:44:44 -0700 Subject: [PATCH 13/43] Update doc server with futher index refinements Signed-off-by: Gerald Venzl --- .../oracle-db-doc-mcp-server.py | 122 ++++++++++++++++-- src/oracle-db-mcp-server/requirements.txt | 1 + 2 files changed, 110 insertions(+), 13 deletions(-) diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index 6e628f6..acfe891 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -19,6 +19,7 @@ # limitations under the License. import argparse +from bs4 import BeautifulSoup import hashlib from fastmcp import FastMCP import logging @@ -45,6 +46,8 @@ # Temp directory for zip file extraction ZIP_TEMP_OUTPUT = HOME_DIR.joinpath("zip_temp") +PREPROCESS = "BASIC" + logger = logging.getLogger(__name__) @@ -86,13 +89,13 @@ @mcp.tool() def search( search_query: str, - max_results: int = 10, + max_results: int = 4, ) -> list[str]: """Search for information about how to use Oracle Database for a query string and return a list of results. Args: search_query: The search phrase to search for. - max_results: The maximum number of results to return, defaults to 10. 
+ max_results: The maximum number of results to return, defaults to 4. Usage: search(search_query="create table syntax") @@ -110,7 +113,7 @@ def search( # Function to search the index -def search_index(query_str: str, limit: int = 10) -> list[str]: +def search_index(query_str: str, limit: int = 4) -> list[str]: """ Search the index for the query string and return matching sections with context. Returns a list of content. @@ -295,8 +298,15 @@ def convert_to_markdown_chunks(file: Path) -> list[str]: with file.open("r", encoding="utf-8") as f: html = f.read() + if PREPROCESS == "ADVANCED": + # Preprocess HTML to remove boilerplate and navigation + html = preprocess_html(html) + # Convert HTML to Markdown - markdown = remove_markdown_urls(md.markdownify(html)) + markdown = md.markdownify(html) + if PREPROCESS != "NONE": + markdown = remove_markdown_urls(markdown) + pattern = r'(^#{1,6}\s+[^\n]*\n?)(.*?)(?=(?:^#{1,6}\s+|\Z))' # Find all matches with re.MULTILINE and re.DOTALL flags @@ -318,10 +328,88 @@ def convert_to_markdown_chunks(file: Path) -> list[str]: def remove_markdown_urls(text): - # Regex pattern to match Markdown links [text](url) - pattern = r'\[([^\]]*)\]\([^\)]*\)' - # Replace the entire link with just the link text (group 1) - return re.sub(pattern, r'\1', text) + # Remove Markdown links [text](url) and replace with just the text + text = re.sub(r'\[([^\]]*)\]\([^\)]*\)', r'\1', text) + + # Remove URLs with GUIDs (32-char hex with hyphens) + text = re.sub(r'https?://[^\s]*[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}[^\s]*', '', text) + + # Remove URLs with long hex strings (likely file hashes or identifiers) + text = re.sub(r'https?://[^\s]*[a-f0-9]{16,}[^\s]*', '', text) + + # Remove standalone URLs that start with http/https + text = re.sub(r'https?://[^\s]+', '', text) + + # Clean up extra whitespace left by removed URLs + text = re.sub(r'\s+', ' ', text) + text = re.sub(r'\n\s*\n', '\n\n', text) + + return text.strip() + + +def 
preprocess_html(html_content: str) -> str: + """Preprocess HTML to remove boilerplate and navigation elements. + + Args: + html_content (str): The raw HTML content. + + Returns: + str: Cleaned HTML content ready for markdown conversion. + """ + soup = BeautifulSoup(html_content, 'html.parser') + + # Remove script and style tags + for tag in soup.find_all(['script', 'style']): + tag.decompose() + + # Remove navigation elements + for tag in soup.find_all(['nav', 'header', 'footer']): + tag.decompose() + + # Remove elements with navigation-related classes/ids + nav_classes = [ + 'noscript', 'alert', 'pull-left', 'pull-right', 'skip', 'navigation', + 'breadcrumb', 'nav-', 'header-', 'footer-', 'menu', 'sidebar', 'toc' + ] + for nav_class in nav_classes: + for tag in soup.find_all(attrs={'class': lambda x: x and any(nav_class in str(cls).lower() for cls in (x if isinstance(x, list) else [x]))}): + tag.decompose() + for tag in soup.find_all(attrs={'id': lambda x: x and nav_class in str(x).lower()}): + tag.decompose() + + # Remove common Oracle doc boilerplate text patterns + boilerplate_patterns = [ + r'JavaScript.*(?:disabled|enabled).*browser', + r'Skip navigation.*', + r'Oracleยฎ.*(?:Database.*)?(?:Reference|Guide|Manual|Documentation)', + r'Release \d+[a-z]*[\s-]*[A-Z0-9-]*', + r'Previous.*Next', + r'All Classes.*', + r'Overview.*Package.*Class.*Use.*Tree.*Deprecated.*Index.*Help' + ] + + for pattern in boilerplate_patterns: + for tag in soup.find_all(string=re.compile(pattern, re.IGNORECASE)): + parent = tag.parent if hasattr(tag, 'parent') else None + if parent: + parent.decompose() + + # Remove elements likely to be navigation by common Oracle doc structure + # Remove elements with common Oracle navigation text content + nav_text_patterns = [ + 'Skip navigation links', + 'JavaScript is disabled on your browser', + 'All Classes', + 'SEARCH:' + ] + + for pattern in nav_text_patterns: + for element in soup.find_all(string=lambda text: text and pattern in text): + 
parent = element.parent if hasattr(element, 'parent') else None + if parent: + parent.decompose() + + return str(soup) def build_folder_structure() -> None: @@ -361,6 +449,8 @@ def parse_args() -> argparse.Namespace: help="The IP address that the MCP server is reachable at.") parser.add_argument("-port", type=int, default="8000", help="The port that the MCP server is reachable at.") + parser.add_argument("-preprocess", type=str, default="BASIC", + help="Preprocessing level of documentation (NONE, BASIC, ADVANCED).") args = parser.parse_args() return args @@ -369,6 +459,9 @@ def parse_args() -> argparse.Namespace: def main(): """Main entrypoint for the Oracle Documentation MCP server.""" + # Parse command line arguments + args = parse_args() + # Set up logging ch = logging.StreamHandler() formatter = logging.Formatter( @@ -376,16 +469,19 @@ def main(): ch.setFormatter(formatter) logger.addHandler(ch) - # Parse command line arguments - args = parse_args() - - build_folder_structure() - # Set log level logging.basicConfig(filename=HOME_DIR.joinpath(Path('oracle-db-doc.log')), filemode='w', level=logging.ERROR) logger.setLevel(getattr(logging, args.log_level.upper(), logging.ERROR)) + if args.doc and args.mcp: + logger.error("Cannot specify both -doc and -mcp options at the same time.") + return + + build_folder_structure() + if args.doc: + global PREPROCESS + PREPROCESS = args.preprocess.upper() maintain_content(args.doc) if not INDEX_FILE.exists(): diff --git a/src/oracle-db-mcp-server/requirements.txt b/src/oracle-db-mcp-server/requirements.txt index b87ec94..a08e586 100644 --- a/src/oracle-db-mcp-server/requirements.txt +++ b/src/oracle-db-mcp-server/requirements.txt @@ -1,3 +1,4 @@ +beautifulsoup4 >= 4.9.0 markdownify >= 1.2.0 fastmcp >= 2.11.3 pocketsearch >= 0.40.0 From 487de91ff3cd34b00ac77e74646fd0f1a2730a32 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 12:43:29 -0700 Subject: [PATCH 14/43] Update FastMCP requirements Signed-off-by: Gerald 
Venzl --- src/oracle-db-mcp-server/fastmcp.json | 11 ++++++++++ .../oracle-db-doc-mcp-server.py | 20 ++++++++----------- 2 files changed, 19 insertions(+), 12 deletions(-) create mode 100644 src/oracle-db-mcp-server/fastmcp.json diff --git a/src/oracle-db-mcp-server/fastmcp.json b/src/oracle-db-mcp-server/fastmcp.json new file mode 100644 index 0000000..656d093 --- /dev/null +++ b/src/oracle-db-mcp-server/fastmcp.json @@ -0,0 +1,11 @@ +{ + "entrypoint": "oracle-db-doc-mcp-server.py", + "environment": { + "dependencies": [ + "beautifulsoup4 >= 4.9.0", + "markdownify >= 1.2.0", + "fastmcp >= 2.11.3", + "pocketsearch >= 0.40.0" + ] + } +} \ No newline at end of file diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index acfe891..fb7bbca 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -77,12 +77,7 @@ - If the search tool returns too few results, increase the max_results limit. - If the search tool returns too many results, reduce the max_results limit. - If the search tool returns results that are not relevant, try to refine the query. - """, - dependencies=[ - "markdownify>=1.2.0", - "fastmcp>=2.11.3", - "pocketsearch>=0.40.0", - ] + """ ) @@ -462,6 +457,9 @@ def main(): # Parse command line arguments args = parse_args() + # Build the home directory structure, needed also for the log file + build_folder_structure() + # Set up logging ch = logging.StreamHandler() formatter = logging.Formatter( @@ -477,8 +475,6 @@ def main(): logger.error("Cannot specify both -doc and -mcp options at the same time.") return - build_folder_structure() - if args.doc: global PREPROCESS PREPROCESS = args.preprocess.upper() @@ -488,11 +484,11 @@ def main(): logger.error(f"Index does not exist. 
Please create the index first pointing to a valid doc directory to index.") return - global INDEX - logger.debug("Opening index file.") - INDEX = PocketSearch(db_name=INDEX_FILE) - if args.mcp: + global INDEX + logger.debug("Opening index file.") + INDEX = PocketSearch(db_name=INDEX_FILE) + logger.info("Serving MCP server for Oracle documentation.") if args.mode == "stdio": mcp.run(transport="stdio") From 9944f382af422f15b439c9b73b07780ecdb1e480 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 12:44:12 -0700 Subject: [PATCH 15/43] Update Dockerfile Signed-off-by: Gerald Venzl --- src/oracle-db-mcp-server/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/oracle-db-mcp-server/Dockerfile b/src/oracle-db-mcp-server/Dockerfile index bc33fd8..8056e2d 100644 --- a/src/oracle-db-mcp-server/Dockerfile +++ b/src/oracle-db-mcp-server/Dockerfile @@ -20,14 +20,14 @@ # FROM alpine -COPY oracle-db-doc-mcp-server.py requirements.txt . +COPY oracle-db-doc-mcp-server.py fastmcp.json requirements.txt ./ RUN apk --update --no-cache add python3 py3-pip curl && \ pip install -r requirements.txt --break-system-packages && \ rm requirements.txt && \ mkdir /input && \ curl -L -o /input/db23.zip https://docs.oracle.com/en/database/oracle/oracle-database/23/zip/oracle-database_23.zip && \ - python3 oracle-db-doc-mcp-server.py --log-level DEBUG --doc /input/db23.zip && \ + python3 oracle-db-doc-mcp-server.py -log-level DEBUG -doc /input/db23.zip && \ rm -r /input && \ apk del curl && \ rm -rf /var/cache/apk/* /tmp/* @@ -36,4 +36,4 @@ LABEL org.opencontainers.image.source=https://github.com/oracle/mcp LABEL org.opencontainers.image.description="Oracle Database Documentation MCP Server" LABEL org.opencontainers.image.licenses=Apache-2.0 -ENTRYPOINT [ "python3", "oracle-db-doc-mcp-server.py", "-mcp" ] +ENTRYPOINT [ "python3", "oracle-db-doc-mcp-server.py", "-mcp", "-mode", "http" ] From b9d40effc0960687ee9fc638d659cb6ac91b0bcf Mon Sep 17 
00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 13:30:04 -0700 Subject: [PATCH 16/43] Create build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 52 +++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 .github/workflows/build-doc-mcp.yml diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml new file mode 100644 index 0000000..df24bce --- /dev/null +++ b/.github/workflows/build-doc-mcp.yml @@ -0,0 +1,52 @@ +name: ๐Ÿ› ๏ธ Build an image +on: + push: + paths: + - 'src/oracle-db-mcp-server/**' + - '.github/**' + pull_request: + paths: + - 'src/oracle-db-mcp-server/**' + +jobs: + + build-image: + strategy: + matrix: + runner: [ "ubuntu-24.04", "ubuntu-24.04-arm" ] + + permissions: + packages: write + + name: ๐Ÿ› ๏ธ Build image + runs-on: ${{ matrix.runner }} + + steps: + - name: ๐Ÿ“‚ Checkout repo + uses: actions/checkout@v4 + + - name: ๐Ÿ”„ Generate environment variables + id: os_arch + run: echo "OS_ARCH=$(uname -m)" >> "$GITHUB_OUTPUT" + + - name: โฌ Install build dependencies + run: sudo apt-get install -y buildah + + - name: Build image + run: buildah bud -f Dockerfile -t oracle-doc-mcp-${{ steps.os_arch.outputs.OS_ARCH }} . 
+ + + - name: ๐Ÿ”“ Login to GHCR registry + uses: redhat-actions/podman-login@v1 + with: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + registry: ghcr.io + + - name: ๐Ÿซธ Push image to Container Registry + id: push-to-ghcr + uses: redhat-actions/push-to-registry@v2 + with: + registry: ghcr.io/gvenzl + image: mcp + tags: latest-${{ steps.os_arch.outputs.OS_ARCH }} From ace4d3b91f85c3fb4888ee6ac456f6f6dd917203 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 13:34:01 -0700 Subject: [PATCH 17/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index df24bce..af9bd4e 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -33,7 +33,9 @@ jobs: run: sudo apt-get install -y buildah - name: Build image - run: buildah bud -f Dockerfile -t oracle-doc-mcp-${{ steps.os_arch.outputs.OS_ARCH }} . + run: | + cd src/oracle-db-mcp-server/ + buildah bud -f Dockerfile -t oracle-doc-mcp-${{ steps.os_arch.outputs.OS_ARCH }} . 
- name: ๐Ÿ”“ Login to GHCR registry From 3f1ead50cccb1e8861018beebcfe89653188267d Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 13:49:27 -0700 Subject: [PATCH 18/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index af9bd4e..d27cea1 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -27,7 +27,12 @@ jobs: - name: ๐Ÿ”„ Generate environment variables id: os_arch - run: echo "OS_ARCH=$(uname -m)" >> "$GITHUB_OUTPUT" + run: | + if [ "$(uname -m)" == "aarch64"] ; then + echo "OS_ARCH=arm64" >> "$GITHUB_OUTPUT" + else + echo "OS_ARCH=amd64" >> "$GITHUB_OUTPUT" + fi; - name: โฌ Install build dependencies run: sudo apt-get install -y buildah @@ -35,7 +40,7 @@ jobs: - name: Build image run: | cd src/oracle-db-mcp-server/ - buildah bud -f Dockerfile -t oracle-doc-mcp-${{ steps.os_arch.outputs.OS_ARCH }} . + buildah bud -f Dockerfile -t oracle-doc-mcp:latest-${{ steps.os_arch.outputs.OS_ARCH }} . 
- name: ๐Ÿ”“ Login to GHCR registry From e69f7483423049173185167a894866424905b63a Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 13:58:35 -0700 Subject: [PATCH 19/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index d27cea1..c3653f8 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -54,6 +54,6 @@ jobs: id: push-to-ghcr uses: redhat-actions/push-to-registry@v2 with: - registry: ghcr.io/gvenzl - image: mcp + registry: ghcr.io/gvenzl/mcp + image: oracle-doc-mcp tags: latest-${{ steps.os_arch.outputs.OS_ARCH }} From 706681ee5bc47471a9dc2128251faa468b408bf4 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 14:28:23 -0700 Subject: [PATCH 20/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index c3653f8..e03d486 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -40,7 +40,7 @@ jobs: - name: Build image run: | cd src/oracle-db-mcp-server/ - buildah bud -f Dockerfile -t oracle-doc-mcp:latest-${{ steps.os_arch.outputs.OS_ARCH }} . + buildah bud -f Dockerfile -t oracle-doc:latest-${{ steps.os_arch.outputs.OS_ARCH }} . 
- name: ๐Ÿ”“ Login to GHCR registry @@ -54,6 +54,6 @@ jobs: id: push-to-ghcr uses: redhat-actions/push-to-registry@v2 with: - registry: ghcr.io/gvenzl/mcp - image: oracle-doc-mcp + registry: ghcr.io/gvenzl + image: oracle-doc tags: latest-${{ steps.os_arch.outputs.OS_ARCH }} From becf1e0823e8f5a13161ab8f792ef1ed4c22711d Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 14:44:42 -0700 Subject: [PATCH 21/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index e03d486..cb315f4 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -28,7 +28,7 @@ jobs: - name: ๐Ÿ”„ Generate environment variables id: os_arch run: | - if [ "$(uname -m)" == "aarch64"] ; then + if [ "$(uname -m)" == "aarch64" ]; then echo "OS_ARCH=arm64" >> "$GITHUB_OUTPUT" else echo "OS_ARCH=amd64" >> "$GITHUB_OUTPUT" From e7dac9d6ee64c6f7f3b83fa77911a6eb1339ec08 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 14:44:55 -0700 Subject: [PATCH 22/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index cb315f4..1cd15e1 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -54,6 +54,6 @@ jobs: id: push-to-ghcr uses: redhat-actions/push-to-registry@v2 with: - registry: ghcr.io/gvenzl + registry: ghcr.io/gvenzl/mcp image: oracle-doc tags: latest-${{ steps.os_arch.outputs.OS_ARCH }} From a62b4f2d9431cb8102874108bad6f923d07175d4 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 14:50:10 -0700 Subject: [PATCH 23/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/build-doc-mcp.yml 
b/.github/workflows/build-doc-mcp.yml index 1cd15e1..7050f97 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -34,9 +34,6 @@ jobs: echo "OS_ARCH=amd64" >> "$GITHUB_OUTPUT" fi; - - name: โฌ Install build dependencies - run: sudo apt-get install -y buildah - - name: Build image run: | cd src/oracle-db-mcp-server/ @@ -51,7 +48,6 @@ jobs: registry: ghcr.io - name: ๐Ÿซธ Push image to Container Registry - id: push-to-ghcr uses: redhat-actions/push-to-registry@v2 with: registry: ghcr.io/gvenzl/mcp From 5de57d4c193d914ac2365f5fa523ca74c10976c6 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 15:11:47 -0700 Subject: [PATCH 24/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index 7050f97..aaa2db5 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -53,3 +53,20 @@ jobs: registry: ghcr.io/gvenzl/mcp image: oracle-doc tags: latest-${{ steps.os_arch.outputs.OS_ARCH }} + + + upload-multi-arch: + name: ๐Ÿซธ Push multi-arch manifest + runs-on: "ubuntu-24.04" + + permissions: + packages: write + + steps: + - name: ๐Ÿซธ Push multi-arch manifest + run: | + podman manifest create gvenzl/mcp/oracle-doc:latest + podman manifest add gvenzl/mcp/oracle-doc:latest gvenzl/mcp/oracle-doc:latest-amd64 + podman manifest add gvenzl/mcp/oracle-doc:latest gvenzl/mcp/oracle-doc:latest-arm64 + podman push gvenzl/mcp/oracle-doc:latest + From 40f8b1d11f50fac0b68666805d95c932494e939e Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 15:12:59 -0700 Subject: [PATCH 25/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index aaa2db5..51b42d9 100644 --- 
a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -58,6 +58,7 @@ jobs: upload-multi-arch: name: ๐Ÿซธ Push multi-arch manifest runs-on: "ubuntu-24.04" + needs: build-image permissions: packages: write From 4d3bfb3bfd15acaf94a25ee8f1ee13649b4c0648 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 15:24:23 -0700 Subject: [PATCH 26/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index 51b42d9..1efc3b0 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -39,7 +39,6 @@ jobs: cd src/oracle-db-mcp-server/ buildah bud -f Dockerfile -t oracle-doc:latest-${{ steps.os_arch.outputs.OS_ARCH }} . - - name: ๐Ÿ”“ Login to GHCR registry uses: redhat-actions/podman-login@v1 with: @@ -64,6 +63,14 @@ jobs: packages: write steps: + + - name: ๐Ÿ”“ Login to GHCR registry + uses: redhat-actions/podman-login@v1 + with: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + registry: ghcr.io + - name: ๐Ÿซธ Push multi-arch manifest run: | podman manifest create gvenzl/mcp/oracle-doc:latest From d26283a27a3423d951f24bee60732c54ec792a48 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 15:49:00 -0700 Subject: [PATCH 27/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index 1efc3b0..cbe1dcd 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -73,8 +73,8 @@ jobs: - name: ๐Ÿซธ Push multi-arch manifest run: | - podman manifest create gvenzl/mcp/oracle-doc:latest - podman manifest add gvenzl/mcp/oracle-doc:latest gvenzl/mcp/oracle-doc:latest-amd64 - podman manifest add gvenzl/mcp/oracle-doc:latest 
gvenzl/mcp/oracle-doc:latest-arm64 - podman push gvenzl/mcp/oracle-doc:latest + podman manifest create ghcr.io/gvenzl/mcp/oracle-doc:latest + podman manifest add ghcr.io/gvenzl/mcp/oracle-doc:latest ghcr.io/gvenzl/mcp/oracle-doc:latest-amd64 + podman manifest add ghcr.io/gvenzl/mcp/oracle-doc:latest ghcr.io/gvenzl/mcp/oracle-doc:latest-arm64 + podman push ghcr.io/gvenzl/mcp/oracle-doc:latest From bc98c2b685455ba859c30f7372f150104e131936 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 16:03:08 -0700 Subject: [PATCH 28/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index cbe1dcd..a514995 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -64,13 +64,6 @@ jobs: steps: - - name: ๐Ÿ”“ Login to GHCR registry - uses: redhat-actions/podman-login@v1 - with: - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - registry: ghcr.io - - name: ๐Ÿซธ Push multi-arch manifest run: | podman manifest create ghcr.io/gvenzl/mcp/oracle-doc:latest From 7fed6513052c75313b8dd0165fa0babee51b72fb Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 16:14:17 -0700 Subject: [PATCH 29/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index a514995..cbe1dcd 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -64,6 +64,13 @@ jobs: steps: + - name: ๐Ÿ”“ Login to GHCR registry + uses: redhat-actions/podman-login@v1 + with: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + registry: ghcr.io + - name: ๐Ÿซธ Push multi-arch manifest run: | podman manifest create ghcr.io/gvenzl/mcp/oracle-doc:latest From 3a8e723feb8e160cfadedbff31702d8e855f5e73 Mon 
Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 16:33:10 -0700 Subject: [PATCH 30/43] Update build-doc-mcp.yml --- .github/workflows/build-doc-mcp.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index cbe1dcd..c2f630b 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -3,7 +3,6 @@ on: push: paths: - 'src/oracle-db-mcp-server/**' - - '.github/**' pull_request: paths: - 'src/oracle-db-mcp-server/**' From cb5c7248303bce52e229ae509c9cb8cea98dd8ee Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 12 Sep 2025 17:06:04 -0700 Subject: [PATCH 31/43] Update ReadMe, catch Ctrl+C Signed-off-by: Gerald Venzl --- src/oracle-db-mcp-server/README.md | 17 +++++++---------- .../oracle-db-doc-mcp-server.py | 15 ++++++++++----- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/oracle-db-mcp-server/README.md b/src/oracle-db-mcp-server/README.md index bd0ca29..e66d44d 100644 --- a/src/oracle-db-mcp-server/README.md +++ b/src/oracle-db-mcp-server/README.md @@ -28,7 +28,12 @@ python3 -m pip install -r requirements.txt ## Usage -The MCP server has two modes, one to create or maintain the documentation index and one to run the MCP server. Both modes can be combined. +The MCP server has two modes: + +1. `-doc`: Create or maintain the documentation index +2. `-mcp`: Run the MCP server. + +Building the index will take some time and some MCP clients will time out while waiting for the index to be built. Hence the two modes cannot be intermixed. The server will create a new folder under `$HOME/.oracle/oracle-db-mcp-server` to store the index and the server log file. @@ -62,14 +67,6 @@ To run just the MCP server, provide the `-mcp` parameter. 
The index will have to python3 oracle-db-doc-mcp-server.py -mcp ``` -### Combining index creation/maintenance and MCP server mode - -You can combine the index maintainenance and MCP server mode into one command, for example: - -```console -python3 oracle-db-doc-mcp-server.py -mcp -doc ~/Downloads/oracle-database_23.zip -``` - ### VSCode integration Replace the `<>` placeholders with the paths to the MCP server installation and Oracle Database Documentation zip file. @@ -80,7 +77,7 @@ Replace the `<>` placeholders with the paths to the MCP server installation and "oracle-db-doc": { "type": "stdio", "command": "/.venv/bin/python3", - "args": [ "oracle-db-doc-mcp-server.py", "-doc", "", "-mcp" ] + "args": [ "oracle-db-doc-mcp-server.py", "-mcp" ] } } } diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index fb7bbca..92184ce 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -480,11 +480,13 @@ def main(): PREPROCESS = args.preprocess.upper() maintain_content(args.doc) - if not INDEX_FILE.exists(): - logger.error(f"Index does not exist. Please create the index first pointing to a valid doc directory to index.") - return - if args.mcp: + + # If no index is present (not index was built), refuse to start the server. + if not INDEX_FILE.exists(): + logger.error(f"Index does not exist. 
Please create the index first via the '-doc' option.") + return + global INDEX logger.debug("Opening index file.") INDEX = PocketSearch(db_name=INDEX_FILE) @@ -497,4 +499,7 @@ def main(): if __name__ == "__main__": - main() + try: + main() + except KeyboardInterrupt: + None From 9c8800d26d9fdd78f02916345cd346e0f77c2735 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Tue, 7 Oct 2025 18:34:58 -0700 Subject: [PATCH 32/43] new tool name, show no banner Signed-off-by: Gerald Venzl --- .../oracle-db-doc-mcp-server.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py index 92184ce..50e5c55 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py @@ -82,7 +82,7 @@ @mcp.tool() -def search( +def search_oracle_database_documentation( search_query: str, max_results: int = 4, ) -> list[str]: @@ -93,11 +93,11 @@ def search( max_results: The maximum number of results to return, defaults to 4. Usage: - search(search_query="create table syntax") - search(search_query="alter a parameter", max_results=13) - search(search_query="database user concept", max_results=20) - search(search_query="data use case domains best practices", max_results=15) - search(search_query="external table definition", max_results=100) + search_oracle_database_documentation(search_query="create table syntax") + search_oracle_database_documentation(search_query="alter a parameter", max_results=13) + search_oracle_database_documentation(search_query="database user concept", max_results=20) + search_oracle_database_documentation(search_query="data use case domains best practices", max_results=15) + search_oracle_database_documentation(search_query="external table definition", max_results=100) Returns: A list of results. Each result a string in Markdown format with the most relevant search topic. 
@@ -493,9 +493,9 @@ def main(): logger.info("Serving MCP server for Oracle documentation.") if args.mode == "stdio": - mcp.run(transport="stdio") + mcp.run(transport="stdio", show_banner=False) elif args.mode == "http": - mcp.run(transport="http", host=args.host, port=args.port) + mcp.run(transport="http", host=args.host, port=args.port, show_banner=False) if __name__ == "__main__": From 9021adc52dbf1c19a4e375c9915acb20c744fa69 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 15:11:03 -0700 Subject: [PATCH 33/43] Update Oracle DB Doc MCP server Signed-off-by: Gerald Venzl --- .github/workflows/build-doc-mcp.yml | 22 +-- .../.gitignore | 0 .../Dockerfile | 6 +- src/oracle-db-doc-mcp-server/README.md | 155 ++++++++++++++++++ .../fastmcp.json | 0 .../oracle-db-doc-mcp-server.py | 60 ++++--- .../requirements.txt | 0 src/oracle-db-mcp-server/README.md | 94 ----------- 8 files changed, 201 insertions(+), 136 deletions(-) rename src/{oracle-db-mcp-server => oracle-db-doc-mcp-server}/.gitignore (100%) rename src/{oracle-db-mcp-server => oracle-db-doc-mcp-server}/Dockerfile (85%) create mode 100644 src/oracle-db-doc-mcp-server/README.md rename src/{oracle-db-mcp-server => oracle-db-doc-mcp-server}/fastmcp.json (100%) rename src/{oracle-db-mcp-server => oracle-db-doc-mcp-server}/oracle-db-doc-mcp-server.py (89%) rename src/{oracle-db-mcp-server => oracle-db-doc-mcp-server}/requirements.txt (100%) delete mode 100644 src/oracle-db-mcp-server/README.md diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index c2f630b..3464598 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -2,21 +2,20 @@ name: ๐Ÿ› ๏ธ Build an image on: push: paths: - - 'src/oracle-db-mcp-server/**' + - "src/oracle-db-doc-mcp-server/**" pull_request: paths: - - 'src/oracle-db-mcp-server/**' + - "src/oracle-db-doc-mcp-server/**" jobs: - build-image: strategy: matrix: - runner: [ "ubuntu-24.04", "ubuntu-24.04-arm" ] 
+ runner: ["ubuntu-24.04", "ubuntu-24.04-arm"] permissions: packages: write - + name: ๐Ÿ› ๏ธ Build image runs-on: ${{ matrix.runner }} @@ -35,7 +34,7 @@ jobs: - name: Build image run: | - cd src/oracle-db-mcp-server/ + cd src/oracle-db-doc-mcp-server/ buildah bud -f Dockerfile -t oracle-doc:latest-${{ steps.os_arch.outputs.OS_ARCH }} . - name: ๐Ÿ”“ Login to GHCR registry @@ -52,7 +51,6 @@ jobs: image: oracle-doc tags: latest-${{ steps.os_arch.outputs.OS_ARCH }} - upload-multi-arch: name: ๐Ÿซธ Push multi-arch manifest runs-on: "ubuntu-24.04" @@ -62,7 +60,6 @@ jobs: packages: write steps: - - name: ๐Ÿ”“ Login to GHCR registry uses: redhat-actions/podman-login@v1 with: @@ -72,8 +69,7 @@ jobs: - name: ๐Ÿซธ Push multi-arch manifest run: | - podman manifest create ghcr.io/gvenzl/mcp/oracle-doc:latest - podman manifest add ghcr.io/gvenzl/mcp/oracle-doc:latest ghcr.io/gvenzl/mcp/oracle-doc:latest-amd64 - podman manifest add ghcr.io/gvenzl/mcp/oracle-doc:latest ghcr.io/gvenzl/mcp/oracle-doc:latest-arm64 - podman push ghcr.io/gvenzl/mcp/oracle-doc:latest - + podman manifest create ghcr.io/gvenzl/mcp/oracle-db-doc:latest + podman manifest add ghcr.io/gvenzl/mcp/oracle-db-doc:latest ghcr.io/gvenzl/mcp/oracle-db-doc:latest-amd64 + podman manifest add ghcr.io/gvenzl/mcp/oracle-db-doc:latest ghcr.io/gvenzl/mcp/oracle-db-doc:latest-arm64 + podman push ghcr.io/gvenzl/mcp/oracle-db-doc:latest diff --git a/src/oracle-db-mcp-server/.gitignore b/src/oracle-db-doc-mcp-server/.gitignore similarity index 100% rename from src/oracle-db-mcp-server/.gitignore rename to src/oracle-db-doc-mcp-server/.gitignore diff --git a/src/oracle-db-mcp-server/Dockerfile b/src/oracle-db-doc-mcp-server/Dockerfile similarity index 85% rename from src/oracle-db-mcp-server/Dockerfile rename to src/oracle-db-doc-mcp-server/Dockerfile index 8056e2d..ca7a2da 100644 --- a/src/oracle-db-mcp-server/Dockerfile +++ b/src/oracle-db-doc-mcp-server/Dockerfile @@ -26,8 +26,8 @@ RUN apk --update --no-cache add python3 
py3-pip curl && \ pip install -r requirements.txt --break-system-packages && \ rm requirements.txt && \ mkdir /input && \ - curl -L -o /input/db23.zip https://docs.oracle.com/en/database/oracle/oracle-database/23/zip/oracle-database_23.zip && \ - python3 oracle-db-doc-mcp-server.py -log-level DEBUG -doc /input/db23.zip && \ + curl -L -o /input/dbdoc.zip https://docs.oracle.com/en/database/oracle/oracle-database/26/zip/oracle-database_26.zip && \ + python3 oracle-db-doc-mcp-server.py -log-level DEBUG -doc /input/dbdoc.zip && \ rm -r /input && \ apk del curl && \ rm -rf /var/cache/apk/* /tmp/* @@ -36,4 +36,4 @@ LABEL org.opencontainers.image.source=https://github.com/oracle/mcp LABEL org.opencontainers.image.description="Oracle Database Documentation MCP Server" LABEL org.opencontainers.image.licenses=Apache-2.0 -ENTRYPOINT [ "python3", "oracle-db-doc-mcp-server.py", "-mcp", "-mode", "http" ] +ENTRYPOINT [ "python3", "oracle-db-doc-mcp-server.py", "mcp" ] diff --git a/src/oracle-db-doc-mcp-server/README.md b/src/oracle-db-doc-mcp-server/README.md new file mode 100644 index 0000000..8ec99ee --- /dev/null +++ b/src/oracle-db-doc-mcp-server/README.md @@ -0,0 +1,155 @@ +# Oracle Database Documentation MCP Server + +A Python-based MCP (Model Context Protocol) server that provides tools for searching the official Oracle Database documentation. + +The MCP server leverages an inverted index to serve snippets of the Oracle Database documentation. Because the Oracle Database documentation is large and gets updated from time to time, it is unfeasible to ship a ready to go documentation index with this repository. Doing so will bloat the repository and runs risk of users searching on an outdated documentation. + +Instead, users can create their own index and maintain it as often as required. See [Index creation/maintenance](#index-creation-maintenance) for more on that topic. 
+ +## Features + +- **Search** + - Search the documentation by keywords and phrases + +## Prerequisites + +- Python 3.x +- Downloaded [Oracle Database Documentation zip file](https://docs.oracle.com/en/database/oracle/oracle-database/26/zip/oracle-database_26.zip) to build the initial index + +## Installation + +```console +git clone https://github.com/oracle/mcp.git + +cd mcp/src/oracle-db-doc-mcp-server + +python3 -m venv .venv + +source .venv/bin/activate + +python3 -m pip install -r requirements.txt +``` + +## Usage + +```console +usage: oracle-db-doc-mcp-server.py [-h] [-log-level LOG_LEVEL] {idx,mcp} ... + +Oracle Database Documentation MCP Server. + +options: + -h, --help show this help message and exit + -log-level LOG_LEVEL Set the log level (DEBUG, INFO, WARNING, ERROR (default), CRITICAL). + +subcommands: + {idx,mcp} + idx create/maintain the index + mcp run the MCP server +``` + +The MCP server has two subcommands: + +1. `idx`: Creates or maintains the documentation index. +2. `mcp`: Runs the MCP server. + +Building the index will take some time and some MCP clients will time out while waiting for the index to be built. Hence the two subcommands cannot be intermixed. Users will first have to create the documentation index via the `idx` subcommand and once completed, run the server with the `mcp` subcommand. 
+ +### Index creation/maintenance + +```console +usage: oracle-db-doc-mcp-server.py idx [-h] -path PATH [-preprocess PREPROCESS] + +options: + -h, --help show this help message and exit + -path PATH path to the documentation input zip file or extracted directory + -preprocess PREPROCESS + preprocessing level of documentation (NONE, BASIC (default), ADVANCED) +``` + +To create or maintain the index, use the `idx` subcommand and point the `-path` parameter to either the Oracle Database Documentation zip file (the file will be automatically unzipped into a temporary location under `$HOME/.oracle/oracle-db-doc-mcp-server`) or an **already extracted** location of the Oracle Database Documentation. + +The server will create a new folder under `$HOME/.oracle/oracle-db-doc-mcp-server` and store the index and the server log file within. Subsequent runs of `mcp` will open that index. The index can be updated by running the `idx` mode again. + +The index creation will take several minutes to complete depending on your environment and the level of preprocessing specified via the `-preprocess` parameter. + +A checksum of the index is kept so that subsequent executions of the program will only reindex content that has changed. + +For example, to create an index on a downloaded Oracle Database documentation zip file under `~/Downloads/oracle-database_26.zip`, run: + +```console +python3 oracle-db-doc-mcp-server.py idx -path ~/Downloads/oracle-database_26.zip +``` + +### Running the MCP Server + +```console +usage: oracle-db-doc-mcp-server.py mcp [-h] [-mode {stdio,http}] [-host HOST] [-port PORT] + +options: + -h, --help show this help message and exit + -mode {stdio,http} the transport mode for the MCP server (stdio (default) or http) + -host HOST the IP address (default 0.0.0.0) that the MCP server is reachable at + -port PORT the port (default 8000) that the MCP server is reachable at +``` + +To run the MCP server, use the `mcp` subcommand. 
+ +**Note:** The index will have to exist. If it doesn't, the MCP server will exit with an error. + +By default, the MCP server runs on `stdio`. Hence, the simplest way to run it, is: + +```console +python3 oracle-db-doc-mcp-server.py mcp +``` + +### VSCode integration + +#### Running the MCP server via Docker/Podman + +To run the MCP server from inside a Docker container: + +1. Add a new `.vscode/mcp.json` file to your project folder. +2. Add the following content to your `mcp.json` file. + +``` +{ + "servers": { + "oracle-db-doc": { + "type": "stdio", + "command": "docker", + "args": [ "run", "-ti", "ghcr.io/oracle/mcp/oracle-db-doc" ] + } + } +} +``` + +#### Running the MCP server directly + +To run the MCP server directly from your machine: + +1. Follow the [Installation](#installation) instructions first. +2. Create an index as explained in [Index creation/maintenance](#index-creationmaintenance) +3. Add a new `.vscode/mcp.json` file to your project folder. +4. Add the following content to your `.vscode/mcp.json` file. Replace the `<>` placeholders with the paths to the MCP server installation. + +``` +{ + "servers": { + "oracle-db-doc": { + "type": "stdio", + "command": "/.venv/bin/python3", + "args": [ "oracle-db-doc-mcp-server.py", "mcp" ] + } + } +} +``` + +## Tools + +### search_oracle_database_documentation + +Searches the documentation for key words and key phrases. 
+ +```python +search_oracle_database_documentation(search_query: str, max_results: int) -> list[str]: +``` diff --git a/src/oracle-db-mcp-server/fastmcp.json b/src/oracle-db-doc-mcp-server/fastmcp.json similarity index 100% rename from src/oracle-db-mcp-server/fastmcp.json rename to src/oracle-db-doc-mcp-server/fastmcp.json diff --git a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py similarity index 89% rename from src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py rename to src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py index 50e5c55..2fd57b3 100644 --- a/src/oracle-db-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py @@ -31,7 +31,7 @@ import zipfile # Working home directory -HOME_DIR = Path.home().joinpath(PurePath(".oracle/oracle-db-mcp-server")) +HOME_DIR = Path.home().joinpath(PurePath(".oracle/oracle-db-doc-mcp-server")) # Index INDEX = None @@ -300,8 +300,10 @@ def convert_to_markdown_chunks(file: Path) -> list[str]: # Convert HTML to Markdown markdown = md.markdownify(html) if PREPROCESS != "NONE": + markdown = markdown.replace("Previous\nNext\n JavaScript must be enabled to correctly display this content", "") markdown = remove_markdown_urls(markdown) + # Split markdown into sections based on headings pattern = r'(^#{1,6}\s+[^\n]*\n?)(.*?)(?=(?:^#{1,6}\s+|\Z))' # Find all matches with re.MULTILINE and re.DOTALL flags @@ -335,9 +337,11 @@ def remove_markdown_urls(text): # Remove standalone URLs that start with http/https text = re.sub(r'https?://[^\s]+', '', text) - # Clean up extra whitespace left by removed URLs - text = re.sub(r'\s+', ' ', text) - text = re.sub(r'\n\s*\n', '\n\n', text) + # Clean up extra spaces/tabs but preserve new lines (\s includes \n) + text = re.sub(r'[ \t]+', ' ', text) + + # Clean up extra spaces within new lines + text = re.sub(r'\n *\n', '\n\n', text) return text.strip() @@ -434,18 +438,26 @@ def 
write_file_content(path: str, content: str) -> None: def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Oracle Database Documentation MCP Server.") - parser.add_argument("-doc", type=str, - help="Path to the documentation input zip file or extracted directory.") - parser.add_argument("-mcp", action="store_true", help="Run the MCP server.") + parser.add_argument("-log-level", type=str, default="ERROR", - help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).") - parser.add_argument("-mode", choices=["stdio", "http"], default="stdio") - parser.add_argument("-host", type=str, default="0.0.0.0", - help="The IP address that the MCP server is reachable at.") - parser.add_argument("-port", type=int, default="8000", - help="The port that the MCP server is reachable at.") - parser.add_argument("-preprocess", type=str, default="BASIC", - help="Preprocessing level of documentation (NONE, BASIC, ADVANCED).") + help="Set the log level (DEBUG, INFO, WARNING, ERROR (default), CRITICAL).") + + subparser = parser.add_subparsers(title="subcommands", dest="command", required=True) + + parser_doc = subparser.add_parser("idx", help="create/maintain the index") + parser_doc.add_argument("-path", type=str, required=True, + help="path to the documentation input zip file or extracted directory") + parser_doc.add_argument("-preprocess", type=str, default="BASIC", + help="preprocessing level of documentation (NONE, BASIC (default), ADVANCED)") + + parser_mcp = subparser.add_parser("mcp", help="run the MCP server") + parser_mcp.add_argument("-mode", choices=["stdio", "http"], default="stdio", + help="the transport mode for the MCP server (stdio (default) or http)") + parser_mcp.add_argument("-host", type=str, default="0.0.0.0", + help="the IP address (default 0.0.0.0) that the MCP server is reachable at") + parser_mcp.add_argument("-port", type=int, default=8000, + help="the port (default 8000) that the MCP server is reachable at") + args = 
parser.parse_args() return args @@ -468,30 +480,26 @@ def main(): logger.addHandler(ch) # Set log level - logging.basicConfig(filename=HOME_DIR.joinpath(Path('oracle-db-doc.log')), filemode='w', level=logging.ERROR) + logging.basicConfig(filename=HOME_DIR.joinpath(Path('oracle-db-doc-mcp-server.log')), filemode='w', level=logging.ERROR) logger.setLevel(getattr(logging, args.log_level.upper(), logging.ERROR)) - if args.doc and args.mcp: - logger.error("Cannot specify both -doc and -mcp options at the same time.") - return - - if args.doc: + if args.command == "idx": global PREPROCESS PREPROCESS = args.preprocess.upper() - maintain_content(args.doc) + maintain_content(args.path) - if args.mcp: + if args.command == "mcp": # If no index is present (not index was built), refuse to start the server. if not INDEX_FILE.exists(): - logger.error(f"Index does not exist. Please create the index first via the '-doc' option.") + logger.error(f"Index does not exist. Please create the index first via the 'idx' subcommand.") return global INDEX logger.debug("Opening index file.") INDEX = PocketSearch(db_name=INDEX_FILE) - logger.info("Serving MCP server for Oracle documentation.") + logger.info("Serving MCP server for Oracle Database documentation.") if args.mode == "stdio": mcp.run(transport="stdio", show_banner=False) elif args.mode == "http": @@ -502,4 +510,4 @@ def main(): try: main() except KeyboardInterrupt: - None + logger.info("Shutting down Oracle Database Documentation MCP Server.") \ No newline at end of file diff --git a/src/oracle-db-mcp-server/requirements.txt b/src/oracle-db-doc-mcp-server/requirements.txt similarity index 100% rename from src/oracle-db-mcp-server/requirements.txt rename to src/oracle-db-doc-mcp-server/requirements.txt diff --git a/src/oracle-db-mcp-server/README.md b/src/oracle-db-mcp-server/README.md deleted file mode 100644 index e66d44d..0000000 --- a/src/oracle-db-mcp-server/README.md +++ /dev/null @@ -1,94 +0,0 @@ -# Oracle Database 
Documentation MCP Server - -A Python-based MCP (Model Context Protocol) server that provides tools for searching the official Oracle Database documentation. - -## Features - -- **Search** - - Serach the documentation by keywords and phrases - -## Prerequisites - -- Python 3.x -- Downloaded [Oracle Database Documentation zip file](https://docs.oracle.com/en/database/oracle/oracle-database/23/zip/oracle-database_23.zip) - -## Installation - -```console -git clone https://github.com/oracle/mcp.git - -cd mcp/src/oracle-db-mcp-server - -python3 -m venv .venv - -source .venv/bin/activate - -python3 -m pip install -r requirements.txt -``` - -## Usage - -The MCP server has two modes: - -1. `-doc`: Create or maintain the documentation index -2. `-mcp`: Run the MCP server. - -Building the index will take some time and some MCP clients will time out while waiting for the index to be built. Hence the two modes cannot be intermixed. - -The server will create a new folder under `$HOME/.oracle/oracle-db-mcp-server` to store the index and the server log file. - -```console -usage: oracle-db-doc-mcp-server.py [-h] [-doc DOC] [-mcp] [-log-level LOG_LEVEL] - -Oracle Database Documentation MCP Server. - -options: - -h, --help show this help message and exit - -doc DOC Path to the documentation input zip file or extracted directory. - -mcp Run the MCP server. - -log-level LOG_LEVEL Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). -``` - -### Index creation/maintenance - -To create or maintain the index, point the `-doc` parameter to either the Oracle Database Documentation zip file or an **already extracted** location of the Oracle Documentation. -The index creation will take several minutes to complete. -A checksum of the index is kept so that subsequent executions of the program will only reindex content that has changed. 
- -```console -python3 oracle-db-doc-mcp-server.py -doc ~/Downloads/oracle-database_23.zip -``` - -### Run MCP Server - -To run just the MCP server, provide the `-mcp` parameter. The index will have to exist. - -```console -python3 oracle-db-doc-mcp-server.py -mcp -``` - -### VSCode integration - -Replace the `<>` placeholders with the paths to the MCP server installation and Oracle Database Documentation zip file. - -``` -{ - "servers": { - "oracle-db-doc": { - "type": "stdio", - "command": "/.venv/bin/python3", - "args": [ "oracle-db-doc-mcp-server.py", "-mcp" ] - } - } -} -``` - -## Tools - -### search - -Searches the documentation for key words and key phrases - -```python -search(search_query: str, max_results: int) -> list[str]: -``` From 35e522ae7a3d220879982ace96096443970056ac Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 15:30:20 -0700 Subject: [PATCH 34/43] Update Dockerfile with new parameters Signed-off-by: Gerald Venzl --- src/oracle-db-doc-mcp-server/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oracle-db-doc-mcp-server/Dockerfile b/src/oracle-db-doc-mcp-server/Dockerfile index ca7a2da..4fa72ed 100644 --- a/src/oracle-db-doc-mcp-server/Dockerfile +++ b/src/oracle-db-doc-mcp-server/Dockerfile @@ -27,7 +27,7 @@ RUN apk --update --no-cache add python3 py3-pip curl && \ rm requirements.txt && \ mkdir /input && \ curl -L -o /input/dbdoc.zip https://docs.oracle.com/en/database/oracle/oracle-database/26/zip/oracle-database_26.zip && \ - python3 oracle-db-doc-mcp-server.py -log-level DEBUG -doc /input/dbdoc.zip && \ + python3 oracle-db-doc-mcp-server.py -log-level DEBUG idx -path /input/dbdoc.zip && \ rm -r /input && \ apk del curl && \ rm -rf /var/cache/apk/* /tmp/* From 2211208283fd68d0f648f5485ee9f907a2a55717 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 15:57:36 -0700 Subject: [PATCH 35/43] Update Dockerfile with new image name Signed-off-by: Gerald Venzl --- 
.github/workflows/build-doc-mcp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index 3464598..8081a1f 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ -35,7 +35,7 @@ jobs: - name: Build image run: | cd src/oracle-db-doc-mcp-server/ - buildah bud -f Dockerfile -t oracle-doc:latest-${{ steps.os_arch.outputs.OS_ARCH }} . + buildah bud -f Dockerfile -t oracle-db-doc:latest-${{ steps.os_arch.outputs.OS_ARCH }} . - name: ๐Ÿ”“ Login to GHCR registry uses: redhat-actions/podman-login@v1 From a93270e280ae9446ec7b38aa9a56c5540c36adbf Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 17:10:45 -0700 Subject: [PATCH 36/43] Fix linting issue Signed-off-by: Gerald Venzl --- src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py index 2fd57b3..da8f8ac 100644 --- a/src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py @@ -20,8 +20,8 @@ import argparse from bs4 import BeautifulSoup -import hashlib from fastmcp import FastMCP +import hashlib import logging import markdownify as md from pathlib import PurePath, Path From db686d45fed0c235953a43747ee953bfbb5b2708 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 17:28:57 -0700 Subject: [PATCH 37/43] Fix linting issues, update image build Signed-off-by: Gerald Venzl --- .github/workflows/build-doc-mcp.yml | 4 +- .../oracle-db-doc-mcp-server.py | 227 ++++++++++++------ 2 files changed, 155 insertions(+), 76 deletions(-) diff --git a/.github/workflows/build-doc-mcp.yml b/.github/workflows/build-doc-mcp.yml index 8081a1f..a5c14a7 100644 --- a/.github/workflows/build-doc-mcp.yml +++ b/.github/workflows/build-doc-mcp.yml @@ 
-1,4 +1,4 @@ -name: ๐Ÿ› ๏ธ Build an image +name: ๐Ÿ› ๏ธ Build Docker image for Oracle Database Documentation MCP Server on: push: paths: @@ -48,7 +48,7 @@ jobs: uses: redhat-actions/push-to-registry@v2 with: registry: ghcr.io/gvenzl/mcp - image: oracle-doc + image: oracle-db-doc tags: latest-${{ steps.os_arch.outputs.OS_ARCH }} upload-multi-arch: diff --git a/src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py b/src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py index da8f8ac..09360fb 100644 --- a/src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py +++ b/src/oracle-db-doc-mcp-server/oracle-db-doc-mcp-server.py @@ -19,16 +19,17 @@ # limitations under the License. import argparse -from bs4 import BeautifulSoup -from fastmcp import FastMCP import hashlib import logging -import markdownify as md -from pathlib import PurePath, Path -from pocketsearch import PocketSearch, PocketWriter import re import shutil import zipfile +from pathlib import Path, PurePath + +import markdownify as md +from bs4 import BeautifulSoup +from fastmcp import FastMCP +from pocketsearch import PocketSearch, PocketWriter # Working home directory HOME_DIR = Path.home().joinpath(PurePath(".oracle/oracle-db-doc-mcp-server")) @@ -36,7 +37,7 @@ # Index INDEX = None INDEX_FILE = HOME_DIR.joinpath(PurePath("index.db")) -INDEX_VERSION="1.0.0" +INDEX_VERSION = "1.0.0" INDEX_VERSION_FILE = HOME_DIR.joinpath(PurePath("index.version")) CONTENT_CHECKSUM_FILE = HOME_DIR.joinpath(PurePath("content.checksum")) @@ -57,10 +58,14 @@ # Oracle Database Documentation MCP Server. This server is used to search the Oracle Database documentation for information. - It can be used to find information about SQL syntax, PL/SQL, database concepts, best practices, examples and many more. - It is also used to search the official Oracle Database documentation for additional information on a particular feature, its use cases, restrictions or interoperability with other features. 
- The tool should be used to augment any existing knowledge or to find information that is not available in the current context. - The server is designed to search the Oracle Database documentation for search phrases and will return a list of results. + It can be used to find information about SQL syntax, PL/SQL, database concepts, best practices, + examples and many more. + It is also used to search the official Oracle Database documentation for additional information + on a particular feature, its use cases, restrictions or interoperability with other features. + The tool should be used to augment any existing knowledge or to find information that is + not available in the current context. + The server is designed to search the Oracle Database documentation for search phrases and + will return a list of results. You can use the following tools to search the documentation: - search: Search the documentation for a query string or search phrase. @@ -72,21 +77,23 @@ - Use the search tool to search for phrases or query strings. - Use the search tool to search for specific topics or features. - - Always use the search tool to search for additional and official information for Oracle Database features. + - Always use the search tool to search for additional and official information + for Oracle Database features. - If the search tool returns no results, try to rephrase the query. - If the search tool returns too few results, increase the max_results limit. - If the search tool returns too many results, reduce the max_results limit. - If the search tool returns results that are not relevant, try to refine the query. - """ + """, ) @mcp.tool() def search_oracle_database_documentation( - search_query: str, - max_results: int = 4, + search_query: str, + max_results: int = 4, ) -> list[str]: - """Search for information about how to use Oracle Database for a query string and return a list of results. 
+ """Search for information about how to use Oracle Database for a query string + and return a list of results. Args: search_query: The search phrase to search for. @@ -96,7 +103,8 @@ def search_oracle_database_documentation( search_oracle_database_documentation(search_query="create table syntax") search_oracle_database_documentation(search_query="alter a parameter", max_results=13) search_oracle_database_documentation(search_query="database user concept", max_results=20) - search_oracle_database_documentation(search_query="data use case domains best practices", max_results=15) + search_oracle_database_documentation(search_query="data use case domains best practices", + max_results=15) search_oracle_database_documentation(search_query="external table definition", max_results=100) Returns: A list of results. @@ -135,7 +143,6 @@ def maintain_content(path: str) -> None: Returns: None """ - global INDEX logger.info("Maintaining index...") # Logic to create or update the index goes here @@ -151,8 +158,10 @@ def maintain_content(path: str) -> None: index_version = get_file_content(INDEX_VERSION_FILE) # Only directories and zip files are currently supported - if location.is_file() and not location.suffix == '.zip': - logger.error(f"Unsupported file type: {location}. Must be a zip file or directory.") + if location.is_file() and not location.suffix == ".zip": + logger.error( + f"Unsupported file type: {location}. Must be a zip file or directory." 
+ ) return # Calculate the checksum of the input directory or zip file @@ -168,16 +177,20 @@ def maintain_content(path: str) -> None: else: if input_checksum != content_checksum: logger.info("Checksum has changed.") - logger.debug(f"Old index checksum: {content_checksum}, New input checksum: {input_checksum}") + logger.debug( + f"Old index checksum: {content_checksum}, New input checksum: {input_checksum}" + ) if index_version != INDEX_VERSION: logger.info("Index version has changed.") - logger.debug(f"Old index version: {index_version}, New index version: {INDEX_VERSION}") + logger.debug( + f"Old index version: {index_version}, New index version: {INDEX_VERSION}" + ) INDEX_FILE.unlink(missing_ok=True) logger.info("Recreating index...") # Extract the zip file to a temporary directory - if location.is_file() and location.suffix == '.zip': + if location.is_file() and location.suffix == ".zip": # Check if temp output directory exists and remove it zip_output = Path(ZIP_TEMP_OUTPUT) @@ -187,7 +200,7 @@ def maintain_content(path: str) -> None: logger.debug(f"Creating zip output directory: {zip_output}") zip_output.mkdir() - with zipfile.ZipFile(location, 'r') as zip_ref: + with zipfile.ZipFile(location, "r") as zip_ref: logger.debug(f"Extracting zip file {location} to {zip_output}") zip_ref.extractall(ZIP_TEMP_OUTPUT) @@ -200,12 +213,16 @@ def maintain_content(path: str) -> None: update_content(location) # Write the new checksum to the checksum file - logger.debug(f"Writing new checksum {input_checksum} to {CONTENT_CHECKSUM_FILE}") + logger.debug( + f"Writing new checksum {input_checksum} to {CONTENT_CHECKSUM_FILE}" + ) write_file_content(CONTENT_CHECKSUM_FILE, input_checksum) if index_version != INDEX_VERSION: # Write index version to version file - logger.debug(f"Writing index version {INDEX_VERSION} to {INDEX_VERSION_FILE}") + logger.debug( + f"Writing index version {INDEX_VERSION} to {INDEX_VERSION_FILE}" + ) write_file_content(INDEX_VERSION_FILE, INDEX_VERSION) # 
Delete temporary zip output directory if it exists @@ -260,7 +277,6 @@ def update_index(content: list[str]) -> None: Returns: None """ - global INDEX with PocketWriter(db_name=INDEX_FILE) as writer: for segment in content: writer.insert(text=segment) @@ -300,11 +316,14 @@ def convert_to_markdown_chunks(file: Path) -> list[str]: # Convert HTML to Markdown markdown = md.markdownify(html) if PREPROCESS != "NONE": - markdown = markdown.replace("Previous\nNext\n JavaScript must be enabled to correctly display this content", "") + markdown = markdown.replace( + "Previous\nNext\n JavaScript must be enabled to correctly display this content", + "", + ) markdown = remove_markdown_urls(markdown) # Split markdown into sections based on headings - pattern = r'(^#{1,6}\s+[^\n]*\n?)(.*?)(?=(?:^#{1,6}\s+|\Z))' + pattern = r"(^#{1,6}\s+[^\n]*\n?)(.*?)(?=(?:^#{1,6}\s+|\Z))" # Find all matches with re.MULTILINE and re.DOTALL flags matches = re.finditer(pattern, markdown, re.MULTILINE | re.DOTALL) @@ -326,22 +345,26 @@ def convert_to_markdown_chunks(file: Path) -> list[str]: def remove_markdown_urls(text): # Remove Markdown links [text](url) and replace with just the text - text = re.sub(r'\[([^\]]*)\]\([^\)]*\)', r'\1', text) + text = re.sub(r"\[([^\]]*)\]\([^\)]*\)", r"\1", text) # Remove URLs with GUIDs (32-char hex with hyphens) - text = re.sub(r'https?://[^\s]*[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}[^\s]*', '', text) + text = re.sub( + r"https?://[^\s]*[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}[^\s]*", + "", + text, + ) # Remove URLs with long hex strings (likely file hashes or identifiers) - text = re.sub(r'https?://[^\s]*[a-f0-9]{16,}[^\s]*', '', text) + text = re.sub(r"https?://[^\s]*[a-f0-9]{16,}[^\s]*", "", text) # Remove standalone URLs that start with http/https - text = re.sub(r'https?://[^\s]+', '', text) + text = re.sub(r"https?://[^\s]+", "", text) # Clean up extra spaces/tabs but preserve new lines (\s includes \n) - text = 
re.sub(r'[ \t]+', ' ', text) + text = re.sub(r"[ \t]+", " ", text) # Clean up extra spaces within new lines - text = re.sub(r'\n *\n', '\n\n', text) + text = re.sub(r"\n *\n", "\n\n", text) return text.strip() @@ -355,56 +378,77 @@ def preprocess_html(html_content: str) -> str: Returns: str: Cleaned HTML content ready for markdown conversion. """ - soup = BeautifulSoup(html_content, 'html.parser') + soup = BeautifulSoup(html_content, "html.parser") # Remove script and style tags - for tag in soup.find_all(['script', 'style']): + for tag in soup.find_all(["script", "style"]): tag.decompose() # Remove navigation elements - for tag in soup.find_all(['nav', 'header', 'footer']): + for tag in soup.find_all(["nav", "header", "footer"]): tag.decompose() # Remove elements with navigation-related classes/ids nav_classes = [ - 'noscript', 'alert', 'pull-left', 'pull-right', 'skip', 'navigation', - 'breadcrumb', 'nav-', 'header-', 'footer-', 'menu', 'sidebar', 'toc' + "noscript", + "alert", + "pull-left", + "pull-right", + "skip", + "navigation", + "breadcrumb", + "nav-", + "header-", + "footer-", + "menu", + "sidebar", + "toc", ] for nav_class in nav_classes: - for tag in soup.find_all(attrs={'class': lambda x: x and any(nav_class in str(cls).lower() for cls in (x if isinstance(x, list) else [x]))}): + for tag in soup.find_all( + attrs={ + "class": lambda x: x + and any( + nav_class in str(cls).lower() + for cls in (x if isinstance(x, list) else [x]) + ) + } + ): tag.decompose() - for tag in soup.find_all(attrs={'id': lambda x: x and nav_class in str(x).lower()}): + for tag in soup.find_all( + attrs={"id": lambda x: x and nav_class in str(x).lower()} + ): tag.decompose() # Remove common Oracle doc boilerplate text patterns boilerplate_patterns = [ - r'JavaScript.*(?:disabled|enabled).*browser', - r'Skip navigation.*', - r'Oracleยฎ.*(?:Database.*)?(?:Reference|Guide|Manual|Documentation)', - r'Release \d+[a-z]*[\s-]*[A-Z0-9-]*', - r'Previous.*Next', - r'All Classes.*', - 
r'Overview.*Package.*Class.*Use.*Tree.*Deprecated.*Index.*Help' + r"JavaScript.*(?:disabled|enabled).*browser", + r"Skip navigation.*", + r"Oracleยฎ.*(?:Database.*)?(?:Reference|Guide|Manual|Documentation)", + r"Release \d+[a-z]*[\s-]*[A-Z0-9-]*", + r"Previous.*Next", + r"All Classes.*", + r"Overview.*Package.*Class.*Use.*Tree.*Deprecated.*Index.*Help", ] for pattern in boilerplate_patterns: for tag in soup.find_all(string=re.compile(pattern, re.IGNORECASE)): - parent = tag.parent if hasattr(tag, 'parent') else None + parent = tag.parent if hasattr(tag, "parent") else None if parent: parent.decompose() # Remove elements likely to be navigation by common Oracle doc structure # Remove elements with common Oracle navigation text content nav_text_patterns = [ - 'Skip navigation links', - 'JavaScript is disabled on your browser', - 'All Classes', - 'SEARCH:' + "Skip navigation links", + "JavaScript is disabled on your browser", + "All Classes", + "SEARCH:", ] for pattern in nav_text_patterns: for element in soup.find_all(string=lambda text: text and pattern in text): - parent = element.parent if hasattr(element, 'parent') else None + parent = element.parent if hasattr(element, "parent") else None if parent: parent.decompose() @@ -420,8 +464,8 @@ def build_folder_structure() -> None: def get_file_content(path: str) -> str: """Reads the content of a file and returns it or 'N/A' if the file does not exist. - Args: - file (Path): The path to the file. + Args: + file (Path): The path to the file. """ if Path(path).exists(): with Path(path).open("r") as f: @@ -437,26 +481,54 @@ def write_file_content(path: str, content: str) -> None: def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Oracle Database Documentation MCP Server.") + parser = argparse.ArgumentParser( + description="Oracle Database Documentation MCP Server." 
+ ) - parser.add_argument("-log-level", type=str, default="ERROR", - help="Set the log level (DEBUG, INFO, WARNING, ERROR (default), CRITICAL).") + parser.add_argument( + "-log-level", + type=str, + default="ERROR", + help="Set the log level (DEBUG, INFO, WARNING, ERROR (default), CRITICAL).", + ) - subparser = parser.add_subparsers(title="subcommands", dest="command", required=True) + subparser = parser.add_subparsers( + title="subcommands", dest="command", required=True + ) parser_doc = subparser.add_parser("idx", help="create/maintain the index") - parser_doc.add_argument("-path", type=str, required=True, - help="path to the documentation input zip file or extracted directory") - parser_doc.add_argument("-preprocess", type=str, default="BASIC", - help="preprocessing level of documentation (NONE, BASIC (default), ADVANCED)") + parser_doc.add_argument( + "-path", + type=str, + required=True, + help="path to the documentation input zip file or extracted directory", + ) + parser_doc.add_argument( + "-preprocess", + type=str, + default="BASIC", + help="preprocessing level of documentation (NONE, BASIC (default), ADVANCED)", + ) parser_mcp = subparser.add_parser("mcp", help="run the MCP server") - parser_mcp.add_argument("-mode", choices=["stdio", "http"], default="stdio", - help="the transport mode for the MCP server (stdio (default) or http)") - parser_mcp.add_argument("-host", type=str, default="0.0.0.0", - help="the IP address (default 0.0.0.0) that the MCP server is reachable at") - parser_mcp.add_argument("-port", type=int, default=8000, - help="the port (default 8000) that the MCP server is reachable at") + parser_mcp.add_argument( + "-mode", + choices=["stdio", "http"], + default="stdio", + help="the transport mode for the MCP server (stdio (default) or http)", + ) + parser_mcp.add_argument( + "-host", + type=str, + default="0.0.0.0", + help="the IP address (default 0.0.0.0) that the MCP server is reachable at", + ) + parser_mcp.add_argument( + "-port", + 
type=int, + default=8000, + help="the port (default 8000) that the MCP server is reachable at", + ) args = parser.parse_args() @@ -475,12 +547,17 @@ def main(): # Set up logging ch = logging.StreamHandler() formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) ch.setFormatter(formatter) logger.addHandler(ch) # Set log level - logging.basicConfig(filename=HOME_DIR.joinpath(Path('oracle-db-doc-mcp-server.log')), filemode='w', level=logging.ERROR) + logging.basicConfig( + filename=HOME_DIR.joinpath(Path("oracle-db-doc-mcp-server.log")), + filemode="w", + level=logging.ERROR, + ) logger.setLevel(getattr(logging, args.log_level.upper(), logging.ERROR)) if args.command == "idx": @@ -492,7 +569,9 @@ def main(): # If no index is present (not index was built), refuse to start the server. if not INDEX_FILE.exists(): - logger.error(f"Index does not exist. Please create the index first via the 'idx' subcommand.") + logger.error( + "Index does not exist. Please create the index first via the 'idx' subcommand." 
+ ) return global INDEX @@ -510,4 +589,4 @@ def main(): try: main() except KeyboardInterrupt: - logger.info("Shutting down Oracle Database Documentation MCP Server.") \ No newline at end of file + logger.info("Shutting down Oracle Database Documentation MCP Server.") From b72a6c0f07c818ef1cc06f7a626b507406d8d611 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 17:34:29 -0700 Subject: [PATCH 38/43] Ignore .python-version Signed-off-by: Gerald Venzl --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1ec0d71..343680d 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ venv.bak/ # Python uv uv.lock +.python-version # VScode .vscode From c53b9ad1c4ff5aef8660ed5d96ef87106cd860ed Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 17:34:36 -0700 Subject: [PATCH 39/43] Add uv support Signed-off-by: Gerald Venzl --- src/oracle-db-doc-mcp-server/pyproject.toml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/oracle-db-doc-mcp-server/pyproject.toml diff --git a/src/oracle-db-doc-mcp-server/pyproject.toml b/src/oracle-db-doc-mcp-server/pyproject.toml new file mode 100644 index 0000000..642e452 --- /dev/null +++ b/src/oracle-db-doc-mcp-server/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "oracle-db-doc-mcp-server" +version = "0.1.0" +description = "The Oracle Database Documentation MCP Server" +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ + "beautifulsoup4>=4.9.0", + "fastmcp>=2.11.3", + "markdownify>=1.2.0", + "pocketsearch>=0.40.0", +] From 8754427984c21da1216c50527bbaec60c0355413 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 17:38:16 -0700 Subject: [PATCH 40/43] Ignore Oracle DB Doc server from uv builds Signed-off-by: Gerald Venzl --- .github/workflows/build.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3ed1f15..4e8e0fd 100644 
--- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,7 +20,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.13' + python-version: "3.13" - name: Install requirements run: pip install -r requirements-dev.txt @@ -53,7 +53,6 @@ jobs: working-directory: src/${{ matrix.directory }} run: uv pip install . - get-directories: runs-on: ubuntu-latest outputs: @@ -65,5 +64,5 @@ jobs: - name: Get directories id: get-directories run: | - directories=$(ls src | grep -v dbtools-mcp-server | grep -v mysql-mcp-server | grep -v oci-pricing-mcp-server | jq -R -s -c 'split("\n")[:-1]') + directories=$(ls src | grep -v dbtools-mcp-server | grep -v mysql-mcp-server | grep -v oci-pricing-mcp-server | grep -v oracle-db-doc-mcp-server | jq -R -s -c 'split("\n")[:-1]') echo "directories=$directories" >> $GITHUB_OUTPUT From 62a99418abcf5c62ad449e0d1bbb15a19c653aa8 Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 17:40:38 -0700 Subject: [PATCH 41/43] Update version Signed-off-by: Gerald Venzl --- src/oracle-db-doc-mcp-server/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oracle-db-doc-mcp-server/pyproject.toml b/src/oracle-db-doc-mcp-server/pyproject.toml index 642e452..326bfee 100644 --- a/src/oracle-db-doc-mcp-server/pyproject.toml +++ b/src/oracle-db-doc-mcp-server/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "oracle-db-doc-mcp-server" -version = "0.1.0" +version = "1.0.0" description = "The Oracle Database Documentation MCP Server" readme = "README.md" requires-python = ">=3.13" From fb9992a7a1ebcd5fcc12047a98d1c27c7520694a Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 18:08:24 -0700 Subject: [PATCH 42/43] Update ReadMe Signed-off-by: Gerald Venzl --- src/oracle-db-doc-mcp-server/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oracle-db-doc-mcp-server/README.md b/src/oracle-db-doc-mcp-server/README.md index 
8ec99ee..e15ed9a 100644 --- a/src/oracle-db-doc-mcp-server/README.md +++ b/src/oracle-db-doc-mcp-server/README.md @@ -117,7 +117,7 @@ To run the MCP server from inside a Docker container: "oracle-db-doc": { "type": "stdio", "command": "docker", - "args": [ "run", "-ti", ghcr.io/oracle/mcp/oracle-db-doc" ] + "args": [ "run", "--rm", "-i", "ghcr.io/oracle/mcp/oracle-db-doc" ] } } } From bfedbb14b0c49248148eb558c8267fa60f9e83fb Mon Sep 17 00:00:00 2001 From: Gerald Venzl Date: Fri, 24 Oct 2025 18:51:13 -0700 Subject: [PATCH 43/43] Update header link in ReadMe Signed-off-by: Gerald Venzl --- src/oracle-db-doc-mcp-server/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/oracle-db-doc-mcp-server/README.md b/src/oracle-db-doc-mcp-server/README.md index e15ed9a..f247fa3 100644 --- a/src/oracle-db-doc-mcp-server/README.md +++ b/src/oracle-db-doc-mcp-server/README.md @@ -4,7 +4,7 @@ A Python-based MCP (Model Context Protocol) server that provides tools for searc The MCP server leverages an inverted index to serve snippets of the Oracle Database documentation. Because the Oracle Database documentation is large and gets updated from time to time, it is infeasible to ship a ready-to-go documentation index with this repository. Doing so would bloat the repository and run the risk of users searching outdated documentation. -Instead, users can create their own index and maintain it as often as required. See [Index creation/maintenance](#index-creation-maintenance) for more on that topic. +Instead, users can create their own index and maintain it as often as required. See [Index creation/maintenance](#index-creationmaintenance) for more on that topic. ## Features @@ -128,7 +128,7 @@ To run the MCP server from inside a Docker container: To run the MCP server directly from your machine: 1. Follow the [Installation](#installation) instructions first. -2. Create an index as explained in [Index creation/maintenance](#index-creation-maintenance) +2.
Create an index as explained in [Index creation/maintenance](#index-creationmaintenance). 3. Add a new `.vscode/mcp.json` file to your project folder. 4. Add the following content to your `.vscode/mcp.json` file. Replace the `<>` placeholders with the paths to the MCP server installation.