diff --git a/.github/ISSUE_TEMPLATE/content_changes.yml b/.github/ISSUE_TEMPLATE/content_changes.yml
new file mode 100644
index 0000000..6ba86ec
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/content_changes.yml
@@ -0,0 +1,72 @@
+name: Documentation Update
+description: Suggest additions, modifications, or improvements to the documentation
+title: "[DOCS] "
+labels: ["docs", "enhancement"]
+assignees: []
+
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for helping us improve our documentation!
+
+  - type: dropdown
+    id: change-type
+    attributes:
+      label: Type of Change
+      description: What kind of update are you suggesting?
+      options:
+        - Addition (new content)
+        - Modification (updates to existing content)
+        - Removal (outdated or redundant content)
+        - Other
+    validations:
+      required: true
+
+  - type: textarea
+    id: proposed-content
+    attributes:
+      label: Proposed Content / Change
+      description: Describe the content you'd like to add, modify, or remove
+      placeholder: I think the documentation should include/update/remove...
+    validations:
+      required: true
+
+  - type: textarea
+    id: location
+    attributes:
+      label: Location
+      description: Where should this content be placed or updated in the documentation structure?
+      placeholder: This should be added/updated in the section on...
+    validations:
+      required: true
+
+  - type: textarea
+    id: rationale
+    attributes:
+      label: Rationale
+      description: Why is this change valuable for the project documentation?
+      placeholder: This content change would be valuable because...
+    validations:
+      required: true
+
+  - type: textarea
+    id: content-outline
+    attributes:
+      label: Suggested Outline (Optional)
+      description: If you have ideas for how the content should be structured, provide an outline
+      placeholder: |
+        1. Introduction
+        2. Key concepts
+        3. Examples
+    validations:
+      required: false
+
+  - type: textarea
+    id: references
+    attributes:
+      label: References
+      description: Include links to any reference material or examples
+      placeholder: Related resources or examples
+    validations:
+      required: false
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..b0080cb
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,41 @@
+
+
+## Description
+
+
+## Type of Change
+
+- [ ] 📄 New content addition
+- [ ] ✏️ Content update/revision
+- [ ] 📚 Structure/organization improvement
+- [ ] 🔤 Typo/formatting fix
+- [ ] 🐛 Bug fix
+- [ ] 🔧 Tooling/config change (docs build, CI/CD, etc.)
+- [ ] Other (please describe):
+
+## Motivation and Context
+
+
+## Areas Affected
+
+- e.g., `docs/getting-started.md`, `docs/configuration/`
+
+## Screenshots (if applicable)
+
+
+## Checklist
+
+- [ ] I have read the **CONTRIBUTING** guidelines
+- [ ] My changes follow the project’s documentation style guide
+- [ ] I have previewed my changes locally (`mkdocs serve` or equivalent)
+- [ ] All internal/external links are valid
+- [ ] Images/diagrams are optimized (size, format) and display correctly
+- [ ] Any new references/resources are cited appropriately
+- [ ] All existing checks/tests pass (if applicable)
+
+## Additional Notes
+
+
+---
+
+By submitting this pull request, I confirm that my contribution can be used, modified, and redistributed under the terms of this project’s license.
diff --git a/.github/workflows/build-deploy.yml b/.github/workflows/build-deploy.yml
new file mode 100644
index 0000000..cc98191
--- /dev/null
+++ b/.github/workflows/build-deploy.yml
@@ -0,0 +1,34 @@
+name: Build GitHub Pages
+on:
+  push:
+    branches:
+      - main
+permissions:
+  contents: write
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+      - name: Configure Git Credentials
+        run: |
+          git config user.name github-actions[bot]
+          git config user.email 41898282+github-actions[bot]@users.noreply.github.com
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.x
+      - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
+      - uses: actions/cache@v4
+        with:
+          key: mkdocs-material-${{ env.cache_id }}
+          path: .cache
+          restore-keys: |
+            mkdocs-material-
+      - run: |
+          pip install -r requirements.txt
+      - run: |
+          mike deploy --push --update-aliases 0.0.x latest
+          mike set-default --push 0.0.x
\ No newline at end of file
diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
new file mode 100644
index 0000000..7df11ef
--- /dev/null
+++ b/.github/workflows/build-test.yml
@@ -0,0 +1,44 @@
+name: Build and Test
+
+on:
+  pull_request:
+    branches: [ main ]
+    types: [ opened, synchronize, reopened, ready_for_review, review_requested, review_request_removed ]
+
+  schedule:
+    - cron: "00 10 * * *" # Run at 10:00 UTC every day
+
+permissions: read-all
+
+jobs:
+  build:
+    name: Build Documentation
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt
+      - name: Build docs
+        run: |
+          mkdocs build
+
+  check_links:
+    name: Check Links
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Restore lychee cache
+        uses: actions/cache@v4
+        with:
+          path: .lycheecache
+          key: cache-lychee-${{ github.sha }}
+          restore-keys: cache-lychee-
+
+      - name: Check links with lychee
+        id: lychee
+        uses: lycheeverse/lychee-action@v2
+        with:
+          args: "--base . --cache --max-cache-age 1d ."
\ No newline at end of file
diff --git a/.lycheeignore b/.lycheeignore
new file mode 100644
index 0000000..8935025
--- /dev/null
+++ b/.lycheeignore
@@ -0,0 +1,7 @@
+# see examples in https://github.com/opensafely/documentation/blob/main/.lycheeignore
+
+# localhost
+https?://localhost.*
+https?://127\.0\.0\.1.*
+.*localhost.*
+https://github.com/Datuanalytics/datu-core/issues*
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 8c41c75..46ca028 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -23,7 +23,7 @@ reported the issue. Please try to include as much information as you can. Detail
 
 Looking at the existing issues is a great way to find something to contribute to. We label issues that are well-defined and ready for community contributions with the "ready for contribution" label. Check our "Ready for Contribution" issues for items you can work on:
 
-- [SDK Python Issues](https://github.com/Datuanalytics/datu-core/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22ready%20for%20contribution%22)
+- [Datu Core Issues](https://github.com/Datuanalytics/datu-core/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22ready%20for%20contribution%22)
 
 Before starting work on any issue:
 1. Check if someone is already assigned or working on it
diff --git a/docs/README.md b/docs/README.md
index 4b32ba9..b750800 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -21,7 +21,7 @@ Then follow next steps.
 Ready to learn more? Check out these resources:
 
 - [Quickstart](user-guide/quickstart.md) - A more detailed introduction to Datu core
-- [Examples](examples/README.md) - Examples for connecting multiple datasources.
+- [Datasources](user-guide/datasources/datasources.md) - Connecting to multiple datasources.
 
 [Learn how to contribute]({{ server_repo }}/CONTRIBUTING.md) or join our community discussions to shape the future of Datu ❤️.
 
diff --git a/docs/examples/README.md b/docs/examples/README.md
deleted file mode 100644
index 3879843..0000000
--- a/docs/examples/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Examples Overview
-
-The examples directory provides a collection of sample implementations to help you get started with connecting data sources with Datu.
-## Purpose
-
-With Datu, you can quickly connect to your data sources and turn raw information into actionable insights. The sample projects cover everything from running straightforward queries to managing advanced, multi-step analysis pipelines, giving you a clear view of how Datu works in practice.
-
-Each example is designed to highlight proven techniques and practical workflows you can apply to your own analytics tasks. Whether you’re streamlining reports, exploring trends, or building complex data processes, these references show how Datu can be adapted to fit your specific goals.
-## Prerequisites
-
-- Python 3.11 or higher
-For specific examples, additional requirements may be needed (see individual example READMEs)
-
diff --git a/docs/user-guide/datasources/datasources.md b/docs/user-guide/datasources/datasources.md
new file mode 100644
index 0000000..b431772
--- /dev/null
+++ b/docs/user-guide/datasources/datasources.md
@@ -0,0 +1,63 @@
+# Datasources Overview
+
+This section provides a collection of example configurations to help you get started with connecting data sources to Datu.
+## Purpose
+
+With Datu, you can quickly connect to your data sources and turn raw information into actionable insights.
+
+### How to add datasources
+
+By design, the application fetches the schema for every datasource listed in `profiles.yml` up front, so the schema does not have to be fetched on every request. However, it only works against the **target** datasource that is currently selected.
+
+**Structure of profiles.yml**
+
+```yaml
+datu_demo:
+  target: dev-postgres # Target is used to select the datasource that is currently active. Change this if you would like to use a different datasource.
+  outputs:
+    dev-postgres:
+      type: postgres
+      {% raw %}
+      host: "{{ env_var('DB_HOST', 'localhost') }}" # If the environment variable is set, it takes priority over the default. This is useful for avoiding hardcoded values.
+      {% endraw %}
+      port: 5432
+      user: postgres
+      password: postgres
+      dbname: my_sap_bronze
+      schema: bronze
+    dev-sqlserver:
+      type: sqlserver
+      driver: 'ODBC Driver 18 for SQL Server' # Mandatory for SQL Server.
+      host: localhost
+      port: 1433
+      user: sa
+      password: Password123!
+      dbname: my_sap_bronze
+      schema: bronze
+```
+
+### About profiles.yml
+
+Datu core needs a `profiles.yml` file that contains all the configured datasources. If you have used [dbt](https://github.com/dbt-labs/dbt-core), this is similar to its profiles.yml, though not exactly the same.
+
+```yaml
+[profile name]:
+  target: [target name] # this is the default target
+  outputs:
+    [target name]:
+      type: [datasource type]
+      schema: [schema name]
+
+      ### Look for each datasource's specific variables
+      ...
+
+    ...
+
+[another profile name]: # additional profiles
+  ...
+
+```
+
+### env_var
+
+You can use `env_var` with any attribute in the `profiles.yml` `outputs` section to load configuration values from environment variables. For example, `env_var('DB_HOST', 'localhost')` reads the value of the `DB_HOST` environment variable and falls back to `localhost` when it is not set, as shown for the `host` attribute above.
diff --git a/docs/user-guide/datasources/postgres.md b/docs/user-guide/datasources/postgres.md
new file mode 100644
index 0000000..585b66c
--- /dev/null
+++ b/docs/user-guide/datasources/postgres.md
@@ -0,0 +1,20 @@
+### Postgres as a datasource
+
+Install datu-core with the `postgres` extra:
+
+```sh
+pip install "datu-core[postgres]"
+```
+
+In `profiles.yml`:
+
+```yaml
+dev-postgres:
+  type: postgres
+  host: [hostname]
+  user: [username]
+  password: [password]
+  port: [port]
+  dbname: [database name]
+  schema: [schema]
+```
\ No newline at end of file
diff --git a/docs/user-guide/datasources/sqlserver.md b/docs/user-guide/datasources/sqlserver.md
new file mode 100644
index 0000000..b075cfb
--- /dev/null
+++ b/docs/user-guide/datasources/sqlserver.md
@@ -0,0 +1,25 @@
+### SQL Server as a datasource
+
+Install datu-core with the `sqldb` extra:
+
+```sh
+pip install "datu-core[sqldb]"
+```
+
+For SQL Server to work, make sure the ODBC driver below is installed on your machine for your operating system.
+
+[Install ODBC driver](https://learn.microsoft.com/en-us/sql/connect/python/pyodbc/step-1-configure-development-environment-for-pyodbc-python-development?view=sql-server-ver16&tabs=windows)
+
+In `profiles.yml`:
+
+```yaml
+dev-sqlserver:
+  type: sqlserver
+  driver: 'ODBC Driver 18 for SQL Server' # Mandatory for SQL Server.
+  host: [hostname]
+  user: [username]
+  password: [password]
+  port: [port]
+  dbname: [database name]
+  schema: [schema]
+```
\ No newline at end of file
diff --git a/docs/user-guide/deploy/deploy_as_container_service.md b/docs/user-guide/deploy/deploy_as_container_service.md
index 58bf8ce..0e328a9 100644
--- a/docs/user-guide/deploy/deploy_as_container_service.md
+++ b/docs/user-guide/deploy/deploy_as_container_service.md
@@ -7,7 +7,7 @@ Use below recommended method to run Datu application as container service.
 To deploy your Datu, you need to containerize it using Podman or Docker. The Dockerfile defines how your application is packaged and run. Below is an example Docker file that installs all needed dependencies, the application, and configures the FastAPI server to run via unicorn dockerfile.
 
 ```sh
-FROM python:3.10-slim
+FROM python:3.11-slim
 
 SHELL ["/bin/bash", "-c"]
 RUN apt-get update && \
diff --git a/docs/user-guide/quickstart.md b/docs/user-guide/quickstart.md
index a679065..f499519 100644
--- a/docs/user-guide/quickstart.md
+++ b/docs/user-guide/quickstart.md
@@ -59,7 +59,14 @@ my_sources:
 
 After creating the datasources profiles.yml.
 
-**Environment variables**: Set `DATU_OPENAI_API_KEY`
+### 🔧 Environment Variables
+
+Set the following environment variables:
+
+- **`DATU_OPENAI_API_KEY`** – your OpenAI API key
+- **`DATU_DBT_PROFILES`** – path to your `profiles.yml`
+
+Then run:
 
 ```bash
 datu
@@ -75,5 +82,5 @@ To enable debug logs in Datu server .
 
 Ready to learn more? Check out these resources:
 
-- [Examples](../examples/README.md) - Examples for connecting multiple datasources.
+- [Datasources](datasources/datasources.md) - Connecting to multiple datasources.
 - [More configurations](configurations.md) - Datu server configurations includes port, schema configurations etc.
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index 9bf64d5..bcf6610 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,6 +1,7 @@
 site_name: Datu AI Analyst
 site_description: Datu AI Analyst is a Python SDK for building AI agents that can interact with data, perform analysis, and generate insights. It provides a framework for creating agents that can work with various data sources and tools, enabling developers to build intelligent applications that leverage AI capabilities.
 site_dir: site
+site_url: https://docs.datu.fi
 
 repo_url: https://github.com/Datuanalytics/datu-core
 
@@ -58,13 +59,17 @@ nav:
   - User Guide:
     - Welcome: README.md
    - Quickstart: user-guide/quickstart.md
+    - Datasources:
+      - user-guide/datasources/datasources.md
+      - Postgres: user-guide/datasources/postgres.md
+      - SQL Server: user-guide/datasources/sqlserver.md
+    - Configurations:
+      - user-guide/configurations.md
     - Deploy:
       - Container service: user-guide/deploy/deploy_as_container_service.md
     - Contribute ❤️: https://github.com/Datuanalytics/datu-core/blob/main/CONTRIBUTING.md
   - Architecture:
     - Overview: architecture/README.md
-  - Examples:
-    - Overview: examples/README.md
   - Contribute ❤️: https://github.com/Datuanalytics/datu-core/blob/main/CONTRIBUTING.md
 
 exclude_docs: |
@@ -91,8 +96,6 @@ plugins:
       User Guide:
         - README.md
         - user-guide/**/*.md
-      Examples:
-        - examples/**/*.md
 
 extra:
   social:
diff --git a/requirements.txt b/requirements.txt
index 7fa26d2..6465f08 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,4 @@ mkdocs-macros-plugin~=1.3.7
 mkdocs-material~=9.6.12
 mkdocstrings-python~=1.16.10
 mkdocs-llmstxt~=0.2.0
-git+https://github.com/Datuanalytics/datu-core@main
\ No newline at end of file
+#datu-core~=0.1.0
\ No newline at end of file