diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..6e87a00 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,13 @@ +# Editor configuration, see http://editorconfig.org +root = true + +[*] +charset = utf-8 +indent_style = space +indent_size = 2 +insert_final_newline = true +trim_trailing_whitespace = true + +[*.md] +max_line_length = off +trim_trailing_whitespace = false diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..c24ca3c --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,4 @@ +# Lines starting with '#' are comments. +# Each line is a file pattern followed by one or more owners. + +* @rfprod diff --git a/.github/actions/.gitkeep b/.github/actions/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/.github/actions/check-changes/action.yml b/.github/actions/check-changes/action.yml new file mode 100644 index 0000000..78b29c4 --- /dev/null +++ b/.github/actions/check-changes/action.yml @@ -0,0 +1,31 @@ +name: check-changes +description: Find changes matching a pattern using Git. + +inputs: + trunk: + description: The name of the trunk (default branch). + default: main + pattern: + description: A regular expression for filtering Git output. + default: '^.*' +outputs: + change: + description: A string representation of a boolean value denoting presence or absence of changes matching the pattern. 
+ value: ${{ steps.check-changes.outputs.change }} + +runs: + using: 'composite' + steps: + - name: Check changes + id: check-changes + shell: bash + run: | + COMPARE_WITH=origin/"$TRUNK" + if [ "$TRUNK" = "" ]; then COMPARE_WITH='HEAD~1'; fi + CHANGE=false + COMMAND=$(git diff --name-only HEAD "$COMPARE_WITH" | grep "$PATTERN" || echo "false") # PATTERN is set in env from inputs.pattern + if [ "$COMMAND" != "false" ]; then CHANGE='true'; fi + echo "change=$(echo ${CHANGE})" >> $GITHUB_OUTPUT + env: + TRUNK: ${{ inputs.trunk }} + PATTERN: ${{ inputs.pattern }} diff --git a/.github/actions/setup-environment/action.yml b/.github/actions/setup-environment/action.yml new file mode 100644 index 0000000..431ea87 --- /dev/null +++ b/.github/actions/setup-environment/action.yml @@ -0,0 +1,41 @@ +name: setup-environment +description: Create Python venv and install dependencies + +inputs: + venv: + description: Python virtual environment path + required: true + install: + description: Indicates whether to install project dependencies + required: false + default: 'true' + +runs: + using: "composite" + steps: + - id: setup + shell: bash + run: | + rm -rf "$VENV" + case $VENV in + *py311*) + sudo apt install -y python3.11 python3.11-venv + python3.11 -m venv "$VENV" + ;; + *py312*) + sudo apt install -y python3.12 python3.12-venv + python3.12 -m venv "$VENV" + ;; + *) + echo "$VENV is not supported" + exit 1 + ;; + esac + source "$VENV/bin/activate" + python3 --version + if [ "$INSTALL" = "true" ]; then + python3 -m pip install --quiet -r requirements.txt + fi + env: + VENV: ${{ inputs.venv }} + INSTALL: ${{ inputs.install }} diff --git a/.github/workflows/validate-codeowners.yml b/.github/workflows/validate-codeowners.yml new file mode 100644 index 0000000..f166801 --- /dev/null +++ b/.github/workflows/validate-codeowners.yml @@ -0,0 +1,51 @@ +name: validate-codeowners + +on: + schedule: + - cron: '0 0 * * 0' + workflow_dispatch: + workflow_call: + +defaults: + run: + shell: bash + +concurrency: + group: ${{ 
github.head_ref }}.${{ github.sha }}.validate-codeowners + cancel-in-progress: true + +jobs: + codeowners: + runs-on: ubuntu-latest + + outputs: + matrix: ${{ steps.codeowners.outputs.matrix }} + + steps: + - name: Checkout sources + uses: actions/checkout@v4 + + - name: Get codeowners + id: codeowners + run: | + RESULT=$(bash tools/actions/codeowners/codeowners.sh) + echo "matrix=$RESULT" >> $GITHUB_OUTPUT + echo "$RESULT" + + validate: + needs: codeowners + runs-on: ubuntu-latest + + strategy: + matrix: + name: ${{ fromJSON(needs.codeowners.outputs.matrix) }} + + steps: + - name: Validate codeowners + uses: octokit/request-action@v2.x + with: + route: GET /repos/{repository}/collaborators/{collaborator} + repository: ${{ github.repository }} + collaborator: ${{ matrix.name }} + env: + GITHUB_TOKEN: ${{ github.token }} diff --git a/.github/workflows/validate-pr.yml b/.github/workflows/validate-pr.yml new file mode 100644 index 0000000..62b33b5 --- /dev/null +++ b/.github/workflows/validate-pr.yml @@ -0,0 +1,138 @@ +name: validate-pr + +on: + pull_request: + branches: [main] + +defaults: + run: + shell: bash + +concurrency: + group: ${{ github.head_ref }}.${{ github.sha }}.validate-pr + cancel-in-progress: true + +jobs: + checks: + runs-on: ubuntu-latest + + outputs: + codeowners-change: ${{ steps.codeowners-change.outputs.change }} + src-change: ${{ steps.src-change.outputs.change }} + shelltools-change: ${{ steps.shelltools-change.outputs.change }} + + steps: + - name: Checkout sources + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.pull_request.head.sha }} + + - name: CODEOWNERS change + id: codeowners-change + uses: ./.github/actions/check-changes + with: + pattern: '^\.github/CODEOWNERS' + + - name: Source code change + id: src-change + uses: ./.github/actions/check-changes + with: + pattern: '^src' + + - name: Shell tools change + id: shelltools-change + uses: ./.github/actions/check-changes + with: + pattern: '^tools' + + - 
name: Print changes + run: | + echo "### Changes" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Name | Value |" >> $GITHUB_STEP_SUMMARY + echo "| ---------- | ------------- |" >> $GITHUB_STEP_SUMMARY + echo "| codeowners | ${CODEOWNERS} |" >> $GITHUB_STEP_SUMMARY + echo "| src | ${SRC} |" >> $GITHUB_STEP_SUMMARY + echo "| tools | ${SHELLTOOLS} |" >> $GITHUB_STEP_SUMMARY + env: + CODEOWNERS: ${{ steps.codeowners-change.outputs.change }} + SRC: ${{ steps.src-change.outputs.change }} + SHELLTOOLS: ${{ steps.shelltools-change.outputs.change }} + + - name: Setup environment + uses: ./.github/actions/setup-environment + with: + venv: '~/py312-venv' + install: false + + - name: Commitlint + run: | + source "$VENV/bin/activate" + python3 -m pip install --upgrade commitizen + git checkout -b premerge + git fetch origin main:main + cz check --rev-range main..premerge + deactivate + env: + VENV: '~/py312-venv' + + validate-codeowners: + needs: checks + if: ${{ needs.checks.outputs.codeowners-change == 'true' }} + uses: ./.github/workflows/validate-codeowners.yml + secrets: inherit + + premerge-matrix: + needs: checks + runs-on: ubuntu-latest + + strategy: + matrix: + venv: ['~/py311-venv', '~/py312-venv'] + + outputs: + success: ${{ steps.check.outputs.success || 'true' }} + + steps: + - name: Checkout sources + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.pull_request.head.sha }} + + - name: Setup environment + uses: ./.github/actions/setup-environment + with: + venv: ${{ matrix.venv }} + + - name: Lint tools + if: needs.checks.outputs.shelltools-change == 'true' + run: | + sudo apt install shellcheck + shellcheck tools/actions/**/*.sh + + - name: Lint source code + if: needs.checks.outputs.src-change == 'true' + run: | + source "${{ matrix.venv }}/bin/activate" # same path the Setup environment step received as its venv input + black ./src/ --check + deactivate + + - name: Set failure + id: check + if: ${{ failure() || cancelled() }} + run: echo "success=$(echo 'false')" >> $GITHUB_OUTPUT + + premerge: + 
needs: premerge-matrix + if: always() + runs-on: ubuntu-latest + + steps: + - name: Check result + run: | + if [[ "$PREMERGE_MATRIX" != "true" ]]; then exit 1; fi + echo "### :rocket: Premerge checks succeeded" >> $GITHUB_STEP_SUMMARY + env: + PREMERGE_MATRIX: ${{ needs.premerge-matrix.outputs.success }} diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..30d084b --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,13 @@ +{ + "recommendations": [ + "pkief.material-icon-theme", + "atishay-jain.all-autocomplete", + "sadesyllas.vscode-workspace-switcher", + "editorconfig.editorconfig", + "mikestead.dotenv", + "redhat.vscode-yaml", + "ms-python.python", + "foxundermoon.shell-format", + "timonwong.shellcheck" + ] +} diff --git a/.vscode/recommended-settings.json b/.vscode/recommended-settings.json new file mode 100644 index 0000000..92f4502 --- /dev/null +++ b/.vscode/recommended-settings.json @@ -0,0 +1,36 @@ +{ + "files.encoding": "utf8", + "editor.formatOnPaste": true, + "editor.formatOnSave": true, + "files.autoSave": "afterDelay", + "files.autoSaveDelay": 5000, + "files.hotExit": "onExit", + "files.trimTrailingWhitespace": true, + "[sh]": { + "editor.defaultFormatter": "timonwong.shellcheck" + }, + "[shellscript]": { + "editor.defaultFormatter": "foxundermoon.shell-format" + }, + "shellcheck.customArgs": ["-x"], + "shellcheck.enable": true, + "shellcheck.enableQuickFix": false, + "shellcheck.exclude": [], + "shellcheck.executablePath": "shellcheck", + "shellcheck.ignoreFileSchemes": ["git", "gitfs"], + "shellcheck.ignorePatterns": { + "**/.git/objects/**": true, + "**/.git/subtree-cache/**": true + }, + "shellcheck.run": "onType", + "shellcheck.useWSL": false, + "shellcheck.useWorkspaceRootAsCwd": true, + "yaml.format.enable": true, + "yaml.format.singleQuote": true, + "yaml.format.bracketSpacing": true, + "yaml.format.proseWrap": "preserve", + "yaml.format.printWidth": 80, + "yaml.validate": true, + "yaml.hover": true, 
+ "yaml.completion": true +} diff --git a/LICENSE b/LICENSE index aa4396a..d3af486 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2025 Vadim +Copyright (c) 2025 rfprod Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md new file mode 100644 index 0000000..a7fa4b9 --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +# RAG application based on Ollama + +[![Commitizen friendly](https://img.shields.io/badge/commitizen-friendly-brightgreen.svg)](http://commitizen.github.io/cz-cli/) + +## Requirements + +To run your own copy of the project, you must fulfill the following requirements. + +### Core dependencies + +- [Python 3.12](https://www.python.org/downloads/release/python-3120/) +- [Git](https://git-scm.com/) + +### Virtual environments + +It is recommended to use a virtual environment to work on this project. + +The following sequence of commands creates an environment, activates the environment, and installs project dependencies. + +```bash +python3 -m venv ~/path-to-venv; \ + source ~/path-to-venv/bin/activate; \ + pip3 install -r ./requirements.txt +``` + +## Committing changes to the repo + +Using [commitizen cli](https://pypi.org/project/commitizen/) is mandatory. 
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d85f487 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +black +python-dotenv +langchain +langchain_community +langchain-ollama +langchain-neo4j +scikit-learn +BeautifulSoup4 diff --git a/src/init.py b/src/init.py new file mode 100644 index 0000000..72ef812 --- /dev/null +++ b/src/init.py @@ -0,0 +1,71 @@ +import os +from dotenv import load_dotenv + +from langchain_community.document_loaders import WebBaseLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter + +from langchain_neo4j import Neo4jVector +from langchain_ollama import OllamaEmbeddings + +from langchain_ollama import ChatOllama +from langchain.prompts import PromptTemplate +from langchain_core.output_parsers import StrOutputParser + +load_dotenv() + +urls = ["https://github.com/rfprod/nx-ng-starter/blob/main/README.md"] +docs = [WebBaseLoader(url).load() for url in urls] +docs_list = [item for sublist in docs for item in sublist] +text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( + chunk_size=250, chunk_overlap=0 +) + +doc_splits = text_splitter.split_documents(docs_list) +vector_store = Neo4jVector.from_documents( + documents=doc_splits, + embedding=OllamaEmbeddings( + base_url="http://localhost:11434", model="llama3.2:latest" + ), + url=os.getenv("NEO4J_URL"), + username=os.getenv("NEO4J_USER"), + password=os.getenv("NEO4J_PASS"), +) +retriever = vector_store.as_retriever(search_kwargs={"k": 4}) # the result count is passed to the search via search_kwargs + +prompt = PromptTemplate( + template="""You are an assistant for question-answering tasks. + Use the following documents to answer the question. + If you don't know the answer, just say that you don't know. 
+ Use three sentences maximum and keep the answer concise: + Question: {question} + Documents: {documents} + Answer: + """, + input_variables=["question", "documents"], +) + +llm = ChatOllama( + base_url="http://localhost:11434", model="llama3.2:latest", temperature=0 +) + +rag_chain = prompt | llm | StrOutputParser() + + +class RAGApplication: # answers a question from the documents the retriever returns for it + def __init__(self, retriever, rag_chain): + self.retriever = retriever + self.rag_chain = rag_chain + + def run(self, question): + documents = self.retriever.invoke(question) + doc_texts = "\n".join([doc.page_content for doc in documents]) # separate documents with a real newline + answer = self.rag_chain.invoke({"question": question, "documents": doc_texts}) # the prompt takes a mapping of its input variables + return answer + + +rag_application = RAGApplication(retriever, rag_chain) + +question = "What operating systems does nx-ng-starter support?" +answer = rag_application.run(question) +print("Question:", question) +print("Answer:", answer) diff --git a/tools/.gitkeep b/tools/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tools/actions/.gitkeep b/tools/actions/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tools/actions/codeowners/codeowners.sh b/tools/actions/codeowners/codeowners.sh new file mode 100644 index 0000000..e76e3fe --- /dev/null +++ b/tools/actions/codeowners/codeowners.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +declare -a CODEOWNERS=() + +while read -r LINE; do + if [ "$DEBUG" = "true" ]; then + echo "[DEBUG] read: $LINE" + fi + + case "$LINE" in + *@*) + NAME=$(echo "$LINE" | sed -r 's/^.*@//g') + if [ "$DEBUG" = "true" ]; then + echo "[DEBUG] name: $NAME" + fi + + case "${CODEOWNERS[@]}" in *"\"$NAME\""*) + if [ "$DEBUG" = "true" ]; then + echo "[DEBUG] duplicated $NAME" + fi + ;; + *) + CODEOWNERS+=("\"$NAME\"") + ;; + esac + ;; + esac +done <.github/CODEOWNERS + +if [ "$DEBUG" = "true" ]; then + printf "[DEBUG] codeowners %s" "${CODEOWNERS[@]}" +fi + +OUTPUT="[" + +for ITEM in "${CODEOWNERS[@]}"; do + if [ "$DEBUG" = "true" ]; then + echo "[DEBUG] i: $ITEM" + 
fi + OUTPUT+="$ITEM" + OUTPUT+="," +done + +OUTPUT="${OUTPUT%,}]" # strip the trailing comma if one exists, then close the array ("[]" when no owners were found) + +echo "$OUTPUT" # print the JSON array so callers can capture it from stdout