diff --git a/.cookiecutter.json b/.cookiecutter.json new file mode 100644 index 00000000..73fac8b3 --- /dev/null +++ b/.cookiecutter.json @@ -0,0 +1,16 @@ +{ + "_checkout": "2025.06.25", + "_output_dir": "/home/data/git/odoodataflow/odoo-data-flow", + "_repo_dir": "/home/bosd/.cookiecutters/cookiecutter-uv-hypermodern-python", + "_template": "gh:bosd/cookiecutter-uv-hypermodern-python", + "author": "bosd", + "copyright_year": "2025", + "development_status": "Development Status :: 3 - Alpha", + "email": "c5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me", + "friendly_name": "Odoo Data Flow", + "github_user": "bosd", + "license": "GPL-3.0", + "package_name": "odoo-data-flow", + "project_name": "odoo-data-flow", + "version": "0.0.0" +} diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..a8faee78 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,15 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.{py,toml}] +indent_style = space +indent_size = 4 + +[*.yml,yaml,json] +indent_style = space +indent_size = 2 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..6313b56c --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..fc0e0c6e --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,49 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: "/" + schedule: + interval: weekly + groups: + github-actions-updates: + applies-to: version-updates + dependency-type: development + github-actions-security-updates: + applies-to: security-updates + dependency-type: development + - package-ecosystem: pip + directory: "/.github/workflows" + schedule: + interval: weekly + groups: + workflow-updates: + applies-to: version-updates + dependency-type: development + workflow-security-updates: + applies-to: security-updates + dependency-type: development + - package-ecosystem: pip + directory: "/docs" + schedule: + interval: weekly + groups: + doc-updates: + applies-to: version-updates + dependency-type: development + doc-security-updates: + applies-to: security-updates + dependency-type: production + - package-ecosystem: pip + directory: "/" + schedule: + interval: weekly + versioning-strategy: lockfile-only + allow: + - dependency-type: "all" + groups: + pip-version-updates: + applies-to: version-updates + dependency-type: development + pip-security-updates: + applies-to: security-updates + dependency-type: production diff --git a/.github/labels.yml b/.github/labels.yml new file mode 100644 index 00000000..f7f83aad --- /dev/null +++ b/.github/labels.yml @@ -0,0 +1,66 @@ +--- +# Labels names are important as they are used by Release Drafter to decide +# regarding where to record them in changelog or if to skip them. +# +# The repository labels will be automatically configured using this file and +# the GitHub Action https://github.com/marketplace/actions/github-labeler. 
+- name: breaking + description: Breaking Changes + color: bfd4f2 +- name: bug + description: Something isn't working + color: d73a4a +- name: build + description: Build System and Dependencies + color: bfdadc +- name: ci + description: Continuous Integration + color: 4a97d6 +- name: dependencies + description: Pull requests that update a dependency file + color: 0366d6 +- name: documentation + description: Improvements or additions to documentation + color: 0075ca +- name: duplicate + description: This issue or pull request already exists + color: cfd3d7 +- name: enhancement + description: New feature or request + color: a2eeef +- name: github_actions + description: Pull requests that update Github_actions code + color: "000000" +- name: good first issue + description: Good for newcomers + color: 7057ff +- name: help wanted + description: Extra attention is needed + color: 008672 +- name: invalid + description: This doesn't seem right + color: e4e669 +- name: performance + description: Performance + color: "016175" +- name: python + description: Pull requests that update Python code + color: 2b67c6 +- name: question + description: Further information is requested + color: d876e3 +- name: refactoring + description: Refactoring + color: ef67c4 +- name: removal + description: Removals and Deprecations + color: 9ae7ea +- name: style + description: Style + color: c120e5 +- name: testing + description: Testing + color: b1fc6f +- name: wontfix + description: This will not be worked on + color: ffffff diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml new file mode 100644 index 00000000..7a04410f --- /dev/null +++ b/.github/release-drafter.yml @@ -0,0 +1,29 @@ +categories: + - title: ":boom: Breaking Changes" + label: "breaking" + - title: ":rocket: Features" + label: "enhancement" + - title: ":fire: Removals and Deprecations" + label: "removal" + - title: ":beetle: Fixes" + label: "bug" + - title: ":racehorse: Performance" + label: "performance" + - title: ":rotating_light: Testing" + label: "testing" + - title: ":construction_worker: Continuous Integration" + label: "ci" + - title: ":books: Documentation" + label: "documentation" + - title: ":hammer: Refactoring" + label: "refactoring" + - title: ":lipstick: Style" + label: "style" + - title: ":package: Dependencies" + labels: + - "dependencies" + - "build" +template: | + ## Changes + + $CHANGES diff --git a/.github/workflows/constraints.txt b/.github/workflows/constraints.txt new file mode 100644 index 00000000..d57727b6 --- /dev/null +++ b/.github/workflows/constraints.txt @@ -0,0 +1,3 @@ +pip==24.3.1 +nox==2024.10.09 +virtualenv==20.27.1 diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 00000000..a36ce84e --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,22 @@ +name: Labeler + +on: + push: + branches: + - main + - master + +jobs: + labeler: + runs-on: ubuntu-latest + permissions: + contents: read + issues: write + steps: + - name: Check out the repository + uses: actions/checkout@v4 + + - name: Run Labeler + uses: crazy-max/ghaction-github-labeler@v5.0.0 + with: + skip-delete: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..bb404ef9 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,73 @@ +name: Release + +on: + push: + branches: + - main + - master + +jobs: + release: + name: Release + runs-on: ubuntu-latest + steps: + - name: Check out the repository + uses: actions/checkout@v4 + 
with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Check if there is a parent commit + id: check-parent-commit + run: | + echo "sha=$(git rev-parse --verify --quiet HEAD^)" >> $GITHUB_OUTPUT + + - name: Detect and tag new version + id: check-version + if: steps.check-parent-commit.outputs.sha + uses: salsify/action-detect-and-tag-new-version@v2.0.3 + with: + version-command: | + bash -o pipefail -c "uv version | awk '{ print \$2 }'" + + - name: Bump version for developmental release + if: "! steps.check-version.outputs.tag" + run: | + sed -i -e "s/0.0.0/${GITHUB_REF#refs/*/}/" pyproject.toml + # uv bump patch && + # version=$(uv version | awk '{ print $2 }') && + # uv bump $version.dev.$(date +%s) + + - name: Build package + run: | + uv build + + - name: Publish package on PyPI + if: steps.check-version.outputs.tag + uses: pypa/gh-action-pypi-publish@v1.12.2 + with: + user: __token__ + password: ${{ secrets.PYPI_TOKEN }} + + - name: Publish package on TestPyPI + if: "! steps.check-version.outputs.tag" + uses: pypa/gh-action-pypi-publish@v1.12.2 + with: + user: __token__ + password: ${{ secrets.TEST_PYPI_TOKEN }} + repository_url: https://test.pypi.org/legacy/ + + - name: Publish the release notes + uses: release-drafter/release-drafter@v6.0.0 + with: + publish: ${{ steps.check-version.outputs.tag != '' }} + tag: ${{ steps.check-version.outputs.tag }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..730f1c32 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,156 @@ +name: Tests + +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +jobs: + tests: + name: ${{ matrix.session }} / py${{ matrix.python }} / ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - { python: "3.12", os: "ubuntu-latest", session: "pre-commit" } + # - { python: "3.13", os: "ubuntu-latest", session: "safety" } + - { python: "3.13", os: "ubuntu-latest", session: "mypy" } + - { python: "3.12", os: "ubuntu-latest", session: "mypy" } + - { python: "3.11", os: "ubuntu-latest", session: "mypy" } + - { python: "3.10", os: "ubuntu-latest", session: "mypy" } + - { python: "3.9", os: "ubuntu-latest", session: "mypy" } + - { python: "3.13", os: "ubuntu-latest", session: "tests" } + - { python: "3.12", os: "ubuntu-latest", session: "tests" } + - { python: "3.11", os: "ubuntu-latest", session: "tests" } + - { python: "3.10", os: "ubuntu-latest", session: "tests" } + - { python: "3.9", os: "ubuntu-latest", session: "tests" } + - { python: "3.12", os: "windows-latest", session: "tests" } + - { python: "3.12", os: "macos-latest", session: "tests" } + - { python: "3.12", os: "ubuntu-latest", session: "typeguard" } + - { python: "3.13", os: "windows-latest", session: "tests" } + - { python: "3.13", os: "macos-latest", session: "tests" } + - { python: "3.13", os: "ubuntu-latest", session: "xdoctest" } + - { python: "3.13", os: "ubuntu-latest", session: "docs-build" } + + env: + NOXSESSION: ${{ matrix.session }} + FORCE_COLOR: "1" + PRE_COMMIT_COLOR: "always" + + steps: + - name: Check out the repository + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + + - name: Install uv + uses: astral-sh/setup-uv@v5 + + - name: 
Create uv virtual environment + run: uv venv + + # - name: Install Nox + # run: | + # uv pip install nox + # python -m nox --version + + - name: Install dependencies + run: | + uv sync --all-groups + + - name: Compute pre-commit cache key + if: matrix.session == 'pre-commit' + id: pre-commit-cache + shell: python + run: | + import hashlib + import sys + + python = "py{}.{}".format(*sys.version_info[:2]) + payload = sys.version.encode() + sys.executable.encode() + digest = hashlib.sha256(payload).hexdigest() + result = "${{ runner.os }}-{}-{}-pre-commit".format(python, digest[:8]) + + print(f"result={result}") + + - name: Restore pre-commit cache + uses: actions/cache@v4 + if: matrix.session == 'pre-commit' + with: + path: ~/.cache/pre-commit + key: ${{ steps.pre-commit-cache.outputs.result }}-${{ hashFiles('.pre-commit-config.yaml') }} + restore-keys: | + ${{ steps.pre-commit-cache.outputs.result }}- + + - name: Run Nox + run: | + uv run python -m nox --python=${{ matrix.python }} + + - name: Upload coverage data + if: always() && matrix.session == 'tests' + uses: "actions/upload-artifact@v4" + with: + name: coverage-data-${{ matrix.session }}-${{ matrix.python }}-${{ matrix.os }} + path: ".coverage.*" + if-no-files-found: ignore + include-hidden-files: true + + - name: Upload documentation + if: matrix.session == 'docs-build' + uses: actions/upload-artifact@v4 + with: + name: docs + path: docs/_build + + coverage: + runs-on: ubuntu-latest + needs: tests + steps: + - name: Check out the repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install uv + uses: astral-sh/setup-uv@v1 + + - name: Create uv virtual environment + run: uv venv + + - name: Install Nox + run: | + uv pip install nox + uv run python -m nox --version + + - name: Install dependencies + run: | + uv sync --all-groups + + - name: Download coverage data + uses: actions/download-artifact@v4 + with: + pattern: coverage-data-* + merge-multiple: true + + - name: Combine coverage data and display human readable report + run: | + uv run python -m nox --session=coverage + + - name: Create coverage report + run: | + uv run python -m nox --session=coverage -- xml -i + + - name: Upload coverage report + uses: codecov/codecov-action@v4 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + with: + files: ./coverage.xml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..717e3237 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,49 @@ +repos: + - repo: local + hooks: + - id: check-added-large-files + name: Check for added large files + entry: check-added-large-files + language: system + - id: check-toml + name: Check Toml + entry: check-toml + language: system + types: [toml] + - id: check-yaml + name: Check Yaml + entry: check-yaml + language: system + types: [yaml] + - id: end-of-file-fixer + name: Fix End of Files + entry: end-of-file-fixer + language: system + types: [text] + stages: [pre-commit, pre-push, manual] + - id: trailing-whitespace + name: Trim Trailing Whitespace + entry: trailing-whitespace-fixer + language: system + types: [text] + stages: [pre-commit, pre-push, manual] + - id: pydoclint + name: pydoclint + entry: pydoclint + language: system + types: [python] + - id: ruff + name: ruff + entry: ruff check + args: [--fix] + language: python + types_or: [python, pyi] + - id: ruff-format + name: ruff-format + entry: ruff format + language: python + types_or: [python, pyi] + # - repo: 
https://github.com/pre-commit/mirrors-prettier + # rev: v4.0.0-alpha.8 + # hooks: + # - id: prettier diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index e6bf1bb3..00000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Set the OS, Python version, and other tools you might need -build: - os: ubuntu-24.04 - tools: - python: "3.13" - -# Build documentation in the "docs/" directory with Sphinx -sphinx: - configuration: docs/conf.py - -# Optionally, but recommended, -# declare the Python requirements required to build your documentation -# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html -# python: -# install: -# - requirements: docs/requirements.txt - diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..dd5f40b6 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,12 @@ +version: 2 +build: + os: ubuntu-20.04 + tools: + python: "3.13" +sphinx: + configuration: docs/conf.py +formats: all +python: + install: + - requirements: docs/requirements.txt + - path: . diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..b802047a --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,132 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +- Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +- The use of sexualized language or imagery, and sexual attention or advances of + any kind +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, + without their explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. 
+ +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +[c5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me](mailto:c5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me). +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][mozilla coc]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][faq]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. 
+ +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[mozilla coc]: https://github.com/mozilla/diversity +[faq]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..3a7efe7d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,113 @@ +# Contributor Guide + +Thank you for your interest in improving this project. +This project is open-source under the [LGPL license] and +welcomes contributions in the form of bug reports, feature requests, and pull requests. + +Here is a list of important resources for contributors: + +- [Source Code] +- [Documentation] +- [Issue Tracker] +- [Code of Conduct] + +[lgpl 3.0 license]: https://www.gnu.org/licenses/lgpl-3.0 +[source code]: https://github.com/bosd/odoo-data-flow +[documentation]: https://odoo-data-flow.readthedocs.io/ +[issue tracker]: https://github.com/bosd/odoo-data-flow/issues + +## How to report a bug + +Report bugs on the [Issue Tracker]. + +When filing an issue, make sure to answer these questions: + +- Which operating system and Python version are you using? +- Which version of this project are you using? +- What did you do? +- What did you expect to see? +- What did you see instead? + +The best way to get your bug fixed is to provide a test case, +and/or steps to reproduce the issue. + +## How to request a feature + +Request features on the [Issue Tracker]. + +## How to set up your development environment + +You need Python 3.9+ and the following tools: + +- [uv] +- [Nox] + +Install the package with development requirements: + +```console +$ uv install +``` + +You can now run an interactive Python session, +or the command-line interface: + +```console +$ uv run python +$ uv run odoo-data-flow +``` + +[uv]: https://docs.astral.sh/uv/ +[nox]: https://nox.thea.codes/ + +## How to test the project + +Run the full test suite: + +```console +$ nox +``` + +List the available Nox sessions: + +```console +$ nox --list-sessions +``` + +You can also run a specific Nox session. +For example, invoke the unit test suite like this: + +```console +$ nox --session=tests +``` + +Unit tests are located in the _tests_ directory, +and are written using the [pytest] testing framework. + +[pytest]: https://pytest.readthedocs.io/ + +## How to submit changes + +Open a [pull request] to submit changes to this project. + +Your pull request needs to meet the following guidelines for acceptance: + +- The Nox test suite must pass without errors and warnings. +- Include unit tests. This project maintains 100% code coverage. +- If your changes add functionality, update the documentation accordingly. + +Feel free to submit early, though—we can always iterate on this. + +To run linting and code formatting checks before committing your change, you can install pre-commit as a Git hook by running the following command: + +```console +$ nox --session=pre-commit -- install +``` + +It is recommended to open an issue before starting work on anything. +This will allow a chance to talk it over with the owners and validate your approach. 
+ +[pull request]: https://github.com/bosd/odoo-data-flow/pulls + + + +[code of conduct]: CODE_OF_CONDUCT.md diff --git a/README.md b/README.md index 86d009f0..17d96933 100644 --- a/README.md +++ b/README.md @@ -1,1616 +1,116 @@ -Odoo CSV Import Export Library -============================== -This library provides tools to easily and quickly import data into Odoo or export data from Odoo using CSV file. -It also provide a framework to manipulate data from CSV. +# Odoo Data Flow -- [Odoo CSV Import Export Library](#odoo-csv-import-export-library) -- [Installation](#installation) -- [Importing Data](#importing-data) - - [Import Parameters](#import-parameters) - - [--config CONFIG](#config-config) - - [--file FILENAME](#file-filename) - - [--sep SEPARATOR](#sep-separator) - - [--skip LINE](#skip-line) - - [--model MODEL](#model-model) - - [--size BATCH_SIZE](#size-batchsize) - - [--worker WORKER](#worker-worker) - - [--groupby SPLIT](#groupby-split) - - [--ignore IGNORE](#ignore-ignore) - - [--context CONTEXT](#context-context) - - [Import Related Keys](#import-related-keys) - - [ORM and Performance Related Keys](#orm-and-performance-related-keys) - - [Model Specific Keys](#model-specific-keys) - - [--o2m](#o2m) - - [--check](#check) - - [--fail](#fail) - - [Using the Script](#using-the-script) - - [Transformations](#transformations) - - [Basic Concepts](#basic-concepts) - - [A Simple Partner Import](#a-simple-partner-import) - - [Dealing with Relationships](#dealing-with-relationships) - - [Many2one Relationships](#many2one-relationships) - - [One2many Relationships](#one2many-relationships) - - [Many2many Relationships](#many2many-relationships) - - [Controlling the Load sequence](#controlling-the-load-sequence) - - [Mapper Functions](#mapper-functions) - - [mapper.const(value)](#mapperconstvalue) - - [mapper.val(field, default='', postprocess=lambda x: x, skip=False)](#mappervalfield-default-postprocesslambda-x-x-skipfalse) - - [mapper.map_val(field, mapping, default='')](#mappermapvalfield-mapping-default) - - [mapper.num(field, default='0.0')](#mappernumfield-default00) - - [mapper.bool_val(field, true_vals=[], false_vals=[])](#mapperboolvalfield-truevals-falsevals) - - [mapper.binary(field, path_prefix, skip=False, encoding="utf-8")](#mapperbinaryfield-pathprefix-skipfalse-encoding%22utf-8%22) - - [mapper.concat(separator, *fields)](#mapperconcatseparator-fields) - - [mapper.m2o(PREFIX, field, default='', skip=False)](#mapperm2oprefix-field-default-skipfalse) - - [mapper.m2o_map(PREFIX, mapper, default='', skip=False)](#mapperm2omapprefix-mapper-default-skipfalse) - - [mapper.m2m(PREFIX, *fields)](#mapperm2mprefix-fields) - - [mapper.m2m_id_list(PREFIX, *args, **kwargs)](#mapperm2midlistprefix-args-kwargs) - - [mapper.m2m_value_list(*args, **kwargs)](#mapperm2mvaluelistargs-kwargs) - - [mapper.m2m_template_attribute_value(*args, **kwargs)](#mapperm2m_template_attribute_valueprefix-template_id_field-args) - - [Advanced Transformations](#advanced-transformations) - - [User Defined Mappers](#user-defined-mappers) - - [Managing the Client CSV file](#managing-the-client-csv-file) - - [Adding a column](#adding-a-column) - - [Removing Lines](#removing-lines) - - [Updating Records With Database IDs](#updating-records-with-database-ids) - - [XML Processing](#XML-Processing) - - [A Real Life Example](#a-real-life-example) - - [Performances Considerations](#performances-considerations) - - [Importing Related or Computed Fields](#importing-related-or-computed-fields) - - 
[Troubleshooting](#troubleshooting) - - [When the number of records does not match](#when-the-number-of-records-does-not-match) - - [Tips and Tricks](#tips-and-tricks) - - [Importing Data of Multiple Companies](#importing-data-of-multiple-companies) - - [Importing Translations](#importing-translations) - - [Importing Account Move Lines](#importing-account-move-lines) -- [Exporting Data](#exporting-data) -- [Requirements](#requirements) +[![PyPI](https://img.shields.io/pypi/v/odoo-data-flow.svg)][pypi status] +[![Status](https://img.shields.io/pypi/status/odoo-data-flow.svg)][pypi status] +[![Python Version](https://img.shields.io/pypi/pyversions/odoo-data-flow)][pypi status] +[![License](https://img.shields.io/pypi/l/odoo-data-flow)][license] -# Installation -* From GitHub +[![Read the documentation at https://odoo-data-flow.readthedocs.io/](https://img.shields.io/readthedocs/odoo-data-flow/latest.svg?label=Read%20the%20Docs)][read the docs] +[![Tests](https://github.com/OdooDataFlow/odoo-data-flow/workflows/Tests/badge.svg)][tests] +[![Codecov](https://codecov.io/gh/OdooDataFlow/odoo-data-flow/branch/main/graph/badge.svg)][codecov] -``` -git clone git@github.com:tfrancoi/odoo_csv_import.git -``` - -* From PyPi - -``` -[sudo] pip install odoo_import_export_client -``` -# Importing Data -The Odoo CSV Import Export library provides the script `odoo_import_thread.py` to import data into Odoo. The script is designed to load one data file into one model. That means you might need to run the script several times with different data files, models and other options to complete an import. - -
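For example, loading partners and then their bank accounts could be done with two consecutive runs (the file and model names are purely illustrative):

```
odoo_import_thread.py -c conf/connection.conf --file=res_partner.csv --model=res.partner
odoo_import_thread.py -c conf/connection.conf --file=res_partner_bank.csv --model=res.partner.bank
```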

- -Data are not inserted directly into the database, instead they are loaded by calling the method `models.load`. Doing so, the standard behaviour of each model is respected. - -This script has several options. Type the command `odoo_import_thread.py --help` to get the usage. - -``` -usage: odoo_import_thread.py [-h] -c CONFIG --file FILENAME --model MODEL - [--worker WORKER] [--size BATCH_SIZE] - [--skip SKIP] [--fail] [-s SEPARATOR] - [--groupby SPLIT] [--ignore IGNORE] [--check] - [--context CONTEXT] [--o2m] - -Import data in batch and in parallel - -optional arguments: - -h, --help show this help message and exit - -c CONFIG, --config CONFIG - Configuration File that contains connection parameters - --file FILENAME File to import - --model MODEL Model to import - --worker WORKER Number of simultaneous connection - --size BATCH_SIZE Number of line to import per connection - --skip SKIP Skip until line [SKIP] - --fail Fail mode - -s SEPARATOR, --sep SEPARATOR - CSV separator - --groupby SPLIT Group data per batch with the same value for the given - column in order to avoid concurrent update error - --ignore IGNORE list of column separate by comma. Those column will be - remove from the import request - --check Check if record are imported after each batch. - --context CONTEXT context that will be passed to the load function, need - to be a valid python dict - --o2m When you want to import o2m field, don't cut the batch - until we find a new id -``` -One of the most important feature is the ability to import in parallel while controlling the transaction size. -These options allow to import huge data files while dealing with performance, what is not possible with the builtin Odoo import wizard. - -Here is how looks like a typical execution of the script: - -

- -

The parameter values are set for illustration purposes.
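A comparable run could be launched from the command line like this (connection file, data file, and batch settings are illustrative):

```
odoo_import_thread.py -c conf/connection.conf --file=res_partner.csv --model=res.partner --worker=4 --size=200
```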

- -When running the script, an amount of threads are spawned. Each of them handles an amount of records by transaction. Each transaction inserts or updates records in the Odoo instance defined in the configuration file. - -## Import Parameters - -### --config CONFIG -The configuration file `CONFIG` is a text file that defines the parameters used in the import. Here is an example. - -``` -[Connection] -hostname = mydb.odoo.com -database = mydb -login = admin -password = admin -protocol = jsonrpcs -port = 443 -uid = 2 -``` -The section `[connection]` is mandatory. Then the following parameters must be set accordingly. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Parameter | Description |
| --- | --- |
| hostname | Name of the host where the Odoo resides. |
| database | Name of the PostgreSQL database. |
| login | The login used to create or update the records. |
| password | The login's password. |
| protocol | Protocol used for RPC calls. It can be one of the following values: xmlrpc, xmlrpcs, jsonrpc, jsonrpcs. For a remote database, it is strongly advised to use an encrypted protocol (xmlrpcs or jsonrpcs). |
| port | TCP port where Odoo can be reached. Usually 443 for encrypted remote connections, or 8069 for a local Odoo with its default configuration. |
| uid | The database id of the res.users record identified by the parameter 'login'. Well-known ids are: 1 = admin user prior to V12, 2 = admin user as of V12. |
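As a point of comparison, a configuration for a local development instance could look like this (all values are illustrative; an unencrypted protocol is only acceptable for a local database):

```
[Connection]
hostname = localhost
database = devdb
login = admin
password = admin
protocol = xmlrpc
port = 8069
uid = 2
```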
- -> **Tips:** On premise, it's advised to use a dedicated user with the minimal access rights on all the models related to the import. - -By default, `CONFIG` is set to `conf/connection.conf`. Under windows, you must always set this option because the path separator is not compliant with the OS. - -### --file FILENAME -Define the CSV `FILENAME` to import. The CSV format is mandatory. In order to be importable in Odoo, this file must follow some rules: -- The file must be in UTF-8 encoding. -- One file must contain data of only one model. -- The first line is the column names. All columns must have the technical name of the fields. -- All lines must have an `id` column fullfilled with an XML_ID that identifies the record. -- Some field formats must be respected: - - Boolean values must be 0 or 1. - - Binary data must be encoded in base64. - - Datetime fields format depends on the language (often %Y-%m-%d %H:%M:%S). - - The decimal separator of float values also depends on the language (often '.'). - - Selection fields must always contain database values. - - Many2one fields must be suffixed with `/id` if their value is an XML_ID or `.id` if it's a database id. - - Many2many fields must be a comma separated list of XML_IDs. -- If a field value is splitted into multiple lines, it must be enclosed with double quotes ("). - -The fields separator can be set with the option `--sep SEPARATOR`. - -You can skip the first lines of the file with the option `--skip LINE`. - -The name of the CSV file can be used to set the model by default. Ex: the file name `res_partner.csv` sets the model to `res.partner`. See the `--model` option for more detail. - -### --sep SEPARATOR -Define the column separator. Ex: `--sep=,`. By default, it's the semicolon (;). - -If the separator is present in a field value, the value must be enclosed with double quotes ("). - -### --skip LINE -Allow to skip the first `LINE`line(s) of the file. Ex: `--skip=3` will skip the first three lines. - -The first line must be the column names. Don't skip any line if it's the case. - -### --model MODEL -Set the `MODEL` to import data into. Ex: `--model=res.partner`. - -By default the model is the name of the CSV file with the underscores (_) replaced by dots (.) and without extension. Meaning, if the CSV file is named`res_partner.csv`, the model is `res.partner` by default . - -### --size BATCH_SIZE -Controls the number of records (`BATCH_SIZE`) imported in one transaction. - -When using the standard import wizard of Odoo, an import is always handled by one worker in one single transaction for the whole file. - -

- -When dealing with big data files, this may lead to two main issues: -- the time needed to import the whole file could exceed the maximum time allowed for a transaction to run. This time is set by the Odoo parameters `limit-time-cpu` and `limit-time-real`, -- if an error occurs on one record, the whole transaction fails and all the records are rejected. - -The solution is then to reduce the number of records in one transaction by setting the `--size` parameter to the desired number. - -

- -Here colored in blue, the transaction contains two records (`--size=2`). Now, only two records instead of the whole file must be imported during the time allocated for the transaction. - -This option is also helpful when importing large records over a WAN connection because a smaller transaction size leads to smaller JSON or XML payloads to send over the network. However it causes a bit more network overhead which could slow down the total run time. This run time can be drastically decreased by using the `--worker` parameter. - -### --worker WORKER -Controls the number of import threads in parallel. - -Here is how a import looks like whith `--worker=2`. - -

- -The whole file is now handled by two workers in parallel. The total run time is then divided by two. - -As a rule of thumb, you can set the number of workers up to 80% of the number Odoo workers. So that other users can still work while the import runs. - -When working with multiple workers, there is a potential drawback: the concurrent updates. - -In the following example, suppose Import Thread 1 is importing my_partner_2 while Import Thread 2 is importing my_partner_3. - -

- -Both partners have the same parent_id: my_partner_1. As the `parent_id` is a related field non readonly, the insert/update of my_partner_2 and my_partner_3 will both trigger an update on my_partner_1. That's a concurrent update. As a consequence the current transaction of both threads will fail. To solve such an issue, the parameter `--groupby` can be used. - -### --groupby SPLIT -Selects the field to group in one thread. - -To avoid the concurrent update issue described previously, you can use `--groupby=parent_id/id`. By doing this, we ensure all the records with the same `parent_id/id` are imported by the same thread. It thus eliminates the concurrent updates **caused by the parent_id**. - -
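For instance, the partner import above could be launched with the grouping enabled like this (file name and batch settings are illustrative):

```
odoo_import_thread.py -c conf/connection.conf --file=res_partner.csv --model=res.partner --worker=4 --size=200 --groupby=parent_id/id
```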

- -### --ignore IGNORE -Specifies the columns that do not need to be imported. Multiple columns can be set in a comma separated list. Ex: `--ignore=col1,col2,col3`. - -This is typically used to avoid cascade updates while importing related fields. Refer to [Importing Related or Computed fields](#Importing-Related-or-Computed-Fields). - -### --context CONTEXT -Define the context of the ORM while importing. Ex:`--context="{'tracking_disable': True}"`. - -Here are some useful context keys. - -#### Import Related Keys - - - - - - - - - - - -
| Key | Description |
| --- | --- |
| write_metadata | When True, allows importing the audit log fields (create_uid, create_date, write_uid, write_date). The import must run with the `admin` user. Requires the module import_metadata available here. |
| update_many2many | Set it to True when the data file contains a many2many relationship split as one record per line instead of a comma separated list of XML_IDs in one column. Suppose we want to assign categories to products; a regular record is:<br>`product1;categ1,categ2,categ3`<br>With `'update_many2many': True`, you can import a file with the following structure:<br>`product1;categ1`<br>`product1;categ2`<br>`product1;categ3` |
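As a sketch, such a one-record-per-line file could then be loaded by passing the key through `--context` (the file and model names are illustrative):

```
odoo_import_thread.py -c conf/connection.conf --file=product_categories.csv --model=product.product --context="{'update_many2many': True}"
```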
-
#### ORM and Performance Related Keys
| Key | Description |
| --- | --- |
| tracking_disable | When True, don't create messages in the chatter. |
| defer_fields_computation | When True, recompute the computed fields at the end of the transaction instead of after each record. Useless if --size=1. Requires the module defer_fields_computation available here. |
| defer_parent_store_computation | Defer the computation of the fields parent_left and parent_right to the end of the transaction. Valid up to Odoo 11. |
| lang | Set the current language. Ex: `'lang': 'fr_FR'` |
| force_company | Set the current company. Use the database identifier of the company. |
#### Model Specific Keys
| Key | Description |
| --- | --- |
| check_move_validity | Set it to False when you import account moves and account move lines. Refer to "Importing Account Move Lines" for more details. |
| create_product_product | Set it to True when you import product templates and also the variants. Without this key, the ORM will automatically create the variants when the templates are imported. |
- -These are some examples. Feel free to look into Odoo code to find out all context keys. - -### --o2m -Use this option when you import a data file with one2many relationships. The import file must follow a specific structure. - -
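The explanation below uses a model `master` with one2many fields `child1_ids` and `child2_ids`; as a purely illustrative sketch (assuming the `field_ids/subfield` column naming of Odoo's standard import format), such a file could look like this:

```
id;name;child1_ids/name;child2_ids/name
my_import.master_1;Master 1;child 1.1;child 2.1
;;child 1.2;child 2.2
;;;child 2.3
```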

- -Suppose the model `master` has two one2many fields `child1_ids` and `child2_ids`, linking respectively the models `child1` and `child2`. -In the line beginning a master record, you can set all the master fields, like a regular import file. In addition, you can add the fields of child records. In the next lines, you can add the data of the next childs, leaving empty the columns of the master record and the unexisting childs. - -With the `--o2m` option, the master record will be imported with its two `child1` and its three `child2` in the same time. -It worths noticing that it's impossible to set XML_IDs on the child records. As a consequence: -- you cannot run the import again to update the childs data, -- the childs cannot be referenced in another import file. - - - -### --check -With this option, at the end of each transaction, the number of records in the transaction is compared to the number of imported records. If these numbers do not match, an error message is printed. Most likely, the transaction contains records with duplicate XML_IDs. Refer to [When the number of records does not match](#when-the-number-of-records-does-not-match) for more explanations. - -### --fail -Engage the fail mode. - -When you run `odoo_import_thread.py` whithout the `--fail` option, it runs in _normal_ mode. In this mode, any rejected record is printed in a file with the same name as the parameter `--file` suffixed by `.fail` and located in the same folder. - -This `.fail` file may contain records rejected for good reasons (ie. a required field is missing) or _bad_ reasons. If you run an import with multiple workers, a bad reason could be a concurrent update. And even in a single thread, when an error occurs in one record, all the records of the transaction (`--size`) are rejected. -This means the `.fail` file may contain records that could be imported if the process had run by a single thread and in a specific transaction for each record. That's the role of the fail mode. - -

- -In fail mode, `odoo_import_thread.py` will try to import the records of the `.fail` file. Additionaly, neither `--worker` nor `--size` is set. The default values of `1` are then used to ensure a single thread and a single record per transaction. - -In this mode, the rejected records are placed in the `.fail.bis` file in the same folder. This file contains only rejections that need your attention and must be solved before importing again. - -## Using the Script -To use all the benefits of the script, most of the time imports are run with multiple workers and a user defined transaction size. In this case and because of the fail mode, you always need two command lines to import one file. - -Assuming: -- your configuration file is `connection.conf` located in the current folder, -- your data file is `my_data.csv` located in the current folder, -- the target model is `my.model`, -- you want to run a fast multithreaded import. - -Run the following two commands (as an example): -``` -odoo_import_thread.py -c connection.conf --file=my_data.csv --model=my.model --worker=4 --size=200 [other options] -odoo_import_thread.py -c connection.conf --file=my_data.csv --model=my.model --fail [other options] -``` - -The first command runs the import in parallel and in batch. The rejected records are printed in the file `my_data.csv.fail`. We don't care about this file, it will be handled in the next command. - -In the second command, the parameter `--fail` replaces `--worker` and `--size`. The import runs in fail mode. It will read `my_data.csv.fail` (note the parameter `--file` is unchanged) and print the rejected records in the file `my_data.csv.fail.bis`. If the `.fail.bis` file empty, all the records are imported (inserted or updated). - -Refer to the [Troubleshooting](#troubleshooting) section to know how to solve some issues. - -## Transformations - -When the file to import doesn't respect the [expected format](#file-FILENAME) of `odoo_import_thread.py`, it's necessary to apply some transformations in order to create compliant CSV files. The Odoo CSV Import Export library helps in creating a python script able to transform CSV files thanks to the `Processor` and the `mapper` objects. - -

- -### Basic Concepts - -Let's start with a simple use case to introduce the main concepts of the tranformations. Once you're familiar with, a more complete use case is provided [here](#a-real-life-example). - -#### A Simple Partner Import -A customer wants to import some partners. He provides the following CSV file, say `client_file.csv`: - -``` -Firstname;Lastname;Birthdate -John;Doe;31/12/1980 -David;Smith;28/02/1985 -``` -This file cannot be imported directly because: -- the fields `Lastname`, `Firstname`, `Birthdate` do not exist in the model `res.partner`, -- the date format is not compliant, -- there is no `id` field. - -The first step to do is to ensure that all the fields exist in the target model. Here, the birthdate is a new data. We assume it must be stored in the field `birthdate` created before the import. Instead, `Firstname` and `Lastname` will be used as the `name`of the partner. - -Now the transformation step can begin. It consists on writing a python script that builds another CSV file compliant with the model `res.partner`, this is our Odoo CSV file. -In this case, the transformation steps will: -- define how we build the `name` field from the columns `Lastname` and `Firstname`, -- change the date format to a compliant one. - -Another important point to consider is what happens when we load the data several times (*it could occur if the tranformations must be rewritten*). Basically, when we import the Odoo CSV file the first time, the two partners will be created. But if we run it again, we don't want to create duplicates. Instead, we want to update the partner information. So the transformation phase is also necessary to: - -- assign an XML_ID to each partner of the file. - -The presence of an XML_ID ensures that a record is created if it doesn't exist, or updated if it already exists. This behaviour is included in the method `load` of each Odoo model. - -Let's build the transformation script, say `res_partner.py`. We start with importing the needed objects from the library. - -``` -# -*- coding: utf-8 -*- -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import Processor - -# Custom import -from datetime import datetime # used to change the format of datetime fields -``` - -Then we build a `Processor` object from the client CSV file `client_file.csv`. Assuming this file resides in the current folder: - -``` -processor = Processor('client_file.csv', delimiter=';') -``` - -Now we create a mapping dictionary where the keys are the fields of the target model (`res.partner`) we want to import -**at least the required fields without default value**- and how we get them from the client file. - -``` -res_partner_mapping = { - 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')), - 'name: mapper.concat(' ','Firstname','Lastname'), - 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%y").strftime("%Y-%m-%d 00:00:00")), -} -``` -All the fields are extracted with the methods of the `mapper` object. These are described [here](#mapper-functions). - -An important thing to notice is the `id` field. It is mandatory by the script `odoo_import_thread.py`. It contains an XML_ID that we build *as we want* as soon as its unicity is garanteed. In this example, we assume the concatenation of three columns (`Lastname`, `Firstname` and `Birthdate`) is enough to identify a record. It will create XML_IDs like `my_import_res_partner.John_Doe_31/12/1980` in the Odoo CSV file. 
You are free to choose whatever module name you want, here `my_import_res_partner`, but it's a good idea to include the model name and something like the *project* name. - -Now we can invoke the transformation by itself. - -``` -processor.process(res_partner_mapping, 'res.partner.csv', {'model': 'res.partner', 'context': "{'tracking_disable': True}", 'worker': 2, 'batch_size': 20}) -``` -This step will create the import file `res.partner.csv` for the model. `res.partner`. It should look like this, conforming to `res_partner_mapping`: - -``` -id;name;birthdate -my_import_res_partner.John_Doe_31/12/1980;John Doe;31-12-1980 00:00:00 -my_import_res_partner.David_Smith_28/02/1985;David Smith;28-02-1985 00:00:00 -``` -> **Note:** The order of the columns is not related to the client file or the keys in the transform mapping dictionary. - -Notice some options are set when invoking the transformation: `'context': "{'tracking_disable': True}", 'worker': 2, 'batch_size': 20}`. -They don't play any role in the transformation by itself. Instead it will be used by the import shell script later. Hopefully, we can automatically create the shell script by adding this line: - -``` -processor.write_to_file("res_partner.sh", python_exe='', path='') -``` - -This will create the script `res_partner.sh` that will load the data with `odoo_import_thread.py`, first in normal mode, then in fail mode. It looks like this: - -``` -odoo_import_thread.py -c conf/connection.conf --file=res.partner.csv --model=res.partner --worker=2 --size=20 --groupby= --ignore= --sep=";" --context="{'tracking_disable': True}" -odoo_import_thread.py -c conf/connection.conf --fail --file=res.partner.csv --model=res.partner --ignore= --sep=";" --context="{'tracking_disable': True}" -``` - -The complete python script: -``` -# -*- coding: utf-8 -*- -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import Processor - -# Custom import -from datetime import datetime # used to change the format of datetime fields - -processor = Processor('client_file.csv', delimiter=';') - -res_partner_mapping = { - 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')), - 'name: mapper.concat(' ','Firstname','Lastname'), - 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%y").strftime("%Y-%m-%d 00:00:00")), -} - -processor.process(res_partner_mapping, 'res.partner.csv', {'model': 'res.partner', 'context': "{'tracking_disable': True}", 'worker': 2, 'batch_size': 20}) -processor.write_to_file("res_partner.sh", python_exe='', path='') -``` - -Run the transformations - -``` -python res_partner.py -``` - -You should have created: -- the import file `res.partner.csv` in the same folder as the client file `res_partner.csv`, -- the shell script `res_partner.sh` in your current folder. - -#### Dealing with Relationships - -##### Many2one Relationships - -Coming back to our simple example, let's suppose the client adds the partner's company in his data. Here, we are not in a multi companies environment, the company is just the partner's parent. The file could look like this: -``` -Company;Firstname;Lastname;Birthdate -The World Company;John;Doe;31/12/1980 -The Famous Company;David;Smith;28/02/1985 -``` -In this case we must import four partners (the two companies and the two persons) and set the field `parent_id` of the two persons to their respective company. In a relational database we link records thanks to their internal identifiers (`id`). 
But at this step, these ids are unknown because the records are not imported yet. We will then use the XML_IDs to link the records. - -It means when we transform a company, we assign an XML_ID to it, then we use this XML_ID as the `parent_id` of the person who is a member of this company. As a consequence the companies must be imported before the persons. More precisely, the XML_IDs set in the `parent_id` must exist before being used as a relationship value. - -Let's create the transformation script. As usual, we start with the needed imports and the creation of a `Processor` on the client file. -``` -# -*- coding: utf-8 -*- -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import Processor -from datetime import datetime # used to change the format of datetime fields - -processor = Processor('client_file.csv', delimiter=';') -``` - -Now we can define the mapping to extract the companies. These are records in the model `res.partner` with the boolean field `is_company` set. We also assume the company name is unique so that we can use it as an identifier in the XML_ID. -``` -res_partner_company_mapping = { - 'id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')), - 'name': mapper.val('Company'), - 'is_company': mapper.const('1'), -} - -processor.process(res_partner_company_mapping, 'res.partner.company.csv', {}, 'set') -``` -It worths noting the option`'set'` of `processor.process` while invoking the companies transformation. This option allows to remove duplicates in the Odoo CSV file. It could be the case if several partners belong to the same company. - -And here is the mapping to extract the persons. It's exactly the same as before except we've added the field `parent_id`. -``` -res_partner_mapping = { - 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')), - 'name': mapper.concat(' ','Firstname','Lastname'), - 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%y").strftime("%Y-%m-%d 00:00:00")), - 'parent_id/id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')), -} - -processor.process(res_partner_mapping, 'res.partner.csv', {}) -``` -The important thing to notice here is that we use exactly the same transformation method for the partner ids in both mappings in order to generate the same XML_ID (`res_partner_mapping['parent_id/id']` = `res_partner_company_mapping['id']`). *Remember also the suffix `/id` when using XML_IDs in relation fields.* - -The results will be two differents Odoo CSV files: - -- The partners who are companies: `res.partner.company.csv`. -``` -id;name;is_company -my_import_res_partner.The World Company;The World Company;1 -my_import_res_partner.The Famous Company;The Famous Company;1 -``` - -- The persons: `res.partner.csv` where the column `partner_id/id` refer to an existing `id` in `res.partner.company.csv`. -``` -id;parent_id/id;name;birthdate -my_import_res_partner.John_Doe_31/12/1980;my_import_res_partner.The World Company;John Doe;31-12-1980 00:00:00 -my_import_res_partner.David_Smith_28/02/1985;my_import_res_partner.The Famous Company;David Smith;28-02-1985 00:00:00 -``` - -Finally we generate the shell script that will load the files by adding this line in the transformation script. - -``` -processor.write_to_file("res_partner.sh", python_exe='', path='') -``` - -This will create the shell script `res_partner.sh` that will load the data. 
It looks like this: - -``` -odoo_import_thread.py -c conf/connection.conf --file=res.partner.company.csv --model=res.partner --groupby= --ignore= --sep=";" --context="{}" -odoo_import_thread.py -c conf/connection.conf --fail --file=res.partner.company.csv --model=res.partner --ignore= --sep=";" --context="{}" - -odoo_import_thread.py -c conf/connection.conf --file=res.partner.csv --model=res.partner --groupby= --ignore= --sep=";" --context="{}" -odoo_import_thread.py -c conf/connection.conf --fail --file=res.partner.csv --model=res.partner --ignore= --sep=";" --context="{}" -``` -The script contains all the commands to load both Odoo CSV files. They are written in the same order as in the transformation script. So the import sequence is respected. - -The complete python script: -``` -# -*- coding: utf-8 -*- -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import Processor -from datetime import datetime # used to change the format of datetime fields - -processor = Processor('client_file.csv', delimiter=';') - -res_partner_company_mapping = { - 'id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')), - 'name': mapper.val('Company'), - 'is_company': mapper.const('1'), -} - -processor.process(res_partner_company_mapping, 'res.partner.company.csv', {}, 'set') - -res_partner_mapping = { - 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')), - 'name': mapper.concat(' ','Firstname','Lastname'), - 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%y").strftime("%Y-%m-%d 00:00:00")), - 'parent_id/id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')), -} - -processor.process(res_partner_mapping, 'res.partner.csv', {}) - -processor.write_to_file("res_partner.sh", python_exe='', path='') -``` - - -##### One2many Relationships - -Usually we don't import `One2many` fields. Instead, we import the inverse `Many2one` relation in the linked model. - -##### Many2many Relationships - -Let's suppose our customer adds some categories to the partners. The client file could look like this: -``` -Company;Firstname;Lastname;Birthdate;Category -The World Company;John;Doe;31/12/1980;Premium -The Famous Company;David;Smith;28/02/1985;Normal,Bad Payer -``` -The categories are set in one column in the same line of the partner record and separated with a comma. - -By looking into Odoo, we see that the model `res.partner` contains a field `category_id` which is a Many2many to the model `res.partner.category`. If you remember the rule _Many2many fields must be a comma separated list of XML_IDs_ and that an XML_ID must be created before being used in a relationship, you get an idea of the procedure to apply. - -1- Create all the categories by extracting them from the client file and assign them and XML_ID. - -2- Build a comma separated list of XML_IDs of categories for each partner. - -Let's start the transformation script. As usual, we start with the needed imports and the creation of a `Processor` on the client file. -``` -# -*- coding: utf-8 -*- -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import Processor -from datetime import datetime # used to change the format of datetime fields - -processor = Processor('client_file.csv', delimiter=';') -``` - -The first transformation extracts the categories. We assume the name is unique to create the XML_IDs. 
-``` -partner_category_mapping = { - 'id': mapper.m2m_id_list('res_partner_category', 'Category'), - 'name': mapper.m2m_value_list('Category'), -} - -processor.process(partner_category_mapping, 'res.partner.category.csv', {}, m2m=True) -``` -Notice we use two mapper functions to deal with Many2many relationships: `m2m_id_list` and `m2m_value_list`, and the option `m2m=True` in the `processor.process`command. - -This will create the file `res.partner.category.csv` with all the unique categories as follow: -``` -id;name -res_partner_category.Premium;Premium -res_partner_category.Normal;Normal -res_partner_category.Bad Payer;Bad Payer -``` - -Now we can complete the person mapping. It's exactly the same as before except we have added the field `category_id`. +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)][pre-commit] +[![Ruff codestyle][ruff badge]][ruff project] -``` -res_partner_mapping = { - 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')), - 'name': mapper.concat(' ','Firstname','Lastname'), - 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%y").strftime("%Y-%m-%d 00:00:00")), - 'parent_id/id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')), - 'category_id/id': mapper.m2m('res_partner_category', 'Category'), -} - -processor.process(res_partner_mapping, 'res.partner.csv', {}) -``` -The important thing here is to use the same XML_ID `'res_partner_category' + Category'` for both -`res_partner_mapping[category_id/id] = mapper.m2m` and `partner_category_mapping[id] = mapper.m2m_id_list`. - - The mapping `res_partner_mapping` will create a new file `res.partner.csv` like this: -``` -id;parent_id/id;name;birthdate;category_id/id -my_import_res_partner.John_Doe_31/12/1980;my_import_res_partner.The World Company;John Doe;31-12-1980 00:00:00;res_partner_category.Premium -my_import_res_partner.David_Smith_28/02/1985;my_import_res_partner.The Famous Company;David Smith;28-02-1985 00:00:00;res_partner_category.Normal,res_partner_category.Bad Payer -``` -Notice the column `category_id/id` that contains a comma separated list of XML_IDs of partner categories. 
- -Finally we create the load script by adding this line: -``` -processor.write_to_file("res_partner.sh", python_exe='', path='') -``` -It creates the script `res_partner.sh` looking like this: -``` -odoo_import_thread.py -c conf/connection.conf --file=res.partner.category.csv --model=res.partner.category --groupby= --ignore= --sep=";" --context="{}" -odoo_import_thread.py -c conf/connection.conf --fail --file=res.partner.category.csv --model=res.partner.category --ignore= --sep=";" --context="{}" - -odoo_import_thread.py -c conf/connection.conf --file=res.partner.csv --model=res.partner --groupby= --ignore= --sep=";" --context="{}" -odoo_import_thread.py -c conf/connection.conf --fail --file=res.partner.csv --model=res.partner --ignore= --sep=";" --context="{}" -``` - -The complete python script: -``` -# -*- coding: utf-8 -*- -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import Processor -from datetime import datetime # used to change the format of datetime fields - -processor = Processor('client_file.csv', delimiter=';') - -partner_category_mapping = { - 'id': mapper.m2m_id_list('res_partner_category', 'Category'), - 'name': mapper.m2m_value_list('Category'), -} - -processor.process(partner_category_mapping, 'res.partner.category.csv', {}, m2m=True) - -res_partner_mapping = { - 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')), - 'name': mapper.concat(' ','Firstname','Lastname'), - 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%y").strftime("%Y-%m-%d 00:00:00")), - 'parent_id/id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')), - 'category_id/id': mapper.m2m('res_partner_category', 'Category'), -} - -processor.process(res_partner_mapping, 'res.partner.csv', {}) - -processor.write_to_file("res_partner.sh", python_exe='', path='') -``` - -> **Note:** it's possible to import many2many relationships with another file structure. Refer to the context key [update_many2many](#import-related-keys) to learn how. - - -#### Controlling the Load sequence -The load order in the shell script depends on the order of the `processor.process` instructions in the transformation script. - -This example script: -``` -processor = Processor('client_file.csv', delimiter=';') - -res_partner_company_mapping = { -} - -res_partner_mapping = { -} - -processor.process(res_partner_company_mapping, 'res.partner.company.csv', {}, 'set') -processor.process(res_partner_mapping, 'res.partner.csv', {}) -processor.write_to_file("res_partner.sh", python_exe='', path='') -``` -will create the load script `res_partner.sh` with: - -1- the load of `res.partner.company.csv` because it's the first invoked transformation, - -2- the load of `res.partner.csv`. - -If you want to import the persons first (which is a bad idea here), just inverse the two `processor.process` commands. -``` -processor = Processor('client_file.csv', delimiter=';') - -res_partner_company_mapping = { -} - -res_partner_mapping = { -} - -processor.process(res_partner_mapping, 'res.partner.csv', {}) -processor.process(res_partner_company_mapping, 'res.partner.company.csv', {}, 'set') -processor.write_to_file("res_partner.sh", python_exe='', path='') -``` - -Now if you want to create two separated shell scripts, one for companies and another for the persons, you need to create a new `Processor`. 
```
# For the 1st load script
processor = Processor('client_file.csv', delimiter=';')
res_partner_company_mapping = {
}
processor.process(res_partner_company_mapping, 'res.partner.company.csv', {}, 'set')
processor.write_to_file("res_partner_company.sh", python_exe='', path='')

# For the 2nd load script
processor = Processor('client_file.csv', delimiter=';')
res_partner_mapping = {
}
processor.process(res_partner_mapping, 'res.partner.csv', {})
processor.write_to_file("res_partner.sh", python_exe='', path='')
```
This will create the script `res_partner_company.sh` that loads only `res.partner.company.csv`, and the script `res_partner.sh` that loads only `res.partner.csv`. This can be useful if you need to do something between importing the companies and the persons (e.g., another load or some RPC calls). The drawback is that the client file is read twice.

### Mapper Functions
You can get the value of columns in the client file with several methods defined in the `mapper` object. Take a look at `lib/mapper.py` for an up-to-date list of methods. Here are the most commonly used.

#### mapper.const(value)
Use it to always assign the same value to a field.
| Client File | Mapper | Import File |
| --- | --- | --- |
| `my_column`<br>`my_value1`<br>`my_value2` | `{`<br>`'my_field': mapper.const('forced_value'),`<br>`}` | `my_field`<br>`forced_value`<br>`forced_value` |
- -Example: setting a company missing in the client file: -``` -fields_mapping = { - ... - 'company_id/id': mapper.const('base.main_company'), - ... -} -``` - -#### mapper.val(field, default='', postprocess=lambda x: x, skip=False) - -Takes the value of the column. Use it for a `Char` or `Text` field. - - - - - - - - -
| Client File | Mapper | Import File |
| --- | --- | --- |
| `my_column`<br>`my_value1`<br>`my_value2` | `{`<br>`'my_field': mapper.val('my_column'),`<br>`}` | `my_field`<br>`my_value1`<br>`my_value2` |
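The `postprocess` argument accepts any callable that is applied to the raw value before it is written to the import file. A small illustrative example (the column name is invented for the purpose of the sketch):

```
fields_mapping = {
    # strip surrounding whitespace and normalise the capitalisation of the source value
    'name': mapper.val('Name', postprocess=lambda x: x.strip().title()),
}
```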
- -#### mapper.map_val(field, mapping, default='') -Takes the value from a dictionary where the key is the value of the column. - - - - - - - - -
| Client File | Mapper | Import File |
| --- | --- | --- |
| `my_column`<br>`key1`<br>`key2` | `mapping = {`<br>`'key1': 'value1',`<br>`'key2': 'value2'`<br>`}`<br><br>`{`<br>`'my_field': mapper.map_val('my_column', mapping),`<br>`}` | `my_field`<br>`value1`<br>`value2` |
Example: setting a country.

```
Country_column;
BE;
FR;
```

```
country_map = {
    'BE': 'base.be',
    'FR': 'base.fr',
}

fields_mapping = {
    ...
    'country_id/id': mapper.map_val('Country_column', country_map),
    ...
}
```

#### mapper.num(field, default='0.0')

Takes the numeric value of the column and converts a comma decimal separator into a dot. Use it for `Integer` or `Float` fields.
| Client File | Mapper | Import File |
| --- | --- | --- |
| `my_column`<br>`01;`<br>`2,3;` | `{`<br>`'my_field': mapper.num('my_column'),`<br>`}` | `my_field`<br>`01`<br>`2.3` |
- -#### mapper.bool_val(field, true_vals=[], false_vals=[]) - -A boolean field in Odoo is always imported as 1 or 0. `true_vals` and `false_vals` are used to map the original values to 1 and 0. If the value in the client file is not in `true_vals` or `false_vals`, it is considered as TRUE if a value is present or FALSE if the column is empty. - - - - - - - - -
| Client File | Mapper | Import File |
| --- | --- | --- |
| `my_column`<br>`Yes`<br>`No`<br>(empty)<br>`something else` | `{`<br>`'my_field': mapper.bool_val('my_column', ['Yes'], ['No']),`<br>`}` | `my_field`<br>`1`<br>`0`<br>`0`<br>`1` |
#### mapper.binary(field, path_prefix, skip=False, encoding="utf-8")
Use it to convert a binary file to base64 and put the result in a binary field. Typically used to import images and attachments.

Assuming the images `pict_1.png` and `pict_2.png` are located in `/home/Pictures`:
| Client File | Mapper | Import File |
| --- | --- | --- |
| `my_column`<br>`pict_1.png`<br>`pict_2.png` | `{`<br>`'my_field': mapper.binary('my_column', '/home/Pictures/'),`<br>`}` | `my_field`<br>`kllkxqlxsqnxqxhHJVJSFSVSJDYVDV......`<br>`KKjdsndb77573çinjhffxxcdkllkxq......` |
#### mapper.concat(separator, *fields)

Concatenates the values of one or several columns, separated by `separator`.
| Client File | Mapper | Import File |
| --- | --- | --- |
| `my_column1;my_column2`<br>`val1;val2`<br>`val3;val4` | `{`<br>`'my_field': mapper.concat('_','my_column1','my_column2'),`<br>`}` | `my_field`<br>`val1_val2`<br>`val3_val4` |
- - -#### mapper.m2o(PREFIX, field, default='', skip=False) - -Use it to create an XML_ID where the module is `PREFIX` and the name is the value of the column. - - - - - - - - -
| Client File | Mapper | Import File |
| --- | --- | --- |
| `my_column`<br>`my_value1`<br>`my_value2` | `{`<br>`'my_field/id': mapper.m2o('my_import_my_model','my_column'),`<br>`}` | `my_field/id`<br>`my_import_my_model.my_value1`<br>`my_import_my_model.my_value2` |
- -> Notice the field name suffixed with /id in the mapping dictionary. - -#### mapper.m2o_map(PREFIX, mapper, default='', skip=False) - -Use it to create an XML_ID where the module is `PREFIX` and the name is the result of `mapper`. It is often used with the mapper `mapper.concat` to create XML_IDs as the concatenation of several columns. - - - - - - - - -
| Client File | Mapper | Import File |
| --- | --- | --- |
| `my_column1;my_column2`<br>`val1;val2`<br>`val3;val4` | `{`<br>`'my_field/id': mapper.m2o_map('my_import_my_model',mapper.concat('_','my_column1','my_column2')),`<br>`}` | `my_field/id`<br>`my_import_my_model.val1_val2`<br>`my_import_my_model.val3_val4` |
- -> Notice the field name suffixed with /id in the mapping dictionary. - -#### mapper.m2m(PREFIX, *fields) -Returns a comma separated list of one or several columns, each value being prefixed by `PREFIX`. Use it to build XML_IDs list for a `Many2many` field. - - - - - - - -
| Client File | Mapper | Import File |
| --- | --- | --- |
| `my_column1;my_column2`<br>`val1,val2;val3` | `{`<br>`'my_field/id': mapper.m2m('my_import_my_model','my_column1', 'my_column2'),`<br>`}` | `my_field/id`<br>`my_import_my_model.val1,my_import_my_model.val2,my_import_my_model.val3` |
- -> Notice the field name suffixed with /id in the mapping dictionary. - -> Notice val1, val2 of my_column1 are handled the same way as val3 in my_column2. - - - -#### mapper.m2m_id_list(PREFIX, *args, **kwargs) -Build one record (line) per distinct value of a comma separated list of data inside a column, and prefix the value with `PREFIX`. - - - - - - - - -
| Client File | Mapper | Import File (1) |
| --- | --- | --- |
| `my_column1`<br>`val1,val2`<br>`val1,val3`<br>`val4` | `{`<br>`'my_field/id': mapper.m2m_id_list('my_import_my_model','my_column1'),`<br>`}` | `my_field/id`<br>`my_import_my_model.val1`<br>`my_import_my_model.val2`<br>`my_import_my_model.val3`<br>`my_import_my_model.val4` |
- -(1) **To use in conjunction with the option `m2m` while invoking the transformation** (see [Many2many Relationships](#many2many-relationships)). - -``` -processor.process(mapping, 'output.file.csv', {}, m2m=True) -``` -Without this option, the import file would look like this: -``` -my_field/id -[my_import_my_model.val1, my_import_my_model.val2] -[my_import_my_model.val1, my_import_my_model.val3] -[my_import_my_model.val4] -``` +[pypi status]: https://pypi.org/project/odoo-data-flow/ +[read the docs]: https://odoo-data-flow.readthedocs.io/ +[tests]: https://github.com/OdooDataFlow/odoo-data-flow/actions?workflow=Tests +[codecov]: https://app.codecov.io/gh/OdooDataFlow/odoo-data-flow +[pre-commit]: https://github.com/pre-commit/pre-commit +[ruff badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json +[ruff project]: https://github.com/charliermarsh/ruff -#### mapper.m2m_value_list(*args, **kwargs) -Build one record (line) per distinct value of a comma separated list of data inside a column. - - - - - - - -
| Client File | Mapper | Import File (1) |
| --- | --- | --- |
| `my_column1`<br>`val1,val2`<br>`val1,val3`<br>`val4` | `{`<br>`'my_field': mapper.m2m_value_list('my_column1'),`<br>`}` | `my_field`<br>`val1`<br>`val2`<br>`val3`<br>`val4` |
+A powerful Python library for defining robust, repeatable, and high-performance data import/export workflows for Odoo. It replaces complex, manual data preparation with a clean, "configuration-as-code" approach. -(1) **To use in conjunction with the option `m2m` while invoking the transformation** (see [Many2many Relationships](#many2many-relationships)). -``` -processor.process(mapping, 'output.file.csv', {}, m2m=True) -``` -Without this option, the import file would look like this: -``` -my_field/id -[val1, val2] -[val1, val3] -[val4] -``` - -#### mapper.m2m_template_attribute_value(PREFIX, template_id_field, *args) - -Generates a mapping function for `product.template.attribute.value` XMLIDs, including the product template identifier. - -This function is specifically designed to create a mapper that constructs comma-separated strings of XML IDs for product attribute values, incorporating the identifier of the associated product template. This is useful when you need to establish relationships based on attribute values within a specific product template context. +--- - - - - - - - -
| Client File | Mapper | Import File |
| --- | --- | --- |
| `product_template_ref;color;size`<br>`template_1;red;medium`<br>`template_1;blue;large`<br>`template_2;green;small` | `{`<br>`'my_field/id': mapper.m2m_template_attribute_value('PRODUCT_ATTRIBUTE_VALUE', 'product_template_ref', 'color', 'size'),`<br>`}` | `my_field/id`<br>`PRODUCT_ATTRIBUTE_VALUE_template_1_color_red,PRODUCT_ATTRIBUTE_VALUE_template_1_size_medium`<br>`PRODUCT_ATTRIBUTE_VALUE_template_1_color_blue,PRODUCT_ATTRIBUTE_VALUE_template_1_size_large`<br>`PRODUCT_ATTRIBUTE_VALUE_template_2_color_green,PRODUCT_ATTRIBUTE_VALUE_template_2_size_small` |
+## Key Features -Args: +- **Declarative Transformations:** Use simple Python scripts and a rich set of `mapper` functions to transform any source CSV or XML data into an Odoo-ready format. +- **Two-Phase Workflow:** Cleanly separates data **transformation** from data **loading**, making complex migrations easier to manage, reuse, and debug. +- **High-Performance CLI:** Import and export data with a clean, modern command-line interface, featuring high performance parallel processing (`--worker`), batching (`--size`), and robust error handling. +- **Automatic Scripting:** Automatically generate shell scripts for the loading phase, ensuring a repeatable and reliable process every time. +- **Robust Error Handling and Recovery:** Verify the number of records processed in a batch against the number successfully imported, helping to quickly identify issues. +- **Direct Server-to-Server Migration:** Perform a complete export, transform, and import from one Odoo instance to another in a single, in-memory step with the `migrate` command. +- **Post-Import Workflows:** Run automated actions on your data _after_ it has been imported (e.g., validating invoices, registering payments) using the powerful `workflow` command. +- **export data from Odoo** into CSV format. While specific details on export parameters are not extensively documented in the provided text, this capability complements the import functionality, offering a complete solution for Odoo data management. -* `PREFIX (str)`: The prefix to use for the generated XML IDs (e.g., 'PRODUCT_ATTRIBUTE_VALUE'). This prefix should be consistent with how your XML IDs are structured. -* `template_id_field (str)`: The name of the field/column in the CSV data that contains the identifier (e.g., XML ID, database ID, or other unique key) of the related product template. This identifier will be included in the generated XML IDs. -* `*args (str)`: A variable number of field/column names from the CSV data that represent attribute values. These values will be used to construct the XML IDs. +## Installation -Returns: - -* `function`: A mapper function that takes a CSV row (as a dictionary) as input and returns a comma-separated string of generated XML IDs. If the `template_id_field` is missing in the CSV row, it returns an empty string. - -Important Notes: - -* The generated XML IDs are constructed by concatenating the `PREFIX`, the value from `template_id_field`, and the values from the provided attribute columns. -* The function handles cases where the `template_id_field` might be missing in the CSV data, returning an empty string to avoid errors. -* Ensure that the `PREFIX` and the column names in `args` are consistent with your actual data structure and XML ID conventions. - -### Advanced Transformations - -#### User Defined Mappers -Sometimes, the builtin mappers do not meet your needs, even with a `postprocess` function. In this case, you can map a field to a tailor made function. -This function takes an argument representing an entire line of the client file as a dictionary where the columns are the keys. +You can install _Odoo Data Flow_ via `uv` or `pip` from [PyPI]: +```console +$ uv pip install odoo-data-flow ``` -def my_field_mapper(line): - if line['column1'] == 'a_value': - return 'something' - return 'something_else' -fields_mapping = { - ... - 'my_field': my_field_mapper, - ... -} -``` +## Quick Usage Example -#### Managing the Client CSV file -Sometimes it's useful to change the client file according to some needs (ie. 
removing useless lines, adding columns with data, ...). You can do that with a preprocessor function when building the `Processor`. See this example that does... nothing. +The core workflow involves two simple steps: -``` -def myPreprocessor(header, data): - return header, data - -processor = Processor('client_file.csv', delimiter=';', preprocess=myPreprocessor) -``` +**1. Transform your source data with a Python script.** +Create a `transform.py` file to define the mapping from your source file to Odoo's format. -The `preprocessor` function takes two arguments: `header` is a list of all the columns, and `data` is a list of dictionaries, each dictionary being a line. Let's see two learning examples of preprocessing. +````python +# transform.py +from odoo_data_flow.lib.transform import Processor +from odoo_data_flow.lib import mapper -##### Adding a column -Here we add the column `NEW_COLUMN` filled with the value `NEW_VALUE` for all the records. -``` -def myPreprocessor(header, data): - header.append('NEW_COLUMN') - for i, j in enumerate(data): - data[i].append(NEW_VALUE) - return header, data -``` -##### Removing Lines -Say we want to remove all the lines having the column `Firstname` with the value `John`. -``` -def myPreprocessor(header, data): - data_new = [] - for i, j in enumerate(data): - line = dict(zip(header, j)) - if line['Firstname'] != 'John': - data_new.append(j) - return header, data_new -``` - -> **Note:** The client file is not physically changed. Only the buffer used by the Processor is changed in memory. Anyway the new columns are usable in the fields mapping dictionary, and the removed lines are not processed. - -#### Updating Records With Database IDs -It is possible to **update** records knowing their database ID instead of their XML_ID. The field `.id` designates a database ID. But also, the script `odoo_import_thread.py` requires an `id` field. So the trick is to build an empty `id` field and to map the `.id`from the client file. -``` my_mapping = { - 'id': mapper.const(''), - '.id': mapper.val('id_column'), - ... + 'id': mapper.concat('prod_', 'SKU'), + 'name': mapper.val('ProductName'), + 'list_price': mapper.num('Price'), } -``` - -#### XML Processing - -The `XMLProcessor` class allows you to transform data from XML files into a format suitable for Odoo import, providing an alternative to the `Processor` class for XML-based data sources. -```python -# -*- coding: utf-8 -*- -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import Processor -from lxml import etree +processor = Processor('origin/products.csv') +processor.process(my_mapping, 'data/products_clean.csv', {'model': 'product.product'}) +processor.write_to_file("load.sh") +```console +$ python transform.py +```` -processor = XMLProcessor(filename, root_node_path) -``` - -The XMLProcessor is initialized with the XML file to process and an XPath expression to identify the data records. - -`XMLProcessor.__init__(filename, root_node_path, conf_file=False)` -Constructor for the XMLProcessor class. - -*Args:* - -`filename` (str): The path to the XML file to be processed. - -`root_node_path` (str): An XPath expression specifying the root node(s) within the XML file to iterate over. Each node found by this XPath will be treated as a data record. - -`conf_file` (str, optional): The path to a configuration file. Inherited from the Processor class but may not be used in the same way by XMLProcessor. Defaults to False. 
- -`XMLProcessor.process(mapping, filename_out, import_args, t='list', null_values=['NULL', False], verbose=True, m2m=False)` -Transforms data from the XML file based on the provided mapping. - -*Args:* - -`mapping` (dict): A dictionary that defines how data from the XML file should be mapped to fields in the output format (e.g., CSV). The keys of the dictionary are the target field names, and the values are XPath expressions to extract the corresponding data from the XML. - -`filename_out` (str): The name of the output file where the transformed data will be written. - -`import_args` (dict): A dictionary containing arguments that will be passed to the odoo_import_thread.py script (e.g., `{'model': 'res.partner', 'context': "{'tracking_disable': True}"}`). - -`t (str, optional)`: This argument is kept for compatibility but is not used in XMLProcessor. Defaults to 'list'. - -`null_values` (list, optional): This argument is kept for compatibility but is not used in XMLProcessor. Defaults to `['NULL', False]`. +**2. Load the clean data into Odoo using the CLI.** +The `transform.py` script generates a `load.sh` file containing the correct CLI command. -`verbose` (bool, optional): This argument is kept for compatibility but is not used in XMLProcessor. Defaults to True. +````bash +# Contents of the generated load.sh +odoo-data-flow import --config conf/connection.conf --file data/products_clean.csv --model product.product +... +```console +$ bash load.sh +```` -`m2m (bool, optional)`: This argument is kept for compatibility but is not used in XMLProcessor. Defaults to False. +## Documentation -*Returns:* +For a complete user guide, tutorials, and API reference, please see the **[full documentation on Read the Docs][read the docs]**. +Please see the [Command-line Reference] for details. -`tuple`: A tuple containing the header (list of field names) and the transformed data (list of lists). +## Contributing -> **Important Notes:** -The t, null_values, verbose, and m2m arguments are present for compatibility with the Processor class but are not actually used by the XMLProcessor. -The mapping dictionary values should be XPath expressions that select the desired data from the XML nodes. +Contributions are very welcome. +To learn more, see the [Contributor Guide]. -`XMLProcessor.split(split_fun)` -Raises a NotImplementedError because the split functionality is not supported for XMLProcessor. +## License -*Args:* +Distributed under the terms of the [LGPL 3.0 license][license], +_Odoo Data Flow_ is free and open source software. -`split_fun`: This argument is not used. +## Issues -*Raises:* +If you encounter any problems, +please [file an issue] along with a detailed description. -`NotImplementedError`: Indicates that the split method is not available for XML processing. 
+## Credits -##### Example of XML to CSV Transformation - -Let's say you have the following XML data: - -```XML - - - - 1 - 2008 - 141100 - - - - - 4 - 2011 - 59900 - - - - 68 - 2011 - 13600 - - - - -``` - -To transform this into a CSV file with columns "name", "gdp", "year", and "neighbor", you would use the following Python script and mapping: - -```Python -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import XMLProcessor - -processor = XMLProcessor('countries.xml', '/data/country') - -mapping = { - 'name': '/data/country/@name', - 'gdp': '/data/country/gdppc/text()', - 'year': '/data/country/year/text()', - 'neighbor': '/data/country/neighbor/@name' -} - -processor.process(mapping, 'countries.csv', {}) -``` - -This would generate a CSV file like this: - -```CSV -"name";"gdp";"year";"neighbor" -"Liechtenstein";"141100";"2008";"Austria" -"Singapore";"59900";"2011";"Malaysia" -"Panama";"13600";"2011";"Costa Rica" -``` +This project was generated from [@bosd]'s [uv hypermodern python cookiecutter] template. +[@bosd]: https://github.com/bosd +[pypi]: https://pypi.org/ +[uv hypermodern python cookiecutter]: https://github.com/bosd/cookiecutter-uv-hypermodern-python +[file an issue]: https://github.com/OdooDataFlow/odoo-data-flow/issues +[pip]: https://pip.pypa.io/ -## A Real Life Example -A complete import project (transformation and load) is available in the repo [odoo_import_example](https://github.com/tfrancoi/odoo_import_example). It demonstrates use cases such as: -- importing partners with multiple categories -- importing products and variants with their suppliers -- importing messages - -> **Note:** The project was done in Odoo 11. Some models may differ in other versions. - -## Performances Considerations - -### Importing Related or Computed Fields - -Importing related fields (non readonly) causes cascade updates that drastically increase the import run time. Suppose the following example. - -
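One way to picture the data involved (the XML_IDs and names below are invented purely for illustration): a single parent partner with a steadily growing list of children, all pointing back to it.

```
id;name;parent_id/id
my_import.my_partner_1;Parent Company;
my_import.my_partner_2000;Contact 2000;my_import.my_partner_1
my_import.my_partner_2001;Contact 2001;my_import.my_partner_1
...
```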

Importing my_partner_2000 will trigger an update of my_partner_1 because the parent_id is a related, non-readonly field. And updating my_partner_1 will also trigger the update of all its children. When importing the next record, my_partner_2001, the same scenario happens: my_partner_1 is updated a second time, which triggers the update of all its children again. But this time the number of children has been increased by my_partner_2000. So each time a new partner is created, the number of updates behind the scenes increases.

You can spot this scenario by watching the transaction time, which increases exponentially.

The solution is to use the parameter `--ignore`. In this case, you should import with the option `--ignore=parent_id/id`.

## Troubleshooting

### When the number of records does not match
Suppose your Odoo CSV file contains 100 records, but after the load you count fewer than 100 new records.

Possible cause:
- One or more records have the same XML_ID. So the first record with that XML_ID was inserted, while the other records updated it instead of creating new ones.

To check the uniqueness of _what you think is_ a record identifier in the client file:
- create a new CSV file with one column containing the supposed identifier,
- check the uniqueness of the values, for example with this bash command:
```
sort my_xml_ids.csv | uniq -c | sort -nr
```
- look for an output line beginning with a number > 1.

## Tips and Tricks

### Importing Data of Multiple Companies
When you run an import, the current company is the company of the user defined by the parameters `login` and `uid` in the [configuration file](#config-CONFIG).
As a rule of thumb, it's advised to separate the imports by company.

Assuming you have to import data for Company_A and Company_B:
- build import files with the data of Company_A
- build import files with the data of Company_B
- set the import user as a member of Company_A
- import the files with the data of Company_A
- change the import user's company to Company_B
- import the files with the data of Company_B

### Importing Translations

Of course, the translations can be imported with data files tailored for the translation models. But it's a lot easier with the `lang` key set in the context. Let's take an example with the products.

First, import the source terms. This can be done while importing the legacy data. Say we have the following CSV file `product_template.csv`:

```
id;name;price
my_module.product_wallet;Wallet;10.0
my_module.product_bicyle;Bicycle;400.0
```

Import the products with the following command:
```
odoo_import_thread.py -c connection.conf --file=product_template.csv
```
_The option `--model=product.template` is not necessary since the CSV file is correctly named according to the model._

Then, build another CSV for the translations, say `product_template_FR.csv` for the French translations. This file contains only the translated terms for the products.

```
id;name
my_module.product_wallet;Portefeuille
my_module.product_bicyle;Bicyclette
```

Import this file by setting the language in the context.
```
odoo_import_thread.py -c connection.conf --file=product_template_FR.csv --model=product.template --context="{'lang': 'fr_FR'}"
```
And it's done.

Actually, it doesn't import the translations explicitly. What happens is an update of the product names in the fr_FR language, which is equivalent and a more convenient way to build the translations file.
First, because it is based on the legacy file, and above all because it lets the ORM manage the translation process.

### Importing Account Move Lines

This is an interesting use case of a one2many relationship. Let's take a look at the simplified relationship model of `account.move` and `account.move.line`.

As a rule of thumb, avoid importing one2many relationships because the inverse many2one relation always exists. So, one strategy could be to first import all the account.move records in one CSV file, then all the account.move.line records in another CSV file. But here, this strategy doesn't work because there is a balance check on the account.move. *One account.move must have at least two account.move.line records, where the credit amount of one balances the debit amount of the other.*

That means the import of the first account.move.line will fail because its amount is not balanced yet. And the import of the second account.move.line will also fail because the first one is missing, and so on.

One possible solution is to use a context with `'check_move_validity': False`. In that case, you can first import all the account.move records, then all the account.move.line records. As the balance check is disabled, you must be sure that all the account.move records are balanced.

Another solution is to import the account.move and the account.move.line together. To do this, you can build a mixed CSV file like the one sketched below, say `account_move.csv`.
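A minimal sketch of what such a mixed file might look like (the journal, accounts and amounts are invented for illustration; the exact column set depends on your data): the move-level columns are filled only on the first line of each move, while the following lines carry only the `line_ids/...` values.

```
id;name;journal_id/id;line_ids/account_id/id;line_ids/name;line_ids/debit;line_ids/credit
my_import.move_1;MISC/001;my_import.journal_misc;my_import.account_400000;Debit line;100.0;0.0
;;;my_import.account_700000;Credit line;0.0;100.0
```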

- -Then import it with the `--o2m` option. -``` -odoo_import_thread.py -c connection.conf --file=account_move.csv --model=account.move --o2m -``` - -The option `--o2m` prevents the batch to be cut while importing the same account.move. The account.move is imported with its account.move.line in the same time. So the balance check can be performed at the end of the transaction. - -The drawback of this method is that you cannot set an XML_ID to an account.move.line. See [--o2m](#o2m) for more details. -# Exporting Data - -The Odoo CSV Import Export library provides the script `odoo_export_thread.py` to export data from Odoo. This script has several options. Type the command `odoo_export_thread.py --help` to get the usage. - -``` -usage: odoo_export_thread.py [-h] -c CONFIG --file FILENAME --model MODEL - --field FIELDS [--domain DOMAIN] - [--worker WORKER] [--size BATCH_SIZE] - [-s SEPARATOR] [--context CONTEXT] - -Import data in batch and in parallel - -optional arguments: - -h, --help show this help message and exit - -c CONFIG, --config CONFIG - Configuration File that contains connection parameters - --file FILENAME Output File - --model MODEL Model to Export - --field FIELDS Fields to Export - --domain DOMAIN Filter - --worker WORKER Number of simultaneous connection - --size BATCH_SIZE Number of line to import per connection - -s SEPARATOR, --sep SEPARATOR - CSV separator - --context CONTEXT context that will be passed to the load function, need - to be a valid python dict -``` -(To be continued...) + -# Requirements -- [odoo-client-lib](https://github.com/odoo/odoo-client-lib) +[license]: https://github.com/OdooDataFlow/odoo-data-flow/blob/main/LICENSE +[contributor guide]: https://github.com/OdooDataFlow/odoo-data-flow/blob/main/CONTRIBUTING.md +[command-line reference]: https://odoo-data-flow.readthedocs.io/en/latest/usage.html diff --git a/README.rst b/README.rst deleted file mode 100644 index ded12846..00000000 --- a/README.rst +++ /dev/null @@ -1,8 +0,0 @@ -Odoo CSV Import Export Library -============================== -This library provides tools to easily and quickly import data into Odoo or export data from Odoo using CSV file. -It also provide a framework to manipulate date from csv. - -Requirements --------------- -* openerp-client-lib diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..9ac26504 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,9 @@ +comment: false +coverage: + status: + project: + default: + target: "100" + patch: + default: + target: "100" diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md new file mode 100644 index 00000000..ee54e68c --- /dev/null +++ b/docs/ROADMAP.md @@ -0,0 +1,29 @@ +# Project Roadmap + +This document outlines the planned future enhancements and major refactoring efforts for the `odoo-data-flow` library. Contributions are welcome! + +## Planned Features & Enhancements + +### 1. Modernize Post-Import Workflows + +- **Current Status:** The library includes a legacy `InvoiceWorkflowV9` class designed specifically for Odoo version 9. This class uses outdated API calls (e.g., `exec_workflow`) and will not work on modern Odoo versions. +- **Goal:** Refactor the workflow system to support recent Odoo versions (16.0, 17.0, 18.0+). +- **Tasks:** + - Create a new `InvoiceWorkflowV18` (or similar) class that uses the modern Odoo API for validating and paying invoices (e.g., calling button actions like `action_post`). 
+ - Update the `workflow_runner.py` and the `__main__.py` CLI to allow users to specify which workflow version they want to run (e.g., `odoo-data-flow workflow invoice-v18`). + - Consider creating a base `Workflow` class that new, custom workflows can inherit from to promote a consistent structure. + +### 2. Add Support for More Data Formats + +- **Goal:** Expand the `Processor` to natively handle other common data formats beyond CSV and XML. +- **Potential Formats:** + - JSONL (JSON Lines) + - Direct database connections (e.g., PostgreSQL, MySQL) + +### 3. Enhance Test Coverage + +- **Goal:** Increase unit and integration test coverage to improve reliability. +- **Tasks:** + - Add specific tests for each `mapper` function to validate its behavior with edge cases. + - Add unit and integration tests for the workflow_runner and the legacy InvoiceWorkflowV9 module. + - Create integration tests that run against a live, containerized Odoo instance as part of the CI/CD pipeline. diff --git a/docs/codeofconduct.md b/docs/codeofconduct.md new file mode 100644 index 00000000..58fd373b --- /dev/null +++ b/docs/codeofconduct.md @@ -0,0 +1,3 @@ +```{include} ../CODE_OF_CONDUCT.md + +``` diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..cf0d57b8 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,14 @@ +"""Sphinx configuration.""" + +project = "Odoo Data Flow" +author = "bosd" +copyright = "2025, bosd" +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinxmermaid", + "sphinx_click", + "myst_parser", +] +autodoc_typehints = "description" +html_theme = "furo" diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 00000000..b9419640 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,7 @@ +```{include} ../CONTRIBUTING.md +--- +end-before: +--- +``` + +[code of conduct]: codeofconduct diff --git a/docs/core_concepts.md b/docs/core_concepts.md new file mode 100644 index 00000000..ea713b7f --- /dev/null +++ b/docs/core_concepts.md @@ -0,0 +1,166 @@ +# Core Concepts + +The `odoo-data-flow` library is built on a few key concepts that enable robust and manageable data migrations. Understanding these will help you get the most out of the tool for both importing and exporting data. + +## The Two-Phase Import Workflow + +For importing data, the library promotes a two-phase workflow to separate data manipulation from the actual loading process. + +1. **Transform Phase**: This phase focuses purely on data manipulation. A Python script reads your raw source files, applies cleaning and transformation rules using **mappers**, and produces clean, structured CSV files that are perfectly formatted for Odoo. + +```mermaid +--- +config: + theme: redux +--- +flowchart TD + A(["Odoo-Data-Flow"]) -- Processor + Mapper --- B["Transform Python Script"] + B --- C["Client CSV File"] + B --> D["Transformed CSV Files for import"] + A@{ shape: proc} + C@{ shape: doc} + D@{ shape: docs} + style A fill:#BBDEFB + style B fill:#C8E6C9 + style C fill:#FFF9C4 + style D fill:#FFF9C4 +``` + +2. **Load Phase**: This phase focuses purely on data import. A generated shell script takes the clean CSV files and uses the efficient `odoo-data-flow import` command to load them into Odoo. 
+ +```mermaid +--- +config: + theme: redux +--- +flowchart TD + A["Odoo-Data-Flow"] -- Import --- B["odoo-client lib"] + B --- C["Transformed CSV Files"] + B --> D["odoo"] + n1["Configuration File"] --> B + A@{ shape: doc} + B@{ shape: proc} + C@{ shape: docs} + D@{ shape: cyl} + n1@{ shape: doc} + style A fill:#BBDEFB + style B fill:#FFE0B2 + style C fill:#FFF9C4 + style D fill:#AA00FF + style n1 fill:#C8E6C9 +``` + +This separation provides several key advantages: + +- **Debugging**: If there's a problem, you can easily tell if it's a data transformation issue or an Odoo connection issue. +- **Reusability**: You can run the time-consuming transformation once and then use the resulting clean data to load into multiple Odoo instances (e.g., testing, staging, and production). +- **Simplicity**: Each script has a single, clear responsibility. + +## The Import Strategy: One File, One Model + +It is important to understand that the `odoo-data-flow import` command is designed to load **one data file into one specific Odoo model** at a time. This means that a complete data migration (e.g., for partners, products, and sales orders) will require you to run the transform and load process several times with different data files and different target models. + +This deliberate design ensures clarity and respects Odoo's internal logic. Data is not inserted directly into the database; instead, it is loaded by calling Odoo's standard `load` method. This ensures that all the business logic, validations, and automations associated with each model are triggered correctly, just as they would be in the Odoo user interface. + +## Post-Import Processing (Workflows) + +In addition to transforming and loading data, the library provides a powerful **workflow** system for running automated, post-import actions on your records directly in Odoo. + +This is an advanced feature designed for complex use cases, such as validating a large batch of imported invoices, registering payments, or triggering other specific business logic that needs to happen _after_ the initial data has been loaded. + +This is handled by the `odoo-data-flow workflow` command, which allows you to run predefined processes on your data. + +### Overall Data Flow Including Workflows + +This diagram shows how the workflow phase fits in after the main transform and load phases. + +```mermaid +graph TD + subgraph "1. Transform Phase" + A["Raw Source Data"] --> B{Processor}; + end + + subgraph "2. Load Phase" + B --> C["Clean Data"]; + C --> D{odoo-data-flow import}; + D --> E([Odoo Database]); + end + + subgraph "3. Workflow Phase" + E --> F{odoo-data-flow workflow}; + F -- "Validate, Pay, etc." --> E; + end +``` + +## Core Components of the Transform Phase + +The transformation is driven by three main components in your Python script: + +### 1. The `Processor` + +The `Processor` is the engine of the library. You initialize it with your source file path and its settings (like the separator). Its main job is to apply your mapping rules and generate the clean data and the load script. + +### 2. The `mapper` Functions + +Mappers are the individual building blocks for your transformations. They are simple, reusable functions that define _how_ to create the value for a single column in your destination file. The library provides a rich set of mappers for concatenation, direct value mapping, static values, and handling complex relationships. 
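For instance, a minimal mapping might combine a few of the mappers documented in this guide (the source column names below are purely illustrative):

```python
from odoo_data_flow.lib import mapper

partner_mapping = {
    # Build a stable external ID from a source column
    'id': mapper.m2o_map('import_partner_', 'CustomerCode'),
    # Copy a source value straight across
    'name': mapper.val('CustomerName'),
    # Write the same constant for every row
    'company_type': mapper.const('company'),
    # Accept numbers that use a comma as the decimal separator
    'credit_limit': mapper.num('CreditLimit'),
}
```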
+ +> For a complete list of all available mappers and their options, see the [Data Transformations Guide](guides/03_data_transformations.md). + +### 3. The Mapping Dictionary + +This standard Python `dict` ties everything together. The keys are the column names for your **destination** CSV file, and the values are the `mapper` functions that will generate the data for that column. + +## Understanding the Load Phase and Error Handling + +A key strength of this library is its robust error handling, which ensures that a few bad records won't cause an entire import to fail. This is managed through a clever two-pass system orchestrated by the generated `load.sh` script. + +### The Two-Pass Load Sequence + +The generated `load.sh` script contains two commands designed to maximize both speed and accuracy. + +```bash +# First pass: Fast, parallel import. Writes recoverable errors to a .fail file. +odoo-data-flow import --config conf/connection.conf --file data/res_partner.csv --model res.partner +# Second pass: Slower, precise import of the failed records. +odoo-data-flow import --config conf/connection.conf --fail --file data/res_partner.csv --model res.partner +``` + +1. **First Pass (Normal Mode)**: The command runs in its default, high-speed mode. If a record is rejected for any reason, it is written to a `res_partner.csv.fail` file, and the process continues. + +2. **Second Pass (`--fail` Mode)**: The command is invoked again with the `--fail` flag. In this mode, it automatically targets the `.fail` file and retries each failed record individually with a single worker. Records that still fail are written to a final `.fail.bis` file, which contains only the items needing manual review. + +### Error Handling Flow Diagram + +This diagram visualizes how records flow through the two-pass system. + +```mermaid +graph TD + A["data.csv
(100 records)"] --> B{First Pass
odoo-data-flow import}; + B -- 95 successful records --> C([Odoo Database]); + B -- 5 failed records --> D["data.csv.fail
(5 records)"]; + + D --> E{Second Pass
odoo-data-flow import --fail}; + E -- 3 recovered records --> C; + E -- 2 true errors --> F["fa:fa-user-edit data.csv.fail.bis
(2 records to fix)"]; + + style C fill:#d5f5e3 + style F fill:#f5b7b1 +``` + +## The Export Concept + +The library can also be used to export data from Odoo, which is useful for backups, analysis, or migrating data between systems. The export process is a direct command-line call. + +### Export Flow Diagram + +```mermaid +graph TD + subgraph "Export Process" + ExportA["Odoo Instance"] --> ExportB{odoo-data-flow export}; + ExportC["Configuration
(CLI Options)"] --> ExportB; + ExportB --> ExportD["Output File
(e.g., exported_partners.csv)"]; + end +``` + +> For detailed instructions, see the [Exporting Data Guide](guides/02_exporting_data.md). diff --git a/docs/guides/01_importing_data.md b/docs/guides/01_importing_data.md new file mode 100644 index 00000000..f7531e44 --- /dev/null +++ b/docs/guides/01_importing_data.md @@ -0,0 +1,139 @@ +# Guide: A Deep Dive into Importing + +This guide expands on the import workflow, providing a detailed look at the `Processor` class and, most importantly, the requirements for your input data files. + +## Input File Requirements + +For a successful import into Odoo, the clean CSV file you generate (the `target_file` in your script) must follow some important rules. + +- **Encoding**: The file must be in `UTF-8` encoding. +- **One Model per File**: Each CSV file should only contain data for a single Odoo model (e.g., all `res.partner` records). +- **Header Row**: The first line of the file must be the header row. All column names must be the technical field names from the Odoo model (e.g., `name`, `parent_id`, `list_price`). +- **External ID**: All rows must have an `id` column containing a unique External ID (also known as an XML ID). This is essential for Odoo to identify records, allowing it to both create new records and update existing ones on re-import. +- **Field Separator**: The character separating columns can be defined with the `--sep` command-line option. The default is a semicolon (`;`). **Crucially, if a field's value contains the separator character, the entire field value must be enclosed in double quotes (`"`).** +- **Skipping Lines**: If your source file contains introductory lines before the header, you can use the `--skip` option to ignore them during the import process. + +### Field Formatting Rules + +Odoo's `load` method expects data for certain field types to be in a specific format. + +- **Boolean**: Must be `1` for True and `0` for False. The `mapper.bool_val` can help with this. +- **Binary**: Must be a base64 encoded string. The `mapper.binary` and `mapper.binary_url_map` functions handle this automatically. +- **Date & Datetime**: The format depends on the user's language settings in Odoo, but the standard, safe formats are `YYYY-MM-DD` for dates and `YYYY-MM-DD HH:MM:SS` for datetimes. +- **Float**: The decimal separator must be a dot (`.`). The `mapper.num` function handles converting comma separators automatically. +- **Selection**: Must contain the internal value for the selection, not the human-readable label (e.g., `'draft'` instead of `'Draft'`). +- **Many2one**: The column header must be suffixed with `/id` (e.g., `partner_id/id`), and the value should be the external ID of the related record. +- **Many2many**: The column header must be suffixed with `/id`, and the value should be a comma-separated list of external IDs for the related records. + +### Automatic Model Detection + +If you name your final CSV file using the technical name of the model (e.g., `res_partner.csv`), you do not need to specify the `--model` option when running the import command. The tool will automatically infer the model from the filename. + +--- + +## The `Processor` Class + +The `Processor` is the central component of the transform phase. It handles reading the source file, applying the mapping, and generating the output files required for the load phase. + +### Initialization + +You initialize the processor by providing the path to your source data file and optional formatting parameters. 
+ +```python +from odoo_data_flow.lib.transform import Processor + +processor = Processor( + 'origin/my_data.csv', # Path to the source file + separator=';', # The character used to separate columns + quotechar='"' # The character used for quoting fields +) +``` + +The constructor takes the following arguments: + +- **`source_file` (str)**: The path to the CSV or XML file you want to transform. +- **`separator` (str, optional)**: The column separator for CSV files. Defaults to `;`. +- **`quotechar` (str, optional)**: The field quote character for CSV files. Defaults to `"`. +- **`preprocessor` (function, optional)**: A function to modify the raw data _before_ mapping begins. See the [Data Transformations Guide](./03_data_transformations.md#pre-processing-data) for details. +- **`xml_root_tag` / `xml_record_tag` (str, optional)**: Required arguments for processing XML files. See the [Advanced Usage Guide](./04_advanced_usage.md#processing-xml-files). + +## The `process()` Method + +This is the main method that executes the transformation. It takes your mapping dictionary and applies it to each row of the source file, writing the output to a new target file. + +```python +processor.process( + mapping=my_mapping_dict, + target_file='data/clean_data.csv', + params=import_params_dict +) +``` + +The method takes these key arguments: + +- **`mapping` (dict)**: **Required**. The mapping dictionary that defines the transformation rules for each column. +- **`target_file` (str)**: **Required**. The path where the clean, transformed CSV file will be saved. +- **`params` (dict, optional)**: A crucial dictionary that holds the configuration for the `odoo-data-flow import` command. These parameters will be used when generating the `load.sh` script. + +### Configuring the Import Client with `params` + +The `params` dictionary allows you to control the behavior of the import client without ever leaving your Python script. The keys in this dictionary map directly to the command-line options of the `odoo-data-flow import` command. + +| `params` Key | `odoo-data-flow import` Option | Description | +| ------------ | ------------------------------ | -------------------------------------------------------------------------------------------------------------------- | +| `model` | `--model` | **Required**. The technical name of the Odoo model (e.g., `sale.order`). | +| `context` | `--context` | An Odoo context dictionary string. Essential for disabling mail threads, etc. (e.g., `"{'tracking_disable': True}"`) | +| `worker` | `--worker` | The number of parallel processes to use for the import. | +| `size` | `--size` | The number of records to process in a single Odoo transaction. | +| `ignore` | `--ignore` | A comma-separated string of fields to ignore during the import. Crucial for performance with related fields. | +| `skip` | `--skip` | The number of initial lines to skip in the source file before reading the header. | + +## Generating the Script with `write_to_file()` + +After calling `process()`, you can generate the final shell script that will be used in the load phase. + +```python +processor.write_to_file("load_my_data.sh") +``` + +This method takes a single argument: the path where the `load.sh` script should be saved. It automatically uses the `target_file` and `params` you provided to the `process()` method to construct the correct commands. + +## Full Example + +Here is a complete `transform.py` script that ties everything together. 
+ +```python +from odoo_data_flow.lib.transform import Processor +from odoo_data_flow.lib import mapper + +# 1. Define the mapping rules +sales_order_mapping = { + 'id': mapper.m2o_map('import_so_', 'OrderRef'), + 'partner_id/id': mapper.m2o_map('main_customers_', 'CustomerCode'), + 'name': mapper.val('OrderRef'), + # ... other fields +} + +# 2. Define the parameters for the load script +import_params = { + 'model': 'sale.order', + 'context': "{'tracking_disable': True, 'mail_notrack': True}", + 'worker': 4, + 'size': 500 +} + +# 3. Initialize the processor +processor = Processor('origin/sales_orders.csv', separator=',') + +# 4. Run the transformation +processor.process( + mapping=sales_order_mapping, + target_file='data/sale_order.csv', + params=import_params +) + +# 5. Generate the final script +processor.write_to_file("load_sales_orders.sh") + +print("Transformation complete.") +``` diff --git a/docs/guides/02_exporting_data.md b/docs/guides/02_exporting_data.md new file mode 100644 index 00000000..d445ae21 --- /dev/null +++ b/docs/guides/02_exporting_data.md @@ -0,0 +1,68 @@ +# Guide: Exporting Data from Odoo + +In addition to importing, `odoo-data-flow` provides a powerful command-line utility for exporting data directly from Odoo into a structured CSV file. This is ideal for creating backups, feeding data into other systems, or for analysis. + +## The `odoo-data-flow export` Command + +The export process is handled by the `export` sub-command of the main `odoo-data-flow` tool. Unlike the import workflow, exporting is a single-step operation where you execute one command with the right parameters to pull data from your Odoo database. + +### Command-Line Options + +The command is configured using a set of options. Here are the most essential ones: + +| Option | Description | +| ---------- | --------------------------------------------------------------------------------------------------------- | +| `--config` | **Required**. Path to your `connection.conf` file containing the Odoo credentials. | +| `--model` | **Required**. The technical name of the Odoo model you want to export records from (e.g., `res.partner`). | +| `--fields` | **Required**. A comma-separated list of the technical field names you want to include in the export file. | +| `--file` | **Required**. The path and filename for the output CSV file (e.g., `data/exported_partners.csv`). | +| `--domain` | A filter to select which records to export, written as a string. Defaults to `[]` (export all records). | +| `--worker` | The number of parallel processes to use for the export. Defaults to `1`. | +| `--size` | The number of records to fetch in a single batch. Defaults to `10`. | + +### Understanding the `--domain` Filter + +The `--domain` option allows you to precisely select which records to export. It uses Odoo's standard domain syntax, which is a list of tuples formatted as a string. + +A domain is a list of search criteria. Each criterion is a tuple `('field_name', 'operator', 'value')`. + +**Simple Domain Example:** +To export only companies (not individual contacts), the domain would be `[('is_company', '=', True)]`. 
You would pass this to the command line as a string: + +`--domain "[('is_company', '=', True)]"` + +**Complex Domain Example:** +To export all companies from the United States, you would combine two criteria: + +`--domain "[('is_company', '=', True), ('country_id.code', '=', 'US')]"` + +### Specifying Fields with `--fields` + +The `--fields` option is a simple comma-separated list of the field names you want in your output file. You can also access fields on related records using dot notation. + +- Simple fields: `name,email,phone` +- Relational fields: `name,parent_id/name,parent_id/city` (This would get the contact's name, their parent company's name, and their parent company's city). + +## Full Export Example + +Let's combine these concepts into a full example. We want to export the name, email, and city for all individual contacts (not companies) located in Belgium. + +Here is the full command you would run from your terminal: + +```bash +odoo-data-flow export \ + --config conf/connection.conf \ + --model "res.partner" \ + --domain "[('is_company', '=', False), ('country_id.code', '=', 'BE')]" \ + --fields "name,email,city,country_id/name" \ + --file "data/belgian_contacts.csv" +``` + +### Result + +This command will: + +1. Connect to the Odoo instance defined in `conf/connection.conf`. +2. Search the `res.partner` model for records that are not companies and have their country set to Belgium. +3. For each matching record, it will retrieve the `name`, `email`, `city`, and the `name` of the related country. +4. It will save this data into a new CSV file located at `data/belgian_contacts.csv`. diff --git a/docs/guides/03_data_transformations.md b/docs/guides/03_data_transformations.md new file mode 100644 index 00000000..3b66c362 --- /dev/null +++ b/docs/guides/03_data_transformations.md @@ -0,0 +1,394 @@ +# Guide: Data Transformations with Mappers + +Mappers are the core of the data transformation process. They are powerful, reusable functions that you use within your mapping dictionary to define how each column of your destination file should be generated. + +This guide provides a comprehensive reference for all mappers available in the `odoo_data_flow.lib.mapper` module. + +--- + +## Data Quality Validation (`Processor.check`) + +Before you start the main transformation process, it's often a good idea to validate the quality and structure of your source data. The library provides a `.check()` method on the `Processor` object for this purpose. + +You can call `.check()` multiple times with different "checker" functions to validate your data against a set of rules. If a check fails, a warning will be logged to the console, and you can prevent the transformation from continuing. + +### Using Checkers + +In your `transform.py` script, after initializing the `Processor` but before calling `.process()`, you can add your checks: + +```python +from odoo_data_flow.lib import checker +from odoo_data_flow.lib.transform import Processor + +# Initialize processor +processor = Processor('origin/my_data.csv') + +# --- Add Data Quality Checks --- +print("Running data quality checks...") +processor.check(checker.line_length_checker(15)) +processor.check(checker.cell_len_checker(120)) +processor.check(checker.id_validity_checker('SKU', r'^[A-Z]{2}-\d{4}$')) + +# Now, proceed with the mapping and processing +# processor.process(...) +``` + +### Available Checker Functions + +The following checkers are available in the `odoo_data_flow.lib.checker` module. 
+ +#### `checker.line_length_checker(expected_length)` + +Verifies that every row in your data file has exactly the `expected_length` number of columns. This is useful for catching malformed CSV rows. + +#### `checker.cell_len_checker(max_cell_len)` + +Verifies that no single cell (field) in your entire dataset exceeds the `max_cell_len` number of characters. + +#### `checker.line_number_checker(expected_line_count)` + +Verifies that the file contains exactly `expected_line_count` number of data rows (not including the header). + +#### `checker.id_validity_checker(id_field, pattern)` + +Verifies that the value in the specified `id_field` column for every row matches the given regex `pattern`. This is extremely useful for ensuring key fields like SKUs or external IDs follow a consistent format. + +--- + +## Basic Mappers + +### `mapper.val(field, [postprocess])` + +Retrieves the value from a single source column, identified by `field`. This is the most fundamental mapper. + +- **`field` (str)**: The name of the column in the source file. +- **`postprocess` (function, optional)**: A function to modify the value after it has been read. + +### `mapper.const(value)` + +Fills a column with a fixed, constant `value` for every row. + +- **`value`**: The static value to use (e.g., string, bool, integer). + +#### How it works + +**Input Data (`source.csv`)** +| AnyColumn | +| --------- | +| a | +| b | + +**Transformation Code** + +```python +'sale_type': mapper.const('service') +``` + +**Output Data** +| sale_type | +| --------- | +| service | +| service | + +--- + +## Combining and Formatting + +### `mapper.concat(separator, *fields)` + +Joins values from one or more source columns together, separated by a given `separator`. + +- **`separator` (str)**: The string to place between each value. +- **`*fields` (str)**: A variable number of source column names (`field`) or static strings to join. + +--- + +## Conditional and Boolean Logic + +### `mapper.cond(field, true_value, false_value)` + +Checks the value of the source column `field`. If it's considered "truthy" (not empty, not "False", not 0), it returns `true_value`, otherwise it returns `false_value`. + +### `mapper.bool_val(field, true_values)` + +Checks if the value in the source column `field` exists within the `true_values` list and returns a boolean. + +- **`field` (str)**: The column to check. +- **`true_values` (list)**: A list of strings that should be considered `True`. + +#### How it works + +**Input Data (`source.csv`)** +| Status | +| ------ | +| Active | +| Done | + +**Transformation Code** + +```python +'is_active': mapper.bool_val('Status', ['Active', 'In Progress']), +``` + +**Output Data** +| is_active | +| --------- | +| True | +| False | + +--- + +## Numeric Mappers + +### `mapper.num(field, default='0.0')` + +Takes the numeric value of the source column `field`. It automatically transforms a comma decimal separator (`,`) into a dot (`.`). Use it for `Integer` or `Float` fields in Odoo. + +- **`field` (str)**: The column containing the numeric string. +- **`default` (str, optional)**: A default value to use if the source value is empty. Defaults to `'0.0'`. 
+ +#### How it works + +**Input Data (`source.csv`)** +| my_column | +| --------- | +| 01 | +| 2,3 | +| | + +**Transformation Code** + +```python +'my_field': mapper.num('my_column'), +'my_field_with_default': mapper.num('my_column', default='-1.0') +``` + +**Output Data** +| my_field | my_field_with_default | +| -------- | --------------------- | +| 1 | 1 | +| 2.3 | 2.3 | +| 0.0 | -1.0 | + +--- + +## Relational Mappers + +### `mapper.m2o_map(prefix, *fields)` + +A specialized `concat` for creating external IDs for **Many2one** relationship fields (e.g., `partner_id`). + +### `mapper.relation(model, search_field, value, raise_if_not_found=False, skip=False)` + +Finds a single record in Odoo and returns its database ID. **Note:** This can be slow as it performs a search for each row. + +- **`model` (str)**: The Odoo model to search in. +- **`search_field` (str)**: The field to search on. +- **`value` (mapper)**: A mapper that provides the value to search for. +- **`raise_if_not_found` (bool, optional)**: If `True`, the process will stop if no record is found. Defaults to `False`. +- **`skip` (bool, optional)**: If `True` and the record is not found, the entire source row will be skipped. Defaults to `False`. + +### Many-to-Many Mappers + +These mappers create a comma-separated list of external IDs or database ID command tuples for **Many2many** fields. + +#### `mapper.m2m(*args, **kwargs)` + +Has two modes: + +1. **Multiple Columns**: Joins non-empty values from multiple source columns. `mapper.m2m('Tag1', 'Tag2')` +2. **Single Column with Separator**: Splits a single column by a separator. `mapper.m2m('Tags', sep=';')` + +#### `mapper.m2m_map(prefix, field, sep)` + +Splits a single source column `field` by `sep` and prepends a `prefix` to each value. + +#### `mapper.m2m_id_list(field, sep=',')` + +Takes a source column `field` containing a list of database IDs and formats them for Odoo's `(6, 0, [IDs])` command, which replaces all existing relations with the new list. + +#### `mapper.m2m_value_list(model, field, sep=',')` + +Takes a source column `field` containing a list of values (e.g., names). For each value, it finds the corresponding record in the specified `model` (by searching on the `name` field) and returns a list of their database IDs, formatted as a command tuple. + +#### `mapper.m2m_template_attribute_value(field, sep=',')` + +A highly specialized mapper for product template attributes. It takes a list of attribute values from the source column `field`, finds or creates them (`product.attribute.value`), and returns them formatted for the `attribute_line_ids` field. + +--- + +## Advanced Mapping + +### `mapper.map_val(map_dict, key, default=None, m2m=False)` + +Looks up a `key` in a `map_dict` and returns the corresponding value. This is extremely useful for translating values from a source system to Odoo values. + +- **`map_dict` (dict)**: The Python dictionary to use as a translation table. +- **`key` (mapper)**: A mapper that provides the key to look up in the dictionary (often `mapper.val`). +- **`default` (optional)**: A default value to return if the key is not found. +- **`m2m` (bool, optional)**: If set to `True`, the `key` is expected to be a list of values. The mapper will look up each value in the list and return a comma-separated string of the results. + +#### Example: Advanced Country Mapping + +**Transformation Code** + +```python +# The mapping dictionary translates source codes to Odoo external IDs. 
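+# (Odoo's base module ships these country records with external IDs such as
+# 'base.be', 'base.fr' and 'base.nl', which is what the values below refer to.)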
+country_map = { + 'BE': 'base.be', + 'FR': 'base.fr', + 'NL': 'base.nl', +} + +# Use map_val to look up the code and return the external ID. +'country_id/id': mapper.map_val(country_map, mapper.val('CountryCode')) +``` + +--- + +## Binary Mappers + +### `mapper.binary(field)` + +Reads a local file path from the source column `field` and converts the file content into a base64-encoded string. + +- **`field` (str)**: The name of the column that contains the relative path to the image file. + +#### How it works + +**Input Data (`images.csv`)** +| ImagePath | +| --------------------- | +| images/product_a.png | + +**Transformation Code** + +```python +# Reads the file at the path and encodes it for Odoo +'image_1920': mapper.binary('ImagePath') +``` + +**Output Data** +| image_1920 | +| ---------------------------------- | +| iVBORw0KGgoAAAANSUhEUg... (etc.) | + +### `mapper.binary_url_map(field)` + +Reads a URL from the source column `field`, downloads the content from that URL, and converts it into a base64-encoded string. + +- **`field` (str)**: The name of the column that contains the full URL to the image or file. + +#### How it works + +**Input Data (`image_urls.csv`)** +| ImageURL | +| -------------------------------------- | +| https://www.example.com/logo.png | + +**Transformation Code** + +```python +# Downloads the image from the URL and encodes it +'image_1920': mapper.binary_url_map('ImageURL') +``` + +**Output Data** +| image_1920 | +| ---------------------------------- | +| iVBORw0KGgoAAAANSUhEUg... (etc.) | + +--- + +## Advanced Techniques + +### Pre-processing Data + +For complex manipulations before the mapping starts, you can pass a `preprocessor` function to the `Processor`. This function receives the CSV header and data and must return them after modification. + +#### Adding Columns + +```python +def myPreprocessor(header, data): + header.append('NEW_COLUMN') + for i, j in enumerate(data): + data[i].append('NEW_VALUE') + return header, data +``` + +#### Removing Lines + +```python +def myPreprocessor(header, data): + data_new = [] + for i, j in enumerate(data): + line = dict(zip(header, j)) + if line['Firstname'] != 'John': + data_new.append(j) + return header, data_new +``` + +### Creating Custom Mappers + +Any Python function can act as a custom mapper when used with `postprocess`. The function will receive the value from the source column as its first argument and the shared `state` dictionary as its second. + +### Updating Records With Database IDs + +To update records using their database ID, map your source ID to the special `.id` field and provide an empty `id` field. + +```python +my_mapping = { + 'id': mapper.const(''), + '.id': mapper.val('id_column_from_source'), + 'name': mapper.val('name_from_source'), + # ... other fields to update +} +``` + +### Creating Related Records (`mapper.record`) + +This special mapper takes a full mapping dictionary to create related records (e.g., sales order lines) during the transformation of a main record. 
+ +#### Example: Importing Sales Orders and their Lines + +**Input Data (`orders.csv`)** +| OrderID | Warehouse | SKU | Qty | +| ------- | --------- | ------ | --- | +| SO001 | MAIN | | | +| | | PROD_A | 2 | +| | | PROD_B | 5 | + +**Transformation Code** + +```python +from odoo_data_flow.lib import mapper + +def get_order_id(val, state): + if val: + state['current_order_id'] = val + return val + return None + +def remember_value(key): + def postprocess(val, state): + if val: + state[key] = val + return val + return postprocess + +order_line_mapping = { + 'order_id/id': lambda state: state.get('current_order_id'), + 'product_id/id': mapper.m2o_map('prod_', 'SKU'), + 'product_uom_qty': mapper.num('Qty'), + 'warehouse_id/id': lambda state: state.get('current_warehouse_id') +} + +sales_order_mapping = { + # Using a postprocess on val() is a flexible way to filter + '_filter': mapper.val('OrderID', postprocess=lambda x: not x), + 'id': mapper.val('OrderID'), + 'name': mapper.val('OrderID', postprocess=get_order_id), + 'warehouse_id/id': mapper.m2o_map('wh_', 'Warehouse', postprocess=remember_value('current_warehouse_id')), + 'order_line': mapper.cond('SKU', mapper.record(order_line_mapping)) +} +``` diff --git a/docs/guides/04_advanced_usage.md b/docs/guides/04_advanced_usage.md new file mode 100644 index 00000000..3a5e651f --- /dev/null +++ b/docs/guides/04_advanced_usage.md @@ -0,0 +1,369 @@ +# Guide: Advanced Usage + +This guide covers more complex scenarios and advanced features of the library that can help you solve specific data transformation challenges. + +## Processing XML Files + +While CSV is common, you may have source data in XML format. The `Processor` can handle XML files with a couple of extra configuration arguments. + +- **`xml_root_tag` (str)**: The name of the root tag in your XML document that contains the collection of records. +- **`xml_record_tag` (str)**: The name of the tag that represents a single record. + +### Example: Importing Partners from an XML File + +```python +from odoo_data_flow.lib.transform import Processor +from odoo_data_flow.lib import mapper + +# Access nested XML tags using dot notation. +res_partner_mapping = { + 'id': mapper.m2o_map('xml_client_', 'ClientID'), + 'name': mapper.val('Name'), + 'email': mapper.val('Contact.Email'), + 'phone': mapper.val('Contact.Phone'), +} + +# Initialize the Processor with XML-specific arguments +processor = Processor( + 'origin/clients.xml', + xml_root_tag='ClientList', + xml_record_tag='Client' +) +# ... rest of the process +``` + +--- + +## Importing Data for Multiple Companies + +When working in a multi-company Odoo environment, you need a clear strategy to ensure records are created in the correct company. There are two primary methods to achieve this. + +### Method 1: The Procedural Approach (Recommended) + +This is the safest and most common approach. The core idea is to separate your data by company and run a distinct import process for each one. + +1. **Separate your source files:** Create one set of data files for Company A and a completely separate set for Company B. +2. **Set the User's Company:** In Odoo, log in as the user defined in your `connection.conf`. In the user preferences, set their default company to **Company A**. +3. **Run the Import for Company A:** Execute your transformation and load scripts for Company A's data. All records created will be assigned to Company A by default. +4. **Change the User's Company:** Go back to Odoo and change the same user's default company to **Company B**. +5. 
**Run the Import for Company B:** Execute the import process for Company B's data. These new records will now be correctly assigned to Company B. + +This method is robust because it relies on Odoo's standard multi-company behavior and prevents accidental data mixing. + +### Method 2: The Programmatic Approach (`company_id`) + +This method is useful when your source file contains data for multiple companies mixed together. You can explicitly tell Odoo which company a record belongs to by mapping a value to the `company_id/id` field. + +**Example: A source file with mixed-company products** + +```text +SKU,ProductName,CompanyCode +P100,Product A,COMPANY_US +P101,Product B,COMPANY_EU +``` + +**Transformation Script** +Your mapping dictionary can use the `CompanyCode` to link to the correct company record in Odoo using its external ID. + +```python +from odoo_data_flow.lib import mapper + +product_mapping = { + 'id': mapper.m2o_map('prod_', 'SKU'), + 'name': mapper.val('ProductName'), + # This line explicitly sets the company for each row. + # Assumes your res.company records have external IDs like 'main_COMPANY_US'. + 'company_id/id': mapper.m2o_map('main_', 'CompanyCode'), +} +``` + +**Warning:** While powerful, this method requires that you have stable and correct external IDs for your `res.company` records. The procedural approach is often simpler and less error-prone. + +--- + +## Importing Translations + +The most efficient way to import translations is to perform a standard import with a special `lang` key in the context. This lets Odoo's ORM handle the translation creation process correctly. + +The process involves two steps: + +1. **Import the base terms:** First, import your records with their default language values (e.g., English). +2. **Import the translated terms:** Then, import a second file containing only the external IDs and the translated values, while setting the target language in the context. + +### Example: Translating Product Names to French + +**Step 1: Import the base product data in English** + +**Source File (`product_template.csv`):** + +```csv +id;name;price +my_module.product_wallet;Wallet;10.0 +my_module.product_bicyle;Bicycle;400.0 +``` + +You would import this file normally. The `id` column provides the stable external ID for each product. + +**Step 2: Import the French translations** + +**Source File (`product_template_FR.csv`):** +This file only needs to contain the external ID and the fields that are being translated. + +```csv +id;name +my_module.product_wallet;Portefeuille +my_module.product_bicyle;Bicyclette +``` + +**Transformation and Load** +While you can use a `transform.py` script to generate the load script, for a simple translation update, you can also run the command directly. + +**Command-line Example:** + +```bash +odoo-data-flow import \ + --config conf/connection.conf \ + --file product_template_FR.csv \ + --model product.template \ + --context "{'lang': 'fr_FR'}" +``` + +This does not overwrite the English name; instead, it correctly creates or updates the French translation for the `name` field on the specified products. + +--- + +## Importing Account Move Lines + +Importing journal entries (`account.move`) with their debit/credit lines (`account.move.line`) is a classic advanced use case that requires creating related records using `mapper.record` and stateful processing. 
+ +### Performance Tip: Skipping Validation + +For a significant performance boost when importing large, pre-validated accounting entries, you can tell Odoo to skip its balancing check (debits == credits) during the import. This is done by passing a special context key. + +### Example: Importing an Invoice + +**Source File: `invoices.csv`** + +```csv +Journal,Reference,Date,Account,Label,Debit,Credit +INV,INV2023/12/001,2023-12-31,,, +,,"Customer Invoices",600,"Customer Debtor",250.00, +,,"Customer Invoices",400100,"Product Sales",,200.00 +,,"Customer Invoices",451000,"VAT Collected",,50.00 +``` + +**Transformation Script** + +```python +from odoo_data_flow.lib.transform import Processor +from odoo_data_flow.lib import mapper + +# ... (see Data Transformations guide for full stateful processing example) + +# Define parameters, including the crucial context key +params = { + 'model': 'account.move', + # WARNING: Only use check_move_validity: False if you are certain + # your source data is balanced. + 'context': "{'check_move_validity': False, 'tracking_disable': True}" +} + +processor = Processor('origin/invoices.csv') +# ... rest of process +``` + +--- + +## Importing One-to-Many Relationships (`--o2m` flag) + +The `--o2m` flag enables a special import mode for handling source files where child records (the "many" side) are listed directly under their parent record (the "one" side). + +### Use Case and File Structure + +This mode is designed for files structured like this, where a master record has lines for two different one-to-many fields (`child1_ids` and `child2_ids`): + +**Source File (`master_with_children.csv`)** + +```csv +MasterID,MasterName,Child1_SKU,Child2_Ref +M01,Master Record 1,field_value1_of_child1,field_value1_of_child2 +, , , field_value2_of_child1,field_value2_of_child2 +, , , ,field_value3_of_child2 +``` + +With the `--o2m` option, the processor understands that the lines with empty master fields belong to the last master record encountered. It will import "Master Record 1" with two `child1` records and three `child2` records simultaneously. + +### Transformation and Load + +Your mapping would use `mapper.record` and `mapper.cond` to process the child lines, similar to the `account.move.line` example. The key difference is enabling the `o2m` flag in your `params` dictionary. + +```python +# In your transform.py +params = { + 'model': 'master.model', + 'o2m': True # Enable the special o2m handling +} +``` + +The generated `load.sh` script will then include the `--o2m` flag in the `odoo-data-flow import` command. + +### Important Limitations + +This method is convenient but has significant consequences because **it is impossible to set XML_IDs on the child records**. As a result: + +- You **cannot run the import again to update** the child records. Any re-import will create new child records. +- The child records **cannot be referenced** by their external ID in any other import file. + +This method is best suited for simple, one-off imports of transactional data where the child lines do not need to be updated or referenced later. + +--- + +## Validating Imports (`--check` flag) + +The `--check` flag provides an extra layer of validation during the import process. When this flag is used, at the end of each transaction, the client compares the number of records sent in the batch with the number of records Odoo reports as successfully imported. + +If these numbers do not match, an error message is printed. This is an extremely useful tool for catching silent errors. 
The most common cause for a mismatch is having records with duplicate XML_IDs within the same batch. + +For more details on why this might happen, see the [Record Count Mismatch](../../faq.md#record-count-mismatch) section in the FAQ. + +### Usage + +To enable this feature, set the `check` key to `True` in your `params` dictionary. + +```python +# In your transform.py +params = { + 'model': 'res.partner', + 'check': True # Enable import validation +} +``` + +The generated `load.sh` script will then include the `--check` flag in the `odoo-data-flow import` command. + +--- + +## Advanced Product Imports: Creating Variants + +When you import `product.template` records along with their attributes and values, Odoo does not create the final `product.product` variants by default. You must explicitly tell Odoo to do so using a context key. + +### The `create_product_product` Context Key + +By setting `create_product_product: True` in the context of your `product.template` import, you trigger the Odoo mechanism that generates all possible product variants based on the attribute lines you have imported for that template. + +This is typically done as the final step _after_ you have already imported the product attributes, attribute values, and linked them to the templates via attribute lines. + +### Example: Triggering Variant Creation + +Assume you have already run separate imports for `product.attribute`, `product.attribute.value`, and `product.attribute.line`. Now, you want to trigger the variant creation. + +The easiest way is to re-import your `product.template.csv` file with the special context key. + +**Transformation and Load** +In the `params` dictionary of your `product.template` transformation script, add the key: + +```python +# In your transform.py for product templates + +params = { + 'model': 'product.template', + # This context key tells Odoo to generate the variants + 'context': "{'create_product_product': True, 'tracking_disable': True}" +} + +# The mapping would be the same as your initial template import +template_mapping = { + 'id': mapper.m2o_map('prod_tmpl_', 'Ref'), + 'name': mapper.val('Name'), + # ... other template fields +} +``` + +When you run the generated `load.sh` script for this process, Odoo will find each product template, look at its attribute lines, and create all the necessary `product.product` variants (e.g., a T-Shirt in sizes S, M, L and colors Red, Blue). + +--- + +## Merging Data from Multiple Files (`join_file`) + +Sometimes, the data you need for a single import is spread across multiple source files. The `.join_file()` method allows you to enrich your main dataset by merging columns from a second file, similar to a VLOOKUP in a spreadsheet. + +### The `.join_file()` Method + +You first initialize a `Processor` with your primary file. Then, you call `.join_file()` to merge data from a secondary file based on a common key. + +- **`filename` (str)**: The path to the secondary file to merge in. +- **`key1` (str)**: The name of the key column in the **primary** file. +- **`key2` (str)**: The name of the key column in the **secondary** file. + +### Example: Merging Customer Details into an Order File + +**Transformation Script (`transform_merge.py`)** + +```python +from odoo_data_flow.lib.transform import Processor +from odoo_data_flow.lib import mapper + +# 1. Initialize a processor with the primary file (orders) +processor = Processor('origin/orders.csv') + +# 2. Join the customer details file. 
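+# ('CustomerCode' is the key column in the primary file, orders.csv;
+# 'Code' is the matching key column in the secondary file, customer_details.csv.)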
+print("Joining customer details into orders data...") +processor.join_file('origin/customer_details.csv', 'CustomerCode', 'Code') + +# 3. Define a mapping that uses columns from BOTH files +order_mapping = { + 'id': mapper.m2o_map('import_so_', 'OrderID'), + 'name': mapper.val('OrderID'), + 'date_order': mapper.val('OrderDate'), + # 'ContactPerson' comes from the joined file + 'x_studio_contact_person': mapper.val('ContactPerson'), +} + +# The processor now contains the merged data and can be processed as usual +processor.process( + mapping=order_mapping, + target_file='data/orders_with_details.csv', + params={'model': 'sale.order'} +) +``` + +--- + +## Splitting Large Datasets for Import + +When dealing with extremely large source files, processing everything in a single step can be memory-intensive and unwieldy. The library provides a `.split()` method on the `Processor` to break down a large dataset into smaller, more manageable chunks. + +### The `.split()` Method + +The `.split()` method divides the processor's in-memory dataset into a specified number of parts. It does not write any files itself; instead, it returns a dictionary where each key is an index and each value is a new, smaller `Processor` object containing a slice of the original data. + +You can then iterate over this dictionary to process each chunk independently. + +### Example: Splitting a Large File into 4 Parts + +**Transformation Script (`transform_split.py`)** + +```python +from odoo_data_flow.lib.transform import Processor +from odoo_data_flow.lib import mapper + +# 1. Define your mapping as usual +product_mapping = { + 'id': mapper.concat('large_prod_', 'SKU'), + 'name': mapper.val('ProductName'), +} + +# 2. Initialize a single processor with the large source file +processor = Processor('origin/large_products.csv') + +# 3. Split the processor into 4 smaller, independent processors +split_processors = processor.split(mapper.split_file_number(4)) + +# 4. Loop through the dictionary of new processors +for index, chunk_processor in split_processors.items(): + output_filename = f"data/products_chunk_{index}.csv" + chunk_processor.process( + mapping=product_mapping, + target_file=output_filename, + params={'model': 'product.product'} + ) +``` diff --git a/docs/guides/05_performance_tuning.md b/docs/guides/05_performance_tuning.md new file mode 100644 index 00000000..2694b695 --- /dev/null +++ b/docs/guides/05_performance_tuning.md @@ -0,0 +1,240 @@ +# Guide: Performance Tuning + +When working with large datasets, the performance of your data import can become critical. This guide covers the key parameters and strategies you can use to tune the import process for maximum speed and efficiency. + +The primary way to control performance is by adjusting the parameters passed to the `odoo-data-flow import` command, which you can set in the `params` dictionary in your `transform.py` script. + +--- + +## Using Multiple Workers + +The most significant performance gain comes from parallel processing. The import client can run multiple "worker" processes simultaneously, each handling a chunk of the data. + +- **CLI Option**: `--worker` +- **`params` Key**: `'worker'` +- **Default**: `1` + +By increasing the number of workers, you can leverage multiple CPU cores on the machine running the import script and on the Odoo server itself. + +### Example + +To use 4 parallel processes for an import: + +```python +# In your transform.py script + +import_params = { + 'model': 'sale.order', + 'worker': 4, # Use 4 workers + # ... 
other params +} + +processor.process( + mapping=my_mapping, + target_file='data/sale_order.csv', + params=import_params +) +``` + +This will add the `--worker=4` flag to the command in your generated `load.sh` script. + +### Trade-offs and Considerations + +- **CPU Cores**: A good rule of thumb is to set the number of workers to be equal to, or slightly less than, the number of available CPU cores on your Odoo server. +- **Database Deadlocks**: The biggest risk with multiple workers is the potential for database deadlocks. This can happen if two workers try to write records that depend on each other at the same time. The library's two-pass error handling system is designed to mitigate this. + +## Solving Concurrent Updates with `--groupby` + +The `--groupby` option is a powerful feature designed to solve the "race condition" problem that occurs during high-performance, multi-worker imports. + +- **CLI Option**: `--groupby` +- **`params` Key**: `'split'` (Note: the internal key is `split`) +- **Default**: `None` + +#### The Problem: A Race Condition + +Imagine you are using multiple workers to import contacts that all link to the _same_ parent company. + +- **Worker 1** takes a contact and tries to update "Company A". +- At the exact same time, **Worker 2** takes another contact and _also_ tries to update "Company A". + +The database locks the company record for Worker 1, so when Worker 2 tries to access it, it fails with a "concurrent update" error. + +#### The Solution: The "Sorting Hat" + +The `--groupby` option acts like a "sorting hat." Before the import begins, it looks at the column you specify (e.g., `parent_id/id`) and ensures that **all records with the same value in that column are sent to the exact same worker.** + +This guarantees that two different workers will never try to update the same parent record at the same time, completely eliminating these errors. + +#### Visualizing the Difference + +```mermaid +graph TD + subgraph "Without --groupby (High Risk of Error)" + A["Records:
C1 (Parent A)
C2 (Parent B)
C3 (Parent A)"] --> B{Random Distribution}; + B --> W1["Worker 1 gets C1"]; + B --> W2["Worker 2 gets C3"]; + B --> W3["Worker 3 gets C2"]; + W1 -- "tries to update" --> P_A(("Parent A")); + W2 -- "tries to update" --> P_A; + W3 -- "updates" --> P_B(("Parent B")); + P_A --> X["ERROR
Concurrent Update"]; + end + + subgraph "With --groupby=parent_id/id (Safe)" + C["Records:
C1 (Parent A)
C2 (Parent B)
C3 (Parent A)"] --> D{Smart Distribution}; + D -- "parent_id = A" --> W3b["Worker 1 gets C1, C3"]; + D -- "parent_id = B" --> W4b["Worker 2 gets C2"]; + W3b --> S1[("Update Parent A")]; + W4b --> S2[("Update Parent B")]; + S1 & S2 --> Y(["SUCCESS"]); + end +``` + +### Example + +To safely import contacts in parallel, grouped by their parent company: + +```python +# In your transform.py script + +import_params = { + 'model': 'res.partner', + 'worker': 4, + # This is the crucial part + 'split': 'parent_id/id', # The internal key is 'split' +} +``` + +This will add `--groupby=parent_id/id` to your generated `load.sh` script. + +## Understanding Batch Size (`--size`) + +The `--size` option is one of the most critical parameters for controlling the performance and reliability of your imports. In simple terms, it controls **how many records are processed in a single database transaction**. + +To understand why this is so important, think of it like going through a checkout at a grocery store. + +#### The Default Odoo Behavior: One Big Basket + +When you use Odoo's standard import wizard, it's like putting all of your items (every single row in your file) into **one giant shopping basket**. This "all-or-nothing" approach has two major problems: + +1. **Transaction Timeouts:** The Odoo server has a time limit to process your entire basket. If you have too many items (a very large file), it might take too long, and the server will give up with a "Transaction timed out" error. None of your records are imported. +2. **Single Point of Failure:** If just one record in your giant basket is "bad" (e.g., a missing price), the server rejects the **entire basket**. All of your other perfectly good records are rejected along with the single bad one. + +#### How `--size` Solves the Problem: Multiple Small Baskets + +The `odoo-data-flow` library allows you to break up your import into smaller, more manageable chunks. When you use `--size 100`, you are telling the tool to use **multiple, smaller baskets**, each containing only 100 items. + +This solves both problems: + +1. Each small basket is processed very quickly, avoiding server timeouts. +2. If one small basket has a bad record, only that basket of 100 records is rejected. All the other baskets are still successfully imported. + +#### Visualizing the Difference + +```mermaid +flowchart TD + subgraph subGraph0["Default Odoo Import (One Big Basket)"] + B{"One Large Transaction
Size=1000"} + A["1000 Records"] + D@{ label: "FAIL
All 1000 records rejected" } + C["Odoo Database"] + end + subgraph subGraph1["odoo-data-flow with --size=100 (Multiple Small Baskets)"] + F{"Transaction 1
100 records"} + E["1000 Records"] + G["Odoo Database"] + H{"Transaction 2
100 records"} + I@{ label: "FAIL
Only 100 records rejected" } + J["...continues with Transaction 3"] + end + A --> B + B -- Single Error --> D + B -- No Errors --> C + E --> F + F --> G & H + H -- Single Error --> I + H -- No Errors --> G + I --> J + J --> G + + D@{ shape: rect} + C@{ shape: cyl} + G@{ shape: cyl} + I@{ shape: rect} + style C fill:#AA00FF + style G fill:#AA00FF + +``` + +#### Trade-offs and Considerations + +- **Larger Batch Size**: Can be faster as it reduces the overhead of creating database transactions, but consumes more memory. If one record in a large batch fails, Odoo may reject the entire batch. +- **Smaller Batch Size**: More resilient to individual record errors and consumes less memory, but can be slower due to increased network overhead. +- **WAN Performance:** For slow networks, sending smaller chunks of data is often more stable than sending one massive payload. + +## Mapper Performance + +The choice of mappers can impact performance. + +- **Fast Mappers**: Most mappers, like `val`, `const`, `concat`, and `num`, are extremely fast as they operate only on the data in the current row. + +- **Slow Mappers**: The `mapper.relation` function should be used with caution. For **every single row**, it performs a live search request to the Odoo database, which can be very slow for large datasets. + +**Recommendation**: If you need to map values based on data in Odoo, it is much more performant to first export the mapping from Odoo into a Python dictionary and then use the much faster `mapper.map_val` to do the translation in memory. + +--- + +## Importing Related or Computed Fields (A Major Performance Trap) + +A common but very slow practice is to import values into related or computed fields. This can lead to a massive number of "behind the scenes" updates and cause your import time to increase exponentially. + +### The Problem: Cascading Updates + +Consider an example where you are importing a list of contacts and setting their `parent_id` (parent company). + +```python +# SLOW - DO NOT DO THIS +my_mapping = { + 'id': mapper.m2o_map('child_', 'Ref'), + 'name': mapper.val('Name'), + # This next line causes the performance issue + 'parent_id/id': mapper.m2o_map('parent_', 'ParentRef'), +} +``` + +This triggers a cascade of updates. Each time a new child contact is imported for the same parent, Odoo re-writes the _entire_ list of children on the parent record. The number of database writes grows with every new record, slowing the import to a crawl. + +### The Solution: Use the `--ignore` Option + +The correct way to handle this is to prevent the import client from writing to the problematic field. You can do this by adding the `ignore` key to your `params` dictionary. + +- **CLI Option**: `--ignore` +- **`params` Key**: `'ignore'` + +```python +# In your transform.py script + +# The mapping still defines the relationship +my_mapping = { + 'id': mapper.m2o_map('child_', 'Ref'), + 'name': mapper.val('Name'), + 'parent_id/id': mapper.m2o_map('parent_', 'ParentRef'), +} + +# The params tell the client to IGNORE the parent_id/id field +import_params = { + 'model': 'res.partner', + 'ignore': 'parent_id/id', # The field to ignore +} + +processor.process( + mapping=my_mapping, + target_file='data/contacts.csv', + params=import_params +) +``` + +This will generate a `load.sh` script with the `--ignore=parent_id/id` flag. The import client will then skip this column, avoiding the cascading updates entirely. 
Odoo's internal logic will still correctly establish the relationship based on the other direction of the field, but far more efficiently. + +**Recommendation**: For performance, **always** use `--ignore` for related fields that have an inverse relation (like `parent_id` and `child_ids`). Only import the "forward" direction of the relationship. diff --git a/docs/guides/06_post_import_workflows.md b/docs/guides/06_post_import_workflows.md new file mode 100644 index 00000000..453adc2c --- /dev/null +++ b/docs/guides/06_post_import_workflows.md @@ -0,0 +1,61 @@ +# Guide: Post-Import Workflows + +The `odoo-data-flow` library provides a powerful system for running automated actions on your data _after_ it has been imported into Odoo. This is handled by the `odoo-data-flow workflow` command. + +This feature is designed for complex data migrations where simple importing is not enough. A common use case is in accounting, where imported draft invoices must be validated, reconciled, and paid. Instead of performing these actions manually in the Odoo UI for thousands of records, you can automate them with a workflow. + +## The `invoice-v9` Workflow + +The library currently includes a built-in workflow specifically for processing customer invoices (`account.invoice`) in Odoo version 9. + +**Warning:** This workflow uses legacy Odoo v9 API calls and will **not** work on modern Odoo versions (10.0+). It is provided as a reference and an example of how a post-import process can be structured. + +The workflow allows you to perform the following actions on your imported invoices: + +- **`tax`**: Computes taxes for imported draft invoices. +- **`validate`**: Validates draft invoices, moving them to the 'Open' state. +- **`pay`**: Registers a payment against an open invoice, moving it to the 'Paid' state. +- **`proforma`**: Converts draft invoices to pro-forma invoices. +- **`rename`**: A utility to move a value from a custom field to the official `number` field. + +### Usage + +You run the workflow from the command line, specifying which action(s) you want to perform. + +```bash +odoo-data-flow workflow invoice-v9 [OPTIONS] +``` + +### Command-Line Options + +| Option | Description | +| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `-c`, `--config` | **Required**. Path to your `connection.conf` file. | +| `--action` | The workflow action to run (`tax`, `validate`, `pay`, `proforma`, `rename`). This option can be used multiple times. If omitted, all actions are run in a logical order. | +| `--field` | **Required**. The name of the field in `account.invoice` that holds the legacy status from your source system. The workflow uses this to find the right invoices. | +| `--status-map` | **Required**. A dictionary string that maps Odoo states to your legacy statuses. For example: `"{'open': ['OP', 'Validated'], 'paid': ['PD']}"` | +| `--paid-date-field` | **Required**. The name of the field containing the payment date, used by the `pay` action. | +| `--payment-journal` | **Required**. The database ID (integer) of the `account.journal` to be used for payments. | +| `--max-connection` | The number of parallel threads to use for processing. Defaults to `4`. | + +### Example Command + +Imagine you have imported thousands of invoices. Now, you want to find all the invoices with a legacy status of "Validated" and move them to the "Open" state in Odoo. 
+ +You would run the following command: + +```bash +odoo-data-flow workflow invoice-v9 \ + --config conf/connection.conf \ + --action validate \ + --field x_studio_legacy_status \ + --status-map "{'open': ['Validated']}" \ + --paid-date-field x_studio_payment_date \ + --payment-journal 5 +``` + +This command will: + +1. Connect to Odoo. +2. Search for all `account.invoice` records where `x_studio_legacy_status` is 'Validated'. +3. Run the `validate_invoice` function on those records, triggering the workflow to open them. diff --git a/docs/guides/07_server_to_server_migration.md b/docs/guides/07_server_to_server_migration.md new file mode 100644 index 00000000..5fbdf8cf --- /dev/null +++ b/docs/guides/07_server_to_server_migration.md @@ -0,0 +1,79 @@ +# Guide: Server-to-Server Migration + +The `odoo-data-flow` library includes a powerful `migrate` command designed to perform a direct, in-memory data migration from one Odoo database to another. This is an advanced feature that chains together the export, transform, and import processes into a single step, without needing to create intermediate CSV files on your local machine. + +## Use Case + +This command is ideal for scenarios such as: + +- Migrating data from a staging or development server to a production server. +- Consolidating data from one Odoo instance into another. +- Performing a data transformation and re-importing into the same database. + +## The `odoo-data-flow migrate` Command + +The migration is handled by the `migrate` sub-command. It works by exporting data from a source instance, applying an in-memory transformation using the same `mapper` functions, and then immediately importing the result into a destination instance. + +### Command-Line Options + +The command is configured using a set of options that combine parameters from both the `export` and `import` commands. + +| Option | Description | +| --------------------- | ------------------------------------------------------------------------------------------------------------------- | +| `--config-export` | **Required**. Path to the `connection.conf` file for the **source** Odoo instance (where data is exported from). | +| `--config-import` | **Required**. Path to the `connection.conf` file for the **destination** Odoo instance (where data is imported to). | +| `--model` | **Required**. The technical name of the Odoo model you want to migrate (e.g., `res.partner`). | +| `--fields` | **Required**. A comma-separated list of the technical field names you want to migrate. | +| `--domain` | An Odoo domain filter to select which records to export from the source instance. Defaults to `[]` (all records). | +| `--mapping` | A dictionary string defining the transformation rules. If omitted, a direct 1-to-1 mapping is used. | +| `--export-worker` | The number of parallel workers to use for the export phase. Defaults to `1`. | +| `--export-batch-size` | The batch size for the export phase. Defaults to `100`. | +| `--import-worker` | The number of parallel workers to use for the import phase. Defaults to `1`. | +| `--import-batch-size` | The batch size for the import phase. Defaults to `10`. | + +## Full Migration Example + +Let's say we want to migrate all partners from a staging server to a production server. We also want to add a prefix to their names during the migration to indicate they came from the staging environment. + +**Step 1: Create two connection files** + +You would have two configuration files: `conf/staging.conf` and `conf/production.conf`. 
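+
+Both files use the same `[Connection]` format shown in the installation guide; only the hostname, database, and credentials differ. As a minimal sketch (the hostnames, database names, and credentials below are placeholders, not real values):
+
+```ini
+# conf/staging.conf -- the source instance the data is exported from
+[Connection]
+hostname = staging.my-company.odoo.com
+database = staging_db
+login = admin
+password = <staging password>
+protocol = jsonrpcs
+port = 443
+uid = 2
+```
+
+`conf/production.conf` has the same structure but points at the production hostname and database, with the production credentials.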
+ +**Step 2: Define the mapping (optional)** + +If you need to transform the data, you can define a mapping. For this example, we'll pass it as a string on the command line. +The mapping would look like this in Python: + +```python +my_mapping = { + 'id': mapper.concat('migrated_partner_', 'id'), + 'name': mapper.concat('Staging - ', 'name'), + 'phone': mapper.val('phone'), + # ... other fields +} +``` + +As a command-line string, it would be: `"{'id': mapper.concat('migrated_partner_', 'id'), 'name': mapper.concat('Staging - ', 'name'), ...}"` + +**Step 3: Run the `migrate` command** + +You would run the following command from your terminal: + +```bash +odoo-data-flow migrate \ + --config-export "conf/staging.conf" \ + --config-import "conf/production.conf" \ + --model "res.partner" \ + --fields "id,name,phone" \ + --mapping "{'name': mapper.concat('Staging - ', 'name'), 'phone': mapper.val('phone')}" +``` + +### Result + +This single command will: + +1. Connect to the staging Odoo database. +2. Export the `id`, `name`, and `phone` fields for all `res.partner` records. +3. In memory, transform the data by prepending "Staging - " to each partner's name. +4. Connect to the production Odoo database. +5. Import the transformed data, creating new partners with the updated names. diff --git a/docs/guides/faq.md b/docs/guides/faq.md new file mode 100644 index 00000000..5d2bdd47 --- /dev/null +++ b/docs/guides/faq.md @@ -0,0 +1,137 @@ +# FAQ & Troubleshooting + +This document answers frequently asked questions and provides solutions to common problems you may encounter while using `odoo-data-flow`. + +## Frequently Asked Questions + +### What is `odoo-data-flow`? + +It is a powerful Python library designed to handle the import and export of data to and from Odoo. It allows you to define complex data transformations in Python, providing a robust and repeatable process for data migrations. + +### How is this different from Odoo's standard import tool? + +While Odoo's built-in import is great for simple tasks, `odoo-data-flow` offers several key advantages for complex or large-scale migrations: + +- **Separation of Concerns**: It cleanly separates the data **transformation** logic (cleaning your source data) from the data **loading** logic (importing into Odoo). +- **Robust Error Handling**: Its two-pass import system intelligently handles errors, ensuring that one bad record doesn't stop the entire process. +- **Powerful Transformations**: You can use the full power of Python and a rich set of built-in `mapper` functions to handle almost any data transformation challenge. +- **Repeatability and Version Control**: Since your transformation logic is code, it can be version-controlled (with Git), tested, and reused across multiple environments (like staging and production) with confidence. + +### Can I use this for both importing and exporting? + +Yes. The library provides tools for both workflows. The `Processor` and `mapper` modules are used for transforming and preparing data for import, while the `odoo-data-flow export` command is used to export data from Odoo into CSV files. + +### Can I migrate data directly between two Odoo databases? + +Yes. The library includes a powerful `odoo-data-flow migrate` command that performs a complete export, transform, and import from one Odoo instance to another in a single step, without creating intermediate files. This is ideal for migrating data from a staging server to production. 
+ +> For detailed instructions, see the [Server-to-Server Migration Guide](guides/07_server_to_server_migration.md). + +### How do I process a CSV file that has no header? + +The `Processor` can be initialized directly with in-memory data. If your source file has no header, you can read it manually using Python's standard `csv` module and provide your own header list. + +1. Read the raw data from the CSV file into a list of lists. +2. Create a Python list containing the header names in the correct order. +3. Initialize the `Processor` using the `header=` and `data=` arguments instead of `filename=`. + +```python +import csv +from odoo_data_flow.lib.transform import Processor + +# 1. Define the header manually +my_header = ['LegacyID', 'FirstName', 'LastName', 'Email'] +my_data = [] + +# 2. Read the file into a list +with open('origin/contacts_no_header.csv', 'r') as f: + reader = csv.reader(f) + my_data = list(reader) + +# 3. Initialize the Processor with the in-memory data +processor = Processor(header=my_header, data=my_data) + +# You can now proceed with your mapping as usual +# my_mapping = {'name': mapper.concat(' ', 'FirstName', 'LastName'), ...} +``` + +### Where can I find a complete, real-world example? + +A full example project, demonstrating a realistic data migration workflow with multiple models and complex transformations, is available on GitHub. This is an excellent resource for seeing how all the pieces fit together. + +- **[Odoo Data Flow Example Repository](https://github.com/OdooDataFlow/odoo-data-flow-example/tree/18.0)** + +--- + +## Troubleshooting Common Errors + +When an import fails, understanding why is key. Here are some of the most common issues and how to solve them. + +### Understanding the `.fail` and `.fail.bis` Files + +The two-pass import process is designed to isolate errors effectively. + +- **`my_file.csv.fail`**: This file is created during the **first pass** of the import. It contains every record that failed for _any_ reason. This can include genuine data errors or temporary database issues like deadlocks. + +- **`my_file.csv.fail.bis`**: This file is created during the **second pass** (the `--fail` run), which retries the records from the `.fail` file using a single worker. The `.fail.bis` file contains only the records that _still_ failed. These are almost always genuine data errors that you need to investigate and fix manually. + +**Your workflow should be:** + +1. Run your `load.sh` script (which contains the `odoo-data-flow import` commands). +2. If a `.fail.bis` file is created, open it to identify the data issue. +3. Fix the issue in your original source file or your `transform.py` script. +4. Rerun the transformation and load process. + +### Record Count Mismatch + +Sometimes, the number of records in your source file doesn't match the number of records created in Odoo, even if there are no errors in the `.fail.bis` file. + +- **Cause:** This usually happens when your mapping logic unintentionally filters out rows. For example, using a `postprocess` function that can return an empty value for an external ID (`id` field). If the external ID is empty, the entire record is skipped without error. + +- **Solution:** + 1. **Check your `id` field**: The most common culprit is the mapping for the `id` field. Ensure it _always_ returns a non-empty, unique value for every row you intend to import. + 2. 
**Use a `preprocessor`**: For complex debugging, you can use a [preprocessor function](guides/03_data_transformations.md#pre-processing-data) to add a unique line number to each row. Import this line number into a custom field in Odoo (`x_studio_import_line_number`). After the import, you can easily compare the line numbers in your source file with those in Odoo to find exactly which rows were skipped. + +### Connection Errors + +These errors usually happen when the `odoo-data-flow` client cannot reach your Odoo instance. + +- **Error:** `Connection refused` + - **Cause:** The `hostname` or `port` in your `conf/connection.conf` is incorrect, or the Odoo server is not running. + - **Solution:** Double-check your connection details and ensure the Odoo instance is active and accessible. + +- **Error:** `Wrong login/password` + - **Cause:** The credentials in `conf/connection.conf` are incorrect. + - **Solution:** Verify your `database`, `login`, and `password`. + +### Odoo Access & Validation Errors + +These errors come directly from Odoo when the data is not valid enough to save. + +- **Error:** `AccessError`, `You are not allowed to modify this document` + - **Cause:** The user specified by `uid` in your `conf/connection.conf` lacks the necessary permissions (e.g., Create or Write access) for the target model. + - **Solution:** Check the user's Access Rights in Odoo's settings. + +- **Error:** `ValidationError: A required field was not provided` + - **Cause:** Your transformed CSV file is missing a column for a field marked as `required=True` on the Odoo model. + - **Solution:** Check the model's definition in Odoo and ensure your `transform.py` script generates a value for that field. + +- **Error:** `No matching record found for external id '__export__.my_external_id_123'` + - **Cause:** You are trying to link to a related record (e.g., setting the `partner_id` on a sales order), but the external ID you are providing does not exist in the database. + - **Solution:** + 1. Ensure you have successfully imported the parent records first. + 2. Check for typos. The prefix and value used in your `m2o_map` must exactly match the external ID of the parent record. + 3. See the section below on Import Order. + +### Understanding Import Order for Relational Data + +A very common reason for the `No matching record found` error is that you are trying to import records in the wrong order. + +- **The Rule:** You must always import "parent" records **before** you import the "child" records that refer to them. +- **Example:** Imagine you are importing Contacts (`res.partner`) and assigning them to Contact Tags (`res.partner.category`). Odoo cannot assign a contact to the "VIP" tag if that "VIP" tag doesn't exist in the database yet. + +- **Correct Import Sequence:** + 1. **First, import `res.partner.category`**: Run a transformation and load process for your contact tags. This creates the tags and their external IDs in Odoo. + 2. **Then, import `res.partner`**: Run a separate process for your contacts. The mapping for the `category_id/id` field can now successfully use `mapper.m2o_map` to look up the external IDs of the tags you created in the first step. + +- **General Advice:** Always map out the dependencies in your data. If Model B has a Many2one field pointing to Model A, you must always import Model A first. 
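+
+To make this sequence concrete, here is a minimal sketch of the two-step order using this library's `Processor` and mappers. The source file names, column names, and the `tag_` / `contact_` external ID prefixes are purely illustrative:
+
+```python
+from odoo_data_flow.lib.transform import Processor
+from odoo_data_flow.lib import mapper
+
+# Step 1: transform and load the tags (res.partner.category) first.
+tag_mapping = {
+    'id': mapper.m2o_map('tag_', 'TagName'),
+    'name': mapper.val('TagName'),
+}
+tag_processor = Processor('origin/tags.csv')
+tag_processor.process(
+    mapping=tag_mapping,
+    target_file='data/res_partner_category.csv',
+    params={'model': 'res.partner.category'},
+)
+tag_processor.write_to_file("load_tags.sh")
+
+# Step 2: only then transform and load the contacts that reference the tags.
+partner_mapping = {
+    'id': mapper.m2o_map('contact_', 'Ref'),
+    'name': mapper.val('Name'),
+    # Reuses the same 'tag_' prefix, so the external IDs match those
+    # created in step 1.
+    'category_id/id': mapper.m2o_map('tag_', 'TagName'),
+}
+partner_processor = Processor('origin/contacts.csv')
+partner_processor.process(
+    mapping=partner_mapping,
+    target_file='data/res_partner.csv',
+    params={'model': 'res.partner'},
+)
+partner_processor.write_to_file("load_contacts.sh")
+```
+
+Running the generated `load_tags.sh` before `load_contacts.sh` guarantees the tag external IDs already exist by the time the contacts that reference them are imported.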
diff --git a/docs/guides/index.md b/docs/guides/index.md new file mode 100644 index 00000000..3d9a66ef --- /dev/null +++ b/docs/guides/index.md @@ -0,0 +1,18 @@ +# How-To Guides + +This section provides detailed, step-by-step guides for common tasks and advanced features of the `odoo-data-flow` library. + +```{toctree} +:maxdepth: 1 +:caption: "Guides" + +01_importing_data +02_exporting_data +03_data_transformations +04_advanced_usage +05_performance_tuning +06_post_import_workflows +07_server_to_server_migration +faq +roadmap +``` diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..14868cd2 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,81 @@ +```{include} ../README.md +--- +end-before: +--- +``` + +# Odoo Data Flow + +**A robust, declarative library for managing complex data imports and exports with Odoo.** + +Odoo Data Flow is a powerful and flexible Python library designed to simplify the import and export of data to and from Odoo. It allows you to define data mappings and transformations in a declarative way, making complex data operations manageable and repeatable. +You can easily manage complex transformations, relationships, and validations, making your data integration tasks simpler and more reliable. + +This library is the successor to the `odoo-csv-import-export` library, refactored for modern development practices and enhanced clarity. + +```{mermaid} +graph TD + subgraph External Data + A[CSV / XLSX File] + end + + subgraph odoo-data-flow + B{Model Definition in Python} + C["@field Decorators"] + D[Transformation & Validation Logic] + end + + subgraph Odoo + E[Odoo Database] + end + + A --> B + B -- Defines --> C + C -- Applies --> D + B -- Orchestrates --> E + + style B fill:#777,stroke:#333,stroke-width:2px,color:#fff +``` + +## Key Features + +- **Declarative Python Configuration**: Define your entire data flow using clear and readable Python objects. This "configuration-as-code" approach allows for powerful, dynamic, and easily debugged setups. Making complex data operations manageable and repeatable. +- **Multiple Data Sources**: Natively supports CSV, JSON, and XML files. Easily extendable to support other sources like databases or APIs. +- **Built-in Data Transformation:** Clean, modify, and format data on the fly using simple `lambda` functions or your own custom python code. +- **Relational Field Handling:** Easily import and export `Many2one`, `One2many`, and `Many2many` relationships. +- **Data Validation:** Ensure data integrity before it even reaches Odoo. +- **Extensible and Customizable:** Write your own custom methods to handle unique or complex data-processing requirements. +- **Support for CSV and Excel:** Works seamlessly with the most common file formats for business data exchange. +- **Robust Error Handling**: Provides clear logging and error reports to help you debug your data flows quickly. + +## Getting Started + +Ready to simplify your Odoo data integrations? + +| Step | Description | +| ------------------------------------------ | --------------------------------------------------------------- | +| 🚀 **[Quickstart](./quickstart.md)** | Your first end-to-end example. Go from file to Odoo in minutes. | +| ⚙️ **[Installation](./installation.md)** | How to install the library in your project. | +| 🧠 **[Core Concepts](./core_concepts.md)** | Understand the key ideas behind the library. 
| + +[license]: license +[contributor guide]: contributing +[command-line reference]: usage + +```{toctree} +--- +hidden: +maxdepth: 1 +--- + +installation +quickstart +core_concepts +guides/index +faq +reference +contributing +Code of Conduct +License +Changelog +``` diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 00000000..6654e63a --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,109 @@ +# Installation with uv + +This guide uses `uv`, a high-performance Python package installer and resolver, to set up your environment. It's a modern, fast alternative to `pip` and `venv`. + +## 1. Install `uv` (if you haven't already) + +First, ensure `uv` is installed on your system. If not, run the appropriate command for your operating system: + +```bash +# macOS / Linux +curl -LsSf [https://astral.sh/uv/install.sh](https://astral.sh/uv/install.sh) | sh + +# Windows (in PowerShell) +irm [https://astral.sh/uv/install.ps1](https://astral.sh/uv/install.ps1) | iex +``` + +For other installation options, please refer to the [official `uv` documentation](https://astral.sh/uv#installation). + +## 2. Prerequisites + +- **Python 3.10 or newer:** `uv` will automatically find and use a compatible Python version on your system. +- **Access to an Odoo instance:** To import or export data, you will need the URL, database name, and login credentials for an Odoo instance. + +## 3. The Connection Configuration File + +Before you can use the tool, you must create a configuration file to store your Odoo connection details. + +Create a folder named `conf/` in your project directory, and inside it, create a file named `connection.conf`. + +**File: `conf/connection.conf`** + +```ini +[Connection] +hostname = my-odoo-instance.odoo.com +database = my_odoo_db +login = admin +password = +protocol = jsonrpcs +port = 443 +uid = 2 +``` + +### Configuration Keys Explained + +| Key | Description | +| :--------- | :---------------------------------------------------------------------------------------------------------------------------------------- | +| `hostname` | The domain or IP address of your Odoo server. | +| `database` | The name of the Odoo database you want to connect to. | +| `login` | The login username for the Odoo user that will perform the operations. | +| `password` | The password for the specified Odoo user. | +| `protocol` | The protocol to use for the connection. For Odoo.sh or a standard HTTPS setup, use `jsonrpcs`. For a local, non-SSL setup, use `jsonrpc`. | +| `port` | The port for the connection. Standard ports are `443` for HTTPS (`jsonrpcs`) and `8069` for HTTP (`jsonrpc`). | +| `uid` | The database ID of the Odoo user. `2` is often the default administrator user in a new database. | + +## 4. Standard Installation + +1. **Create and activate a virtual environment:** + + This command creates a standard virtual environment in a `.venv` folder. + + ```bash + uv venv + ``` + + Next, activate the environment: + + ```bash + # For Unix/macOS + source .venv/bin/activate + + # For Windows + .venv\Scripts\activate + ``` + + Your terminal prompt should now indicate that you are in the `.venv` environment. + +2. **Install `odoo-data-flow`:** + + With the environment active, use `uv` to install the package from PyPI. + + ```bash + uv pip install odoo-data-flow + ``` + +## 5. Installing for Development + +If you want to contribute to the project or test the latest unreleased changes, you can install the library directly from the source code. + +1. 
+1. **Clone the GitHub repository:**
+
+   ```bash
+   git clone https://github.com/OdooDataFlow/odoo-data-flow.git
+   cd odoo-data-flow
+   ```
+
+2. **Create and activate an environment:**
+
+   ```bash
+   uv venv
+   source .venv/bin/activate
+   ```
+
+3. **Install in editable mode:**
+
+   This command links the installed package to the source code in your directory. Any edits you make to the code will be immediately available.
+
+   ```bash
+   uv pip install -e .
+   ```
+
+You are now set up and ready to create your first data flow.
diff --git a/docs/license.md b/docs/license.md
new file mode 100644
index 00000000..218790f5
--- /dev/null
+++ b/docs/license.md
@@ -0,0 +1,7 @@
+# License
+
+```{literalinclude} ../LICENSE
+---
+language: none
+---
+```
diff --git a/docs/quickstart.md b/docs/quickstart.md
new file mode 100644
index 00000000..f5147066
--- /dev/null
+++ b/docs/quickstart.md
@@ -0,0 +1,143 @@
+# Quickstart: A Real-World Import Workflow
+
+This guide demonstrates a realistic and robust workflow for importing data. Instead of a single script that does everything, we will separate the process into two distinct phases, which is highly recommended for any serious data migration:
+
+1. **Transform Phase**: A Python script reads a raw source file, cleans the data using the library's powerful **mappers**, and produces a clean CSV file ready for Odoo. It also generates a shell script for the next phase.
+2. **Load Phase**: The generated shell script uses the new `odoo-data-flow` command-line tool to efficiently load the clean CSV data into Odoo.
+
+This separation makes the process more manageable, easier to debug, and allows you to reuse the transformed data for multiple Odoo instances (e.g., staging and production).
+
+## Step 1: Project Setup
+
+First, create the recommended directory structure for a data flow project.
+
+```
+.
+├── conf/
+│   └── connection.conf
+├── origin/
+│   └── clients.csv
+├── data/
+│   └── (this will be created by our script)
+└── transform.py
+```
+
+- `conf/`: Holds configuration files, like Odoo connection details.
+- `origin/`: Contains the original, raw data files from the source system.
+- `data/`: Will store the transformed, clean CSV files ready for import.
+- `transform.py`: Our main Python script for the transformation logic.
+
+## Step 2: Connection Configuration (`connection.conf`)
+
+Create the `conf/connection.conf` file. The section header `[Connection]` and the keys (`database`, `login`) must match this example, as they are used by the import client.
+
+**File: `conf/connection.conf`**
+
+```ini
+[Connection]
+hostname = my-odoo-instance.odoo.com
+database = my_odoo_db
+login = admin
+password =
+protocol = jsonrpcs
+port = 443
+uid = 2
+```
+
+## Step 3: The Raw Data (`origin/clients.csv`)
+
+Create a raw data file in `origin/clients.csv`.
+
+**File: `origin/clients.csv`**
+
+```text
+ID,Firstname,Lastname,EmailAddress
+C001,John,Doe,john.doe@test.com
+C002,Jane,Smith,jane.s@test.com
+```
+
+## Step 4: The Transformation Script (`transform.py`)
+
+This script is the core of our logic. It uses the `Processor` to read the source file and a `mapping` dictionary to define the transformations.
+
+Create the file `transform.py`:
+
+**File: `transform.py`**
+
+```python
+from odoo_data_flow.lib.transform import Processor
+from odoo_data_flow.lib import mapper
+
+# 1. Define the mapping rules in a dictionary.
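+#    Each key is a destination field on the target Odoo model (res.partner);
+#    each value is a mapper callable that builds that field from one row of
+#    origin/clients.csv. The expected output of this mapping is shown in Step 6.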
+res_partner_mapping = { + 'id': mapper.concat('example_client_', 'ID'), + 'name': mapper.concat(' ', 'Firstname', 'Lastname'), + 'email': mapper.val('EmailAddress'), + 'is_company': mapper.const(False), +} + +# 2. Initialize the Processor. +processor = Processor( + 'origin/clients.csv', + separator=',' +) + +# 3. Define parameters for the import client. +params = { + 'model': 'res.partner', + 'context': "{'tracking_disable': True}" +} + +# 4. Run the process. +processor.process( + mapping=res_partner_mapping, + target_file='data/res_partner.csv', + params=params +) + +# 5. Generate the shell script for the loading phase. +processor.write_to_file("load.sh") + +print("Transformation complete. Clean data and load script are ready.") +``` + +## Step 5: Run the Transformation + +Execute the script from your terminal: + +```bash +python transform.py +``` + +## Step 6: Review the Generated Files + +Let's look at what was created. + +**File: `data/res_partner.csv` (Transformed & Clean Data)** + +```csv +id,name,email,is_company +example_client_C001,"John Doe",john.doe@test.com,False +example_client_C002,"Jane Smith",jane.s@test.com,False +``` + +**File: `load.sh` (The Loading Script)** +This file now contains commands that use the new, clean `odoo-data-flow` command-line interface. + +```bash +#!/bin/bash +odoo-data-flow import --config conf/connection.conf --file data/res_partner.csv --model res.partner --context "{'tracking_disable': True}" +odoo-data-flow import --config conf/connection.conf --fail --file data/res_partner.csv --model res.partner --context "{'tracking_disable': True}" +``` + +## Step 7: Load the Data into Odoo + +Finally, execute the generated shell script to upload the data. + +```bash +bash load.sh +``` + +The `odoo-data-flow` tool will connect to your database and import the records. Log in to your Odoo instance and navigate to the **Contacts** app to see your newly imported contacts. + +Congratulations! You have successfully completed a full transform and load workflow with the new `odoo-data-flow` tool. diff --git a/docs/reference.md b/docs/reference.md new file mode 100644 index 00000000..1a607e03 --- /dev/null +++ b/docs/reference.md @@ -0,0 +1,57 @@ +# API Reference + +This section provides an auto-generated API reference for the core components of the `odoo-data-flow` library. + +## Command-Line Interface (`__main__`) + +This module contains the main `click`-based command-line interface. + +```{eval-rst} +.. automodule:: odoo_data_flow.__main__ + :members: +``` + +## Transformation Processor (`lib.transform`) + +This module contains the main `Processor` class used for data transformation. + +```{eval-rst} +.. automodule:: odoo_data_flow.lib.transform + :members: Processor + :member-order: bysource +``` + +## Mapper Functions (`lib.mapper`) + +This module contains all the built-in `mapper` functions for data transformation. + +```{eval-rst} +.. automodule:: odoo_data_flow.lib.mapper + :members: + :undoc-members: +``` + +## High-Level Runners + +These modules contain the high-level functions that are called by the CLI commands. + +### Importer (`importer`) + +```{eval-rst} +.. automodule:: odoo_data_flow.importer + :members: run_import +``` + +### Exporter (`exporter`) + +```{eval-rst} +.. automodule:: odoo_data_flow.exporter + :members: run_export +``` + +### Migrator (`migrator`) + +```{eval-rst} +.. 
automodule:: odoo_data_flow.migrator + :members: run_migration +``` diff --git a/docs/requirements.txt b/docs/requirements.txt index e69de29b..56885546 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -0,0 +1,4 @@ +furo==2024.04.27 +sphinx==7.4.7 +sphinx-click==5.2.1 +myst_parser==4.0.0 diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 00000000..74714a62 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,7 @@ +# Usage + +```{eval-rst} +.. click:: odoo-data-flow.__main__:main + :prog: odoo-data-flow + :nested: full +``` diff --git a/node_modules/.cache/prettier/.prettier-caches/7489ccd242e7353fd73f0ec8f06ab7d81f775baa.json b/node_modules/.cache/prettier/.prettier-caches/7489ccd242e7353fd73f0ec8f06ab7d81f775baa.json new file mode 100644 index 00000000..953e7e4d --- /dev/null +++ b/node_modules/.cache/prettier/.prettier-caches/7489ccd242e7353fd73f0ec8f06ab7d81f775baa.json @@ -0,0 +1 @@ +{"d703e8e9cf68355c758af900275812f777970780":{"files":{"docs/guides/04_advanced_usage.md":["2sgUBFKZJdbbZdmhIfxmHd+LqDo=",true],"docs/core_concepts.md":["40X4ceg+JO/JrntqPTjeeTpYavs=",true],".github/dependabot.yml":["bM81XiQmUBhzaHxnrPt342mMUaY=",true],"docs/usage.md":["4htTUUcAbG5R82/1GzTeH0V40sA=",true],"docs/ROADMAP.md":["HL4HM+VCHvWy6t24d6Xo/kbrQKs=",true],"docs/guides/03_data_transformations.md":["zKVT4HJ9uLEJRjgkSM1ICciwb68=",true],".github/workflows/tests.yml":["5xQkCgVbgF8tXIuoVKCEx2n4z1M=",true],"docs/reference.md":["nh0YuXWpQIzs09pgrUcO+wmujm8=",true],"docs/guides/06_post_import_workflows.md":["vuYg3SgUm7rMpC7UM+LnVS79iu8=",true],"docs/guides/01_importing_data.md":["jyKYfrVvRcLE/drBjBRE6HalLuM=",true],"docs/license.md":["U/fxqUzZ0MffdWqVkOmHHuZuzpU=",true],"README.md":["BnUKSGEdoe6wSbtpav5QAmlYmlo=",true],".github/workflows/labeler.yml":["MQZ3wEKIL6XaSygcs3ZFrotZ7B8=",true],"docs/codeofconduct.md":["baQDB/0muBxvRonfxIBeoVoMPmk=",true],".github/release-drafter.yml":["Rk9vfvTBxlEWMreYisv1aBneCfg=",true],"docs/guides/05_performance_tuning.md":["fq9EiymfpkhpYKjjD6Dc+6TZrWY=",true],".pre-commit-config.yaml":["zeu62Zjv5PnVjoajTgTlzGPe/YU=",true],"node_modules/.cache/prettier/.prettier-caches/7489ccd242e7353fd73f0ec8f06ab7d81f775baa.json":["/SOZTcUSzeGO91eUi+m3T7DmtfU=",true],"CONTRIBUTING.md":["LJHAjddHnMdArLn/OplqffccLuc=",true],"docs/guides/02_exporting_data.md":["LJCWLrg5AS2RwxGwB3xSOqcYReM=",true],".github/labels.yml":["NxBHYODSSY6KD617S8sNPA7KNUA=",true],".readthedocs.yml":["rODs66JeScon0nljqb+nhziP9is=",true],"CODE_OF_CONDUCT.md":["SSU9kZw05PD/j3IePSCg9c/0i/U=",true],".cookiecutter.json":["1oOe21Mljg1JOq1LoMmZ/1Te8TQ=",true],"docs/index.md":["0YQX81r3LosYxjOpc6SzrofzOwM=",true],"codecov.yml":["hYmd4Rdr7nOEQ8C/nTCdBHJ80t0=",true],"docs/guides/faq.md":["j1tHaMpbvx+azTOo2+My2zzpGzw=",true],"docs/installation.md":["z7ErHl+jeC3aFdbLDQ84xohFG9w=",true],"docs/quickstart.md":["QiLLme2+mOxojnLhWbIy3l80tTA=",true]},"modified":1750791422789}} \ No newline at end of file diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 00000000..4b2fe337 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,280 @@ +"""Nox sessions.""" + +import os +import shlex +import shutil +import sys +from pathlib import Path +from textwrap import dedent + +import nox + +nox.options.default_venv_backend = "uv" + +package = "odoo_data_flow" +python_versions = ["3.12", "3.13", "3.11", "3.10", "3.9"] +nox.needs_version = ">= 2021.6.6" +nox.options.sessions = ( + "pre-commit", + "mypy", + "tests", + "typeguard", + "xdoctest", + "docs-build", +) + + +def activate_virtualenv_in_precommit_hooks(session: nox.Session) 
-> None: + """Activate virtualenv in hooks installed by pre-commit. + + This function patches git hooks installed by pre-commit to activate the + session's virtual environment. This allows pre-commit to locate hooks in + that environment when invoked from git. + + Args: + session: The Session object. + """ + assert session.bin is not None # nosec + + # Only patch hooks containing a reference to this session's bindir. Support + # quoting rules for Python and bash, but strip the outermost quotes so we + # can detect paths within the bindir, like /python. + bindirs = [ + bindir[1:-1] if bindir[0] in "'\"" else bindir + for bindir in (repr(session.bin), shlex.quote(session.bin)) + ] + + virtualenv = session.env.get("VIRTUAL_ENV") + if virtualenv is None: + return + + headers = { + # pre-commit < 2.16.0 + "python": f"""\ + import os + os.environ["VIRTUAL_ENV"] = {virtualenv!r} + os.environ["PATH"] = os.pathsep.join(( + {session.bin!r}, + os.environ.get("PATH", ""), + )) + """, + # pre-commit >= 2.16.0 + "bash": f"""\ + VIRTUAL_ENV={shlex.quote(virtualenv)} + PATH={shlex.quote(session.bin)}"{os.pathsep}$PATH" + """, + # pre-commit >= 2.17.0 on Windows forces sh shebang + "/bin/sh": f"""\ + VIRTUAL_ENV={shlex.quote(virtualenv)} + PATH={shlex.quote(session.bin)}"{os.pathsep}$PATH" + """, + } + + hookdir = Path(".git") / "hooks" + if not hookdir.is_dir(): + return + + for hook in hookdir.iterdir(): + if hook.name.endswith(".sample") or not hook.is_file(): + continue + + if not hook.read_bytes().startswith(b"#!"): + continue + + text = hook.read_text() + + if not any( + (Path("A") == Path("a") and bindir.lower() in text.lower()) + or bindir in text + for bindir in bindirs + ): + continue + + lines = text.splitlines() + + for executable, header in headers.items(): + if executable in lines[0].lower(): + lines.insert(1, dedent(header)) + hook.write_text("\n".join(lines)) + break + + +@nox.session(name="pre-commit", python=python_versions[0]) +def precommit(session: nox.Session) -> None: + """Lint using pre-commit.""" + args = session.posargs or [ + "run", + "--all-files", + "--hook-stage=manual", + "--show-diff-on-failure", + ] + session.run( + "uv", + "sync", + "--active", + "--group", + "dev", + "--group", + "lint", + external=True, + ) + session.run("pre-commit", *args, external=True) + if args and args[0] == "install": + activate_virtualenv_in_precommit_hooks(session) + + +@nox.session(python=python_versions) +def mypy(session: nox.Session) -> None: + """Type-check using mypy.""" + args = session.posargs or ["src", "tests", "docs/conf.py"] + # session.run( + # "uv", + # "sync", + # "--active", + # "--group", + # "dev", + # "--group", + # "mypy", + # env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + # external=True, + # ) + session.install( + "--group", "dev", "--group", "mypy" + ) # Install mypy and dev dependencies + session.install("-e", ".") + session.run("mypy", *args) + if not session.posargs: + session.run( + "mypy", f"--python-executable={sys.executable}", "noxfile.py" + ) + + +@nox.session(python=python_versions) +def tests(session: nox.Session) -> None: + """Run the test suite.""" + # session.run_install( + # "uv", + # "sync", + # "--group", + # "dev", + # "--group", + # "lint", + # env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + # ) + # + session.env["PYTHONPATH"] = "src" + session.install("--group", "dev", "--group", "lint") + session.install("-e", ".") + + try: + session.run( + "coverage", + "run", + "--parallel", + "-m", + "pytest", + *session.posargs, + 
external=True, + env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + ) + finally: + if session.interactive: + session.notify("coverage", posargs=[]) + + +@nox.session(python=python_versions[0]) +def coverage(session: nox.Session) -> None: + """Produce the coverage report.""" + args = session.posargs or ["report"] + session.run( + "uv", + "pip", + "install", + "--active", + "coverage[toml]", + env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + external=True, + ) + if not session.posargs and any(Path().glob(".coverage.*")): + session.run("coverage", "combine") + + session.run("coverage", *args) + + +@nox.session(python=python_versions[0]) +def typeguard(session: nox.Session) -> None: + """Runtime type checking using Typeguard.""" + # session.run( + # "uv", + # "sync", + # "--group", + # "dev", + # "--group", + # "typeguard", + # env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + # external=True, + # ) + session.install("--group", "dev", "--group", "typeguard") + session.install("-e", ".") + session.run("pytest", "--typeguard-packages", package, *session.posargs) + + +@nox.session(python=python_versions) +def xdoctest(session: nox.Session) -> None: + """Run examples with xdoctest.""" + if session.posargs: + args = [package, *session.posargs] + else: + args = [f"--modname={package}", "--command=all"] + if "FORCE_COLOR" in os.environ: + args.append("--colored=1") + session.install("--group", "dev", "--group", "xdoctest") + session.install("-e", ".") + session.run("xdoctest", package, *session.posargs) + + +@nox.session(name="docs-build", python=python_versions[1]) +def docs_build(session: nox.Session) -> None: + """Build the documentation.""" + args = session.posargs or ["docs", "docs/_build"] + if not session.posargs and "FORCE_COLOR" in os.environ: + args.insert(0, "--color") + + # session.run( + # "uv", + # "sync", + # "--group", + # "docs", + # "--group", + # "dev", + # env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + # external=True, + # ) + session.install("--group", "dev", "--group", "docs") + session.install("-e", ".") + + build_dir = Path("docs", "_build") + if build_dir.exists(): + shutil.rmtree(build_dir) + + session.run("sphinx-build", *args) + + +@nox.session(python=python_versions[0]) +def docs(session: nox.Session) -> None: + """Build and serve the documentation with live reloading on file changes.""" + args = session.posargs or ["--open-browser", "docs", "docs/_build"] + session.run( + "uv", + "sync", + "--group", + "docs", + external=True, + env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, + ) + + build_dir = Path("docs", "_build") + if build_dir.exists(): + shutil.rmtree(build_dir) + + session.run("sphinx-autobuild", *args) diff --git a/odoo_convert_path_to_image.py b/odoo_convert_path_to_image.py deleted file mode 100755 index 25c76e3e..00000000 --- a/odoo_convert_path_to_image.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -#-*- coding: utf-8 -*- -''' -Copyright (C) Thibault Francois - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as -published by the Free Software Foundation, version 3. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Lesser Public License for more details. 
- -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see . -''' - -import argparse -import os -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import Processor - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Convert csv column Image Path into base64') - parser.add_argument('file', metavar='F', help='file to convert') - parser.add_argument('--path', dest='path', help='Image Path Prefix, default is the working directory') - parser.add_argument('--out', dest='out', help='name of the result file, default out.csv', default="out.csv") - parser.add_argument('-f', dest='fields', help='Fields to convert from path to base64, comma separated', required = True) - args = parser.parse_args() - - file_csv = args.file - out_csv = args.out - path = args.path - fields = args.fields - if not path: - path = os.getcwd() - if not path.endswith(os.sep): - path += os.sep - - - processor = Processor(file_csv) - mapping = processor.get_o2o_mapping() - for f in fields.split(','): - f = f.strip() - mapping[f] = mapper.binary_map(mapper.remove_sep_mapper(f), path) - processor.process(mapping, out_csv, {}, 'list') - processor.write_to_file("") - diff --git a/odoo_convert_url_to_image.py b/odoo_convert_url_to_image.py deleted file mode 100755 index b36ed40d..00000000 --- a/odoo_convert_url_to_image.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python -#-*- coding: utf-8 -*- -''' -Copyright (C) Thibault Francois - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as -published by the Free Software Foundation, version 3. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Lesser Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see . -''' - -import argparse -import os -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import Processor - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Convert csv column Image URL into base64') - parser.add_argument('file', metavar='F', help='file to convert') - parser.add_argument('--out', dest='out', help='name of the result file, default out.csv', default="out.csv") - parser.add_argument('-f', dest='fields', help='Fields to convert from path to base64, comma separated', required = True) - args = parser.parse_args() - - file_csv = args.file - out_csv = args.out - fields = args.fields - - processor = Processor(file_csv) - mapping = processor.get_o2o_mapping() - for f in fields.split(','): - f = f.strip() - mapping[f] = mapper.binary_url(f, verbose=True) - processor.process(mapping, out_csv, {}, 'list') - processor.write_to_file("") - diff --git a/odoo_csv_tools/__init__.py b/odoo_csv_tools/__init__.py deleted file mode 100644 index f58bd928..00000000 --- a/odoo_csv_tools/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from . import lib -from . import export_threaded -from . 
import import_threaded diff --git a/odoo_csv_tools/export_threaded.py b/odoo_csv_tools/export_threaded.py deleted file mode 100755 index 1d591ce3..00000000 --- a/odoo_csv_tools/export_threaded.py +++ /dev/null @@ -1,100 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Copyright (C) Thibault Francois - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as -published by the Free Software Foundation, version 3. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Lesser Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see . -''' -import sys -import csv - -from time import time - -from .lib import conf_lib -from .lib.conf_lib import log_error, log_info -from .lib.internal.rpc_thread import RpcThread -from .lib.internal.csv_reader import UnicodeWriter -from .lib.internal.io import ListWriter, open_write -from .lib.internal.tools import batch - - -if sys.version_info >= (3, 0, 0): - from xmlrpc.client import Fault -else: - from xmlrpclib import Fault - -csv.field_size_limit(2**31-1) - -class RPCThreadExport(RpcThread): - - def __init__(self, max_connection, model, header, writer, batch_size=20, context=None): - super(RPCThreadExport, self).__init__(max_connection) - self.model = model - self.header = header - self.batch_size = batch_size - self.writer = writer - self.context = context - self.result = {} - - def launch_batch(self, data_ids, batch_number): - def launch_batch_fun(data_ids, batch_number, check=False): - st = time() - try: - self.result[batch_number] = self.model.export_data(data_ids, self.header, context=self.context)['datas'] - except Fault as e: - log_error("export %s failed" % batch_number) - log_error(e.faultString) - except Exception as e: - log_info("Unknown Problem") - exc_type, exc_value, _ = sys.exc_info() - # traceback.print_tb(exc_traceback, file=sys.stdout) - log_error(exc_type) - log_error(exc_value) - log_info("time for batch %s: %s" % (batch_number, time() - st)) - - self.spawn_thread(launch_batch_fun, [data_ids, batch_number], {}) - - def write_file(self, file_writer): - file_writer.writerow(self.header) - for key in self.result: - file_writer.writerows(self.result[key]) - - -def export_data(config_file, model, domain, header, context=None, output=None, max_connection=1, batch_size=100, - separator=';', encoding='utf-8'): - object_registry = conf_lib.get_server_connection(config_file).get_model(model) - - if output: - file_result = open_write(output, encoding=encoding) - writer = UnicodeWriter(file_result, delimiter=separator, encoding=encoding, quoting=csv.QUOTE_ALL) - else: - writer = ListWriter() - - rpc_thread = RPCThreadExport(int(max_connection), object_registry, header, writer, batch_size, context) - st = time() - - ids = object_registry.search(domain, context=context) - i = 0 - for b in batch(ids, batch_size): - batch_ids = [l for l in b] - rpc_thread.launch_batch(batch_ids, i) - i += 1 - - rpc_thread.wait() - log_info("%s %s exported, total time %s second(s)" % (len(ids), model, (time() - st))) - log_info("Writing file") - rpc_thread.write_file(writer) - if output: - file_result.close() - return False, False - else: - return writer.header, writer.data diff --git a/odoo_csv_tools/import_threaded.py b/odoo_csv_tools/import_threaded.py deleted 
file mode 100755 index 0e87a40e..00000000 --- a/odoo_csv_tools/import_threaded.py +++ /dev/null @@ -1,245 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Copyright (C) Thibault Francois - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as -published by the Free Software Foundation, version 3. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Lesser Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see . -''' - -import sys -import csv - -from time import time - -from .lib import conf_lib -from .lib.conf_lib import log_error, log_info, log -from .lib.internal.rpc_thread import RpcThread -from .lib.internal.io import ListWriter, open_read, open_write -from .lib.internal.csv_reader import UnicodeReader, UnicodeWriter -from .lib.internal.tools import batch - -if sys.version_info >= (3, 0, 0): - from xmlrpc.client import Fault -else: - from xmlrpclib import Fault - from builtins import range - -csv.field_size_limit(2**31-1) - - -class RPCThreadImport(RpcThread): - - def __init__(self, max_connection, model, header, writer, batch_size=20, context=None): - super(RPCThreadImport, self).__init__(max_connection) - self.model = model - self.header = header - self.batch_size = batch_size - self.writer = writer - self.context = context - - def launch_batch(self, data_lines, batch_number, check=False, o2m=False): - def launch_batch_fun(lines, batch_number, check=False): - i = 0 - batch_size = len(lines) if o2m else self.batch_size - for lines_batch in batch(lines, batch_size): - lines_batch = [l for l in lines_batch] - self.sub_batch_run(lines_batch, batch_number, i, len(lines), check=check) - i += 1 - - self.spawn_thread(launch_batch_fun, [data_lines, batch_number], {'check': check}) - - def sub_batch_run(self, lines, batch_number, sub_batch_number, total_line_nb, check=False): - success = False - - st = time() - try: - success = self._send_rpc(lines, batch_number, sub_batch_number, check=check) - except Fault as e: - log_error("Line %s %s failed" % (batch_number, sub_batch_number)) - log_error(e.faultString) - except ValueError as e: - log_error("Line %s %s failed value error" % (batch_number, sub_batch_number)) - except Exception as e: - log_info("Unknown Problem") - exc_type, exc_value, _ = sys.exc_info() - # traceback.print_tb(exc_traceback, file=sys.stdout) - log_error(exc_type) - log_error(exc_value) - - if not success: - self.writer.writerows(lines) - - log_info("time for batch %s - %s of %s : %s" % ( - batch_number, (sub_batch_number + 1) * self.batch_size, total_line_nb, time() - st)) - - def _send_rpc(self, lines, batch_number, sub_batch_number, check=False): - res = self.model.load(self.header, lines, context=self.context) - if res['messages']: - for msg in res['messages']: - log_error('batch %s, %s' % (batch_number, sub_batch_number)) - log_error(msg) - log_error(lines[msg['record']]) - return False - if len(res['ids']) != len(lines) and check: - log_error("number of record import is different from the record to import, probably duplicate xml_id") - return False - - return True - - -def filter_line_ignore(ignore, header, line): - new_line = [] - for k, val in zip(header, line): - if k not in ignore: - new_line.append(val) - return new_line - - -def 
filter_header_ignore(ignore, header): - new_header = [] - for val in header: - if val not in ignore: - new_header.append(val) - return new_header - - -def read_file(file_to_read, delimiter=';', encoding='utf-8', skip=0): - def get_real_header(header): - """ Get real header cut at the first empty column """ - new_header = [] - for head in header: - if head: - new_header.append(head) - else: - break - return new_header - - def check_id_column(header): - try: - header.index('id') - except ValueError as ve: - log_error("No External Id (id) column defined, please add one") - raise ve - - def skip_line(reader): - log_info("Skipping until line %s excluded" % skip) - for _ in range(1, skip): - reader.next() - - log('open %s' % file_to_read) - file_ref = open_read(file_to_read, encoding=encoding) - reader = UnicodeReader(file_ref, delimiter=delimiter, encoding=encoding) - header = next(reader) - header = get_real_header(header) - check_id_column(header) - skip_line(reader) - data = [l for l in reader] - return header, data - - -""" - Splitting helper method -""" - - -def split_sort(split, header, data): - split_index = 0 - if split: - try: - split_index = header.index(split) - except ValueError as ve: - log("column %s not defined" % split) - raise ve - data = sorted(data, key=lambda d: d[split_index]) - return data, split_index - - -def do_not_split(split, previous_split_value, split_index, line, o2m=False, id_index=0): - # Do not split if you want to keep the one2many line with it's parent - # The column id should be empty - if o2m and not line[id_index]: - return True - - if not split: # If no split no need to continue - return False - - split_value = line[split_index] - if split_value != previous_split_value: # Different Value no need to not split - return False - - return True - - -def import_data(config_file, model, header=None, data=None, file_csv=None, context=None, fail_file=False, - encoding='utf-8', separator=";", ignore=False, split=False, check=True, max_connection=1, - batch_size=10, skip=0, o2m=False): - """ - header and data mandatory in file_csv is not provided - - """ - ignore = ignore or [] - context = context or {} - - if file_csv: - header, data = read_file(file_csv, delimiter=separator, encoding=encoding, skip=skip) - fail_file = fail_file or file_csv + ".fail" - file_result = open_write(fail_file, encoding=encoding) - - if not header or data == None: - raise ValueError("Please provide either a data file or a header and data") - - object_registry = conf_lib.get_server_connection(config_file).get_model(model) - - if file_csv: - writer = UnicodeWriter(file_result, delimiter=separator, encoding=encoding, quoting=csv.QUOTE_ALL) - else: - writer = ListWriter() - - writer.writerow(filter_header_ignore(ignore, header)) - if file_csv: - file_result.flush() - rpc_thread = RPCThreadImport(int(max_connection), object_registry, filter_header_ignore(ignore, header), writer, - batch_size, context) - st = time() - - try: - id_index = header.index('id') - except: - id_index = list(header).index('id') # Support python3 dict_keys - data, split_index = split_sort(split, header, data) - - i = 0 - previous_split_value = False - while i < len(data): - lines = [] - j = 0 - while i < len(data) and ( - j < batch_size or do_not_split(split, previous_split_value, split_index, data[i], o2m=o2m, - id_index=id_index)): - line = data[i][:len(header)] - lines.append(filter_line_ignore(ignore, header, line)) - previous_split_value = line[split_index] - j += 1 - i += 1 - batch_number = split and "[%s] - [%s]" % 
( - rpc_thread.thread_number(), previous_split_value) or "[%s]" % rpc_thread.thread_number() - rpc_thread.launch_batch(lines, batch_number, check, o2m=o2m) - - rpc_thread.wait() - if file_csv: - file_result.close() - - log_info("%s %s imported, total time %s second(s)" % (len(data), model, (time() - st))) - if file_csv: - return False, False - else: - return writer.header, writer.data diff --git a/odoo_csv_tools/lib/__init__.py b/odoo_csv_tools/lib/__init__.py deleted file mode 100644 index 82df14f9..00000000 --- a/odoo_csv_tools/lib/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from . import internal -from . import conf_lib -from . import workflow -from . import checker -from . import mapper -from . import transform -from . import xml_transform diff --git a/odoo_csv_tools/lib/checker.py b/odoo_csv_tools/lib/checker.py deleted file mode 100644 index 61d27dc2..00000000 --- a/odoo_csv_tools/lib/checker.py +++ /dev/null @@ -1,54 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Created on 29 feb. 2016 - -@author: Thibault Francois -''' -#TODO -import re - -def id_validity_checker(id_field, pattern, null_values=['NULL']): - def check_id_validity(header, data): - regular = re.compile(pattern) - res = True - for i, line in enumerate(data): - line = [s.strip() if s.strip() not in null_values else '' for s in line] - line_dict = dict(zip(header, line)) - if not regular.match(line_dict[id_field]): - print("Check Failed Id Validity", i+1, line_dict[id_field]) - res = False - return res - return check_id_validity - -def line_length_checker(length): - def check_line_length(header, data): - i = 1 - res = True - for line in data: - i+=1 - if len(line) != length: - print("Check Failed", i, "Line Length", len(line)) - res = False - return res - return check_line_length - -def line_number_checker(line_number): - def check_line_numner(header, data): - if len(data) + 1 != line_number: - print("Check Line Number Failed %s instead of %s" % (len(data) + 1, line_number)) - return False - else: - return True - return check_line_numner - -def cell_len_checker(max_cell_len): - def check_max_cell_len(header, data): - res = True - for i, line in enumerate(data): - for ele in line: - if len(ele) > max_cell_len: - print("Check Failed", i + 1, "Cell Length", len(ele)) - print(line) - res = False - return res - return check_max_cell_len diff --git a/odoo_csv_tools/lib/conf_lib.py b/odoo_csv_tools/lib/conf_lib.py deleted file mode 100644 index e76971b4..00000000 --- a/odoo_csv_tools/lib/conf_lib.py +++ /dev/null @@ -1,44 +0,0 @@ -import odoolib -import sys -if sys.version_info >= (3, 0, 0): - import configparser as ConfigParser -else: - import ConfigParser -import logging -import sys - - -def get_server_connection(config_file): - config = ConfigParser.RawConfigParser({'protocol' : 'xmlrpc', 'port' : 8069}) - config.read(config_file) - - hostname = config.get('Connection', 'hostname') - database = config.get('Connection', 'database') - login = config.get('Connection', 'login') - password = config.get('Connection', 'password') - protocol = config.get('Connection', 'protocol') - port = int(config.get('Connection', 'port')) - uid = int(config.get('Connection', 'uid')) - return odoolib.get_connection(hostname=hostname, database=database, login=login, password=password, protocol=protocol, port=port, user_id=uid) - -def init_logger(): - logger_err = logging.getLogger("error") - logger_err.setLevel(logging.INFO) - err = logging.StreamHandler(sys.stderr) - logger_err.addHandler(err) - logger = logging.getLogger("info") - 
logger.setLevel(logging.INFO) - out = logging.StreamHandler(sys.stdout) - logger.addHandler(out) - -def log_info(msg): - logging.getLogger("info").info(msg) - -def log_error(msg): - logging.getLogger("error").info(msg) - -def log(msg): - log_info(msg) - log_error(msg) - -init_logger() diff --git a/odoo_csv_tools/lib/internal/__init__.py b/odoo_csv_tools/lib/internal/__init__.py deleted file mode 100644 index 5f6aaf50..00000000 --- a/odoo_csv_tools/lib/internal/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from . import exceptions -from . import tools -from . import csv_reader -from . import io -from . import rpc_thread diff --git a/odoo_csv_tools/lib/internal/csv_reader.py b/odoo_csv_tools/lib/internal/csv_reader.py deleted file mode 100644 index e81c01c7..00000000 --- a/odoo_csv_tools/lib/internal/csv_reader.py +++ /dev/null @@ -1,58 +0,0 @@ -''' -Created on 16 mai 2014 - -@author: openerp -''' -from __future__ import absolute_import -import sys -#import csv, codecs -if sys.version_info >= (3, 0, 0): - import csv -else: - import unicodecsv as csv -from io import StringIO -import threading - -class UnicodeReader: - """ - A CSV reader which will iterate over lines in the CSV file "f", - which is encoded in the given encoding. - """ - - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - self.reader = csv.reader(f, dialect=dialect, **kwds) - - def next(self): - #For python2 - return self.reader.next() - - def __next__(self): - #For python3 - return self.reader.__next__() - - def __iter__(self): - return self - - -class UnicodeWriter: - """ - A CSV writer which will write rows to CSV file "f", - which is encoded in the given encoding. - """ - - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - # Redirect output to a queue - self.stream = f - self.writer = writer = csv.writer(f, dialect=dialect, **kwds) - self.lock = threading.RLock() - - def writerow(self, row): - self.lock.acquire() - self.writer.writerow(row) - self.lock.release() - - def writerows(self, rows): - self.lock.acquire() - self.writer.writerows(rows) - self.stream.flush() - self.lock.release() diff --git a/odoo_csv_tools/lib/internal/exceptions.py b/odoo_csv_tools/lib/internal/exceptions.py deleted file mode 100644 index 1234a9d6..00000000 --- a/odoo_csv_tools/lib/internal/exceptions.py +++ /dev/null @@ -1,9 +0,0 @@ -''' -Created on 9 sept. 2016 - -@author: Thibault Francois -''' - -class SkippingException(Exception): - def __init__(self, message): - self.message = message diff --git a/odoo_csv_tools/lib/internal/io.py b/odoo_csv_tools/lib/internal/io.py deleted file mode 100644 index 05bc0609..00000000 --- a/odoo_csv_tools/lib/internal/io.py +++ /dev/null @@ -1,130 +0,0 @@ -''' -Created on 10 sept. 2016 - -@author: mythrys -''' -from __future__ import absolute_import - -import csv -import os -import sys -from . 
csv_reader import UnicodeWriter, UnicodeReader - -""" - Compatibility layer between python 2.7 and python 3 -""" -def is_string(f): - if sys.version_info >= (3, 0, 0): - return isinstance(f, str) - else: - return isinstance(f, basestring) - -def open_read(f, encoding='utf-8'): - if not is_string(f): - return f - if sys.version_info >= (3, 0, 0): - return open(f, 'r', newline='', encoding=encoding) - else: - return open(f, 'r') - -def open_write(f, encoding='utf-8'): - if not is_string(f): - return f - if sys.version_info >= (3, 0, 0): - return open(f, "w", newline='', encoding=encoding) - else: - return open(f, "w") - -def write_csv(filename, header, data, encoding="utf-8"): - file_result = open_write(filename, encoding=encoding) - c = UnicodeWriter(file_result, delimiter=';', quoting=csv.QUOTE_ALL, encoding=encoding) - c.writerow(header) - for d in data: - c.writerow(d) - file_result.close() - -def write_file(filename=None, header=None, data=None, fail=False, model="auto", - launchfile="import_auto.sh", worker=1, batch_size=10, init=False, encoding="utf-8", - conf_file=False, groupby='', sep=";", python_exe='python', path='', context=None, ignore=""): - def get_model(): - if model == "auto": - return filename.split(os.sep)[-1][:-4] - else: - return model - - context = '--context="%s"' % str(context) if context else '' - conf_file = conf_file or "%s%s%s" % ('conf', os.sep, 'connection.conf') - write_csv(filename, header, data, encoding=encoding) - if not launchfile: - return - - if not path.endswith(os.sep): - path = os.path.join(path, "") - - py_script = 'odoo_import_thread.py' - os_cmd = os.path.join(path, py_script) - if ' ' in os_cmd: - os_cmd =''.join(('"', os_cmd, '"')) - - mode = init and 'w' or 'a' - with open(launchfile, mode) as myfile: - myfile.write("%s %s -c %s --file=%s --model=%s --encoding=%s --worker=%s --size=%s --groupby=%s --ignore=%s --sep=\"%s\" %s\n" % - (python_exe, os_cmd, conf_file, filename, get_model(), encoding, worker, batch_size, groupby, ignore, sep, context)) - if fail: - myfile.write("%s %s -c %s --fail --file=%s --model=%s --encoding=%s --ignore=%s --sep=\"%s\" %s\n" % - (python_exe, os_cmd, conf_file, filename, get_model(), encoding, ignore, sep, context)) - - -################################################ -# Method to merge file together based on a key # -################################################ - -def write_file_dict(filename, header, data): - data_rows = [] - for _, val in data.iteritems(): - r = [val.get(h, '') for h in header] - data_rows.append(r) - write_csv(filename, header, data_rows) - - - -def read_file_dict(file_name, id_name): - file_ref = open(file_name, 'r') - reader = UnicodeReader(file_ref, delimiter=';') - - head = reader.next() - res = {} - for line in reader: - if any(line): - line_dict = dict(zip(head, line)) - res[line_dict[id_name]] = line_dict - return res, head - -def merge_file(master, child, field): - res = {} - for key, val in master.iteritems(): - data = dict(child.get(val[field], {})) - new_dict = dict(val) - new_dict.update(data) - res[key] = new_dict - return res - - -def merge_header(*args): - old_header = [item for sublist in args for item in sublist] - header = [] - for h in old_header: - if h and h not in header: - header.append(h) - return header - -class ListWriter(object): - def __init__(self): - self.data = [] - self.header = [] - - def writerow(self, header): - self.header = list(header) - - def writerows(self, line): - self.data.extend(list(line)) diff --git a/odoo_csv_tools/lib/internal/rpc_thread.py 
b/odoo_csv_tools/lib/internal/rpc_thread.py deleted file mode 100644 index 28a75604..00000000 --- a/odoo_csv_tools/lib/internal/rpc_thread.py +++ /dev/null @@ -1,40 +0,0 @@ -#-*- coding: utf-8 -*- -''' -Created on 19 august 2016 - -@author: Thibault Francois -''' - -import threading - -class RpcThread(object): - - def __init__(self, max_connection): - self.semaphore = threading.BoundedSemaphore(max_connection) - self.max_thread_semaphore = threading.BoundedSemaphore(max_connection * 4) - self.thread_list = [] - - def spawn_thread(self, fun, args, kwarg=None): - def wrapper(args, kwarg): - kwarg = kwarg or {} - self.semaphore.acquire() - try: - fun(*args, **kwarg) - except: - self.semaphore.release() - self.max_thread_semaphore.release() - raise - self.semaphore.release() - self.max_thread_semaphore.release() - self.max_thread_semaphore.acquire() - - thread = threading.Thread(None, wrapper, None, [args, kwarg], {}) - thread.start() - self.thread_list.append(thread) - - def wait(self): - for t in self.thread_list: - t.join() - - def thread_number(self): - return len(self.thread_list) diff --git a/odoo_csv_tools/lib/internal/tools.py b/odoo_csv_tools/lib/internal/tools.py deleted file mode 100644 index 90bf0170..00000000 --- a/odoo_csv_tools/lib/internal/tools.py +++ /dev/null @@ -1,95 +0,0 @@ -''' -Created on 9 sept. 2016 - -@author: Thibault Francois -''' -from itertools import islice, chain - -def batch(iterable, size): - sourceiter = iter(iterable) - while True: - batchiter = islice(sourceiter, size) - try: - yield chain([next(batchiter)], batchiter) - except StopIteration: - return -""" - Data formatting tools -""" -def to_xmlid(name): - return name.replace('.', '_').replace(',', '_').replace('\n', '_').replace('|', '_').replace(' ', '_').strip() - -def list_to_xml_id(names): - return '_'.join([to_xmlid(name) for name in names]) - -def to_m2o(PREFIX, value, default=''): - if not value: - return default - return PREFIX + '.' + to_xmlid(value) - -def to_m2m(PREFIX, value): - if not value: - return '' - - ids = [] - for val in value.split(','): - if val.strip(): - ids.append(PREFIX + '.' 
+ to_xmlid(val)) - return ','.join(ids) - -def generate_attribute_list(PREFIX, *attributes): - header = ['id', 'name'] - lines = set() - for att in attributes: - lines.add((to_m2o(PREFIX, att), att)) - return header, lines - -""" - Secondary data file helper - -""" -class ReprWrapper(object): - def __init__(self, repr_str, func): - self._repr = repr_str - self._func = func - - def __call__(self, *args, **kw): - return self._func(*args, **kw) - - def __repr__(self): - return self._repr - -class AttributeLineDict: - def __init__(self, attribute_list_ids, id_gen_fun): - self.data = {} - self.att_list = attribute_list_ids - self.id_gen = id_gen_fun - - def add_line(self, line, header): - """ - line = ['product_tmpl_id/id' : id, 'attribute_id/id' : dict (att : id), 'value_ids/id' : dict(att: id)] - """ - line_dict = dict(zip(header, line)) - if self.data.get(line_dict['product_tmpl_id/id']): - for att_id, att in self.att_list: - if not line_dict['attribute_id/id'].get(att): - continue - template_info = self.data[line_dict['product_tmpl_id/id']] - template_info.setdefault(att_id, [line_dict['value_ids/id'][att]]).append(line_dict['value_ids/id'][att]) - else: - d = {} - for att_id, att in self.att_list: - if line_dict['attribute_id/id'].get(att): - d[att_id] = [line_dict['value_ids/id'][att]] - self.data[line_dict['product_tmpl_id/id']] = d - - def generate_line(self): - lines_header = ['id', 'product_tmpl_id/id', 'attribute_id/id', 'value_ids/id'] - lines_out = [] - for template_id, attributes in self.data.items(): - if not template_id: - continue - for attribute, values in attributes.items(): - line = [self.id_gen(template_id, attributes), template_id, attribute, ','.join(values)] - lines_out.append(line) - return lines_header, lines_out diff --git a/odoo_csv_tools/lib/mapper.py b/odoo_csv_tools/lib/mapper.py deleted file mode 100644 index 151a5684..00000000 --- a/odoo_csv_tools/lib/mapper.py +++ /dev/null @@ -1,381 +0,0 @@ -""" - Mapper -""" -from . internal.tools import to_m2m, to_m2o -from . internal.io import is_string -from . 
internal.exceptions import SkippingException -import base64 -import os -import requests - -def str_to_mapper(field): - if is_string(field): - return val(field) - return field - -def list_to_mapper(args): - return [val(f) if is_string(f) else f for f in args] - - -def field(col): - """ Return the col name if the col value for the given line is not empty - Use for product.attribute mapping - """ - def field_fun(line): - return col if line[col] else '' - return field_fun - -def const(value): - def const_fun(line): - return value - return const_fun - -def val(field, default='', postprocess=lambda x: x, skip=False): - def val_fun(line): - if not line[field] and skip: - raise SkippingException("Missing Value for %s" % field) - return postprocess(line.get(field, default) or default) - return val_fun - -def val_fallback(field, fallback_file, default='', postprocess=lambda x: x, skip=False): - def val_fun(line): - if not line[field] and not line[fallback_file] and skip: - raise SkippingException("Missing Value for %s" % field) - value = line[field] or line[fallback_file] or default - return postprocess(value) - return val_fun - -def val_label(field, default='', postprocess=lambda x: x, skip=False): - val_m = val(field, default=default, postprocess=postprocess, skip=skip) - def val_label_fun(line): - return "%s : %s" % (field, val_m(line)) - return val_label_fun - -def concat_mapper(separtor, *mapper): - def concat_fun(line): - return separtor.join([m(line) for m in mapper if m(line)]) - return concat_fun - -def concat_mapper_all(separtor, *mapper): - """ - Same as concat mapper, but if one value in the list of value to concat is empty, the all value return is - an empty string - Use for product.attribute - """ - def concat_fun(line): - values = [m(line) for m in mapper] - if not all(values): - return '' - return separtor.join(values) - return concat_fun - - -def concat(separtor, *fields): - return concat_mapper(separtor, *[val(f) for f in fields]) - -def concat_field(separtor, *fields): - return concat_mapper(separtor, *[val_label(f) for f in fields]) - -def concat_field_value_m2m(separator, *args): - def concat_name_value_fun(line): - return ','.join([separator.join([f, line[f]]) for f in args if line[f]]) - return concat_name_value_fun - -def map_val(field, mapping, default=''): - return val(field, postprocess=lambda x : mapping.get(x, default)) - -def num(field, default='0.0'): - return val(field, default, postprocess=lambda x: x.replace(',', '.')) - -def m2o_map(PREFIX, mapper, default='', skip=False): - def m2o_fun(line): - if skip and not mapper(line): - raise SkippingException("Missing Value for %s" % mapper(line)) - return to_m2o(PREFIX, mapper(line), default=default) - return m2o_fun - -def m2o(PREFIX, field, default='', skip=False): - def m2o_fun(line): - if skip and not line[field]: - raise SkippingException("Missing Value for %s" % field) - return to_m2o(PREFIX, line[field], default=default) - return m2o_fun - -def m2m(PREFIX, *args): - """ - @param args: list of string that should be included into the m2m field - """ - #TODO: add default - def m2m_fun(line): - return ','.join([to_m2m(PREFIX, line[f]) for f in args if line[f]]) - return m2m_fun - -def m2m_map(PREFIX, mapper): - """ - @param args: list of string that should be included into the m2m field - """ - #TODO: add default - def m2m_fun(line): - return to_m2m(PREFIX, mapper(line)) - return m2m_fun - -def bool_val(field, true_vals=[], false_vals=[]): - def bool_val_fun(line): - if line[field] in true_vals: - return '1' - if 
line[field] in false_vals: - return '0' - return '1' if line[field] else '0' - return bool_val_fun - -def binary_map(mapper, path_prefix, skip=False, encoding="utf-8"): - def binary_val(line): - field = mapper(line) - path = path_prefix + (mapper(line) or '') - if not os.path.exists(path) or not field: - if skip: - raise SkippingException("Missing File %s for field %s" % (path, field)) - return '' - - with open(path, "rb") as image_file: - encoded_string = base64.b64encode(image_file.read()).decode(encoding) - image_file.close() - return encoded_string - return binary_val - -def binary(field, path_prefix, skip=False, encoding="utf-8"): - return binary_map(val(field), path_prefix, skip=skip, encoding=encoding) - - - -def binary_url_map(mapper, skip=False, verbose=False, encoding="utf-8"): - def binary_url_fun(line): - url = mapper(line) - if verbose: - print("Fetch %s" % url) - res = requests.get(url) - if not res.status_code == 200: - if skip: - raise SkippingException("Cannot fetch file at url %s" % url) - return '' - - return base64.b64encode(res.content).decode(encoding) - return binary_url_fun - -def binary_url(field, skip=False, verbose=False): - return binary_url_map(val(field), skip=skip, verbose=verbose) - - - -""" - Specific to attribute mapper for V9 product.attribute_import -""" - -def val_att(att_list): - def val_att_fun(line): - return { att : line[att] for att in att_list if line[att]} - return val_att_fun - -def m2o_att(PREFIX, att_list): - def m2o_att_fun(line): - return { att : to_m2o(PREFIX, '_'.join([att, line[att]])) for att in att_list if line[att]} - return m2o_att_fun - -def m2o_att_name(PREFIX, att_list): - def m2o_att_fun(line): - return { att : to_m2o(PREFIX, att) for att in att_list if line[att]} - return m2o_att_fun - -def m2m_attribute_value(PREFIX, *args): - return m2m_map(PREFIX, concat_field_value_m2m('_', *args)) - -""" -Specific to attribute mapper for V13+ product.template.attribute.value -""" - -def m2m_template_attribute_value(PREFIX, template_id_field, *args): - """ - Generates a mapping function for product.template.attribute.value XMLIDs, - including the product template identifier. - - This function is specifically designed to create a mapper that constructs - comma-separated strings of XML IDs for product attribute values, incorporating - the identifier of the associated product template. This is useful when you need - to establish relationships based on attribute values within a specific product template context. - - Args: - PREFIX (str): The prefix to use for the generated XML IDs - (e.g., 'PRODUCT_ATTRIBUTE_VALUE'). This prefix should - be consistent with how your XML IDs are structured. - template_id_field (str): The name of the field/column in the CSV data - that contains the identifier (e.g., XML ID, - database ID, or other unique key) of the - related product template. This identifier - will be included in the generated XML IDs. - *args (str): A variable number of field/column names from the CSV data - that represent attribute values. These values will be - used to construct the XML IDs. - - Returns: - function: A mapper function that takes a CSV row (as a dictionary) as - input and returns a comma-separated string of generated XML IDs. - If the 'template_id_field' is missing in the CSV row, it returns an empty string. 
- - Example: - Assuming you have a CSV with columns 'product_template_ref', 'color', and 'size', - and your XML IDs for product attribute values are like - 'PRODUCT_ATTRIBUTE_VALUE_product_template_ref_color_red', - you would use: - - mapper.m2m_template_attribute_value('PRODUCT_ATTRIBUTE_VALUE', 'product_template_ref', 'color', 'size') - - Important Notes: - - The generated XML IDs are constructed by concatenating the 'PREFIX', - the value from 'template_id_field', and the values from the provided - attribute columns. - - The function handles cases where the 'template_id_field' might be - missing in the CSV data, returning an empty string to avoid errors. - - Ensure that the 'PREFIX' and the column names in 'args' are consistent - with your actual data structure and XML ID conventions. - """ - - def m2m_fun(line): - template_id = line.get(template_id_field) - if not template_id: - return "" # Handle cases where template ID is missing - - def mapper(line): - return ','.join([f"{template_id}_{f}_{line[f]}" for f in args if line[f]]) - - return to_m2m(PREFIX, mapper(line)) - - return m2m_fun - - -""" - Mapper that require rpc Connection (conf_lib) -""" -def database_id_mapper(PREFIX, field, connection, skip=False): - def database_id_mapper_fun(line): - res = to_m2o(PREFIX, line[field]) - if res: - module, name = res.split('.') - rec = connection.get_model('ir.model.data').search_read([('module', '=', module), ('name', '=', name)], ['res_id']) - if rec and rec[0]['res_id']: - return str(rec[0]['res_id']) - if skip: - raise SkippingException("%s not found" % res) - return '' - return database_id_mapper_fun - -def database_id_mapper_fallback(connection, *fields_mapper, **kwargs): - skip = kwargs.get("skip") - def database_id_mapper_fun(line): - res = [f(line) for f in fields_mapper if f(line)] - if res: - res = res[0] - module, name = res.split('.') - rec = connection.get_model('ir.model.data').search_read([('module', '=', module), ('name', '=', name)], ['res_id']) - if rec and rec[0]['res_id']: - return str(rec[0]['res_id']) - if skip: - raise SkippingException("%s not found" % res) - return '' - return database_id_mapper_fun - -def database_id_mapper_fallback_create(connection, model, *fields_mapper, **kwargs): - skip = kwargs.get("skip") - def database_id_mapper_fun(line): - res = [f(line) for f in fields_mapper if f(line)] - if res: - res = res[0] - module, name = res.split('.') - rec = connection.get_model('ir.model.data').search_read([('module', '=', module), ('name', '=', name)], ['res_id']) - if rec and rec[0]['res_id']: - return str(rec[0]['res_id']) - else: - connection.get_model(model).load(['id', 'name'], [[res, res]], context={'tracking_disable' : True, 'create_product_variant' : True,}) - return database_id_mapper_fun(line) - if skip: - raise SkippingException("%s not found" % res) - return '' - return database_id_mapper_fun - - - -#For many2many specific process -def m2m_id_list(PREFIX, *args, **kwargs): - """ - @param args: list of string that should be included into the m2m field - @param const_values: constant values that will be add to all line - """ - const_values = kwargs.get("const_values", []) - def split_m2m_id_fun(line): - """ Return a list of unique element (xml_id, name) - """ - map_list = list_to_mapper(args) - value = ','.join([to_m2m(PREFIX, m(line)) for m in map_list if m(line)] + const_values) - s = [] - for val in value.split(','): - if val.strip(): - s.append(val) - return s - return split_m2m_id_fun - -def m2m_value_list(*args, **kwargs): - """ - @param args: 
list of string that should be included into the m2m field - @param const_values: constant values that will be add to all line - """ - const_values = kwargs.get("const_values", []) - def split_m2m_value_fun(line): - """ Return a list of unique element value - """ - map_list = list_to_mapper(args) - value = ','.join([m(line) for m in map_list if m(line)] + const_values) - s = [] - for val in value.split(','): - if val.strip(): - s.append(val) - return s - return split_m2m_value_fun - -def remove_sep_mapper(f): - """ - @param f: field that will have the starting folder separator removed - """ - def remove_sep_mapper_fun(line): - if line[f].startswith(os.sep): - return line[f][len(os.sep):] - else: - return line[f] - return remove_sep_mapper_fun - - -############################## -# # -# Split Mapper # -# # -############################## - -def split_line_number(line_nb): - """ - Return a function that can we used by split method from Processor class, - this function will split the data every x lines where x is given by the param line_nb - :param line_nb: - """ - def split(line, i): - return divmod(i, line_nb)[0] - return split - - -def split_file_number(file_nb): - """ - Return a function that can we used by split method from Processor class, - this function will split the data into x file where x is given by the param file_nb - Order of data is not kept - :param line_nb: - """ - def split(line, i): - return divmod(i, file_nb)[1] - return split diff --git a/odoo_csv_tools/lib/transform.py b/odoo_csv_tools/lib/transform.py deleted file mode 100644 index 6fbe4a6f..00000000 --- a/odoo_csv_tools/lib/transform.py +++ /dev/null @@ -1,250 +0,0 @@ -#-*- coding: utf-8 -*- -''' -Created on 10 sept. 2016 - -@author: Thibault Francois -''' -import os - -from collections import OrderedDict - -from . internal.csv_reader import UnicodeReader -from . internal.tools import ReprWrapper, AttributeLineDict -from . internal.io import write_file, is_string, open_read -from . internal.exceptions import SkippingException -from . import mapper - -class Processor(object): - def __init__(self, filename=None, delimiter=";", encoding='utf-8', header=None, data=None, preprocess=lambda header, data: (header, data), conf_file=False): - self.file_to_write = OrderedDict() - if header and data: - self.header = header - self.data = data - elif filename: - self.header, self.data = self.__read_file(filename, delimiter, encoding) - else: - raise Exception("No Filename nor header and data provided") - self.header, self.data = preprocess(self.header, self.data) - self.conf_file = conf_file - - def check(self, check_fun, message=None): - res = check_fun(self.header, self.data) - if not res: - if message: - print(message) - else: - print("%s failed" % check_fun.__name__) - return res - - def split(self, split_fun): - res = {} - for i, d in enumerate(self.data): - k = split_fun(dict(zip(self.header, d)), i) - res.setdefault(k, []).append(d) - processor_dict = {} - for k, data in res.items(): - processor_dict[k] = Processor(header=list(self.header), data=data) - return processor_dict - - def get_o2o_mapping(self): - """Will generate a mapping with 'key' : mapper.val('key') for each key - - you can print using pprint to print the equivalent python of the mapping to use it in your file - - :return: a dict where the key is a str and the value a mapper.val function, - the key and the field pass to the mapper are identical - - { - 'id' : mapper.val('id'), - ..... 
- } - """ - mapping = {} - for column in [h for h in self.header if h]: - map_val_rep = ReprWrapper("mapper.val('%s')" %column, mapper.val(column)) - mapping[str(column)] = map_val_rep - return mapping - - def process(self, mapping, filename_out, import_args, t='list', null_values=['NULL', False], verbose=True, m2m=False): - if m2m: - head, data = self.__process_mapping_m2m(mapping, null_values=null_values, verbose=verbose) - else: - head, data = self.__process_mapping(mapping, t=t, null_values=null_values, verbose=verbose) - self._add_data(head, data, filename_out, import_args) - return head, data - - def write_to_file(self, script_filename, fail=True, append=False, python_exe='python', path='', encoding='utf-8'): - init = not append - for _, info in self.file_to_write.items(): - info_copy = dict(info) - info_copy.update({ - 'model' : info.get('model', 'auto'), - 'init' : init, - 'launchfile' : script_filename, - 'fail' : fail, - 'python_exe' : python_exe, - 'path' : path, - 'conf_file' : self.conf_file, - 'encoding': encoding, - }) - - write_file(**info_copy) - init = False - - def get_processed_data(self, filename_out): - return self.file_to_write[filename_out] - - def join_file(self, filename, master_key, child_key, header_prefix="child", delimiter=";", encoding='utf-8'): - """ - Join another file with the main file defined in the constructor. - Need a key (column name) on the master file and on the file to join - The line of the file to join will be added a the end of a line if - the value of the column master_key match the value of the column child_key - - If the key is not found in the file to join, empty cell are added at the end of the master file - - A prefix is added (after the merge operation) to all the column of the child file - to avoid collision with the header of the master file - - E.g.: join_file(filename, 'category_id', 'name') - Master file | Child file - name category_id | name color - A A | A Blue - B A | B Red - C B - D B - E C - - Final File - name category_id child_name child_color - A A A Blue - B A A Blue - C B B Red - D B B Red - E C - """ - header, data = self.__read_file(filename, delimiter, encoding) - child_key_pos = header.index(child_key) - master_key_pos = self.header.index(master_key) - - data_map = {} - for d in data: - data_map[d[child_key_pos]] = d - - for d in self.data: - if data_map.get(d[master_key_pos]): - d.extend(data_map[d[master_key_pos]]) - else: - d.extend([""] * len(header)) - - self.header += ["%s_%s" % (header_prefix, h) for h in header] - - ######################################## - # # - # Private Method # - # # - ######################################## - def __read_file(self, filename, delimiter, encoding): - file_ref = open_read(filename, encoding=encoding) - reader = UnicodeReader(file_ref, delimiter=delimiter, encoding=encoding) - head = next(reader) - data = [d for d in reader] - return head, data - - def __process_mapping(self, mapping, t, null_values, verbose): - """ - @param t: type of return, list or set - """ - lines_out = [] if t == 'list' else set() - for i, line in enumerate(self.data): - line = [s.strip() if s and s.strip() not in null_values else '' for s in line] - line_dict = dict(zip(self.header, line)) - try: - line_out = [mapping[k](line_dict) for k in mapping.keys()] - except SkippingException as e: - if verbose: - print("Skipping", i) - print(e.message) - continue - if t == 'list': - lines_out.append(line_out) - else: - lines_out.add(tuple(line_out)) - return mapping.keys(), lines_out - - def 
__process_mapping_m2m(self, mapping, null_values, verbose): - """ - - """ - head, data = self.__process_mapping(mapping, 'list', null_values, verbose) - lines_out = set() - for line_out in data: - index_list = [] - zip_list = [] - for index, value in enumerate(line_out): - if isinstance(value, list): - index_list.append(index) - zip_list.append(value) - values_list = zip(*zip_list) - for values in values_list: - new_line = list(line_out) - for i, val in enumerate(values): - new_line[index_list[i]] = val - lines_out.add(tuple(new_line)) - - return head, lines_out - - def _add_data(self, head, data, filename_out, import_args): - import_args = dict(import_args) - import_args['filename'] = os.path.abspath(filename_out) if filename_out else False - import_args['header'] = head - import_args['data'] = data - self.file_to_write[filename_out] = import_args - - -class ProductProcessorV9(Processor): - def __generate_attribute_data(self, attributes_list, ATTRIBUTE_PREFIX): - self.attr_header = ['id', 'name'] - self.attr_data = [[mapper.to_m2o(ATTRIBUTE_PREFIX, att), att] for att in attributes_list] - - def process_attribute_mapping(self, mapping, line_mapping, attributes_list, ATTRIBUTE_PREFIX, path, import_args, id_gen_fun=None, null_values=['NULL']): - """ - Mapping : name is mandatory vat_att(attribute_list) - """ - def add_value_line(values_out, line): - for att in attributes_list: - value_name = line[list(mapping.keys()).index('name')].get(att) - if value_name: - line_value = [ele[att] if isinstance(ele, dict) else ele for ele in line] - values_out.add(tuple(line_value)) - - id_gen_fun = id_gen_fun or (lambda template_id, values : mapper.to_m2o(template_id.split('.')[0] + '_LINE', template_id)) - - values_header = mapping.keys() - values_data = set() - - self.__generate_attribute_data(attributes_list, ATTRIBUTE_PREFIX) - att_data = AttributeLineDict(self.attr_data, id_gen_fun) - for line in self.data: - line = [s.strip() if s.strip() not in null_values else '' for s in line] - line_dict = dict(zip(self.header, line)) - line_out = [mapping[k](line_dict) for k in mapping.keys()] - - add_value_line(values_data, line_out) - values_lines = [line_mapping[k](line_dict) for k in line_mapping.keys()] - att_data.add_line(values_lines, line_mapping.keys()) - - line_header, line_data = att_data.generate_line() - context = import_args.get('context', {}) - context['create_product_variant'] = True - import_args['context'] = context - self._add_data(self.attr_header, self.attr_data, path + 'product.attribute.csv', import_args) - self._add_data(values_header, values_data, path + 'product.attribute.value.csv', import_args) - import_args = dict(import_args, groupby='product_tmpl_id/id') - self._add_data(line_header, line_data, path + 'product.attribute.line.csv', import_args) - -class ProductProcessorV10(Processor): - def process_attribute_data(self, attributes_list, ATTRIBUTE_PREFIX, filename_out, import_args): - attr_header = ['id', 'name', 'create_variant'] - attr_data = [[mapper.to_m2o(ATTRIBUTE_PREFIX, att), att, 'Dynamically'] for att in attributes_list] - self._add_data(attr_header, attr_data, filename_out, import_args) diff --git a/odoo_csv_tools/lib/workflow.py b/odoo_csv_tools/lib/workflow.py deleted file mode 100644 index 82011ec9..00000000 --- a/odoo_csv_tools/lib/workflow.py +++ /dev/null @@ -1,156 +0,0 @@ -''' -Created on 7 avr. 
2016 - -@author: odoo -''' -#from __future__ import absolute_import -import sys -if sys.version_info >= (3, 0, 0): - from xmlrpc.client import Fault -else: - from xmlrpclib import Fault - -from time import time -from . internal.rpc_thread import RpcThread - -class InvoiceWorkflowV9(): - def __init__(self, connection, field, status_map, paid_date_field, payment_journal, max_connection=4): - """ - @param connection : need to use a jsonrpc connection - @param field: the that contains the state imported from legacy data - @param status_map: dict that contains the mapping between the odoo invoice status and legacy system status - the value should be a list - { - 'open' : ['satus1'], - 'paid' : ['status2', 'status3'], - 'cancel' : ... - 'proforma' : - } - """ - self.connection = connection - self.invoice_obj = connection.get_model('account.invoice') - self.payement_obj = connection.get_model('account.payment') - self.account_invoice_tax = self.connection.get_model('account.invoice.tax') - self.field = field - self.status_map = status_map - self.paid_date = paid_date_field - self.payment_journal = payment_journal - self.max_connection = max_connection - - def display_percent(self, i, percent_step, total): - if i % percent_step == 0: - print("%s%% : %s/%s time %s sec" % (round(i / float(total) * 100, 2), i, total, time() - self.time)) - - def set_tax(self): - def create_tax(invoice_id): - taxes = self.invoice_obj.get_taxes_values(invoice_id) - for tax in taxes.values(): - self.account_invoice_tax.create(tax) - - invoices = self.invoice_obj.search([('state', '=', 'draft'), - ('type', '=', 'out_invoice'), - ('tax_line_ids', '=', False)]) - total = len(invoices) - percent_step = int(total / 5000) or 1 - self.time = time() - rpc_thread = RpcThread(self.max_connection) - print("Compute Tax %s invoice" % total) - for i, invoice_id in enumerate(invoices): - self.display_percent(i, percent_step, total) - rpc_thread.spawn_thread(create_tax, [invoice_id]) - rpc_thread.wait() - - def validate_invoice(self): - invoice_to_validate = self.invoice_obj.search([(self.field, 'in', self.status_map['open'] + self.status_map['paid']), - ('state', '=', 'draft'), - ('type', '=', 'out_invoice')]) - total = len(invoice_to_validate) - percent_step = int(total / 5000) or 1 - rpc_thread = RpcThread(1) - print("Validate %s invoice" % total) - self.time = time() - for i, invoice_id in enumerate(invoice_to_validate): - self.display_percent(i, percent_step, total) - fun = self.connection.get_service('object').exec_workflow - rpc_thread.spawn_thread(fun, [self.connection.database, - self.connection.user_id, - self.connection.password, - 'account.invoice', - 'invoice_open', - invoice_id]) - rpc_thread.wait() - - def proforma_invoice(self): - invoice_to_proforma = self.invoice_obj.search([(self.field, 'in', self.status_map['proforma']), - ('state', '=', 'draft'), - ('type', '=', 'out_invoice')]) - total = len(invoice_to_proforma) - percent_step = int(total / 100) or 1 - self.time = time() - rpc_thread = RpcThread(self.max_connection) - print("Pro Format %s invoice" % total) - for i, invoice_id in enumerate(invoice_to_proforma): - self.display_percent(i, percent_step, total) - fun = self.connection.get_service('object').exec_workflow() - rpc_thread.spawn_thread(fun, [self.connection.database, - self.connection.user_id, - self.connection.password, - 'account.invoice', - 'invoice_proforma2', - invoice_id], {}) - rpc_thread.wait() - - def paid_invoice(self): - def pay_single_invoice(data_update, wizard_context): - data = 
self.payement_obj.default_get(["communication", "currency_id", "invoice_ids", - "payment_difference", "partner_id", "payment_method_id", - "payment_difference_handling", "journal_id", - "state", "writeoff_account_id", "payment_date", - "partner_type", "hide_payment_method", - "payment_method_code", "partner_bank_account_id", - "amount", "payment_type"], context=wizard_context) - data.update(data_update) - wizard_id = self.payement_obj.create(data, context=wizard_context) - try: - self.payement_obj.post([wizard_id], context=wizard_context) - except Fault: - pass - - - invoice_to_paid = self.invoice_obj.search_read([(self.field, 'in', self.status_map['paid']), ('state', '=', 'open'), ('type', '=', 'out_invoice')], - [self.paid_date, 'date_invoice']) - total = len(invoice_to_paid) - percent_step = int(total / 1000) or 1 - self.time = time() - rpc_thread = RpcThread(self.max_connection) - print("Paid %s invoice" % total) - for i, invoice in enumerate(invoice_to_paid): - self.display_percent(i, percent_step, total) - wizard_context = { - 'active_id' : invoice['id'], - 'active_ids' : [invoice['id']], - 'active.model' : 'account.invoice', - 'default_invoice_ids' : [(4, invoice['id'], 0)], - 'type' : "out_invoice", - "journal_type":"sale" - } - data_update = { - 'journal_id' : self.payment_journal, #payement journal - 'payment_date' : invoice[self.paid_date] or invoice['date_invoice'], - 'payment_method_id' : 1, - } - rpc_thread.spawn_thread(pay_single_invoice, [data_update, wizard_context], {}) - rpc_thread.wait() - - def rename(self, name_field): - invoice_to_paid = self.invoice_obj.search_read([(name_field, '!=', False),(name_field, '!=', '0.0'),('state', '!=', 'draft'), ('type', '=', 'out_invoice')], - [name_field]) - total = len(invoice_to_paid) - percent_step = int(total / 1000) or 1 - self.time = time() - rpc_thread = RpcThread(int(self.max_connection * 1.5)) - print("Rename %s invoice" % total) - for i, invoice in enumerate(invoice_to_paid): - self.display_percent(i, percent_step, total) - rpc_thread.spawn_thread(self.invoice_obj.write, [invoice['id'], {'number' : invoice[name_field], name_field : False}], {}) - rpc_thread.wait() diff --git a/odoo_csv_tools/lib/xml_transform.py b/odoo_csv_tools/lib/xml_transform.py deleted file mode 100644 index cc602386..00000000 --- a/odoo_csv_tools/lib/xml_transform.py +++ /dev/null @@ -1,61 +0,0 @@ -#-*- coding: utf-8 -*- -from . import transform -from collections import OrderedDict -from lxml import etree - - -class XMLProcessor(transform.Processor): - def __init__(self, filename, root_node_path, conf_file=False): # Add conf_file parameter - super().__init__(filename=filename) # Call Processor's __init__ - self.root = etree.parse(filename) - self.root_path = root_node_path - self.file_to_write = OrderedDict() - self.conf_file = conf_file # Initialize conf_file - - def process(self, mapping, filename_out, import_args, t='list', null_values=['NULL', False], verbose=True, m2m=False): - """ - Transforms data from the XML file based on the provided mapping. - - Args: - mapping (dict): A dictionary that defines how data from the XML file - should be mapped to fields in the output format (e.g., CSV). - The keys of the dictionary are the target field names, - and the values are XPath expressions to extract the - corresponding data from the XML. - filename_out (str): The name of the output file where the transformed - data will be written. 
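For clarity, a small hypothetical example of the XPath-based mapping this docstring describes; the XML file, root path, XPath expressions and target model are placeholders.

from odoo_csv_tools.lib.xml_transform import XMLProcessor

# Every mapping value is an XPath evaluated against each node matched by
# the root path; the first result of each expression is kept.
xml_processor = XMLProcessor('origin/partners.xml', '//partner')
mapping = {
    'id': 'ref/text()',
    'name': 'name/text()',
    'email': 'contact/email/text()',
}
xml_processor.process(mapping, 'data/res.partner.csv', {'model': 'res.partner'})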
- import_args (dict): A dictionary containing arguments that will be - passed to the `odoo_import_thread.py` script - (e.g., `{'model': 'res.partner', 'context': "{'tracking_disable': True}"}`). - t (str, optional): This argument is kept for compatibility but is not - used in `XMLProcessor`. Defaults to 'list'. - null_values (list, optional): This argument is kept for compatibility - but is not used in `XMLProcessor`. - Defaults to `['NULL', False]`. - verbose (bool, optional): This argument is kept for compatibility but - is not used in `XMLProcessor`. Defaults to - `True`. - m2m (bool, optional): This argument is kept for compatibility but is - not used in `XMLProcessor`. Defaults to `False`. - - Returns: - tuple: A tuple containing the header (list of field names) and the - transformed data (list of lists). - - Important Notes: - - The `t`, `null_values`, `verbose`, and `m2m` arguments are present - for compatibility with the `Processor` class but are not actually - used by the `XMLProcessor`. - - The `mapping` dictionary values should be XPath expressions that - select the desired data from the XML nodes. - """ - header = mapping.keys() - lines = [] - for r in self.root.xpath(self.root_path): - line = [r.xpath(mapping[k])[0] for k in header] - lines.append(line) - self._add_data(header, lines, filename_out, import_args) - return header, lines - - def split(self, split_fun): - raise NotImplementedError("Method split not supported for XMLProcessor") diff --git a/odoo_csv_tools/migrate.py b/odoo_csv_tools/migrate.py deleted file mode 100644 index 6775d304..00000000 --- a/odoo_csv_tools/migrate.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python -#-*- coding: utf-8 -*- -''' -Created on 27 déc. 2016 - -@author: Thibault Francois -''' -from lib.transform import Processor -from export_threaded import export_data -from import_threaded import import_data - -class Migrator(object): - - def __init__(self, config_export, config_import): - self.config_export = config_export - self.config_import = config_import - self.import_batch_size = 10 - self.import_max_con = 1 - self.export_batch_size = 100 - self.export_max_con = 1 - - def migrate(self, model, domain, field_export, mappings=[None]): - header, data = export_data(self.config_export, model, domain, field_export, max_connection=self.export_max_con, batch_size=self.export_batch_size) - processor = Processor(header=header, data=data) - for mapping in mappings: - if not mapping: - mapping = processor.get_o2o_mapping() - to_import_header, to_import_data = processor.process(mapping, False, {}) - import_data(self.config_import, model, header=to_import_header, data=to_import_data, max_connection=self.import_max_con, batch_size=self.import_batch_size) \ No newline at end of file diff --git a/odoo_export_thread.py b/odoo_export_thread.py deleted file mode 100755 index 04e6c400..00000000 --- a/odoo_export_thread.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -''' -Copyright (C) Thibault Francois - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as -published by the Free Software Foundation, version 3. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Lesser Public License for more details. 
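A hedged sketch of driving the Migrator defined above; both configuration paths, the model, the domain and the field list are placeholders.

from odoo_csv_tools.migrate import Migrator

migrator = Migrator('conf/source.conf', 'conf/destination.conf')
migrator.export_max_con = 4       # parallel connections for the export side
migrator.import_batch_size = 50   # batch size for the import side
# With the default mappings=[None], a one-to-one mapping is generated
# from the exported header.
migrator.migrate('res.partner', [('customer', '=', True)], ['id', 'name', 'email'])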
- -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see . -''' - -import argparse -from odoo_csv_tools import export_threaded - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Import data in batch and in parallel') - parser.add_argument('-c', '--config', dest='config', default="conf/connection.conf", - help='Configuration File that contains connection parameters', required=True) - parser.add_argument('--file', dest='filename', help='Output File', required=True) - parser.add_argument('--model', dest='model', help='Model to Export', required=True) - parser.add_argument('--field', dest='fields', help='Fields to Export', required=True) - parser.add_argument('--domain', dest='domain', help='Filter', default="[]") - parser.add_argument('--worker', dest='worker', default=1, help='Number of simultaneous connection') - parser.add_argument('--size', dest='batch_size', default=10, help='Number of line to import per connection') - parser.add_argument('-s', '--sep', dest="separator", default=";", help='CSV separator') - parser.add_argument('--context', dest='context', - help='context that will be passed to the load function, need to be a valid python dict', - default="{'tracking_disable' : True}") - parser.add_argument('--encoding', dest='encoding', default="utf-8", help='Encoding of the data file') - # TODO args : encoding - # {'update_many2many': True,'tracking_disable' : True, 'create_product_variant' : True, 'check_move_validity' : False} - args = parser.parse_args() - - config_file = args.config - file_csv = args.filename - batch_size = int(args.batch_size) - model = args.model - max_connection = int(args.worker) - separator = args.separator - encoding = args.encoding - context = eval(args.context) - domain = eval(args.domain) - header = args.fields.split(',') - export_threaded.export_data(config_file, model, domain, header, context=context, output=file_csv, - max_connection=max_connection, batch_size=batch_size, separator=separator, - encoding=encoding) diff --git a/odoo_import_thread.py b/odoo_import_thread.py deleted file mode 100755 index 402fb479..00000000 --- a/odoo_import_thread.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python -#-*- coding: utf-8 -*- -''' -Copyright (C) Thibault Francois - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as -published by the Free Software Foundation, version 3. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Lesser Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see . 
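The export performed by odoo_export_thread.py can also be triggered programmatically with the same arguments the script parses; the connection file, model, domain and fields below are placeholders.

from odoo_csv_tools import export_threaded

export_threaded.export_data(
    'conf/connection.conf',           # connection parameters
    'res.partner',                    # model to export
    [('customer', '=', True)],        # domain
    ['id', 'name', 'email'],          # fields / header
    context={'tracking_disable': True},
    output='partners.csv',
    max_connection=4,
    batch_size=200,
    separator=';',
    encoding='utf-8',
)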
-''' - -import argparse -from odoo_csv_tools import import_threaded - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Import data in batch and in parallel') - parser.add_argument('-c', '--config', dest='config', default="conf/connection.conf", help='Configuration File that contains connection parameters', required = True) - parser.add_argument('--file', dest='filename', help='File to import', required = True) - parser.add_argument('--model', dest='model', help='Model to import', required = True) - parser.add_argument('--worker', dest='worker', default=1, help='Number of simultaneous connection') - parser.add_argument('--size', dest='batch_size', default=10, help='Number of line to import per connection') - parser.add_argument('--skip', dest='skip', default=0, help='Skip until line [SKIP]') - parser.add_argument('--fail', action='store_true', dest="fail", help='Fail mode') - parser.add_argument('-s', '--sep', dest="separator", default=";", help='CSV separator') - parser.add_argument('--groupby', dest='split', help='Group data per batch with the same value for the given column in order to avoid concurrent update error') - parser.add_argument('--ignore', dest='ignore', help='list of column separate by comma. Those column will be remove from the import request') - parser.add_argument('--check', dest='check', action='store_true', help='Check if record are imported after each batch.') - parser.add_argument('--context', dest='context', help='context that will be passed to the load function, need to be a valid python dict', default="{'tracking_disable' : True}") - parser.add_argument('--o2m', action='store_true', dest="o2m", help="When you want to import o2m field, don't cut the batch until we find a new id") - parser.add_argument('--encoding', dest='encoding', default="utf-8", help='Encoding of the data file') - #TODO args : encoding - #{'update_many2many': True,'tracking_disable' : True, 'create_product_variant' : True, 'check_move_validity' : False} - args = parser.parse_args() - - file_csv = args.filename - batch_size = int(args.batch_size) - fail_file = file_csv + ".fail" - max_connection = int(args.worker) - split = False - encoding= args.encoding - context= eval(args.context) - ignore = False - if args.ignore: - ignore = args.ignore.split(',') - - if args.fail: - file_csv = fail_file - fail_file = fail_file + ".bis" - batch_size = 1 - max_connection = 1 - split = False - - import_threaded.import_data(args.config, args.model, file_csv=file_csv, context=context, - fail_file=fail_file, encoding=encoding, separator=args.separator, - ignore=ignore, split=args.split, check=args.check, - max_connection=max_connection, batch_size=batch_size, skip=int(args.skip), o2m=args.o2m) diff --git a/pics/account_move.png b/pics/account_move.png deleted file mode 100644 index 0eccf182..00000000 Binary files a/pics/account_move.png and /dev/null differ diff --git a/pics/cascade_update.png b/pics/cascade_update.png deleted file mode 100644 index 691d16d4..00000000 Binary files a/pics/cascade_update.png and /dev/null differ diff --git a/pics/fail.png b/pics/fail.png deleted file mode 100644 index c8baed82..00000000 Binary files a/pics/fail.png and /dev/null differ diff --git a/pics/group_by_1.png b/pics/group_by_1.png deleted file mode 100644 index 97b8fc6f..00000000 Binary files a/pics/group_by_1.png and /dev/null differ diff --git a/pics/group_by_2.png b/pics/group_by_2.png deleted file mode 100644 index 16540b51..00000000 Binary files a/pics/group_by_2.png and /dev/null differ diff 
--git a/pics/import_tool_options.png b/pics/import_tool_options.png deleted file mode 100644 index 137e98dd..00000000 Binary files a/pics/import_tool_options.png and /dev/null differ diff --git a/pics/o2m_csv.png b/pics/o2m_csv.png deleted file mode 100644 index c57ca9e0..00000000 Binary files a/pics/o2m_csv.png and /dev/null differ diff --git a/pics/o2m_csv_gen.png b/pics/o2m_csv_gen.png deleted file mode 100644 index 74f37192..00000000 Binary files a/pics/o2m_csv_gen.png and /dev/null differ diff --git a/pics/phase_load.png b/pics/phase_load.png deleted file mode 100644 index d83b67af..00000000 Binary files a/pics/phase_load.png and /dev/null differ diff --git a/pics/phase_transform.png b/pics/phase_transform.png deleted file mode 100644 index 6ac76683..00000000 Binary files a/pics/phase_transform.png and /dev/null differ diff --git a/pics/run_time_1.png b/pics/run_time_1.png deleted file mode 100644 index 5ce7cc93..00000000 Binary files a/pics/run_time_1.png and /dev/null differ diff --git a/pics/run_time_2.png b/pics/run_time_2.png deleted file mode 100644 index 31c434b8..00000000 Binary files a/pics/run_time_2.png and /dev/null differ diff --git a/pics/run_time_3.png b/pics/run_time_3.png deleted file mode 100644 index e287fbf4..00000000 Binary files a/pics/run_time_3.png and /dev/null differ diff --git a/pydoclint-baseLine.txt b/pydoclint-baseLine.txt new file mode 100644 index 00000000..392614e2 --- /dev/null +++ b/pydoclint-baseLine.txt @@ -0,0 +1,39 @@ +src/odoo_data_flow/lib/conf_lib.py + DOC111: Function `get_connection_from_config`: The option `--arg-type-hints-in-docstring` is `False` but there are type hints in the docstring arg list + DOC501: Function `get_connection_from_config` has raise statements, but the docstring does not have a "Raises" section + DOC503: Function `get_connection_from_config` exceptions in the "Raises" section in the docstring do not match those in the function body. Raised exceptions in the docstring: []. Raised exceptions in the body: ['Exception', 'FileNotFoundError', 'KeyError', 'ValueError']. +-------------------- +src/odoo_data_flow/lib/internal/exceptions.py + DOC301: Class `SkippingError`: __init__() should not have a docstring; please combine it with the docstring of the class +-------------------- +src/odoo_data_flow/lib/internal/io.py + DOC107: Function `write_csv`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints +-------------------- +src/odoo_data_flow/lib/internal/rpc_thread.py + DOC301: Class `RpcThread`: __init__() should not have a docstring; please combine it with the docstring of the class +-------------------- +src/odoo_data_flow/lib/internal/tools.py + DOC404: Function `batch` yield type(s) in docstring not consistent with the return annotation. The yield type (the 0th arg in Generator[...]/Iterator[...]): Any; docstring "yields" section types: + DOC201: Function `to_m2o` does not have a return section in docstring + DOC203: Function `to_m2o` return type(s) in docstring not consistent with the return annotation. Return annotation has 1 type(s); docstring return section has 0 type(s). + DOC001: Function/method `to_m2m`: Potential formatting errors in docstring. Error message: Expected a colon in 'separated by commas.'. (Note: DOC001 could trigger other unrelated violations under this function/method too. Please fix the docstring formatting first.) + DOC101: Function `to_m2m`: Docstring contains fewer arguments than in function signature. 
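Most of these baseline entries flag Google-style docstring issues. As a hedged illustration only (the class name is made up and this is not code from the project), a constructor written so that DOC301 and DOC111 would not be raised looks like this:

class RpcWorker:
    """Small worker holding an RPC connection pool.

    Args:
        max_connection: Maximum number of parallel connections.
    """

    def __init__(self, max_connection: int) -> None:
        # No __init__ docstring: DOC301 asks for constructor arguments to be
        # documented in the class docstring, and the Args entry carries no
        # type hint because arg-type-hints-in-docstring is disabled.
        self.max_connection = max_connection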
+ DOC103: Function `to_m2m`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [prefix: str, value: str]. + DOC201: Function `to_m2m` does not have a return section in docstring + DOC203: Function `to_m2m` return type(s) in docstring not consistent with the return annotation. Return annotation has 1 type(s); docstring return section has 0 type(s). +-------------------- +src/odoo_data_flow/lib/workflow/invoice_v9.py + DOC301: Class `InvoiceWorkflowV9`: __init__() should not have a docstring; please combine it with the docstring of the class +-------------------- +src/odoo_data_flow/lib/xml_transform.py + DOC001: Function/method `process`: Potential formatting errors in docstring. Error message: Expected a colon in '`XMLProcessor`.'. (Note: DOC001 could trigger other unrelated violations under this function/method too. Please fix the docstring formatting first.) + DOC101: Method `XMLProcessor.process`: Docstring contains fewer arguments than in function signature. + DOC103: Method `XMLProcessor.process`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [filename_out: str, import_args: dict[str, Any], m2m: bool, mapping: dict[str, str], null_values: Union[list[Any], None], t: str, verbose: bool]. + DOC201: Method `XMLProcessor.process` does not have a return section in docstring + DOC203: Method `XMLProcessor.process` return type(s) in docstring not consistent with the return annotation. Return annotation has 1 type(s); docstring return section has 0 type(s). 
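Similarly, the DOC201/DOC203 entries ask for a "Returns" section that matches the return annotation. A hedged sketch of the expected shape, using a made-up make_external_id helper rather than the project's real to_m2o implementation:

def make_external_id(prefix: str, value: str) -> str:
    """Build an external ID from a prefix and a raw source value.

    Args:
        prefix: Namespace prepended to the generated external ID.
        value: Raw value taken from the source file.

    Returns:
        The generated external ID, or an empty string when value is empty.
    """
    if not value:
        return ""
    return f"{prefix}_{value.strip().replace(' ', '_')}"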
+-------------------- +src/odoo_data_flow/logging_config.py + DOC106: Function `setup_logging`: The option `--arg-type-hints-in-signature` is `True` but there are no argument type hints in the signature + DOC107: Function `setup_logging`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints + DOC111: Function `setup_logging`: The option `--arg-type-hints-in-docstring` is `False` but there are type hints in the docstring arg list +-------------------- diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..6a67bebf --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,121 @@ +[project] +name = "odoo-data-flow" +version = "0.0.0" +description = "Odoo Data Flow" +readme = "README.md" +requires-python = ">=3.9" +license = { text = "LGPL-3.0" } +authors = [ + { name = "bosd", email = "c5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me" }, +] +classifiers = ["Development Status :: 3 - Alpha"] + +dependencies = ["click >=8.0.1", "odoo-client-lib", "requests", "lxml"] + + +[project.urls] +Homepage = "https://github.com/OdooDataFlow/odoo-data-flow" +Repository = "https://github.com/OdooDataFlow/odoo-data-flow" +Documentation = "https://odoo-data-flow.readthedocs.io" +Changelog = "https://github.com/OdooDataFlow/odoo-data-flow/releases" + +[dependency-groups] +dev = [ + "coverage[toml] >= 6.2", + "pre-commit >=2.16.0", + "pre-commit-hooks >=4.1.0", + "pytest >=6.2.5", + "pygments >=2.10.0", + "nox", +] +lint = ["ruff >=0.0.274", "pydoclint >=0.0.0"] +docs = [ + "furo >=2021.11.12", + "myst-parser == 3.0.1", + "sphinx >= 4.3.2", + "sphinx-autobuild >=2021.3.14", + "sphinx-click >=3.0.2", + "sphinx_mermaid", +] +mypy = ["mypy >=0.930"] +typeguard = ["typeguard >=2.13.3"] +xdoctest = ["xdoctest[colors] >=0.15.10"] + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.uv] +package = true + +[project.scripts] +odoo-data-flow = "odoo_data_flow.__main__:cli" + +[tool.coverage.paths] +source = ["src", "*/site-packages"] +tests = ["tests", "*/tests"] + +[tool.coverage.run] +branch = true +source = ["odoo_data_flow", "tests"] + +[tool.coverage.report] +show_missing = true +fail_under = 100 +exclude_lines = ["pragma: no cover", "if TYPE_CHECKING:"] + +[tool.mypy] +strict = true +warn_unreachable = true +pretty = true +show_column_numbers = true +show_error_context = true + +[tool.ruff] +src = ["src", "tests"] + +[tool.ruff.lint] +select = [ + "B", # flake8-bugbear + "C90", # mccabe + "D", # pydocstyle + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "N", # pep8-naming + "RUF", # Ruff-specific rules + "S", # flake8-bandit + "UP", # pyupgrade + "W", # pycodestyle +] +ignore = [ + "COM812", + "COM819", + "D107", # undocumented-public-init conflicts with DOC301 + "D206", + "D300", + "E111", + "E114", + "E117", + "ISC001", + "ISC002", + "Q000", + "Q001", + "Q002", + "Q003", + "W191", +] + +[tool.ruff.lint.per-file-ignores] +"*/test_*.py" = ["S101"] +"noxfile.py" = ["S101"] +"**/conftest.py" = ["S101"] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.pydoclint] +style = 'google' +exclude = '\.git|\.nox|noxfile.py' +arg-type-hints-in-docstring = false +baseline = 'pydoclint-baseline.txt' +auto-generate-baseline = 'True' diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 6eca7405..00000000 --- a/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -odoo-client-lib==1.2.0 -unicodecsv==0.14.1 -future==0.16.0 -requests>=2.20.0 diff --git a/setup.py b/setup.py deleted file mode 100644 index 
86eb59c8..00000000 --- a/setup.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Copyright (C) Thibault Francois - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as -published by the Free Software Foundation, version 3. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Lesser Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see . -''' - -from setuptools import setup, find_packages - -setup(name='odoo-import-export-client', - version='3.0.0', - install_requires=['odoo-client-lib', 'future', 'unicodecsv', 'requests'], - description='Library and script that allow to export and import data to Odoo using rpc api.', - author='Thibault Francois', - author_email='francois.th@gmail.com', - url='https://github.com/tfrancoi/odoo_csv_import', - packages=find_packages(exclude=['contrib', 'docs', 'tests*']), - scripts=['odoo_export_thread.py', 'odoo_import_thread.py', 'odoo_convert_path_to_image.py', 'odoo_convert_url_to_image.py'], - long_description="See the home page for any information: https://github.com/tfrancoi/odoo_csv_import", - keywords="odoo library import export thread python client lib web service", - license="LGPLv3", - classifiers=[ - "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", - "Programming Language :: Python", - ], - ) diff --git a/src/odoo_data_flow/__init__.py b/src/odoo_data_flow/__init__.py new file mode 100644 index 00000000..43b4c0f7 --- /dev/null +++ b/src/odoo_data_flow/__init__.py @@ -0,0 +1,9 @@ +"""Odoo Dataflow.""" + +from . import export_threaded, import_threaded, lib + +__all__ = [ + "export_threaded", + "import_threaded", + "lib", +] diff --git a/src/odoo_data_flow/__main__.py b/src/odoo_data_flow/__main__.py new file mode 100644 index 00000000..8e9517ca --- /dev/null +++ b/src/odoo_data_flow/__main__.py @@ -0,0 +1,291 @@ +"""Command-line interface for odoo-data-flow.""" + +import ast + +import click + +from .converter import run_path_to_image, run_url_to_image +from .exporter import run_export +from .importer import run_import +from .logging_config import setup_logging +from .migrator import run_migration +from .workflow_runner import run_invoice_v9_workflow + + +@click.group( + context_settings=dict(help_option_names=["-h", "--help"]), + invoke_without_command=True, +) +@click.version_option() +@click.option( + "-v", "--verbose", is_flag=True, help="Enable verbose, debug-level logging." +) +@click.pass_context +def cli(ctx, verbose): + """Odoo Data Flow: A tool for importing, exporting, and processing data.""" + setup_logging(verbose) + # If no subcommand is invoked, we explicitly print the help text. + # This is required when using invoke_without_command=True. + if ctx.invoked_subcommand is None: + click.echo(ctx.get_help()) + + +# --- Workflow Command --- +# This command is now a top-level command attached directly to 'cli'. +@cli.command(name="workflow-invoice-v9") +@click.option( + "-c", "--config", required=True, help="Path to the connection.conf file." +) +@click.option( + "--action", + "actions", + multiple=True, + type=click.Choice( + ["tax", "validate", "pay", "proforma", "rename", "all"], + case_sensitive=False, + ), + default=["all"], + help="Workflow action to run. 
Can be specified multiple times. Defaults to 'all'.", +) +@click.option( + "--field", + required=True, + help="The source field containing the legacy invoice status.", +) +@click.option( + "--status-map", + "status_map_str", + required=True, + help="Dictionary string mapping Odoo states to legacy states. " + "e.g., \"{'open': ['OP']}\"", +) +@click.option( + "--paid-date-field", + required=True, + help="The source field containing the payment date.", +) +@click.option( + "--payment-journal", + required=True, + type=int, + help="The database ID of the payment journal.", +) +@click.option( + "--max-connection", default=4, type=int, help="Number of parallel threads." +) +def invoice_v9_cmd(**kwargs): + """Runs the legacy Odoo v9 invoice processing workflow.""" + run_invoice_v9_workflow(**kwargs) + + +# --- Import Command --- +# This command is attached directly to the main 'cli' group. +@cli.command(name="import") +@click.option( + "-c", + "--config", + required=True, + help="Configuration file for connection parameters.", +) +@click.option("--file", "filename", required=True, help="File to import.") +@click.option("--model", required=True, help="Odoo model to import into.") +@click.option( + "--worker", default=1, type=int, help="Number of simultaneous connections." +) +@click.option( + "--size", + "batch_size", + default=10, + type=int, + help="Number of lines to import per connection.", +) +@click.option( + "--skip", default=0, type=int, help="Number of initial lines to skip." +) +@click.option( + "--fail", + is_flag=True, + default=False, + help="Run in fail mode, retrying records from the .fail file.", +) +@click.option( + "-s", "--sep", "separator", default=";", help="CSV separator character." +) +@click.option( + "--groupby", + "split", + default=None, + help="Column to group data by to avoid concurrent updates.", +) +@click.option( + "--ignore", default=None, help="Comma-separated list of columns to ignore." +) +@click.option( + "--check", + is_flag=True, + default=False, + help="Check if records are imported after each batch.", +) +@click.option( + "--context", + default="{'tracking_disable': True}", + help="Odoo context as a dictionary string.", +) +@click.option( + "--o2m", + is_flag=True, + default=False, + help="Special handling for one-to-many imports.", +) +@click.option("--encoding", default="utf-8", help="Encoding of the data file.") +def import_cmd(**kwargs): + """Runs the data import process.""" + run_import(**kwargs) + + +# --- Export Command --- +@cli.command(name="export") +@click.option( + "-c", + "--config", + required=True, + help="Configuration file for connection parameters.", +) +@click.option("--file", "filename", required=True, help="Output file path.") +@click.option("--model", required=True, help="Odoo model to export from.") +@click.option( + "--fields", required=True, help="Comma-separated list of fields to export." +) +@click.option( + "--domain", default="[]", help="Odoo domain filter as a list string." +) +@click.option( + "--worker", default=1, type=int, help="Number of simultaneous connections." +) +@click.option( + "--size", + "batch_size", + default=10, + type=int, + help="Number of records to process per batch.", +) +@click.option( + "-s", "--sep", "separator", default=";", help="CSV separator character." 
+) +@click.option( + "--context", + default="{'tracking_disable': True}", + help="Odoo context as a dictionary string.", +) +@click.option("--encoding", default="utf-8", help="Encoding of the data file.") +def export_cmd(**kwargs): + """Runs the data export process.""" + run_export(**kwargs) + + +# --- Path-to-Image Command --- +@cli.command(name="path-to-image") +@click.argument("file") +@click.option( + "-f", + "--fields", + required=True, + help="Comma-separated list of fields to convert from path to base64.", +) +@click.option( + "--path", + default=None, + help="Image path prefix. Defaults to the current working directory.", +) +@click.option( + "--out", default="out.csv", help="Name of the resulting output file." +) +def path_to_image_cmd(**kwargs): + """Converts columns with local file paths into base64 strings.""" + run_path_to_image(**kwargs) + + +# --- URL-to-Image Command --- +@cli.command(name="url-to-image") +@click.argument("file") +@click.option( + "-f", + "--fields", + required=True, + help="Comma-separated list of fields with URLs to convert to base64.", +) +@click.option( + "--out", default="out.csv", help="Name of the resulting output file." +) +def url_to_image_cmd(**kwargs): + """Downloads content from URLs in columns and converts to base64.""" + run_url_to_image(**kwargs) + + +# --- Migrate Command --- +@cli.command(name="migrate") +@click.option( + "--config-export", + required=True, + help="Path to the source Odoo connection config.", +) +@click.option( + "--config-import", + required=True, + help="Path to the destination Odoo connection config.", +) +@click.option("--model", required=True, help="The Odoo model to migrate.") +@click.option( + "--domain", default="[]", help="Domain filter to select records for export." +) +@click.option( + "--fields", required=True, help="Comma-separated list of fields to migrate." +) +@click.option( + "--mapping", + default=None, + help="A dictionary string defining the transformation mapping.", +) +@click.option( + "--export-worker", + default=1, + type=int, + help="Number of workers for the export phase.", +) +@click.option( + "--export-batch-size", + default=100, + type=int, + help="Batch size for the export phase.", +) +@click.option( + "--import-worker", + default=1, + type=int, + help="Number of workers for the import phase.", +) +@click.option( + "--import-batch-size", + default=10, + type=int, + help="Batch size for the import phase.", +) +def migrate_cmd(**kwargs): + """Performs a direct server-to-server data migration.""" + if kwargs.get("mapping"): + try: + kwargs["mapping"] = ast.literal_eval(kwargs["mapping"]) + except Exception as e: + print( + "Error: Invalid mapping provided. " + f"Must be a valid Python dictionary string. Error: {e}" + ) + return + run_migration(**kwargs) + + +# No cli.add_command() calls are needed here because the decorators +# handle the registration automatically. + +if __name__ == "__main__": + cli() diff --git a/src/odoo_data_flow/converter.py b/src/odoo_data_flow/converter.py new file mode 100755 index 00000000..73b2b7e9 --- /dev/null +++ b/src/odoo_data_flow/converter.py @@ -0,0 +1,79 @@ +"""CSV Data converter. + +This module contains functions for converting data, such as image paths +or URLs to base64 strings, for use in Odoo imports. 
+""" + +import base64 +import os + +from .lib import mapper +from .lib.transform import Processor +from .logging_config import log + + +def to_base64(filepath): + """Reads a local file and returns its base64 encoded content.""" + try: + with open(filepath, "rb") as f: + return base64.b64encode(f.read()).decode("utf-8") + except FileNotFoundError: + log.warning(f"File not found at '{filepath}', skipping.") + return "" # Return empty string if file is not found + + +def run_path_to_image(file, fields, out="out.csv", path=None): + """Path to image. + + Takes a CSV file and converts columns containing local file paths + into base64 encoded strings. + """ + log.info("Starting path-to-image conversion...") + + base_path = path or os.getcwd() + + processor = Processor(file) + mapping = processor.get_o2o_mapping() + + for f in fields.split(","): + field_name = f.strip() + if field_name not in mapping: + log.warning(f"Field '{field_name}' not found in source file. Skipping.") + continue + + log.info(f"Setting up conversion for column: '{field_name}'") + mapping[field_name] = mapper.val( + field_name, + postprocess=lambda x: to_base64(os.path.join(base_path, x)) if x else "", + ) + + processor.process(mapping, out, {}, "list") + processor.write_to_file("") + log.info(f"Conversion complete. Output written to '{out}'.") + + +def run_url_to_image(file, fields, out="out.csv"): + """URL to image. + + Takes a CSV file and converts columns containing URLs + into base64 encoded strings by downloading the content. + """ + log.info("Starting url-to-image conversion...") + + processor = Processor(file) + mapping = processor.get_o2o_mapping() + + for f in fields.split(","): + field_name = f.strip() + if field_name not in mapping: + log.warning(f"Field '{field_name}' not found in source file. Skipping.") + continue + + log.info(f"Setting up URL download and conversion for column: '{field_name}'") + # Use the binary_url_map mapper to download + # and encode the content from the URL + mapping[field_name] = mapper.binary_url_map(field_name) + + processor.process(mapping, out, {}, "list") + processor.write_to_file("") + log.info(f"Conversion complete. Output written to '{out}'.") diff --git a/src/odoo_data_flow/export_threaded.py b/src/odoo_data_flow/export_threaded.py new file mode 100755 index 00000000..cd57f27a --- /dev/null +++ b/src/odoo_data_flow/export_threaded.py @@ -0,0 +1,160 @@ +"""Export thread. + +This module contains the low-level, multi-threaded logic for exporting +data from an Odoo instance. +""" + +import csv +import sys +from time import time +from typing import Any, Optional + +from .lib import conf_lib +from .lib.internal.rpc_thread import RpcThread +from .lib.internal.tools import batch +from .logging_config import log + +# --- Fix for csv.field_size_limit OverflowError --- +# In newer Python versions (3.10+), especially on 64-bit systems, +# sys.maxsize is too large for the C long that the csv module's +# field_size_limit function expects. This causes an OverflowError. +# The following code block finds the maximum possible value that works +# by reducing it until it's accepted. +max_int = sys.maxsize +decrement = True +while decrement: + decrement = False + try: + csv.field_size_limit(max_int) + except OverflowError: + max_int = int(max_int / 10) + decrement = True + + +class RPCThreadExport(RpcThread): + """Export Thread handler. + + A specialized RpcThread for handling the export of data batches from Odoo. + It collects results from multiple threads in a thread-safe manner. 
+ """ + + def __init__( + self, + max_connection: int, + model: Any, + header: list[str], + context: Optional[dict] = None, + ): + super().__init__(max_connection) + self.model = model + self.header = header + self.context = context or {} + self.results: dict[int, list[list[Any]]] = {} + + def launch_batch(self, data_ids: list[int], batch_number: int): + """Submits a batch of IDs to be exported by a worker thread.""" + + def launch_batch_fun(ids_to_export: list[int], num: int): + start_time = time() + try: + log.debug( + f"Exporting batch {num} with {len(ids_to_export)} records..." + ) + # The actual RPC call to Odoo + datas = self.model.export_data( + ids_to_export, self.header, context=self.context + ).get("datas", []) + self.results[num] = datas + log.debug( + f"Batch {num} finished in {time() - start_time:.2f}s. " + f"Fetched {len(datas)} records." + ) + except Exception as e: + log.error(f"Export for batch {num} failed: {e}", exc_info=True) + self.results[num] = [] + + self.spawn_thread(launch_batch_fun, [data_ids, batch_number]) + + def get_data(self) -> list[list[Any]]: + """Get data. + + Waits for all threads to complete and returns the collected data + in the correct order. + """ + super().wait() # Wait for all futures to complete + + all_data = [] + # Sort results by batch number to ensure original order is maintained + for batch_number in sorted(self.results.keys()): + all_data.extend(self.results[batch_number]) + return all_data + + +def export_data( + config_file: str, + model: str, + domain: list, + header: list[str], + context: Optional[dict] = None, + output: Optional[str] = None, + max_connection: int = 1, + batch_size: int = 100, + separator: str = ";", + encoding: str = "utf-8", +): + """Export Data. + + The main function for exporting data. It can either write to a file or + return the data in-memory for migrations. + """ + try: + connection = conf_lib.get_connection_from_config(config_file) + model_obj = connection.get_model(model) + except Exception as e: + log.error( + f"Failed to connect to Odoo or get model '{model}'. " + f"Please check your configuration. Error: {e}" + ) + return None, None if not output else (None, None) + + rpc_thread = RPCThreadExport(max_connection, model_obj, header, context) + start_time = time() + + log.info(f"Searching for records in model '{model}' to export...") + ids = model_obj.search(domain, context=context) + total_ids = len(ids) + log.info( + f"Found {total_ids} records to export. Splitting into batches of {batch_size}." + ) + + i = 0 + for id_batch in batch(ids, batch_size): + rpc_thread.launch_batch(list(id_batch), i) + i += 1 + + # This will block until all threads are done, then collect and sort the data + all_exported_data = rpc_thread.get_data() + + log.info( + f"Exported {len(all_exported_data)} records in total. Total time: " + f"{time() - start_time:.2f}s." 
+ ) + + if output: + # Mode 1: Write to a file + log.info(f"Writing exported data to file: {output}") + try: + with open(output, "w", newline="", encoding=encoding) as f: + writer = csv.writer( + f, separator=separator, quoting=csv.QUOTE_ALL + ) + writer.writerow(header) + writer.writerows(all_exported_data) + log.info("File writing complete.") + except OSError as e: + log.error(f"Failed to write to output file {output}: {e}") + return None, None # Return nothing when writing to file + else: + # Mode 2: Return data for in-memory use (e.g., migration) + log.info("Returning exported data in-memory.") + return header, all_exported_data diff --git a/src/odoo_data_flow/exporter.py b/src/odoo_data_flow/exporter.py new file mode 100755 index 00000000..7b2235f6 --- /dev/null +++ b/src/odoo_data_flow/exporter.py @@ -0,0 +1,117 @@ +"""This module contains the core logic for exporting data from Odoo.""" + +import ast + +from . import export_threaded +from .logging_config import log + + +def run_export( + config, + filename, + model, + fields, + domain="[]", + worker=1, + batch_size=10, + separator=";", + context="{'tracking_disable' : True}", + encoding="utf-8", +): + """Export runner. + + Orchestrates the data export process, writing the output to a CSV file. + This function is designed to be called from the main CLI. + """ + log.info("Starting data export process...") + + # Safely evaluate the domain and context strings + try: + parsed_domain = ast.literal_eval(domain) + if not isinstance(parsed_domain, list): + raise TypeError("Domain must be a list of tuples.") + except Exception as e: + log.error(f"Invalid domain provided. Must be a valid Python list string. {e}") + return + + try: + parsed_context = ast.literal_eval(context) + if not isinstance(parsed_context, dict): + raise TypeError("Context must be a dictionary.") + except Exception as e: + log.error( + f"Invalid context provided. Must be a valid Python dictionary string. {e}" + ) + return + + # Process the fields string into a list + header = fields.split(",") + + log.info(f"Exporting from model: {model}") + log.info(f"Output file: {filename}") + log.info(f"Workers: {worker}, Batch Size: {batch_size}") + + # Call the core export function with an output filename + export_threaded.export_data( + config, + model, + parsed_domain, + header, + context=parsed_context, + output=filename, + max_connection=int(worker), + batch_size=int(batch_size), + separator=separator, + encoding=encoding, + ) + + log.info("Export process finished.") + + +def run_export_for_migration( + config, + model, + fields, + domain="[]", + worker=1, + batch_size=10, + context="{'tracking_disable' : True}", + encoding="utf-8", +): + """Migration exporter. + + Orchestrates the data export process, returning the data in memory. + This function is designed to be called by the migration tool. + """ + log.info(f"Starting in-memory export from model '{model}' for migration...") + + try: + parsed_domain = ast.literal_eval(domain) + except Exception: + log.warning( + "Invalid domain string for migration export," + "defaulting to empty domain '[]'." 
+ ) + parsed_domain = [] + + try: + parsed_context = ast.literal_eval(context) + except Exception: + parsed_context = {} + + header_list = fields.split(",") if fields else [] + + header, data = export_threaded.export_data( + config, + model, + parsed_domain, + header_list, + context=parsed_context, + output=None, # This signals the function to return data + max_connection=int(worker), + batch_size=int(batch_size), + encoding=encoding, + ) + + log.info(f"In-memory export complete. Fetched {len(data)} records.") + return header, data diff --git a/src/odoo_data_flow/import_threaded.py b/src/odoo_data_flow/import_threaded.py new file mode 100755 index 00000000..f25e4e45 --- /dev/null +++ b/src/odoo_data_flow/import_threaded.py @@ -0,0 +1,307 @@ +"""Import thread. + +This module contains the low-level, multi-threaded logic for importing +data into an Odoo instance. +""" + +import csv +import sys +from collections.abc import Generator +from time import time +from typing import Any, Optional + +from .lib import conf_lib +from .lib.internal.rpc_thread import RpcThread +from .lib.internal.tools import batch +from .logging_config import log + +# --- Fix for csv.field_size_limit OverflowError --- +# In newer Python versions (3.10+), especially on 64-bit systems, +# sys.maxsize is too large for the C long that the csv module's +# field_size_limit function expects. This causes an OverflowError. +# The following code block finds the maximum possible value that works +# by reducing it until it's accepted. +max_int = sys.maxsize +decrement = True +while decrement: + decrement = False + try: + csv.field_size_limit(max_int) + except OverflowError: + max_int = int(max_int / 10) + decrement = True + + +class RPCThreadImport(RpcThread): + """RPC Import Thread. + + A specialized RpcThread for handling the import of data batches into Odoo. + It writes failed records to a file. + """ + + def __init__( + self, + max_connection: int, + model: Any, + header: list[str], + writer: csv.writer, + context: Optional[dict] = None, + ): + super().__init__(max_connection) + self.model = model + self.header = header + self.writer = writer + self.context = context or {} + + def launch_batch( + self, + data_lines: list[list[Any]], + batch_number: Any, + check: bool = False, + ): + """Submits a batch of data lines to be imported by a worker thread.""" + + def launch_batch_fun(lines: list[list[Any]], num: Any, do_check: bool): + start_time = time() + success = False + try: + log.debug(f"Importing batch {num} with {len(lines)} records...") + res = self.model.load(self.header, lines, context=self.context) + + if res.get("messages"): + for msg in res["messages"]: + record_index = msg.get("record", -1) + failed_line = ( + lines[record_index] + if record_index < len(lines) + else "N/A" + ) + log.error( + f"Odoo message for batch {num}: " + f"{msg.get('message', 'Unknown error')}. " + f"Record data: {failed_line}" + ) + # Mark as failed if there are any error messages + success = False + elif do_check and len(res.get("ids", [])) != len(lines): + log.error( + f"Record count mismatch for batch {num}. " + f"Expected {len(lines)}, " + f"got {len(res.get('ids', []))}. " + f"Probably a duplicate XML ID." + ) + success = False + else: + success = True + + except Exception as e: + log.error( + f"RPC call for batch {num} failed: {e}", exc_info=True + ) + success = False + + if not success and self.writer: + self.writer.writerows(lines) + + log.info( + f"Time for batch {num}: {time() - start_time:.2f}s. 
Success: {success}" + ) + + self.spawn_thread( + launch_batch_fun, [data_lines, batch_number], {"do_check": check} + ) + + +def _filter_ignored_columns( + ignore: list[str], header: list[str], data: list[list[Any]] +) -> tuple[list[str], list[list[Any]]]: + """Removes ignored columns from header and data.""" + if not ignore: + return header, data + + indices_to_keep = [i for i, h in enumerate(header) if h not in ignore] + new_header = [header[i] for i in indices_to_keep] + new_data = [[row[i] for i in indices_to_keep] for row in data] + + return new_header, new_data + + +def _read_data_file( + file_path: str, separator: str, encoding: str, skip: int +) -> tuple[list[str], list[list[Any]]]: + """Reads a CSV file and returns its header and data.""" + log.info(f"Reading data from file: {file_path}") + try: + with open(file_path, encoding=encoding, newline="") as f: + reader = csv.reader(f, separator=separator) + header = next(reader) + + if "id" not in header: + raise ValueError( + "Source file must contain an 'id' column for external IDs." + ) + + if skip > 0: + log.info(f"Skipping first {skip} lines...") + for _ in range(skip): + next(reader) + + return header, [row for row in reader] + except FileNotFoundError: + log.error(f"Source file not found: {file_path}") + return [], [] + except Exception as e: + log.error(f"Failed to read file {file_path}: {e}") + return [], [] + + +def _create_batches( + data: list[list[Any]], + split_by_col: str, + header: list[str], + batch_size: int, + o2m: bool, +) -> Generator[tuple[Any, list], None, None]: + """A generator that yields batches of data. + + If split_by_col is provided, it + groups records with the same value in that column into the same batch. + """ + if not split_by_col: + # Simple batching without grouping + for i, data_batch in enumerate(batch(data, batch_size)): + yield i, list(data_batch) + return + + try: + split_index = header.index(split_by_col) + id_index = header.index("id") + except ValueError as e: + log.error( + f"Grouping column '{e}' not found in header. Cannot use --groupby." + ) + return + + # Sort data by the grouping column + # to ensure all related records are contiguous + data.sort(key=lambda row: row[split_index]) + + current_batch = [] + current_split_value = None + batch_num = 0 + + for row in data: + # For o2m, keep adding lines to the batch if the ID is empty + is_o2m_line = o2m and not row[id_index] + + row_split_value = row[split_index] + + # Start a new batch if we are not in an o2m block and either the + # split value changes or the batch size is reached. + if ( + current_batch + and not is_o2m_line + and ( + row_split_value != current_split_value + or len(current_batch) >= batch_size + ) + ): + yield f"{batch_num}-{current_split_value}", current_batch + current_batch = [] + batch_num += 1 + + current_batch.append(row) + current_split_value = row_split_value + + if current_batch: + yield f"{batch_num}-{current_split_value}", current_batch + + +def import_data( + config_file: str, + model: str, + header: Optional[list[str]] = None, + data: Optional[list[list[Any]]] = None, + file_csv: Optional[str] = None, + context: Optional[dict] = None, + fail_file: str = False, + encoding: str = "utf-8", + separator: str = ";", + ignore: Optional[list[str]] = None, + split: str = False, + check: bool = True, + max_connection: int = 1, + batch_size: int = 10, + skip: int = 0, + o2m: bool = False, +): + """Main function to orchestrate the import process. + + Can be run from a file or from in-memory data. 
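To make the grouping behaviour of _create_batches concrete, a toy illustration (not part of the module) assuming the private helper is in scope; rows sharing the same 'move_id' value land in the same batch, which is what prevents concurrent updates on the grouped record.

header = ['id', 'move_id', 'amount']
data = [
    ['L1', 'MOVE_A', '10'],
    ['L2', 'MOVE_B', '20'],
    ['L3', 'MOVE_A', '30'],
]
for batch_key, rows in _create_batches(data, 'move_id', header, batch_size=10, o2m=False):
    print(batch_key, [row[0] for row in rows])
# Expected output (a counter combined with the group value):
#   0-MOVE_A ['L1', 'L3']
#   1-MOVE_B ['L2']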
+ """ + ignore = ignore or [] + context = context or {} + + if file_csv: + header, data = _read_data_file(file_csv, separator, encoding, skip) + if not data: + return # Stop if file reading failed + if not fail_file: # Only set default if not provided + fail_file = file_csv + ".fail" + + if header is None or data is None: + raise ValueError( + "Please provide either a data file or both 'header' and 'data'." + ) + + # Filter out ignored columns from both header and data + header, data = _filter_ignored_columns(ignore, header, data) + + try: + connection = conf_lib.get_connection_from_config(config_file) + model_obj = connection.get_model(model) + except Exception as e: + log.error(f"Failed to connect to Odoo: {e}") + return + + # Set up the writer for the fail file + fail_file_writer = None + fail_file_handle = None + if fail_file: + try: + fail_file_handle = open( + fail_file, "w", newline="", encoding=encoding + ) + fail_file_writer = csv.writer( + fail_file_handle, separator=separator, quoting=csv.QUOTE_ALL + ) + fail_file_writer.writerow( + header + ) # Write header to fail file immediately + except OSError as e: + log.error( + f"Could not open fail file for writing: {fail_file}. Error: {e}" + ) + return # Cannot proceed without a fail file + + rpc_thread = RPCThreadImport( + max_connection, model_obj, header, fail_file_writer, context + ) + start_time = time() + + # Create batches and launch them in threads + for batch_number, lines_batch in _create_batches( + data, split, header, batch_size, o2m + ): + rpc_thread.launch_batch(lines_batch, batch_number, check) + + # Wait for all threads to complete + rpc_thread.wait() + + if fail_file_handle: + fail_file_handle.close() + + log.info( + f"{len(data)} records processed for model '{model}'. " + f"Total time: {time() - start_time:.2f}s." + ) diff --git a/src/odoo_data_flow/importer.py b/src/odoo_data_flow/importer.py new file mode 100755 index 00000000..f1714eba --- /dev/null +++ b/src/odoo_data_flow/importer.py @@ -0,0 +1,96 @@ +"""This module contains the core logic for importing data into Odoo.""" + +import ast + +from . import import_threaded +from .logging_config import log + + +def run_import( + config, + filename, + model, + worker=1, + batch_size=10, + skip=0, + fail=False, + separator=";", + split=None, + ignore=None, + check=False, + context="{'tracking_disable' : True}", + o2m=False, + encoding="utf-8", +): + """Orchestrates the data import process from a CSV file.""" + log.info("Starting data import process from file...") + + file_csv = filename + fail_file = file_csv + ".fail" + + try: + parsed_context = ast.literal_eval(context) + if not isinstance(parsed_context, dict): + raise TypeError("Context must be a dictionary.") + except Exception as e: + log.error( + f"Invalid context provided. Must be a valid Python dictionary string. {e}" + ) + return + + ignore_list = ignore.split(",") if ignore else [] + + if fail: + log.info("Running in --fail mode. 
Retrying failed records...") + file_csv = fail_file + fail_file = fail_file + ".bis" + batch_size_run = 1 + max_connection_run = 1 + else: + batch_size_run = int(batch_size) + max_connection_run = int(worker) + + log.info(f"Importing file: {file_csv}") + log.info(f"Target model: {model}") + log.info(f"Workers: {max_connection_run}, Batch Size: {batch_size_run}") + + import_threaded.import_data( + config, + model, + file_csv=file_csv, + context=parsed_context, + fail_file=fail_file, + encoding=encoding, + separator=separator, + ignore=ignore_list, + split=split, + check=check, + max_connection=max_connection_run, + batch_size=batch_size_run, + skip=int(skip), + o2m=o2m, + ) + + log.info("Import process finished.") + + +def run_import_for_migration(config, model, header, data, worker=1, batch_size=10): + """Orchestrates the data import process from in-memory data.""" + log.info("Starting data import from in-memory data...") + + parsed_context = {"tracking_disable": True} + + log.info(f"Importing {len(data)} records into model: {model}") + log.info(f"Workers: {worker}, Batch Size: {batch_size}") + + import_threaded.import_data( + config, + model, + header=header, + data=data, + context=parsed_context, + max_connection=int(worker), + batch_size=int(batch_size), + ) + + log.info("In-memory import process finished.") diff --git a/src/odoo_data_flow/lib/__init__.py b/src/odoo_data_flow/lib/__init__.py new file mode 100644 index 00000000..9009a51b --- /dev/null +++ b/src/odoo_data_flow/lib/__init__.py @@ -0,0 +1,19 @@ +"initialize Library." + +from . import ( + checker, + conf_lib, + internal, + mapper, + transform, + workflow, +) + +__all__ = [ + "checker", + "conf_lib", + "internal", + "mapper", + "transform", + "workflow", +] diff --git a/src/odoo_data_flow/lib/checker.py b/src/odoo_data_flow/lib/checker.py new file mode 100644 index 00000000..512662bb --- /dev/null +++ b/src/odoo_data_flow/lib/checker.py @@ -0,0 +1,116 @@ +"""This module provides a library of "checker" functions. + +Each function is a factory that returns a new function designed to be passed +to the Processor's `.check()` method to perform data quality validations +before the transformation process begins. +""" + +import re +from typing import Callable, Optional + +from ..logging_config import log + +# Type aliases for clarity +Header = list[str] +Data = list[list[str]] +CheckFunc = Callable[[Header, Data], bool] + + +def id_validity_checker( + id_field: str, pattern: str, null_values: Optional[list[str]] = None +) -> CheckFunc: + """ID Validity checker. + + Returns a checker that validates a specific column + against a regex pattern. + """ + if null_values is None: + null_values = ["NULL"] + + def check_id_validity(header: Header, data: Data) -> bool: + try: + regex = re.compile(pattern) + except re.error as e: + log.error(f"Invalid regex pattern provided to id_validity_checker: {e}") + return False + + is_valid = True + for i, line in enumerate(data, start=1): + line_dict = dict(zip(header, line)) + id_value = line_dict.get(id_field, "") + + # Skip check if the value is considered null + if id_value in null_values or not id_value: + continue + + if not regex.match(id_value): + log.warning( + f"Check Failed (ID Validity) on line {i}: Value " + f"'{id_value}' in column '{id_field}' " + f"does not match pattern '{pattern}'." + ) + is_valid = False + return is_valid + + return check_id_validity + + +def line_length_checker(expected_length: int) -> CheckFunc: + """Line Length Checker. 
+ + Returns a checker that verifies each row has an exact number of columns. + """ + + def check_line_length(header: Header, data: Data) -> bool: + is_valid = True + for i, line in enumerate(data, start=2): # Start from 2 to account for header + if len(line) != expected_length: + log.warning( + f"Check Failed (Line Length) on line {i}: " + f"Expected {expected_length} columns, but found " + f"{len(line)}." + ) + is_valid = False + return is_valid + + return check_line_length + + +def line_number_checker(expected_line_count: int) -> CheckFunc: + """Returns a checker that verifies the total number of data rows.""" + + def check_line_number(header: Header, data: Data) -> bool: + actual_line_count = len(data) + if actual_line_count != expected_line_count: + log.warning( + f"Check Failed (Line Count): Expected {expected_line_count} " + f"data rows, but found {actual_line_count}." + ) + return False + return True + + return check_line_number + + +def cell_len_checker(max_cell_len: int) -> CheckFunc: + """Cell Length Checker. + + Returns a checker that verifies no cell exceeds a maximum character length. + """ + + def check_max_cell_len(header: Header, data: Data) -> bool: + is_valid = True + for i, line in enumerate(data, start=2): + # Start from 2 to account for header + for j, cell in enumerate(line): + if len(cell) > max_cell_len: + column_name = header[j] if j < len(header) else f"column {j + 1}" + log.warning( + f"Check Failed (Cell Length) on line {i}, column " + f"'{column_name}': Cell length is {len(cell)}, " + f"which exceeds the max of {max_cell_len}." + ) + is_valid = False + return is_valid + + return check_max_cell_len diff --git a/src/odoo_data_flow/lib/conf_lib.py b/src/odoo_data_flow/lib/conf_lib.py new file mode 100644 index 00000000..877e6581 --- /dev/null +++ b/src/odoo_data_flow/lib/conf_lib.py @@ -0,0 +1,60 @@ +"""Config File Handler. + +This module handles reading the connection configuration file and +establishing a connection to the Odoo server using odoo-client-lib. +""" + +import configparser +from typing import Any + +import odoolib + +from ..logging_config import log + + +def get_connection_from_config(config_file: str) -> Any: + """Get connection from config. + + Reads an Odoo connection configuration file and returns an + initialized OdooClient object. + + Args: + config_file (str): The path to the connection.conf file. + + Returns: + Any: An initialized and connected Odoo client object, + returned by odoolib.get_connection) + or raises an exception on failure. 
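+
+    Example ``connection.conf`` (illustrative; only ``hostname``, ``port``
+    and ``uid`` are handled specially by this function, the remaining key
+    names are assumptions about what ``odoolib.get_connection`` accepts):
+
+        [Connection]
+        hostname = localhost
+        database = odoo_db
+        login = admin
+        password = admin
+        protocol = xmlrpc
+        port = 8069
+        uid = 2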
+ """ + config = configparser.ConfigParser() + if not config.read(config_file): + log.error(f"Configuration file not found or is empty: {config_file}") + raise FileNotFoundError(f"Configuration file not found: {config_file}") + + try: + conn_details = dict(config["Connection"]) + + # Ensure port and uid are integers + if "port" in conn_details: + conn_details["port"] = int(conn_details["port"]) + if "uid" in conn_details: + # The OdooClient expects the user ID as 'user_id' + conn_details["user_id"] = int(conn_details.pop("uid")) + + log.info(f"Connecting to Odoo server at {conn_details.get('hostname')}...") + + # Use odoo-client-lib to establish the connection + connection = odoolib.get_connection(**conn_details) + + log.info("Connection successful.") + return connection + + except (KeyError, ValueError) as e: + log.error( + f"Configuration file '{config_file}' is missing a required key " + f"or has a malformed value: {e}" + ) + raise + except Exception as e: + log.error(f"An unexpected error occurred while connecting to Odoo: {e}") + raise diff --git a/src/odoo_data_flow/lib/internal/__init__.py b/src/odoo_data_flow/lib/internal/__init__.py new file mode 100644 index 00000000..730b15e4 --- /dev/null +++ b/src/odoo_data_flow/lib/internal/__init__.py @@ -0,0 +1,17 @@ +"""Internal helper tools for odoo-data-flow. + +This __init__.py file makes the internal modules available under the +'internal' namespace and defines the public API of this sub-package. +""" + +from . import exceptions, io, rpc_thread, tools + +# By defining __all__, we explicitly state which names are part of the +# public API of this package. This also signals to linters like ruff +# that the imports above are intentional, which resolves the F401 error. +__all__ = [ + "exceptions", + "io", + "rpc_thread", + "tools", +] diff --git a/src/odoo_data_flow/lib/internal/exceptions.py b/src/odoo_data_flow/lib/internal/exceptions.py new file mode 100644 index 00000000..d42bb0c8 --- /dev/null +++ b/src/odoo_data_flow/lib/internal/exceptions.py @@ -0,0 +1,26 @@ +"""Excpention handler. + +This module defines custom exceptions used throughout the library. +""" + +from typing import Any + + +class SkippingError(Exception): + """An exception raised to signal that the current row should be skipped. + + This is used within mappers to control the data processing flow and + intentionally filter out certain records without causing the entire + process to fail. + """ + + def __init__(self, message: str, *args: Any): # noqa: d301 + """Initializes the exception with a descriptive message. + + *args: + message: The reason why the row is being skipped. + """ + self.message = message + # Call the parent Exception's __init__ to ensure it behaves + # like a standard Python exception. + super().__init__(message, *args) diff --git a/src/odoo_data_flow/lib/internal/io.py b/src/odoo_data_flow/lib/internal/io.py new file mode 100644 index 00000000..2eb3bc64 --- /dev/null +++ b/src/odoo_data_flow/lib/internal/io.py @@ -0,0 +1,114 @@ +"""IO helpers. + +This module contains low-level helper functions for file I/O, +including writing CSV data and generating shell scripts. +""" + +import csv +import os +import shlex +from typing import Any, Optional + +from ...logging_config import log + + +def write_csv( + filename: str, header: list[str], data: list[list[Any]], encoding="utf-8" +): + """Writes data to a CSV file with a semicolon separator. + + Args: + filename: The path to the output CSV file. + header: A list of strings for the header row. 
+ data: A list of lists representing the data rows. + encoding: The file encoding to use. + """ + try: + with open(filename, "w", newline="", encoding=encoding) as f: + writer = csv.writer(f, separator=";", quoting=csv.QUOTE_ALL) + writer.writerow(header) + writer.writerows(data) + except OSError as e: + log.error(f"Failed to write to file {filename}: {e}") + + +def write_file( + filename: Optional[str] = None, + header: Optional[list[str]] = None, + data: Optional[list[list[Any]]] = None, + fail: bool = False, + model: str = "auto", + launchfile: str = "import_auto.sh", + worker: int = 1, + batch_size: int = 10, + init: bool = False, + encoding: str = "utf-8", + groupby: str = "", + sep: str = ";", + context: Optional[dict] = None, + ignore: str = "", + **kwargs, # to catch other unused params +): + """Filewriter. + + Writes data to a CSV file and generates a corresponding shell script + to import that file using the odoo-data-flow CLI. + """ + # Step 1: Write the actual data file + if filename and header is not None and data is not None: + write_csv(filename, header, data, encoding=encoding) + + # Step 2: If no launchfile is specified, we are done. + if not launchfile: + return + + # Step 3: Determine the target model name + if model == "auto" and filename: + model_name = os.path.basename(filename).replace(".csv", "") + else: + model_name = model + + # Step 4: Build the base command with its arguments + # We use shlex.quote to ensure all arguments + # are safely escaped for the shell. + command_parts = [ + "odoo-data-flow", + "import", + "--config", + shlex.quote(kwargs.get("conf_file", "conf/connection.conf")), + "--file", + shlex.quote(filename), + "--model", + shlex.quote(model_name), + "--encoding", + shlex.quote(encoding), + "--worker", + str(worker), + "--size", + str(batch_size), + "--sep", + shlex.quote(sep), + ] + + # Add optional arguments if they have a value + if groupby: + command_parts.extend(["--groupby", shlex.quote(groupby)]) + if ignore: + command_parts.extend(["--ignore", shlex.quote(ignore)]) + if context: + command_parts.extend(["--context", shlex.quote(str(context))]) + + # Step 5: Write the command(s) to the shell script + mode = "w" if init else "a" + try: + with open(launchfile, mode) as f: + # Write the main import command + f.write(" ".join(command_parts) + "\n") + + # If fail mode is enabled, + # write the second command with the --fail flag + if fail: + fail_command_parts = [*command_parts, "--fail"] + f.write(" ".join(fail_command_parts) + "\n") + except OSError as e: + log.error(f"Failed to write to launch file {launchfile}: {e}") diff --git a/src/odoo_data_flow/lib/internal/rpc_thread.py b/src/odoo_data_flow/lib/internal/rpc_thread.py new file mode 100644 index 00000000..90b7f5a4 --- /dev/null +++ b/src/odoo_data_flow/lib/internal/rpc_thread.py @@ -0,0 +1,78 @@ +"""RPC Threads. + +This module provides a robust, thread-safe mechanism for executing +RPC calls to Odoo in parallel. +""" + +import concurrent.futures +from typing import Any, Callable, Optional + +from ...logging_config import log + + +class RpcThread: + """A wrapper around ThreadPoolExecutor to manage parallel RPC calls to Odoo. + + This class simplifies running multiple functions concurrently while limiting + the number of simultaneous connections to the server. + """ + + def __init__(self, max_connection: int): # noqa: d301 + """Initializes the thread pool. + + Args: + max_connection: The maximum number of threads to run in parallel. 
+ """ + if not isinstance(max_connection, int) or max_connection < 1: + raise ValueError("max_connection must be a positive integer.") + + self.executor = concurrent.futures.ThreadPoolExecutor( + max_workers=max_connection + ) + self.futures: list[concurrent.futures.Future] = [] + + def spawn_thread( + self, + fun: Callable, + args: list[Any], + kwargs: Optional[dict[str, Any]] = None, + ): + """Submits a function to be executed by a worker thread in the pool. + + Args: + fun: The function to execute. + args: A list of positional arguments to pass to the function. + kwargs: A dictionary of keyword arguments to pass to the function. + """ + if kwargs is None: + kwargs = {} + + future = self.executor.submit(fun, *args, **kwargs) + self.futures.append(future) + + def wait(self): + """Waits for all submitted tasks to complete. + + This method will block until every task has finished. If any task + raised an exception during its execution, that exception will be logged. + """ + log.info(f"Waiting for {len(self.futures)} tasks to complete...") + + # Use as_completed to process results as they finish, + # which is memory efficient. + for future in concurrent.futures.as_completed(self.futures): + try: + # Calling .result() will re-raise any exception that occurred + # in the worker thread. We catch it to log it. + future.result() + except Exception as e: + # Log the exception from the failed thread. + log.error(f"A task in a worker thread failed: {e}", exc_info=True) + + # Shutdown the executor gracefully. + self.executor.shutdown(wait=True) + log.info("All tasks have completed.") + + def thread_number(self) -> int: + """Returns the total number of tasks submitted to the pool.""" + return len(self.futures) diff --git a/src/odoo_data_flow/lib/internal/tools.py b/src/odoo_data_flow/lib/internal/tools.py new file mode 100644 index 00000000..64129f3d --- /dev/null +++ b/src/odoo_data_flow/lib/internal/tools.py @@ -0,0 +1,166 @@ +"""Internal dooo-flow Tools. + +This module provides low-level utility functions for data formatting +and iteration, +primarily used by the mapper and processor modules. +""" + +from collections.abc import Iterable +from itertools import islice +from typing import Any + + +def batch(iterable: Iterable[Any], size: int) -> Iterable[list[Any]]: + """Splits an iterable into batches of a specified size. + + Args: + iterable: The iterable to process. + size: The desired size of each batch. + + Yields: + A list containing the next batch of items. + """ + source_iterator = iter(iterable) + while True: + batch_iterator = islice(source_iterator, size) + # Get the first item to check if the iterator is exhausted + try: + first_item = next(batch_iterator) + except StopIteration: + return + + # Chain the first item back with the rest of the batch iterator + # and yield the complete batch as a list. + yield [first_item, *list(batch_iterator)] + + +# --- Data Formatting Tools --- + + +def to_xmlid(name: str) -> str: + """Create valid xmlid. + + Sanitizes a string to make it a valid XML ID, replacing special + characters with underscores. + """ + if not isinstance(name, str): + name = str(name) + + # A mapping of characters to replace. + replacements = {".": "_", ",": "_", "\n": "_", "|": "_", " ": "_"} + for old, new in replacements.items(): + name = name.replace(old, new) + return name.strip() + + +def to_m2o(prefix: str, value: Any, default: str = "") -> str: + """Creates a full external ID for a Many2one relationship. 
+ + Creates a full external ID for a Many2one relationship by combining + a prefix and a sanitized value. + + Args: + prefix: The XML ID prefix (e.g., 'my_module'). + value: The value to be sanitized and appended to the prefix. + default: The value to return if the input value is empty. + + Return: + The formatted external ID (e.g., 'my_module.sanitized_value'). + """ + if not value: + return default + + # Ensure the prefix ends with a dot, + # but don't add one if it's already there. + if not prefix.endswith("."): + prefix += "." + + return f"{prefix}{to_xmlid(value)}" + + +def to_m2m(prefix: str, value: str) -> str: + """Creates a comma-separated list of external IDs . + + Creates a comma-separated list of external IDs for a Many2many relationship. + It takes a string of comma-separated values, sanitizes each one, and + prepends the prefix. + + Args: + prefix: The XML ID prefix to apply to each value. + value: A single string containing one or more values, + separated by commas. + + Return: + A comma-separated string of formatted external IDs. + """ + if not value: + return "" + + ids = [to_m2o(prefix, val.strip()) for val in value.split(",") if val.strip()] + return ",".join(ids) + + +class AttributeLineDict: + """Aggregates attribute line data for product templates.""" + + def __init__(self, attribute_list_ids, id_gen_fun): + self.data = {} + self.att_list = attribute_list_ids + self.id_gen = id_gen_fun + + def add_line(self, line, header): + """Processes a single line of attribute data and aggregates it + by product template ID. + + `line` is expected to contain: + - 'product_tmpl_id/id': The template's external ID. + - 'attribute_id/id': A dict mapping attribute name to its ID. + - 'value_ids/id': A dict mapping attribute name to the value's ID. + """ + line_dict = dict(zip(header, line)) + template_id = line_dict.get("product_tmpl_id/id") + if not template_id: + return + + if self.data.get(template_id): + # Template already exists, add new attribute values + template_info = self.data[template_id] + for att_id, att_name in self.att_list: + # Check if the current line contains this attribute + if line_dict.get("attribute_id/id", {}).get(att_name): + value = line_dict["value_ids/id"][att_name] + # Ensure value is unique before adding + if value not in template_info.setdefault(att_id, []): + template_info[att_id].append(value) + else: + # This is a new template + d = {} + for att_id, att_name in self.att_list: + if line_dict.get("attribute_id/id", {}).get(att_name): + d[att_id] = [line_dict["value_ids/id"][att_name]] + self.data[template_id] = d + + def generate_line(self): + """Generates the final list of attribute lines for the CSV file, + one line per attribute per product template. + """ + lines_header = [ + "id", + "product_tmpl_id/id", + "attribute_id/id", + "value_ids/id", + ] + lines_out = [] + for template_id, attributes in self.data.items(): + if not template_id: + continue + # Create a unique line for each attribute associated with the template + for attribute_id, values in attributes.items(): + line = [ + self.id_gen(template_id, attributes), + template_id, + attribute_id, + ",".join(values), # Odoo m2m/o2m often use comma-separated IDs + ] + lines_out.append(line) + return lines_header, lines_out diff --git a/src/odoo_data_flow/lib/mapper.py b/src/odoo_data_flow/lib/mapper.py new file mode 100644 index 00000000..0f2a969c --- /dev/null +++ b/src/odoo_data_flow/lib/mapper.py @@ -0,0 +1,531 @@ +"""This module contains a library of mapper functions. 
+ +Mappers are the core building blocks for data transformations. Each function +in this module is a "mapper factory" - it is a function that you call to +configure and return another function, which will then be executed by the +Processor for each row of the source data. +""" + +import base64 +import inspect +import os +from typing import Any, Callable + +import requests + +from ..logging_config import log +from .internal.exceptions import SkippingError +from .internal.tools import to_m2m, to_m2o + +# Type alias for clarity +LineDict = dict[str, Any] +StateDict = dict[str, Any] +MapperFunc = Callable[[LineDict, StateDict], Any] + +# --- Helper Functions --- + + +def _get_field_value(line: LineDict, field: str, default: Any = "") -> Any: + """Safely retrieves a value from the current data row.""" + return line.get(field, default) or default + + +def _str_to_mapper(field: Any) -> MapperFunc: + """Converts a string field name into a basic val mapper.""" + if isinstance(field, str): + return val(field) + return field + + +def _list_to_mappers(args: tuple) -> list[MapperFunc]: + """Converts a list of strings or mappers into a list of mappers.""" + return [_str_to_mapper(f) for f in args] + + +# --- Basic Mappers --- + + +def const(value: Any) -> MapperFunc: + """Returns a mapper that always provides a constant value.""" + + def const_fun(line: LineDict, state: StateDict) -> Any: + return value + + return const_fun + + +def val( + field: str, + default: Any = "", + postprocess: Callable = lambda x, s: x, + skip: bool = False, +) -> MapperFunc: + """Returns a mapper that gets a value from a specific field in the row.""" + + def val_fun(line: LineDict, state: StateDict) -> Any: + value = _get_field_value(line, field) + if not value and skip: + raise SkippingError(f"Missing required value for field '{field}'") + + final_value = value or default + try: + # Check how many arguments the postprocess function expects for + # backward compatibility with lambdas that only take one argument. + sig = inspect.signature(postprocess) + if len(sig.parameters) == 1: + # Old style: pass only the value + return postprocess(final_value) + else: + # New style: pass value and state + return postprocess(final_value, state) + except (ValueError, TypeError): + # Fallback for built-ins or other callables where signature + # inspection fails. Assume new style. + return postprocess(final_value, state) + + return val_fun + + +# --- Combining Mappers --- + + +def concat(separator: str, *fields: Any, skip: bool = False) -> MapperFunc: + """Concatenate mapper. + + Returns a mapper that joins values from multiple fields or static strings. + If `skip` is True, it will raise a SkippingError if the result is empty. + """ + mappers = _list_to_mappers(fields) + + def concat_fun(line: LineDict, state: StateDict) -> str: + values = [str(m(line, state)) for m in mappers] + # Filter out empty strings before joining + result = separator.join([v for v in values if v]) + if not result and skip: + raise SkippingError( + f"Concatenated value for fields {fields} is empty." + ) + return result + + return concat_fun + + +def concat_mapper_all(separator: str, *fields: Any) -> MapperFunc: + """Same as concat mapper, but if one value in the list of values to concat + is empty, the entire returned value is an empty string. 
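+
+    Example (column names are hypothetical): with ``ref`` set to ``"A1"``
+    and ``name`` empty, ``concat_mapper_all("_", "ref", "name")`` yields
+    ``""``, whereas plain ``concat`` would yield ``"A1"``.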
+ """ + mappers = _list_to_mappers(fields) + + def concat_all_fun(line: LineDict, state: StateDict) -> str: + values = [str(m(line, state)) for m in mappers] + if not all(values): + return "" + return separator.join(values) + + return concat_all_fun + + +# --- Conditional Mappers --- + + +def cond(field: str, true_mapper: Any, false_mapper: Any) -> MapperFunc: + """Conditional mapper. + + Returns a mapper that applies one of two mappers based on the + truthiness of a value in a given field. + """ + true_m = _str_to_mapper(true_mapper) + false_m = _str_to_mapper(false_mapper) + + def cond_fun(line: LineDict, state: StateDict) -> Any: + if _get_field_value(line, field): + return true_m(line, state) + else: + return false_m(line, state) + + return cond_fun + + +def bool_val(field: str, true_values: list[str]) -> MapperFunc: + """Boolean Value mapper. + + Returns a mapper that checks if a field's value is in a list of true values. + """ + + def bool_val_fun(line: LineDict, state: StateDict) -> str: + return "1" if _get_field_value(line, field) in true_values else "0" + + return bool_val_fun + + +# --- Numeric Mappers --- + + +def num(field: str, default: str = "0.0") -> MapperFunc: + """Number mapper. + + Returns a mapper that converts a numeric string to a standard format, + replacing commas with dots. + """ + + def num_fun(line: LineDict, state: StateDict) -> str: + value = _get_field_value(line, field, default) + return value.replace(",", ".") + + return num_fun + + +# --- Relational Mappers --- + + +def field(col: str) -> MapperFunc: + """Returns the column name if the column has a value, otherwise an empty string. + Useful for product attribute mappings where the attribute name itself is needed. + """ + + def field_fun(line: LineDict, state: StateDict) -> str: + return col if _get_field_value(line, col) else "" + + return field_fun + + +def m2o( + prefix: str, field: str, default: str = "", skip: bool = False +) -> MapperFunc: + """M2O mapper. + + Takes a field name and creates a Many2one external ID from its value. + """ + + def m2o_fun(line: LineDict, state: StateDict) -> str: + value = _get_field_value(line, field) + if skip and not value: + raise SkippingError(f"Missing Value for {field}") + return to_m2o(prefix, value, default=default) + + return m2o_fun + + +def m2o_map( + prefix: str, *fields: Any, default: str = "", skip: bool = False +) -> MapperFunc: + """M20 Mapper. + + Returns a mapper for creating a Many2one external ID by concatenating + a prefix and values from one or more fields. + """ + concat_mapper = concat("_", *fields) + + def m2o_fun(line: LineDict, state: StateDict) -> str: + value = concat_mapper(line, state) + if not value and skip: + raise SkippingError( + f"Missing value for m2o_map with prefix '{prefix}'" + ) + return to_m2o(prefix, value, default=default) + + return m2o_fun + + +def m2m(prefix: str, *fields: Any, sep: str = ",") -> MapperFunc: + """M2M Mapper. + + Returns a mapper for creating a comma-separated list of Many2many + external IDs. + It can take multiple fields or a single field to be split. 
+ """ + + def m2m_fun(line: LineDict, state: StateDict) -> str: + all_values = [] + if len(fields) > 1: # Mode 1: Multiple columns + for field in fields: + value = _get_field_value(line, field) + if value: + all_values.append(to_m2o(prefix, value)) + elif len(fields) == 1: # Mode 2: Single column with separator + field = fields[0] + value = _get_field_value(line, field) + if value: + all_values.extend( + to_m2o(prefix, v.strip()) for v in value.split(sep) + ) + + return ",".join(all_values) + + return m2m_fun + + +def m2m_map(prefix: str, mapper_func: MapperFunc) -> MapperFunc: + """M2M_Map Many 2 Many Mapper. + + Returns a mapper that takes the result of another mapper and creates + a Many2many external ID list from it. + """ + + def m2m_map_fun(line: LineDict, state: StateDict) -> str: + # Get the value from the provided mapper function + value = mapper_func(line, state) + # Use the standard to_m2m helper to format it correctly + return to_m2m(prefix, value) + + return m2m_map_fun + + +def m2o_att_name(prefix: str, att_list: list[str]) -> MapperFunc: + """M2O Attribute Name Mapper. + + Returns a mapper that creates a dictionary mapping attribute names to + their corresponding external IDs, but only for attributes that have a + value in the current row. + """ + + def m2o_att_fun(line: LineDict, state: StateDict) -> dict[str, str]: + return { + att: to_m2o(prefix, att) + for att in att_list + if _get_field_value(line, att) + } + + return m2o_att_fun + + +def m2m_id_list(prefix: str, *fields: Any, sep: str = ",") -> MapperFunc: + """M2M ID List Mapper. + + Returns a mapper that creates a comma-separated list of Many2many + external IDs from one or more fields. It concatenates values from the + fields first, then splits them by the separator. + """ + concat_m = concat("", *fields) + + def m2m_id_list_fun(line: LineDict, state: StateDict) -> str: + value = concat_m(line, state) + if not value: + return "" + values = [v.strip() for v in value.split(sep)] + return ",".join(to_m2o(prefix, v) for v in values if v) + + return m2m_id_list_fun + + +def m2m_value_list(*fields: Any, sep: str = ",") -> MapperFunc: + """M2M Value List Mapper. + + Returns a mapper that combines values from multiple fields and returns + them as a Python list of strings, split by the separator. + """ + concat_m = concat("", *fields) + + def m2m_value_list_fun(line: LineDict, state: StateDict) -> list[str]: + value = concat_m(line, state) + if not value: + return [] + return [v.strip() for v in value.split(sep) if v.strip()] + + return m2m_value_list_fun + + +# --- Advanced Mappers --- + + +def map_val( + mapping_dict: dict, key_mapper: Any, default: Any = "", m2m: bool = False +) -> MapperFunc: + """Returns a mapper that translates a value using a provided dictionary.""" + key_m = _str_to_mapper(key_mapper) + + def map_val_fun(line: LineDict, state: StateDict) -> Any: + key = key_m(line, state) + if m2m and isinstance(key, str): + keys = [k.strip() for k in key.split(",")] + return ",".join([str(mapping_dict.get(k, default)) for k in keys]) + return mapping_dict.get(key, default) + + return map_val_fun + + +def record(mapping: dict) -> MapperFunc: + """Returns a mapper that processes a sub-mapping for a related record. + + Used for creating one-to-many records. + """ + + def record_fun(line: LineDict, state: StateDict) -> dict: + # This function returns a dictionary that the Processor will understand + # as a related record to be created. 
+ return { + key: mapper_func(line, state) + for key, mapper_func in mapping.items() + } + + return record_fun + + +# --- Binary Mappers --- + + +def binary(field: str, path_prefix: str = "", skip: bool = False) -> MapperFunc: + """Binary mapper. + + Returns a mapper that reads a local file path from a field, + and converts the file content to a base64 string. + """ + + def binary_fun(line: LineDict, state: StateDict) -> str: + filepath = _get_field_value(line, field) + if not filepath: + return "" + + full_path = os.path.join(path_prefix, filepath) + try: + with open(full_path, "rb") as f: + return base64.b64encode(f.read()).decode("utf-8") + except FileNotFoundError as e: + if skip: + raise SkippingError(f"File not found at '{full_path}'") from e + log.warning(f"File not found at '{full_path}', skipping.") + return "" + + return binary_fun + + +def binary_url_map(field: str, skip: bool = False) -> MapperFunc: + """Binary url mapper. + + Returns a mapper that reads a URL from a field, downloads the content, + and converts it to a base64 string. + """ + + def binary_url_fun(line: LineDict, state: StateDict) -> str: + url = _get_field_value(line, field) + if not url: + return "" + + try: + res = requests.get(url, timeout=10) + res.raise_for_status() # Raises an exception for 4xx/5xx errors + return base64.b64encode(res.content).decode("utf-8") + except requests.exceptions.RequestException as e: + if skip: + raise SkippingError( + f"Cannot fetch file at URL '{url}': {e}" + ) from e + log.warning(f"Cannot fetch file at URL '{url}': {e}") + return "" + + return binary_url_fun + + +# --- Legacy / Specialized Mappers --- + + +def val_att(att_list: list[str]) -> MapperFunc: + """Value Attribute Mapper for version 9. + + Returns a mapper that creates a dictionary containing only the attributes + from `att_list` that exist and have a truthy value in the current row. + """ + + def val_att_fun(line: LineDict, state: StateDict) -> dict[str, Any]: + return { + att: _get_field_value(line, att) + for att in att_list + if _get_field_value(line, att) + } + + return val_att_fun + + +def m2o_att(prefix: str, att_list: list[str]) -> MapperFunc: + """M2O Attribute Mapper for Version 9. + + Returns a mapper that creates a dictionary mapping an attribute name + to its corresponding external ID, where the ID is composed of the + prefix, the attribute name, and the attribute's value. + """ + + def m2o_att_fun(line: LineDict, state: StateDict) -> dict[str, str]: + result = {} + for att in att_list: + value = _get_field_value(line, att) + if value: + # The ID is composed of 'prefix_attribute_value' + id_value = f"{att}_{value}" + result[att] = to_m2o(prefix, id_value) + return result + + return m2o_att_fun + + +def concat_field_value_m2m(separator: str, *fields: str) -> MapperFunc: + """Specialized concat mapper that joins each field name with its value, + then joins all resulting parts with a comma. + Example: ('_', 'Color', 'Size') on a row with Color='Red' and Size='L' + returns "Color_Red,Size_L" + """ + + def concat_fun(line: LineDict, state: StateDict) -> str: + parts = [] + for field in fields: + value = _get_field_value(line, field) + if value: + parts.append(f"{field}{separator}{value}") + return ",".join(parts) + + return concat_fun + + +def m2m_attribute_value(prefix: str, *fields: str) -> MapperFunc: + """Creates a comma-separated list of m2m external IDs where each ID is + composed of the attribute name and its value. This is a composite + mapper for a common product attribute pattern. 
+ + Example: ('ATT', 'Color', 'Size') on a row with Color='Red' + returns "external_id::ATT_Color_Red,external_id::ATT_Size_L" + """ + return m2m_map(prefix, concat_field_value_m2m("_", *fields)) + + +def m2m_template_attribute_value(prefix: str, *fields: Any) -> MapperFunc: + """Legace m2m Template Attribute mapper. + + Legacy mapper for creating complex XML IDs for product attribute values. + """ + concat_m = concat("_", *fields) + + def m2m_attribute_fun(line: LineDict, state: StateDict) -> str: + value = concat_m(line, state) + if not value: + return "" + return to_m2o(prefix, value) + + return m2m_attribute_fun + + +# --- Split Mappers --- + + +def split_line_number(line_nb: int) -> Callable: + """Split line number. + + Returns a function for the Processor's split method that creates a new + chunk every 'line_nb' lines. + """ + + def split(line: LineDict, i: int) -> int: + return i // line_nb + + return split + + +def split_file_number(file_nb: int) -> Callable: + """Split file number. + + Returns a function for the Processor's split method that distributes + records across a fixed number of 'file_nb' chunks. + """ + + def split(line: LineDict, i: int) -> int: + return i % file_nb + + return split diff --git a/src/odoo_data_flow/lib/transform.py b/src/odoo_data_flow/lib/transform.py new file mode 100644 index 00000000..730d4b37 --- /dev/null +++ b/src/odoo_data_flow/lib/transform.py @@ -0,0 +1,485 @@ +"""This module contains the core Processor class for transforming data.""" + +import csv +import os +from collections import OrderedDict +from typing import Callable, Optional + +from lxml import etree + +from ..logging_config import log +from . import mapper +from .internal.exceptions import SkippingError +from .internal.io import write_file +from .internal.tools import AttributeLineDict + + +class MapperRepr: + """Mapper representation. + + A wrapper to provide a useful string representation for mapper functions. + """ + + def __init__(self, repr_string, func): + self._repr_string = repr_string + self.func = func + + def __call__(self, *args, **kwargs): + """Call the wrapped function.""" + return self.func(*args, **kwargs) + + def __repr__(self): + """Return the string representation.""" + return self._repr_string + + +class Processor: + """Core class for reading, transforming, and preparing data for Odoo.""" + + def __init__( + self, + filename=None, + separator=";", + encoding="utf-8", + header=None, + data=None, + preprocess=lambda h, d: (h, d), + **kwargs, + ): + self.file_to_write = OrderedDict() + + # Determine if initializing from a file or in-memory data + if filename: + # The 'xml_...' kwargs are passed to the file reader + self.header, self.data = self._read_file( + filename, separator, encoding, **kwargs + ) + elif header is not None and data is not None: + self.header = header + self.data = data + else: + raise ValueError( + "Processor must be initialized with either a 'filename' or both" + " 'header' and 'data'." 
+ ) + + # Apply any pre-processing hooks + self.header, self.data = preprocess(self.header, self.data) + + def _read_file(self, filename, separator, encoding, **kwargs): + """Reads a CSV or XML file and returns its header and data.""" + xml_root_path = kwargs.get("xml_root_tag") + + if xml_root_path: + log.info(f"Reading XML file: {filename}") + try: + # Use a secure parser to prevent XXE and other vulnerabilities + parser = etree.XMLParser( + resolve_entities=False, + no_network=True, + dtd_validation=False, + load_dtd=False, + ) + tree = etree.parse(filename, parser=parser) + nodes = tree.xpath(xml_root_path) + + if not nodes: + log.warning( + f"No nodes found for root path '{xml_root_path}'" + ) + return [], [] + + # Infer header from the tags of the first node's children + header = [elem.tag for elem in nodes[0]] + data = [] + for node in nodes: + row = [] + for col in header: + # Find the child element and get its text content + child = node.find(col) + row.append(child.text if child is not None else "") + data.append(row) + return header, data + + except etree.XMLSyntaxError as e: + log.error(f"Failed to parse XML file {filename}: {e}") + return [], [] + except Exception as e: + log.error( + f"An unexpected error occurred while reading XML file {filename}: {e}" + ) + return [], [] + else: + log.info(f"Reading CSV file: {filename}") + try: + with open(filename, encoding=encoding, newline="") as f: + reader = csv.reader(f, delimiter=separator) + header = next(reader) + data = [row for row in reader] + return header, data + except FileNotFoundError: + log.error(f"Source file not found at: {filename}") + return [], [] + except Exception as e: + log.error(f"Failed to read file {filename}: {e}") + return [], [] + + def check(self, check_fun, message=None): + """Runs a data quality check function against the loaded data.""" + res = check_fun(self.header, self.data) + if not res: + error_message = ( + message or f"Data quality check '{check_fun.__name__}' failed." + ) + log.warning(error_message) + return res + + def split(self, split_fun): + """Splits the processor's data into multiple new Processor objects.""" + grouped_data = OrderedDict() + for i, row in enumerate(self.data): + row_dict = dict(zip(self.header, row)) + key = split_fun(row_dict, i) + if key not in grouped_data: + grouped_data[key] = [] + grouped_data[key].append(row) + + return { + key: Processor(header=list(self.header), data=data) + for key, data in grouped_data.items() + } + + def get_o2o_mapping(self): + """Generates a direct 1-to-1 mapping dictionary.""" + return { + str(column): MapperRepr( + f"mapper.val('{column}')", mapper.val(column) + ) + for column in self.header + if column + } + + def process( + self, + mapping, + filename_out, + params=None, + t="list", + null_values=None, + m2m=False, + ): + """Main processor. + + Processes the data using a mapping and prepares it for writing. + """ + if null_values is None: + null_values = ["NULL", False] + if params is None: + params = {} + if m2m: + head, data = self._process_mapping_m2m( + mapping, null_values=null_values + ) + else: + head, data = self._process_mapping( + mapping, t=t, null_values=null_values + ) + + self._add_data(head, data, filename_out, params) + return head, data + + def write_to_file( + self, + script_filename, + fail=True, + append=False, + python_exe="python", + path="", + ): + """Write bash script. + + Generates the .sh script for the import. 
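+
+        Example (assumes ``process()`` has already queued at least one
+        output file on this processor):
+
+            processor.write_to_file("import_all.sh", fail=True)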
+ """ + init = not append + for _, info in self.file_to_write.items(): + info_copy = info.copy() + info_copy.update( + { + "model": info.get("model", "auto"), + "init": init, + "launchfile": script_filename, + "fail": fail, + "python_exe": python_exe, + "path": path, + } + ) + write_file(**info_copy) + init = False + + def join_file( + self, + filename, + master_key, + child_key, + header_prefix="child", + separator=";", + encoding="utf-8", + ): + """File joiner. + + Joins data from a secondary file into the processor's main data. + """ + child_header, child_data = self._read_file( + filename, separator, encoding + ) + + try: + child_key_pos = child_header.index(child_key) + master_key_pos = self.header.index(master_key) + except ValueError as e: + log.error( + f"Join key error: {e}. Check if '{master_key}' and " + f"'{child_key}' exist in their respective files." + ) + return + + child_data_map = {row[child_key_pos]: row for row in child_data} + + empty_child_row = [""] * len(child_header) + for master_row in self.data: + key_value = master_row[master_key_pos] + row_to_join = child_data_map.get(key_value, empty_child_row) + master_row.extend(row_to_join) + + self.header.extend([f"{header_prefix}_{h}" for h in child_header]) + + def _add_data(self, head, data, filename_out, params): + params = params.copy() + params["filename"] = ( + os.path.abspath(filename_out) if filename_out else False + ) + params["header"] = head + params["data"] = data + self.file_to_write[filename_out] = params + + def _process_mapping(self, mapping, t, null_values): + """The core transformation loop.""" + lines_out = [] if t == "list" else set() + state = {} # Persistent state for the entire file processing + + for i, line in enumerate(self.data): + # Clean up null values + cleaned_line = [ + s.strip() if s and s.strip() not in null_values else "" + for s in line + ] + line_dict = dict(zip(self.header, cleaned_line)) + + try: + # Pass the state dictionary to each mapper call + line_out = [ + mapping[k](line_dict, state) for k in mapping.keys() + ] + except SkippingError as e: + log.debug(f"Skipping line {i}: {e.message}") + continue + # This try/except handles mappers that do not accept the `state` dictionary + # for backward compatibility. + except TypeError: + line_out = [mapping[k](line_dict) for k in mapping.keys()] + + if t == "list": + lines_out.append(line_out) + else: + lines_out.add(tuple(line_out)) + return list(mapping.keys()), lines_out + + def _process_mapping_m2m(self, mapping, null_values): + """m2m process mapping. + + Handles special m2m mapping by expanding list values into unique rows. 
+ """ + head, data = self._process_mapping(mapping, "list", null_values) + lines_out = [] + + for line_out in data: + index_list, zip_list = [], [] + for index, value in enumerate(line_out): + if isinstance(value, list): + index_list.append(index) + zip_list.append(value) + + if not zip_list: + # Ensure we don't add duplicate rows + if line_out not in lines_out: + lines_out.append(line_out) + continue + + # Transpose the lists of values to create new rows + values_list = zip(*zip_list) + for values in values_list: + new_line = list(line_out) + for i, val in enumerate(values): + new_line[index_list[i]] = val + + # Ensure we don't add duplicate rows + if new_line not in lines_out: + lines_out.append(new_line) + + return head, lines_out + + +class ProductProcessorV10(Processor): + """Processor to generate a 'product.attribute' file with dynamic variant creation.""" + + def process_attribute_data( + self, attributes_list, ATTRIBUTE_PREFIX, filename_out, import_args + ): + """Creates and registers the 'product.attribute.csv' file. + + Args: + attributes_list (List[str]): list of attribute names (e.g., ['Color', 'Size']). + ATTRIBUTE_PREFIX (str): Prefix for generating external IDs. + filename_out (str): Output path for the CSV file. + import_args (Dict): Arguments for the import script. + """ + attr_header = ["id", "name", "create_variant"] + attr_data = [ + [mapper.to_m2o(ATTRIBUTE_PREFIX, att), att, "Dynamically"] + for att in attributes_list + ] + self._add_data(attr_header, attr_data, filename_out, import_args) + + +class ProductProcessorV9(Processor): + """Processor to generate variant data from a flat file, creating three CSV files: + 1. product.attribute.csv: The attributes themselves. + 2. product.attribute.value.csv: The specific values for each attribute. + 3. product.attribute.line.csv: Links attributes to product templates. + """ + + def _generate_attribute_file_data( + self, attributes_list: list[str], prefix: str + ) -> tuple[list[str], list[list[str]]]: + """Generates header and data for 'product.attribute.csv'.""" + header = ["id", "name"] + data = [[mapper.to_m2o(prefix, attr), attr] for attr in attributes_list] + return header, data + + def _extract_attribute_value_data( + self, + mapping: dict, + attributes_list: list[str], + processed_rows: list[dict], + ) -> set[tuple]: + """Extracts and transforms data for 'product.attribute.value.csv'. + + This replaces the original complex nested 'add_value_line' function. 
+ """ + attribute_values = set() + # The 'name' mapping is expected to return a dict of {attribute: value} + name_key = "name" # This is a mandatory key in the original mapping + + for row_dict in processed_rows: + # Apply all mapping functions to the current row + try: + line_out_results = [ + mapping[k](row_dict) for k in mapping.keys() + ] + except TypeError: + line_out_results = [ + mapping[k](row_dict, {}) for k in mapping.keys() + ] + + # Find the result of the 'name' mapping, which contains the values + name_mapping_index = list(mapping.keys()).index(name_key) + values_dict = line_out_results[name_mapping_index] + + if not isinstance(values_dict, dict): + continue + + for attr_name in attributes_list: + # If the attribute exists for this product, + # create a line for its value + if values_dict.get(attr_name): + value_line = tuple( + res[attr_name] if isinstance(res, dict) else res + for res in line_out_results + ) + attribute_values.add(value_line) + + return attribute_values + + def process_attribute_mapping( + self, + mapping: dict, + line_mapping: dict, + attributes_list: list[str], + ATTRIBUTE_PREFIX: str, + path: str, + import_args: dict, + id_gen_fun: Optional[Callable] = None, + null_values: Optional[list[str]] = None, + ): + """Orchestrates the processing of product attributes and variants from source data.""" + # 1. Generate base attribute data (product.attribute.csv) + if null_values is None: + null_values = ["NULL"] + attr_header, attr_data = self._generate_attribute_file_data( + attributes_list, ATTRIBUTE_PREFIX + ) + + # 2. Clean and process all data rows into a list of dictionaries + processed_rows = [] + for line in self.data: + cleaned_line = [ + s.strip() if s.strip() not in null_values else "" for s in line + ] + processed_rows.append(dict(zip(self.header, cleaned_line))) + + # 3. Generate attribute value data (product.attribute.value.csv) + values_header = list(mapping.keys()) + values_data = self._extract_attribute_value_data( + mapping, attributes_list, processed_rows + ) + + # 4. Generate attribute line data (product.attribute.line.csv) + id_gen_fun = id_gen_fun or ( + lambda tmpl_id, vals: mapper.to_m2o( + tmpl_id.split(".")[0] + "_LINE", tmpl_id + ) + ) + line_aggregator = AttributeLineDict(attr_data, id_gen_fun) + for row_dict in processed_rows: + try: + values_lines = [ + line_mapping[k](row_dict) for k in line_mapping.keys() + ] + except TypeError: + values_lines = [ + line_mapping[k](row_dict, {}) for k in line_mapping.keys() + ] + line_aggregator.add_line(values_lines, list(line_mapping.keys())) + line_header, line_data = line_aggregator.generate_line() + + # 5. Add all three generated files to the write queue + context = import_args.setdefault("context", {}) + context["create_product_variant"] = True + + self._add_data( + attr_header, attr_data, path + "product.attribute.csv", import_args + ) + self._add_data( + values_header, + values_data, + path + "product.attribute.value.csv", + import_args, + ) + + line_import_args = dict(import_args, groupby="product_tmpl_id/id") + self._add_data( + line_header, + line_data, + path + "product.attribute.line.csv", + line_import_args, + ) diff --git a/src/odoo_data_flow/lib/workflow/__init__.py b/src/odoo_data_flow/lib/workflow/__init__.py new file mode 100644 index 00000000..466b78a6 --- /dev/null +++ b/src/odoo_data_flow/lib/workflow/__init__.py @@ -0,0 +1,7 @@ +"""Workflow tools for pre-post processing data.""" + +from . 
import invoice_v9 + +__all__ = [ + "invoice_v9", +] diff --git a/src/odoo_data_flow/lib/workflow/invoice_v9.py b/src/odoo_data_flow/lib/workflow/invoice_v9.py new file mode 100644 index 00000000..f6f19016 --- /dev/null +++ b/src/odoo_data_flow/lib/workflow/invoice_v9.py @@ -0,0 +1,236 @@ +"""Invoice helper for odoo version 9. + +This module contains a legacy workflow helper for processing imported +invoices in Odoo v9. It is preserved for reference but will need to be +updated to work with modern Odoo versions. +""" + +from time import time +from xmlrpc.client import Fault + +from ..internal.rpc_thread import RpcThread + + +class InvoiceWorkflowV9: + """Automate odoo 9 Invoice Workflow. + + A class to automate the lifecycle of imported invoices in Odoo v9, + such as validating, paying, and setting taxes. + """ + + def __init__( # noqa: doc301 + self, + connection, + field, + status_map, + paid_date_field, + payment_journal, + max_connection=4, + ): # noqa: doc301 + """Initializes the workflow processor. + + @param connection: An active odoo-client-lib connection object. + @param field: The field that contains the legacy status from source data + @param status_map: A dict mapping Odoo states to lists of legacy states. + e.g., {'open': ['status1'], 'paid': ['status2']} + @param paid_date_field: The field containing the payment date. + @param payment_journal: The database ID of the payment journal to use. + @param max_connection: The number of parallel threads to use. + """ + self.connection = connection + self.invoice_obj = connection.get_model("account.invoice") + self.payment_obj = connection.get_model("account.payment") + self.account_invoice_tax = self.connection.get_model("account.invoice.tax") + self.field = field + self.status_map = status_map + self.paid_date = paid_date_field + self.payment_journal = payment_journal + self.max_connection = max_connection + self.time = time() + + def _display_percent(self, i, percent_step, total): + if i % percent_step == 0: + percentage = round(i / float(total) * 100, 2) + elapsed_time = time() - self.time + print(f"{percentage}% : {i}/{total} time {elapsed_time:.2f} sec") + + def set_tax(self): + """Finds draft invoices and computes their taxes.""" + + def create_tax(invoice_id): + taxes = self.invoice_obj.get_taxes_values(invoice_id) + for tax in taxes.values(): + self.account_invoice_tax.create(tax) + + invoices = self.invoice_obj.search( + [ + ("state", "=", "draft"), + ("type", "=", "out_invoice"), + ("tax_line_ids", "=", False), + ] + ) + total = len(invoices) + percent_step = int(total / 5000) or 1 + self.time = time() + rpc_thread = RpcThread(self.max_connection) + print(f"Computing tax for {total} invoices...") + for i, invoice_id in enumerate(invoices): + self._display_percent(i, percent_step, total) + rpc_thread.spawn_thread(create_tax, [invoice_id]) + rpc_thread.wait() + + def validate_invoice(self): + """Finds and validates invoices that should be open or paid.""" + statuses_to_validate = self.status_map.get("open", []) + self.status_map.get( + "paid", [] + ) + invoice_to_validate = self.invoice_obj.search( + [ + (self.field, "in", statuses_to_validate), + ("state", "=", "draft"), + ("type", "=", "out_invoice"), + ] + ) + total = len(invoice_to_validate) + percent_step = int(total / 5000) or 1 + rpc_thread = RpcThread(1) # Validation should be single-threaded + print(f"Validating {total} invoices...") + self.time = time() + for i, invoice_id in enumerate(invoice_to_validate): + self._display_percent(i, percent_step, total) + fun = 
self.connection.get_service("object").exec_workflow + rpc_thread.spawn_thread( + fun, + [ + self.connection.database, + self.connection.user_id, + self.connection.password, + "account.invoice", + "invoice_open", + invoice_id, + ], + ) + rpc_thread.wait() + + def proforma_invoice(self): + """Finds and moves invoices to the pro-forma state.""" + invoice_to_proforma = self.invoice_obj.search( + [ + (self.field, "in", self.status_map.get("proforma", [])), + ("state", "=", "draft"), + ("type", "=", "out_invoice"), + ] + ) + total = len(invoice_to_proforma) + percent_step = int(total / 100) or 1 + self.time = time() + rpc_thread = RpcThread(self.max_connection) + print(f"Setting {total} invoices to pro-forma...") + for i, invoice_id in enumerate(invoice_to_proforma): + self._display_percent(i, percent_step, total) + fun = self.connection.get_service("object").exec_workflow + rpc_thread.spawn_thread( + fun, + [ + self.connection.database, + self.connection.user_id, + self.connection.password, + "account.invoice", + "invoice_proforma2", + invoice_id, + ], + {}, + ) + rpc_thread.wait() + + def paid_invoice(self): + """Finds open invoices and registers payments for them.""" + + def pay_single_invoice(data_update, wizard_context): + fields_to_get = [ + "communication", + "currency_id", + "invoice_ids", + "payment_difference", + "partner_id", + "payment_method_id", + "payment_difference_handling", + "journal_id", + "state", + "writeoff_account_id", + "payment_date", + "partner_type", + "hide_payment_method", + "payment_method_code", + "partner_bank_account_id", + "amount", + "payment_type", + ] + data = self.payment_obj.default_get(fields_to_get, context=wizard_context) + data.update(data_update) + wizard_id = self.payment_obj.create(data, context=wizard_context) + try: + self.payment_obj.post([wizard_id], context=wizard_context) + except Fault: + # Odoo may raise a fault for various reasons + # (e.g., already paid), + # which can be ignored in a batch process. 
+ pass + + invoices_to_paid = self.invoice_obj.search_read( + domain=[ + (self.field, "in", self.status_map.get("paid", [])), + ("state", "=", "open"), + ("type", "=", "out_invoice"), + ], + fields=[self.paid_date, "date_invoice"], + ) + total = len(invoices_to_paid) + percent_step = int(total / 1000) or 1 + self.time = time() + rpc_thread = RpcThread(self.max_connection) + print(f"Registering payment for {total} invoices...") + for i, invoice in enumerate(invoices_to_paid): + self._display_percent(i, percent_step, total) + wizard_context = { + "active_id": invoice["id"], + "active_ids": [invoice["id"]], + "active.model": "account.invoice", + "default_invoice_ids": [(4, invoice["id"], 0)], + "type": "out_invoice", + "journal_type": "sale", + } + data_update = { + "journal_id": self.payment_journal, + "payment_date": invoice.get(self.paid_date) + or invoice.get("date_invoice"), + "payment_method_id": 1, # Manual + } + rpc_thread.spawn_thread( + pay_single_invoice, [data_update, wizard_context], {} + ) + rpc_thread.wait() + + def rename(self, name_field): + """Utility to move a value from a custom field to the invoice number.""" + invoices_to_rename = self.invoice_obj.search_read( + domain=[ + (name_field, "!=", False), + (name_field, "!=", "0.0"), + ("state", "!=", "draft"), + ("type", "=", "out_invoice"), + ], + fields=[name_field], + ) + total = len(invoices_to_rename) + percent_step = int(total / 1000) or 1 + self.time = time() + rpc_thread = RpcThread(int(self.max_connection * 1.5)) + print(f"Renaming {total} invoices...") + for i, invoice in enumerate(invoices_to_rename): + self._display_percent(i, percent_step, total) + update_vals = {"number": invoice[name_field], name_field: False} + rpc_thread.spawn_thread( + self.invoice_obj.write, [invoice["id"], update_vals], {} + ) + rpc_thread.wait() diff --git a/src/odoo_data_flow/logging_config.py b/src/odoo_data_flow/logging_config.py new file mode 100755 index 00000000..ea849846 --- /dev/null +++ b/src/odoo_data_flow/logging_config.py @@ -0,0 +1,37 @@ +"""Centralized logging configuration for the odoo-data-flow application.""" + +import logging +import sys + +# Get the root logger for the application package +log = logging.getLogger("odoo_data_flow") + + +def setup_logging(verbose=False): + """Configures the root logger for the application. + + This function sets up a handler that prints logs to the console + with a consistent format. + + Args: + verbose (bool): If True, the logging level is set to DEBUG. + Otherwise, it's set to INFO. + """ + # Determine the logging level + level = logging.DEBUG if verbose else logging.INFO + log.setLevel(level) + + # Clear any existing handlers to avoid duplicate logs if this is called + # multiple times + if log.hasHandlers(): + log.handlers.clear() + + # Create a handler to print to the console + handler = logging.StreamHandler(sys.stdout) + + # Create a formatter and set it for the handler + formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") + handler.setFormatter(formatter) + + # Add the handler to the logger + log.addHandler(handler) diff --git a/src/odoo_data_flow/migrator.py b/src/odoo_data_flow/migrator.py new file mode 100644 index 00000000..ae0c9082 --- /dev/null +++ b/src/odoo_data_flow/migrator.py @@ -0,0 +1,71 @@ +"""Migrate data between two odoo databases. + +This module contains the logic for performing a direct, in-memory +migration of data from one Odoo instance to another. 
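+
+A minimal usage sketch (the config paths, model and field list below are
+illustrative; ``run_migration`` accepts the keyword arguments defined on the
+function in this module)::
+
+    from odoo_data_flow.migrator import run_migration
+
+    run_migration(
+        config_export="conf/source.conf",
+        config_import="conf/destination.conf",
+        model="res.partner",
+        fields=["id", "name", "email"],
+    )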
+""" + +from .exporter import run_export_for_migration +from .importer import run_import_for_migration +from .lib.transform import Processor +from .logging_config import log + + +def run_migration( + config_export, + config_import, + model, + domain="[]", + fields=None, + mapping=None, + export_worker=1, + export_batch_size=100, + import_worker=1, + import_batch_size=10, +): + """Performs a server-to-server data migration. + + This function chains together the export, transform, and import processes + without creating intermediate files. + """ + log.info("--- Starting Server-to-Server Migration ---") + + # Step 1: Export data from the source database + log.info(f"Exporting data from model '{model}'...") + header, data = run_export_for_migration( + config=config_export, + model=model, + domain=domain, + fields=fields, + worker=export_worker, + batch_size=export_batch_size, + ) + + if not data: + log.warning("No data exported. Migration finished.") + return + + log.info(f"Successfully exported {len(data)} records.") + + # Step 2: Transform the data in memory + log.info("Transforming data in memory...") + processor = Processor(header=header, data=data) + + if not mapping: + log.info("No mapping provided, using 1-to-1 mapping.") + mapping = processor.get_o2o_mapping() + + # The process method returns the transformed header and data + to_import_header, to_import_data = processor.process(mapping, filename=None) + + # Step 3: Import the transformed data into the destination database + log.info(f"Importing {len(to_import_data)} records into destination...") + run_import_for_migration( + config=config_import, + model=model, + header=to_import_header, + data=to_import_data, + worker=import_worker, + batch_size=import_batch_size, + ) + + log.info("--- Migration Finished Successfully ---") diff --git a/src/odoo_data_flow/py.typed b/src/odoo_data_flow/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/odoo_data_flow/workflow_runner.py b/src/odoo_data_flow/workflow_runner.py new file mode 100644 index 00000000..75e51448 --- /dev/null +++ b/src/odoo_data_flow/workflow_runner.py @@ -0,0 +1,73 @@ +"""Workflow Runner, invoke workflows. + +This module acts as a dispatcher for running post-import workflows +from the command line. 
+""" + +import ast + +from .lib.conf_lib import get_connection_from_config +from .lib.workflow.invoice_v9 import InvoiceWorkflowV9 +from .logging_config import log + + +def run_invoice_v9_workflow( + actions, + config, + field, + status_map_str, + paid_date_field, + payment_journal, + max_connection, +): + """Initializes and runs the requested actions for the InvoiceWorkflowV9.""" + log.info("--- Initializing Invoice Workflow for Odoo v9 ---") + + try: + connection = get_connection_from_config(config_file=config) + + # Safely evaluate the status map string into a dictionary + status_map = ast.literal_eval(status_map_str) + + if not isinstance(status_map, dict): + raise TypeError("Status map must be a dictionary.") + + except Exception as e: + log.error(f"Failed to initialize workflow: {e}") + return + + # Instantiate the legacy workflow class + wf = InvoiceWorkflowV9( + connection, + field=field, + status_map=status_map, + paid_date_field=paid_date_field, + payment_journal=payment_journal, + max_connection=max_connection, + ) + + # Run the requested actions in a specific order + if not actions or "all" in actions: + actions = ["tax", "validate", "pay", "proforma", "rename"] + + log.info(f"Executing workflow actions: {', '.join(actions)}") + + if "tax" in actions: + wf.set_tax() + if "validate" in actions: + wf.validate_invoice() + if "pay" in actions: + wf.paid_invoice() + if "proforma" in actions: + wf.proforma_invoice() + if "rename" in actions: + rename_field = "x_legacy_number" + log.info(f"Note: 'rename' action is using a placeholder field: {rename_field}") + wf.rename(rename_field) + + log.info("--- Invoice Workflow Finished ---") + + +# We can add runners for other workflows here in the future +# def run_sale_order_workflow(...): +# pass diff --git a/tests/.coveragerc b/tests/.coveragerc deleted file mode 100644 index 6e0a1720..00000000 --- a/tests/.coveragerc +++ /dev/null @@ -1,3 +0,0 @@ -[run] -branch = True -source = .,.. diff --git a/tests/5_partner_export.sh b/tests/5_partner_export.sh index d0123f8d..191fea11 100644 --- a/tests/5_partner_export.sh +++ b/tests/5_partner_export.sh @@ -1,2 +1,20 @@ #!/usr/bin/env bash -$1 ../odoo_export_thread.py -c conf/connection.conf --file=data/res.partner.exported.csv --model=res.partner --worker=4 --size=200 --domain="[]" --field="id,name,phone,website,street,city,country_id/id" --sep=";" --encoding=utf-8-sig +# +# Tests the data export functionality. + +# Exit immediately if a command exits with a non-zero status. +set -e + +echo "--- Testing data export ---" +odoo-data-flow export \ + --config "tests/conf/connection.conf" \ + --file "data/res.partner.exported.csv" \ + --model "res.partner" \ + --fields "id,name,phone,website,street,city,country_id/id" \ + --domain "[]" \ + --worker 4 \ + --size 200 \ + --separator ";" \ + --encoding "utf-8-sig" + +echo "Data export test complete." diff --git a/tests/6_o2m_import.sh b/tests/6_o2m_import.sh index 70904c14..a1a629af 100644 --- a/tests/6_o2m_import.sh +++ b/tests/6_o2m_import.sh @@ -1,2 +1,17 @@ #!/usr/bin/env bash -$1 ../odoo_import_thread.py --file=origin/res.partner_o2m.csv --model='res.partner' --size=1 --worker=1 --conf=conf/connection.conf --o2m +# +# Tests the import of one-to-many (o2m) relationships. + +# Exit immediately if a command exits with a non-zero status. 
+set -e + +echo "--- Testing one-to-many (o2m) import ---" +odoo-data-flow import \ + --config "tests/conf/connection.conf" \ + --file "tests/origin/res.partner_o2m.csv" \ + --model "res.partner" \ + --size 1 \ + --worker 1 \ + --o2m + +echo "o2m import test complete." diff --git a/tests/7_convert_binary.sh b/tests/7_convert_binary.sh index 7673552d..3b0d573c 100644 --- a/tests/7_convert_binary.sh +++ b/tests/7_convert_binary.sh @@ -1,4 +1,21 @@ #!/usr/bin/env bash -$1 ../odoo_convert_path_to_image.py --path=./origin/img/ -f Image origin/contact.csv -$1 ../odoo_convert_url_to_image.py -f Image origin/contact_url.csv +# +# Tests the binary conversion commands. +# Exit immediately if a command exits with a non-zero status. +set -e + +echo "--- Testing binary conversion from local file paths ---" +odoo-data-flow path-to-image \ + tests/origin/contact.csv \ + --path "tests/origin/img/" \ + --fields "Image" \ + --out "data/contacts_from_path.csv" + +echo "--- Testing binary conversion from URLs ---" +odoo-data-flow url-to-image \ + tests/origin/contact_url.csv \ + --fields "Image" \ + --out "data/contacts_from_url.csv" + +echo "Binary conversion tests complete." diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..895256c7 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Test suite for the odoo-data-flow package.""" diff --git a/tests/clean.sh b/tests/clean.sh index 5960daa8..b7c3218f 100755 --- a/tests/clean.sh +++ b/tests/clean.sh @@ -1,14 +1,24 @@ #!/usr/bin/env bash -#Need to launch odoo database accessible with the configuration given in conf/connection.conf -#Modules contacts need to be installed - -rm -rf data -rm -rf htmlcov -rm 0_partner_generated.sh -rm 1_partner_split.sh -rm 2_contact_import.sh -rm 3_product_import.sh -rm 4_product_import.sh -rm .coverage -rm error.log -rm out.csv +# +# Cleans up all artifacts generated by the test suite. +# This script should be run from the root of the repository. + +echo "Cleaning up test artifacts..." + +# Remove the main data output directory +rm -rf data/ + +# Remove coverage report files and database +rm -rf htmlcov/ +rm -f .coverage + +# Remove specific log files and default outputs +rm -f error.log +rm -f out.csv + +# Remove all fail and bis files from any directory +# The find command is robust and will search recursively. +find . -name "*.fail" -type f -delete +find . -name "*.fail.bis" -type f -delete + +echo "Cleanup complete." 
diff --git a/tests/const.py b/tests/const.py deleted file mode 100644 index a8821437..00000000 --- a/tests/const.py +++ /dev/null @@ -1,2 +0,0 @@ -# EXEC = 'coverage run -a' -EXEC = 'python3' diff --git a/tests/launch_test.sh b/tests/launch_test.sh index 552c4592..bcedfd71 100755 --- a/tests/launch_test.sh +++ b/tests/launch_test.sh @@ -1,38 +1,81 @@ #!/usr/bin/env bash -#Need to launch odoo database accessible with the configuration given in conf/connection.conf -#test works well on V11 -#Modules contacts need to be installed -#EXEC="python2" -for EXEC in "python2" "python3" "python3.7" "coverage run -a" -do - echo "============== Test $EXEC ==============" - rm -rf data - mkdir data - export PYTHONPATH=../ - echo "> Erase" - coverage erase - echo "> Generate data for import" - $EXEC test_import.py "$EXEC" - echo "> Run test import" - sh 0_partner_generated.sh - echo "> Run test split file" - $EXEC test_split.py "$EXEC" - echo "> Test mapping from file" - $EXEC test_from_file.py "$EXEC" - echo "> Import data with error" - sh 2_contact_import.sh 2> error.log - echo "> Import Product" - $EXEC test_product_v9.py "$EXEC" - sh 3_product_import.sh - echo "> Import Product v10" - $EXEC test_product_v10.py "$EXEC" - sh 4_product_import.sh - sh 5_partner_export.sh "$EXEC" - echo "> Import One2Many" - sh 6_o2m_import.sh "$EXEC" - echo "> Convert Binary" - sh 7_convert_binary.sh "$EXEC" - echo "Test join" - $EXEC test_merge.py - coverage html -done +# +# Main test suite for odoo-data-flow. +# This script should be run from the root of the repository. +# +# Prerequisites: +# 1. A virtual environment is active. +# 2. The package has been installed in editable mode: +# uv pip install -e . +# 3. An Odoo database is running and accessible via the configuration +# in tests/conf/connection.conf + +# Exit immediately if a command exits with a non-zero status. +set -e + +echo "============== Starting Odoo Data Flow Test Suite ==============" + +# --- Cleanup and Setup --- +echo "> Cleaning up previous test runs..." +rm -rf data/ .coverage *.fail *.fail.bis error.log +mkdir -p data/ + +echo "> Erasing previous coverage data..." +coverage erase + +# --- Running Tests --- +# Python scripts that prepare data are run under coverage. +# The subsequent steps use the new 'odoo-data-flow' CLI to test functionality. + +echo "> 1. Generating data and running initial partner import..." +coverage run -a tests/test_import.py +# Replaces 0_partner_generated.sh +odoo-data-flow import --config tests/conf/connection.conf --file data/res_partner.csv --model res.partner + +echo "> 2. Testing file split functionality..." +coverage run -a tests/test_split.py + +echo "> 3. Testing mapping from file..." +coverage run -a tests/test_from_file.py + +echo "> 4. Importing data with expected errors..." +# Replaces 2_contact_import.sh +# Assumes test_import.py also creates this file. +odoo-data-flow import --config tests/conf/connection.conf --file data/contact.csv --model res.partner 2> error.log + +echo "> 5. Importing Product (v9)..." +coverage run -a tests/test_product_v9.py +# Replaces 3_product_import.sh +# Assumes the python script generates 'product_template_v9.csv' +odoo-data-flow import --config tests/conf/connection.conf --file data/product_template_v9.csv --model product.template + +echo "> 6. Importing Product (v10)..." 
+coverage run -a tests/test_product_v10.py +# Replaces 4_product_import.sh +# Assumes the python script generates 'product_template_v10.csv' +odoo-data-flow import --config tests/conf/connection.conf --file data/product_template_v10.csv --model product.template + +echo "> 7. Exporting Partners..." +# Replaces 5_partner_export.sh +odoo-data-flow export --config tests/conf/connection.conf --model res.partner --fields "name,email" --output data/exported_partners.csv + +echo "> 8. Importing One2Many relations..." +# Replaces 6_o2m_import.sh +# Assumes a file like 'res.partner_o2m.csv' is generated/present +odoo-data-flow import --config tests/conf/connection.conf --file tests/origin/res.partner_o2m.csv --model res.partner --o2m + +echo "> 9. Converting Binary from Path..." +# Replaces 7_convert_binary.sh +# Assumes a source file with image paths exists for this test +odoo-data-flow path-to-image tests/origin/contact.csv --fields "image" --out data/contacts_with_images.csv + +echo "> 10. Testing merge functionality..." +coverage run -a tests/test_merge.py + + +# --- Finalizing --- +echo "" +echo "> Generating coverage report..." +coverage html + +echo "============== Test Suite Finished Successfully ==============" diff --git a/tests/test_from_file.py b/tests/test_from_file.py index f7b3774c..7d8741a4 100644 --- a/tests/test_from_file.py +++ b/tests/test_from_file.py @@ -1,65 +1,83 @@ -# -*- coding: utf-8 -*- -import sys +"""Test The mapper from file. -import os -from const import EXEC +This test script reads a CSV file from the 'origin' directory, +applies a mapping with various mappers, checks data quality, +and generates a clean CSV file ready for import. +""" -from odoo_csv_tools.lib import mapper, checker -from odoo_csv_tools.lib.transform import Processor +import os +import pprint -if len(sys.argv) == 2: - EXEC = sys.argv[1] +from odoo_data_flow.lib import checker, mapper +from odoo_data_flow.lib.transform import Processor +# --- Configuration --- +# Define translation maps and prefixes lang_map = { - '': '', - 'French': u'French (BE) / Français (BE)', - 'English': u'English', - 'Dutch': u'Dutch / Nederlands', + "": "", + "French": "French (BE) / Français (BE)", + "English": "English", + "Dutch": "Dutch / Nederlands", } country_map = { - 'Belgique': 'base.be', - 'BE': 'base.be', - 'FR': 'base.fr', - 'U.S': 'base.us', - 'US': 'base.us', - 'NL': 'base.nl', + "Belgique": "base.be", + "BE": "base.be", + "FR": "base.fr", + "U.S": "base.us", + "US": "base.us", + "NL": "base.nl", } PARTNER_PREFIX = "TEST_PARTNER" +IMAGE_PATH_PREFIX = "tests/origin/img/" -# STEP 1 : read the needed file(s) -processor = Processor('origin%scontact.csv' % os.sep) -# Print o2o mapping -import pprint +# --- Main Logic --- + +# STEP 1: Initialize the processor with the source file +source_file = os.path.join("tests", "origin", "contact.csv") +processor = Processor(source_file) +# Print the 1-to-1 mapping for debugging purposes +print("--- Auto-detected o2o Mapping ---") pprint.pprint(processor.get_o2o_mapping()) +print("---------------------------------") + -# STEP 2 : Define the mapping for every object to import +# STEP 2: Define the mapping for every object to import mapping = { - 'id': mapper.m2o(PARTNER_PREFIX, 'Company_ID', skip=True), - 'name': mapper.val('Company_Name', skip=True), - 'phone': mapper.val('Phone'), - 'website': mapper.val('www'), - 'street': mapper.val('address1'), - 'city': mapper.val('city'), - 'zip': mapper.val('zip code'), - 'country_id/id': mapper.map_val('country', country_map), - 
'company_type': mapper.const('company'), - 'customer': mapper.bool_val('IsCustomer', ['1'], ['0']), - 'supplier': mapper.bool_val('IsSupplier', ['1'], ['0']), - 'lang': mapper.map_val('Language', lang_map), - 'image': mapper.binary("Image", "origin/img/"), + "id": mapper.concat(PARTNER_PREFIX, "_", "Company_ID", skip=True), + "name": mapper.val("Company_Name", skip=True), + "phone": mapper.val("Phone"), + "website": mapper.val("www"), + "street": mapper.val("address1"), + "city": mapper.val("city"), + "zip": mapper.val("zip code"), + "country_id/id": mapper.map_val(country_map, mapper.val("country")), + "company_type": mapper.const("company"), + # CORRECTED: bool_val now only takes a list of true values. + "customer_rank": mapper.bool_val("IsCustomer", ["1"]), + "supplier_rank": mapper.bool_val("IsSupplier", ["1"]), + "lang": mapper.map_val(lang_map, mapper.val("Language")), + # CORRECTED: Prepend the image path prefix using a postprocess function. + # "image_1920": mapper.binary( + # "Image", + # postprocess=lambda p: os.path.join(IMAGE_PATH_PREFIX, p) if p else "", + # ), TODO + "image_1920": mapper.binary("Image", "origin/img/"), } # Step 3: Check data quality (Optional) +print("Running data quality checks...") processor.check(checker.cell_len_checker(30)) -processor.check(checker.id_validity_checker('Company_ID', "COM\d")) +processor.check(checker.id_validity_checker("Company_ID", r"COM\d")) processor.check(checker.line_length_checker(13)) processor.check(checker.line_number_checker(21)) # Step 4: Process data -processor.process(mapping, 'data%sres.partner.csv' % os.sep, {'worker': 2, 'batch_size': 5}, 'set') +print("Processing data transformation...") +output_file = os.path.join("data", "res.partner.from_file.csv") +params = {"model": "res.partner", "worker": 2, "batch_size": 5} +processor.process(mapping, output_file, params) -# Step 5: Define output and import parameter -processor.write_to_file("2_contact_import.sh", python_exe=EXEC, path='../') +print(f"File transformation complete. Output at: {output_file}") diff --git a/tests/test_import.py b/tests/test_import.py index d8e2bf64..fbcf22be 100644 --- a/tests/test_import.py +++ b/tests/test_import.py @@ -1,64 +1,82 @@ -""" -Created on 14 sept. 2016 +"""Generate Test data. -@author: mythrys +This test script generates data for partner categories and partners +to be used in the main test suite. 
""" -import random -import sys - -from const import EXEC -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib import transform +import random -if sys.version_info < (3, 0, 0): - from builtins import range +from odoo_data_flow.lib import mapper, transform -if len(sys.argv) == 2: - EXEC = sys.argv[1] +# --- Configuration --- +PARTNER_PREFIX = "partner_generated" +TAG_PREFIX = "partner_tag" +PARTNER_OUTPUT = "data/res.partner.generated.csv" +TAG_OUTPUT = "data/res.partner.category.csv" -PARTNER_PREFIX = 'partner_generated' -TAG_PREFIX = 'partner_tag' -output = 'data/res.partner.generated.csv' -tag_output = 'data/res.partner.category.csv' -script = '0_partner_generated.sh' +# --- Test Data Generation --- +# Create 100 unique tags +tags = [f"Tag {i}" for i in range(100)] -tags = ["Tag %s" % i for i in range(0, 100)] +# Create a dataset for 200 partners, each assigned 5 random tags +header = ["id", "tags"] +data = [ + [str(i), ",".join(random.choice(tags) for _ in range(5))] # noqa + for i in range(200) +] -header = ['id', 'tags'] -data = [[str(i), ','.join(tags[random.randint(0, 99)] for i in range(0, 5))] for i in range(0, 200)] +# --- Mapping Definitions --- -mapping = { - 'id': mapper.m2o(PARTNER_PREFIX, 'id'), - 'name': mapper.val('id', postprocess=lambda x: "Partner %s" % x), - 'phone': mapper.val('id', postprocess=lambda x: "0032%s" % (int(x) * 11)), - 'website': mapper.val('id', postprocess=lambda x: "http://website-%s.com" % x), - 'street': mapper.val('id', postprocess=lambda x: "Street %s" % x), - 'city': mapper.val('id', postprocess=lambda x: "City %s" % x), - 'zip': mapper.val('id', postprocess=lambda x: ("%s" % x).zfill(6)), - 'country_id/id': mapper.const('base.be'), - 'company_type': mapper.const('company'), - 'customer': mapper.val('id', postprocess=lambda x: str(int(x) % 2)), - 'supplier': mapper.val('id', postprocess=lambda x: str((int(x) + 1) % 2)), - 'lang': mapper.const('English'), - 'category_id/id': mapper.m2m(TAG_PREFIX, 'tags') +# Mapping to create the partner category records. +# This will be processed in a special m2m mode to create one record +# per unique tag. +tag_mapping = { + "id": mapper.m2m_id_list(TAG_PREFIX, "tags"), + "name": mapper.m2m("tags", sep=","), + "parent_id/id": mapper.const("base.res_partner_category_0"), } -tag_mapping = { - 'id': mapper.m2m_id_list(TAG_PREFIX, 'tags'), - 'name': mapper.m2m_value_list('tags'), - 'parent_id/id': mapper.const('base.res_partner_category_0'), +# Mapping to create the partner records, linking them to the tags created above. 
+partner_mapping = { + "id": mapper.concat(PARTNER_PREFIX, "_", "id"), + "name": mapper.val("id", postprocess=lambda x: f"Partner {x}"), + "phone": mapper.val("id", postprocess=lambda x: f"0032{int(x) * 11}"), + "website": mapper.val( + "id", postprocess=lambda x: f"http://website-{x}.com" + ), + "street": mapper.val("id", postprocess=lambda x: f"Street {x}"), + "city": mapper.val("id", postprocess=lambda x: f"City {x}"), + "zip": mapper.val("id", postprocess=lambda x: str(x).zfill(6)), + "country_id/id": mapper.const("base.be"), + "company_type": mapper.const("company"), + "customer_rank": mapper.val("id", postprocess=lambda x: int(x) % 2), + "supplier_rank": mapper.val("id", postprocess=lambda x: (int(x) + 1) % 2), + "lang": mapper.const("en_US"), + "category_id/id": mapper.m2m(TAG_PREFIX, "tags"), } +# --- Processing --- + +# Initialize the processor with the in-memory data processor = transform.Processor(header=header, data=data) -processor.process(tag_mapping, tag_output, { - 'worker': 1, - 'batch_size': 10, - 'model': 'res.partner.category', -}, m2m=True) -processor.process(mapping, output, { - 'worker': 4, - 'batch_size': 100, - 'model': 'res.partner', -}) -processor.write_to_file(script, python_exe=EXEC, path='../', encoding="utf-8-sig") + +# Process the tags first, using the special m2m=True mode. +# This will find all unique tags from the 'tags' column and create a clean +# CSV file with one row for each unique tag. +print(f"Generating partner category data at: {TAG_OUTPUT}") +processor.process( + tag_mapping, + TAG_OUTPUT, + {"model": "res.partner.category"}, + m2m=True, +) + +# Next, process the main partner records. +print(f"Generating partner data at: {PARTNER_OUTPUT}") +processor.process( + partner_mapping, + PARTNER_OUTPUT, + {"model": "res.partner"}, +) + +print("Test data generation complete.") diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 00000000..d9c39282 --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,51 @@ +"""Test cases for the __main__ module.""" + +import pytest +from click.testing import CliRunner + +# CORRECTED: Use an underscore for the package name in the import. +from odoo_data_flow import __main__ + + +@pytest.fixture +def runner() -> CliRunner: + """Fixture for invoking command-line interfaces.""" + return CliRunner() + + +def test_main_succeeds_without_command(runner: CliRunner) -> None: + """Test main Succeeds. + + It exits with a status code of 0 when no command is provided + and should show the main help message. + """ + # CORRECTED: The entry point function from our __main__.py is now 'cli'. + result = runner.invoke(__main__.cli) + assert result.exit_code == 0 + # A good basic test is to ensure the main commands are listed in the help output. + assert "import" in result.output + assert "export" in result.output + assert "path-to-image" in result.output + assert "url-to-image" in result.output + + +def test_main_shows_version(runner: CliRunner) -> None: + """It shows the version of the package when --version is used.""" + result = runner.invoke(__main__.cli, ["--version"]) + assert result.exit_code == 0 + # This checks that the command runs and that the word 'version' + # appears in the output, which is a robust check for the --version flag. + assert "version" in result.output + + +# You can also add more specific tests for each command. 
+# For example, testing that the 'import' command fails without required options: +def test_import_fails_without_options(runner: CliRunner) -> None: + """The import command should fail if required options are missing.""" + # We invoke the 'import' sub-command directly. + result = runner.invoke(__main__.cli, ["import"]) + # It should exit with a non-zero status code because options are missing. + assert result.exit_code != 0 + # Click's error message should mention the missing options. + assert "Missing option" in result.output + assert "--config" in result.output diff --git a/tests/test_merge.py b/tests/test_merge.py index 8c1650f7..b0748855 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -1,12 +1,30 @@ -''' -Created on 10 dec. 2019 +"""This test script checks the file joining functionality of the Processor. -@author: Thibault Francois -''' -import random +It merges two source CSV files based on a common key. +""" -from odoo_csv_tools.lib import transform +import os +from odoo_data_flow.lib import transform -processor = transform.Processor(filename='origin/test_merge1.csv') -processor.join_file("origin/test_merge2.csv", "category", "name") \ No newline at end of file +# --- Configuration --- +SOURCE_FILE_1 = os.path.join("tests", "origin", "test_merge1.csv") +SOURCE_FILE_2 = os.path.join("tests", "origin", "test_merge2.csv") + +# --- Main Logic --- +print(f"Initializing processor with primary file: {SOURCE_FILE_1}") +# The 'filename' argument is deprecated, but we keep it for now +# to match the existing test file structure. +# A future refactor could update the Processor to use a more explicit name. +processor = transform.Processor(filename=SOURCE_FILE_1) + +print(f"Joining with secondary file: {SOURCE_FILE_2}") +# Join the second file into the processor's data buffer. +# The join happens where the value in the 'category' column of file 1 +# matches the value in the 'name' column of file 2. +processor.join_file(SOURCE_FILE_2, "category", "name") + +print("File join complete. The processor now holds the merged data in memory.") +# Note: This test script only performs the in-memory join. +# A subsequent step in a test runner would be needed to process +# this merged data into a final output file. diff --git a/tests/test_product_v10.py b/tests/test_product_v10.py index f706e1a8..179cfb6e 100644 --- a/tests/test_product_v10.py +++ b/tests/test_product_v10.py @@ -1,102 +1,181 @@ -# -*- coding: utf-8 -*- -import sys +"""Test Odoo version 10 product import. + +This test script generates a complete set of data files for importing +products with variants, including categories, attributes, and attribute lines. +This is based on the v10 product structure. 
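+
+The generated CSV files are written to the ``data/`` directory in dependency
+order: categories, templates, attributes, attribute values, attribute lines
+and, finally, product variants.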
+""" import os -from const import EXEC -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import ProductProcessorV10 +from odoo_data_flow.lib import mapper -if len(sys.argv) == 2: - EXEC = sys.argv[1] +# We assume ProductProcessorV10 is a custom class inheriting from Processor +from odoo_data_flow.lib.transform import ProductProcessorV10 +# --- Configuration --- TEMPLATE_PREFIX = "PRODUCT_TEMPLATE" PRODUCT_PREFIX = "PRODUCT_PRODUCT" CATEGORY_PREFIX = "PRODUCT_CATEGORY" - ATTRIBUTE_PREFIX = "PRODUCT_ATTRIBUTE" ATTRIBUTE_VALUE_PREFIX = "PRODUCT_ATTRIBUTE_VALUE" ATTRIBUTE_LINE_PREFIX = "PRODUCT_ATTRIBUTE_LINE" -context = {'create_product_variant': True, 'tracking_disable': True} +# Define the attributes to be processed from the source file +attribute_list = ["Color", "Gender", "Size_H", "Size_W"] +source_file = os.path.join("tests", "origin", "product.csv") +context = {"create_product_variant": True, "tracking_disable": True} + -# STEP 1 : read the needed file(s) -processor = ProductProcessorV10('origin%sproduct.csv' % os.sep, delimiter=',') +# --- Main Logic --- +# STEP 1: Initialize the custom processor with the source file +print(f"Initializing processor for product import from: {source_file}") +processor = ProductProcessorV10(source_file, separator=",") -# STEP 2 : Category and Parent Category +# STEP 2: Generate data for Parent and Child Categories +print("Generating data for product categories...") categ_parent_map = { - 'id': mapper.m2o(CATEGORY_PREFIX, 'categoy'), - 'name': mapper.val('categoy'), + "id": mapper.m2o_map(CATEGORY_PREFIX, "categoy"), + "name": mapper.val("categoy"), } - categ_map = { - 'id': mapper.m2o(CATEGORY_PREFIX, 'Sub Category'), - 'parent_id/id': mapper.m2o(CATEGORY_PREFIX, 'categoy'), - 'name': mapper.val('Sub Category'), + "id": mapper.m2o_map(CATEGORY_PREFIX, "Sub Category"), + "parent_id/id": mapper.m2o_map(CATEGORY_PREFIX, "categoy"), + "name": mapper.val("Sub Category"), } +processor.process( + categ_parent_map, + os.path.join("data", "product.category.parent.csv"), + {"model": "product.category", "worker": 1, "batch_size": 5}, + "set", + m2m=True, # Use m2m=True to get a unique set of parent categories +) +processor.process( + categ_map, + os.path.join("data", "product.category.csv"), + {"model": "product.category", "worker": 1, "batch_size": 20}, + "set", + m2m=True, # Use m2m=True to get a unique set of child categories +) -processor.process(categ_parent_map, 'data%sproduct.category.parent.csv' % os.sep, {'worker': 1, 'batch_size': 5, - 'model': 'product.category'}, 'set') -processor.process(categ_map, 'data%sproduct.category.csv' % os.sep, {'worker': 1, 'batch_size': 20}, 'set') - -# STEP 3 : Product Template mapping +# STEP 3: Generate data for Product Templates +print("Generating data for product templates...") template_map = { - 'id': mapper.m2o(TEMPLATE_PREFIX, 'ref'), - 'categ_id/id': mapper.m2o(CATEGORY_PREFIX, 'Sub Category'), - 'standard_price': mapper.num('cost'), - 'lst_price': mapper.num('public_price'), - 'default_code': mapper.val('ref'), - 'name': mapper.val('name'), + "id": mapper.m2o_map(TEMPLATE_PREFIX, "ref"), + "categ_id/id": mapper.m2o_map(CATEGORY_PREFIX, "Sub Category"), + "standard_price": mapper.num("cost"), + "list_price": mapper.num("public_price"), + "default_code": mapper.val("ref"), + "name": mapper.val("name"), + "type": mapper.const("product"), } -processor.process(template_map, 'data%sproduct.template.csv' % os.sep, {'worker': 4, 'batch_size': 10, - 'context': context}, 'set') +processor.process( + 
template_map, + os.path.join("data", "product.template.csv"), + { + "model": "product.template", + "worker": 4, + "batch_size": 10, + "context": context, + }, + m2m=True, # A product template should only be created once per ref +) + +# STEP 4: Generate data for Attributes +print("Generating data for product attributes...") +# The custom processor method handles creating a simple list of attributes +processor.process_attribute_data( + attribute_list, + ATTRIBUTE_PREFIX, + os.path.join("data", "product.attribute.csv"), + { + "model": "product.attribute", + "worker": 4, + "batch_size": 10, + "context": context, + }, +) -# STEP 4: Attribute List -attribute_list = ['Color', 'Gender', 'Size_H', 'Size_W'] -processor.process_attribute_data(attribute_list, ATTRIBUTE_PREFIX, 'data%sproduct.attribute.csv' % os.sep, - {'worker': 4, 'batch_size': 10, - 'context': context}) -# STEP 5: Attribute Value -attribue_value_mapping = { - 'id': mapper.m2m_id_list(ATTRIBUTE_VALUE_PREFIX, *[mapper.concat_field_value_m2m('_', f) for f in attribute_list]), - 'name': mapper.m2m_value_list(*attribute_list), - 'attribute_id/id': mapper.m2m_id_list(ATTRIBUTE_PREFIX, *[mapper.field(f) for f in attribute_list]), +# STEP 5: Generate data for Attribute Values +print("Generating data for product attribute values...") +attribute_value_mapping = { + "id": mapper.m2m_template_attribute_value( + ATTRIBUTE_VALUE_PREFIX, *attribute_list + ), + "name": mapper.m2m_value_list(*attribute_list), + "attribute_id/id": mapper.m2m_id_list( + ATTRIBUTE_PREFIX, *[mapper.field(f) for f in attribute_list] + ), } -processor.process(attribue_value_mapping, 'data%sproduct.attribute.value.csv' % os.sep, {'worker': 3, 'batch_size': 50, - 'context': context, - 'groupby': 'attribute_id/id'}, - m2m=True) +processor.process( + attribute_value_mapping, + os.path.join("data", "product.attribute.value.csv"), + { + "model": "product.attribute.value", + "worker": 3, + "batch_size": 50, + "context": context, + "groupby": "attribute_id/id", + }, + m2m=True, +) -# STEP 6: Attribute Value Line +# STEP 6: Generate data for Attribute Lines (linking attributes to templates) +print("Generating data for product attribute lines...") line_mapping = { - 'id': mapper.m2m_id_list(ATTRIBUTE_LINE_PREFIX, - *[mapper.concat_mapper_all('_', mapper.field(f), mapper.val('ref')) for f in - attribute_list]), - 'product_tmpl_id/id': mapper.m2o(TEMPLATE_PREFIX, 'ref'), - 'attribute_id/id': mapper.m2m_id_list(ATTRIBUTE_PREFIX, *[mapper.field(f) for f in attribute_list]), - 'value_ids/id': mapper.m2m_id_list(ATTRIBUTE_VALUE_PREFIX, - *[mapper.concat_field_value_m2m('_', f) for f in attribute_list]), + "id": mapper.m2m_id_list( + ATTRIBUTE_LINE_PREFIX, + *[ + mapper.concat_mapper_all("_", mapper.field(f), mapper.val("ref")) + for f in attribute_list + ], + ), + "product_tmpl_id/id": mapper.m2o_map(TEMPLATE_PREFIX, "ref"), + "attribute_id/id": mapper.m2m_id_list( + ATTRIBUTE_PREFIX, *[mapper.field(f) for f in attribute_list] + ), + "value_ids/id": mapper.m2m_template_attribute_value( + ATTRIBUTE_VALUE_PREFIX, *attribute_list + ), } -context['update_many2many'] = True -processor.process(line_mapping, 'data%sproduct.attribute.line.csv' % os.sep, {'worker': 3, 'batch_size': 50, - 'context': dict(context), - 'groupby': 'product_tmpl_id/id'}, - m2m=True) -context.pop('update_many2many') +context_with_update = context.copy() +context_with_update["update_many2many"] = True +processor.process( + line_mapping, + os.path.join("data", "product.attribute.line.csv"), + { + "model": 
"product.attribute.line", + "worker": 3, + "batch_size": 50, + "context": context_with_update, + "groupby": "product_tmpl_id/id", + }, + m2m=True, +) -# STEP 7: Product Variant +# STEP 7: Generate data for final Product Variants (product.product) +print("Generating data for product variants...") product_mapping = { - 'id': mapper.m2o_map(PRODUCT_PREFIX, mapper.concat('_', 'barcode', 'Color', 'Gender', 'Size_H', 'Size_W'), - skip=True), - 'barcode': mapper.val('barcode'), - 'product_tmpl_id/id': mapper.m2o(TEMPLATE_PREFIX, 'ref'), - 'attribute_value_ids/id': mapper.m2m_attribute_value(ATTRIBUTE_VALUE_PREFIX, 'Color', 'Gender', 'Size_H', 'Size_W'), - 'default_code': mapper.val('ref'), - 'standard_price': mapper.num('cost'), + "id": mapper.m2o_map(PRODUCT_PREFIX, "barcode", skip=True), + "barcode": mapper.val("barcode"), + "product_tmpl_id/id": mapper.m2o_map(TEMPLATE_PREFIX, "ref"), + # This mapper seems to handle the complex logic of finding the correct + # attribute values for a given variant. + "attribute_value_ids/id": mapper.m2m_template_attribute_value( + ATTRIBUTE_VALUE_PREFIX, "Color", "Gender", "Size_H", "Size_W" + ), + "default_code": mapper.val("ref"), + "standard_price": mapper.num("cost"), } -processor.process(product_mapping, 'data%sproduct.product.csv' % os.sep, {'worker': 3, 'batch_size': 50, - 'groupby': 'product_tmpl_id/id', - 'context': context}, 'set') +processor.process( + product_mapping, + os.path.join("data", "product.product.csv"), + { + "model": "product.product", + "worker": 3, + "batch_size": 50, + "groupby": "product_tmpl_id/id", + "context": context, + }, +) -# #Step 8: Define output and import parameter -processor.write_to_file("4_product_import.sh", python_exe=EXEC, path='../') +print("Product v10 test data generation complete.") diff --git a/tests/test_product_v9.py b/tests/test_product_v9.py index 4143ddfb..b2e9ca02 100644 --- a/tests/test_product_v9.py +++ b/tests/test_product_v9.py @@ -1,81 +1,141 @@ -# -*- coding: utf-8 -*- -import sys +"""Test Odoo version 9 product import. + +This test script generates a complete set of data files for importing +products with variants, including categories and attributes. +This is based on the v9 product structure. 
+""" import os -from const import EXEC -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib.transform import ProductProcessorV9 +from odoo_data_flow.lib import mapper -if len(sys.argv) == 2: - EXEC = sys.argv[1] +# We assume ProductProcessorV9 is a custom class inheriting from Processor +from odoo_data_flow.lib.transform import ProductProcessorV9 +# --- Configuration --- TEMPLATE_PREFIX = "PRODUCT_TEMPLATE" PRODUCT_PREFIX = "PRODUCT_PRODUCT" CATEGORY_PREFIX = "PRODUCT_CATEGORY" - ATTRIBUTE_PREFIX = "PRODUCT_ATTRIBUTE" ATTRIBUTE_VALUE_PREFIX = "PRODUCT_ATTRIBUTE_VALUE" -# Define the context that will be used -context = {'create_product_variant': True, 'tracking_disable': True} -# STEP 1 : read the needed file(s) -processor = ProductProcessorV9('origin%sproduct.csv' % os.sep, delimiter=',') +# Define the attributes to be processed from the source file +attribute_list = ["Color", "Gender", "Size_H", "Size_W"] +source_file = os.path.join("tests", "origin", "product.csv") +context = {"create_product_variant": True, "tracking_disable": True} + +# --- Main Logic --- +# STEP 1: Initialize the custom processor with the source file +print(f"Initializing processor for v9 product import from: {source_file}") +processor = ProductProcessorV9(source_file, separator=",") -# STEP 2 : Category and Parent Category +# STEP 2: Generate data for Parent and Child Categories +print("Generating data for product categories...") categ_parent_map = { - 'id': mapper.m2o(CATEGORY_PREFIX, 'categoy'), - 'name': mapper.val('categoy'), + "id": mapper.m2o_map(CATEGORY_PREFIX, "categoy"), + "name": mapper.val("categoy"), } - categ_map = { - 'id': mapper.m2o(CATEGORY_PREFIX, 'Sub Category'), - 'parent_id/id': mapper.m2o(CATEGORY_PREFIX, 'categoy'), - 'name': mapper.val('Sub Category'), + "id": mapper.m2o_map(CATEGORY_PREFIX, "Sub Category"), + "parent_id/id": mapper.m2o_map(CATEGORY_PREFIX, "categoy"), + "name": mapper.val("Sub Category"), } +processor.process( + categ_parent_map, + os.path.join("data", "product.category.parent.v9.csv"), + {"model": "product.category"}, + m2m=True, +) +processor.process( + categ_map, + os.path.join("data", "product.category.v9.csv"), + {"model": "product.category"}, + m2m=True, +) -processor.process(categ_parent_map, 'data%sproduct.category.parent.csv' % os.sep, - {'worker': 1, 'batch_size': 5, 'model': 'product.category'}, 'set') -processor.process(categ_map, 'data%sproduct.category.csv' % os.sep, {'worker': 1, 'batch_size': 20}, 'set') - -# STEP 3 : Product Template mapping +# STEP 3: Generate data for Product Templates +print("Generating data for product templates...") template_map = { - 'id': mapper.m2o(TEMPLATE_PREFIX, 'ref'), - 'categ_id/id': mapper.m2o(CATEGORY_PREFIX, 'Sub Category'), - 'standard_price': mapper.num('cost'), - 'lst_price': mapper.num('public_price'), - 'default_code': mapper.val('ref'), - 'name': mapper.val('name'), + "id": mapper.m2o_map(TEMPLATE_PREFIX, "ref"), + "categ_id/id": mapper.m2o_map(CATEGORY_PREFIX, "Sub Category"), + "standard_price": mapper.num("cost"), + "list_price": mapper.num("public_price"), + "default_code": mapper.val("ref"), + "name": mapper.val("name"), + "type": mapper.const("product"), } -processor.process(template_map, 'data%sproduct.template.csv' % os.sep, - {'worker': 4, 'batch_size': 10, 'context': context}, 'set') +processor.process( + template_map, + os.path.join("data", "product.template.v9.csv"), + {"model": "product.template", "context": context}, + m2m=True, +) -# STEP 4: Attribute List -attribute_list = ['Color', 'Gender', 
'Size_H', 'Size_W'] +# STEP 4: Generate data for Attributes and Values (in one go for v9) +# This was handled by a custom process_attribute_mapping in the original script. +# We now standardize this to create two separate, clean files. +print("Generating data for product attributes and values...") + + +# Attribute Value mapping +attribute_value_map = { + "id": mapper.m2m_template_attribute_value(ATTRIBUTE_VALUE_PREFIX, *attribute_list), + "name": mapper.m2m_value_list(*attribute_list), + "attribute_id/id": mapper.m2o_att_name(ATTRIBUTE_PREFIX, attribute_list), +} +processor.process( + attribute_value_map, + os.path.join("data", "product.attribute.value.v9.csv"), + { + "model": "product.attribute.value", + "context": context, + "groupby": "attribute_id/id", + }, + m2m=True, +) + +attribute_list = ["Color", "Gender", "Size_H", "Size_W"] attribue_value_mapping = { - 'id': mapper.m2o_att(ATTRIBUTE_VALUE_PREFIX, attribute_list), # TODO - 'name': mapper.val_att(attribute_list), # TODO - 'attribute_id/id': mapper.m2o_att_name(ATTRIBUTE_PREFIX, attribute_list), + "id": mapper.m2o_att(ATTRIBUTE_VALUE_PREFIX, attribute_list), # TODO + "name": mapper.val_att(attribute_list), # TODO + "attribute_id/id": mapper.m2o_att_name(ATTRIBUTE_PREFIX, attribute_list), } line_mapping = { - 'product_tmpl_id/id': mapper.m2o(TEMPLATE_PREFIX, 'ref'), - 'attribute_id/id': mapper.m2o_att_name(ATTRIBUTE_PREFIX, attribute_list), - 'value_ids/id': mapper.m2o_att(ATTRIBUTE_VALUE_PREFIX, attribute_list) # TODO + "product_tmpl_id/id": mapper.m2o(TEMPLATE_PREFIX, "ref"), + "attribute_id/id": mapper.m2o_att_name(ATTRIBUTE_PREFIX, attribute_list), + "value_ids/id": mapper.m2o_att(ATTRIBUTE_VALUE_PREFIX, attribute_list), # TODO } -processor.process_attribute_mapping(attribue_value_mapping, line_mapping, attribute_list, ATTRIBUTE_PREFIX, 'data/', - {'worker': 3, 'batch_size': 50, 'context': context}) +processor.process_attribute_mapping( + attribue_value_mapping, + line_mapping, + attribute_list, + ATTRIBUTE_PREFIX, + "data/", + {"worker": 3, "batch_size": 50, "context": context}, +) + -# STEP 5: Product Variant +# STEP 5: Generate data for Product Variants (product.product) +print("Generating data for product variants...") product_mapping = { - 'id': mapper.m2o_map(PRODUCT_PREFIX, mapper.concat('_', 'barcode', 'Color', 'Gender', 'Size_H', 'Size_W'), - skip=True), - 'barcode': mapper.val('barcode'), - 'product_tmpl_id/id': mapper.m2o(TEMPLATE_PREFIX, 'ref'), - 'attribute_value_ids/id': mapper.m2m_attribute_value(ATTRIBUTE_VALUE_PREFIX, 'Color', 'Gender', 'Size_H', 'Size_W'), + "id": mapper.m2o_map(PRODUCT_PREFIX, "barcode", skip=True), + "barcode": mapper.val("barcode"), + "product_tmpl_id/id": mapper.m2o_map(TEMPLATE_PREFIX, "ref"), + "attribute_value_ids/id": mapper.m2m_template_attribute_value( + ATTRIBUTE_VALUE_PREFIX, "Color", "Gender", "Size_H", "Size_W" + ), } -processor.process(product_mapping, 'data%sproduct.product.csv' % os.sep, - {'worker': 3, 'batch_size': 50, 'groupby': 'product_tmpl_id/id', 'context': context}, 'set') +processor.process( + product_mapping, + os.path.join("data", "product.product.v9.csv"), + { + "model": "product.product", + "worker": 3, + "batch_size": 50, + "groupby": "product_tmpl_id/id", + "context": context, + }, +) -# Step 6: Define output and import parameter -processor.write_to_file("3_product_import.sh", python_exe=EXEC, path='../') +print("Product v9 test data generation complete.") diff --git a/tests/test_split.py b/tests/test_split.py index 2030dfa2..db9795ed 100644 --- 
a/tests/test_split.py +++ b/tests/test_split.py @@ -1,59 +1,89 @@ -''' -Created on 14 sept. 2016 +"""Test Split. + +This test script generates a large dataset and then splits it into multiple +files to test the processor's split functionality. +""" -@author: mythrys -''' import random -from odoo_csv_tools.lib import mapper -from odoo_csv_tools.lib import transform - -PARTNER_PREFIX = 'partner_generated' -TAG_PREFIX = 'partner_tag' -output = 'data/res.partner.generated.csv' -tag_output = 'data/res.partner.category.csv' -script = '1_partner_split.sh' - -tags = ["Tag %s" % i for i in range(0, 100)] - -header = ['id', 'tags'] -data = [[str(i), ','.join(tags[random.randint(0, 99)] for i in range(0, 5))] for i in range(0, 10000)] - -mapping = { - 'id': mapper.m2o(PARTNER_PREFIX, 'id'), - 'name': mapper.val('id', postprocess=lambda x: "Partner %s" % x), - 'phone': mapper.val('id', postprocess=lambda x: "0032%s" % (int(x) * 11)), - 'website': mapper.val('id', postprocess=lambda x: "http://website-%s.com" % x), - 'street': mapper.val('id', postprocess=lambda x: "Street %s" % x), - 'city': mapper.val('id', postprocess=lambda x: "City %s" % x), - 'zip': mapper.val('id', postprocess=lambda x: ("%s" % x).zfill(6)), - 'country_id/id': mapper.const('base.be'), - 'company_type': mapper.const('company'), - 'customer': mapper.val('id', postprocess=lambda x: str(int(x) % 2)), - 'supplier': mapper.val('id', postprocess=lambda x: str((int(x) + 1) % 2)), - 'lang': mapper.const('English'), - 'category_id/id': mapper.m2m(TAG_PREFIX, 'tags') -} +from odoo_data_flow.lib import mapper, transform + +# --- Configuration --- +PARTNER_PREFIX = "partner_generated" +TAG_PREFIX = "partner_tag" +PARTNER_OUTPUT_PREFIX = "data/res.partner.generated.split" +TAG_OUTPUT = "data/res.partner.category.split.csv" + +# --- Test Data Generation --- +# Create 100 unique tags +tags = [f"Tag {i}" for i in range(100)] +# Create a larger dataset for 10,000 partners +header = ["id", "tags"] +data = [ + [str(i), ",".join(random.choice(tags) for _ in range(5))] # noqa nosec B311 + for i in range(10000) +] # nosec B311 + +# --- Mapping Definitions (consistent with test_import.py) --- + +# Mapping to create the partner category records. tag_mapping = { - 'id': mapper.m2m_id_list(TAG_PREFIX, 'tags'), - 'name': mapper.m2m_value_list('tags'), - 'parent_id/id': mapper.const('base.res_partner_category_0'), + "id": mapper.m2m_id_list(TAG_PREFIX, "tags"), + "name": mapper.m2m("tags", sep=","), + "parent_id/id": mapper.const("base.res_partner_category_0"), } +# Mapping to create the partner records. 
+partner_mapping = { + "id": mapper.concat(PARTNER_PREFIX, "_", "id"), + "name": mapper.val("id", postprocess=lambda x: f"Partner {x}"), + "phone": mapper.val("id", postprocess=lambda x: f"0032{int(x) * 11}"), + "website": mapper.val( + "id", postprocess=lambda x: f"http://website-{x}.com" + ), + "street": mapper.val("id", postprocess=lambda x: f"Street {x}"), + "city": mapper.val("id", postprocess=lambda x: f"City {x}"), + "zip": mapper.val("id", postprocess=lambda x: str(x).zfill(6)), + "country_id/id": mapper.const("base.be"), + "company_type": mapper.const("company"), + "customer": mapper.val("id", postprocess=lambda x: int(x) % 2), + "supplier": mapper.val("id", postprocess=lambda x: (int(x) + 1) % 2), + "lang": mapper.const("en_US"), + "category_id/id": mapper.m2m(TAG_PREFIX, "tags"), +} + +# --- Processing --- +print("Initializing processor with 10,000 records.") processor = transform.Processor(header=header, data=data) -p_dict = processor.split(mapper.split_line_number(1000)) # Useless just for coverage -p_dict = processor.split(mapper.split_file_number(8)) -processor.process(tag_mapping, tag_output, { - 'worker': 1, # OPTIONAL - 'batch_size': 10, # OPTIONAL - 'model': 'res.partner.category', -}, m2m=True) -processor.write_to_file(script, path='../') -for index, p in p_dict.items(): - p.process(mapping, '%s.%s' % (output, index), { - 'worker': 4, # OPTIONAL - 'batch_size': 100, # OPTIONAL - 'model': 'res.partner', - }) - p.write_to_file(script, path='../', append=True) + +# This first split is primarily for test coverage purposes. +print("Running split by line number (for coverage)...") +processor.split(mapper.split_line_number(1000)) + +# This is the main test: split the dataset into 8 separate files. +print("Splitting data into 8 files...") +processor_dictionary = processor.split(mapper.split_file_number(8)) + +# First, process the tags into a single file from the main processor. +print(f"Generating single tag file for all splits at: {TAG_OUTPUT}") +processor.process( + tag_mapping, + TAG_OUTPUT, + {"model": "res.partner.category"}, + m2m=True, +) + +# Now, loop through the dictionary of split processors and have each one +# generate its own numbered output file. +print("Processing each data split into a separate partner file...") +for index, p in processor_dictionary.items(): + output_filename = f"{PARTNER_OUTPUT_PREFIX}.{index}.csv" + print(f" - Generating {output_filename}") + p.process( + partner_mapping, + output_filename, + {"model": "res.partner"}, + ) + +print("Split file generation complete.") diff --git a/tests/test_xml_file.py b/tests/test_xml_file.py index a27130d0..c5a45e25 100644 --- a/tests/test_xml_file.py +++ b/tests/test_xml_file.py @@ -1,18 +1,54 @@ -#-*- coding: utf-8 -*- -''' -Created on 21 févr. 2018 +"""Test XML Files. -@author: mythrys -''' -from odoo_csv_tools.lib import xml_transform +This test script checks the XML processing functionality. +It reads a source XML file, applies a mapping, and generates a +clean CSV file. +""" + +import os + +from odoo_data_flow.lib import mapper +from odoo_data_flow.lib.transform import Processor + +# --- Configuration --- +SOURCE_FILE = os.path.join("tests", "origin", "data.xml") +OUTPUT_FILE = os.path.join("data", "info_from_xml.csv") + +# --- Mapping Definition --- +# This mapping is updated to use dot notation for tags, which is the +# standard way the Processor handles nested data. +# Note: The new Processor may not support XPath features like accessing +# attributes (@name) or indexed elements (neighbor[1]). 
This test +# focuses on the documented tag-based mapping. mapping = { - 'name' : 'year/text()', - 'gdp': 'gdppc/text()', - 'nom': '@name', - 'neighbor' : 'neighbor[1]/@name', + "name": mapper.val("year"), + "gdp": mapper.val("gdppc"), + # Assuming 'nom' and 'neighbor' are now represented as tags in the XML. + "nom": mapper.val("name"), + "neighbor": mapper.val("neighbor.name"), +} + +# --- Main Logic --- +# Initialize the standard Processor, but with XML-specific arguments. +# We tell the processor that the records are enclosed in tags, +# and the whole list is inside a root tag (e.g., ). +print(f"Initializing XML processor for source file: {SOURCE_FILE}") +processor = Processor( + SOURCE_FILE, + xml_root_tag="data", # The root element containing all records + xml_record_tag="country", # The tag representing a single record +) + +# Define the parameters for the eventual import. +params = { + "model": "res.country.info", # Example model + "worker": 2, + "batch_size": 5, } -p = xml_transform.XMLProcessor("origin/data.xml", "//country", ) -p.process(mapping, 'data/info.csv', { 'worker' : 2, 'batch_size' : 5}) -p.write_to_file("99_contact_import.sh", python_exe='', path='') \ No newline at end of file +# Process the XML data using the mapping and write to a CSV file. +print(f"Processing XML data and writing to: {OUTPUT_FILE}") +processor.process(mapping, OUTPUT_FILE, params) + +print("XML file transformation complete.")
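+
+# The mapping above assumes a source document shaped roughly like the
+# following (illustrative values, matching xml_root_tag="data" and
+# xml_record_tag="country"):
+#
+#   <data>
+#     <country>
+#       <name>Belgium</name>
+#       <year>2024</year>
+#       <gdppc>50000</gdppc>
+#       <neighbor><name>France</name></neighbor>
+#     </country>
+#   </data>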