diff --git a/.cookiecutter.json b/.cookiecutter.json
new file mode 100644
index 00000000..73fac8b3
--- /dev/null
+++ b/.cookiecutter.json
@@ -0,0 +1,16 @@
+{
+ "_checkout": "2025.06.25",
+ "_output_dir": "/home/data/git/odoodataflow/odoo-data-flow",
+ "_repo_dir": "/home/bosd/.cookiecutters/cookiecutter-uv-hypermodern-python",
+ "_template": "gh:bosd/cookiecutter-uv-hypermodern-python",
+ "author": "bosd",
+ "copyright_year": "2025",
+ "development_status": "Development Status :: 3 - Alpha",
+ "email": "c5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me",
+ "friendly_name": "Odoo Data Flow",
+ "github_user": "bosd",
+ "license": "GPL-3.0",
+ "package_name": "odoo-data-flow",
+ "project_name": "odoo-data-flow",
+ "version": "0.0.0"
+}
diff --git a/.coverage b/.coverage
new file mode 100644
index 00000000..7260ae0f
Binary files /dev/null and b/.coverage differ
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00000000..a8faee78
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,15 @@
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.{py,toml}]
+indent_style = space
+indent_size = 4
+
+[*.{yml,yaml,json}]
+indent_style = space
+indent_size = 2
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..6313b56c
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+* text=auto eol=lf
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..fc0e0c6e
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,49 @@
+version: 2
+updates:
+ - package-ecosystem: github-actions
+ directory: "/"
+ schedule:
+ interval: weekly
+ groups:
+ github-actions-updates:
+ applies-to: version-updates
+ dependency-type: development
+ github-actions-security-updates:
+ applies-to: security-updates
+ dependency-type: development
+ - package-ecosystem: pip
+ directory: "/.github/workflows"
+ schedule:
+ interval: weekly
+ groups:
+ workflow-updates:
+ applies-to: version-updates
+ dependency-type: development
+ workflow-security-updates:
+ applies-to: security-updates
+ dependency-type: development
+ - package-ecosystem: pip
+ directory: "/docs"
+ schedule:
+ interval: weekly
+ groups:
+ doc-updates:
+ applies-to: version-updates
+ dependency-type: development
+ doc-security-updates:
+ applies-to: security-updates
+ dependency-type: production
+ - package-ecosystem: pip
+ directory: "/"
+ schedule:
+ interval: weekly
+ versioning-strategy: lockfile-only
+ allow:
+ - dependency-type: "all"
+ groups:
+ pip-version-updates:
+ applies-to: version-updates
+ dependency-type: development
+ pip-security-updates:
+ applies-to: security-updates
+ dependency-type: production
diff --git a/.github/labels.yml b/.github/labels.yml
new file mode 100644
index 00000000..f7f83aad
--- /dev/null
+++ b/.github/labels.yml
@@ -0,0 +1,66 @@
+---
+# Label names are important as they are used by Release Drafter to decide
+# where to record them in the changelog or whether to skip them.
+#
+# The repository labels will be automatically configured using this file and
+# the GitHub Action https://github.com/marketplace/actions/github-labeler.
+- name: breaking
+ description: Breaking Changes
+ color: bfd4f2
+- name: bug
+ description: Something isn't working
+ color: d73a4a
+- name: build
+ description: Build System and Dependencies
+ color: bfdadc
+- name: ci
+ description: Continuous Integration
+ color: 4a97d6
+- name: dependencies
+ description: Pull requests that update a dependency file
+ color: 0366d6
+- name: documentation
+ description: Improvements or additions to documentation
+ color: 0075ca
+- name: duplicate
+ description: This issue or pull request already exists
+ color: cfd3d7
+- name: enhancement
+ description: New feature or request
+ color: a2eeef
+- name: github_actions
+ description: Pull requests that update Github_actions code
+ color: "000000"
+- name: good first issue
+ description: Good for newcomers
+ color: 7057ff
+- name: help wanted
+ description: Extra attention is needed
+ color: 008672
+- name: invalid
+ description: This doesn't seem right
+ color: e4e669
+- name: performance
+ description: Performance
+ color: "016175"
+- name: python
+ description: Pull requests that update Python code
+ color: 2b67c6
+- name: question
+ description: Further information is requested
+ color: d876e3
+- name: refactoring
+ description: Refactoring
+ color: ef67c4
+- name: removal
+ description: Removals and Deprecations
+ color: 9ae7ea
+- name: style
+ description: Style
+ color: c120e5
+- name: testing
+ description: Testing
+ color: b1fc6f
+- name: wontfix
+ description: This will not be worked on
+ color: ffffff
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
new file mode 100644
index 00000000..7a04410f
--- /dev/null
+++ b/.github/release-drafter.yml
@@ -0,0 +1,29 @@
+categories:
+ - title: ":boom: Breaking Changes"
+ label: "breaking"
+ - title: ":rocket: Features"
+ label: "enhancement"
+ - title: ":fire: Removals and Deprecations"
+ label: "removal"
+ - title: ":beetle: Fixes"
+ label: "bug"
+ - title: ":racehorse: Performance"
+ label: "performance"
+ - title: ":rotating_light: Testing"
+ label: "testing"
+ - title: ":construction_worker: Continuous Integration"
+ label: "ci"
+ - title: ":books: Documentation"
+ label: "documentation"
+ - title: ":hammer: Refactoring"
+ label: "refactoring"
+ - title: ":lipstick: Style"
+ label: "style"
+ - title: ":package: Dependencies"
+ labels:
+ - "dependencies"
+ - "build"
+template: |
+ ## Changes
+
+ $CHANGES
diff --git a/.github/workflows/constraints.txt b/.github/workflows/constraints.txt
new file mode 100644
index 00000000..d57727b6
--- /dev/null
+++ b/.github/workflows/constraints.txt
@@ -0,0 +1,3 @@
+pip==24.3.1
+nox==2024.10.09
+virtualenv==20.27.1
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
new file mode 100644
index 00000000..a36ce84e
--- /dev/null
+++ b/.github/workflows/labeler.yml
@@ -0,0 +1,22 @@
+name: Labeler
+
+on:
+ push:
+ branches:
+ - main
+ - master
+
+jobs:
+ labeler:
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ issues: write
+ steps:
+ - name: Check out the repository
+ uses: actions/checkout@v4
+
+ - name: Run Labeler
+ uses: crazy-max/ghaction-github-labeler@v5.0.0
+ with:
+ skip-delete: true
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 00000000..bb404ef9
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,73 @@
+name: Release
+
+on:
+ push:
+ branches:
+ - main
+ - master
+
+jobs:
+ release:
+ name: Release
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out the repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.13"
+
+ - name: Install uv
+ uses: astral-sh/setup-uv@v4
+
+ - name: Check if there is a parent commit
+ id: check-parent-commit
+ run: |
+ echo "sha=$(git rev-parse --verify --quiet HEAD^)" >> $GITHUB_OUTPUT
+
+ - name: Detect and tag new version
+ id: check-version
+ if: steps.check-parent-commit.outputs.sha
+ uses: salsify/action-detect-and-tag-new-version@v2.0.3
+ with:
+ version-command: |
+ bash -o pipefail -c "uv version | awk '{ print \$2 }'"
+
+ - name: Bump version for developmental release
+ if: "! steps.check-version.outputs.tag"
+ run: |
+ sed -i -e "s/0.0.0/${GITHUB_REF#refs/*/}/" pyproject.toml
+ # uv bump patch &&
+ # version=$(uv version | awk '{ print $2 }') &&
+ # uv bump $version.dev.$(date +%s)
+
+ - name: Build package
+ run: |
+ uv build
+
+ - name: Publish package on PyPI
+ if: steps.check-version.outputs.tag
+ uses: pypa/gh-action-pypi-publish@v1.12.2
+ with:
+ user: __token__
+ password: ${{ secrets.PYPI_TOKEN }}
+
+ - name: Publish package on TestPyPI
+ if: "! steps.check-version.outputs.tag"
+ uses: pypa/gh-action-pypi-publish@v1.12.2
+ with:
+ user: __token__
+ password: ${{ secrets.TEST_PYPI_TOKEN }}
+ repository_url: https://test.pypi.org/legacy/
+
+ - name: Publish the release notes
+ uses: release-drafter/release-drafter@v6.0.0
+ with:
+ publish: ${{ steps.check-version.outputs.tag != '' }}
+ tag: ${{ steps.check-version.outputs.tag }}
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 00000000..e355ba60
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,150 @@
+name: Tests
+
+on:
+ push:
+ branches: [main, master]
+ pull_request:
+ branches: [main, master]
+
+jobs:
+ tests:
+ name: ${{ matrix.session }} / py${{ matrix.python }} / ${{ matrix.os }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - { python: "3.12", os: "ubuntu-latest", session: "pre-commit" }
+ - { python: "3.13", os: "ubuntu-latest", session: "mypy" }
+ - { python: "3.12", os: "ubuntu-latest", session: "mypy" }
+ - { python: "3.11", os: "ubuntu-latest", session: "mypy" }
+ - { python: "3.10", os: "ubuntu-latest", session: "mypy" }
+ - { python: "3.9", os: "ubuntu-latest", session: "mypy" }
+ - { python: "3.13", os: "ubuntu-latest", session: "tests" }
+ - { python: "3.12", os: "ubuntu-latest", session: "tests" }
+ - { python: "3.11", os: "ubuntu-latest", session: "tests" }
+ - { python: "3.10", os: "ubuntu-latest", session: "tests" }
+ - { python: "3.9", os: "ubuntu-latest", session: "tests" }
+ - { python: "3.12", os: "windows-latest", session: "tests" }
+ - { python: "3.12", os: "macos-latest", session: "tests" }
+ - { python: "3.12", os: "ubuntu-latest", session: "typeguard" }
+ - { python: "3.13", os: "windows-latest", session: "tests" }
+ - { python: "3.13", os: "macos-latest", session: "tests" }
+ - { python: "3.13", os: "ubuntu-latest", session: "xdoctest" }
+ - { python: "3.13", os: "ubuntu-latest", session: "docs-build" }
+
+ env:
+ NOXSESSION: ${{ matrix.session }}
+ FORCE_COLOR: "1"
+ PRE_COMMIT_COLOR: "always"
+
+ steps:
+ - name: Check out the repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python }}
+
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+
+ - name: Create uv virtual environment
+ run: uv venv
+
+ - name: Install dependencies
+ run: |
+ uv sync --all-groups
+
+ - name: Compute pre-commit cache key
+ if: matrix.session == 'pre-commit'
+ id: pre-commit-cache
+ shell: python
+ run: |
+ import hashlib
+ import os
+ import sys
+
+ python = "py{}.{}".format(*sys.version_info[:2])
+ payload = sys.version.encode() + sys.executable.encode()
+ digest = hashlib.sha256(payload).hexdigest()
+ result = "${{ runner.os }}-{}-{}-pre-commit".format(python, digest[:8])
+
+ # print() alone does not register a step output; append to GITHUB_OUTPUT instead.
+ with open(os.environ["GITHUB_OUTPUT"], "a") as github_output:
+     github_output.write(f"result={result}\n")
+
+ - name: Restore pre-commit cache
+ uses: actions/cache@v4
+ if: matrix.session == 'pre-commit'
+ with:
+ path: ~/.cache/pre-commit
+ key: ${{ steps.pre-commit-cache.outputs.result }}-${{ hashFiles('.pre-commit-config.yaml') }}
+ restore-keys: |
+ ${{ steps.pre-commit-cache.outputs.result }}-
+
+ - name: Run Nox
+ run: |
+ uv run python -m nox --python=${{ matrix.python }}
+
+ - name: Upload coverage data
+ if: always() && matrix.session == 'tests'
+ uses: "actions/upload-artifact@v4"
+ with:
+ name: coverage-data-${{ matrix.session }}-${{ matrix.python }}-${{ matrix.os }}
+ path: ".coverage.*"
+ if-no-files-found: ignore
+ include-hidden-files: true
+
+ - name: Upload documentation
+ if: matrix.session == 'docs-build'
+ uses: actions/upload-artifact@v4
+ with:
+ name: docs
+ path: docs/_build
+
+ coverage:
+ runs-on: ubuntu-latest
+ needs: tests
+ steps:
+ - name: Check out the repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.13"
+
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+
+ - name: Create uv virtual environment
+ run: uv venv
+
+ - name: Install Nox
+ run: |
+ uv pip install nox
+ uv run python -m nox --version
+
+ - name: Install dependencies
+ run: |
+ uv sync --all-groups
+
+ - name: Download coverage data
+ uses: actions/download-artifact@v4
+ with:
+ pattern: coverage-data-*
+ merge-multiple: true
+
+ - name: Combine coverage data and display human readable report
+ run: |
+ uv run python -m nox --session=coverage
+
+ - name: Create coverage report
+ run: |
+ uv run python -m nox --session=coverage -- xml -i
+
+ - name: Upload coverage report
+ uses: codecov/codecov-action@v4
+ env:
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+ with:
+ files: ./coverage.xml
diff --git a/.gitignore b/.gitignore
index f31acebf..dfc210b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,13 @@
*.pyc
.project
.pydevproject
-
+.cache
+node_modules
+*.egg-info
+.ruff_cache
+.nox
+.venv
+__pycache__
+_build
+docs/_build
+coverage.xml
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..c5139e6d
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,50 @@
+repos:
+ - repo: local
+ hooks:
+ - id: check-added-large-files
+ name: Check for added large files
+ entry: check-added-large-files
+ language: system
+ - id: check-toml
+ name: Check Toml
+ entry: check-toml
+ language: system
+ types: [toml]
+ - id: check-yaml
+ name: Check Yaml
+ entry: check-yaml
+ language: system
+ types: [yaml]
+ - id: end-of-file-fixer
+ name: Fix End of Files
+ entry: end-of-file-fixer
+ language: system
+ types: [text]
+ stages: [pre-commit, pre-push, manual]
+ - id: trailing-whitespace
+ name: Trim Trailing Whitespace
+ entry: trailing-whitespace-fixer
+ language: system
+ types: [text]
+ stages: [pre-commit, pre-push, manual]
+ - id: pydoclint
+ name: pydoclint
+ entry: pydoclint
+ language: system
+ types: [python]
+ args: ["--generate-baseline=True"]
+ - id: ruff
+ name: ruff
+ entry: ruff check
+ args: [--fix]
+ language: python
+ types_or: [python, pyi]
+ - id: ruff-format
+ name: ruff-format
+ entry: ruff format
+ language: python
+ types_or: [python, pyi]
+ # - repo: https://github.com/pre-commit/mirrors-prettier
+ # rev: v4.0.0-alpha.8
+ # hooks:
+ # - id: prettier
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
deleted file mode 100644
index e6bf1bb3..00000000
--- a/.readthedocs.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Read the Docs configuration file
-# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
-
-# Required
-version: 2
-
-# Set the OS, Python version, and other tools you might need
-build:
- os: ubuntu-24.04
- tools:
- python: "3.13"
-
-# Build documentation in the "docs/" directory with Sphinx
-sphinx:
- configuration: docs/conf.py
-
-# Optionally, but recommended,
-# declare the Python requirements required to build your documentation
-# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
-# python:
-# install:
-# - requirements: docs/requirements.txt
-
diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 00000000..dd5f40b6
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,12 @@
+version: 2
+build:
+ os: ubuntu-24.04
+ tools:
+ python: "3.13"
+sphinx:
+ configuration: docs/conf.py
+formats: all
+python:
+ install:
+ - requirements: docs/requirements.txt
+ - path: .
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 00000000..b802047a
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,132 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+- Demonstrating empathy and kindness toward other people
+- Being respectful of differing opinions, viewpoints, and experiences
+- Giving and gracefully accepting constructive feedback
+- Accepting responsibility and apologizing to those affected by our mistakes,
+ and learning from the experience
+- Focusing on what is best not just for us as individuals, but for the overall
+ community
+
+Examples of unacceptable behavior include:
+
+- The use of sexualized language or imagery, and sexual attention or advances of
+ any kind
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or email address,
+ without their explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+[c5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me](mailto:c5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me).
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series of
+actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][mozilla coc].
+
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][faq]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+[mozilla coc]: https://github.com/mozilla/diversity
+[faq]: https://www.contributor-covenant.org/faq
+[translations]: https://www.contributor-covenant.org/translations
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..3a7efe7d
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,113 @@
+# Contributor Guide
+
+Thank you for your interest in improving this project.
+This project is open-source under the [LGPL 3.0 license] and
+welcomes contributions in the form of bug reports, feature requests, and pull requests.
+
+Here is a list of important resources for contributors:
+
+- [Source Code]
+- [Documentation]
+- [Issue Tracker]
+- [Code of Conduct]
+
+[lgpl 3.0 license]: https://www.gnu.org/licenses/lgpl-3.0
+[source code]: https://github.com/bosd/odoo-data-flow
+[documentation]: https://odoo-data-flow.readthedocs.io/
+[issue tracker]: https://github.com/bosd/odoo-data-flow/issues
+
+## How to report a bug
+
+Report bugs on the [Issue Tracker].
+
+When filing an issue, make sure to answer these questions:
+
+- Which operating system and Python version are you using?
+- Which version of this project are you using?
+- What did you do?
+- What did you expect to see?
+- What did you see instead?
+
+The best way to get your bug fixed is to provide a test case,
+and/or steps to reproduce the issue.
+
+## How to request a feature
+
+Request features on the [Issue Tracker].
+
+## How to set up your development environment
+
+You need Python 3.9+ and the following tools:
+
+- [uv]
+- [Nox]
+
+Install the package with development requirements:
+
+```console
+$ uv sync
+```
+
+You can now run an interactive Python session,
+or the command-line interface:
+
+```console
+$ uv run python
+$ uv run odoo-data-flow
+```
+
+[uv]: https://docs.astral.sh/uv/
+[nox]: https://nox.thea.codes/
+
+## How to test the project
+
+Run the full test suite:
+
+```console
+$ nox
+```
+
+List the available Nox sessions:
+
+```console
+$ nox --list-sessions
+```
+
+You can also run a specific Nox session.
+For example, invoke the unit test suite like this:
+
+```console
+$ nox --session=tests
+```
+
+Unit tests are located in the _tests_ directory,
+and are written using the [pytest] testing framework.
+
+[pytest]: https://pytest.readthedocs.io/
+
+## How to submit changes
+
+Open a [pull request] to submit changes to this project.
+
+Your pull request needs to meet the following guidelines for acceptance:
+
+- The Nox test suite must pass without errors and warnings.
+- Include unit tests. This project maintains 100% code coverage.
+- If your changes add functionality, update the documentation accordingly.
+
+Feel free to submit early, though—we can always iterate on this.
+
+To run linting and code formatting checks before committing your change, you can install pre-commit as a Git hook by running the following command:
+
+```console
+$ nox --session=pre-commit -- install
+```
+
+It is recommended to open an issue before starting work on anything.
+This will allow a chance to talk it over with the owners and validate your approach.
+
+[pull request]: https://github.com/bosd/odoo-data-flow/pulls
+
+
+
+[code of conduct]: CODE_OF_CONDUCT.md
diff --git a/COPYING b/LICENSE
similarity index 100%
rename from COPYING
rename to LICENSE
diff --git a/README.md b/README.md
index 86d009f0..b99b384e 100644
--- a/README.md
+++ b/README.md
@@ -1,1616 +1,128 @@
-Odoo CSV Import Export Library
-==============================
-This library provides tools to easily and quickly import data into Odoo or export data from Odoo using CSV files.
-It also provides a framework to manipulate data from CSV.
+
+
+
-- [Odoo CSV Import Export Library](#odoo-csv-import-export-library)
-- [Installation](#installation)
-- [Importing Data](#importing-data)
- - [Import Parameters](#import-parameters)
- - [--config CONFIG](#config-config)
- - [--file FILENAME](#file-filename)
- - [--sep SEPARATOR](#sep-separator)
- - [--skip LINE](#skip-line)
- - [--model MODEL](#model-model)
- - [--size BATCH_SIZE](#size-batchsize)
- - [--worker WORKER](#worker-worker)
- - [--groupby SPLIT](#groupby-split)
- - [--ignore IGNORE](#ignore-ignore)
- - [--context CONTEXT](#context-context)
- - [Import Related Keys](#import-related-keys)
- - [ORM and Performance Related Keys](#orm-and-performance-related-keys)
- - [Model Specific Keys](#model-specific-keys)
- - [--o2m](#o2m)
- - [--check](#check)
- - [--fail](#fail)
- - [Using the Script](#using-the-script)
- - [Transformations](#transformations)
- - [Basic Concepts](#basic-concepts)
- - [A Simple Partner Import](#a-simple-partner-import)
- - [Dealing with Relationships](#dealing-with-relationships)
- - [Many2one Relationships](#many2one-relationships)
- - [One2many Relationships](#one2many-relationships)
- - [Many2many Relationships](#many2many-relationships)
- - [Controlling the Load sequence](#controlling-the-load-sequence)
- - [Mapper Functions](#mapper-functions)
- - [mapper.const(value)](#mapperconstvalue)
- - [mapper.val(field, default='', postprocess=lambda x: x, skip=False)](#mappervalfield-default-postprocesslambda-x-x-skipfalse)
- - [mapper.map_val(field, mapping, default='')](#mappermapvalfield-mapping-default)
- - [mapper.num(field, default='0.0')](#mappernumfield-default00)
- - [mapper.bool_val(field, true_vals=[], false_vals=[])](#mapperboolvalfield-truevals-falsevals)
- - [mapper.binary(field, path_prefix, skip=False, encoding="utf-8")](#mapperbinaryfield-pathprefix-skipfalse-encoding%22utf-8%22)
- - [mapper.concat(separator, *fields)](#mapperconcatseparator-fields)
- - [mapper.m2o(PREFIX, field, default='', skip=False)](#mapperm2oprefix-field-default-skipfalse)
- - [mapper.m2o_map(PREFIX, mapper, default='', skip=False)](#mapperm2omapprefix-mapper-default-skipfalse)
- - [mapper.m2m(PREFIX, *fields)](#mapperm2mprefix-fields)
- - [mapper.m2m_id_list(PREFIX, *args, **kwargs)](#mapperm2midlistprefix-args-kwargs)
- - [mapper.m2m_value_list(*args, **kwargs)](#mapperm2mvaluelistargs-kwargs)
- - [mapper.m2m_template_attribute_value(*args, **kwargs)](#mapperm2m_template_attribute_valueprefix-template_id_field-args)
- - [Advanced Transformations](#advanced-transformations)
- - [User Defined Mappers](#user-defined-mappers)
- - [Managing the Client CSV file](#managing-the-client-csv-file)
- - [Adding a column](#adding-a-column)
- - [Removing Lines](#removing-lines)
- - [Updating Records With Database IDs](#updating-records-with-database-ids)
- - [XML Processing](#XML-Processing)
- - [A Real Life Example](#a-real-life-example)
- - [Performances Considerations](#performances-considerations)
- - [Importing Related or Computed Fields](#importing-related-or-computed-fields)
- - [Troubleshooting](#troubleshooting)
- - [When the number of records does not match](#when-the-number-of-records-does-not-match)
- - [Tips and Tricks](#tips-and-tricks)
- - [Importing Data of Multiple Companies](#importing-data-of-multiple-companies)
- - [Importing Translations](#importing-translations)
- - [Importing Account Move Lines](#importing-account-move-lines)
-- [Exporting Data](#exporting-data)
-- [Requirements](#requirements)
+# Odoo Data Flow
-# Installation
-* From GitHub
+[][pypi status]
+[][pypi status]
+[][pypi status]
+[][license]
-```
-git clone git@github.com:tfrancoi/odoo_csv_import.git
-```
-
-* From PyPi
-
-```
-[sudo] pip install odoo_import_export_client
-```
-# Importing Data
-The Odoo CSV Import Export library provides the script `odoo_import_thread.py` to import data into Odoo. The script is designed to load one data file into one model. That means you might need to run the script several times with different data files, models and other options to complete an import.
-
-
-
-Data is not inserted directly into the database; instead, it is loaded by calling the method `models.load`. This way, the standard behaviour of each model is respected.
-
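-For reference, here is a minimal sketch (not part of this library) of what such a `load` call looks like through the standard Odoo external API; the URL, database, credentials and data are placeholders:
-
-```
-import xmlrpc.client
-
-url, db, login, password = "http://localhost:8069", "mydb", "admin", "admin"
-
-common = xmlrpc.client.ServerProxy(f"{url}/xmlrpc/2/common")
-uid = common.authenticate(db, login, password, {})
-models = xmlrpc.client.ServerProxy(f"{url}/xmlrpc/2/object")
-
-# `load` takes the column names and the rows, exactly like a CSV file,
-# and returns the created/updated ids plus any error messages.
-header = ["id", "name"]
-rows = [["my_module.partner_1", "John Doe"]]
-result = models.execute_kw(db, uid, password, "res.partner", "load", [header, rows])
-print(result["ids"], result["messages"])
-```
-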
-This script has several options. Type the command `odoo_import_thread.py --help` to get the usage.
-
-```
-usage: odoo_import_thread.py [-h] -c CONFIG --file FILENAME --model MODEL
- [--worker WORKER] [--size BATCH_SIZE]
- [--skip SKIP] [--fail] [-s SEPARATOR]
- [--groupby SPLIT] [--ignore IGNORE] [--check]
- [--context CONTEXT] [--o2m]
-
-Import data in batch and in parallel
-
-optional arguments:
- -h, --help show this help message and exit
- -c CONFIG, --config CONFIG
- Configuration File that contains connection parameters
- --file FILENAME File to import
- --model MODEL Model to import
- --worker WORKER Number of simultaneous connection
- --size BATCH_SIZE Number of line to import per connection
- --skip SKIP Skip until line [SKIP]
- --fail Fail mode
- -s SEPARATOR, --sep SEPARATOR
- CSV separator
- --groupby SPLIT Group data per batch with the same value for the given
- column in order to avoid concurrent update error
- --ignore IGNORE list of column separate by comma. Those column will be
- remove from the import request
- --check Check if record are imported after each batch.
- --context CONTEXT context that will be passed to the load function, need
- to be a valid python dict
- --o2m When you want to import o2m field, don't cut the batch
- until we find a new id
-```
-One of the most important features is the ability to import in parallel while controlling the transaction size.
-These options make it possible to import huge data files while keeping performance under control, which is not possible with the built-in Odoo import wizard.
-
-Here is what a typical execution of the script looks like:
-
-
-
-The parameter values are set for illustrating purposes.
-
-When running the script, a number of threads are spawned. Each of them handles a number of records per transaction. Each transaction inserts or updates records in the Odoo instance defined in the configuration file.
-
-## Import Parameters
-
-### --config CONFIG
-The configuration file `CONFIG` is a text file that defines the parameters used in the import. Here is an example.
-
-```
-[Connection]
-hostname = mydb.odoo.com
-database = mydb
-login = admin
-password = admin
-protocol = jsonrpcs
-port = 443
-uid = 2
-```
-The section `[Connection]` is mandatory. Then the following parameters must be set accordingly.
-
-| Parameter | Description |
-| --- | --- |
-| hostname | Name of the host where the Odoo server resides. |
-| database | Name of the PostgreSQL database. |
-| login | The login used to create or update the records. |
-| password | The login's password. |
-| protocol | Protocol used for RPC calls. It can be one of the following values: xmlrpc, xmlrpcs, jsonrpc, jsonrpcs. For a remote database, it's strongly advised to use an encrypted protocol (xmlrpcs or jsonrpcs). |
-| port | TCP port where Odoo can be reached. Usually 443 for encrypted remote connections, or 8069 for a local Odoo with its default configuration. |
-| uid | The database id of the res.user identified by the parameter 'login'. Well known ids are: 1 = admin user prior to V12, 2 = admin user as of V12. |
-
-> **Tips:** On premise, it's advised to use a dedicated user with the minimal access rights on all the models related to the import.
-
-By default, `CONFIG` is set to `conf/connection.conf`. Under Windows, you must always set this option because the default path separator is not compliant with the OS.
-
-### --file FILENAME
-Define the CSV `FILENAME` to import. The CSV format is mandatory. In order to be importable in Odoo, this file must follow some rules:
-- The file must be in UTF-8 encoding.
-- One file must contain data of only one model.
-- The first line contains the column names. All columns must use the technical names of the fields.
-- All lines must have an `id` column filled with an XML_ID that identifies the record.
-- Some field formats must be respected:
- - Boolean values must be 0 or 1.
- - Binary data must be encoded in base64.
- - Datetime fields format depends on the language (often %Y-%m-%d %H:%M:%S).
- - The decimal separator of float values also depends on the language (often '.').
- - Selection fields must always contain database values.
- - Many2one fields must be suffixed with `/id` if their value is an XML_ID or `.id` if it's a database id.
- - Many2many fields must be a comma separated list of XML_IDs.
-- If a field value is split across multiple lines, it must be enclosed in double quotes (").
-
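-As an illustration, here is a hypothetical file following these rules (the field names are purely illustrative):
-
-```
-id;name;active;parent_id/id;category_id/id;date
-my_module.partner_1;John Doe;1;my_module.company_1;my_module.categ_a,my_module.categ_b;1980-12-31 00:00:00
-```
-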
-The fields separator can be set with the option `--sep SEPARATOR`.
-
-You can skip the first lines of the file with the option `--skip LINE`.
-
-The name of the CSV file can be used to set the model by default. Ex: the file name `res_partner.csv` sets the model to `res.partner`. See the `--model` option for more detail.
-
-### --sep SEPARATOR
-Define the column separator. Ex: `--sep=,`. By default, it's the semicolon (;).
-
-If the separator is present in a field value, the value must be enclosed with double quotes (").
-
-### --skip LINE
-Allows skipping the first `LINE` line(s) of the file. Ex: `--skip=3` will skip the first three lines.
-
-The line read first must contain the column names, so don't skip any lines if the header is already the first line of the file.
-
-### --model MODEL
-Set the `MODEL` to import data into. Ex: `--model=res.partner`.
-
-By default, the model is the name of the CSV file without its extension and with the underscores (_) replaced by dots (.). Meaning, if the CSV file is named `res_partner.csv`, the model is `res.partner` by default.
-
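-The derivation is roughly the following (this is just the idea, not the script's actual code):
-
-```
-import os
-
-def default_model(filename):
-    """'res_partner.csv' -> 'res.partner'"""
-    return os.path.splitext(os.path.basename(filename))[0].replace('_', '.')
-
-print(default_model('res_partner.csv'))  # res.partner
-```
-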
-### --size BATCH_SIZE
-Controls the number of records (`BATCH_SIZE`) imported in one transaction.
-
-When using the standard import wizard of Odoo, an import is always handled by one worker in one single transaction for the whole file.
-
-
-
-When dealing with big data files, this may lead to two main issues:
-- the time needed to import the whole file could exceed the maximum time allowed for a transaction to run. This time is set by the Odoo parameters `limit-time-cpu` and `limit-time-real`,
-- if an error occurs on one record, the whole transaction fails and all the records are rejected.
-
-The solution is then to reduce the number of records in one transaction by setting the `--size` parameter to the desired number.
-
-
-
-Here colored in blue, the transaction contains two records (`--size=2`). Now, only two records instead of the whole file must be imported during the time allocated for the transaction.
-
-This option is also helpful when importing large records over a WAN connection because a smaller transaction size leads to smaller JSON or XML payloads to send over the network. However it causes a bit more network overhead which could slow down the total run time. This run time can be drastically decreased by using the `--worker` parameter.
-
-### --worker WORKER
-Controls the number of import threads in parallel.
-
-Here is what an import looks like with `--worker=2`.
-
-
-
-The whole file is now handled by two workers in parallel. The total run time is then divided by two.
-
-As a rule of thumb, you can set the number of workers up to 80% of the number of Odoo workers, so that other users can still work while the import runs.
-
-When working with multiple workers, there is a potential drawback: the concurrent updates.
-
-In the following example, suppose Import Thread 1 is importing my_partner_2 while Import Thread 2 is importing my_partner_3.
-
-
-
-Both partners have the same parent_id: my_partner_1. As `parent_id` is a non-readonly related field, the insert/update of my_partner_2 and my_partner_3 will both trigger an update on my_partner_1. That's a concurrent update. As a consequence, the current transaction of both threads will fail. To solve such an issue, the parameter `--groupby` can be used.
-
-### --groupby SPLIT
-Selects the field used to group records into the same thread.
-
-To avoid the concurrent update issue described previously, you can use `--groupby=parent_id/id`. By doing this, we ensure all the records with the same `parent_id/id` are imported by the same thread. It thus eliminates the concurrent updates **caused by the parent_id**.
-
-
-
-### --ignore IGNORE
-Specifies the columns that do not need to be imported. Multiple columns can be set in a comma separated list. Ex: `--ignore=col1,col2,col3`.
-
-This is typically used to avoid cascade updates while importing related fields. Refer to [Importing Related or Computed fields](#Importing-Related-or-Computed-Fields).
-
-### --context CONTEXT
-Define the context of the ORM while importing. Ex:`--context="{'tracking_disable': True}"`.
-
-Here are some useful context keys.
-
-#### Import Related Keys
-
-
-| Key | Description |
-| --- | --- |
-| write_metadata | When True, allows importing the audit log fields (create_uid, create_date, write_uid, write_date). The import must run with the `admin` user. Requires the `import_metadata` module. |
-| update_many2many | Set it to True when the data file contains a many2many relationship split as one record per line instead of a comma separated list of XML_IDs in one column. Suppose we want to assign categories to products; a regular record is `product1;categ1,categ2,categ3`. With 'update_many2many': True, you can import a file with the following structure: `product1;categ1`<br>`product1;categ2`<br>`product1;categ3` |
-
-
-#### ORM and Performance Related Keys
-
-
-| Key | Description |
-| --- | --- |
-| tracking_disable | When True, don't create messages in the chatter. |
-| defer_fields_computation | When True, recompute the computed fields at the end of the transaction instead of after each record. Useless if --size=1. Requires the `defer_fields_computation` module. |
-| defer_parent_store_computation | Defers the computation of the fields parent_left and parent_right to the end of the transaction. Valid up to Odoo 11. |
-| lang | Set the current language. Ex: 'lang': 'fr_FR' |
-| force_company | Set the current company. Use the database identifier of the company. |
-
-
-#### Model Specific Keys
-
-
-| Key | Description |
-| --- | --- |
-| check_move_validity | Set it to False when you import account moves and account move lines. Refer to "Importing Account Move Lines" for more details. |
-| create_product_product | Set it to True when you import product templates and also the variants. Without this key, the ORM will automatically create the variants when the templates are imported. |
-
-
-These are just a few examples. Feel free to look into the Odoo code to find all available context keys.
-
-### --o2m
-Use this option when you import a data file with one2many relationships. The import file must follow a specific structure.
-
-
-
-Suppose the model `master` has two one2many fields `child1_ids` and `child2_ids`, linking respectively the models `child1` and `child2`.
-In the line beginning a master record, you can set all the master fields, like in a regular import file. In addition, you can add the fields of the child records. In the following lines, you can add the data of the next children, leaving the columns of the master record and of the other children empty. A minimal sketch of such a layout is shown below.
-
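-Here is a minimal, hypothetical sketch of such a file (the field and module names are made up for the example):
-
-```
-id;name;child1_ids/name;child2_ids/name
-my_module.master_1;Master One;child1 A;child2 X
-;;child1 B;child2 Y
-;;;child2 Z
-```
-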
-With the `--o2m` option, the master record will be imported with its two `child1` and its three `child2` at the same time.
-It is worth noticing that it's impossible to set XML_IDs on the child records. As a consequence:
-- you cannot run the import again to update the children's data,
-- the children cannot be referenced in another import file.
-
-
-
-### --check
-With this option, at the end of each transaction, the number of records in the transaction is compared to the number of imported records. If these numbers do not match, an error message is printed. Most likely, the transaction contains records with duplicate XML_IDs. Refer to [When the number of records does not match](#when-the-number-of-records-does-not-match) for more explanations.
-
-### --fail
-Engage the fail mode.
-
-When you run `odoo_import_thread.py` without the `--fail` option, it runs in _normal_ mode. In this mode, any rejected record is printed to a file with the same name as the parameter `--file`, suffixed with `.fail` and located in the same folder.
-
-This `.fail` file may contain records rejected for good reasons (i.e. a required field is missing) or _bad_ reasons. If you run an import with multiple workers, a bad reason could be a concurrent update. And even in a single thread, when an error occurs in one record, all the records of the transaction (`--size`) are rejected.
-This means the `.fail` file may contain records that could be imported if the process had been run with a single thread and a dedicated transaction for each record. That's the role of the fail mode.
-
-
-
-In fail mode, `odoo_import_thread.py` will try to import the records of the `.fail` file. Additionally, neither `--worker` nor `--size` is set. The default values of `1` are then used to ensure a single thread and a single record per transaction.
-
-In this mode, the rejected records are placed in the `.fail.bis` file in the same folder. This file contains only rejections that need your attention and must be solved before importing again.
-
-## Using the Script
-To get all the benefits of the script, imports are most of the time run with multiple workers and a user-defined transaction size. In this case, and because of the fail mode, you always need two command lines to import one file.
-
-Assuming:
-- your configuration file is `connection.conf` located in the current folder,
-- your data file is `my_data.csv` located in the current folder,
-- the target model is `my.model`,
-- you want to run a fast multithreaded import.
-
-Run the following two commands (as an example):
-```
-odoo_import_thread.py -c connection.conf --file=my_data.csv --model=my.model --worker=4 --size=200 [other options]
-odoo_import_thread.py -c connection.conf --file=my_data.csv --model=my.model --fail [other options]
-```
-
-The first command runs the import in parallel and in batch. The rejected records are printed in the file `my_data.csv.fail`. We don't care about this file, it will be handled in the next command.
-
-In the second command, the parameter `--fail` replaces `--worker` and `--size`. The import runs in fail mode. It will read `my_data.csv.fail` (note the parameter `--file` is unchanged) and print the rejected records in the file `my_data.csv.fail.bis`. If the `.fail.bis` file is empty, all the records have been imported (inserted or updated).
-
-Refer to the [Troubleshooting](#troubleshooting) section to know how to solve some issues.
-
-## Transformations
-
-When the file to import doesn't respect the [expected format](#file-FILENAME) of `odoo_import_thread.py`, it's necessary to apply some transformations in order to create compliant CSV files. The Odoo CSV Import Export library helps you create a Python script able to transform CSV files thanks to the `Processor` and `mapper` objects.
-
-
-
-### Basic Concepts
-
-Let's start with a simple use case to introduce the main concepts of the transformations. Once you're familiar with them, a more complete use case is provided [here](#a-real-life-example).
-
-#### A Simple Partner Import
-A customer wants to import some partners. He provides the following CSV file, say `client_file.csv`:
-
-```
-Firstname;Lastname;Birthdate
-John;Doe;31/12/1980
-David;Smith;28/02/1985
-```
-This file cannot be imported directly because:
-- the fields `Lastname`, `Firstname`, `Birthdate` do not exist in the model `res.partner`,
-- the date format is not compliant,
-- there is no `id` field.
-
-The first step is to ensure that all the fields exist in the target model. Here, the birthdate is new data. We assume it must be stored in the field `birthdate`, created before the import. `Firstname` and `Lastname`, on the other hand, will be used as the `name` of the partner.
-
-Now the transformation step can begin. It consists of writing a Python script that builds another CSV file compliant with the model `res.partner`; this is our Odoo CSV file.
-In this case, the transformation steps will:
-- define how we build the `name` field from the columns `Lastname` and `Firstname`,
-- change the date format to a compliant one.
-
-Another important point to consider is what happens when we load the data several times (*it could occur if the transformations must be rewritten*). Basically, when we import the Odoo CSV file the first time, the two partners will be created. But if we run it again, we don't want to create duplicates. Instead, we want to update the partner information. So the transformation phase is also necessary to:
-
-- assign an XML_ID to each partner of the file.
-
-The presence of an XML_ID ensures that a record is created if it doesn't exist, or updated if it already exists. This behaviour is included in the method `load` of each Odoo model.
-
-Let's build the transformation script, say `res_partner.py`. We start with importing the needed objects from the library.
-
-```
-# -*- coding: utf-8 -*-
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import Processor
-
-# Custom import
-from datetime import datetime # used to change the format of datetime fields
-```
-
-Then we build a `Processor` object from the client CSV file `client_file.csv`. Assuming this file resides in the current folder:
-
-```
-processor = Processor('client_file.csv', delimiter=';')
-```
-
-Now we create a mapping dictionary where the keys are the fields of the target model (`res.partner`) we want to import (**at least the required fields without a default value**) and the values describe how we get them from the client file.
-
-```
-res_partner_mapping = {
- 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')),
- 'name': mapper.concat(' ', 'Firstname', 'Lastname'),
- 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%Y").strftime("%Y-%m-%d 00:00:00")),
-}
-```
-All the fields are extracted with the methods of the `mapper` object. These are described [here](#mapper-functions).
-
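-Because the same date conversion is reused in several mappings later on, you may prefer to extract the postprocess lambda into a plain Python helper. This is ordinary Python, not a library requirement; the helper name is just an example:
-
-```
-from datetime import datetime
-
-def to_odoo_datetime(value):
-    """Convert a client date such as '31/12/1980' into '1980-12-31 00:00:00'."""
-    if not value:
-        return ''  # keep empty cells empty
-    return datetime.strptime(value, "%d/%m/%Y").strftime("%Y-%m-%d 00:00:00")
-
-# 'birthdate': mapper.val('Birthdate', postprocess=to_odoo_datetime),
-```
-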
-An important thing to notice is the `id` field. It is required by the script `odoo_import_thread.py`. It contains an XML_ID that we can build *as we want*, as long as its uniqueness is guaranteed. In this example, we assume the concatenation of three columns (`Lastname`, `Firstname` and `Birthdate`) is enough to identify a record. It will create XML_IDs like `my_import_res_partner.John_Doe_31/12/1980` in the Odoo CSV file. You are free to choose whatever module name you want, here `my_import_res_partner`, but it's a good idea to include the model name and something like the *project* name.
-
-Now we can invoke the transformation by itself.
-
-```
-processor.process(res_partner_mapping, 'res.partner.csv', {'model': 'res.partner', 'context': "{'tracking_disable': True}", 'worker': 2, 'batch_size': 20})
-```
-This step will create the import file `res.partner.csv` for the model `res.partner`. It should look like this, conforming to `res_partner_mapping`:
-
-```
-id;name;birthdate
-my_import_res_partner.John_Doe_31/12/1980;John Doe;1980-12-31 00:00:00
-my_import_res_partner.David_Smith_28/02/1985;David Smith;1985-02-28 00:00:00
-```
-> **Note:** The order of the columns is not related to the client file or the keys in the transform mapping dictionary.
-
-Notice some options are set when invoking the transformation: `'context': "{'tracking_disable': True}", 'worker': 2, 'batch_size': 20`.
-They don't play any role in the transformation itself. Instead, they will be used by the import shell script later. Fortunately, we can automatically create the shell script by adding this line:
-
-```
-processor.write_to_file("res_partner.sh", python_exe='', path='')
-```
-
-This will create the script `res_partner.sh` that will load the data with `odoo_import_thread.py`, first in normal mode, then in fail mode. It looks like this:
-
-```
-odoo_import_thread.py -c conf/connection.conf --file=res.partner.csv --model=res.partner --worker=2 --size=20 --groupby= --ignore= --sep=";" --context="{'tracking_disable': True}"
-odoo_import_thread.py -c conf/connection.conf --fail --file=res.partner.csv --model=res.partner --ignore= --sep=";" --context="{'tracking_disable': True}"
-```
-
-The complete python script:
-```
-# -*- coding: utf-8 -*-
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import Processor
-
-# Custom import
-from datetime import datetime # used to change the format of datetime fields
-
-processor = Processor('client_file.csv', delimiter=';')
-
-res_partner_mapping = {
- 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')),
- 'name': mapper.concat(' ', 'Firstname', 'Lastname'),
- 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%Y").strftime("%Y-%m-%d 00:00:00")),
-}
-
-processor.process(res_partner_mapping, 'res.partner.csv', {'model': 'res.partner', 'context': "{'tracking_disable': True}", 'worker': 2, 'batch_size': 20})
-processor.write_to_file("res_partner.sh", python_exe='', path='')
-```
-
-Run the transformations
-
-```
-python res_partner.py
-```
-
-You should have created:
-- the import file `res.partner.csv` in the same folder as the client file `client_file.csv`,
-- the shell script `res_partner.sh` in your current folder.
-
-#### Dealing with Relationships
-
-##### Many2one Relationships
-
-Coming back to our simple example, let's suppose the client adds the partner's company to the data. Here, we are not in a multi-company environment; the company is just the partner's parent. The file could look like this:
-```
-Company;Firstname;Lastname;Birthdate
-The World Company;John;Doe;31/12/1980
-The Famous Company;David;Smith;28/02/1985
-```
-In this case we must import four partners (the two companies and the two persons) and set the field `parent_id` of the two persons to their respective company. In a relational database we link records thanks to their internal identifiers (`id`). But at this step, these ids are unknown because the records are not imported yet. We will then use the XML_IDs to link the records.
-
-It means when we transform a company, we assign an XML_ID to it, then we use this XML_ID as the `parent_id` of the person who is a member of this company. As a consequence the companies must be imported before the persons. More precisely, the XML_IDs set in the `parent_id` must exist before being used as a relationship value.
-
-Let's create the transformation script. As usual, we start with the needed imports and the creation of a `Processor` on the client file.
-```
-# -*- coding: utf-8 -*-
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import Processor
-from datetime import datetime # used to change the format of datetime fields
-
-processor = Processor('client_file.csv', delimiter=';')
-```
-
-Now we can define the mapping to extract the companies. These are records in the model `res.partner` with the boolean field `is_company` set. We also assume the company name is unique so that we can use it as an identifier in the XML_ID.
-```
-res_partner_company_mapping = {
- 'id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')),
- 'name': mapper.val('Company'),
- 'is_company': mapper.const('1'),
-}
-
-processor.process(res_partner_company_mapping, 'res.partner.company.csv', {}, 'set')
-```
-It is worth noting the option `'set'` passed to `processor.process` when invoking the companies transformation. This option removes duplicates from the Odoo CSV file, which would occur if several partners belong to the same company.
-
-And here is the mapping to extract the persons. It's exactly the same as before except we've added the field `parent_id`.
-```
-res_partner_mapping = {
- 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')),
- 'name': mapper.concat(' ','Firstname','Lastname'),
- 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%Y").strftime("%Y-%m-%d 00:00:00")),
- 'parent_id/id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')),
-}
-
-processor.process(res_partner_mapping, 'res.partner.csv', {})
-```
-The important thing to notice here is that we use exactly the same transformation method for the partner ids in both mappings in order to generate the same XML_ID (`res_partner_mapping['parent_id/id']` = `res_partner_company_mapping['id']`). *Remember also the suffix `/id` when using XML_IDs in relation fields.*
-
-The results will be two different Odoo CSV files:
-
-- The partners who are companies: `res.partner.company.csv`.
-```
-id;name;is_company
-my_import_res_partner.The World Company;The World Company;1
-my_import_res_partner.The Famous Company;The Famous Company;1
-```
-
-- The persons: `res.partner.csv`, where the column `parent_id/id` refers to an existing `id` in `res.partner.company.csv`.
-```
-id;parent_id/id;name;birthdate
-my_import_res_partner.John_Doe_31/12/1980;my_import_res_partner.The World Company;John Doe;1980-12-31 00:00:00
-my_import_res_partner.David_Smith_28/02/1985;my_import_res_partner.The Famous Company;David Smith;1985-02-28 00:00:00
-```
-
-Finally we generate the shell script that will load the files by adding this line in the transformation script.
-
-```
-processor.write_to_file("res_partner.sh", python_exe='', path='')
-```
-
-This will create the shell script `res_partner.sh` that will load the data. It looks like this:
-
-```
-odoo_import_thread.py -c conf/connection.conf --file=res.partner.company.csv --model=res.partner --groupby= --ignore= --sep=";" --context="{}"
-odoo_import_thread.py -c conf/connection.conf --fail --file=res.partner.company.csv --model=res.partner --ignore= --sep=";" --context="{}"
-
-odoo_import_thread.py -c conf/connection.conf --file=res.partner.csv --model=res.partner --groupby= --ignore= --sep=";" --context="{}"
-odoo_import_thread.py -c conf/connection.conf --fail --file=res.partner.csv --model=res.partner --ignore= --sep=";" --context="{}"
-```
-The script contains all the commands to load both Odoo CSV files. They are written in the same order as in the transformation script. So the import sequence is respected.
-
-The complete python script:
-```
-# -*- coding: utf-8 -*-
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import Processor
-from datetime import datetime # used to change the format of datetime fields
-
-processor = Processor('client_file.csv', delimiter=';')
-
-res_partner_company_mapping = {
- 'id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')),
- 'name': mapper.val('Company'),
- 'is_company': mapper.const('1'),
-}
-
-processor.process(res_partner_company_mapping, 'res.partner.company.csv', {}, 'set')
-
-res_partner_mapping = {
- 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')),
- 'name': mapper.concat(' ','Firstname','Lastname'),
- 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%Y").strftime("%Y-%m-%d 00:00:00")),
- 'parent_id/id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')),
-}
-
-processor.process(res_partner_mapping, 'res.partner.csv', {})
-
-processor.write_to_file("res_partner.sh", python_exe='', path='')
-```
-
-
-##### One2many Relationships
-
-Usually we don't import `One2many` fields. Instead, we import the inverse `Many2one` relation in the linked model.
-
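-As an illustration (the model, column and prefix names below are made up), importing order lines would mean building a separate mapping for the line model and pointing back to the parent with a many2one column, assuming a `processor` built on a client file of order lines:
-
-```
-sale_order_line_mapping = {
-    'id': mapper.m2o_map('my_import_sale_order_line', mapper.val('LineRef')),
-    'order_id/id': mapper.m2o_map('my_import_sale_order', mapper.val('OrderRef')),
-    'name': mapper.val('Description'),
-}
-
-processor.process(sale_order_line_mapping, 'sale.order.line.csv', {'model': 'sale.order.line'})
-```
-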
-##### Many2many Relationships
-
-Let's suppose our customer adds some categories to the partners. The client file could look like this:
-```
-Company;Firstname;Lastname;Birthdate;Category
-The World Company;John;Doe;31/12/1980;Premium
-The Famous Company;David;Smith;28/02/1985;Normal,Bad Payer
-```
-The categories are set in one column on the same line as the partner record, separated with commas.
-
-By looking into Odoo, we see that the model `res.partner` contains a field `category_id` which is a Many2many to the model `res.partner.category`. If you remember the rule _Many2many fields must be a comma separated list of XML_IDs_ and that an XML_ID must be created before being used in a relationship, you get an idea of the procedure to apply.
-
-1- Create all the categories by extracting them from the client file and assign them an XML_ID.
-
-2- Build a comma separated list of XML_IDs of categories for each partner.
-
-Let's start the transformation script. As usual, we start with the needed imports and the creation of a `Processor` on the client file.
-```
-# -*- coding: utf-8 -*-
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import Processor
-from datetime import datetime # used to change the format of datetime fields
-
-processor = Processor('client_file.csv', delimiter=';')
-```
-
-The first transformation extracts the categories. We assume the category name is unique so that it can be used to build the XML_IDs.
-```
-partner_category_mapping = {
- 'id': mapper.m2m_id_list('res_partner_category', 'Category'),
- 'name': mapper.m2m_value_list('Category'),
-}
-
-processor.process(partner_category_mapping, 'res.partner.category.csv', {}, m2m=True)
-```
-Notice we use two mapper functions to deal with Many2many relationships, `m2m_id_list` and `m2m_value_list`, and the option `m2m=True` in the `processor.process` command.
-
-This will create the file `res.partner.category.csv` with all the unique categories, as follows:
-```
-id;name
-res_partner_category.Premium;Premium
-res_partner_category.Normal;Normal
-res_partner_category.Bad Payer;Bad Payer
-```
-
-Now we can complete the person mapping. It's exactly the same as before except we have added the field `category_id`.
-
-```
-res_partner_mapping = {
- 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')),
- 'name': mapper.concat(' ','Firstname','Lastname'),
- 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%Y").strftime("%Y-%m-%d 00:00:00")),
- 'parent_id/id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')),
- 'category_id/id': mapper.m2m('res_partner_category', 'Category'),
-}
-
-processor.process(res_partner_mapping, 'res.partner.csv', {})
-```
-The important thing here is to use the same XML_ID prefix (`'res_partner_category'`) and source column (`'Category'`) for both
-`res_partner_mapping['category_id/id'] = mapper.m2m(...)` and `partner_category_mapping['id'] = mapper.m2m_id_list(...)`.
-
- The mapping `res_partner_mapping` will create a new file `res.partner.csv` like this:
-```
-id;parent_id/id;name;birthdate;category_id/id
-my_import_res_partner.John_Doe_31/12/1980;my_import_res_partner.The World Company;John Doe;1980-12-31 00:00:00;res_partner_category.Premium
-my_import_res_partner.David_Smith_28/02/1985;my_import_res_partner.The Famous Company;David Smith;1985-02-28 00:00:00;res_partner_category.Normal,res_partner_category.Bad Payer
-```
-Notice the column `category_id/id` that contains a comma separated list of XML_IDs of partner categories.
-
-Finally we create the load script by adding this line:
-```
-processor.write_to_file("res_partner.sh", python_exe='', path='')
-```
-It creates the script `res_partner.sh` looking like this:
-```
-odoo_import_thread.py -c conf/connection.conf --file=res.partner.category.csv --model=res.partner.category --groupby= --ignore= --sep=";" --context="{}"
-odoo_import_thread.py -c conf/connection.conf --fail --file=res.partner.category.csv --model=res.partner.category --ignore= --sep=";" --context="{}"
-
-odoo_import_thread.py -c conf/connection.conf --file=res.partner.csv --model=res.partner --groupby= --ignore= --sep=";" --context="{}"
-odoo_import_thread.py -c conf/connection.conf --fail --file=res.partner.csv --model=res.partner --ignore= --sep=";" --context="{}"
-```
-
-The complete python script:
-```
-# -*- coding: utf-8 -*-
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import Processor
-from datetime import datetime # used to change the format of datetime fields
-
-processor = Processor('client_file.csv', delimiter=';')
-
-partner_category_mapping = {
- 'id': mapper.m2m_id_list('res_partner_category', 'Category'),
- 'name': mapper.m2m_value_list('Category'),
-}
-
-processor.process(partner_category_mapping, 'res.partner.category.csv', {}, m2m=True)
-
-res_partner_mapping = {
- 'id': mapper.m2o_map('my_import_res_partner', mapper.concat('_', 'Firstname', 'Lastname', 'Birthdate')),
- 'name': mapper.concat(' ','Firstname','Lastname'),
- 'birthdate': mapper.val('Birthdate', postprocess=lambda x: datetime.strptime(x, "%d/%m/%Y").strftime("%Y-%m-%d 00:00:00")),
- 'parent_id/id': mapper.m2o_map('my_import_res_partner', mapper.val('Company')),
- 'category_id/id': mapper.m2m('res_partner_category', 'Category'),
-}
-
-processor.process(res_partner_mapping, 'res.partner.csv', {})
-
-processor.write_to_file("res_partner.sh", python_exe='', path='')
-```
-
-> **Note:** it's possible to import many2many relationships with another file structure. Refer to the context key [update_many2many](#import-related-keys) to learn how.
-
-
-#### Controlling the Load sequence
-The load order in the shell script depends on the order of the `processor.process` instructions in the transformation script.
-
-This example script:
-```
-processor = Processor('client_file.csv', delimiter=';')
-
-res_partner_company_mapping = {
-}
-
-res_partner_mapping = {
-}
-
-processor.process(res_partner_company_mapping, 'res.partner.company.csv', {}, 'set')
-processor.process(res_partner_mapping, 'res.partner.csv', {})
-processor.write_to_file("res_partner.sh", python_exe='', path='')
-```
-will create the load script `res_partner.sh` with:
-
-1- the load of `res.partner.company.csv` because it's the first invoked transformation,
-
-2- the load of `res.partner.csv`.
-
-If you want to import the persons first (which is a bad idea here), just swap the two `processor.process` commands.
-```
-processor = Processor('client_file.csv', delimiter=';')
-
-res_partner_company_mapping = {
-}
-
-res_partner_mapping = {
-}
-
-processor.process(res_partner_mapping, 'res.partner.csv', {})
-processor.process(res_partner_company_mapping, 'res.partner.company.csv', {}, 'set')
-processor.write_to_file("res_partner.sh", python_exe='', path='')
-```
-
-Now if you want to create two separate shell scripts, one for the companies and another for the persons, you need to create a new `Processor`.
-```
-#For the 1st load script
-processor = Processor('client_file.csv', delimiter=';')
-res_partner_company_mapping = {
-}
-processor.process(res_partner_company_mapping, 'res.partner.company.csv', {}, 'set')
-processor.write_to_file("res_partner_company.sh", python_exe='', path='')
-
-#For the 2nd load script
-processor = Processor('client_file.csv', delimiter=';')
-res_partner_mapping = {
-}
-processor.process(res_partner_mapping, 'res.partner.csv', {})
-processor.write_to_file("res_partner.sh", python_exe='', path='')
-```
-This will create the script `res_partner_company.sh` that loads only `res.partner.company.csv`, and the script `res_partner.sh` that loads only `res.partner.csv`. It can be useful if you need to do something between importing the companies and the persons (e.g., running another load or some RPC calls). The drawback is that the client file is read twice.
-
-### Mapper Functions
-You can get the value of columns in the client file with several methods defined in the `mapper` object. Take a look at `lib/mapper.py` to get an up to date list of methods. Here are the most commonly used.
-
-#### mapper.const(value)
-Use it to always assign the same value to a field.
-
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `my_column`<br>`my_value1`<br>`my_value2` | `{'my_field': mapper.const('forced_value')}` | `my_field`<br>`forced_value`<br>`forced_value` |
-
-Example: setting a company missing in the client file:
-```
-fields_mapping = {
- ...
- 'company_id/id': mapper.const('base.main_company'),
- ...
-}
-```
-
-#### mapper.val(field, default='', postprocess=lambda x: x, skip=False)
-
-Takes the value of the column. Use it for a `Char` or `Text` field.
-
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `my_column`<br>`my_value1`<br>`my_value2` | `{'my_field': mapper.val('my_column')}` | `my_field`<br>`my_value1`<br>`my_value2` |
-
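-The `default` and `postprocess` arguments can be combined, for example to provide a fallback value and normalize the text. A small illustrative sketch (the column name is hypothetical):
-```
-fields_mapping = {
-    ...
-    'ref': mapper.val('Reference', default='N/A', postprocess=lambda x: x.strip().upper()),
-    ...
-}
-```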
-
-#### mapper.map_val(field, mapping, default='')
-Takes the value from a dictionary where the key is the value of the column.
-
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `my_column`<br>`key1`<br>`key2` | `mapping = {'key1': 'value1', 'key2': 'value2'}`<br>`{'my_field': mapper.map_val('my_column', mapping)}` | `my_field`<br>`value1`<br>`value2` |
-
-
-
-Example: setting a country.
-
-```
-Country_column;
-BE;
-FR;
-```
-
-```
-country_map = {
- 'BE': 'base.be',
- 'FR': 'base.fr',
-}
-
-fields_mapping = {
- ...
- 'country_id/id': mapper.map_val('Country_column', country_map),
- ...
-}
-```
-
-#### mapper.num(field, default='0.0')
-
-Takes the numeric value of the column and converts a comma decimal separator into a dot. Use it for `Integer` or `Float` fields.
-
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `my_column`<br>`01;`<br>`2,3;` | `{'my_field': mapper.num('my_column')}` | `my_field`<br>`01`<br>`2.3` |
-
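-Example: importing a price column (the column name is illustrative):
-```
-fields_mapping = {
-    ...
-    'list_price': mapper.num('Price'),
-    ...
-}
-```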
-
-
-#### mapper.bool_val(field, true_vals=[], false_vals=[])
-
-A boolean field in Odoo is always imported as 1 or 0. `true_vals` and `false_vals` are used to map the original values to 1 and 0. If the value in the client file is in neither `true_vals` nor `false_vals`, it is considered TRUE if a value is present and FALSE if the column is empty.
-
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `my_column`<br>`Yes`<br>`No`<br>*(empty)*<br>`something else` | `{'my_field': mapper.bool_val('my_column', ['Yes'], ['No'])}` | `my_field`<br>`1`<br>`0`<br>`0`<br>`1` |
-
-
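-Example: filling a boolean field from a Yes/No column (the column name is illustrative):
-```
-fields_mapping = {
-    ...
-    'active': mapper.bool_val('Active_column', ['Yes'], ['No']),
-    ...
-}
-```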
-
-
-#### mapper.binary(field, path_prefix, skip=False, encoding="utf-8")
-Use it to convert a binary file to base64 and put the result in a binary field. Typically used to import images and attachments.
-
-Assuming the images `pict_1.png` and `pict_2.png` are located in `/home/Pictures`:
-
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `my_column`<br>`pict_1.png`<br>`pict_2.png` | `{'my_field': mapper.binary('my_column', '/home/Pictures/')}` | `my_field`<br>`kllkxqlxsqnxqxhHJVJSFSVSJDYVDV......`<br>`KKjdsndb77573çinjhffxxcdkllkxq......` |
-
-
-
-#### mapper.concat(separator, *fields)
-
-Concatenates the values of one or several columns, separated by `separator`.
-
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `my_column1;my_column2`<br>`val1;val2`<br>`val3;val4` | `{'my_field': mapper.concat('_','my_column1','my_column2')}` | `my_field`<br>`val1_val2`<br>`val3_val4` |
-
-
-
-
-#### mapper.m2o(PREFIX, field, default='', skip=False)
-
-Use it to create an XML_ID where the module is `PREFIX` and the name is the value of the column.
-
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `my_column`<br>`my_value1`<br>`my_value2` | `{'my_field/id': mapper.m2o('my_import_my_model','my_column')}` | `my_field/id`<br>`my_import_my_model.my_value1`<br>`my_import_my_model.my_value2` |
-
-
-
-> Notice the field name suffixed with /id in the mapping dictionary.
-
-#### mapper.m2o_map(PREFIX, mapper, default='', skip=False)
-
-Use it to create an XML_ID where the module is `PREFIX` and the name is the result of `mapper`. It is often used with the mapper `mapper.concat` to create XML_IDs as the concatenation of several columns.
-
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `my_column1;my_column2`<br>`val1;val2`<br>`val3;val4` | `{'my_field/id': mapper.m2o_map('my_import_my_model', mapper.concat('_','my_column1','my_column2'))}` | `my_field/id`<br>`my_import_my_model.val1_val2`<br>`my_import_my_model.val3_val4` |
-
-
-
-> Notice the field name suffixed with /id in the mapping dictionary.
-
-#### mapper.m2m(PREFIX, *fields)
-Returns a comma-separated list built from the values of one or several columns, each value prefixed with `PREFIX`. Use it to build an XML_ID list for a `Many2many` field.
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `my_column1;my_column2`<br>`val1,val2;val3` | `{'my_field/id': mapper.m2m('my_import_my_model','my_column1', 'my_column2')}` | `my_field/id`<br>`my_import_my_model.val1,my_import_my_model.val2,my_import_my_model.val3` |
-
-
-
-> Notice the field name suffixed with /id in the mapping dictionary.
-
-> Notice val1, val2 of my_column1 are handled the same way as val3 in my_column2.
-
-
-
-#### mapper.m2m_id_list(PREFIX, *args, **kwargs)
-Builds one record (line) per distinct value of a comma-separated list inside a column, prefixing each value with `PREFIX`.
-
-
-| Client File | Mapper | Import File (1) |
-|---|---|---|
-| `my_column1`<br>`val1,val2`<br>`val1,val3`<br>`val4` | `{'my_field/id': mapper.m2m_id_list('my_import_my_model','my_column1')}` | `my_field/id`<br>`my_import_my_model.val1`<br>`my_import_my_model.val2`<br>`my_import_my_model.val3`<br>`my_import_my_model.val4` |
-
-
-
-(1) **To use in conjunction with the option `m2m` while invoking the transformation** (see [Many2many Relationships](#many2many-relationships)).
-
-```
-processor.process(mapping, 'output.file.csv', {}, m2m=True)
-```
-Without this option, the import file would look like this:
-```
-my_field/id
-[my_import_my_model.val1, my_import_my_model.val2]
-[my_import_my_model.val1, my_import_my_model.val3]
-[my_import_my_model.val4]
-```
-
-#### mapper.m2m_value_list(*args, **kwargs)
-Builds one record (line) per distinct value of a comma-separated list inside a column.
-
-| Client File | Mapper | Import File (1) |
-|---|---|---|
-| `my_column1`<br>`val1,val2`<br>`val1,val3`<br>`val4` | `{'my_field': mapper.m2m_value_list('my_column1')}` | `my_field`<br>`val1`<br>`val2`<br>`val3`<br>`val4` |
-
-
-
-(1) **To use in conjunction with the option `m2m` while invoking the transformation** (see [Many2many Relationships](#many2many-relationships)).
-```
-processor.process(mapping, 'output.file.csv', {}, m2m=True)
-```
-Without this option, the import file would look like this:
-```
-my_field
-[val1, val2]
-[val1, val3]
-[val4]
-```
-
-#### mapper.m2m_template_attribute_value(PREFIX, template_id_field, *args)
+[][read the docs]
+[][tests]
+[][codecov]
-Generates a mapping function for `product.template.attribute.value` XMLIDs, including the product template identifier.
+[][pre-commit]
+[![Ruff codestyle][ruff badge]][ruff project]
-This function is specifically designed to create a mapper that constructs comma-separated strings of XML IDs for product attribute values, incorporating the identifier of the associated product template. This is useful when you need to establish relationships based on attribute values within a specific product template context.
+[pypi status]: https://pypi.org/project/odoo-data-flow/
+[read the docs]: https://odoo-data-flow.readthedocs.io/
+[tests]: https://github.com/OdooDataFlow/odoo-data-flow/actions?workflow=Tests
+[codecov]: https://app.codecov.io/gh/OdooDataFlow/odoo-data-flow
+[pre-commit]: https://github.com/pre-commit/pre-commit
+[ruff badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json
+[ruff project]: https://github.com/charliermarsh/ruff
-
-| Client File | Mapper | Import File |
-|---|---|---|
-| `product_template_ref;color;size`<br>`template_1;red;medium`<br>`template_1;blue;large`<br>`template_2;green;small` | `{'my_field/id': mapper.m2m_template_attribute_value('PRODUCT_ATTRIBUTE_VALUE', 'product_template_ref', 'color', 'size')}` | `my_field/id`<br>`PRODUCT_ATTRIBUTE_VALUE_template_1_color_red,PRODUCT_ATTRIBUTE_VALUE_template_1_size_medium`<br>`PRODUCT_ATTRIBUTE_VALUE_template_1_color_blue,PRODUCT_ATTRIBUTE_VALUE_template_1_size_large`<br>`PRODUCT_ATTRIBUTE_VALUE_template_2_color_green,PRODUCT_ATTRIBUTE_VALUE_template_2_size_small` |
-
-
+A powerful Python library for defining robust, repeatable, and high-performance data import/export workflows for Odoo. It replaces complex, manual data preparation with a clean, "configuration-as-code" approach.
-Args:
+---
-* `PREFIX (str)`: The prefix to use for the generated XML IDs (e.g., 'PRODUCT_ATTRIBUTE_VALUE'). This prefix should be consistent with how your XML IDs are structured.
-* `template_id_field (str)`: The name of the field/column in the CSV data that contains the identifier (e.g., XML ID, database ID, or other unique key) of the related product template. This identifier will be included in the generated XML IDs.
-* `*args (str)`: A variable number of field/column names from the CSV data that represent attribute values. These values will be used to construct the XML IDs.
+## Key Features
-Returns:
+- **Declarative Transformations:** Use simple Python scripts and a rich set of `mapper` functions to transform any source CSV or XML data into an Odoo-ready format.
+- **Two-Phase Workflow:** Cleanly separates data **transformation** from data **loading**, making complex migrations easier to manage, reuse, and debug.
+- **High-Performance CLI:** Import and export data with a clean, modern command-line interface, featuring high-performance parallel processing (`--worker`), batching (`--size`), and robust error handling.
+- **Automatic Scripting:** Automatically generate shell scripts for the loading phase, ensuring a repeatable and reliable process every time.
+- **Robust Error Handling and Recovery:** Verify the number of records processed in a batch against the number successfully imported, helping to quickly identify issues.
+- **Direct Server-to-Server Migration:** Perform a complete export, transform, and import from one Odoo instance to another in a single, in-memory step with the `migrate` command.
+- **Post-Import Workflows:** Run automated actions on your data _after_ it has been imported (e.g., validating invoices, registering payments) using the powerful `workflow` command.
+- **Data Export:** Export data from Odoo into CSV format, complementing the import functionality and providing a complete solution for Odoo data management.
+- **Multiple Data Sources**: Natively supports CSV and XML files. Easily extendable to support other sources like databases or APIs.
+- **Data Validation:** Ensure data integrity before it even reaches Odoo.
-* `function`: A mapper function that takes a CSV row (as a dictionary) as input and returns a comma-separated string of generated XML IDs. If the `template_id_field` is missing in the CSV row, it returns an empty string.
-Important Notes:
+## Installation
-* The generated XML IDs are constructed by concatenating the `PREFIX`, the value from `template_id_field`, and the values from the provided attribute columns.
-* The function handles cases where the `template_id_field` might be missing in the CSV data, returning an empty string to avoid errors.
-* Ensure that the `PREFIX` and the column names in `args` are consistent with your actual data structure and XML ID conventions.
-
-### Advanced Transformations
-
-#### User Defined Mappers
-Sometimes the built-in mappers do not meet your needs, even with a `postprocess` function. In this case, you can map a field to a tailor-made function.
-This function takes a single argument representing an entire line of the client file, as a dictionary where the columns are the keys.
+You can install _Odoo Data Flow_ via `uv` or `pip` from [PyPI]:
+```console
+$ uv pip install odoo-data-flow
```
-def my_field_mapper(line):
- if line['column1'] == 'a_value':
- return 'something'
- return 'something_else'
-fields_mapping = {
- ...
- 'my_field': my_field_mapper,
- ...
-}
-```
-
-#### Managing the Client CSV file
-Sometimes it's useful to change the client file according to specific needs (e.g., removing useless lines, adding columns with data, ...). You can do that with a preprocessor function passed when building the `Processor`. See this example that does... nothing.
+## Quick Usage Example
-```
-def myPreprocessor(header, data):
- return header, data
+The core workflow involves two simple steps:
-processor = Processor('client_file.csv', delimiter=';', preprocess=myPreprocessor)
-```
-
-The `preprocess` function takes two arguments: `header` is a list of all the columns, and `data` is a list of rows, each row being a list of values in the same order as `header`. Let's look at two simple examples of preprocessing.
-
-##### Adding a column
-Here we add the column `NEW_COLUMN` filled with the value `NEW_VALUE` for all the records.
-```
-def myPreprocessor(header, data):
- header.append('NEW_COLUMN')
- for i, j in enumerate(data):
- data[i].append('NEW_VALUE')
- return header, data
-```
-##### Removing Lines
-Say we want to remove all the lines having the column `Firstname` with the value `John`.
-```
-def myPreprocessor(header, data):
- data_new = []
- for i, j in enumerate(data):
- line = dict(zip(header, j))
- if line['Firstname'] != 'John':
- data_new.append(j)
- return header, data_new
-```
-
-> **Note:** The client file is not physically changed; only the buffer used by the `Processor` is changed in memory. The new columns are nevertheless usable in the fields mapping dictionary, and the removed lines are not processed.
-
-#### Updating Records With Database IDs
-It is possible to **update** records knowing their database ID instead of their XML_ID. The field `.id` designates a database ID. However, the script `odoo_import_thread.py` still requires an `id` field, so the trick is to provide an empty `id` field and to map `.id` from the client file.
-```
-my_mapping = {
- 'id': mapper.const(''),
- '.id': mapper.val('id_column'),
- ...
-}
-```
-
-#### XML Processing
-
-The `XMLProcessor` class allows you to transform data from XML files into a format suitable for Odoo import, providing an alternative to the `Processor` class for XML-based data sources.
+**1. Transform your source data with a Python script.**
+Create a `transform.py` file to define the mapping from your source file to Odoo's format.
```python
-# -*- coding: utf-8 -*-
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import XMLProcessor
-
-processor = XMLProcessor(filename, root_node_path)
-```
-
-The XMLProcessor is initialized with the XML file to process and an XPath expression to identify the data records.
-
-`XMLProcessor.__init__(filename, root_node_path, conf_file=False)`
-Constructor for the XMLProcessor class.
-
-*Args:*
-
-`filename` (str): The path to the XML file to be processed.
-
-`root_node_path` (str): An XPath expression specifying the root node(s) within the XML file to iterate over. Each node found by this XPath will be treated as a data record.
-
-`conf_file` (str, optional): The path to a configuration file. Inherited from the Processor class but may not be used in the same way by XMLProcessor. Defaults to False.
-
-`XMLProcessor.process(mapping, filename_out, import_args, t='list', null_values=['NULL', False], verbose=True, m2m=False)`
-Transforms data from the XML file based on the provided mapping.
-
-*Args:*
-
-`mapping` (dict): A dictionary that defines how data from the XML file should be mapped to fields in the output format (e.g., CSV). The keys of the dictionary are the target field names, and the values are XPath expressions to extract the corresponding data from the XML.
-
-`filename_out` (str): The name of the output file where the transformed data will be written.
-
-`import_args` (dict): A dictionary containing arguments that will be passed to the odoo_import_thread.py script (e.g., `{'model': 'res.partner', 'context': "{'tracking_disable': True}"}`).
-
-`t (str, optional)`: This argument is kept for compatibility but is not used in XMLProcessor. Defaults to 'list'.
-
-`null_values` (list, optional): This argument is kept for compatibility but is not used in XMLProcessor. Defaults to `['NULL', False]`.
-
-`verbose` (bool, optional): This argument is kept for compatibility but is not used in XMLProcessor. Defaults to True.
-
-`m2m (bool, optional)`: This argument is kept for compatibility but is not used in XMLProcessor. Defaults to False.
-
-*Returns:*
-
-`tuple`: A tuple containing the header (list of field names) and the transformed data (list of lists).
-
-> **Important Notes:**
-The `t`, `null_values`, `verbose`, and `m2m` arguments are present for compatibility with the Processor class but are not actually used by the XMLProcessor.
-The mapping dictionary values should be XPath expressions that select the desired data from the XML nodes.
-
-`XMLProcessor.split(split_fun)`
-Raises a NotImplementedError because the split functionality is not supported for XMLProcessor.
+# transform.py
+from odoo_data_flow.lib.transform import Processor
+from odoo_data_flow.lib import mapper
-*Args:*
-
-`split_fun`: This argument is not used.
-
-*Raises:*
-
-`NotImplementedError`: Indicates that the split method is not available for XML processing.
-
-##### Example of XML to CSV Transformation
-
-Let's say you have the following XML data:
-
-```XML
-<data>
-  <country name="Liechtenstein">
-    <rank>1</rank>
-    <year>2008</year>
-    <gdppc>141100</gdppc>
-    <neighbor name="Austria"/>
-  </country>
-  <country name="Singapore">
-    <rank>4</rank>
-    <year>2011</year>
-    <gdppc>59900</gdppc>
-    <neighbor name="Malaysia"/>
-  </country>
-  <country name="Panama">
-    <rank>68</rank>
-    <year>2011</year>
-    <gdppc>13600</gdppc>
-    <neighbor name="Costa Rica"/>
-  </country>
-</data>
-```
-
-To transform this into a CSV file with columns "name", "gdp", "year", and "neighbor", you would use the following Python script and mapping:
-
-```Python
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import XMLProcessor
-
-processor = XMLProcessor('countries.xml', '/data/country')
-
-mapping = {
- 'name': '/data/country/@name',
- 'gdp': '/data/country/gdppc/text()',
- 'year': '/data/country/year/text()',
- 'neighbor': '/data/country/neighbor/@name'
+my_mapping = {
+ 'id': mapper.concat('prod_', 'SKU'),
+ 'name': mapper.val('ProductName'),
+ 'list_price': mapper.num('Price'),
}
-processor.process(mapping, 'countries.csv', {})
+processor = Processor('origin/products.csv')
+processor.process(my_mapping, 'data/products_clean.csv', {'model': 'product.product'})
+processor.write_to_file("load.sh")
```
-
-This would generate a CSV file like this:
-
-```CSV
-"name";"gdp";"year";"neighbor"
-"Liechtenstein";"141100";"2008";"Austria"
-"Singapore";"59900";"2011";"Malaysia"
-"Panama";"13600";"2011";"Costa Rica"
+Then run the transformation script:
+```console
+$ python transform.py
```
+**2. Load the clean data into Odoo using the CLI.**
+The `transform.py` script generates a `load.sh` file containing the correct CLI command.
-## A Real Life Example
-A complete import project (transformation and load) is available in the repo [odoo_import_example](https://github.com/tfrancoi/odoo_import_example). It demonstrates use cases such as:
-- importing partners with multiple categories
-- importing products and variants with their suppliers
-- importing messages
-
-> **Note:** The project was done in Odoo 11. Some models may differ in other versions.
-
-## Performances Considerations
-
-### Importing Related or Computed Fields
-
-Importing non-readonly related fields causes cascading updates that drastically increase the import run time. Consider the following example.
-
-
-
-Importing my_partner_2000 will trigger an update of my_partner_1 because parent_id is a non-readonly related field. Updating my_partner_1 will in turn trigger the update of all its children. When importing the next record, my_partner_2001, the same scenario happens: my_partner_1 is updated a second time, which triggers the update of all its children again, but this time the number of children has been increased by my_partner_2000. So each time a new partner is created, the number of updates behind the scenes increases.
-
-You can detect this scenario by looking at the transaction time, which increases exponentially.
-
-The solution is to use the parameter `--ignore`. In this case, you should import with the option `--ignore=parent_id/id`.
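-
-For example, reusing the command format shown earlier (adapt the file, model and other options to your case):
-```
-odoo_import_thread.py -c conf/connection.conf --file=res.partner.csv --model=res.partner --ignore=parent_id/id --sep=";" --context="{}"
-```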
-
-## Troubleshooting
-
-### When the number of records does not match
-Suppose your Odoo CSV file contains 100 records, but after the load you count fewer than 100 new records.
-
-Possible cause:
-- One or more records have the same XML_ID. So the first record with that XML_ID was inserted, while the other records have updated the first one instead of creating new ones.
-
-To check the uniqueness of _what you think is_ a record identifier in the client file:
-- create a new CSV file with one column containing the supposed identifier,
-- check the uniqueness of the values, for example with the following bash command:
+```bash
+# Contents of the generated load.sh
+odoo-data-flow import --config conf/connection.conf --file data/products_clean.csv --model product.product ...
```
-sort my_xml_ids.csv | uniq -c | sort -nr
+Then execute the script.
+```console
+$ bash load.sh
```
-- look for an output line beginning with a number > 1.
-## Tips and Tricks
+## Documentation
-### Importing Data of Multiple Companies
-When you run an import, the current company is the company of the user defined by the parameters `login` and `uid` in the [configuration file](#config-CONFIG).
-As a rule of thumb, it's advised to separate the imports by company.
+For a complete user guide, tutorials, and API reference, please see the **[full documentation on Read the Docs][read the docs]**.
+Please see the [Command-line Reference] for details.
-Assuming you have to import data for Company_A and Company_B:
-- build import files with data of Company_A
-- build import files with data of Company_B
-- set the import user as a member of Company_A
-- import the files with the data of Company_A
-- change the import user company to Company_B
-- import the files with the data of Company_B
+## Contributing
-### Importing Translations
+Contributions are very welcome.
+To learn more, see the [Contributor Guide].
-Of course, the translations can be imported with data files tailored for the translation models. But it's a lot easier with the `lang` key set in the context. Let's take an example with the products.
+## License
-First, import the source terms. This can be done while importing legacy data. Say we have the following CSV file `product_template.csv`:
+Distributed under the terms of the [LGPL 3.0 license][license],
+_Odoo Data Flow_ is free and open source software.
-```
-id;name;price
-my_module.product_wallet;Wallet;10.0
-my_module.product_bicyle;Bicycle;400.0
-```
-
-Import the products with the following command:
-```
-odoo_import_thread.py -c connection.conf --file=product_template.csv
-```
-_The option `--model=product.template` is not necessary since the CSV file is correctly named according to the model._
-
-Then, build another CSV file for the translations, say `product_template_FR.csv` for the French translations. This file contains only the translated terms for the products.
-
-```
-id;name
-my_module.product_wallet;Portefeuille
-my_module.product_bicyle;Bicyclette
-```
-
-Import this file by setting the language in the context.
-```
-odoo_import_thread.py -c connection.conf --file=product_template_FR.csv --model=product.template --context="{'lang': 'fr_FR'}"
-```
-And it's done.
-
-Strictly speaking, this doesn't import translations explicitly. What happens is an update of the product names in the fr_FR language, which is equivalent, and a more convenient way to build the translation file: it is based on the legacy file and, above all, it lets the ORM manage the translation process.
-
-### Importing Account Move Lines
+## Issues
-This is an interesting use case of a one2many relationship. Let's take a look at the simplified relationship model of `account.move` and `account.move.line`.
+If you encounter any problems,
+please [file an issue] along with a detailed description.
-
+## Credits
-As a rule of thumb, avoid importing one2many relationships, because the inverse many2one relation always exists. So one strategy could be to first import all the account.move records in one CSV file, then all the account.move.line records in another CSV file. But here this strategy doesn't work because there is a balance check on account.move: *an account.move must have at least two account.move.line records where the credit amount of one balances the debit amount of the other.*
+The development of this project is financially supported by [stefcy.com].
+This project was generated from [@bosd]'s [uv hypermodern python cookiecutter] template.
-That means the import of the first account.move.line will fail because its amount is not balanced yet, the import of the second account.move.line will fail because the first one is missing, and so on.
+[stefcy.com]: https://stefcy.com
+[@bosd]: https://github.com/bosd
+[pypi]: https://pypi.org/
+[uv hypermodern python cookiecutter]: https://github.com/bosd/cookiecutter-uv-hypermodern-python
+[file an issue]: https://github.com/OdooDataFlow/odoo-data-flow/issues
+[pip]: https://pip.pypa.io/
-One possible solution is to use a context with `'check_move_validity': False`. In that case you can first import all the account.move records, then all the account.move.line records. As the balance check is disabled, you must make sure that all the account.move records are balanced.
-
-Another solution is to import the account.move and the account.move.line together. To do this, you can build a mixed CSV file like this, say `account_move.csv`.
-
-
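-For illustration only (the exact columns and XML_IDs depend on your data), such a mixed file puts the move and its lines together, leaving the move columns empty on the continuation rows:
-
-```
-id;journal_id/id;line_ids/account_id/id;line_ids/name;line_ids/debit;line_ids/credit
-my_import_account_move.move_1;my_import_journal.sales;my_import_account.a400;Customer line;100.0;0.0
-;;my_import_account.a700;Revenue line;0.0;100.0
-```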
-
-Then import it with the `--o2m` option.
-```
-odoo_import_thread.py -c connection.conf --file=account_move.csv --model=account.move --o2m
-```
-
-The option `--o2m` prevents the batch from being split while importing the same account.move. The account.move is imported together with its account.move.line records, so the balance check can be performed at the end of the transaction.
-
-The drawback of this method is that you cannot set an XML_ID to an account.move.line. See [--o2m](#o2m) for more details.
-# Exporting Data
-
-The Odoo CSV Import Export library provides the script `odoo_export_thread.py` to export data from Odoo. This script has several options. Type the command `odoo_export_thread.py --help` to get the usage.
-
-```
-usage: odoo_export_thread.py [-h] -c CONFIG --file FILENAME --model MODEL
- --field FIELDS [--domain DOMAIN]
- [--worker WORKER] [--size BATCH_SIZE]
- [-s SEPARATOR] [--context CONTEXT]
-
-Import data in batch and in parallel
-
-optional arguments:
- -h, --help show this help message and exit
- -c CONFIG, --config CONFIG
- Configuration File that contains connection parameters
- --file FILENAME Output File
- --model MODEL Model to Export
- --field FIELDS Fields to Export
- --domain DOMAIN Filter
- --worker WORKER Number of simultaneous connection
- --size BATCH_SIZE Number of line to import per connection
- -s SEPARATOR, --sep SEPARATOR
- CSV separator
- --context CONTEXT context that will be passed to the load function, need
- to be a valid python dict
-```
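-
-For example, a hypothetical export of partner data could look like this (the exact value format expected by `--field` and `--domain` may differ; check the help above and the script's source):
-```
-odoo_export_thread.py -c conf/connection.conf --file=res_partner_export.csv --model=res.partner --field=id,name,email --domain="[]" --worker=2 --size=200 --sep=";"
-```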
-(To be continued...)
+
-# Requirements
-- [odoo-client-lib](https://github.com/odoo/odoo-client-lib)
+[license]: https://github.com/OdooDataFlow/odoo-data-flow/blob/main/LICENSE
+[contributor guide]: https://github.com/OdooDataFlow/odoo-data-flow/blob/main/CONTRIBUTING.md
+[command-line reference]: https://odoo-data-flow.readthedocs.io/en/latest/usage.html
diff --git a/README.rst b/README.rst
deleted file mode 100644
index ded12846..00000000
--- a/README.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-Odoo CSV Import Export Library
-==============================
-This library provides tools to easily and quickly import data into Odoo or export data from Odoo using CSV file.
-It also provide a framework to manipulate date from csv.
-
-Requirements
---------------
-* openerp-client-lib
diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 00000000..9ac26504
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,9 @@
+comment: false
+coverage:
+ status:
+ project:
+ default:
+ target: "100"
+ patch:
+ default:
+ target: "100"
diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md
new file mode 100644
index 00000000..a0382c5e
--- /dev/null
+++ b/docs/ROADMAP.md
@@ -0,0 +1,28 @@
+# Project Roadmap
+
+This document outlines the planned future enhancements and major refactoring efforts for the `odoo-data-flow` library. Contributions are welcome!
+
+## Planned Features & Enhancements
+
+### 1. Modernize Post-Import Workflows
+
+- **Current Status:** The library includes a legacy `InvoiceWorkflowV9` class designed specifically for Odoo version 9. This class uses outdated API calls (e.g., `exec_workflow`) and will not work on modern Odoo versions.
+- **Goal:** Refactor the workflow system to support recent Odoo versions (16.0, 17.0, 18.0+).
+- **Tasks:**
+ - Create a new `InvoiceWorkflowV18` (or similar) class that uses the modern Odoo API for validating and paying invoices (e.g., calling button actions like `action_post`).
+ - Update the `workflow_runner.py` and the `__main__.py` CLI to allow users to specify which workflow version they want to run (e.g., `odoo-data-flow workflow invoice-v18`).
+ - Consider creating a base `Workflow` class that new, custom workflows can inherit from to promote a consistent structure (a rough sketch follows below).
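+
+A purely illustrative sketch of what such a base class could look like (names and signatures are hypothetical, not an existing API):
+
+```python
+from abc import ABC, abstractmethod
+from typing import Any
+
+
+class Workflow(ABC):
+    """Hypothetical base class for post-import workflows."""
+
+    def __init__(self, connection: Any, model: str) -> None:
+        self.connection = connection  # an odoo-client-lib style connection
+        self.model = model
+
+    @abstractmethod
+    def run(self) -> None:
+        """Run the workflow actions, e.g. calling button actions such as `action_post`."""
+```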
+
+### 2. Add Support for More Data Formats
+
+- **Goal:** Expand the `Processor` to natively handle other common data formats beyond CSV and XML.
+- **Potential Formats:**
+ - JSONL (JSON Lines)
+ - Direct database connections (e.g., PostgreSQL, MySQL)
+
+### 3. Enhance Test Coverage
+
+- **Goal:** Increase unit and integration test coverage to improve reliability.
+- **Tasks:**
+ - Add E2E tests which perform an actual import/export.
+ - **On Pull Requests to `main` / After Merging:** Run the slow E2E integration tests. This ensures that only fully validated code gets into the main branch, without slowing down the development process for every small change.
diff --git a/docs/_static/favicon.ico b/docs/_static/favicon.ico
new file mode 100644
index 00000000..9549e970
Binary files /dev/null and b/docs/_static/favicon.ico differ
diff --git a/docs/_static/icon.png b/docs/_static/icon.png
new file mode 100644
index 00000000..3c1234b2
Binary files /dev/null and b/docs/_static/icon.png differ
diff --git a/docs/codeofconduct.md b/docs/codeofconduct.md
new file mode 100644
index 00000000..58fd373b
--- /dev/null
+++ b/docs/codeofconduct.md
@@ -0,0 +1,3 @@
+```{include} ../CODE_OF_CONDUCT.md
+
+```
diff --git a/docs/comparison_with_other_tools.md b/docs/comparison_with_other_tools.md
new file mode 100644
index 00000000..2e36a494
--- /dev/null
+++ b/docs/comparison_with_other_tools.md
@@ -0,0 +1,84 @@
+# Comparison with Other Tools
+
+Choosing the right tool for a data migration is critical. While there are many ways to get data into and out of Odoo, `odoo-data-flow` is designed to solve a specific set of challenges related to complex, repeatable, and robust data workflows.
+
+This guide provides an in-depth comparison of `odoo-data-flow` with other common tools and methodologies to help you understand its strengths and decide when it's the right choice for your project.
+
+## Feature Comparison at a Glance
+
+| Feature | Odoo's Built-in Tool | Direct SQL | Custom Python Script | odoo-data-flow |
+| :--------------------- | :-------------------------- | :------------------------ | :--------------------- | :--------------------- |
+| **Ease of Use** | Very High | Very Low | Low | Medium |
+| **Transformation Power** | Very Low | High | Very High | Very High |
+| **Error Handling** | Low | None (High Risk) | Low (Manual) | Very High |
+| **Repeatability** | Low (Manual) | Medium | High | Very High |
+| **Safety (Odoo Logic)**| High | **None (Very Dangerous)** | High | Very High |
+| **Performance** | Low to Medium | Very High | Medium | High |
+| **Best For** | Simple, one-off imports by end-users. | Very specific, low-level data surgery by expert DBAs. | Highly unique, one-off scripted tasks. | Complex, repeatable data migrations and workflows. |
+
+---
+
+## In-Depth Analysis
+
+### 1. Odoo's Built-in Import/Export
+
+This is the standard import/export tool available in the Odoo user interface.
+
+* **Pros:**
+ * **Extremely Easy to Use:** It's designed for end-users and requires no programming knowledge.
+ * **Safe:** It uses Odoo's `load` method, so all business logic and validations are respected.
+
+* **Cons:**
+ * **Very Limited Transformations:** You cannot perform any significant data cleaning or restructuring. Your source file must already be in a nearly perfect format.
+ * **Poor Error Handling for Large Files:** If an error occurs in a large file, Odoo often provides a generic and unhelpful error message. Finding the single bad row in a file with thousands of lines is very difficult.
+ * **"All or Nothing" Transactions:** By default, if one record in a file fails, the entire import is rolled back. This makes importing large datasets very inefficient.
+ * **Not Repeatable:** The process is entirely manual (clicking through the UI), which makes it unsuitable for automated, repeatable migrations between environments (e.g., from staging to production).
+
+* **Verdict:** Perfect for simple, one-off tasks performed by functional users. It is not designed for the complex, repeatable migrations that developers often face.
+
+### 2. Direct Database (SQL) Manipulation
+
+This approach involves connecting directly to Odoo's PostgreSQL database and using SQL `INSERT` or `UPDATE` statements.
+
+* **Pros:**
+ * **Extremely Fast:** Bypassing the Odoo ORM is the fastest way to get data into the database.
+
+* **Cons:**
+ * **EXTREMELY DANGEROUS:** This is the most significant drawback. Direct SQL manipulation completely bypasses **all of Odoo's business logic, validations, and automated workflows.** You can easily corrupt your database beyond repair.
+ * **Data Inconsistency:** You risk breaking relational integrity (e.g., creating a sales order line without linking it to a sales order) and leaving your data in an inconsistent state.
+ * **Requires Expert Knowledge:** You need a deep understanding of both SQL and Odoo's complex database schema.
+ * **No Error Feedback:** The database will not tell you if you've violated a business rule, only if you've violated a database constraint (like a `NOT NULL` field).
+
+* **Verdict:** This method should almost never be used for standard data migration. It should only be considered for very specific, low-level data surgery by an expert database administrator who fully understands the risks.
+
+### 3. Custom Python Scripts (using `odoolib`, etc.)
+
+This is a very common approach for developers. It involves writing a custom Python script that reads a source file and uses a library like `odoolib` or `erppeek` to make RPC calls to Odoo.
+
+* **Pros:**
+ * **Extremely Flexible:** You have the full power of Python to implement any transformation logic you can imagine.
+ * **Safe:** As long as you use the `load` or `write` methods, you are respecting Odoo's business logic.
+
+* **Cons:**
+ * **Requires Writing Boilerplate Code:** You have to manually write the code for everything: parsing command-line arguments, reading and parsing CSV/XML files, managing connection details, implementing multi-threading, handling errors, logging, etc.
+ * **Error Handling is Manual:** You have to build your own `try...except` blocks and logging logic from scratch. A simple script will often fail on the first error.
+ * **Less Structured:** It's a "blank canvas" approach, which can lead to unstructured, difficult-to-maintain scripts if not carefully designed.
+
+* **Verdict:** A good choice for highly unique, one-off tasks that don't fit a standard ETL pattern. However, for a typical data migration, you will spend a lot of time re-implementing features that `odoo-data-flow` already provides out of the box.
+
+### 4. `odoo-data-flow`
+
+This library is designed to be the "sweet spot" between the simplicity of the built-in tool and the power of a fully custom script.
+
+* **Pros:**
+ * **Powerful Transformations:** It gives you the full power of Python through the `mapper` system, allowing you to handle any complex data transformation.
+ * **Structured and Repeatable:** It enforces a clean separation between the transform and load phases, resulting in well-organized, maintainable, and easily repeatable migration projects.
+ * **Robust Error Handling Built-In:** The two-tier failure handling system (`.fail.csv` and the final `..._failed.csv` with error reasons) is provided automatically, saving you from having to build this complex logic yourself.
+ * **Performance Features Included:** It comes with built-in, easy-to-use features for parallel processing (`--worker`) and deadlock prevention (`--groupby`).
+ * **Safe:** It exclusively uses Odoo's standard API methods, ensuring all business logic and validations are respected.
+
+* **Cons:**
+ * **Learning Curve:** It has a steeper learning curve than the simple Odoo UI importer. You need to be comfortable writing Python dictionaries and using the `mapper` functions.
+ * **Less Flexible Than a Pure Custom Script:** While very flexible, it is still an opinionated framework. For extremely unusual tasks that don't fit the "transform a file and load it" pattern, a pure custom script might be more appropriate.
+
+* **Verdict:** The ideal tool for developers handling complex, repeatable data migrations. It provides the power of a custom script without the need to write and maintain all the boilerplate code for file parsing, error handling, and process management.
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 00000000..c8f79949
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,25 @@
+"""Sphinx configuration."""
+
+project = "Odoo Data Flow"
+author = "bosd"
+copyright = "2025, bosd"
+extensions = [
+ "sphinx.ext.autodoc",
+ "sphinx.ext.napoleon",
+ "sphinxmermaid",
+ "sphinx_click",
+ "myst_parser",
+ "sphinx_copybutton",
+]
+autodoc_typehints = "description"
+html_theme = "shibuya"
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#
+html_logo = "_static/icon.png"
+
+# The name of an image file (relative to this directory) to use as a favicon of
+# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+html_favicon = "_static/favicon.ico"
diff --git a/docs/contributing.md b/docs/contributing.md
new file mode 100644
index 00000000..b9419640
--- /dev/null
+++ b/docs/contributing.md
@@ -0,0 +1,7 @@
+```{include} ../CONTRIBUTING.md
+---
+end-before:
+---
+```
+
+[code of conduct]: codeofconduct
diff --git a/docs/core_concepts.md b/docs/core_concepts.md
new file mode 100644
index 00000000..fddb0b48
--- /dev/null
+++ b/docs/core_concepts.md
@@ -0,0 +1,209 @@
+# Core Concepts
+
+The `odoo-data-flow` library is built on a few key concepts that enable robust and manageable data migrations. Understanding these will help you get the most out of the tool for both importing and exporting data.
+
+## The Two-Phase Import Workflow
+
+For importing data, the library promotes a two-phase workflow to separate data manipulation from the actual loading process.
+
+1. **Transform Phase**: This phase focuses purely on data manipulation. A Python script reads your raw source files, applies cleaning and transformation rules using **mappers**, and produces clean, structured CSV files that are perfectly formatted for Odoo.
+
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart TD
+ A(["Odoo-Data-Flow"]) -- Processor
+ Mapper --- B["Transform Python Script"]
+ B --- C["Client CSV File"]
+ B --> D["Transformed CSV Files for import"]
+ A@{ shape: proc}
+ C@{ shape: doc}
+ D@{ shape: docs}
+ style A fill:#BBDEFB
+ style B fill:#C8E6C9
+ style C fill:#FFF9C4
+ style D fill:#FFF9C4
+```
+
+2. **Load Phase**: This phase focuses purely on data import. The generated shell script or the direct `odoo-data-flow import` command takes the clean CSV files and loads them into Odoo.
+
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart TD
+ A["Odoo-Data-Flow"] -- Import --- B["odoo-client lib"]
+ B --- C["Transformed CSV Files"]
+ B L_B_D_0@--> D["odoo"]
+ n1["Configuration File"] --> B
+ A@{ shape: proc}
+ B@{ shape: proc}
+ C@{ shape: docs}
+ D@{ shape: cyl}
+ n1@{ shape: doc}
+ style A fill:#BBDEFB
+ style B fill:#FFE0B2
+ style C fill:#FFF9C4
+ style D fill:#AA00FF
+ style n1 fill:#C8E6C9
+ L_B_D_0@{ animation: slow }
+```
+
+This separation provides several key advantages:
+
+* **Debugging**: If there's a problem, you can easily tell if it's a data transformation issue or an Odoo connection issue.
+* **Reusability**: You can run the time-consuming transformation once and then use the resulting clean data to load into multiple Odoo instances (e.g., testing, staging, and production).
+* **Simplicity**: Each script has a single, clear responsibility.
+
+## The Import Strategy: One File, One Model
+
+It is important to understand that the `odoo-data-flow import` command is designed to load **one data file into one specific Odoo model** at a time. This means that a complete data migration (e.g., for partners, products, and sales orders) will require you to run the transform and load process several times with different data files and different target models.
+
+This deliberate design ensures clarity and respects Odoo's internal logic. Data is not inserted directly into the database; instead, it is loaded by calling Odoo's standard `load` method. This ensures that all the business logic, validations, and automations associated with each model are triggered correctly, just as they would be in the Odoo user interface.
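+
+In practice this simply means running one import command per generated file, each targeting its own model (the file names below are illustrative):
+
+```bash
+odoo-data-flow import --config conf/connection.conf --file data/res_partner.csv --model res.partner
+odoo-data-flow import --config conf/connection.conf --file data/product_product.csv --model product.product
+```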
+
+## Post-Import Processing (Workflows)
+
+In addition to transforming and loading data, the library provides a powerful **workflow** system for running automated, post-import actions on your records directly in Odoo.
+
+This is an advanced feature designed for complex use cases, such as validating a large batch of imported invoices, registering payments, or triggering other specific business logic that needs to happen _after_ the initial data has been loaded.
+
+This is handled by the `odoo-data-flow workflow` command, which allows you to run predefined processes on your data.
+
+### Overall Data Flow Including Workflows
+
+This diagram shows how the workflow phase fits in after the main transform and load phases.
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart TD
+ subgraph subGraph0["1 Transform Phase"]
+ B{"Processor"}
+ A["Raw Source Data"]
+ end
+ subgraph subGraph1["2 Load Phase"]
+ C["Clean Data"]
+ D{"odoo-data-flow import"}
+ E["Odoo Database"]
+ end
+ subgraph subGraph2["3 Workflow Phase"]
+ F{"odoo-data-flow workflow"}
+ end
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ E --> F
+ F -- "Validate, Pay, etc." --> E
+ E@{ shape: cyl}
+ style A fill:#FFF9C4
+ style D fill:#BBDEFB
+ style E fill:#AA00FF
+ style F fill:#BBDEFB
+ style subGraph0 fill:transparent
+ style subGraph1 fill:transparent
+ style subGraph2 fill:transparent
+
+```
+
+## Core Components of the Transform Phase
+
+The transformation is driven by three main components in your Python script:
+
+### 1. The `Processor`
+
+The `Processor` is the engine of the library. You initialize it with your source file path and its settings (like the separator). Its main job is to apply your mapping rules and generate the clean data and the load script.
+
+### 2. The `mapper` Functions
+
+Mappers are the individual building blocks for your transformations. They are simple, reusable functions that define *how* to create the value for a single column in your destination file. The library provides a rich set of mappers for concatenation, direct value mapping, static values, and handling complex relationships.
+
+> For a complete list of all available mappers and their options, see the [Data Transformations Guide](guides/data_transformations.md).
+
+
+### 3. The Mapping Dictionary
+
+This standard Python `dict` ties everything together. The keys are the column names for your **destination** CSV file, and the values are the `mapper` functions that will generate the data for that column.
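+
+A small example tying the three components together (the source column names are hypothetical):
+
+```python
+from odoo_data_flow.lib.transform import Processor
+from odoo_data_flow.lib import mapper
+
+# Keys are destination columns, values are mapper functions applied to each source row.
+partner_mapping = {
+    'id': mapper.concat('contact_', 'LegacyID'),
+    'name': mapper.val('FullName'),
+    'phone': mapper.val('Phone'),
+}
+
+processor = Processor('origin/contacts.csv')
+processor.process(partner_mapping, 'data/res_partner.csv', {'model': 'res.partner'})
+```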
+
+## Understanding the Load Phase and Error Handling
+
+A key strength of this library is its robust error handling, which ensures that a few bad records won't cause an entire import to fail. This is managed through a clever two-pass system.
+
+### The Two-Pass Load Sequence
+
+The generated `load.sh` script contains two commands designed to maximize both speed and accuracy.
+
+```bash
+# First pass (Normal Mode): Fast, parallel import. Writes recoverable errors to a .fail file.
+odoo-data-flow import --config conf/connection.conf --file data/res_partner.csv --model res.partner
+# Second pass: Slower, precise import of the failed records.
+odoo-data-flow import --config conf/connection.conf --fail --file data/res_partner.csv --model res.partner
+```
+
+1. **First Pass (Normal Mode)**: The command runs in its default, high-speed mode, importing records in batches. If an entire batch is rejected for any reason, the original records from that batch are written to an intermediate failure file named **`.fail.csv`** (e.g., `res.partner.fail.csv`).
+
+2. **Second Pass (`--fail` Mode)**: The command is invoked again with the `--fail` flag. In this mode, it automatically targets the `.fail.csv` file and retries each failed record individually. Records that still fail are written to a final, timestamped error file: **`_YYYYMMDD_HHMMSS_failed.csv`**. This file includes an additional **`_ERROR_REASON`** column to explain why each record failed, making it easy to identify and fix the problematic data manually.
+
+
+### Error Handling Flow Diagram
+
+This diagram visualizes how records flow through the two-pass system.
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart TD
+ A["data.csv
(100 records)"] --> B{"First Pass
odoo-data-flow import"}
+ B -- 95 successful records --> C["Odoo Database"]
+ B -- 5 failed records --> D["data.fail.csv
(5 records)"]
+ D --> E{"Second Pass
odoo-data-flow import --fail"}
+ E -- 3 recovered records --> C
+ E -- 2 true errors --> F["fa:fa-user-edit data_YYMMDD_failed.csv
(2 records to fix)"]
+
+ A@{ shape: doc}
+ C@{ shape: cyl}
+ D@{ shape: doc}
+ F@{ shape: doc}
+ style A fill:#FFF9C4
+ style B fill:#BBDEFB
+ style C fill:#AA00FF
+ style D fill:#FFD600
+ style E fill:#BBDEFB
+ style F fill:#FF6D00
+
+```
+
+## The Export Concept
+
+The library can also be used to export data from Odoo, which is useful for backups, analysis, or migrating data between systems. The export process is a direct command-line call.
+
+### Export Flow Diagram
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart TD
+ ExportA["Odoo Instance"] L_ExportA_ExportB_0@--> ExportB{"odoo-data-flow export"}
+ ExportC["Configuration
(CLI Options)"] --> ExportB
+ ExportB L_ExportB_ExportD_0@--> ExportD["Output File
(e.g., exported_partners.csv)"]
+ ExportA@{ shape: cyl}
+ ExportD@{ shape: doc}
+ style ExportA fill:#AA00FF
+ style ExportB fill:#BBDEFB
+ style ExportD fill:#FFF9C4
+ L_ExportA_ExportB_0@{ animation: slow }
+ L_ExportB_ExportD_0@{ animation: slow }
+```
+
+> For detailed instructions, see the [Exporting Data Guide](guides/exporting_data.md).
diff --git a/docs/faq.md b/docs/faq.md
new file mode 100644
index 00000000..6d2f7695
--- /dev/null
+++ b/docs/faq.md
@@ -0,0 +1,220 @@
+# FAQ & Troubleshooting
+
+This document answers frequently asked questions and provides solutions to common problems you may encounter while using `odoo-data-flow`.
+
+## Frequently Asked Questions
+
+### What is `odoo-data-flow`?
+
+It is a powerful Python library designed to handle the import and export of data to and from Odoo. It allows you to define complex data transformations in Python, providing a robust and repeatable process for data migrations.
+
+### How is this different from Odoo's standard import tool?
+
+While Odoo's built-in import is great for simple tasks, `odoo-data-flow` offers several key advantages for complex or large-scale migrations:
+
+- **Separation of Concerns**: It cleanly separates the data **transformation** logic (cleaning your source data) from the data **loading** logic (importing into Odoo).
+
+- **Robust Error Handling**: Its two-pass import system intelligently handles errors, ensuring that one bad record doesn't stop the entire process.
+
+- **Powerful Transformations**: You can use the full power of Python and a rich set of built-in `mapper` functions to handle almost any data transformation challenge.
+
+- **Repeatability and Version Control**: Since your transformation logic is code, it can be version-controlled (with Git), tested, and reused across multiple environments (like staging and production) with confidence.
+
+### Can I use this for both importing and exporting?
+
+Yes. The library provides tools for both workflows. The `Processor` and `mapper` modules are used for transforming and preparing data for import, while the `odoo-data-flow export` command is used to export data from Odoo into CSV files.
+
+### Can I migrate data directly between two Odoo databases?
+
+Yes. The library includes a powerful `odoo-data-flow migrate` command that performs a complete export, transform, and import from one Odoo instance to another in a single step, without creating intermediate files. This is ideal for migrating data from a staging server to production.
+
+> For detailed instructions, see the [Server-to-Server Migration Guide](guides/server_to_server_migration.md).
+
+### How do I process a CSV file that has no header?
+
+The `Processor` can be initialized directly with in-memory data. If your source file has no header, you can read it manually using Python's standard `csv` module and provide your own header list.
+
+1. Read the raw data from the CSV file into a list of lists.
+2. Create a Python list containing the header names in the correct order.
+3. Initialize the `Processor` using the `header=` and `data=` arguments instead of `filename=`.
+
+```python
+import csv
+from odoo_data_flow.lib.transform import Processor
+
+# 1. Define the header manually
+my_header = ['LegacyID', 'FirstName', 'LastName', 'Email']
+my_data = []
+
+# 2. Read the file into a list
+with open('origin/contacts_no_header.csv', 'r') as f:
+ reader = csv.reader(f)
+ my_data = list(reader)
+
+# 3. Initialize the Processor with the in-memory data
+processor = Processor(header=my_header, data=my_data)
+
+# You can now proceed with your mapping as usual
+# my_mapping = {'name': mapper.concat(' ', 'FirstName', 'LastName'), ...}
+```
+
+### Where can I find a complete, real-world example?
+
+A full example project, demonstrating a realistic data migration workflow with multiple models and complex transformations, is available on GitHub. This is an excellent resource for seeing how all the pieces fit together.
+
+- **[Odoo Data Flow Example Repository](https://github.com/OdooDataFlow/odoo-data-flow-example/tree/18.0)**
+
+### Can `odoo-data-flow` connect directly to Google Sheets?
+
+No, the `odoo-data-flow` library cannot connect directly to Google Sheets to read data.
+
+The tool is designed to read data from local files on your computer, specifically in either **CSV** or **XML** format. It does not have the built-in functionality to authenticate with Google's services and pull data directly from a spreadsheet URL.
+
+#### Recommended Workflow
+
+The standard and easiest way to use your data from Google Sheets is to first download the sheet as a CSV file and then use that local file with the tool.
+
+1. Open your spreadsheet in Google Sheets.
+2. From the top menu, select **File** -> **Download**.
+3. Choose the **Comma-separated values (.csv)** option.
+
+
+4. This will save the current sheet as a `.csv` file to your computer's "Downloads" folder.
+5. You can then use that downloaded file with the `odoo-data-flow` command:
+
+ ```bash
+ odoo-data-flow import --file /path/to/your/downloaded-sheet.csv
+ ```
+
+This workflow ensures that you have a local copy of the data at the time of import and allows you to use all the powerful transformation features of the library on your spreadsheet data.
+
+
+### I can't connect to my cloud-hosted Odoo instance (e.g., Odoo.sh). What should I do?
+
+This is a common issue. When connecting to a cloud-hosted Odoo instance, you often need to use a secure connection protocol.
+
+The solution is typically to set the `protocol` in your `conf/connection.conf` file to **`jsonrpcs`** (note the `s` at the end for "secure").
+
+While Odoo's external API has historically used XML-RPC, modern cloud instances often require the secure JSON-RPC protocol for integrations.
+
+#### Example Configuration for a Cloud Instance
+
+Your `conf/connection.conf` should look something like this:
+
+```{code-block} ini
+:caption: conf/connection.conf
+[Connection]
+hostname = my-project.odoo.com
+port = 443
+database = my-project-production
+login = admin
+password = xxxxxxxxxx
+uid = 2
+protocol = jsonrpcs
+```
+
+#### Key things to check:
+
+1. **Protocol**: Ensure it is set to `jsonrpcs`.
+2. **Port**: Secure connections almost always use port `443`.
+3. **Hostname & Database**: Make sure you are using the correct hostname and database name provided by your cloud hosting platform (e.g., from your Odoo.sh dashboard). These are often different from the simple names used for local instances.
+
+
+---
+
+## Troubleshooting Common Errors
+
+When an import fails, understanding why is key. Here are some of the most common issues and how to solve them.
+
+### Understanding the Failure Files
+
+The two-pass import process is designed to isolate errors effectively and generates two different types of failure files for two different purposes.
+
+* **`.fail.csv` (e.g., `res.partner.fail.csv`)**:
+
+ * **When it's created**: During the **first pass** (a normal import).
+
+ * **What it contains**: If a batch of records fails to import, this file will contain the *entire original, unmodified batch* that failed.
+
+ * **Purpose**: This file is for **automated processing**. It's the input for the second pass (`--fail` mode).
+
+* **`_YYYYMMDD_HHMMSS_failed.csv` (e.g., `data_20250626_095500_failed.csv`)**:
+
+ * **When it's created**: During the **second pass** (when you run with the `--fail` flag).
+
+ * **What it contains**: This file contains only the individual records that *still* failed during the record-by-record retry. Crucially, it includes an extra **`_ERROR_REASON`** column explaining exactly why each record failed.
+
+ * **Purpose**: This file is for **human review**. The error messages help you find and fix the specific data problems.
+
+**Your recommended workflow should be:**
+
+1. Run your `load.sh` script or the `odoo-data-flow import` command.
+
+2. If a `.fail.csv` file is created, run the command again with the `--fail` flag.
+
+3. If a timestamped `..._failed.csv` file is created, open it to identify the data issues using the `_ERROR_REASON` column.
+
+4. Fix the issues in your original source file or your `transform.py` script.
+
+5. Delete the `.fail.csv` and `_failed.csv` files and rerun the entire process from the beginning.
+
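+As a minimal sketch of this two-pass run (the file path and model name are placeholders; only flags already shown in this documentation are used):
+
+```bash
+# First pass: batched import. Failing batches are written to a .fail.csv file.
+odoo-data-flow import \
+    --config conf/connection.conf \
+    --file data/res.partner.csv \
+    --model res.partner
+
+# Second pass: rerun the same command with --fail to retry the failed records
+# one by one. Records that still fail are written to a timestamped
+# ..._failed.csv file with an _ERROR_REASON column for manual review.
+odoo-data-flow import \
+    --config conf/connection.conf \
+    --file data/res.partner.csv \
+    --model res.partner \
+    --fail
+```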
+
+### Record Count Mismatch
+
+Sometimes, the number of records in your source file doesn't match the number of records created in Odoo, even if there are no errors in the final failure file.
+
+* **Cause:** This usually happens when your mapping logic unintentionally filters out rows, for example when a `postprocess` function returns an empty value for the external ID (`id`) field. If the external ID is empty, the entire record is skipped without any error.
+
+* **Solution:**
+
+ 1. **Check your `id` field**: The most common culprit is the mapping for the `id` field. Ensure it *always* returns a non-empty, unique value for every row you intend to import.
+
+ 2. **Use a `preprocessor`**: For complex debugging, you can use a [preprocessor function](guides/data_transformations.md) to add a unique line number to each row. Import this line number into a custom field in Odoo (`x_studio_import_line_number`). After the import, you can easily compare the line numbers in your source file with those in Odoo to find exactly which rows were skipped.
+
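+As a minimal sketch of the debugging technique from point 2 (the column name `SourceLine` and the custom field name are only examples), a preprocessor that tags every row with its source line number could look like this:
+
+```python
+from odoo_data_flow.lib.transform import Processor
+
+def add_line_numbers(header, data):
+    """Append the source line number to every data row."""
+    header.append('SourceLine')
+    for index, row in enumerate(data, start=2):  # line 1 is the header row
+        row.append(str(index))
+    return header, data
+
+processor = Processor('origin/my_data.csv', preprocess=add_line_numbers)
+
+# In your mapping, push the line number into a custom field, for example:
+# 'x_studio_import_line_number': mapper.val('SourceLine'),
+```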
+
+### Connection Errors
+
+These errors usually happen when the `odoo-data-flow` client cannot reach your Odoo instance.
+
+- **Error:** `Connection refused`
+ - **Cause:** The `hostname` or `port` in your `conf/connection.conf` is incorrect, or the Odoo server is not running.
+ - **Solution:** Double-check your connection details and ensure the Odoo instance is active and accessible.
+
+- **Error:** `Wrong login/password`
+ - **Cause:** The credentials in `conf/connection.conf` are incorrect.
+ - **Solution:** Verify your `database`, `login`, and `password`.
+
+### Odoo Access & Validation Errors
+
+These errors come directly from Odoo when it rejects the data you are trying to save.
+
+- **Error:** `AccessError`, `You are not allowed to modify this document`
+ - **Cause:** The user specified by `uid` in your `conf/connection.conf` lacks the necessary permissions (e.g., Create or Write access) for the target model.
+ - **Solution:** Check the user's Access Rights in Odoo's settings.
+
+- **Error:** `ValidationError: A required field was not provided`
+ - **Cause:** Your transformed CSV file is missing a column for a field marked as `required=True` on the Odoo model.
+ - **Solution:** Check the model's definition in Odoo and ensure your `transform.py` script generates a value for that field.
+
+- **Error:** `No matching record found for external id '__export__.my_external_id_123'`
+ - **Cause:** You are trying to link to a related record (e.g., setting the `partner_id` on a sales order), but the external ID you are providing does not exist in the database.
+ - **Solution:**
+ 1. Ensure you have successfully imported the parent records first.
+ 2. Check for typos. The prefix and value used in your `m2o_map` must exactly match the external ID of the parent record.
+ 3. See the section below on Import Order.
+
+### Understanding Import Order for Relational Data
+
+A very common reason for the `No matching record found` error is that you are trying to import records in the wrong order.
+
+* **The Rule:** You must always import "parent" records **before** you import the "child" records that refer to them.
+
+* **Example:** Imagine you are importing Contacts (`res.partner`) and assigning them to Contact Tags (`res.partner.category`). Odoo cannot assign a contact to the "VIP" tag if that "VIP" tag doesn't exist in the database yet.
+
+* **Correct Import Sequence**:
+
+ 1. **First, import `res.partner.category`**: Run a transformation and load process for your contact tags. This creates the tags and their external IDs in Odoo.
+
+ 2. **Then, import `res.partner`**: Run a separate process for your contacts. The mapping for the `category_id/id` field can now successfully use `mapper.m2o_map` to look up the external IDs of the tags you created in the first step.
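+
+As an illustrative sketch (file paths are placeholders), the two imports are simply run back to back:
+
+```bash
+# 1. Parents first: create the tags and their external IDs.
+odoo-data-flow import \
+    --config conf/connection.conf \
+    --file data/res.partner.category.csv \
+    --model res.partner.category
+
+# 2. Children second: the contacts can now reference the tag external IDs.
+odoo-data-flow import \
+    --config conf/connection.conf \
+    --file data/res.partner.csv \
+    --model res.partner
+```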
diff --git a/docs/guides/advanced_usage.md b/docs/guides/advanced_usage.md
new file mode 100644
index 00000000..94dfc181
--- /dev/null
+++ b/docs/guides/advanced_usage.md
@@ -0,0 +1,405 @@
+# Guide: Advanced Usage
+
+This guide covers more complex scenarios and advanced features of the library that can help you solve specific data transformation challenges.
+
+
+## Processing XML Files
+
+While CSV is common, you may have source data in XML format. The `Processor` can handle XML files with a couple of extra configuration arguments.
+
+- **`xml_root_tag` (str)**: The name of the root tag in your XML document that contains the collection of records.
+- **`xml_record_tag` (str)**: The name of the tag that represents a single record.
+
+### Example XML Input (`origin/clients.xml`)
+
+Here is an example of an XML file that the `Processor` can parse. Note the `<ClientList>` container tag and the repeating `<Client>` tags for each record. The processor can also handle nested tags like `<Contact>`.
+
+```{code-block} xml
+:caption: origin/clients.xml
+<?xml version="1.0" encoding="utf-8"?>
+<ClientList>
+    <Client>
+        <ClientID>C1001</ClientID>
+        <Name>The World Company</Name>
+        <Contact>
+            <Email>contact@worldco.com</Email>
+            <Phone>111-222-3333</Phone>
+        </Contact>
+    </Client>
+    <Client>
+        <ClientID>C1002</ClientID>
+        <Name>The Famous Company</Name>
+        <Contact>
+            <Email>info@famous.com</Email>
+            <Phone>444-555-6666</Phone>
+        </Contact>
+    </Client>
+</ClientList>
+```
+
+### Example Transformation Code
+
+To process this XML, tell the `Processor` which tags frame your data. The tags inside each record are then treated as columns.
+
+- `xml_root_tag='ClientList'` selects the container element that wraps the whole collection of records.
+- `xml_record_tag='Client'` identifies the element that represents a single record; every `<Client>` becomes one row.
+
+The `Processor` automatically flattens the nested structure, so you can access nested tags such as `<Email>` and `<Phone>` with dot notation (`Contact.Email`, `Contact.Phone`) directly in your mapping.
+
+
+```python
+from odoo_data_flow.lib.transform import Processor
+from odoo_data_flow.lib import mapper
+
+# Access nested XML tags using dot notation.
+res_partner_mapping = {
+ 'id': mapper.m2o_map('xml_client_', 'ClientID'),
+ 'name': mapper.val('Name'),
+ 'email': mapper.val('Contact.Email'),
+ 'phone': mapper.val('Contact.Phone'),
+}
+
+# Initialize the Processor with XML-specific arguments
+processor = Processor(
+ 'origin/clients.xml',
+ xml_root_tag='ClientList',
+ xml_record_tag='Client'
+)
+# ... rest of the process
+```
+
+---
+
+## Importing Data for Multiple Companies
+
+When working in a multi-company Odoo environment, you need a clear strategy to ensure records are created in the correct company. There are two primary methods to achieve this.
+
+### Method 1: The Procedural Approach (Recommended)
+
+This is the safest and most common approach. The core idea is to separate your data by company and run a distinct import process for each one.
+
+1. **Separate your source files:** Create one set of data files for Company A and a completely separate set for Company B.
+2. **Set the User's Company:** In Odoo, log in as the user defined in your `connection.conf`. In the user preferences, set their default company to **Company A**.
+3. **Run the Import for Company A:** Execute your transformation and load scripts for Company A's data. All records created will be assigned to Company A by default.
+4. **Change the User's Company:** Go back to Odoo and change the same user's default company to **Company B**.
+5. **Run the Import for Company B:** Execute the import process for Company B's data. These new records will now be correctly assigned to Company B.
+
+This method is robust because it relies on Odoo's standard multi-company behavior and prevents accidental data mixing.
+
+### Method 2: The Programmatic Approach (`company_id`)
+
+This method is useful when your source file contains data for multiple companies mixed together. You can explicitly tell Odoo which company a record belongs to by mapping a value to the `company_id/id` field.
+
+**Example: A source file with mixed-company products**
+
+```text
+SKU,ProductName,CompanyCode
+P100,Product A,COMPANY_US
+P101,Product B,COMPANY_EU
+```
+
+**Transformation Script**
+Your mapping dictionary can use the `CompanyCode` to link to the correct company record in Odoo using its external ID.
+
+```python
+from odoo_data_flow.lib import mapper
+
+product_mapping = {
+ 'id': mapper.m2o_map('prod_', 'SKU'),
+ 'name': mapper.val('ProductName'),
+ # This line explicitly sets the company for each row.
+ # Assumes your res.company records have external IDs like 'main_COMPANY_US'.
+ 'company_id/id': mapper.m2o_map('main_', 'CompanyCode'),
+}
+```
+
+**Warning:** While powerful, this method requires that you have stable and correct external IDs for your `res.company` records. The procedural approach is often simpler and less error-prone.
+
+---
+
+## Importing Translations
+
+The most efficient way to import translations is to perform a standard import with a special `lang` key in the context. This lets Odoo's ORM handle the translation creation process correctly.
+
+The process involves two steps:
+
+1. **Import the base terms:** First, import your records with their default language values (e.g., English).
+2. **Import the translated terms:** Then, import a second file containing only the external IDs and the translated values, while setting the target language in the context.
+
+### Example: Translating Product Names to French
+
+**Step 1: Import the base product data in English**
+
+**Source File (`product_template.csv`):**
+
+```text
+id;name;price
+my_module.product_wallet;Wallet;10.0
+my_module.product_bicyle;Bicycle;400.0
+```
+
+You would import this file normally. The `id` column provides the stable external ID for each product.
+
+**Step 2: Import the French translations**
+
+**Source File (`product_template_FR.csv`):**
+This file only needs to contain the external ID and the fields that are being translated.
+
+```text
+id;name
+my_module.product_wallet;Portefeuille
+my_module.product_bicyle;Bicyclette
+```
+
+**Transformation and Load**
+While you can use a `transform.py` script to generate the load script, for a simple translation update, you can also run the command directly.
+
+**Command-line Example:**
+
+```bash
+odoo-data-flow import \
+ --config conf/connection.conf \
+ --file product_template_FR.csv \
+ --model product.template \
+ --context "{'lang': 'fr_FR'}"
+```
+
+This does not overwrite the English name; instead, it correctly creates or updates the French translation for the `name` field on the specified products.
+
+---
+
+## Importing Account Move Lines
+
+Importing journal entries (`account.move`) with their debit/credit lines (`account.move.line`) is a classic advanced use case that requires creating related records using `mapper.record` and stateful processing.
+
+### Performance Tip: Skipping Validation
+
+For a significant performance boost when importing large, pre-validated accounting entries, you can tell Odoo to skip its balancing check (debits == credits) during the import. This is done by passing a special context key.
+
+### Example: Importing an Invoice
+
+**Source File: `invoices.csv`**
+
+```text
+Journal,Reference,Date,Account,Label,Debit,Credit
+INV,INV2023/12/001,2023-12-31,,,,
+,,,600,"Customer Debtor",250.00,
+,,,400100,"Product Sales",,200.00
+,,,451000,"VAT Collected",,50.00
+```
+
+**Transformation Script**
+
+```python
+from odoo_data_flow.lib.transform import Processor
+from odoo_data_flow.lib import mapper
+
+# ... (see Data Transformations guide for full stateful processing example)
+
+# Define parameters, including the crucial context key
+params = {
+ 'model': 'account.move',
+ # WARNING: Only use check_move_validity: False if you are certain
+ # your source data is balanced.
+ 'context': "{'check_move_validity': False, 'tracking_disable': True}"
+}
+
+processor = Processor('origin/invoices.csv')
+# ... rest of process
+```
+
+---
+
+## Importing One-to-Many Relationships (`--o2m` flag)
+
+The `--o2m` flag enables a special import mode for handling source files where child records (the "many" side) are listed directly under their parent record (the "one" side).
+
+### Use Case and File Structure
+
+This mode is designed for files structured like this, where a master record has lines for two different one-to-many fields (`child1_ids` and `child2_ids`):
+
+**Source File (`master_with_children.csv`)**
+
+```text
+MasterID,MasterName,Child1_SKU,Child2_Ref
+M01,Master Record 1,field_value1_of_child1,field_value1_of_child2
+,,field_value2_of_child1,field_value2_of_child2
+,,,field_value3_of_child2
+```
+
+With the `--o2m` option, the processor understands that the lines with empty master fields belong to the last master record encountered. It will import "Master Record 1" with two `child1` records and three `child2` records simultaneously.
+
+### Transformation and Load
+
+Your mapping would use `mapper.record` and `mapper.cond` to process the child lines, similar to the `account.move.line` example. The key difference is enabling the `o2m` flag in your `params` dictionary.
+
+```python
+# In your transform.py
+params = {
+ 'model': 'master.model',
+ 'o2m': True # Enable the special o2m handling
+}
+```
+
+The generated `load.sh` script will then include the `--o2m` flag in the `odoo-data-flow import` command.
+
+### Important Limitations
+
+This method is convenient but has significant consequences because **it is impossible to set XML_IDs on the child records**. As a result:
+
+- You **cannot run the import again to update** the child records. Any re-import will create new child records.
+- The child records **cannot be referenced** by their external ID in any other import file.
+
+This method is best suited for simple, one-off imports of transactional data where the child lines do not need to be updated or referenced later.
+
+---
+
+## Validating Imports (`--check` flag)
+
+The `--check` flag provides an extra layer of validation during the import process. When this flag is used, at the end of each transaction, the client compares the number of records sent in the batch with the number of records Odoo reports as successfully imported.
+
+If these numbers do not match, an error message is printed. This is an extremely useful tool for catching silent errors. The most common cause for a mismatch is having records with duplicate XML_IDs within the same batch.
+
+For more details on why this might happen, see the [Record Count Mismatch](../faq.md) section in the FAQ.
+
+### Usage
+
+To enable this feature, set the `check` key to `True` in your `params` dictionary.
+
+```python
+# In your transform.py
+params = {
+ 'model': 'res.partner',
+ 'check': True # Enable import validation
+}
+```
+
+The generated `load.sh` script will then include the `--check` flag in the `odoo-data-flow import` command.
+
+---
+
+## Advanced Product Imports: Creating Variants
+
+When you import `product.template` records along with their attributes and values, Odoo does not create the final `product.product` variants by default. You must explicitly tell Odoo to do so using a context key.
+
+### The `create_product_product` Context Key
+
+By setting `create_product_product: True` in the context of your `product.template` import, you trigger the Odoo mechanism that generates all possible product variants based on the attribute lines you have imported for that template.
+
+This is typically done as the final step _after_ you have already imported the product attributes, attribute values, and linked them to the templates via attribute lines.
+
+### Example: Triggering Variant Creation
+
+Assume you have already run separate imports for `product.attribute`, `product.attribute.value`, and `product.attribute.line`. Now, you want to trigger the variant creation.
+
+The easiest way is to re-import your `product.template.csv` file with the special context key.
+
+**Transformation and Load**
+In the `params` dictionary of your `product.template` transformation script, add the key:
+
+```python
+# In your transform.py for product templates
+
+params = {
+ 'model': 'product.template',
+ # This context key tells Odoo to generate the variants
+ 'context': "{'create_product_product': True, 'tracking_disable': True}"
+}
+
+# The mapping would be the same as your initial template import
+template_mapping = {
+ 'id': mapper.m2o_map('prod_tmpl_', 'Ref'),
+ 'name': mapper.val('Name'),
+ # ... other template fields
+}
+```
+
+When you run the generated `load.sh` script for this process, Odoo will find each product template, look at its attribute lines, and create all the necessary `product.product` variants (e.g., a T-Shirt in sizes S, M, L and colors Red, Blue).
+
+---
+
+## Merging Data from Multiple Files (`join_file`)
+
+Sometimes, the data you need for a single import is spread across multiple source files. The `.join_file()` method allows you to enrich your main dataset by merging columns from a second file, similar to a VLOOKUP in a spreadsheet.
+
+### The `.join_file()` Method
+
+You first initialize a `Processor` with your primary file. Then, you call `.join_file()` to merge data from a secondary file based on a common key.
+
+- **`filename` (str)**: The path to the secondary file to merge in.
+- **`key1` (str)**: The name of the key column in the **primary** file.
+- **`key2` (str)**: The name of the key column in the **secondary** file.
+
+### Example: Merging Customer Details into an Order File
+
+**Transformation Script (`transform_merge.py`)**
+
+```{code-block} python
+:caption: transform_merge.py
+from odoo_data_flow.lib.transform import Processor
+from odoo_data_flow.lib import mapper
+
+# 1. Initialize a processor with the primary file (orders)
+processor = Processor('origin/orders.csv')
+
+# 2. Join the customer details file.
+print("Joining customer details into orders data...")
+processor.join_file('origin/customer_details.csv', 'CustomerCode', 'Code')
+
+# 3. Define a mapping that uses columns from BOTH files
+order_mapping = {
+ 'id': mapper.m2o_map('import_so_', 'OrderID'),
+ 'name': mapper.val('OrderID'),
+ 'date_order': mapper.val('OrderDate'),
+ # 'ContactPerson' comes from the joined file
+ 'x_studio_contact_person': mapper.val('ContactPerson'),
+}
+
+# The processor now contains the merged data and can be processed as usual
+processor.process(
+ mapping=order_mapping,
+ filename_out='data/orders_with_details.csv',
+ params={'model': 'sale.order'}
+)
+```
+
+---
+
+## Splitting Large Datasets for Import
+
+When dealing with extremely large source files, processing everything in a single step can be memory-intensive and unwieldy. The library provides a `.split()` method on the `Processor` to break down a large dataset into smaller, more manageable chunks.
+
+### The `.split()` Method
+
+The `.split()` method divides the processor's in-memory dataset into a specified number of parts. It does not write any files itself; instead, it returns a dictionary where each key is an index and each value is a new, smaller `Processor` object containing a slice of the original data.
+
+You can then iterate over this dictionary to process each chunk independently.
+
+### Example: Splitting a Large File into 4 Parts
+
+**Transformation Script (`transform_split.py`)**
+
+```{code-block} python
+:caption: transform_split.py
+from odoo_data_flow.lib.transform import Processor
+from odoo_data_flow.lib import mapper
+
+# 1. Define your mapping as usual
+product_mapping = {
+ 'id': mapper.concat('large_prod_', 'SKU'),
+ 'name': mapper.val('ProductName'),
+}
+
+# 2. Initialize a single processor with the large source file
+processor = Processor('origin/large_products.csv')
+
+# 3. Split the processor into 4 smaller, independent processors
+split_processors = processor.split(mapper.split_file_number(4))
+
+# 4. Loop through the dictionary of new processors
+for index, chunk_processor in split_processors.items():
+ output_filename = f"data/products_chunk_{index}.csv"
+ chunk_processor.process(
+ mapping=product_mapping,
+ filename_out=output_filename,
+ params={'model': 'product.product'}
+ )
+```
diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md
new file mode 100644
index 00000000..7a443a01
--- /dev/null
+++ b/docs/guides/configuration.md
@@ -0,0 +1,92 @@
+# Configuration Guide
+
+This guide provides a detailed reference for the `connection.conf` file, which is essential for connecting the `odoo-data-flow` tool to your Odoo instance.
+
+## The Connection File
+
+All commands that need to communicate with an Odoo server (e.g., `import`, `export`, `migrate`) require connection details. These are stored in a standard INI-formatted configuration file.
+
+By default, the tool looks for this file at `conf/connection.conf`, but you can specify a different path using the `--config` command-line option.
+
+### File Format and Example
+
+The configuration file must contain a `[Connection]` section with the necessary key-value pairs.
+
+
+```{code-block} ini
+:caption: conf/connection.conf
+[Connection]
+hostname = localhost
+port = 8069
+database = my_odoo_db
+login = admin
+password = my_admin_password
+uid = 2
+protocol = xmlrpc
+```
+
+### Configuration Keys
+
+#### `hostname`
+* **Required**: Yes
+* **Description**: The IP address or domain name of your Odoo server.
+* **Example**: `hostname = odoo.mycompany.com`
+
+#### `port`
+* **Required**: Yes
+* **Description**: The port your Odoo server is running on. This is typically `8069` for standard Odoo instances.
+* **Example**: `port = 8069`
+
+#### `database`
+* **Required**: Yes
+* **Description**: The name of the Odoo database you want to connect to.
+* **Example**: `database = my_production_db`
+
+#### `login`
+* **Required**: Yes
+* **Description**: The username (login email) of the Odoo user that the tool will use to connect.
+* **Example**: `login = admin`
+
+#### `password`
+* **Required**: Yes
+* **Description**: The password for the specified Odoo user.
+* **Example**: `password = my_secret_password`
+
+#### `uid`
+* **Required**: Yes
+* **Description**: The database ID of the Odoo user identified by the `login` parameter. This is required for making RPC calls.
+* **Well-known IDs**:
+ * `1`: The default administrator user in Odoo versions prior to 12.0.
+ * `2`: The default administrator user in Odoo versions 12.0 and newer.
+* **Example**: `uid = 2`
+
+#### `protocol`
+* **Required**: No
+* **Description**: The connection protocol to use for XML-RPC calls. `xmlrpc` uses HTTP, while `xmlrpcs` uses HTTPS for a secure connection. While modern Odoo uses JSON-RPC for its web interface, the external API for this type of integration typically uses XML-RPC.
+* **Default**: `xmlrpc`
+* **Example**: `protocol = xmlrpcs`
+
+---
+
+
+```{admonition} Tip
+:class: note
+
+For on-premise instances, it is advisable to use a dedicated API user with only the minimal access rights required for the models involved in the import, rather than the main administrator account.
+```
+
+### Real-world Example
+
+Below is a real-world example of a connection to a cloud-hosted Odoo instance on [opaas](https://www.opaas.cloud/).
+
+```{code-block} ini
+:caption: conf/connection.conf
+[Connection]
+hostname = test.yourinstance.opa.as
+database = bvnem-test
+login = admin
+password = secret_password
+protocol = jsonrpcs
+port = 443
+uid = 2
+```
diff --git a/docs/guides/data_transformations.md b/docs/guides/data_transformations.md
new file mode 100644
index 00000000..fff895d1
--- /dev/null
+++ b/docs/guides/data_transformations.md
@@ -0,0 +1,547 @@
+# Guide: Data Transformations with Mappers
+
+Mappers are the core of the data transformation process. They are powerful, reusable functions that you use within your mapping dictionary to define how each column of your destination file should be generated.
+
+This guide provides a comprehensive reference for all mappers available in the `odoo_data_flow.lib.mapper` module.
+
+---
+
+## Data Quality Validation (`Processor.check`)
+
+Before you start the main transformation process, it's often a good idea to validate the quality and structure of your source data. The library provides a `.check()` method on the `Processor` object for this purpose.
+
+You can call `.check()` multiple times with different "checker" functions to validate your data against a set of rules. If a check fails, a warning will be logged to the console, and you can prevent the transformation from continuing.
+
+### Using Checkers
+
+In your `transform.py` script, after initializing the `Processor` but before calling `.process()`, you can add your checks:
+
+```{code-block} python
+:caption: transform.py
+from odoo_data_flow.lib import checker
+from odoo_data_flow.lib.transform import Processor
+
+# Initialize processor
+processor = Processor('origin/my_data.csv')
+
+# --- Add Data Quality Checks ---
+print("Running data quality checks...")
+processor.check(checker.line_length_checker(15))
+processor.check(checker.cell_len_checker(120))
+processor.check(checker.id_validity_checker('SKU', r'^[A-Z]{2}-\d{4}$'))
+
+# Now, proceed with the mapping and processing
+# processor.process(...)
+```
+
+### Available Checker Functions
+
+The following checkers are available in the `odoo_data_flow.lib.checker` module.
+
+#### `checker.line_length_checker(expected_length)`
+
+Verifies that every row in your data file has exactly the `expected_length` number of columns. This is useful for catching malformed CSV rows.
+
+#### `checker.cell_len_checker(max_cell_len)`
+
+Verifies that no single cell (field) in your entire dataset exceeds the `max_cell_len` number of characters.
+
+#### `checker.line_number_checker(expected_line_count)`
+
+Verifies that the file contains exactly `expected_line_count` number of data rows (not including the header).
+
+#### `checker.id_validity_checker(id_field, pattern)`
+
+Verifies that the value in the specified `id_field` column for every row matches the given regex `pattern`. This is extremely useful for ensuring key fields like SKUs or external IDs follow a consistent format.
+
+---
+
+## Basic Mappers
+
+### `mapper.val(field, [postprocess])`
+
+Retrieves the value from a single source column, identified by `field`. This is the most fundamental mapper.
+
+- **`field` (str)**: The name of the column in the source file.
+- **`postprocess` (function, optional)**: A function to modify the value after it has been read.
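+
+A minimal example (the column and field names are illustrative):
+
+```python
+from odoo_data_flow.lib import mapper
+
+my_mapping = {
+    # Copy the 'Email' column as-is.
+    'email': mapper.val('Email'),
+    # Clean the value up after reading it.
+    'name': mapper.val('Name', postprocess=lambda x: x.strip().title()),
+}
+```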
+
+### `mapper.const(value)`
+
+Fills a column with a fixed, constant `value` for every row.
+
+- **`value`**: The static value to use (e.g., string, bool, integer).
+
+#### How it works
+
+**Input Data (`source.csv`)**
+| AnyColumn |
+| --------- |
+| a |
+| b |
+
+**Transformation Code**
+
+```python
+'sale_type': mapper.const('service')
+```
+
+**Output Data**
+| sale_type |
+| --------- |
+| service |
+| service |
+
+---
+
+## Combining and Formatting
+
+### `mapper.concat(separator, *fields)`
+
+Joins values from one or more source columns together, separated by a given `separator`.
+
+- **`separator` (str)**: The string to place between each value.
+- **`*fields` (str)**: A variable number of source column names (`field`) or static strings to join.
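+
+A small example (column names are illustrative):
+
+```python
+from odoo_data_flow.lib import mapper
+
+my_mapping = {
+    # "John" + " " + "Doe" becomes "John Doe"
+    'name': mapper.concat(' ', 'Firstname', 'Lastname'),
+}
+```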
+
+---
+
+## Conditional and Boolean Logic
+
+### `mapper.cond(field, true_value, false_value)`
+
+Checks the value of the source column `field`. If it's considered "truthy" (not empty, not "False", not 0), it returns `true_value`, otherwise it returns `false_value`.
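+
+For example (the field and column names are illustrative), mapping a source flag to one of two fixed values:
+
+```python
+from odoo_data_flow.lib import mapper
+
+my_mapping = {
+    # A truthy 'VIPFlag' gives 'VIP'; anything else gives 'Standard'.
+    'x_customer_tier': mapper.cond('VIPFlag', 'VIP', 'Standard'),
+}
+```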
+
+### `mapper.bool_val(field, true_values)`
+
+Checks if the value in the source column `field` exists within the `true_values` list and returns a boolean.
+
+- **`field` (str)**: The column to check.
+- **`true_values` (list)**: A list of strings that should be considered `True`.
+
+#### How it works
+
+**Input Data (`source.csv`)**
+| Status |
+| ------------- |
+| Active |
+| Done |
+
+**Transformation Code**
+
+```python
+'is_active': mapper.bool_val('Status', ['Active', 'In Progress']),
+```
+
+**Output Data**
+| is_active |
+| --------- |
+| True |
+| False |
+
+---
+
+## Numeric Mappers
+
+### `mapper.num(field, default='0.0')`
+
+Takes the numeric value of the source column `field`. It automatically transforms a comma decimal separator (`,`) into a dot (`.`). Use it for `Integer` or `Float` fields in Odoo.
+
+- **`field` (str)**: The column containing the numeric string.
+- **`default` (str, optional)**: A default value to use if the source value is empty. Defaults to `'0.0'`.
+
+#### How it works
+
+**Input Data (`source.csv`)**
+| my_column |
+| --------- |
+| 01 |
+| 2,3 |
+| |
+
+**Transformation Code**
+
+```python
+'my_field': mapper.num('my_column'),
+'my_field_with_default': mapper.num('my_column', default='-1.0')
+```
+
+**Output Data**
+| my_field | my_field_with_default |
+| -------- | --------------------- |
+| 1 | 1 |
+| 2.3 | 2.3 |
+| 0.0 | -1.0 |
+
+---
+
+## Relational Mappers
+
+### `mapper.m2o_map(prefix, *fields)`
+
+A specialized `concat` for creating external IDs for **Many2one** relationship fields (e.g., `partner_id`). This is useful when the unique ID for a record is spread across multiple columns.
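+
+For example (assuming the parent partners were imported earlier with the same prefix, so the generated external IDs match the format shown in the tables below):
+
+```python
+from odoo_data_flow.lib import mapper
+
+my_mapping = {
+    # 'CompanyCode' = "ACME" becomes the external ID "my_import_res_partner.ACME"
+    'parent_id/id': mapper.m2o_map('my_import_res_partner', 'CompanyCode'),
+}
+```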
+
+---
+## Many-to-Many Relationships
+
+Handling many-to-many relationships often requires a two-step process:
+1. **Extract and Create Related Records**: First, you need to identify all the unique values for the related records (e.g., all unique "Tags" or "Categories"), create a separate CSV file for them, and assign each one a unique external ID.
+2. **Link to Main Records**: In the main record file (e.g., partners), you create a comma-separated list of the external IDs of the related records.
+
+The library provides special mappers and a processing flag (`m2m=True`) to make this easy.
+
+### Example: Importing Partners with Categories
+
+Let's assume you have a source file where partner categories are listed in a single column, separated by commas.
+
+**Input Data (`client_file.csv`)**
+| Company | Firstname | Lastname | Birthdate | Category |
+| ------------------- | --------- | -------- | ---------- | ------------------- |
+| The World Company | John | Doe | 31/12/1980 | Premium |
+| The Famous Company | David | Smith | 28/02/1985 | Normal, Bad Payer |
+
+#### Step 1: Extract and Create Unique Categories
+
+We need to create a `res.partner.category.csv` file. The key is to use `mapper.m2m_id_list` and `mapper.m2m_value_list` combined with the `m2m=True` flag in the `.process()` method. This tells the processor to automatically find all unique values in the 'Category' column, split them, and create one row for each.
+
+**Transformation Code**
+```python
+# This mapping is specifically for extracting unique categories.
+partner_category_mapping = {
+ 'id': mapper.m2m_id_list('res_partner_category', 'Category'),
+ 'name': mapper.m2m_value_list('Category'),
+}
+
+# The m2m=True flag activates the special processing mode.
+processor.process(partner_category_mapping, 'res.partner.category.csv', m2m=True)
+```
+
+**Output File (`res.partner.category.csv`)**
+This file will contain one row for each unique category found across all partner records.
+| id | name |
+| ----------------------------- | ---------- |
+| res_partner_category.Premium | Premium |
+| res_partner_category.Normal | Normal |
+| res_partner_category.Bad_Payer| Bad Payer |
+
+#### Step 2: Create the Partner File with M2M Links
+
+Now that the categories have their own external IDs, you can create the partner records and link them using the `mapper.m2m` function. This mapper will create the required comma-separated list of external IDs for Odoo.
+
+**Transformation Code**
+```python
+res_partner_mapping = {
+ 'id': mapper.m2o_map('my_import_res_partner', 'Firstname', 'Lastname', 'Birthdate'),
+ 'name': mapper.concat(' ', 'Firstname', 'Lastname'),
+ 'parent_id/id': mapper.m2o_map('my_import_res_partner', 'Company'),
+ # Use mapper.m2m to create the comma-separated list of external IDs
+ 'category_id/id': mapper.m2m('res_partner_category', 'Category', sep=','),
+}
+
+processor.process(res_partner_mapping, 'res.partner.csv')
+```
+
+**Output File (`res.partner.csv`)**
+| id | parent_id/id | name | category_id/id |
+| -------------------------------------- | ---------------------------------------- | ----------- | --------------------------------------------------------------- |
+| my_import_res_partner.John_Doe_31/12/1980 | my_import_res_partner.The_World_Company | John Doe | res_partner_category.Premium |
+| my_import_res_partner.David_Smith_28/02/1985| my_import_res_partner.The_Famous_Company| David Smith | res_partner_category.Normal,res_partner_category.Bad_Payer |
+
+---
+
+## Importing Product Variants: Legacy (v9-v12) vs. Modern (v13+)
+
+Importing product variants (e.g., a T-shirt that comes in different colors and sizes) is a common but complex task. The data model for product attributes changed significantly in Odoo 13. The library provides two distinct workflows to handle both the old and new systems.
+
+### Modern Approach (Odoo v13+)
+
+This is the recommended approach for all modern Odoo versions. Odoo can now automatically create product variants if you provide the attribute values directly.
+
+* **Processor Class**: `ProductProcessorV10`
+* **Key Mapper**: `mapper.m2m_template_attribute_value`
+
+**How it Works:**
+You provide the attribute values (e.g., "Blue", "L") as a comma-separated string. The `ProductProcessorV10` sets `create_variant: 'Dynamically'` on the attribute, telling Odoo to find or create the corresponding `product.attribute.value` records and link them to the product template automatically.
+
+#### Example Transformation Script (Modern)
+```python
+# In your transform.py
+from odoo_data_flow.lib import mapper
+from odoo_data_flow.lib.transform import ProductProcessorV10
+
+# Initialize the modern processor
+processor = ProductProcessorV10('origin/products.csv')
+
+# --- 1. Create the product.attribute records ---
+# This step tells Odoo which attributes can create variants
+attributes = ['Color', 'Size']
+processor.process_attribute_data(
+ attributes, 'prod_attrib', 'data/product.attribute.csv', {}
+)
+
+# --- 2. Create the product.template records ---
+# The key is to map the raw values to the attribute's technical name
+template_mapping = {
+ 'id': mapper.m2o_map('prod_template_', 'template_id'),
+ 'name': mapper.val('Product Name'),
+ 'attribute_line_ids/Color/value_ids': mapper.val('Color'),
+ 'attribute_line_ids/Size/value_ids': mapper.val('Size'),
+}
+processor.process(template_mapping, 'data/product.template.csv')
+```
+
+### Legacy Approach (Odoo v9-v12)
+
+This approach is for older Odoo versions and requires a more manual, three-file process. You must create the attributes, then the attribute values with their own external IDs, and finally link them to the product template.
+
+* **Processor Class**: `ProductProcessorV9`
+* **Key Mappers**: `mapper.m2m_attribute_value`, `mapper.val_att`, `mapper.m2o_att`
+
+#### Example Transformation Script (Legacy)
+```python
+# In your transform.py
+from odoo_data_flow.lib import mapper
+from odoo_data_flow.lib.transform import ProductProcessorV9
+
+# Initialize the legacy processor
+processor = ProductProcessorV9('origin/products.csv')
+
+# --- This single call creates all three required files ---
+attributes = ['Color', 'Size']
+attribute_prefix = 'prod_attrib'
+
+# Mapping for product.attribute.value file
+value_mapping = {
+ 'id': mapper.m2m_attribute_value(attribute_prefix, *attributes),
+ 'name': mapper.val_att(attributes),
+ 'attribute_id/id': mapper.m2o_att_name(attribute_prefix, attributes),
+}
+
+# Mapping for product.template.attribute.line file
+line_mapping = {
+ 'product_tmpl_id/id': mapper.m2o_map('prod_template_', 'template_id'),
+ 'attribute_id/id': mapper.m2o_att_name(attribute_prefix, attributes),
+ 'value_ids/id': mapper.m2o_att(attribute_prefix, attributes),
+}
+
+processor.process_attribute_mapping(
+ value_mapping, line_mapping, attributes, attribute_prefix, 'data/', {}
+)
+```
+
+---
+
+## Advanced Mapping
+
+### `mapper.map_val(map_dict, key, default=None, m2m=False)`
+
+Looks up a `key` in a `map_dict` and returns the corresponding value. This is extremely useful for translating values from a source system to Odoo values.
+
+- **`map_dict` (dict)**: The Python dictionary to use as a translation table.
+- **`key` (mapper)**: A mapper that provides the key to look up in the dictionary (often `mapper.val`).
+- **`default` (optional)**: A default value to return if the key is not found.
+- **`m2m` (bool, optional)**: If set to `True`, the `key` is expected to be a list of values. The mapper will look up each value in the list and return a comma-separated string of the results.
+
+#### Example: Advanced Country Mapping
+
+**Transformation Code**
+```python
+# The mapping dictionary translates source codes to Odoo external IDs.
+country_map = {
+ 'BE': 'base.be',
+ 'FR': 'base.fr',
+ 'NL': 'base.nl',
+}
+
+# Use map_val to look up the code and return the external ID.
+'country_id/id': mapper.map_val(country_map, mapper.val('CountryCode'))
+```
+
+---
+
+## Binary Mappers
+
+### `mapper.binary(field)`
+
+Reads a local file path from the source column `field` and converts the file content into a base64-encoded string.
+
+- **`field` (str)**: The name of the column that contains the relative path to the image file.
+
+#### How it works
+
+**Input Data (`images.csv`)**
+| ImagePath |
+| --------------------- |
+| images/product_a.png |
+
+**Transformation Code**
+```python
+# Reads the file at the path and encodes it for Odoo
+'image_1920': mapper.binary('ImagePath')
+```
+
+**Output Data**
+| image_1920 |
+| ---------------------------------- |
+| iVBORw0KGgoAAAANSUhEUg... (etc.) |
+
+### `mapper.binary_url_map(field)`
+
+Reads a URL from the source column `field`, downloads the content from that URL, and converts it into a base64-encoded string.
+
+- **`field` (str)**: The name of the column that contains the full URL to the image or file.
+
+#### How it works
+
+**Input Data (`image_urls.csv`)**
+| ImageURL |
+| -------------------------------------- |
+| https://www.example.com/logo.png |
+
+**Transformation Code**
+```python
+# Downloads the image from the URL and encodes it
+'image_1920': mapper.binary_url_map('ImageURL')
+```
+
+**Output Data**
+| image_1920 |
+| ---------------------------------- |
+| iVBORw0KGgoAAAANSUhEUg... (etc.) |
+
+---
+
+## Advanced Techniques
+
+(pre-processing-data)=
+### Pre-processing Data
+
+For complex manipulations before the mapping starts, you can pass a `preprocess` function to the `Processor` constructor. This function receives the CSV header and data and must return them after modification.
+
+#### Adding Columns
+
+```python
+def my_preprocessor(header, data):
+    header.append('NEW_COLUMN')
+    for row in data:
+        row.append('NEW_VALUE')
+    return header, data
+
+processor = Processor('source.csv', preprocess=my_preprocessor)
+```
+
+#### Removing Lines
+
+```python
+def my_preprocessor(header, data):
+    data_new = []
+    for row in data:
+        line = dict(zip(header, row))
+        if line['Firstname'] != 'John':
+            data_new.append(row)
+    return header, data_new
+
+processor = Processor('source.csv', preprocess=my_preprocessor)
+```
+
+### Sharing Data Between Mappers (The `state` Dictionary)
+
+For complex, stateful transformations, every mapper function receives a `state` dictionary as its second argument. This dictionary is persistent and shared across the entire processing of a file, allowing you to "remember" values from one row to the next.
+
+This is essential for handling hierarchical data, like sales orders and their lines.
+
+#### Example: Remembering the Current Order ID
+
+```python
+def get_order_id(val, state):
+ # When we see a new Order ID, save it to the state
+ if val:
+ state['current_order_id'] = val
+ return val
+
+sales_order_mapping = {
+ 'id': mapper.val('OrderID', postprocess=get_order_id),
+ 'order_line/product_id/id': mapper.m2o_map('prod_', 'SKU'),
+ 'order_line/order_id/id': lambda line, state: state.get('current_order_id')
+}
+```
+
+### Conditionally Skipping Rows (`_filter`)
+
+You can filter out rows from your source data by adding a special `_filter` key to your mapping dictionary. The mapper for this key should return `True` for any row that you want to **skip**.
+
+**Input Data (`source.csv`)**
+| Name | Status |
+| ----- | --------- |
+| John | Active |
+| Jane | Cancelled |
+| | |
+
+**Transformation Code**
+```python
+my_mapping = {
+ '_filter': mapper.val('Status', postprocess=lambda x: x == 'Cancelled' or not x),
+ 'name': mapper.val('Name'),
+ # ... other fields
+}
+```
+In this example, the rows for "Jane" and the blank line will be skipped, and only the row for "John" will be processed.
+
+### Creating Custom Mappers
+
+Any Python function can act as a custom mapper when used with `postprocess`. The function will receive the value from the source column as its first argument and the shared `state` dictionary as its second.
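+
+A minimal sketch of such a custom mapper (the phone-cleaning rule is only an example):
+
+```python
+from odoo_data_flow.lib import mapper
+
+def clean_phone(value, state):
+    """Strip spaces and dashes from a phone number."""
+    return value.replace(' ', '').replace('-', '') if value else value
+
+my_mapping = {
+    'phone': mapper.val('Phone', postprocess=clean_phone),
+}
+```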
+
+### Updating Records With Database IDs
+
+To update records using their database ID, map your source ID to the special `.id` field and provide an empty `id` field.
+
+```python
+my_mapping = {
+ 'id': mapper.const(''),
+ '.id': mapper.val('id_column_from_source'),
+ 'name': mapper.val('name_from_source'),
+ # ... other fields to update
+}
+```
+
+### Creating Related Records (`mapper.record`)
+
+This special mapper takes a full mapping dictionary to create related records (e.g., sales order lines) during the transformation of a main record.
+
+#### Example: Importing Sales Orders and their Lines
+
+**Input Data (`orders.csv`)**
+| OrderID | Warehouse | SKU | Qty |
+| ------- | --------- | -------- | --- |
+| SO001 | MAIN | | |
+| | | PROD_A | 2 |
+| | | PROD_B | 5 |
+
+**Transformation Code**
+```python
+from odoo_data_flow.lib import mapper
+
+def get_order_id(val, state):
+ if val:
+ state['current_order_id'] = val
+ return val
+ return None
+
+def remember_value(key):
+ def postprocess(val, state):
+ if val:
+ state[key] = val
+ return val
+ return postprocess
+
+order_line_mapping = {
+ 'order_id/id': lambda line, state: state.get('current_order_id'),
+ 'product_id/id': mapper.m2o_map('prod_', 'SKU'),
+ 'product_uom_qty': mapper.num('Qty'),
+ 'warehouse_id/id': lambda line, state: state.get('current_warehouse_id')
+}
+
+sales_order_mapping = {
+ 'id': mapper.val('OrderID', postprocess=get_order_id),
+ 'name': mapper.val('OrderID'),
+ 'warehouse_id/id': mapper.m2o_map('wh_', 'Warehouse', postprocess=remember_value('current_warehouse_id')),
+ 'order_line': mapper.cond('SKU', mapper.record(order_line_mapping))
+}
diff --git a/docs/guides/exporting_data.md b/docs/guides/exporting_data.md
new file mode 100644
index 00000000..d9e5be38
--- /dev/null
+++ b/docs/guides/exporting_data.md
@@ -0,0 +1,87 @@
+# Guide: Exporting Data from Odoo
+
+In addition to importing, `odoo-data-flow` provides a powerful command-line utility for exporting data directly from Odoo into a structured CSV file. This is ideal for creating backups, feeding data into other systems, or for analysis.
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart TD
+ ExportA["Odoo Instance"] L_ExportA_ExportB_0@--> ExportB{"odoo-data-flow export"}
+ ExportC["Configuration
(CLI Options)"] --> ExportB
+ ExportB L_ExportB_ExportD_0@--> ExportD["Output File
(e.g., exported_partners.csv)"]
+ ExportA@{ shape: cyl}
+ ExportD@{ shape: doc}
+ style ExportA fill:#AA00FF
+ style ExportB fill:#BBDEFB
+ style ExportD fill:#FFF9C4
+ L_ExportA_ExportB_0@{ animation: slow }
+ L_ExportB_ExportD_0@{ animation: slow }
+```
+
+
+## The `odoo-data-flow export` Command
+
+The export process is handled by the `export` sub-command of the main `odoo-data-flow` tool. Unlike the import workflow, exporting is a single-step operation where you execute one command with the right parameters to pull data from your Odoo database.
+
+### Command-Line Options
+
+The command is configured using a set of options. Here are the most essential ones:
+
+| Option | Description |
+| ---------- | --------------------------------------------------------------------------------------------------------- |
+| `--config` | **Required**. Path to your `connection.conf` file containing the Odoo credentials. |
+| `--model` | **Required**. The technical name of the Odoo model you want to export records from (e.g., `res.partner`). |
+| `--fields` | **Required**. A comma-separated list of the technical field names you want to include in the export file. |
+| `--file` | **Required**. The path and filename for the output CSV file (e.g., `data/exported_partners.csv`). |
+| `--domain` | A filter to select which records to export, written as a string. Defaults to `[]` (export all records). |
+| `--worker` | The number of parallel processes to use for the export. Defaults to `1`. |
+| `--size` | The number of records to fetch in a single batch. Defaults to `10`. |
+
+### Understanding the `--domain` Filter
+
+The `--domain` option allows you to precisely select which records to export. It uses Odoo's standard domain syntax, which is a list of tuples formatted as a string.
+
+A domain is a list of search criteria. Each criterion is a tuple `('field_name', 'operator', 'value')`.
+
+**Simple Domain Example:**
+To export only companies (not individual contacts), the domain would be `[('is_company', '=', True)]`. You would pass this to the command line as a string:
+
+`--domain "[('is_company', '=', True)]"`
+
+**Complex Domain Example:**
+To export all companies from the United States, you would combine two criteria:
+
+`--domain "[('is_company', '=', True), ('country_id.code', '=', 'US')]"`
+
+### Specifying Fields with `--fields`
+
+The `--fields` option is a simple comma-separated list of the field names you want in your output file. You can also access fields on related records using dot notation.
+
+- Simple fields: `name,email,phone`
+- Relational fields: `name,parent_id/name,parent_id/city` (This would get the contact's name, their parent company's name, and their parent company's city).
+
+## Full Export Example
+
+Let's combine these concepts into a full example. We want to export the name, email, and city for all individual contacts (not companies) located in Belgium.
+
+Here is the full command you would run from your terminal:
+
+```bash
+odoo-data-flow export \
+ --config conf/connection.conf \
+ --model "res.partner" \
+ --domain "[('is_company', '=', False), ('country_id.code', '=', 'BE')]" \
+ --fields "name,email,city,country_id/name" \
+ --file "data/belgian_contacts.csv"
+```
+
+### Result
+
+This command will:
+
+1. Connect to the Odoo instance defined in `conf/connection.conf`.
+2. Search the `res.partner` model for records that are not companies and have their country set to Belgium.
+3. For each matching record, it will retrieve the `name`, `email`, `city`, and the `name` of the related country.
+4. It will save this data into a new CSV file located at `data/belgian_contacts.csv`.
diff --git a/docs/guides/exporting_data_for_modules.md b/docs/guides/exporting_data_for_modules.md
new file mode 100644
index 00000000..040edf74
--- /dev/null
+++ b/docs/guides/exporting_data_for_modules.md
@@ -0,0 +1,180 @@
+# Guide: Exporting Data for Odoo Modules (CSV to XML)
+
+A common requirement for Odoo developers is to package data (e.g., default settings, master data like countries or states) into XML files within a custom module. This ensures the data is loaded automatically when the module is installed.
+
+While `odoo-data-flow` does not export directly to Odoo's XML format, it is the perfect tool for the first and most critical step: extracting the data from a database into a clean, reliable format.
+
+This guide provides a standard workflow for exporting data to a CSV file and then converting that file into a properly formatted Odoo XML data file.
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart LR
+ subgraph subGraph0["Step 1: Export from Odoo"]
+ B{"odoo-data-flow export"}
+ A[("Odoo Database")]
+ C["temp_data.csv"]
+ end
+ subgraph subGraph1["Step 2: Convert on Local Machine"]
+ D{"Python Script
(create_data_file.py)"}
+ E["my_module/data/my_data.xml"]
+ end
+ subgraph subGraph2["Step 3: Update Custom Module"]
+ F{"Your Custom Module"}
+ G["__manifest__.py"]
+ end
+ A L_A_B_0@--> B
+ B --> C
+ C --> D
+ D --> E
+ E --> F
+ G --> F
+ style B fill:#BBDEFB
+ style A fill:#AA00FF
+ style C fill:#FFF9C4
+ style D fill:#FFE0B2
+ style E fill:#FFF9C4
+ style F fill:#C8E6C9,stroke:#388E3C
+ style G fill:#E1F5FE
+ style subGraph0 fill:transparent
+ style subGraph1 fill:transparent
+ L_A_B_0@{ animation: slow }
+```
+
+## The Workflow
+
+The process involves two simple steps:
+
+1. **Export to CSV**: Use the `odoo-data-flow export` command to pull the data you need from your Odoo instance into a clean CSV file.
+2. **Convert to XML**: Use a simple Python script to read the CSV and generate an XML file in the exact format Odoo requires for data files.
+
+### Step 1: Export the Data to a CSV File
+
+First, use the `export` command to get the data you want to include in your module. For this example, let's export all the US states from the `res.country.state` model.
+
+```bash
+odoo-data-flow export \
+ --config conf/my_db.conf \
+ --model res.country.state \
+ --domain "[('country_id.code', '=', 'US')]" \
+ --fields "name,code,country_id/id" \
+ --file temp_us_states.csv
+```
+
+This command will create a file named `temp_us_states.csv` that looks something like this:
+
+
+```{code-block} text
+:caption: temp_us_states.csv
+name;code;country_id/id
+Alabama;AL;base.us
+Alaska;AK;base.us
+...
+```
+
+### Step 2: Convert the CSV to an Odoo XML Data File
+
+Now, we can use a Python script to convert this CSV into an XML file suitable for your module's `data` directory.
+
+The script below will:
+- Read each row from the CSV.
+- Create a `<record>` tag for each state.
+- Automatically generate a unique XML ID for each record (e.g., `state_us_al`).
+- Create `<field>` tags for each column.
+
+```{code-block} python
+:caption: create_data_file.py
+import csv
+from lxml import etree
+
+CSV_FILE_PATH = "temp_us_states.csv"
+XML_OUTPUT_PATH = "my_awesome_module/data/res_country_state_data.xml"
+MODEL_NAME = "res.country.state"
+ID_PREFIX = "state_us_"
+
+# Create the root <odoo> tag
+odoo_tag = etree.Element("odoo", noupdate="1")
+
+# Read the CSV and create a <record> for each row
+with open(CSV_FILE_PATH, "r", encoding="utf-8") as f:
+ # Use DictReader to easily access columns by header name
+ reader = csv.DictReader(f, delimiter=';')
+ for row in reader:
+ # Create a unique XML ID for the record, e.g., "state_us_al"
+ record_id = f"{ID_PREFIX}{row['code'].lower()}"
+
+        # Create the <record> tag
+ record_tag = etree.SubElement(
+ odoo_tag, "record", id=record_id, model=MODEL_NAME
+ )
+
+        # Create a <field> tag for each column in the CSV
+        for header, value in row.items():
+            # Skip empty values to keep the XML clean
+            if not value:
+                continue
+
+            # Relational columns are exported as "field/id" and contain an
+            # external ID. Odoo XML expects <field name="field" ref="..."/>,
+            # so strip the "/id" suffix and use a 'ref' attribute instead.
+            if header.endswith("/id"):
+                field_tag = etree.SubElement(
+                    record_tag, "field", name=header[:-len("/id")]
+                )
+                field_tag.set("ref", value)
+            else:
+                field_tag = etree.SubElement(record_tag, "field", name=header)
+                field_tag.text = value
+
+# Write the final XML to a file with pretty printing for readability
+with open(XML_OUTPUT_PATH, "wb") as f:
+ f.write(
+ etree.tostring(
+ odoo_tag, pretty_print=True, xml_declaration=True, encoding='utf-8'
+ )
+ )
+
+print(f"Successfully generated Odoo XML data file at: {XML_OUTPUT_PATH}")
+```
+
+Running this script will produce the following XML file, perfectly formatted for Odoo.
+
+```{code-block} xml
+:caption: my_awesome_module/data/res_country_state_data.xml
+<?xml version='1.0' encoding='utf-8'?>
+<odoo noupdate="1">
+  <record id="state_us_al" model="res.country.state">
+    <field name="name">Alabama</field>
+    <field name="code">AL</field>
+    <field name="country_id" ref="base.us"/>
+  </record>
+  <record id="state_us_ak" model="res.country.state">
+    <field name="name">Alaska</field>
+    <field name="code">AK</field>
+    <field name="country_id" ref="base.us"/>
+  </record>
+  <!-- ... more states ... -->
+</odoo>
+```
+
+### Step 3: Add the Data File to Your Module
+
+The final step is to tell Odoo to load this file when your module is installed or updated.
+
+1. Move the generated XML file to your module's `data/` directory.
+2. Add the path to the `data` key in your module's `__manifest__.py` file.
+
+```{code-block} python
+:caption: my_awesome_module/__manifest__.py
+{
+ 'name': 'My Awesome Module',
+ 'version': '1.0',
+ # ... other manifest keys
+ 'data': [
+ 'security/ir.model.access.csv',
+ 'data/res_country_state_data.xml', # Add this line
+ 'views/my_views.xml',
+ ],
+ 'installable': True,
+}
+```
+
+Now, when you install or upgrade your module, Odoo will automatically load all the US states from your XML file. This workflow combines the power of `odoo-data-flow` for data extraction with a simple, reusable script for generating module data.
diff --git a/docs/guides/field_mapping_workflow.md b/docs/guides/field_mapping_workflow.md
new file mode 100644
index 00000000..cbe45776
--- /dev/null
+++ b/docs/guides/field_mapping_workflow.md
@@ -0,0 +1,172 @@
+# Guide: Creating a Migration Mapping with a Field Diff
+
+One of the most time-consuming tasks in an Odoo-to-Odoo migration is identifying which fields have been added, removed, or renamed between your source and destination databases.
+
+This guide provides a powerful, systematic workflow to quickly generate a complete list of all field differences, allowing you to build an accurate transformation mapping with confidence.
+
+---
+
+## The Goal
+
+The objective is to produce two CSV files: one listing all the fields for a specific model from your **source** database, and another listing all the fields for the same model from your **destination** database. By comparing these two files, you can instantly see every change.
+
+## The Workflow
+
+The entire process is done using the `odoo-data-flow export` command, which we will use to query Odoo's internal data dictionary.
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart TD
+ subgraph subGraph0["Source Database"]
+ A[("Source Odoo DB")]
+ end
+ subgraph subGraph1["Destination Database"]
+ B[("Destination Odoo DB")]
+ end
+ subgraph Analysis["Analysis"]
+    G["Diff Tool<br>(e.g., VS Code)"]
+ E["source_fields.csv"]
+ F["destination_fields.csv"]
+ end
+ subgraph subGraph3["Developer's Local Machine"]
+ direction LR
+    C{"odoo-data-flow export<br>--model ir.model.fields"}
+    D{"odoo-data-flow export<br>--model ir.model.fields"}
+ Analysis
+    H["Developer / LLM<br>(fa:fa-user-edit)"]
+    I["transform.py<br>(mapping dictionary)"]
+ end
+ C --> A & E
+ D --> B & F
+ E --> G
+ F --> G
+    G -- "List of<br>Renamed/Removed<br>Fields" --> H
+ H -- Writes the mapping logic --> I
+ style A fill:#AA00FF
+ style B fill:#C8E6C9,stroke:#388E3C
+ style G fill:#FFE0B2
+ style E fill:#FFF9C4
+ style F fill:#FFF9C4
+ style C fill:#BBDEFB
+ style D fill:#BBDEFB
+ style I fill:#E1F5FE
+ style subGraph3 fill:transparent
+ L_C_E_0@{ animation: slow }
+ L_D_F_0@{ animation: slow }
+```
+
+### Step 1: Export Field Definitions from the Source Database
+
+First, run the `export` command pointed at your **source** database configuration. This command targets the `ir.model.fields` model, which is Odoo's internal dictionary of all model fields.
+
+* `--model ir.model.fields`: We are querying the model that holds field definitions.
+* `--domain "[('model', '=', 'res.partner')]"`: This is the crucial filter. It tells Odoo to only return records where the `model` field is `res.partner`.
+* `--fields "name,field_description,ttype"`: We export the technical name, the user-friendly label, and the field type, which is excellent information for comparison.
+
+```bash
+odoo-data-flow export \
+ --config conf/source_db.conf \
+ --model ir.model.fields \
+ --domain "[('model', '=', 'res.partner')]" \
+ --fields "name,field_description,ttype" \
+ --file source_res_partner_fields.csv
+```
+
+### Step 2: Export Field Definitions from the Destination Database
+
+Next, run the exact same command, but change the configuration to point to your **destination** database.
+
+```bash
+odoo-data-flow export \
+ --config conf/destination_db.conf \
+ --model ir.model.fields \
+ --domain "[('model', '=', 'res.partner')]" \
+ --fields "name,field_description,ttype" \
+ --file destination_res_partner_fields.csv
+```
+
+### Step 3: Compare the Files with a Diff Tool
+
+You will now have two CSV files:
+* `source_res_partner_fields.csv`
+* `destination_res_partner_fields.csv`
+
+Open these two files in a visual "diff tool." Most modern code editors have a built-in file comparison feature (e.g., VS Code). You can also use dedicated tools like `Meld`, `Beyond Compare`, or `vimdiff`.
+
+The diff tool will give you a clear, side-by-side view of every change.
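+
+If you prefer to stay in the terminal, a few lines of Python are enough for a first overview. The sketch below assumes only the two files produced above and the default semicolon separator; the helper name `field_names` is just for illustration.
+
+```python
+import csv
+
+
+def field_names(path: str) -> set[str]:
+    """Return the set of technical field names listed in an export file."""
+    with open(path, newline="", encoding="utf-8") as f:
+        return {row["name"] for row in csv.DictReader(f, delimiter=";")}
+
+
+source = field_names("source_res_partner_fields.csv")
+destination = field_names("destination_res_partner_fields.csv")
+
+print("Only in source (removed or renamed):", sorted(source - destination))
+print("Only in destination (new or renamed):", sorted(destination - source))
+```
+
+A visual diff still gives you the richer row-by-row view, but this quickly surfaces fields that were dropped or renamed.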
+
+---
+
+## From Diff to Mapping: A Practical Example
+
+With the comparison open, building your mapping becomes a simple task of "filling in the blanks."
+
+Let's imagine your diff tool shows the following differences:
+
+| Source Field (`source_res_partner_fields.csv`) | Destination Field (`destination_res_partner_fields.csv`) | Analysis |
+| :--- | :--- | :--- |
+| `name` | `name` | No change. A direct 1-to-1 mapping. |
+| `street2` | (missing) | This field was removed in the new version. |
+| (missing) | `street_two` | A new field was added. It looks like `street2` was renamed. |
+| `ref` | `partner_ref` | The `ref` field was renamed to `partner_ref`. |
+| `customer` | (missing) | This old boolean field was replaced. |
+| (missing) | `customer_rank` | A new integer field `customer_rank` was added to replace `customer`. |
+| `some_legacy_field` | (missing) | This custom field from the old system is no longer needed. |
+
+### Building the Python Mapping
+
+Based on this analysis, you can now construct your mapping dictionary in your `transform.py` script.
+
+```python
+from odoo_data_flow.lib import mapper
+
+partner_migration_mapping = {
+ # Direct 1-to-1 mapping for unchanged fields
+ 'id': mapper.m2o_map('mig_partner_', 'name'),
+ 'name': mapper.val('name'),
+ 'city': mapper.val('city'), # Assuming city was unchanged
+
+ # Handle renamed fields: map the old name to the new name
+ 'street_two': mapper.val('street2'),
+ 'partner_ref': mapper.val('ref'),
+
+ # Handle changed logic: convert the old boolean to the new rank
+ # If the old 'customer' field was '1' (True), set rank to 1, else 0.
+ 'customer_rank': mapper.val('customer', postprocess=lambda x: 1 if x == '1' else 0),
+
+ # Fields to ignore: simply omit 'some_legacy_field' from the mapping.
+ # It will not be included in the output file.
+}
+```
+
+### Tools to Accelerate the Process
+
+* **Diff Tools:** As mentioned, a visual diff tool is your most valuable asset in this process. It makes spotting changes effortless.
+
+* **AI Assistants (like Gemini, ChatGPT, etc.):** You can significantly speed up the creation of the final mapping dictionary by using an AI assistant.
+ 1. Copy the full content of `source_res_partner_fields.csv`.
+ 2. Copy the full content of `destination_res_partner_fields.csv`.
+ 3. Use a prompt like the following:
+
+ > "I am migrating data between two Odoo databases. Below are two CSV files listing the field definitions for the `res.partner` model from the source and destination databases.
+ >
+ > Compare these two files and generate a Python dictionary for the `odoo-data-flow` library that maps the source fields to the destination fields.
+ >
+ > - For fields that have the same name, create a direct `mapper.val()` mapping.
+ > - For fields that appear to have been renamed (e.g., based on the description), map the old name to the new one.
+ > - For fields that only exist in the source, add a Python comment indicating they have been removed.
+ >
+ > **Source Fields:**
+ > ```text
+ > [Paste content of source_res_partner_fields.csv here]
+ > ```
+ >
+ > **Destination Fields:**
+ > ```text
+ > [Paste content of destination_res_partner_fields.csv here]
+ > ```"
+
+The AI can generate a nearly complete mapping dictionary for you in seconds, which you can then review and refine. This combination of automated export and AI-assisted mapping can reduce the time it takes to create a migration plan from hours to minutes.
diff --git a/docs/guides/importing_data.md b/docs/guides/importing_data.md
new file mode 100644
index 00000000..ef15b3da
--- /dev/null
+++ b/docs/guides/importing_data.md
@@ -0,0 +1,214 @@
+# Guide: A Deep Dive into Importing
+
+This guide expands on the import workflow, providing a detailed look at the `Processor` class and, most importantly, the requirements for your input data files.
+
+## Command-Line Usage
+
+The primary way to import data is through the `import` command. If your configuration file is in the default location and your CSV file is named after the Odoo model, the command is very simple:
+
+```bash
+odoo-data-flow import --file path/to/res_partner.csv
+```
+
+### Key Options for `import`
+
+* `--config`: **(Optional)** Path to your connection configuration file. **Defaults to `conf/connection.conf`**.
+* `--file`: **(Required)** Path to the source CSV file you want to import.
+* `--model`: **(Optional)** The target Odoo model (e.g., `res.partner`). If you omit this option, the tool will automatically infer the model name from your CSV filename. For example, a file named `res_partner.csv` will be imported into the `res.partner` model.
+* `--worker`: Number of parallel threads to use for the import.
+* `--fail`: Runs the import in "fail mode," retrying only the records from the corresponding `.fail.csv` file.
+* `--skip`: The number of initial lines to skip in the source file before reading the header.
+* `--sep`: The character separating columns. Defaults to a semicolon (`;`).
+
+## The "Upsert" Strategy: How External IDs Work
+
+A core feature of `odoo-data-flow` is its ability to safely handle both creating new records and updating existing ones in a single process. This is often called an "upsert" (update or insert) operation, and it is the default behavior of the tool.
+
+This makes your data imports **idempotent**, meaning you can run the same import script multiple times without creating duplicate records.
+
+### The Role of the External ID (`id` column)
+
+This entire feature is powered by the mandatory `id` column in your CSV file. This column holds a unique **External ID** for every record.
+
+When you run an import, Odoo's `load` method performs the following logic for each row:
+
+1. **Check for External ID:** It looks at the value in the `id` column.
+
+2. **If the ID Exists:** If a record with that same external ID is found in the database, Odoo **updates** that existing record with the new values from your file.
+
+3. **If the ID Does Not Exist:** If no record with that external ID is found, Odoo **creates** a new record and assigns it that external ID.
+
+This built-in upsert logic is essential for incremental data loads and for re-running scripts to correct or enrich data that has already been imported.
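+
+A practical consequence is that your `id` mapping should be deterministic: the same source row must produce the same External ID on every run. A minimal sketch (the `import_partner_` prefix and the source column names are invented for illustration):
+
+```python
+from odoo_data_flow.lib import mapper
+
+partner_mapping = {
+    # The same 'CustomerCode' value yields the same External ID on every run,
+    # so re-running the import updates records instead of duplicating them.
+    'id': mapper.concat('import_partner_', 'CustomerCode'),
+    'name': mapper.val('Name'),
+}
+```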
+
+## Input File Requirements
+
+For a successful import into Odoo, the clean CSV file you generate (the `filename_out` in your script) must follow some important rules.
+
+* **Encoding**: The file must be in `UTF-8` encoding.
+* **One Model per File**: Each CSV file should only contain data for a single Odoo model (e.g., all `res.partner` records).
+* **Header Row**: The first line of the file must be the header row. All column names must be the technical field names from the Odoo model (e.g., `name`, `parent_id`, `list_price`).
+* **External ID**: All rows must have an `id` column containing a unique External ID (also known as an XML ID). This is essential for the "upsert" logic described above.
+* **Field Separator**: The character separating columns can be defined with the `--sep` command-line option. The default is a semicolon (`;`). **Crucially, if a field's value contains the separator character, the entire field value must be enclosed in double quotes (`"`).**
+* **Skipping Lines**: If your source file contains introductory lines before the header, you can use the `--skip` option to ignore them during the import process.
+
+### Special Field Naming Conventions
+
+To handle relational data and updates by database ID, the tool uses special column headers:
+
+* **`/id` Suffix (for External IDs)**: When mapping to a `Many2one` or `Many2many` field, you must append `/id` to the field name (e.g., `partner_id/id`). This tells Odoo to look up the related record using the provided External ID.
+
+* **`.id` Field Name (for Database IDs)**: To update a record using its existing database ID (an integer, not an external ID), use the special field name `.id`. When you use this, you should also provide an empty `id` column to tell Odoo you are updating, not creating a new record.
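+
+To make these conventions concrete, here is a small, hypothetical sketch. The source column names (`Ref`, `ParentRef`, `odoo_db_id`, `Name`) and the prefixes are invented; the mapper calls mirror those used elsewhere in this documentation.
+
+```python
+from odoo_data_flow.lib import mapper
+
+# Linking a Many2one by External ID (note the /id suffix in the column name).
+contact_mapping = {
+    'id': mapper.concat('import_contact_', 'Ref'),
+    'parent_id/id': mapper.m2o_map('import_partner_', 'ParentRef'),
+    'name': mapper.val('Name'),
+}
+
+# Updating existing records by database ID (note the empty External ID column).
+update_by_db_id_mapping = {
+    '.id': mapper.val('odoo_db_id'),  # integer database ID from the source file
+    'id': mapper.const(''),           # left empty, as described above
+    'name': mapper.val('Name'),
+}
+```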
+
+### Field Formatting Rules
+
+Odoo's `load` method expects data for certain field types to be in a specific format.
+
+* **Boolean**: Must be `1` for True and `0` for False. The `mapper.bool_val` can help with this.
+* **Binary**: Must be a base64 encoded string. The `mapper.binary` and `mapper.binary_url_map` functions handle this automatically.
+* **Date & Datetime**: The format depends on the user's language settings in Odoo, but the standard, safe formats are `YYYY-MM-DD` for dates and `YYYY-MM-DD HH:MM:SS` for datetimes.
+* **Float**: The decimal separator must be a dot (`.`). The `mapper.num` function handles converting comma separators automatically.
+* **Selection**: Must contain the internal value for the selection, not the human-readable label (e.g., `'draft'` instead of `'Draft'`).
+* **Many2one**: The column header must be suffixed with `/id`, and the value should be the external ID of the related record.
+* **Many2many**: The column header must be suffixed with `/id`, and the value should be a comma-separated list of external IDs for the related records.
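+
+The sketch below shows how these formats typically look in a mapping. All column and field names are made up for illustration, only mappers already shown in this documentation are used, and the date column is assumed to already be in `YYYY-MM-DD` format.
+
+```python
+from odoo_data_flow.lib import mapper
+
+example_mapping = {
+    'id': mapper.concat('import_example_', 'Code'),
+    'list_price': mapper.num('Price'),  # float: comma decimal separators become dots
+    'is_company': mapper.val('IsCompany', postprocess=lambda x: 1 if x == 'Y' else 0),  # boolean as 1/0
+    'state': mapper.val('Status', postprocess=lambda x: x.strip().lower()),  # internal selection value, e.g. 'draft'
+    'date': mapper.val('Date'),  # assumed to already be YYYY-MM-DD
+}
+```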
+
+### Important: Do Not Import into Computed Fields
+
+A common mistake is to try to import data directly into a field that is **computed** by Odoo (i.e., a field whose value is calculated automatically based on other fields). This will not work as expected.
+
+**The Rule:** Always import the raw "ingredient" fields and let Odoo perform the calculation.
+
+Even if an import into a computed field appears to succeed, the value will be overwritten the next time the record is saved or its source fields are changed. Most computed fields are also marked as `readonly`, which will cause the import to fail outright.
+
+#### Example: `price_subtotal` on a Sales Order Line
+
+The `price_subtotal` on a sales order line is calculated automatically from the quantity, unit price, and discount.
+
+**Incorrect Mapping (This will fail or be overwritten):**
+```python
+order_line_mapping = {
+ # ... other fields
+ # Incorrectly trying to write directly to the computed field
+ 'price_subtotal': mapper.num('SubtotalFromSourceFile'),
+}
+```
+
+**Correct Mapping (Import the source fields):**
+```python
+order_line_mapping = {
+ # ... other fields
+ # Import the raw ingredients and let Odoo do the calculation
+ 'product_uom_qty': mapper.num('Quantity'),
+ 'price_unit': mapper.num('UnitPrice'),
+ 'discount': mapper.num('DiscountPercent'),
+}
+```
+
+By importing the source fields, you ensure that Odoo's business logic is triggered correctly and your data remains consistent.
+
+---
+
+## The `Processor` Class
+
+The `Processor` is the central component of the transform phase. It handles reading the source file, applying the mapping, and generating the output files required for the load phase.
+
+### Initialization
+
+You initialize the processor by providing the path to your source data file and optional formatting parameters.
+
+```python
+from odoo_data_flow.lib.transform import Processor
+
+processor = Processor(
+ 'origin/my_data.csv', # Path to the source file
+ separator=';', # The character used to separate columns
+)
+```
+
+The constructor takes the following arguments:
+
+* **`filename` (str)**: The path to the CSV or XML file you want to transform.
+* **`separator` (str, optional)**: The column separator for CSV files. Defaults to `;`.
+* **`preprocess` (function, optional)**: A function to modify the raw data _before_ mapping begins. See the [Data Transformations Guide](./data_transformations.md) for details.
+* **`xml_root_tag` (str, optional)**: Required only when processing XML files. See the [Advanced usage Guide](./advanced_usage.md) for details.
+
+
+## The `process()` Method
+
+This is the main method that executes the transformation. It takes your mapping dictionary and applies it to each row of the source file, writing the output to a new target file.
+
+```python
+processor.process(
+ mapping=my_mapping_dict,
+ filename_out='data/clean_data.csv',
+ params=import_params_dict
+)
+```
+
+The method takes these key arguments:
+
+* **`mapping` (dict)**: **Required**. The mapping dictionary that defines the transformation rules for each column.
+* **`filename_out` (str)**: **Required**. The path where the clean, transformed CSV file will be saved.
+* **`params` (dict, optional)**: A crucial dictionary that holds the configuration for the `odoo-data-flow import` command. These parameters will be used when generating the `load.sh` script.
+
+### Configuring the Import Client with `params`
+
+The `params` dictionary allows you to control the behavior of the import client without ever leaving your Python script. The keys in this dictionary map directly to the command-line options of the `odoo-data-flow import` command.
+
+| `params` Key | `odoo-data-flow import` Option | Description |
+| ------------ | ------------------------------ | ----------------------------------------------------------------------------------------------------------------- |
+| `model` | `--model` | **Optional**. The technical name of the Odoo model (e.g., `sale.order`). If you omit this, the tool infers it from the filename. |
+| `context` | `--context` | An Odoo context dictionary string. Essential for disabling mail threads, etc. (e.g., `"{'tracking_disable': True}"`) |
+| `worker` | `--worker` | The number of parallel processes to use for the import. |
+| `size` | `--size` | The number of records to process in a single Odoo transaction. |
+| `ignore` | `--ignore` | A comma-separated string of fields to ignore during the import. Crucial for performance with related fields. |
+| `skip` | `--skip` | The number of initial lines to skip in the source file before reading the header. |
+
+## Generating the Script with `write_to_file()`
+
+After calling `process()`, you can generate the final shell script that will be used in the load phase.
+
+```python
+processor.write_to_file("load_my_data.sh")
+```
+
+This method takes a single argument: the path where the `load.sh` script should be saved. It automatically uses the `filename_out` and `params` you provided to the `process()` method to construct the correct commands.
+
+## Full Example
+
+Here is a complete `transform.py` script that ties everything together.
+
+```{code-block} python
+:caption: transform.py
+from odoo_data_flow.lib.transform import Processor
+from odoo_data_flow.lib import mapper
+
+# 1. Define the mapping rules
+sales_order_mapping = {
+ 'id': mapper.m2o_map('import_so_', 'OrderRef'),
+ 'partner_id/id': mapper.m2o_map('main_customers_', 'CustomerCode'),
+ 'name': mapper.val('OrderRef'),
+ # ... other fields
+}
+
+# 2. Define the parameters for the load script
+import_params = {
+ 'model': 'sale.order',
+ 'context': "{'tracking_disable': True, 'mail_notrack': True}",
+ 'worker': 4,
+ 'size': 500
+}
+
+# 3. Initialize the processor
+processor = Processor('origin/sales_orders.csv', separator=',')
+
+# 4. Run the transformation
+processor.process(
+ mapping=sales_order_mapping,
+ filename_out='data/sale_order.csv',
+ params=import_params
+)
+
+# 5. Generate the final script
+processor.write_to_file("load_sales_orders.sh")
+
+print("Transformation complete.")
diff --git a/docs/guides/index.md b/docs/guides/index.md
new file mode 100644
index 00000000..c3f6f522
--- /dev/null
+++ b/docs/guides/index.md
@@ -0,0 +1,20 @@
+# How-To Guides
+
+This section provides detailed, step-by-step guides for common tasks and advanced features of the `odoo-data-flow` library.
+
+```{toctree}
+:maxdepth: 1
+:caption: "Guides"
+
+importing_data
+exporting_data
+exporting_data_for_modules
+data_transformations
+configuration
+advanced_usage
+performance_tuning
+field_mapping_workflow
+post_import_workflows
+server_to_server_migration
+
+```
diff --git a/docs/guides/performance_tuning.md b/docs/guides/performance_tuning.md
new file mode 100644
index 00000000..fb56d54c
--- /dev/null
+++ b/docs/guides/performance_tuning.md
@@ -0,0 +1,274 @@
+# Guide: Performance Tuning
+
+When working with large datasets, the performance of your data import can become critical. This guide covers the key parameters and strategies you can use to tune the import process for maximum speed and efficiency.
+
+The primary way to control performance is by adjusting the parameters passed to the `odoo-data-flow import` command, which you can set in the `params` dictionary in your `transform.py` script.
+
+---
+
+## Using Multiple Workers
+
+The most significant performance gain comes from parallel processing. The import client can run multiple "worker" processes simultaneously, each handling a chunk of the data.
+
+- **CLI Option**: `--worker`
+- **`params` Key**: `'worker'`
+- **Default**: `1`
+
+By increasing the number of workers, you can leverage multiple CPU cores on the machine running the import script and on the Odoo server itself.
+
+### Example
+
+To use 4 parallel processes for an import:
+
+```python
+# In your transform.py script
+
+import_params = {
+ 'model': 'sale.order',
+ 'worker': 4, # Use 4 workers
+ # ... other params
+}
+
+processor.process(
+ mapping=my_mapping,
+ filename_out='data/sale_order.csv',
+ params=import_params
+)
+```
+
+This will add the `--worker=4` flag to the command in your generated `load.sh` script.
+
+### Trade-offs and Considerations
+
+- **CPU Cores**: A good rule of thumb is to set the number of workers to be equal to, or slightly less than, the number of available CPU cores on your Odoo server.
+- **Database Deadlocks**: The biggest risk with multiple workers is the potential for database deadlocks. This can happen if two workers try to write records that depend on each other at the same time. The library's two-pass error handling system is designed to mitigate this.
+
+## Solving Concurrent Updates with `--groupby`
+
+The `--groupby` option is a powerful feature designed to solve the "race condition" problem that occurs during high-performance, multi-worker imports.
+
+- **CLI Option**: `--groupby`
+- **`params` Key**: `'split'` (note that the key name differs from the CLI option)
+- **Default**: `None`
+
+### The Problem: A Race Condition
+
+Imagine you are using multiple workers to import contacts that all link to the _same_ parent company.
+
+- **Worker 1** takes a contact and tries to update "Company A".
+- At the exact same time, **Worker 2** takes another contact and _also_ tries to update "Company A".
+
+The database locks the company record for Worker 1, so when Worker 2 tries to access it, it fails with a "concurrent update" error.
+
+#### The Solution: The "Sorting Hat"
+
+The `--groupby` option acts like a "sorting hat." Before the import begins, it looks at the column you specify (e.g., `parent_id/id`) and ensures that **all records with the same value in that column are sent to the exact same worker.**
+
+This guarantees that two different workers will never try to update the same parent record at the same time, completely eliminating these errors.
+
+#### Visualizing the Difference
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+graph TD
+ subgraph subGraph0["Without --groupby (High Risk of Error)"]
+        A["Records:<br>C1 (Parent A)<br>C2 (Parent B)<br>C3 (Parent A)"] --> B{Random Distribution};
+ B --> W1["Worker 1 gets C1"];
+ B --> W2["Worker 2 gets C3"];
+ B --> W3["Worker 3 gets C2"];
+ W1 -- "tries to update" --> P_A(("Parent A"));
+ W2 -- "tries to update" --> P_A;
+ W3 -- "updates" --> P_B(("Parent B"));
+        P_A --> X["ERROR<br>Concurrent Update"];
+ end
+
+ subgraph subGraph1["With --groupby=parent_id/id (Safe)"]
+        C["Records:<br>C1 (Parent A)<br>C2 (Parent B)<br>C3 (Parent A)"] --> D{Smart Distribution};
+ D -- "parent_id = A" --> W3b["Worker 1 gets C1, C3"];
+ D -- "parent_id = B" --> W4b["Worker 2 gets C2"];
+ W3b --> S1[("Update Parent A")];
+ W4b --> S2[("Update Parent B")];
+ S1 & S2 --> Y(["SUCCESS"]);
+ end
+ style W1 fill:#FFF9C4
+ style W2 fill:#C8E6C9
+ style W3 fill:#FFE0B2
+ style W3b fill:#FFF9C4
+ style W4b fill:#C8E6C9
+ style D fill:#BBDEFB
+ style B fill:#BBDEFB
+ style subGraph0 fill:transparent
+ style subGraph1 fill:transparent
+ style Y stroke:#00C853
+```
+
+### Example
+
+To safely import contacts in parallel, grouped by their parent company:
+
+```python
+# In your transform.py script
+
+import_params = {
+ 'model': 'res.partner',
+ 'worker': 4,
+ # This is the crucial part
+ 'split': 'parent_id/id', # The internal key is 'split'
+}
+```
+
+This will add `--groupby=parent_id/id` to your generated `load.sh` script.
+
+## Understanding Batch Size (`--size`)
+
+The `--size` option is one of the most critical parameters for controlling the performance and reliability of your imports. In simple terms, it controls **how many records are processed in a single database transaction**.
+
+To understand why this is so important, think of it like going through a checkout at a grocery store.
+
+### The Default Odoo Behavior: One Big Basket
+
+When you use Odoo's standard import wizard, it's like putting all of your items (every single row in your file) into **one giant shopping basket**. This "all-or-nothing" approach has two major problems:
+
+1. **Transaction Timeouts:** The Odoo server has a time limit to process your entire basket. If you have too many items (a very large file), it might take too long, and the server will give up with a "Transaction timed out" error. None of your records are imported.
+2. **Single Point of Failure:** If just one record in your giant basket is "bad" (e.g., a missing price), the server rejects the **entire basket**. All of your other perfectly good records are rejected along with the single bad one.
+
+#### How `--size` Solves the Problem: Multiple Small Baskets
+
+The `odoo-data-flow` library allows you to break up your import into smaller, more manageable chunks. When you use `--size 100`, you are telling the tool to use **multiple, smaller baskets**, each containing only 100 items.
+
+This solves both problems:
+
+1. Each small basket is processed very quickly, avoiding server timeouts.
+2. If one small basket has a bad record, only that basket of 100 records is rejected. All the other baskets are still successfully imported.
+
+#### Visualizing the Difference
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart TD
+ subgraph subGraph0["Default Odoo Import (One Big Basket)"]
+    B{"One Large Transaction<br>Size=1000"}
+ A["1000 Records"]
+    D@{ label: "FAIL<br>All 1000 records rejected" }
+ C["Odoo Database"]
+ end
+ subgraph subGraph1["odoo-data-flow with --size=100 (Multiple Small Baskets)"]
+    F{"Transaction 1<br>100 records"}
+ E["1000 Records"]
+ G["Odoo Database"]
+    H{"Transaction 2<br>100 records"}
+    I@{ label: "FAIL<br>Only 100 records rejected" }
+ J["...continues with Transaction 3"]
+ end
+ A --> B
+ B -- Single Error --> D
+ B -- No Errors --> C
+ E --> F
+ F --> G & H
+ H -- Single Error --> I
+ H -- No Errors --> G
+ I --> J
+ J --> G
+
+ D@{ shape: rect}
+ C@{ shape: cyl}
+ G@{ shape: cyl}
+ I@{ shape: rect}
+ style C fill:#AA00FF
+ style G fill:#AA00FF
+ style subGraph0 fill:transparent
+ style subGraph1 fill:transparent
+
+```
+
+#### Trade-offs and Considerations
+
+- **Larger Batch Size**: Can be faster as it reduces the overhead of creating database transactions, but consumes more memory. If one record in a large batch fails, Odoo may reject the entire batch.
+- **Smaller Batch Size**: More resilient to individual record errors and consumes less memory, but can be slower due to increased network overhead.
+- **WAN Performance:** For slow networks, sending smaller chunks of data is often more stable than sending one massive payload.
+
+
+### Handling Server Timeouts (`limit-time-real`)
+
+A common source of import failures, especially with large or complex data, is the Odoo server's built-in request timeout.
+
+- **What it is**: Odoo servers have a configuration parameter called `limit-time-real` which defines the maximum time (in seconds) a worker process is allowed to run before it is automatically terminated. The default value is **120 seconds (2 minutes)**.
+
+- **The Problem**: If a single batch of records takes longer than this limit to process (due to complex computations, custom logic, or a very large batch size), the server will kill the process, and your import will fail for that batch.
+
+- **The Solution**: The solution is to reduce the batch size using the `--size` option. By sending fewer records in each transaction, you ensure that each individual transaction can be completed well within the server's time limit.
+
+> **Tip:** If your imports are failing with "timeout" or "connection closed" errors, the first thing you should try is reducing the `--size` value (e.g., from `1000` down to `200` or `100`).
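+
+In a `transform.py` script, that adjustment is a one-line change to the `params` dictionary (the model name below is only an example):
+
+```python
+# Smaller batches keep each transaction well within the server's limit-time-real.
+import_params = {
+    'model': 'account.move',  # example model; use your own
+    'worker': 2,
+    'size': 100,              # reduced from a larger value such as 1000
+}
+```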
+
+
+## Mapper Performance
+
+The choice of mappers can impact performance.
+
+- **Fast Mappers**: Most mappers, like `val`, `const`, `concat`, and `num`, are extremely fast as they operate only on the data in the current row.
+
+- **Slow Mappers**: The `mapper.relation` function should be used with caution. For **every single row**, it performs a live search request to the Odoo database, which can be very slow for large datasets.
+
+**Recommendation**: If you need to map values based on data in Odoo, it is much more performant to first export the mapping from Odoo into a Python dictionary and then use the much faster `mapper.map_val` to do the translation in memory.
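+
+As a rough sketch of that pattern (assuming `mapper.map_val` takes the source column name and a plain Python dictionary; check the mapper reference for the exact signature):
+
+```python
+from odoo_data_flow.lib import mapper
+
+# Built once before the transform, e.g. from a small one-off export of res.country.
+# The dictionary contents below are illustrative.
+country_code_to_xmlid = {
+    'US': 'base.us',
+    'BE': 'base.be',
+    'NL': 'base.nl',
+}
+
+my_mapping = {
+    # Pure in-memory lookup for every row: no round trip to Odoo.
+    'country_id/id': mapper.map_val('CountryCode', country_code_to_xmlid),
+}
+```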
+
+---
+
+## Importing Related or Computed Fields (A Major Performance Trap)
+
+A common but very slow practice is to import values into related or computed fields. This can lead to a massive number of "behind the scenes" updates and cause your import time to increase exponentially.
+
+### The Problem: Cascading Updates
+
+Consider an example where you are importing a list of contacts and setting their `parent_id` (parent company).
+
+```python
+# SLOW - DO NOT DO THIS
+my_mapping = {
+ 'id': mapper.m2o_map('child_', 'Ref'),
+ 'name': mapper.val('Name'),
+ # This next line causes the performance issue
+ 'parent_id/id': mapper.m2o_map('parent_', 'ParentRef'),
+}
+```
+
+This triggers a cascade of updates. Each time a new child contact is imported for the same parent, Odoo re-writes the _entire_ list of children on the parent record. The number of database writes grows with every new record, slowing the import to a crawl.
+
+### The Solution: Use the `--ignore` Option
+
+The correct way to handle this is to prevent the import client from writing to the problematic field. You can do this by adding the `ignore` key to your `params` dictionary.
+
+- **CLI Option**: `--ignore`
+- **`params` Key**: `'ignore'`
+
+```python
+# In your transform.py script
+
+# The mapping still defines the relationship
+my_mapping = {
+ 'id': mapper.m2o_map('child_', 'Ref'),
+ 'name': mapper.val('Name'),
+ 'parent_id/id': mapper.m2o_map('parent_', 'ParentRef'),
+}
+
+# The params tell the client to IGNORE the parent_id/id field
+import_params = {
+ 'model': 'res.partner',
+ 'ignore': 'parent_id/id', # The field to ignore
+}
+
+processor.process(
+ mapping=my_mapping,
+ filename_out='data/contacts.csv',
+ params=import_params
+)
+```
+
+This will generate a `load.sh` script with the `--ignore=parent_id/id` flag. The import client will then skip this column, avoiding the cascading updates entirely. Odoo's internal logic will still correctly establish the relationship based on the other direction of the field, but far more efficiently.
+
+**Recommendation**: For performance, **always** use `--ignore` for related fields that have an inverse relation (like `parent_id` and `child_ids`). Only import the "forward" direction of the relationship.
diff --git a/docs/guides/post_import_workflows.md b/docs/guides/post_import_workflows.md
new file mode 100644
index 00000000..453adc2c
--- /dev/null
+++ b/docs/guides/post_import_workflows.md
@@ -0,0 +1,61 @@
+# Guide: Post-Import Workflows
+
+The `odoo-data-flow` library provides a powerful system for running automated actions on your data _after_ it has been imported into Odoo. This is handled by the `odoo-data-flow workflow` command.
+
+This feature is designed for complex data migrations where simple importing is not enough. A common use case is in accounting, where imported draft invoices must be validated, reconciled, and paid. Instead of performing these actions manually in the Odoo UI for thousands of records, you can automate them with a workflow.
+
+## The `invoice-v9` Workflow
+
+The library currently includes a built-in workflow specifically for processing customer invoices (`account.invoice`) in Odoo version 9.
+
+**Warning:** This workflow uses legacy Odoo v9 API calls and will **not** work on modern Odoo versions (10.0+). It is provided as a reference and an example of how a post-import process can be structured.
+
+The workflow allows you to perform the following actions on your imported invoices:
+
+- **`tax`**: Computes taxes for imported draft invoices.
+- **`validate`**: Validates draft invoices, moving them to the 'Open' state.
+- **`pay`**: Registers a payment against an open invoice, moving it to the 'Paid' state.
+- **`proforma`**: Converts draft invoices to pro-forma invoices.
+- **`rename`**: A utility to move a value from a custom field to the official `number` field.
+
+### Usage
+
+You run the workflow from the command line, specifying which action(s) you want to perform.
+
+```bash
+odoo-data-flow workflow invoice-v9 [OPTIONS]
+```
+
+### Command-Line Options
+
+| Option | Description |
+| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `-c`, `--config` | **Required**. Path to your `connection.conf` file. |
+| `--action` | The workflow action to run (`tax`, `validate`, `pay`, `proforma`, `rename`). This option can be used multiple times. If omitted, all actions are run in a logical order. |
+| `--field` | **Required**. The name of the field in `account.invoice` that holds the legacy status from your source system. The workflow uses this to find the right invoices. |
+| `--status-map` | **Required**. A dictionary string that maps Odoo states to your legacy statuses. For example: `"{'open': ['OP', 'Validated'], 'paid': ['PD']}"` |
+| `--paid-date-field` | **Required**. The name of the field containing the payment date, used by the `pay` action. |
+| `--payment-journal` | **Required**. The database ID (integer) of the `account.journal` to be used for payments. |
+| `--max-connection` | The number of parallel threads to use for processing. Defaults to `4`. |
+
+### Example Command
+
+Imagine you have imported thousands of invoices. Now, you want to find all the invoices with a legacy status of "Validated" and move them to the "Open" state in Odoo.
+
+You would run the following command:
+
+```bash
+odoo-data-flow workflow invoice-v9 \
+ --config conf/connection.conf \
+ --action validate \
+ --field x_studio_legacy_status \
+ --status-map "{'open': ['Validated']}" \
+ --paid-date-field x_studio_payment_date \
+ --payment-journal 5
+```
+
+This command will:
+
+1. Connect to Odoo.
+2. Search for all `account.invoice` records where `x_studio_legacy_status` is 'Validated'.
+3. Run the `validate_invoice` function on those records, triggering the workflow to open them.
diff --git a/docs/guides/server_to_server_migration.md b/docs/guides/server_to_server_migration.md
new file mode 100644
index 00000000..ee4248f4
--- /dev/null
+++ b/docs/guides/server_to_server_migration.md
@@ -0,0 +1,114 @@
+# Guide: Server-to-Server Migration
+
+The `odoo-data-flow` library includes a powerful `migrate` command designed to perform a direct, in-memory data migration from one Odoo database to another. This is an advanced feature that chains together the export, transform, and import processes into a single step, without needing to create intermediate CSV files on your local machine.
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart LR
+ subgraph subGraph0["Source Environment"]
+ A[("Source Odoo DB")]
+ end
+ subgraph subGraph1["Destination Environment"]
+ F[("Destination Odoo DB")]
+ end
+ subgraph subGraph2["Migration Process (In-Memory)"]
+ B["odoo-data-flow migrate"]
+ C{"Exporter"}
+ D{"Processor & Mappers"}
+ E{"Importer"}
+ end
+ B -- 1 Connect & Export --> A
+ A -- 2 Data Stream --> C
+ C -- Raw Data (Header & Rows) --> D
+ D -- Transformed Data --> E
+ E -- 3 Load Data --> F
+ style A fill:#AA00FF
+ style F fill:#AA00FF
+ style B fill:#BBDEFB,stroke:#1976D2
+ style C fill:#FFE0B2
+ style D fill:#FFCC80
+ style E fill:#FFE0B2
+ style subGraph2 fill:transparent
+ style subGraph0 fill:transparent
+ style subGraph1 fill:transparent
+
+```
+
+## Use Case
+
+This command is ideal for scenarios such as:
+
+- Migrating data from a staging or development server to a production server.
+- Consolidating data from one Odoo instance into another.
+- Performing a data transformation and re-importing into the same database.
+
+## The `odoo-data-flow migrate` Command
+
+The migration is handled by the `migrate` sub-command. It works by exporting data from a source instance, applying an in-memory transformation using the same `mapper` functions, and then immediately importing the result into a destination instance.
+
+### Command-Line Options
+
+The command is configured using a set of options that combine parameters from both the `export` and `import` commands.
+
+| Option | Description |
+| --------------------- | ------------------------------------------------------------------------------------------------------------------- |
+| `--config-export` | **Required**. Path to the `connection.conf` file for the **source** Odoo instance (where data is exported from). |
+| `--config-import` | **Required**. Path to the `connection.conf` file for the **destination** Odoo instance (where data is imported to). |
+| `--model` | **Required**. The technical name of the Odoo model you want to migrate (e.g., `res.partner`). |
+| `--fields` | **Required**. A comma-separated list of the technical field names you want to migrate. |
+| `--domain` | An Odoo domain filter to select which records to export from the source instance. Defaults to `[]` (all records). |
+| `--mapping` | A dictionary string defining the transformation rules. If omitted, a direct 1-to-1 mapping is used. |
+| `--export-worker` | The number of parallel workers to use for the export phase. Defaults to `1`. |
+| `--export-batch-size` | The batch size for the export phase. Defaults to `100`. |
+| `--import-worker` | The number of parallel workers to use for the import phase. Defaults to `1`. |
+| `--import-batch-size` | The batch size for the import phase. Defaults to `10`. |
+
+## Full Migration Example
+
+Let's say we want to migrate all partners from a staging server to a production server. We also want to add a prefix to their names during the migration to indicate they came from the staging environment.
+
+**Step 1: Create two connection files**
+
+You would have two configuration files: `conf/staging.conf` and `conf/production.conf`.
+
+**Step 2: Define the mapping (optional)**
+
+If you need to transform the data, you can define a mapping. For this example, we'll pass it as a string on the command line.
+The mapping would look like this in Python:
+
+```python
+my_mapping = {
+ 'id': mapper.concat('migrated_partner_', 'id'),
+ 'name': mapper.concat('Staging - ', 'name'),
+ 'phone': mapper.val('phone'),
+ # ... other fields
+}
+```
+
+As a command-line string, it would be: `"{'id': mapper.concat('migrated_partner_', 'id'), 'name': mapper.concat('Staging - ', 'name'), ...}"`
+
+**Step 3: Run the `migrate` command**
+
+You would run the following command from your terminal:
+
+```bash
+odoo-data-flow migrate \
+ --config-export "conf/staging.conf" \
+ --config-import "conf/production.conf" \
+ --model "res.partner" \
+ --fields "id,name,phone" \
+ --mapping "{'name': mapper.concat('Staging - ', 'name'), 'phone': mapper.val('phone')}"
+```
+
+### Result
+
+This single command will:
+
+1. Connect to the staging Odoo database.
+2. Export the `id`, `name`, and `phone` fields for all `res.partner` records.
+3. In memory, transform the data by prepending "Staging - " to each partner's name.
+4. Connect to the production Odoo database.
+5. Import the transformed data, creating new partners with the updated names.
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 00000000..6121f872
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,83 @@
+```{include} ../README.md
+---
+end-before: <!-- github-only -->
+---
+```
+
+# Odoo Data Flow
+
+**A robust, declarative library for managing complex data imports and exports with Odoo.**
+
+Odoo Data Flow is a powerful and flexible Python library designed to simplify the import and export of data to and from Odoo. It allows you to define data mappings and transformations in a declarative way, making complex data operations manageable and repeatable.
+You can easily manage complex transformations, relationships, and validations, making your data integration tasks simpler and more reliable.
+
+This library is the successor to the `odoo-csv-import-export` library, refactored for modern development practices and enhanced clarity.
+
+```{mermaid}
+---
+config:
+ theme: redux
+---
+flowchart TD
+ subgraph subGraph0["External Data"]
+ A["CSV / XML File"]
+ end
+ subgraph s1["odoo-data-flow"]
+ B{"Model Definition in Python"}
+ C["@field Decorators"]
+ D["Transformation & Validation Logic"]
+ end
+ subgraph Odoo["Odoo"]
+ E["Odoo Database"]
+ end
+ A --> B
+ B -- Defines --> C
+ C -- Applies --> D
+ B -- Orchestrates --> E
+ A@{ shape: doc}
+ E@{ shape: cyl}
+ style A fill:#FFF9C4
+ style B fill:#C8E6C9
+ style E fill:#AA00FF
+ style s1 fill:#BBDEFB
+ style Odoo fill:transparent
+ style subGraph0 fill:transparent
+
+
+```
+
+
+## Getting Started
+
+Ready to simplify your Odoo data integrations?
+
+| Step | Description |
+| ------------------------------------------ | --------------------------------------------------------------- |
+| 🚀 **[Quickstart](./quickstart.md)** | Your first end-to-end example. Go from file to Odoo in minutes. |
+| ⚙️ **[Installation](./installation.md)** | How to install the library in your project. |
+| 🧠 **[Core Concepts](./core_concepts.md)** | Understand the key ideas behind the library. |
+
+[license]: license
+[contributor guide]: contributing
+[command-line reference]: usage
+
+```{toctree}
+---
+hidden:
+maxdepth: 1
+---
+
+installation
+quickstart
+usage
+core_concepts
+comparison_with_other_tools
+guides/index
+reference
+faq
+contributing
+Code of Conduct
+License
+Changelog
+ROADMAP
+```
diff --git a/docs/installation.md b/docs/installation.md
new file mode 100644
index 00000000..6654e63a
--- /dev/null
+++ b/docs/installation.md
@@ -0,0 +1,109 @@
+# Installation with uv
+
+This guide uses `uv`, a high-performance Python package installer and resolver, to set up your environment. It's a modern, fast alternative to `pip` and `venv`.
+
+## 1. Install `uv` (if you haven't already)
+
+First, ensure `uv` is installed on your system. If not, run the appropriate command for your operating system:
+
+```bash
+# macOS / Linux
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Windows (in PowerShell)
+irm https://astral.sh/uv/install.ps1 | iex
+```
+
+For other installation options, please refer to the [official `uv` documentation](https://astral.sh/uv#installation).
+
+## 2. Prerequisites
+
+- **Python 3.10 or newer:** `uv` will automatically find and use a compatible Python version on your system.
+- **Access to an Odoo instance:** To import or export data, you will need the URL, database name, and login credentials for an Odoo instance.
+
+## 3. The Connection Configuration File
+
+Before you can use the tool, you must create a configuration file to store your Odoo connection details.
+
+Create a folder named `conf/` in your project directory, and inside it, create a file named `connection.conf`.
+
+**File: `conf/connection.conf`**
+
+```ini
+[Connection]
+hostname = my-odoo-instance.odoo.com
+database = my_odoo_db
+login = admin
+password =
+protocol = jsonrpcs
+port = 443
+uid = 2
+```
+
+### Configuration Keys Explained
+
+| Key | Description |
+| :--------- | :---------------------------------------------------------------------------------------------------------------------------------------- |
+| `hostname` | The domain or IP address of your Odoo server. |
+| `database` | The name of the Odoo database you want to connect to. |
+| `login` | The login username for the Odoo user that will perform the operations. |
+| `password` | The password for the specified Odoo user. |
+| `protocol` | The protocol to use for the connection. For Odoo.sh or a standard HTTPS setup, use `jsonrpcs`. For a local, non-SSL setup, use `jsonrpc`. |
+| `port` | The port for the connection. Standard ports are `443` for HTTPS (`jsonrpcs`) and `8069` for HTTP (`jsonrpc`). |
+| `uid` | The database ID of the Odoo user. `2` is often the default administrator user in a new database. |
+
+## 4. Standard Installation
+
+1. **Create and activate a virtual environment:**
+
+ This command creates a standard virtual environment in a `.venv` folder.
+
+ ```bash
+ uv venv
+ ```
+
+ Next, activate the environment:
+
+ ```bash
+ # For Unix/macOS
+ source .venv/bin/activate
+
+ # For Windows
+ .venv\Scripts\activate
+ ```
+
+ Your terminal prompt should now indicate that you are in the `.venv` environment.
+
+2. **Install `odoo-data-flow`:**
+
+ With the environment active, use `uv` to install the package from PyPI.
+
+ ```bash
+ uv pip install odoo-data-flow
+ ```
+
+## 5. Installing for Development
+
+If you want to contribute to the project or test the latest unreleased changes, you can install the library directly from the source code.
+
+1. **Clone the GitHub repository:**
+
+ ```bash
+   git clone https://github.com/OdooDataFlow/odoo-data-flow.git
+ cd odoo-data-flow
+ ```
+
+2. **Create and activate an environment:**
+
+ ```bash
+ uv venv
+ source .venv/bin/activate
+ ```
+
+3. **Install in editable mode:**
+ This command links the installed package to the source code in your directory. Any edits you make to the code will be immediately available.
+ ```bash
+ uv pip install -e .
+ ```
+
+You are now set up and ready to create your first data flow.
diff --git a/docs/license.md b/docs/license.md
new file mode 100644
index 00000000..218790f5
--- /dev/null
+++ b/docs/license.md
@@ -0,0 +1,7 @@
+# License
+
+```{literalinclude} ../LICENSE
+---
+language: none
+---
+```
diff --git a/docs/quickstart.md b/docs/quickstart.md
new file mode 100644
index 00000000..492e873b
--- /dev/null
+++ b/docs/quickstart.md
@@ -0,0 +1,146 @@
+# Quickstart
+## A Real-World Import Workflow
+
+This guide demonstrates a realistic and robust workflow for importing data. Instead of a single script that does everything, we will separate the process into two distinct phases, which is highly recommended for any serious data migration:
+
+1. **Transform Phase**: A Python script reads a raw source file, cleans the data using the library's powerful **mappers**, and produces a clean CSV file ready for Odoo. It also generates a shell script for the next phase.
+2. **Load Phase**: The generated shell script uses the new `odoo-data-flow` command-line tool to efficiently load the clean CSV data into Odoo.
+
+This separation makes the process more manageable, easier to debug, and allows you to reuse the transformed data for multiple Odoo instances (e.g., staging and production).
+
+## Step 1: Project Setup
+
+First, create the recommended directory structure for a data flow project.
+
+```
+.
+├── conf/
+│ └── connection.conf
+├── origin/
+│ └── clients.csv
+├── data/
+│ └── (this will be created by our script)
+└── transform.py
+```
+
+- `conf/`: Holds configuration files, like Odoo connection details.
+- `origin/`: Contains the original, raw data files from the source system.
+- `data/`: Will store the transformed, clean CSV files ready for import.
+- `transform.py`: Our main Python script for the transformation logic.
+
+## Step 2: Connection Configuration (`connection.conf`)
+
+Create the `conf/connection.conf` file. The section header `[Connection]` and the keys (`database`, `login`) must match this example, as they are used by the import client.
+
+
+```{code-block} ini
+:caption: conf/connection.conf
+
+[Connection]
+hostname = my-odoo-instance.odoo.com
+database = my_odoo_db
+login = admin
+password =
+protocol = jsonrpcs
+port = 443
+uid = 2
+```
+
+
+## Step 3: The Raw Data (`origin/clients.csv`)
+
+Create a raw data file in `origin/clients.csv`.
+
+```{code-block} text
+:caption: origin/clients.csv
+ID,Firstname,Lastname,EmailAddress
+C001,John,Doe,john.doe@test.com
+C002,Jane,Smith,jane.s@test.com
+```
+
+## Step 4: The Transformation Script (`transform.py`)
+
+This script is the core of our logic. It uses the `Processor` to read the source file and a `mapping` dictionary to define the transformations.
+
+Create the file `transform.py`:
+
+```{code-block} python
+:caption: transform.py
+from odoo_data_flow.lib.transform import Processor
+from odoo_data_flow.lib import mapper
+
+# 1. Define the mapping rules in a dictionary.
+res_partner_mapping = {
+ 'id': mapper.concat('example_client_', 'ID'),
+ 'name': mapper.concat(' ', 'Firstname', 'Lastname'),
+ 'email': mapper.val('EmailAddress'),
+ 'is_company': mapper.const(False),
+}
+
+# 2. Initialize the Processor.
+processor = Processor(
+ 'origin/clients.csv',
+ separator=','
+)
+
+# 3. Define parameters for the import client.
+params = {
+ 'model': 'res.partner',
+ 'context': "{'tracking_disable': True}"
+}
+
+# 4. Run the process.
+processor.process(
+ mapping=res_partner_mapping,
+ filename_out='data/res_partner.csv',
+ params=params
+)
+
+# 5. Generate the shell script for the loading phase.
+processor.write_to_file("load.sh")
+
+print("Transformation complete. Clean data and load script are ready.")
+```
+
+## Step 5: Run the Transformation
+
+Execute the script from your terminal:
+
+```bash
+python transform.py
+```
+
+## Step 6: Review the Generated Files
+
+Let's look at what was created.
+
+**File: `data/res_partner.csv` (Transformed & Clean Data)**
+
+```{code-block} text
+:caption: data/res_partner.csv
+id,name,email,is_company
+example_client_C001,"John Doe",john.doe@test.com,False
+example_client_C002,"Jane Smith",jane.s@test.com,False
+```
+
+**File: `load.sh` (The Loading Script)**
+This file now contains commands that use the new, clean `odoo-data-flow` command-line interface.
+
+```{code-block} bash
+:caption: load.sh
+#!/bin/bash
+odoo-data-flow import --config conf/connection.conf --file data/res_partner.csv --model res.partner --context "{'tracking_disable': True}"
+odoo-data-flow import --config conf/connection.conf --fail --file data/res_partner.csv --model res.partner --context "{'tracking_disable': True}"
+```
+
+## Step 7: Load the Data into Odoo
+
+Finally, execute the generated shell script to upload the data.
+
+```bash
+bash load.sh
+```
+
+The `odoo-data-flow` tool will connect to your database and import the records. Log in to your Odoo instance and navigate to the **Contacts** app to see your newly imported contacts.
+
+Congratulations! You have successfully completed a full transform and load workflow with the new `odoo-data-flow` tool.
diff --git a/docs/reference.md b/docs/reference.md
new file mode 100644
index 00000000..95261dc4
--- /dev/null
+++ b/docs/reference.md
@@ -0,0 +1,58 @@
+# API Reference
+
+This section provides an auto-generated API reference for the core components of the `odoo-data-flow` library.
+
+## Command-Line Interface (`__main__`)
+
+This module contains the main `click`-based command-line interface.
+
+```{eval-rst}
+.. click:: odoo_data_flow.__main__:cli
+ :prog: odoo-data-flow
+ :nested: full
+```
+
+## Transformation Processor (`lib.transform`)
+
+This module contains the main `Processor` class used for data transformation.
+
+```{eval-rst}
+.. automodule:: odoo_data_flow.lib.transform
+ :members: Processor
+ :member-order: bysource
+```
+
+## Mapper Functions (`lib.mapper`)
+
+This module contains all the built-in `mapper` functions for data transformation.
+
+```{eval-rst}
+.. automodule:: odoo_data_flow.lib.mapper
+ :members:
+ :undoc-members:
+```
+
+## High-Level Runners
+
+These modules contain the high-level functions that are called by the CLI commands.
+
+### Importer (`importer`)
+
+```{eval-rst}
+.. automodule:: odoo_data_flow.importer
+ :members: run_import
+```
+
+### Exporter (`exporter`)
+
+```{eval-rst}
+.. automodule:: odoo_data_flow.exporter
+ :members: run_export
+```
+
+### Migrator (`migrator`)
+
+```{eval-rst}
+.. automodule:: odoo_data_flow.migrator
+ :members: run_migration
+```
diff --git a/docs/requirements.txt b/docs/requirements.txt
index e69de29b..b6daa041 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -0,0 +1,6 @@
+shibuya==2025.5.30
+sphinx==7.4.7
+sphinx-click==5.2.1
+myst_parser==4.0.1
+sphinx-copybutton==0.5.2
+sphinx-mermaid
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 00000000..2ce27971
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,7 @@
+# CLI Usage
+
+```{eval-rst}
+.. click:: odoo_data_flow.__main__:cli
+ :prog: odoo_data_flow
+ :nested: full
+```
diff --git a/noxfile.py b/noxfile.py
new file mode 100644
index 00000000..b2725cab
--- /dev/null
+++ b/noxfile.py
@@ -0,0 +1,273 @@
+"""Nox sessions."""
+
+import os
+import shlex
+import shutil
+import sys
+from pathlib import Path
+from textwrap import dedent
+
+import nox
+
+nox.options.default_venv_backend = "uv"
+
+package = "odoo_data_flow"
+python_versions = ["3.12", "3.13", "3.11", "3.10", "3.9"]
+nox.needs_version = ">= 2021.6.6"
+nox.options.sessions = (
+ "pre-commit",
+ "mypy",
+ "tests",
+ "typeguard",
+ "xdoctest",
+ "docs-build",
+)
+
+
+def activate_virtualenv_in_precommit_hooks(session: nox.Session) -> None:
+ """Activate virtualenv in hooks installed by pre-commit.
+
+ This function patches git hooks installed by pre-commit to activate the
+ session's virtual environment. This allows pre-commit to locate hooks in
+ that environment when invoked from git.
+
+ Args:
+ session: The Session object.
+ """
+ assert session.bin is not None # nosec
+
+ # Only patch hooks containing a reference to this session's bindir. Support
+ # quoting rules for Python and bash, but strip the outermost quotes so we
+ # can detect paths within the bindir, like /python.
+ bindirs = [
+ bindir[1:-1] if bindir[0] in "'\"" else bindir
+ for bindir in (repr(session.bin), shlex.quote(session.bin))
+ ]
+
+ virtualenv = session.env.get("VIRTUAL_ENV")
+ if virtualenv is None:
+ return
+
+ headers = {
+ # pre-commit < 2.16.0
+ "python": f"""\
+ import os
+ os.environ["VIRTUAL_ENV"] = {virtualenv!r}
+ os.environ["PATH"] = os.pathsep.join((
+ {session.bin!r},
+ os.environ.get("PATH", ""),
+ ))
+ """,
+ # pre-commit >= 2.16.0
+ "bash": f"""\
+ VIRTUAL_ENV={shlex.quote(virtualenv)}
+ PATH={shlex.quote(session.bin)}"{os.pathsep}$PATH"
+ """,
+ # pre-commit >= 2.17.0 on Windows forces sh shebang
+ "/bin/sh": f"""\
+ VIRTUAL_ENV={shlex.quote(virtualenv)}
+ PATH={shlex.quote(session.bin)}"{os.pathsep}$PATH"
+ """,
+ }
+
+ hookdir = Path(".git") / "hooks"
+ if not hookdir.is_dir():
+ return
+
+ for hook in hookdir.iterdir():
+ if hook.name.endswith(".sample") or not hook.is_file():
+ continue
+
+ if not hook.read_bytes().startswith(b"#!"):
+ continue
+
+ text = hook.read_text()
+
+ if not any(
+ (Path("A") == Path("a") and bindir.lower() in text.lower())
+ or bindir in text
+ for bindir in bindirs
+ ):
+ continue
+
+ lines = text.splitlines()
+
+ for executable, header in headers.items():
+ if executable in lines[0].lower():
+ lines.insert(1, dedent(header))
+ hook.write_text("\n".join(lines))
+ break
+
+
+@nox.session(name="pre-commit", python=python_versions[0])
+def precommit(session: nox.Session) -> None:
+ """Lint using pre-commit."""
+ args = session.posargs or [
+ "run",
+ "--all-files",
+ "--hook-stage=manual",
+ "--show-diff-on-failure",
+ ]
+
+ session.run(
+ "uv",
+ "sync",
+ "--group",
+ "dev",
+ "--group",
+ "lint",
+ external=True,
+ )
+ session.run("pre-commit", *args, external=True)
+ if args and args[0] == "install":
+ activate_virtualenv_in_precommit_hooks(session)
+
+
+@nox.session(python=python_versions)
+def mypy(session: nox.Session) -> None:
+ """Type-check using mypy."""
+ args = session.posargs or ["src", "tests", "docs/conf.py"]
+
+ session.run(
+ "uv",
+ "sync",
+ "--group",
+ "dev",
+ "--group",
+ "mypy",
+ external=True,
+ )
+
+ session.install("mypy")
+ session.install("pytest")
+ session.install("-e", ".")
+ session.run("mypy", *args)
+ if not session.posargs:
+ session.run("mypy", f"--python-executable={sys.executable}", "noxfile.py")
+
+
+@nox.session(python=python_versions)
+def tests(session: nox.Session) -> None:
+ """Run the test suite."""
+ session.run(
+ "uv",
+ "sync",
+ "--group",
+ "dev",
+ "--group",
+ "lint",
+ external=True,
+ )
+
+ session.install("pytest", "coverage")
+ session.install("-e", ".")
+ session.run("pytest", *session.posargs)
+
+
+@nox.session(python=python_versions[0])
+def coverage(session: nox.Session) -> None:
+ """Produce the coverage report."""
+ args = session.posargs or ["report"]
+ session.install("pytest", "coverage[toml]", "pytest-cov")
+ session.install("-e", ".")
+ session.log("Running pytest with coverage...")
+ session.run("pytest", "--cov=src", "--cov-report=xml")
+
+ if not session.posargs and any(Path().glob(".coverage.*")):
+ session.run("coverage", "combine")
+
+ session.run("coverage", *args)
+
+
+@nox.session(name="typeguard", python=python_versions[0])
+def typeguard_tests(session: nox.Session) -> None:
+ """Run tests with typeguard."""
+ session.run(
+ "uv",
+ "sync",
+ "--group",
+ "dev",
+ "--group",
+ "typeguard",
+ external=True,
+ )
+
+ session.install("typeguard", "pytest")
+ session.install("-e", ".")
+ session.run("pytest", "--typeguard-packages", package, *session.posargs)
+
+
+@nox.session(python=python_versions)
+def xdoctest(session: nox.Session) -> None:
+ """Run examples with xdoctest."""
+ if session.posargs:
+ args = [package, *session.posargs]
+ else:
+ args = [f"--modname={package}", "--command=all"]
+ if "FORCE_COLOR" in os.environ:
+ args.append("--colored=1")
+ session.run(
+ "uv",
+ "sync",
+ "--group",
+ "dev",
+ "--group",
+ "xdoctest",
+ external=True,
+ )
+ session.install("xdoctest")
+ session.install("-e", ".")
+ session.run("python", "-m", "xdoctest", package, *args)
+
+
+@nox.session(name="docs-build", python=python_versions[1])
+def docs_build(session: nox.Session) -> None:
+ """Build the documentation."""
+ args = session.posargs or ["docs", "docs/_build"]
+ if not session.posargs and "FORCE_COLOR" in os.environ:
+ args.insert(0, "--color")
+
+ session.run(
+ "uv",
+ "sync",
+ "--group",
+ "dev",
+ "--group",
+ "docs",
+ external=True,
+ )
+ session.install(
+ "sphinx",
+ "sphinx-mermaid",
+ "sphinx-click",
+ "myst_parser",
+ "shibuya",
+ "sphinx-copybutton",
+ )
+ session.install("-e", ".")
+
+ build_dir = Path("docs", "_build")
+ if build_dir.exists():
+ shutil.rmtree(build_dir)
+
+ session.run("sphinx-build", *args)
+
+
+@nox.session(python=python_versions[0])
+def docs(session: nox.Session) -> None:
+ """Build and serve the documentation with live reloading on file changes."""
+ args = session.posargs or ["--open-browser", "docs", "docs/_build"]
+ session.run(
+ "uv",
+ "sync",
+ "--group",
+ "docs",
+ external=True,
+ env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location},
+ )
+
+ build_dir = Path("docs", "_build")
+ if build_dir.exists():
+ shutil.rmtree(build_dir)
+
+ session.run("sphinx-autobuild", *args)
diff --git a/odoo_convert_path_to_image.py b/odoo_convert_path_to_image.py
deleted file mode 100755
index 25c76e3e..00000000
--- a/odoo_convert_path_to_image.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-#-*- coding: utf-8 -*-
-'''
-Copyright (C) Thibault Francois
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as
-published by the Free Software Foundation, version 3.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Lesser Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with this program. If not, see .
-'''
-
-import argparse
-import os
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import Processor
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='Convert csv column Image Path into base64')
- parser.add_argument('file', metavar='F', help='file to convert')
- parser.add_argument('--path', dest='path', help='Image Path Prefix, default is the working directory')
- parser.add_argument('--out', dest='out', help='name of the result file, default out.csv', default="out.csv")
- parser.add_argument('-f', dest='fields', help='Fields to convert from path to base64, comma separated', required = True)
- args = parser.parse_args()
-
- file_csv = args.file
- out_csv = args.out
- path = args.path
- fields = args.fields
- if not path:
- path = os.getcwd()
- if not path.endswith(os.sep):
- path += os.sep
-
-
- processor = Processor(file_csv)
- mapping = processor.get_o2o_mapping()
- for f in fields.split(','):
- f = f.strip()
- mapping[f] = mapper.binary_map(mapper.remove_sep_mapper(f), path)
- processor.process(mapping, out_csv, {}, 'list')
- processor.write_to_file("")
-
diff --git a/odoo_convert_url_to_image.py b/odoo_convert_url_to_image.py
deleted file mode 100755
index b36ed40d..00000000
--- a/odoo_convert_url_to_image.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env python
-#-*- coding: utf-8 -*-
-'''
-Copyright (C) Thibault Francois
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as
-published by the Free Software Foundation, version 3.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Lesser Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with this program. If not, see .
-'''
-
-import argparse
-import os
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import Processor
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='Convert csv column Image URL into base64')
- parser.add_argument('file', metavar='F', help='file to convert')
- parser.add_argument('--out', dest='out', help='name of the result file, default out.csv', default="out.csv")
- parser.add_argument('-f', dest='fields', help='Fields to convert from path to base64, comma separated', required = True)
- args = parser.parse_args()
-
- file_csv = args.file
- out_csv = args.out
- fields = args.fields
-
- processor = Processor(file_csv)
- mapping = processor.get_o2o_mapping()
- for f in fields.split(','):
- f = f.strip()
- mapping[f] = mapper.binary_url(f, verbose=True)
- processor.process(mapping, out_csv, {}, 'list')
- processor.write_to_file("")
-
diff --git a/odoo_csv_tools/__init__.py b/odoo_csv_tools/__init__.py
deleted file mode 100644
index f58bd928..00000000
--- a/odoo_csv_tools/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from . import lib
-from . import export_threaded
-from . import import_threaded
diff --git a/odoo_csv_tools/export_threaded.py b/odoo_csv_tools/export_threaded.py
deleted file mode 100755
index 1d591ce3..00000000
--- a/odoo_csv_tools/export_threaded.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Copyright (C) Thibault Francois
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as
-published by the Free Software Foundation, version 3.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Lesser Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with this program. If not, see .
-'''
-import sys
-import csv
-
-from time import time
-
-from .lib import conf_lib
-from .lib.conf_lib import log_error, log_info
-from .lib.internal.rpc_thread import RpcThread
-from .lib.internal.csv_reader import UnicodeWriter
-from .lib.internal.io import ListWriter, open_write
-from .lib.internal.tools import batch
-
-
-if sys.version_info >= (3, 0, 0):
- from xmlrpc.client import Fault
-else:
- from xmlrpclib import Fault
-
-csv.field_size_limit(2**31-1)
-
-class RPCThreadExport(RpcThread):
-
- def __init__(self, max_connection, model, header, writer, batch_size=20, context=None):
- super(RPCThreadExport, self).__init__(max_connection)
- self.model = model
- self.header = header
- self.batch_size = batch_size
- self.writer = writer
- self.context = context
- self.result = {}
-
- def launch_batch(self, data_ids, batch_number):
- def launch_batch_fun(data_ids, batch_number, check=False):
- st = time()
- try:
- self.result[batch_number] = self.model.export_data(data_ids, self.header, context=self.context)['datas']
- except Fault as e:
- log_error("export %s failed" % batch_number)
- log_error(e.faultString)
- except Exception as e:
- log_info("Unknown Problem")
- exc_type, exc_value, _ = sys.exc_info()
- # traceback.print_tb(exc_traceback, file=sys.stdout)
- log_error(exc_type)
- log_error(exc_value)
- log_info("time for batch %s: %s" % (batch_number, time() - st))
-
- self.spawn_thread(launch_batch_fun, [data_ids, batch_number], {})
-
- def write_file(self, file_writer):
- file_writer.writerow(self.header)
- for key in self.result:
- file_writer.writerows(self.result[key])
-
-
-def export_data(config_file, model, domain, header, context=None, output=None, max_connection=1, batch_size=100,
- separator=';', encoding='utf-8'):
- object_registry = conf_lib.get_server_connection(config_file).get_model(model)
-
- if output:
- file_result = open_write(output, encoding=encoding)
- writer = UnicodeWriter(file_result, delimiter=separator, encoding=encoding, quoting=csv.QUOTE_ALL)
- else:
- writer = ListWriter()
-
- rpc_thread = RPCThreadExport(int(max_connection), object_registry, header, writer, batch_size, context)
- st = time()
-
- ids = object_registry.search(domain, context=context)
- i = 0
- for b in batch(ids, batch_size):
- batch_ids = [l for l in b]
- rpc_thread.launch_batch(batch_ids, i)
- i += 1
-
- rpc_thread.wait()
- log_info("%s %s exported, total time %s second(s)" % (len(ids), model, (time() - st)))
- log_info("Writing file")
- rpc_thread.write_file(writer)
- if output:
- file_result.close()
- return False, False
- else:
- return writer.header, writer.data
diff --git a/odoo_csv_tools/import_threaded.py b/odoo_csv_tools/import_threaded.py
deleted file mode 100755
index 0e87a40e..00000000
--- a/odoo_csv_tools/import_threaded.py
+++ /dev/null
@@ -1,245 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Copyright (C) Thibault Francois
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as
-published by the Free Software Foundation, version 3.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Lesser Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with this program. If not, see .
-'''
-
-import sys
-import csv
-
-from time import time
-
-from .lib import conf_lib
-from .lib.conf_lib import log_error, log_info, log
-from .lib.internal.rpc_thread import RpcThread
-from .lib.internal.io import ListWriter, open_read, open_write
-from .lib.internal.csv_reader import UnicodeReader, UnicodeWriter
-from .lib.internal.tools import batch
-
-if sys.version_info >= (3, 0, 0):
- from xmlrpc.client import Fault
-else:
- from xmlrpclib import Fault
- from builtins import range
-
-csv.field_size_limit(2**31-1)
-
-
-class RPCThreadImport(RpcThread):
-
- def __init__(self, max_connection, model, header, writer, batch_size=20, context=None):
- super(RPCThreadImport, self).__init__(max_connection)
- self.model = model
- self.header = header
- self.batch_size = batch_size
- self.writer = writer
- self.context = context
-
- def launch_batch(self, data_lines, batch_number, check=False, o2m=False):
- def launch_batch_fun(lines, batch_number, check=False):
- i = 0
- batch_size = len(lines) if o2m else self.batch_size
- for lines_batch in batch(lines, batch_size):
- lines_batch = [l for l in lines_batch]
- self.sub_batch_run(lines_batch, batch_number, i, len(lines), check=check)
- i += 1
-
- self.spawn_thread(launch_batch_fun, [data_lines, batch_number], {'check': check})
-
- def sub_batch_run(self, lines, batch_number, sub_batch_number, total_line_nb, check=False):
- success = False
-
- st = time()
- try:
- success = self._send_rpc(lines, batch_number, sub_batch_number, check=check)
- except Fault as e:
- log_error("Line %s %s failed" % (batch_number, sub_batch_number))
- log_error(e.faultString)
- except ValueError as e:
- log_error("Line %s %s failed value error" % (batch_number, sub_batch_number))
- except Exception as e:
- log_info("Unknown Problem")
- exc_type, exc_value, _ = sys.exc_info()
- # traceback.print_tb(exc_traceback, file=sys.stdout)
- log_error(exc_type)
- log_error(exc_value)
-
- if not success:
- self.writer.writerows(lines)
-
- log_info("time for batch %s - %s of %s : %s" % (
- batch_number, (sub_batch_number + 1) * self.batch_size, total_line_nb, time() - st))
-
- def _send_rpc(self, lines, batch_number, sub_batch_number, check=False):
- res = self.model.load(self.header, lines, context=self.context)
- if res['messages']:
- for msg in res['messages']:
- log_error('batch %s, %s' % (batch_number, sub_batch_number))
- log_error(msg)
- log_error(lines[msg['record']])
- return False
- if len(res['ids']) != len(lines) and check:
- log_error("number of record import is different from the record to import, probably duplicate xml_id")
- return False
-
- return True
-
-
-def filter_line_ignore(ignore, header, line):
- new_line = []
- for k, val in zip(header, line):
- if k not in ignore:
- new_line.append(val)
- return new_line
-
-
-def filter_header_ignore(ignore, header):
- new_header = []
- for val in header:
- if val not in ignore:
- new_header.append(val)
- return new_header
-
-
-def read_file(file_to_read, delimiter=';', encoding='utf-8', skip=0):
- def get_real_header(header):
- """ Get real header cut at the first empty column """
- new_header = []
- for head in header:
- if head:
- new_header.append(head)
- else:
- break
- return new_header
-
- def check_id_column(header):
- try:
- header.index('id')
- except ValueError as ve:
- log_error("No External Id (id) column defined, please add one")
- raise ve
-
- def skip_line(reader):
- log_info("Skipping until line %s excluded" % skip)
- for _ in range(1, skip):
- reader.next()
-
- log('open %s' % file_to_read)
- file_ref = open_read(file_to_read, encoding=encoding)
- reader = UnicodeReader(file_ref, delimiter=delimiter, encoding=encoding)
- header = next(reader)
- header = get_real_header(header)
- check_id_column(header)
- skip_line(reader)
- data = [l for l in reader]
- return header, data
-
-
-"""
- Splitting helper method
-"""
-
-
-def split_sort(split, header, data):
- split_index = 0
- if split:
- try:
- split_index = header.index(split)
- except ValueError as ve:
- log("column %s not defined" % split)
- raise ve
- data = sorted(data, key=lambda d: d[split_index])
- return data, split_index
-
-
-def do_not_split(split, previous_split_value, split_index, line, o2m=False, id_index=0):
- # Do not split if you want to keep the one2many line with it's parent
- # The column id should be empty
- if o2m and not line[id_index]:
- return True
-
- if not split: # If no split no need to continue
- return False
-
- split_value = line[split_index]
- if split_value != previous_split_value: # Different Value no need to not split
- return False
-
- return True
-
-
-def import_data(config_file, model, header=None, data=None, file_csv=None, context=None, fail_file=False,
- encoding='utf-8', separator=";", ignore=False, split=False, check=True, max_connection=1,
- batch_size=10, skip=0, o2m=False):
- """
- header and data mandatory in file_csv is not provided
-
- """
- ignore = ignore or []
- context = context or {}
-
- if file_csv:
- header, data = read_file(file_csv, delimiter=separator, encoding=encoding, skip=skip)
- fail_file = fail_file or file_csv + ".fail"
- file_result = open_write(fail_file, encoding=encoding)
-
- if not header or data == None:
- raise ValueError("Please provide either a data file or a header and data")
-
- object_registry = conf_lib.get_server_connection(config_file).get_model(model)
-
- if file_csv:
- writer = UnicodeWriter(file_result, delimiter=separator, encoding=encoding, quoting=csv.QUOTE_ALL)
- else:
- writer = ListWriter()
-
- writer.writerow(filter_header_ignore(ignore, header))
- if file_csv:
- file_result.flush()
- rpc_thread = RPCThreadImport(int(max_connection), object_registry, filter_header_ignore(ignore, header), writer,
- batch_size, context)
- st = time()
-
- try:
- id_index = header.index('id')
- except:
- id_index = list(header).index('id') # Support python3 dict_keys
- data, split_index = split_sort(split, header, data)
-
- i = 0
- previous_split_value = False
- while i < len(data):
- lines = []
- j = 0
- while i < len(data) and (
- j < batch_size or do_not_split(split, previous_split_value, split_index, data[i], o2m=o2m,
- id_index=id_index)):
- line = data[i][:len(header)]
- lines.append(filter_line_ignore(ignore, header, line))
- previous_split_value = line[split_index]
- j += 1
- i += 1
- batch_number = split and "[%s] - [%s]" % (
- rpc_thread.thread_number(), previous_split_value) or "[%s]" % rpc_thread.thread_number()
- rpc_thread.launch_batch(lines, batch_number, check, o2m=o2m)
-
- rpc_thread.wait()
- if file_csv:
- file_result.close()
-
- log_info("%s %s imported, total time %s second(s)" % (len(data), model, (time() - st)))
- if file_csv:
- return False, False
- else:
- return writer.header, writer.data
diff --git a/odoo_csv_tools/lib/__init__.py b/odoo_csv_tools/lib/__init__.py
deleted file mode 100644
index 82df14f9..00000000
--- a/odoo_csv_tools/lib/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from . import internal
-from . import conf_lib
-from . import workflow
-from . import checker
-from . import mapper
-from . import transform
-from . import xml_transform
diff --git a/odoo_csv_tools/lib/checker.py b/odoo_csv_tools/lib/checker.py
deleted file mode 100644
index 61d27dc2..00000000
--- a/odoo_csv_tools/lib/checker.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on 29 feb. 2016
-
-@author: Thibault Francois
-'''
-#TODO
-import re
-
-def id_validity_checker(id_field, pattern, null_values=['NULL']):
- def check_id_validity(header, data):
- regular = re.compile(pattern)
- res = True
- for i, line in enumerate(data):
- line = [s.strip() if s.strip() not in null_values else '' for s in line]
- line_dict = dict(zip(header, line))
- if not regular.match(line_dict[id_field]):
- print("Check Failed Id Validity", i+1, line_dict[id_field])
- res = False
- return res
- return check_id_validity
-
-def line_length_checker(length):
- def check_line_length(header, data):
- i = 1
- res = True
- for line in data:
- i+=1
- if len(line) != length:
- print("Check Failed", i, "Line Length", len(line))
- res = False
- return res
- return check_line_length
-
-def line_number_checker(line_number):
- def check_line_numner(header, data):
- if len(data) + 1 != line_number:
- print("Check Line Number Failed %s instead of %s" % (len(data) + 1, line_number))
- return False
- else:
- return True
- return check_line_numner
-
-def cell_len_checker(max_cell_len):
- def check_max_cell_len(header, data):
- res = True
- for i, line in enumerate(data):
- for ele in line:
- if len(ele) > max_cell_len:
- print("Check Failed", i + 1, "Cell Length", len(ele))
- print(line)
- res = False
- return res
- return check_max_cell_len
diff --git a/odoo_csv_tools/lib/conf_lib.py b/odoo_csv_tools/lib/conf_lib.py
deleted file mode 100644
index e76971b4..00000000
--- a/odoo_csv_tools/lib/conf_lib.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import odoolib
-import sys
-if sys.version_info >= (3, 0, 0):
- import configparser as ConfigParser
-else:
- import ConfigParser
-import logging
-import sys
-
-
-def get_server_connection(config_file):
- config = ConfigParser.RawConfigParser({'protocol' : 'xmlrpc', 'port' : 8069})
- config.read(config_file)
-
- hostname = config.get('Connection', 'hostname')
- database = config.get('Connection', 'database')
- login = config.get('Connection', 'login')
- password = config.get('Connection', 'password')
- protocol = config.get('Connection', 'protocol')
- port = int(config.get('Connection', 'port'))
- uid = int(config.get('Connection', 'uid'))
- return odoolib.get_connection(hostname=hostname, database=database, login=login, password=password, protocol=protocol, port=port, user_id=uid)
-
-def init_logger():
- logger_err = logging.getLogger("error")
- logger_err.setLevel(logging.INFO)
- err = logging.StreamHandler(sys.stderr)
- logger_err.addHandler(err)
- logger = logging.getLogger("info")
- logger.setLevel(logging.INFO)
- out = logging.StreamHandler(sys.stdout)
- logger.addHandler(out)
-
-def log_info(msg):
- logging.getLogger("info").info(msg)
-
-def log_error(msg):
- logging.getLogger("error").info(msg)
-
-def log(msg):
- log_info(msg)
- log_error(msg)
-
-init_logger()
diff --git a/odoo_csv_tools/lib/internal/__init__.py b/odoo_csv_tools/lib/internal/__init__.py
deleted file mode 100644
index 5f6aaf50..00000000
--- a/odoo_csv_tools/lib/internal/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from . import exceptions
-from . import tools
-from . import csv_reader
-from . import io
-from . import rpc_thread
diff --git a/odoo_csv_tools/lib/internal/csv_reader.py b/odoo_csv_tools/lib/internal/csv_reader.py
deleted file mode 100644
index e81c01c7..00000000
--- a/odoo_csv_tools/lib/internal/csv_reader.py
+++ /dev/null
@@ -1,58 +0,0 @@
-'''
-Created on 16 mai 2014
-
-@author: openerp
-'''
-from __future__ import absolute_import
-import sys
-#import csv, codecs
-if sys.version_info >= (3, 0, 0):
- import csv
-else:
- import unicodecsv as csv
-from io import StringIO
-import threading
-
-class UnicodeReader:
- """
- A CSV reader which will iterate over lines in the CSV file "f",
- which is encoded in the given encoding.
- """
-
- def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
- self.reader = csv.reader(f, dialect=dialect, **kwds)
-
- def next(self):
- #For python2
- return self.reader.next()
-
- def __next__(self):
- #For python3
- return self.reader.__next__()
-
- def __iter__(self):
- return self
-
-
-class UnicodeWriter:
- """
- A CSV writer which will write rows to CSV file "f",
- which is encoded in the given encoding.
- """
-
- def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
- # Redirect output to a queue
- self.stream = f
- self.writer = writer = csv.writer(f, dialect=dialect, **kwds)
- self.lock = threading.RLock()
-
- def writerow(self, row):
- self.lock.acquire()
- self.writer.writerow(row)
- self.lock.release()
-
- def writerows(self, rows):
- self.lock.acquire()
- self.writer.writerows(rows)
- self.stream.flush()
- self.lock.release()
diff --git a/odoo_csv_tools/lib/internal/exceptions.py b/odoo_csv_tools/lib/internal/exceptions.py
deleted file mode 100644
index 1234a9d6..00000000
--- a/odoo_csv_tools/lib/internal/exceptions.py
+++ /dev/null
@@ -1,9 +0,0 @@
-'''
-Created on 9 sept. 2016
-
-@author: Thibault Francois
-'''
-
-class SkippingException(Exception):
- def __init__(self, message):
- self.message = message
diff --git a/odoo_csv_tools/lib/internal/io.py b/odoo_csv_tools/lib/internal/io.py
deleted file mode 100644
index 05bc0609..00000000
--- a/odoo_csv_tools/lib/internal/io.py
+++ /dev/null
@@ -1,130 +0,0 @@
-'''
-Created on 10 sept. 2016
-
-@author: mythrys
-'''
-from __future__ import absolute_import
-
-import csv
-import os
-import sys
-from . csv_reader import UnicodeWriter, UnicodeReader
-
-"""
- Compatibility layer between python 2.7 and python 3
-"""
-def is_string(f):
- if sys.version_info >= (3, 0, 0):
- return isinstance(f, str)
- else:
- return isinstance(f, basestring)
-
-def open_read(f, encoding='utf-8'):
- if not is_string(f):
- return f
- if sys.version_info >= (3, 0, 0):
- return open(f, 'r', newline='', encoding=encoding)
- else:
- return open(f, 'r')
-
-def open_write(f, encoding='utf-8'):
- if not is_string(f):
- return f
- if sys.version_info >= (3, 0, 0):
- return open(f, "w", newline='', encoding=encoding)
- else:
- return open(f, "w")
-
-def write_csv(filename, header, data, encoding="utf-8"):
- file_result = open_write(filename, encoding=encoding)
- c = UnicodeWriter(file_result, delimiter=';', quoting=csv.QUOTE_ALL, encoding=encoding)
- c.writerow(header)
- for d in data:
- c.writerow(d)
- file_result.close()
-
-def write_file(filename=None, header=None, data=None, fail=False, model="auto",
- launchfile="import_auto.sh", worker=1, batch_size=10, init=False, encoding="utf-8",
- conf_file=False, groupby='', sep=";", python_exe='python', path='', context=None, ignore=""):
- def get_model():
- if model == "auto":
- return filename.split(os.sep)[-1][:-4]
- else:
- return model
-
- context = '--context="%s"' % str(context) if context else ''
- conf_file = conf_file or "%s%s%s" % ('conf', os.sep, 'connection.conf')
- write_csv(filename, header, data, encoding=encoding)
- if not launchfile:
- return
-
- if not path.endswith(os.sep):
- path = os.path.join(path, "")
-
- py_script = 'odoo_import_thread.py'
- os_cmd = os.path.join(path, py_script)
- if ' ' in os_cmd:
- os_cmd =''.join(('"', os_cmd, '"'))
-
- mode = init and 'w' or 'a'
- with open(launchfile, mode) as myfile:
- myfile.write("%s %s -c %s --file=%s --model=%s --encoding=%s --worker=%s --size=%s --groupby=%s --ignore=%s --sep=\"%s\" %s\n" %
- (python_exe, os_cmd, conf_file, filename, get_model(), encoding, worker, batch_size, groupby, ignore, sep, context))
- if fail:
- myfile.write("%s %s -c %s --fail --file=%s --model=%s --encoding=%s --ignore=%s --sep=\"%s\" %s\n" %
- (python_exe, os_cmd, conf_file, filename, get_model(), encoding, ignore, sep, context))
-
-
-################################################
-# Method to merge file together based on a key #
-################################################
-
-def write_file_dict(filename, header, data):
- data_rows = []
- for _, val in data.iteritems():
- r = [val.get(h, '') for h in header]
- data_rows.append(r)
- write_csv(filename, header, data_rows)
-
-
-
-def read_file_dict(file_name, id_name):
- file_ref = open(file_name, 'r')
- reader = UnicodeReader(file_ref, delimiter=';')
-
- head = reader.next()
- res = {}
- for line in reader:
- if any(line):
- line_dict = dict(zip(head, line))
- res[line_dict[id_name]] = line_dict
- return res, head
-
-def merge_file(master, child, field):
- res = {}
- for key, val in master.iteritems():
- data = dict(child.get(val[field], {}))
- new_dict = dict(val)
- new_dict.update(data)
- res[key] = new_dict
- return res
-
-
-def merge_header(*args):
- old_header = [item for sublist in args for item in sublist]
- header = []
- for h in old_header:
- if h and h not in header:
- header.append(h)
- return header
-
-class ListWriter(object):
- def __init__(self):
- self.data = []
- self.header = []
-
- def writerow(self, header):
- self.header = list(header)
-
- def writerows(self, line):
- self.data.extend(list(line))
diff --git a/odoo_csv_tools/lib/internal/rpc_thread.py b/odoo_csv_tools/lib/internal/rpc_thread.py
deleted file mode 100644
index 28a75604..00000000
--- a/odoo_csv_tools/lib/internal/rpc_thread.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#-*- coding: utf-8 -*-
-'''
-Created on 19 august 2016
-
-@author: Thibault Francois
-'''
-
-import threading
-
-class RpcThread(object):
-
- def __init__(self, max_connection):
- self.semaphore = threading.BoundedSemaphore(max_connection)
- self.max_thread_semaphore = threading.BoundedSemaphore(max_connection * 4)
- self.thread_list = []
-
- def spawn_thread(self, fun, args, kwarg=None):
- def wrapper(args, kwarg):
- kwarg = kwarg or {}
- self.semaphore.acquire()
- try:
- fun(*args, **kwarg)
- except:
- self.semaphore.release()
- self.max_thread_semaphore.release()
- raise
- self.semaphore.release()
- self.max_thread_semaphore.release()
- self.max_thread_semaphore.acquire()
-
- thread = threading.Thread(None, wrapper, None, [args, kwarg], {})
- thread.start()
- self.thread_list.append(thread)
-
- def wait(self):
- for t in self.thread_list:
- t.join()
-
- def thread_number(self):
- return len(self.thread_list)
diff --git a/odoo_csv_tools/lib/internal/tools.py b/odoo_csv_tools/lib/internal/tools.py
deleted file mode 100644
index 90bf0170..00000000
--- a/odoo_csv_tools/lib/internal/tools.py
+++ /dev/null
@@ -1,95 +0,0 @@
-'''
-Created on 9 sept. 2016
-
-@author: Thibault Francois
-'''
-from itertools import islice, chain
-
-def batch(iterable, size):
- sourceiter = iter(iterable)
- while True:
- batchiter = islice(sourceiter, size)
- try:
- yield chain([next(batchiter)], batchiter)
- except StopIteration:
- return
-"""
- Data formatting tools
-"""
-def to_xmlid(name):
- return name.replace('.', '_').replace(',', '_').replace('\n', '_').replace('|', '_').replace(' ', '_').strip()
-
-def list_to_xml_id(names):
- return '_'.join([to_xmlid(name) for name in names])
-
-def to_m2o(PREFIX, value, default=''):
- if not value:
- return default
- return PREFIX + '.' + to_xmlid(value)
-
-def to_m2m(PREFIX, value):
- if not value:
- return ''
-
- ids = []
- for val in value.split(','):
- if val.strip():
- ids.append(PREFIX + '.' + to_xmlid(val))
- return ','.join(ids)
-
-def generate_attribute_list(PREFIX, *attributes):
- header = ['id', 'name']
- lines = set()
- for att in attributes:
- lines.add((to_m2o(PREFIX, att), att))
- return header, lines
-
-"""
- Secondary data file helper
-
-"""
-class ReprWrapper(object):
- def __init__(self, repr_str, func):
- self._repr = repr_str
- self._func = func
-
- def __call__(self, *args, **kw):
- return self._func(*args, **kw)
-
- def __repr__(self):
- return self._repr
-
-class AttributeLineDict:
- def __init__(self, attribute_list_ids, id_gen_fun):
- self.data = {}
- self.att_list = attribute_list_ids
- self.id_gen = id_gen_fun
-
- def add_line(self, line, header):
- """
- line = ['product_tmpl_id/id' : id, 'attribute_id/id' : dict (att : id), 'value_ids/id' : dict(att: id)]
- """
- line_dict = dict(zip(header, line))
- if self.data.get(line_dict['product_tmpl_id/id']):
- for att_id, att in self.att_list:
- if not line_dict['attribute_id/id'].get(att):
- continue
- template_info = self.data[line_dict['product_tmpl_id/id']]
- template_info.setdefault(att_id, [line_dict['value_ids/id'][att]]).append(line_dict['value_ids/id'][att])
- else:
- d = {}
- for att_id, att in self.att_list:
- if line_dict['attribute_id/id'].get(att):
- d[att_id] = [line_dict['value_ids/id'][att]]
- self.data[line_dict['product_tmpl_id/id']] = d
-
- def generate_line(self):
- lines_header = ['id', 'product_tmpl_id/id', 'attribute_id/id', 'value_ids/id']
- lines_out = []
- for template_id, attributes in self.data.items():
- if not template_id:
- continue
- for attribute, values in attributes.items():
- line = [self.id_gen(template_id, attributes), template_id, attribute, ','.join(values)]
- lines_out.append(line)
- return lines_header, lines_out
diff --git a/odoo_csv_tools/lib/mapper.py b/odoo_csv_tools/lib/mapper.py
deleted file mode 100644
index 151a5684..00000000
--- a/odoo_csv_tools/lib/mapper.py
+++ /dev/null
@@ -1,381 +0,0 @@
-"""
- Mapper
-"""
-from . internal.tools import to_m2m, to_m2o
-from . internal.io import is_string
-from . internal.exceptions import SkippingException
-import base64
-import os
-import requests
-
-def str_to_mapper(field):
- if is_string(field):
- return val(field)
- return field
-
-def list_to_mapper(args):
- return [val(f) if is_string(f) else f for f in args]
-
-
-def field(col):
- """ Return the col name if the col value for the given line is not empty
- Use for product.attribute mapping
- """
- def field_fun(line):
- return col if line[col] else ''
- return field_fun
-
-def const(value):
- def const_fun(line):
- return value
- return const_fun
-
-def val(field, default='', postprocess=lambda x: x, skip=False):
- def val_fun(line):
- if not line[field] and skip:
- raise SkippingException("Missing Value for %s" % field)
- return postprocess(line.get(field, default) or default)
- return val_fun
-
-def val_fallback(field, fallback_file, default='', postprocess=lambda x: x, skip=False):
- def val_fun(line):
- if not line[field] and not line[fallback_file] and skip:
- raise SkippingException("Missing Value for %s" % field)
- value = line[field] or line[fallback_file] or default
- return postprocess(value)
- return val_fun
-
-def val_label(field, default='', postprocess=lambda x: x, skip=False):
- val_m = val(field, default=default, postprocess=postprocess, skip=skip)
- def val_label_fun(line):
- return "%s : %s" % (field, val_m(line))
- return val_label_fun
-
-def concat_mapper(separtor, *mapper):
- def concat_fun(line):
- return separtor.join([m(line) for m in mapper if m(line)])
- return concat_fun
-
-def concat_mapper_all(separtor, *mapper):
- """
- Same as concat mapper, but if one value in the list of value to concat is empty, the all value return is
- an empty string
- Use for product.attribute
- """
- def concat_fun(line):
- values = [m(line) for m in mapper]
- if not all(values):
- return ''
- return separtor.join(values)
- return concat_fun
-
-
-def concat(separtor, *fields):
- return concat_mapper(separtor, *[val(f) for f in fields])
-
-def concat_field(separtor, *fields):
- return concat_mapper(separtor, *[val_label(f) for f in fields])
-
-def concat_field_value_m2m(separator, *args):
- def concat_name_value_fun(line):
- return ','.join([separator.join([f, line[f]]) for f in args if line[f]])
- return concat_name_value_fun
-
-def map_val(field, mapping, default=''):
- return val(field, postprocess=lambda x : mapping.get(x, default))
-
-def num(field, default='0.0'):
- return val(field, default, postprocess=lambda x: x.replace(',', '.'))
-
-def m2o_map(PREFIX, mapper, default='', skip=False):
- def m2o_fun(line):
- if skip and not mapper(line):
- raise SkippingException("Missing Value for %s" % mapper(line))
- return to_m2o(PREFIX, mapper(line), default=default)
- return m2o_fun
-
-def m2o(PREFIX, field, default='', skip=False):
- def m2o_fun(line):
- if skip and not line[field]:
- raise SkippingException("Missing Value for %s" % field)
- return to_m2o(PREFIX, line[field], default=default)
- return m2o_fun
-
-def m2m(PREFIX, *args):
- """
- @param args: list of string that should be included into the m2m field
- """
- #TODO: add default
- def m2m_fun(line):
- return ','.join([to_m2m(PREFIX, line[f]) for f in args if line[f]])
- return m2m_fun
-
-def m2m_map(PREFIX, mapper):
- """
- @param args: list of string that should be included into the m2m field
- """
- #TODO: add default
- def m2m_fun(line):
- return to_m2m(PREFIX, mapper(line))
- return m2m_fun
-
-def bool_val(field, true_vals=[], false_vals=[]):
- def bool_val_fun(line):
- if line[field] in true_vals:
- return '1'
- if line[field] in false_vals:
- return '0'
- return '1' if line[field] else '0'
- return bool_val_fun
-
-def binary_map(mapper, path_prefix, skip=False, encoding="utf-8"):
- def binary_val(line):
- field = mapper(line)
- path = path_prefix + (mapper(line) or '')
- if not os.path.exists(path) or not field:
- if skip:
- raise SkippingException("Missing File %s for field %s" % (path, field))
- return ''
-
- with open(path, "rb") as image_file:
- encoded_string = base64.b64encode(image_file.read()).decode(encoding)
- image_file.close()
- return encoded_string
- return binary_val
-
-def binary(field, path_prefix, skip=False, encoding="utf-8"):
- return binary_map(val(field), path_prefix, skip=skip, encoding=encoding)
-
-
-
-def binary_url_map(mapper, skip=False, verbose=False, encoding="utf-8"):
- def binary_url_fun(line):
- url = mapper(line)
- if verbose:
- print("Fetch %s" % url)
- res = requests.get(url)
- if not res.status_code == 200:
- if skip:
- raise SkippingException("Cannot fetch file at url %s" % url)
- return ''
-
- return base64.b64encode(res.content).decode(encoding)
- return binary_url_fun
-
-def binary_url(field, skip=False, verbose=False):
- return binary_url_map(val(field), skip=skip, verbose=verbose)
-
-
-
-"""
- Specific to attribute mapper for V9 product.attribute_import
-"""
-
-def val_att(att_list):
- def val_att_fun(line):
- return { att : line[att] for att in att_list if line[att]}
- return val_att_fun
-
-def m2o_att(PREFIX, att_list):
- def m2o_att_fun(line):
- return { att : to_m2o(PREFIX, '_'.join([att, line[att]])) for att in att_list if line[att]}
- return m2o_att_fun
-
-def m2o_att_name(PREFIX, att_list):
- def m2o_att_fun(line):
- return { att : to_m2o(PREFIX, att) for att in att_list if line[att]}
- return m2o_att_fun
-
-def m2m_attribute_value(PREFIX, *args):
- return m2m_map(PREFIX, concat_field_value_m2m('_', *args))
-
-"""
-Specific to attribute mapper for V13+ product.template.attribute.value
-"""
-
-def m2m_template_attribute_value(PREFIX, template_id_field, *args):
- """
- Generates a mapping function for product.template.attribute.value XMLIDs,
- including the product template identifier.
-
- This function is specifically designed to create a mapper that constructs
- comma-separated strings of XML IDs for product attribute values, incorporating
- the identifier of the associated product template. This is useful when you need
- to establish relationships based on attribute values within a specific product template context.
-
- Args:
- PREFIX (str): The prefix to use for the generated XML IDs
- (e.g., 'PRODUCT_ATTRIBUTE_VALUE'). This prefix should
- be consistent with how your XML IDs are structured.
- template_id_field (str): The name of the field/column in the CSV data
- that contains the identifier (e.g., XML ID,
- database ID, or other unique key) of the
- related product template. This identifier
- will be included in the generated XML IDs.
- *args (str): A variable number of field/column names from the CSV data
- that represent attribute values. These values will be
- used to construct the XML IDs.
-
- Returns:
- function: A mapper function that takes a CSV row (as a dictionary) as
- input and returns a comma-separated string of generated XML IDs.
- If the 'template_id_field' is missing in the CSV row, it returns an empty string.
-
- Example:
- Assuming you have a CSV with columns 'product_template_ref', 'color', and 'size',
- and your XML IDs for product attribute values are like
- 'PRODUCT_ATTRIBUTE_VALUE_product_template_ref_color_red',
- you would use:
-
- mapper.m2m_template_attribute_value('PRODUCT_ATTRIBUTE_VALUE', 'product_template_ref', 'color', 'size')
-
- Important Notes:
- - The generated XML IDs are constructed by concatenating the 'PREFIX',
- the value from 'template_id_field', and the values from the provided
- attribute columns.
- - The function handles cases where the 'template_id_field' might be
- missing in the CSV data, returning an empty string to avoid errors.
- - Ensure that the 'PREFIX' and the column names in 'args' are consistent
- with your actual data structure and XML ID conventions.
- """
-
- def m2m_fun(line):
- template_id = line.get(template_id_field)
- if not template_id:
- return "" # Handle cases where template ID is missing
-
- def mapper(line):
- return ','.join([f"{template_id}_{f}_{line[f]}" for f in args if line[f]])
-
- return to_m2m(PREFIX, mapper(line))
-
- return m2m_fun
-
-
-"""
- Mapper that require rpc Connection (conf_lib)
-"""
-def database_id_mapper(PREFIX, field, connection, skip=False):
- def database_id_mapper_fun(line):
- res = to_m2o(PREFIX, line[field])
- if res:
- module, name = res.split('.')
- rec = connection.get_model('ir.model.data').search_read([('module', '=', module), ('name', '=', name)], ['res_id'])
- if rec and rec[0]['res_id']:
- return str(rec[0]['res_id'])
- if skip:
- raise SkippingException("%s not found" % res)
- return ''
- return database_id_mapper_fun
-
-def database_id_mapper_fallback(connection, *fields_mapper, **kwargs):
- skip = kwargs.get("skip")
- def database_id_mapper_fun(line):
- res = [f(line) for f in fields_mapper if f(line)]
- if res:
- res = res[0]
- module, name = res.split('.')
- rec = connection.get_model('ir.model.data').search_read([('module', '=', module), ('name', '=', name)], ['res_id'])
- if rec and rec[0]['res_id']:
- return str(rec[0]['res_id'])
- if skip:
- raise SkippingException("%s not found" % res)
- return ''
- return database_id_mapper_fun
-
-def database_id_mapper_fallback_create(connection, model, *fields_mapper, **kwargs):
- skip = kwargs.get("skip")
- def database_id_mapper_fun(line):
- res = [f(line) for f in fields_mapper if f(line)]
- if res:
- res = res[0]
- module, name = res.split('.')
- rec = connection.get_model('ir.model.data').search_read([('module', '=', module), ('name', '=', name)], ['res_id'])
- if rec and rec[0]['res_id']:
- return str(rec[0]['res_id'])
- else:
- connection.get_model(model).load(['id', 'name'], [[res, res]], context={'tracking_disable' : True, 'create_product_variant' : True,})
- return database_id_mapper_fun(line)
- if skip:
- raise SkippingException("%s not found" % res)
- return ''
- return database_id_mapper_fun
-
-
-
-#For many2many specific process
-def m2m_id_list(PREFIX, *args, **kwargs):
- """
- @param args: list of string that should be included into the m2m field
- @param const_values: constant values that will be add to all line
- """
- const_values = kwargs.get("const_values", [])
- def split_m2m_id_fun(line):
- """ Return a list of unique element (xml_id, name)
- """
- map_list = list_to_mapper(args)
- value = ','.join([to_m2m(PREFIX, m(line)) for m in map_list if m(line)] + const_values)
- s = []
- for val in value.split(','):
- if val.strip():
- s.append(val)
- return s
- return split_m2m_id_fun
-
-def m2m_value_list(*args, **kwargs):
- """
- @param args: list of string that should be included into the m2m field
- @param const_values: constant values that will be add to all line
- """
- const_values = kwargs.get("const_values", [])
- def split_m2m_value_fun(line):
- """ Return a list of unique element value
- """
- map_list = list_to_mapper(args)
- value = ','.join([m(line) for m in map_list if m(line)] + const_values)
- s = []
- for val in value.split(','):
- if val.strip():
- s.append(val)
- return s
- return split_m2m_value_fun
-
-def remove_sep_mapper(f):
- """
- @param f: field that will have the starting folder separator removed
- """
- def remove_sep_mapper_fun(line):
- if line[f].startswith(os.sep):
- return line[f][len(os.sep):]
- else:
- return line[f]
- return remove_sep_mapper_fun
-
-
-##############################
-# #
-# Split Mapper #
-# #
-##############################
-
-def split_line_number(line_nb):
- """
- Return a function that can we used by split method from Processor class,
- this function will split the data every x lines where x is given by the param line_nb
- :param line_nb:
- """
- def split(line, i):
- return divmod(i, line_nb)[0]
- return split
-
-
-def split_file_number(file_nb):
- """
- Return a function that can we used by split method from Processor class,
- this function will split the data into x file where x is given by the param file_nb
- Order of data is not kept
- :param line_nb:
- """
- def split(line, i):
- return divmod(i, file_nb)[1]
- return split
diff --git a/odoo_csv_tools/lib/transform.py b/odoo_csv_tools/lib/transform.py
deleted file mode 100644
index 6fbe4a6f..00000000
--- a/odoo_csv_tools/lib/transform.py
+++ /dev/null
@@ -1,250 +0,0 @@
-#-*- coding: utf-8 -*-
-'''
-Created on 10 sept. 2016
-
-@author: Thibault Francois
-'''
-import os
-
-from collections import OrderedDict
-
-from . internal.csv_reader import UnicodeReader
-from . internal.tools import ReprWrapper, AttributeLineDict
-from . internal.io import write_file, is_string, open_read
-from . internal.exceptions import SkippingException
-from . import mapper
-
-class Processor(object):
- def __init__(self, filename=None, delimiter=";", encoding='utf-8', header=None, data=None, preprocess=lambda header, data: (header, data), conf_file=False):
- self.file_to_write = OrderedDict()
- if header and data:
- self.header = header
- self.data = data
- elif filename:
- self.header, self.data = self.__read_file(filename, delimiter, encoding)
- else:
- raise Exception("No Filename nor header and data provided")
- self.header, self.data = preprocess(self.header, self.data)
- self.conf_file = conf_file
-
- def check(self, check_fun, message=None):
- res = check_fun(self.header, self.data)
- if not res:
- if message:
- print(message)
- else:
- print("%s failed" % check_fun.__name__)
- return res
-
- def split(self, split_fun):
- res = {}
- for i, d in enumerate(self.data):
- k = split_fun(dict(zip(self.header, d)), i)
- res.setdefault(k, []).append(d)
- processor_dict = {}
- for k, data in res.items():
- processor_dict[k] = Processor(header=list(self.header), data=data)
- return processor_dict
-
- def get_o2o_mapping(self):
- """Will generate a mapping with 'key' : mapper.val('key') for each key
-
- you can print using pprint to print the equivalent python of the mapping to use it in your file
-
- :return: a dict where the key is a str and the value a mapper.val function,
- the key and the field pass to the mapper are identical
-
- {
- 'id' : mapper.val('id'),
- .....
- }
- """
- mapping = {}
- for column in [h for h in self.header if h]:
- map_val_rep = ReprWrapper("mapper.val('%s')" %column, mapper.val(column))
- mapping[str(column)] = map_val_rep
- return mapping
-
- def process(self, mapping, filename_out, import_args, t='list', null_values=['NULL', False], verbose=True, m2m=False):
- if m2m:
- head, data = self.__process_mapping_m2m(mapping, null_values=null_values, verbose=verbose)
- else:
- head, data = self.__process_mapping(mapping, t=t, null_values=null_values, verbose=verbose)
- self._add_data(head, data, filename_out, import_args)
- return head, data
-
- def write_to_file(self, script_filename, fail=True, append=False, python_exe='python', path='', encoding='utf-8'):
- init = not append
- for _, info in self.file_to_write.items():
- info_copy = dict(info)
- info_copy.update({
- 'model' : info.get('model', 'auto'),
- 'init' : init,
- 'launchfile' : script_filename,
- 'fail' : fail,
- 'python_exe' : python_exe,
- 'path' : path,
- 'conf_file' : self.conf_file,
- 'encoding': encoding,
- })
-
- write_file(**info_copy)
- init = False
-
- def get_processed_data(self, filename_out):
- return self.file_to_write[filename_out]
-
- def join_file(self, filename, master_key, child_key, header_prefix="child", delimiter=";", encoding='utf-8'):
- """
- Join another file with the main file defined in the constructor.
- Need a key (column name) on the master file and on the file to join
- The line of the file to join will be added a the end of a line if
- the value of the column master_key match the value of the column child_key
-
- If the key is not found in the file to join, empty cell are added at the end of the master file
-
- A prefix is added (after the merge operation) to all the column of the child file
- to avoid collision with the header of the master file
-
- E.g.: join_file(filename, 'category_id', 'name')
- Master file | Child file
- name category_id | name color
- A A | A Blue
- B A | B Red
- C B
- D B
- E C
-
- Final File
- name category_id child_name child_color
- A A A Blue
- B A A Blue
- C B B Red
- D B B Red
- E C
- """
- header, data = self.__read_file(filename, delimiter, encoding)
- child_key_pos = header.index(child_key)
- master_key_pos = self.header.index(master_key)
-
- data_map = {}
- for d in data:
- data_map[d[child_key_pos]] = d
-
- for d in self.data:
- if data_map.get(d[master_key_pos]):
- d.extend(data_map[d[master_key_pos]])
- else:
- d.extend([""] * len(header))
-
- self.header += ["%s_%s" % (header_prefix, h) for h in header]
-
- ########################################
- # #
- # Private Method #
- # #
- ########################################
- def __read_file(self, filename, delimiter, encoding):
- file_ref = open_read(filename, encoding=encoding)
- reader = UnicodeReader(file_ref, delimiter=delimiter, encoding=encoding)
- head = next(reader)
- data = [d for d in reader]
- return head, data
-
- def __process_mapping(self, mapping, t, null_values, verbose):
- """
- @param t: type of return, list or set
- """
- lines_out = [] if t == 'list' else set()
- for i, line in enumerate(self.data):
- line = [s.strip() if s and s.strip() not in null_values else '' for s in line]
- line_dict = dict(zip(self.header, line))
- try:
- line_out = [mapping[k](line_dict) for k in mapping.keys()]
- except SkippingException as e:
- if verbose:
- print("Skipping", i)
- print(e.message)
- continue
- if t == 'list':
- lines_out.append(line_out)
- else:
- lines_out.add(tuple(line_out))
- return mapping.keys(), lines_out
-
- def __process_mapping_m2m(self, mapping, null_values, verbose):
- """
-
- """
- head, data = self.__process_mapping(mapping, 'list', null_values, verbose)
- lines_out = set()
- for line_out in data:
- index_list = []
- zip_list = []
- for index, value in enumerate(line_out):
- if isinstance(value, list):
- index_list.append(index)
- zip_list.append(value)
- values_list = zip(*zip_list)
- for values in values_list:
- new_line = list(line_out)
- for i, val in enumerate(values):
- new_line[index_list[i]] = val
- lines_out.add(tuple(new_line))
-
- return head, lines_out
-
- def _add_data(self, head, data, filename_out, import_args):
- import_args = dict(import_args)
- import_args['filename'] = os.path.abspath(filename_out) if filename_out else False
- import_args['header'] = head
- import_args['data'] = data
- self.file_to_write[filename_out] = import_args
-
-
-class ProductProcessorV9(Processor):
- def __generate_attribute_data(self, attributes_list, ATTRIBUTE_PREFIX):
- self.attr_header = ['id', 'name']
- self.attr_data = [[mapper.to_m2o(ATTRIBUTE_PREFIX, att), att] for att in attributes_list]
-
- def process_attribute_mapping(self, mapping, line_mapping, attributes_list, ATTRIBUTE_PREFIX, path, import_args, id_gen_fun=None, null_values=['NULL']):
- """
- Mapping : name is mandatory vat_att(attribute_list)
- """
- def add_value_line(values_out, line):
- for att in attributes_list:
- value_name = line[list(mapping.keys()).index('name')].get(att)
- if value_name:
- line_value = [ele[att] if isinstance(ele, dict) else ele for ele in line]
- values_out.add(tuple(line_value))
-
- id_gen_fun = id_gen_fun or (lambda template_id, values : mapper.to_m2o(template_id.split('.')[0] + '_LINE', template_id))
-
- values_header = mapping.keys()
- values_data = set()
-
- self.__generate_attribute_data(attributes_list, ATTRIBUTE_PREFIX)
- att_data = AttributeLineDict(self.attr_data, id_gen_fun)
- for line in self.data:
- line = [s.strip() if s.strip() not in null_values else '' for s in line]
- line_dict = dict(zip(self.header, line))
- line_out = [mapping[k](line_dict) for k in mapping.keys()]
-
- add_value_line(values_data, line_out)
- values_lines = [line_mapping[k](line_dict) for k in line_mapping.keys()]
- att_data.add_line(values_lines, line_mapping.keys())
-
- line_header, line_data = att_data.generate_line()
- context = import_args.get('context', {})
- context['create_product_variant'] = True
- import_args['context'] = context
- self._add_data(self.attr_header, self.attr_data, path + 'product.attribute.csv', import_args)
- self._add_data(values_header, values_data, path + 'product.attribute.value.csv', import_args)
- import_args = dict(import_args, groupby='product_tmpl_id/id')
- self._add_data(line_header, line_data, path + 'product.attribute.line.csv', import_args)
-
-class ProductProcessorV10(Processor):
- def process_attribute_data(self, attributes_list, ATTRIBUTE_PREFIX, filename_out, import_args):
- attr_header = ['id', 'name', 'create_variant']
- attr_data = [[mapper.to_m2o(ATTRIBUTE_PREFIX, att), att, 'Dynamically'] for att in attributes_list]
- self._add_data(attr_header, attr_data, filename_out, import_args)
diff --git a/odoo_csv_tools/lib/workflow.py b/odoo_csv_tools/lib/workflow.py
deleted file mode 100644
index 82011ec9..00000000
--- a/odoo_csv_tools/lib/workflow.py
+++ /dev/null
@@ -1,156 +0,0 @@
-'''
-Created on 7 avr. 2016
-
-@author: odoo
-'''
-#from __future__ import absolute_import
-import sys
-if sys.version_info >= (3, 0, 0):
- from xmlrpc.client import Fault
-else:
- from xmlrpclib import Fault
-
-from time import time
-from . internal.rpc_thread import RpcThread
-
-class InvoiceWorkflowV9():
- def __init__(self, connection, field, status_map, paid_date_field, payment_journal, max_connection=4):
- """
- @param connection : need to use a jsonrpc connection
- @param field: the that contains the state imported from legacy data
- @param status_map: dict that contains the mapping between the odoo invoice status and legacy system status
- the value should be a list
- {
- 'open' : ['satus1'],
- 'paid' : ['status2', 'status3'],
- 'cancel' : ...
- 'proforma' :
- }
- """
- self.connection = connection
- self.invoice_obj = connection.get_model('account.invoice')
- self.payement_obj = connection.get_model('account.payment')
- self.account_invoice_tax = self.connection.get_model('account.invoice.tax')
- self.field = field
- self.status_map = status_map
- self.paid_date = paid_date_field
- self.payment_journal = payment_journal
- self.max_connection = max_connection
-
- def display_percent(self, i, percent_step, total):
- if i % percent_step == 0:
- print("%s%% : %s/%s time %s sec" % (round(i / float(total) * 100, 2), i, total, time() - self.time))
-
- def set_tax(self):
- def create_tax(invoice_id):
- taxes = self.invoice_obj.get_taxes_values(invoice_id)
- for tax in taxes.values():
- self.account_invoice_tax.create(tax)
-
- invoices = self.invoice_obj.search([('state', '=', 'draft'),
- ('type', '=', 'out_invoice'),
- ('tax_line_ids', '=', False)])
- total = len(invoices)
- percent_step = int(total / 5000) or 1
- self.time = time()
- rpc_thread = RpcThread(self.max_connection)
- print("Compute Tax %s invoice" % total)
- for i, invoice_id in enumerate(invoices):
- self.display_percent(i, percent_step, total)
- rpc_thread.spawn_thread(create_tax, [invoice_id])
- rpc_thread.wait()
-
- def validate_invoice(self):
- invoice_to_validate = self.invoice_obj.search([(self.field, 'in', self.status_map['open'] + self.status_map['paid']),
- ('state', '=', 'draft'),
- ('type', '=', 'out_invoice')])
- total = len(invoice_to_validate)
- percent_step = int(total / 5000) or 1
- rpc_thread = RpcThread(1)
- print("Validate %s invoice" % total)
- self.time = time()
- for i, invoice_id in enumerate(invoice_to_validate):
- self.display_percent(i, percent_step, total)
- fun = self.connection.get_service('object').exec_workflow
- rpc_thread.spawn_thread(fun, [self.connection.database,
- self.connection.user_id,
- self.connection.password,
- 'account.invoice',
- 'invoice_open',
- invoice_id])
- rpc_thread.wait()
-
- def proforma_invoice(self):
- invoice_to_proforma = self.invoice_obj.search([(self.field, 'in', self.status_map['proforma']),
- ('state', '=', 'draft'),
- ('type', '=', 'out_invoice')])
- total = len(invoice_to_proforma)
- percent_step = int(total / 100) or 1
- self.time = time()
- rpc_thread = RpcThread(self.max_connection)
- print("Pro Format %s invoice" % total)
- for i, invoice_id in enumerate(invoice_to_proforma):
- self.display_percent(i, percent_step, total)
- fun = self.connection.get_service('object').exec_workflow()
- rpc_thread.spawn_thread(fun, [self.connection.database,
- self.connection.user_id,
- self.connection.password,
- 'account.invoice',
- 'invoice_proforma2',
- invoice_id], {})
- rpc_thread.wait()
-
- def paid_invoice(self):
- def pay_single_invoice(data_update, wizard_context):
- data = self.payement_obj.default_get(["communication", "currency_id", "invoice_ids",
- "payment_difference", "partner_id", "payment_method_id",
- "payment_difference_handling", "journal_id",
- "state", "writeoff_account_id", "payment_date",
- "partner_type", "hide_payment_method",
- "payment_method_code", "partner_bank_account_id",
- "amount", "payment_type"], context=wizard_context)
- data.update(data_update)
- wizard_id = self.payement_obj.create(data, context=wizard_context)
- try:
- self.payement_obj.post([wizard_id], context=wizard_context)
- except Fault:
- pass
-
-
- invoice_to_paid = self.invoice_obj.search_read([(self.field, 'in', self.status_map['paid']), ('state', '=', 'open'), ('type', '=', 'out_invoice')],
- [self.paid_date, 'date_invoice'])
- total = len(invoice_to_paid)
- percent_step = int(total / 1000) or 1
- self.time = time()
- rpc_thread = RpcThread(self.max_connection)
- print("Paid %s invoice" % total)
- for i, invoice in enumerate(invoice_to_paid):
- self.display_percent(i, percent_step, total)
- wizard_context = {
- 'active_id' : invoice['id'],
- 'active_ids' : [invoice['id']],
- 'active.model' : 'account.invoice',
- 'default_invoice_ids' : [(4, invoice['id'], 0)],
- 'type' : "out_invoice",
- "journal_type":"sale"
- }
- data_update = {
- 'journal_id' : self.payment_journal, #payement journal
- 'payment_date' : invoice[self.paid_date] or invoice['date_invoice'],
- 'payment_method_id' : 1,
- }
- rpc_thread.spawn_thread(pay_single_invoice, [data_update, wizard_context], {})
- rpc_thread.wait()
-
- def rename(self, name_field):
- invoice_to_paid = self.invoice_obj.search_read([(name_field, '!=', False),(name_field, '!=', '0.0'),('state', '!=', 'draft'), ('type', '=', 'out_invoice')],
- [name_field])
- total = len(invoice_to_paid)
- percent_step = int(total / 1000) or 1
- self.time = time()
- rpc_thread = RpcThread(int(self.max_connection * 1.5))
- print("Rename %s invoice" % total)
- for i, invoice in enumerate(invoice_to_paid):
- self.display_percent(i, percent_step, total)
- rpc_thread.spawn_thread(self.invoice_obj.write, [invoice['id'], {'number' : invoice[name_field], name_field : False}], {})
- rpc_thread.wait()
diff --git a/odoo_csv_tools/lib/xml_transform.py b/odoo_csv_tools/lib/xml_transform.py
deleted file mode 100644
index cc602386..00000000
--- a/odoo_csv_tools/lib/xml_transform.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#-*- coding: utf-8 -*-
-from . import transform
-from collections import OrderedDict
-from lxml import etree
-
-
-class XMLProcessor(transform.Processor):
- def __init__(self, filename, root_node_path, conf_file=False): # Add conf_file parameter
- super().__init__(filename=filename) # Call Processor's __init__
- self.root = etree.parse(filename)
- self.root_path = root_node_path
- self.file_to_write = OrderedDict()
- self.conf_file = conf_file # Initialize conf_file
-
- def process(self, mapping, filename_out, import_args, t='list', null_values=['NULL', False], verbose=True, m2m=False):
- """
- Transforms data from the XML file based on the provided mapping.
-
- Args:
- mapping (dict): A dictionary that defines how data from the XML file
- should be mapped to fields in the output format (e.g., CSV).
- The keys of the dictionary are the target field names,
- and the values are XPath expressions to extract the
- corresponding data from the XML.
- filename_out (str): The name of the output file where the transformed
- data will be written.
- import_args (dict): A dictionary containing arguments that will be
- passed to the `odoo_import_thread.py` script
- (e.g., `{'model': 'res.partner', 'context': "{'tracking_disable': True}"}`).
- t (str, optional): This argument is kept for compatibility but is not
- used in `XMLProcessor`. Defaults to 'list'.
- null_values (list, optional): This argument is kept for compatibility
- but is not used in `XMLProcessor`.
- Defaults to `['NULL', False]`.
- verbose (bool, optional): This argument is kept for compatibility but
- is not used in `XMLProcessor`. Defaults to
- `True`.
- m2m (bool, optional): This argument is kept for compatibility but is
- not used in `XMLProcessor`. Defaults to `False`.
-
- Returns:
- tuple: A tuple containing the header (list of field names) and the
- transformed data (list of lists).
-
- Important Notes:
- - The `t`, `null_values`, `verbose`, and `m2m` arguments are present
- for compatibility with the `Processor` class but are not actually
- used by the `XMLProcessor`.
- - The `mapping` dictionary values should be XPath expressions that
- select the desired data from the XML nodes.
- """
- header = mapping.keys()
- lines = []
- for r in self.root.xpath(self.root_path):
- line = [r.xpath(mapping[k])[0] for k in header]
- lines.append(line)
- self._add_data(header, lines, filename_out, import_args)
- return header, lines
-
- def split(self, split_fun):
- raise NotImplementedError("Method split not supported for XMLProcessor")
diff --git a/odoo_csv_tools/migrate.py b/odoo_csv_tools/migrate.py
deleted file mode 100644
index 6775d304..00000000
--- a/odoo_csv_tools/migrate.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env python
-#-*- coding: utf-8 -*-
-'''
-Created on 27 déc. 2016
-
-@author: Thibault Francois
-'''
-from lib.transform import Processor
-from export_threaded import export_data
-from import_threaded import import_data
-
-class Migrator(object):
-
- def __init__(self, config_export, config_import):
- self.config_export = config_export
- self.config_import = config_import
- self.import_batch_size = 10
- self.import_max_con = 1
- self.export_batch_size = 100
- self.export_max_con = 1
-
- def migrate(self, model, domain, field_export, mappings=[None]):
- header, data = export_data(self.config_export, model, domain, field_export, max_connection=self.export_max_con, batch_size=self.export_batch_size)
- processor = Processor(header=header, data=data)
- for mapping in mappings:
- if not mapping:
- mapping = processor.get_o2o_mapping()
- to_import_header, to_import_data = processor.process(mapping, False, {})
- import_data(self.config_import, model, header=to_import_header, data=to_import_data, max_connection=self.import_max_con, batch_size=self.import_batch_size)
\ No newline at end of file
diff --git a/odoo_export_thread.py b/odoo_export_thread.py
deleted file mode 100755
index 04e6c400..00000000
--- a/odoo_export_thread.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-'''
-Copyright (C) Thibault Francois
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as
-published by the Free Software Foundation, version 3.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Lesser Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with this program. If not, see .
-'''
-
-import argparse
-from odoo_csv_tools import export_threaded
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='Import data in batch and in parallel')
- parser.add_argument('-c', '--config', dest='config', default="conf/connection.conf",
- help='Configuration File that contains connection parameters', required=True)
- parser.add_argument('--file', dest='filename', help='Output File', required=True)
- parser.add_argument('--model', dest='model', help='Model to Export', required=True)
- parser.add_argument('--field', dest='fields', help='Fields to Export', required=True)
- parser.add_argument('--domain', dest='domain', help='Filter', default="[]")
- parser.add_argument('--worker', dest='worker', default=1, help='Number of simultaneous connection')
- parser.add_argument('--size', dest='batch_size', default=10, help='Number of line to import per connection')
- parser.add_argument('-s', '--sep', dest="separator", default=";", help='CSV separator')
- parser.add_argument('--context', dest='context',
- help='context that will be passed to the load function, need to be a valid python dict',
- default="{'tracking_disable' : True}")
- parser.add_argument('--encoding', dest='encoding', default="utf-8", help='Encoding of the data file')
- # TODO args : encoding
- # {'update_many2many': True,'tracking_disable' : True, 'create_product_variant' : True, 'check_move_validity' : False}
- args = parser.parse_args()
-
- config_file = args.config
- file_csv = args.filename
- batch_size = int(args.batch_size)
- model = args.model
- max_connection = int(args.worker)
- separator = args.separator
- encoding = args.encoding
- context = eval(args.context)
- domain = eval(args.domain)
- header = args.fields.split(',')
- export_threaded.export_data(config_file, model, domain, header, context=context, output=file_csv,
- max_connection=max_connection, batch_size=batch_size, separator=separator,
- encoding=encoding)
diff --git a/odoo_import_thread.py b/odoo_import_thread.py
deleted file mode 100755
index 402fb479..00000000
--- a/odoo_import_thread.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env python
-#-*- coding: utf-8 -*-
-'''
-Copyright (C) Thibault Francois
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as
-published by the Free Software Foundation, version 3.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Lesser Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with this program. If not, see .
-'''
-
-import argparse
-from odoo_csv_tools import import_threaded
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='Import data in batch and in parallel')
- parser.add_argument('-c', '--config', dest='config', default="conf/connection.conf", help='Configuration File that contains connection parameters', required = True)
- parser.add_argument('--file', dest='filename', help='File to import', required = True)
- parser.add_argument('--model', dest='model', help='Model to import', required = True)
- parser.add_argument('--worker', dest='worker', default=1, help='Number of simultaneous connection')
- parser.add_argument('--size', dest='batch_size', default=10, help='Number of line to import per connection')
- parser.add_argument('--skip', dest='skip', default=0, help='Skip until line [SKIP]')
- parser.add_argument('--fail', action='store_true', dest="fail", help='Fail mode')
- parser.add_argument('-s', '--sep', dest="separator", default=";", help='CSV separator')
- parser.add_argument('--groupby', dest='split', help='Group data per batch with the same value for the given column in order to avoid concurrent update error')
- parser.add_argument('--ignore', dest='ignore', help='list of column separate by comma. Those column will be remove from the import request')
- parser.add_argument('--check', dest='check', action='store_true', help='Check if record are imported after each batch.')
- parser.add_argument('--context', dest='context', help='context that will be passed to the load function, need to be a valid python dict', default="{'tracking_disable' : True}")
- parser.add_argument('--o2m', action='store_true', dest="o2m", help="When you want to import o2m field, don't cut the batch until we find a new id")
- parser.add_argument('--encoding', dest='encoding', default="utf-8", help='Encoding of the data file')
- #TODO args : encoding
- #{'update_many2many': True,'tracking_disable' : True, 'create_product_variant' : True, 'check_move_validity' : False}
- args = parser.parse_args()
-
- file_csv = args.filename
- batch_size = int(args.batch_size)
- fail_file = file_csv + ".fail"
- max_connection = int(args.worker)
- split = False
- encoding= args.encoding
- context= eval(args.context)
- ignore = False
- if args.ignore:
- ignore = args.ignore.split(',')
-
- if args.fail:
- file_csv = fail_file
- fail_file = fail_file + ".bis"
- batch_size = 1
- max_connection = 1
- split = False
-
- import_threaded.import_data(args.config, args.model, file_csv=file_csv, context=context,
- fail_file=fail_file, encoding=encoding, separator=args.separator,
- ignore=ignore, split=args.split, check=args.check,
- max_connection=max_connection, batch_size=batch_size, skip=int(args.skip), o2m=args.o2m)
diff --git a/pics/account_move.png b/pics/account_move.png
deleted file mode 100644
index 0eccf182..00000000
Binary files a/pics/account_move.png and /dev/null differ
diff --git a/pics/cascade_update.png b/pics/cascade_update.png
deleted file mode 100644
index 691d16d4..00000000
Binary files a/pics/cascade_update.png and /dev/null differ
diff --git a/pics/fail.png b/pics/fail.png
deleted file mode 100644
index c8baed82..00000000
Binary files a/pics/fail.png and /dev/null differ
diff --git a/pics/group_by_1.png b/pics/group_by_1.png
deleted file mode 100644
index 97b8fc6f..00000000
Binary files a/pics/group_by_1.png and /dev/null differ
diff --git a/pics/group_by_2.png b/pics/group_by_2.png
deleted file mode 100644
index 16540b51..00000000
Binary files a/pics/group_by_2.png and /dev/null differ
diff --git a/pics/import_tool_options.png b/pics/import_tool_options.png
deleted file mode 100644
index 137e98dd..00000000
Binary files a/pics/import_tool_options.png and /dev/null differ
diff --git a/pics/o2m_csv.png b/pics/o2m_csv.png
deleted file mode 100644
index c57ca9e0..00000000
Binary files a/pics/o2m_csv.png and /dev/null differ
diff --git a/pics/o2m_csv_gen.png b/pics/o2m_csv_gen.png
deleted file mode 100644
index 74f37192..00000000
Binary files a/pics/o2m_csv_gen.png and /dev/null differ
diff --git a/pics/phase_load.png b/pics/phase_load.png
deleted file mode 100644
index d83b67af..00000000
Binary files a/pics/phase_load.png and /dev/null differ
diff --git a/pics/phase_transform.png b/pics/phase_transform.png
deleted file mode 100644
index 6ac76683..00000000
Binary files a/pics/phase_transform.png and /dev/null differ
diff --git a/pics/run_time_1.png b/pics/run_time_1.png
deleted file mode 100644
index 5ce7cc93..00000000
Binary files a/pics/run_time_1.png and /dev/null differ
diff --git a/pics/run_time_2.png b/pics/run_time_2.png
deleted file mode 100644
index 31c434b8..00000000
Binary files a/pics/run_time_2.png and /dev/null differ
diff --git a/pics/run_time_3.png b/pics/run_time_3.png
deleted file mode 100644
index e287fbf4..00000000
Binary files a/pics/run_time_3.png and /dev/null differ
diff --git a/pydoclint-baseLine.txt b/pydoclint-baseLine.txt
new file mode 100644
index 00000000..392614e2
--- /dev/null
+++ b/pydoclint-baseLine.txt
@@ -0,0 +1,39 @@
+src/odoo_data_flow/lib/conf_lib.py
+ DOC111: Function `get_connection_from_config`: The option `--arg-type-hints-in-docstring` is `False` but there are type hints in the docstring arg list
+ DOC501: Function `get_connection_from_config` has raise statements, but the docstring does not have a "Raises" section
+ DOC503: Function `get_connection_from_config` exceptions in the "Raises" section in the docstring do not match those in the function body. Raised exceptions in the docstring: []. Raised exceptions in the body: ['Exception', 'FileNotFoundError', 'KeyError', 'ValueError'].
+--------------------
+src/odoo_data_flow/lib/internal/exceptions.py
+ DOC301: Class `SkippingError`: __init__() should not have a docstring; please combine it with the docstring of the class
+--------------------
+src/odoo_data_flow/lib/internal/io.py
+ DOC107: Function `write_csv`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints
+--------------------
+src/odoo_data_flow/lib/internal/rpc_thread.py
+ DOC301: Class `RpcThread`: __init__() should not have a docstring; please combine it with the docstring of the class
+--------------------
+src/odoo_data_flow/lib/internal/tools.py
+ DOC404: Function `batch` yield type(s) in docstring not consistent with the return annotation. The yield type (the 0th arg in Generator[...]/Iterator[...]): Any; docstring "yields" section types:
+ DOC201: Function `to_m2o` does not have a return section in docstring
+ DOC203: Function `to_m2o` return type(s) in docstring not consistent with the return annotation. Return annotation has 1 type(s); docstring return section has 0 type(s).
+ DOC001: Function/method `to_m2m`: Potential formatting errors in docstring. Error message: Expected a colon in 'separated by commas.'. (Note: DOC001 could trigger other unrelated violations under this function/method too. Please fix the docstring formatting first.)
+ DOC101: Function `to_m2m`: Docstring contains fewer arguments than in function signature.
+ DOC103: Function `to_m2m`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [prefix: str, value: str].
+ DOC201: Function `to_m2m` does not have a return section in docstring
+ DOC203: Function `to_m2m` return type(s) in docstring not consistent with the return annotation. Return annotation has 1 type(s); docstring return section has 0 type(s).
+--------------------
+src/odoo_data_flow/lib/workflow/invoice_v9.py
+ DOC301: Class `InvoiceWorkflowV9`: __init__() should not have a docstring; please combine it with the docstring of the class
+--------------------
+src/odoo_data_flow/lib/xml_transform.py
+ DOC001: Function/method `process`: Potential formatting errors in docstring. Error message: Expected a colon in '`XMLProcessor`.'. (Note: DOC001 could trigger other unrelated violations under this function/method too. Please fix the docstring formatting first.)
+ DOC101: Method `XMLProcessor.process`: Docstring contains fewer arguments than in function signature.
+ DOC103: Method `XMLProcessor.process`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [filename_out: str, import_args: dict[str, Any], m2m: bool, mapping: dict[str, str], null_values: Union[list[Any], None], t: str, verbose: bool].
+ DOC201: Method `XMLProcessor.process` does not have a return section in docstring
+ DOC203: Method `XMLProcessor.process` return type(s) in docstring not consistent with the return annotation. Return annotation has 1 type(s); docstring return section has 0 type(s).
+--------------------
+src/odoo_data_flow/logging_config.py
+ DOC106: Function `setup_logging`: The option `--arg-type-hints-in-signature` is `True` but there are no argument type hints in the signature
+ DOC107: Function `setup_logging`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints
+ DOC111: Function `setup_logging`: The option `--arg-type-hints-in-docstring` is `False` but there are type hints in the docstring arg list
+--------------------
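# Illustrative only: the violations recorded in this baseline come from pydoclint
# checking Google-style docstrings. A function that would satisfy the rules
# referenced above (DOC101/DOC201/DOC501, with no type hints in the Args list)
# looks roughly like this; the name and behaviour are made up for the example.
def to_positive(value: str) -> int:
    """Convert a string to a positive integer.

    Args:
        value: The raw string to convert.

    Returns:
        int: The parsed positive integer.

    Raises:
        ValueError: If the string does not represent a positive integer.
    """
    number = int(value)
    if number <= 0:
        raise ValueError("expected a positive integer")
    return number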
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..91200891
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,143 @@
+[project]
+name = "odoo-data-flow"
+version = "0.0.0"
+description = "Odoo Data Flow"
+readme = "README.md"
+requires-python = ">=3.9"
+license = { text = "LGPL-3.0" }
+authors = [
+ { name = "bosd", email = "c5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me" },
+]
+classifiers = ["Development Status :: 3 - Alpha"]
+
+dependencies = ["click >=8.0.1", "odoo-client-lib", "requests", "lxml"]
+
+
+[project.urls]
+Homepage = "https://github.com/OdooDataFlow/odoo-data-flow"
+Repository = "https://github.com/OdooDataFlow/odoo-data-flow"
+Documentation = "https://odoo-data-flow.readthedocs.io"
+Changelog = "https://github.com/OdooDataFlow/odoo-data-flow/releases"
+
+[dependency-groups]
+dev = [
+ "coverage[toml] >= 6.2",
+ "pre-commit >=2.16.0",
+ "pre-commit-hooks >=4.6.0",
+ "pytest >=6.2.5",
+ "pygments >=2.10.0",
+ "nox >=2024.04.14",
+]
+lint = ["ruff >=0.5.5", "pydoclint >=0.5.0"]
+docs = [
+ "shibuya >=2025.5.30",
+ "myst-parser >= 3.0.1",
+ "sphinx >= 4.3.2",
+ "sphinx-autobuild >=2021.3.14",
+ "sphinx-click >=3.0.2",
+ "sphinx_mermaid",
+ "sphinx_copybutton",
+]
+mypy = ["mypy >=0.930"]
+typeguard = ["typeguard >=2.13.3"]
+xdoctest = ["xdoctest[colors] >=0.15.10"]
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.uv]
+package = true
+
+[project.scripts]
+odoo-data-flow = "odoo_data_flow.__main__:cli"
+
+[tool.coverage.paths]
+source = ["src", "*/site-packages"]
+tests = ["tests", "*/tests"]
+
+[tool.coverage.run]
+branch = true
+source = ["odoo_data_flow", "tests"]
+omit = [
+ # Exclude the legacy v9 workflow from coverage reports
+ "src/odoo_data_flow/lib/workflow/invoice_v9.py",
+]
+
+[tool.coverage.report]
+show_missing = true
+fail_under = 85
+exclude_lines = ["pragma: no cover", "if TYPE_CHECKING:"]
+
+[tool.mypy]
+strict = true
+warn_unreachable = true
+pretty = true
+show_column_numbers = true
+show_error_context = true
+
+[tool.ruff]
+src = ["src", "tests"]
+
+[tool.ruff.lint]
+select = [
+ "B", # flake8-bugbear
+ "C90", # mccabe
+ "D", # pydocstyle
+ "E", # pycodestyle
+ "F", # pyflakes
+ "I", # isort
+ "N", # pep8-naming
+ "RUF", # Ruff-specific rules
+ "S", # flake8-bandit
+ "UP", # pyupgrade
+ "W", # pycodestyle
+]
+ignore = [
+ "COM812",
+ "COM819",
+ "D107", # undocumented-public-init conflicts with DOC301
+ "D206",
+ "D300",
+ "E111",
+ "E114",
+ "E117",
+ "ISC001",
+ "ISC002",
+ "Q000",
+ "Q001",
+ "Q002",
+ "Q003",
+ "W191",
+]
+exclude = [
+ ".git",
+ ".mypy_cache",
+ ".nox",
+ ".pytest_cache",
+ ".venv",
+ "__pypackages__",
+ "_build",
+ "build",
+ "dist",
+ "docs/conf.py",
+ # Add the file you want to ignore to this list
+ "pydoclint-baseLine.txt",
+ "pydoclint-baseline.txt",
+ "uv.lock",
+]
+
+
+[tool.ruff.lint.per-file-ignores]
+"*/test_*.py" = ["S101"]
+"noxfile.py" = ["S101"]
+"**/conftest.py" = ["S101"]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+
+[tool.pydoclint]
+style = 'google'
+exclude = '\.git|\.nox|noxfile.py'
+arg-type-hints-in-docstring = false
+baseline = 'pydoclint-baseline.txt'
+auto-generate-baseline = 'True'
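# Illustrative only: [project.scripts] maps the `odoo-data-flow` console script to
# the click group in odoo_data_flow.__main__, and the same entry point is reachable
# via `python -m odoo_data_flow` (see the __main__ guard below). A quick sanity
# check, assuming the package is installed in the current environment:
import subprocess
import sys

result = subprocess.run(
    [sys.executable, "-m", "odoo_data_flow", "--help"],
    capture_output=True,
    text=True,
    check=True,
)
assert "Odoo Data Flow" in result.stdout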
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 6eca7405..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-odoo-client-lib==1.2.0
-unicodecsv==0.14.1
-future==0.16.0
-requests>=2.20.0
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 86eb59c8..00000000
--- a/setup.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Copyright (C) Thibault Francois
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as
-published by the Free Software Foundation, version 3.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Lesser Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with this program. If not, see .
-'''
-
-from setuptools import setup, find_packages
-
-setup(name='odoo-import-export-client',
- version='3.0.0',
- install_requires=['odoo-client-lib', 'future', 'unicodecsv', 'requests'],
- description='Library and script that allow to export and import data to Odoo using rpc api.',
- author='Thibault Francois',
- author_email='francois.th@gmail.com',
- url='https://github.com/tfrancoi/odoo_csv_import',
- packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
- scripts=['odoo_export_thread.py', 'odoo_import_thread.py', 'odoo_convert_path_to_image.py', 'odoo_convert_url_to_image.py'],
- long_description="See the home page for any information: https://github.com/tfrancoi/odoo_csv_import",
- keywords="odoo library import export thread python client lib web service",
- license="LGPLv3",
- classifiers=[
- "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)",
- "Programming Language :: Python",
- ],
- )
diff --git a/src/odoo_data_flow/__init__.py b/src/odoo_data_flow/__init__.py
new file mode 100644
index 00000000..43b4c0f7
--- /dev/null
+++ b/src/odoo_data_flow/__init__.py
@@ -0,0 +1,9 @@
+"""Odoo Dataflow."""
+
+from . import export_threaded, import_threaded, lib
+
+__all__ = [
+ "export_threaded",
+ "import_threaded",
+ "lib",
+]
diff --git a/src/odoo_data_flow/__main__.py b/src/odoo_data_flow/__main__.py
new file mode 100644
index 00000000..d0a99692
--- /dev/null
+++ b/src/odoo_data_flow/__main__.py
@@ -0,0 +1,299 @@
+"""Command-line interface for odoo-data-flow."""
+
+import ast
+from typing import Any, Optional
+
+import click
+
+from .converter import run_path_to_image, run_url_to_image
+from .exporter import run_export
+from .importer import run_import
+from .logging_config import setup_logging
+from .migrator import run_migration
+from .workflow_runner import run_invoice_v9_workflow
+
+
+@click.group(
+ context_settings=dict(help_option_names=["-h", "--help"]),
+ invoke_without_command=True,
+)
+@click.version_option()
+@click.option(
+ "-v", "--verbose", is_flag=True, help="Enable verbose, debug-level logging."
+)
+@click.option(
+ "--log-file",
+ default=None,
+ type=click.Path(),
+ help="Path to a file to write logs to, in addition to the console.",
+)
+@click.pass_context
+def cli(ctx: click.Context, verbose: bool, log_file: Optional[str]) -> None:
+ """Odoo Data Flow: A tool for importing, exporting, and processing data."""
+ setup_logging(verbose, log_file)
+ if ctx.invoked_subcommand is None:
+ click.echo(ctx.get_help())
+
+
+# --- Workflow Command Group ---
+# This defines 'workflow' as a subcommand of 'cli'.
+@cli.group(name="workflow")
+def workflow_group() -> None:
+ """Run post-import processing workflows."""
+ pass
+
+
+# --- Invoice v9 Workflow Sub-command ---
+# This command is now correctly nested under the 'workflow' group.
+@workflow_group.command(name="invoice-v9")
+@click.option(
+ "-c",
+ "--config",
+ default="conf/connection.conf",
+ show_default=True,
+ help="Path to the connection configuration file.",
+)
+@click.option(
+ "--action",
+ "actions",
+ multiple=True,
+ type=click.Choice(
+ ["tax", "validate", "pay", "proforma", "rename", "all"],
+ case_sensitive=False,
+ ),
+ default=["all"],
+ help="Workflow action to run. Can be specified multiple times. Defaults to 'all'.",
+)
+@click.option(
+ "--field",
+ required=True,
+ help="The source field containing the legacy invoice status.",
+)
+@click.option(
+ "--status-map",
+ "status_map_str",
+ required=True,
+ help="Dictionary string mapping Odoo states to legacy states. "
+ "e.g., \"{'open': ['OP']}\"",
+)
+@click.option(
+ "--paid-date-field",
+ required=True,
+ help="The source field containing the payment date.",
+)
+@click.option(
+ "--payment-journal",
+ required=True,
+ type=int,
+ help="The database ID of the payment journal.",
+)
+@click.option(
+ "--max-connection", default=4, type=int, help="Number of parallel threads."
+)
+def invoice_v9_cmd(**kwargs: Any) -> None:
+ """Runs the legacy Odoo v9 invoice processing workflow."""
+ run_invoice_v9_workflow(**kwargs)
+
+
+# --- Import Command ---
+# This command is attached directly to the main 'cli' group.
+@cli.command(name="import")
+@click.option(
+ "-c",
+ "--config",
+ default="conf/connection.conf",
+ show_default=True,
+ help="Configuration file for connection parameters.",
+)
+@click.option("--file", "filename", required=True, help="File to import.")
+@click.option(
+ "--model",
+ default=None,
+ help="Odoo model to import into. If not provided, it's inferred from the filename.",
+)
+@click.option(
+ "--worker", default=1, type=int, help="Number of simultaneous connections."
+)
+@click.option(
+ "--size",
+ "batch_size",
+ default=10,
+ type=int,
+ help="Number of lines to import per connection.",
+)
+@click.option("--skip", default=0, type=int, help="Number of initial lines to skip.")
+@click.option(
+ "--fail",
+ is_flag=True,
+ default=False,
+ help="Run in fail mode, retrying records from the .fail.csv file.",
+)
+@click.option("-s", "--sep", "separator", default=";", help="CSV separator character.")
+@click.option(
+ "--groupby",
+ "split",
+ default=None,
+ help="Column to group data by to avoid concurrent updates.",
+)
+@click.option(
+ "--ignore", default=None, help="Comma-separated list of columns to ignore."
+)
+@click.option(
+ "--check",
+ is_flag=True,
+ default=False,
+ help="Check if records are imported after each batch.",
+)
+@click.option(
+ "--context",
+ default="{'tracking_disable': True}",
+ help="Odoo context as a dictionary string.",
+)
+@click.option(
+ "--o2m",
+ is_flag=True,
+ default=False,
+ help="Special handling for one-to-many imports.",
+)
+@click.option("--encoding", default="utf-8", help="Encoding of the data file.")
+def import_cmd(**kwargs: Any) -> None:
+ """Runs the data import process."""
+ run_import(**kwargs)
+
+
+# --- Export Command ---
+@cli.command(name="export")
+@click.option(
+ "-c",
+ "--config",
+ default="conf/connection.conf",
+ show_default=True,
+ help="Configuration file for connection parameters.",
+)
+@click.option("--file", "filename", required=True, help="Output file path.")
+@click.option("--model", required=True, help="Odoo model to export from.")
+@click.option(
+ "--fields", required=True, help="Comma-separated list of fields to export."
+)
+@click.option("--domain", default="[]", help="Odoo domain filter as a list string.")
+@click.option(
+ "--worker", default=1, type=int, help="Number of simultaneous connections."
+)
+@click.option(
+ "--size",
+ "batch_size",
+ default=10,
+ type=int,
+ help="Number of records to process per batch.",
+)
+@click.option("-s", "--sep", "separator", default=";", help="CSV separator character.")
+@click.option(
+ "--context",
+ default="{'tracking_disable': True}",
+ help="Odoo context as a dictionary string.",
+)
+@click.option("--encoding", default="utf-8", help="Encoding of the data file.")
+def export_cmd(**kwargs: Any) -> None:
+ """Runs the data export process."""
+ run_export(**kwargs)
+
+
+# --- Path-to-Image Command ---
+@cli.command(name="path-to-image")
+@click.argument("file")
+@click.option(
+ "-f",
+ "--fields",
+ required=True,
+ help="Comma-separated list of fields to convert from path to base64.",
+)
+@click.option(
+ "--path",
+ default=None,
+ help="Image path prefix. Defaults to the current working directory.",
+)
+@click.option("--out", default="out.csv", help="Name of the resulting output file.")
+def path_to_image_cmd(**kwargs: Any) -> None:
+ """Converts columns with local file paths into base64 strings."""
+ run_path_to_image(**kwargs)
+
+
+# --- URL-to-Image Command ---
+@cli.command(name="url-to-image")
+@click.argument("file")
+@click.option(
+ "-f",
+ "--fields",
+ required=True,
+ help="Comma-separated list of fields with URLs to convert to base64.",
+)
+@click.option("--out", default="out.csv", help="Name of the resulting output file.")
+def url_to_image_cmd(**kwargs: Any) -> None:
+ """Downloads content from URLs in columns and converts to base64."""
+ run_url_to_image(**kwargs)
+
+
+# --- Migrate Command ---
+@cli.command(name="migrate")
+@click.option(
+ "--config-export",
+ required=True,
+ help="Path to the source Odoo connection config.",
+)
+@click.option(
+ "--config-import",
+ required=True,
+ help="Path to the destination Odoo connection config.",
+)
+@click.option("--model", required=True, help="The Odoo model to migrate.")
+@click.option(
+ "--domain", default="[]", help="Domain filter to select records for export."
+)
+@click.option(
+ "--fields", required=True, help="Comma-separated list of fields to migrate."
+)
+@click.option(
+ "--mapping",
+ default=None,
+ help="A dictionary string defining the transformation mapping.",
+)
+@click.option(
+ "--export-worker",
+ default=1,
+ type=int,
+ help="Number of workers for the export phase.",
+)
+@click.option(
+ "--export-batch-size",
+ default=100,
+ type=int,
+ help="Batch size for the export phase.",
+)
+@click.option(
+ "--import-worker",
+ default=1,
+ type=int,
+ help="Number of workers for the import phase.",
+)
+@click.option(
+ "--import-batch-size",
+ default=10,
+ type=int,
+ help="Batch size for the import phase.",
+)
+def migrate_cmd(**kwargs: Any) -> None:
+ """Performs a direct server-to-server data migration."""
+ if kwargs.get("mapping"):
+ try:
+ kwargs["mapping"] = ast.literal_eval(kwargs["mapping"])
+ except Exception as e:
+ print(
+ "Error: Invalid mapping provided. "
+ f"Must be a valid Python dictionary string. Error: {e}"
+ )
+ return
+ run_migration(**kwargs)
+
+
+if __name__ == "__main__":
+ cli()
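# Illustrative only: a minimal smoke test of the click CLI defined above, using
# click's own CliRunner test helper. It only exercises --help, so no Odoo
# connection or configuration file is needed; it does assume the package is
# importable in the current environment.
from click.testing import CliRunner

from odoo_data_flow.__main__ import cli

runner = CliRunner()
assert runner.invoke(cli, ["--help"]).exit_code == 0
assert runner.invoke(cli, ["workflow", "--help"]).exit_code == 0
assert runner.invoke(cli, ["import", "--help"]).exit_code == 0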
diff --git a/src/odoo_data_flow/converter.py b/src/odoo_data_flow/converter.py
new file mode 100755
index 00000000..f62bb1b3
--- /dev/null
+++ b/src/odoo_data_flow/converter.py
@@ -0,0 +1,89 @@
+"""CSV Data converter.
+
+This module contains functions for converting data, such as image paths
+or URLs to base64 strings, for use in Odoo imports.
+"""
+
+import base64
+import os
+from typing import Any, Callable, Optional
+
+from .lib import mapper
+from .lib.transform import Processor
+from .logging_config import log
+
+
+def to_base64(filepath: str) -> str:
+ """Reads a local file and returns its base64 encoded content."""
+ try:
+ with open(filepath, "rb") as f:
+ return base64.b64encode(f.read()).decode("utf-8")
+ except FileNotFoundError:
+ log.warning(f"File not found at '{filepath}', skipping.")
+ return "" # Return empty string if file is not found
+
+
+def run_path_to_image(
+ file: str, fields: str, out: str = "out.csv", path: Optional[str] = None
+) -> None:
+ """Path to image.
+
+ Takes a CSV file and converts columns containing local file paths
+ into base64 encoded strings.
+ """
+ log.info("Starting path-to-image conversion...")
+
+ base_path = path or os.getcwd()
+
+ processor = Processor(file)
+ mapping = processor.get_o2o_mapping()
+
+ # Create a new mapping with the correct value type for the 'process' method
+ callable_mapping: dict[str, Callable[..., Any]] = {
+ k: v.func for k, v in mapping.items()
+ }
+
+ for f in fields.split(","):
+ field_name = f.strip()
+ if field_name not in callable_mapping:
+ log.warning(f"Field '{field_name}' not found in source file. Skipping.")
+ continue
+
+ log.info(f"Setting up conversion for column: '{field_name}'")
+ callable_mapping[field_name] = mapper.val(
+ field_name,
+ postprocess=lambda x: to_base64(os.path.join(base_path, x)) if x else "",
+ )
+
+ processor.process(callable_mapping, out, t="list")
+ processor.write_to_file("")
+ log.info(f"Conversion complete. Output written to '{out}'.")
+
+
+def run_url_to_image(file: str, fields: str, out: str = "out.csv") -> None:
+ """URL to image.
+
+ Takes a CSV file and converts columns containing URLs
+ into base64 encoded strings by downloading the content.
+ """
+ log.info("Starting url-to-image conversion...")
+
+ processor = Processor(file)
+ mapping = processor.get_o2o_mapping()
+
+ callable_mapping: dict[str, Callable[..., Any]] = {
+ k: v.func for k, v in mapping.items()
+ }
+
+ for f in fields.split(","):
+ field_name = f.strip()
+ if field_name not in callable_mapping:
+ log.warning(f"Field '{field_name}' not found in source file. Skipping.")
+ continue
+
+ log.info(f"Setting up URL download and conversion for column: '{field_name}'")
+ callable_mapping[field_name] = mapper.binary_url_map(field_name)
+
+ processor.process(callable_mapping, out, t="list")
+ processor.write_to_file("")
+ log.info(f"Conversion complete. Output written to '{out}'.")
diff --git a/src/odoo_data_flow/export_threaded.py b/src/odoo_data_flow/export_threaded.py
new file mode 100755
index 00000000..0428cc24
--- /dev/null
+++ b/src/odoo_data_flow/export_threaded.py
@@ -0,0 +1,147 @@
+"""Export thread.
+
+This module contains the low-level, multi-threaded logic for exporting
+data from an Odoo instance.
+"""
+
+import csv
+import sys
+from time import time
+from typing import Any, Optional
+
+from .lib import conf_lib
+from .lib.internal.rpc_thread import RpcThread
+from .lib.internal.tools import batch
+from .logging_config import log
+
+# --- Fix for csv.field_size_limit OverflowError ---
+max_int = sys.maxsize
+decrement = True
+while decrement:
+ decrement = False
+ try:
+ csv.field_size_limit(max_int)
+ except OverflowError:
+ max_int = int(max_int / 10)
+ decrement = True
+
+
+class RPCThreadExport(RpcThread):
+ """Export Thread handler.
+
+ A specialized RpcThread for handling the export of data batches from Odoo.
+ It collects results from multiple threads in a thread-safe manner.
+ """
+
+ def __init__(
+ self,
+ max_connection: int,
+ model: Any,
+ header: list[str],
+ context: Optional[dict[str, Any]] = None,
+ ) -> None:
+ """Initializes the export thread handler."""
+ super().__init__(max_connection)
+ self.model = model
+ self.header = header
+ self.context = context or {}
+ self.results: dict[int, list[list[Any]]] = {}
+
+ def launch_batch(self, data_ids: list[int], batch_number: int) -> None:
+ """Submits a batch of IDs to be exported by a worker thread."""
+
+ def launch_batch_fun(ids_to_export: list[int], num: int) -> None:
+ start_time = time()
+ try:
+ log.debug(f"Exporting batch {num} with {len(ids_to_export)} records...")
+ datas = self.model.export_data(
+ ids_to_export, self.header, context=self.context
+ ).get("datas", [])
+ self.results[num] = datas
+ log.debug(
+ f"Batch {num} finished in {time() - start_time:.2f}s. "
+ f"Fetched {len(datas)} records."
+ )
+ except Exception as e:
+ log.error(f"Export for batch {num} failed: {e}", exc_info=True)
+ self.results[num] = []
+
+ self.spawn_thread(launch_batch_fun, [data_ids, batch_number])
+
+ def get_data(self) -> list[list[Any]]:
+ """Get data.
+
+ Waits for all threads to complete and returns the collected data
+ in the correct order.
+ """
+ super().wait()
+
+ all_data = []
+ for batch_number in sorted(self.results.keys()):
+ all_data.extend(self.results[batch_number])
+ return all_data
+
+
+def export_data(
+ config_file: str,
+ model: str,
+ domain: list[Any],
+ header: list[str],
+ context: Optional[dict[str, Any]] = None,
+ output: Optional[str] = None,
+ max_connection: int = 1,
+ batch_size: int = 100,
+ separator: str = ";",
+ encoding: str = "utf-8",
+) -> tuple[Optional[list[str]], Optional[list[list[Any]]]]:
+ """Export Data.
+
+ The main function for exporting data. It can either write to a file or
+ return the data in-memory for migrations.
+ """
+ try:
+ connection = conf_lib.get_connection_from_config(config_file)
+ model_obj = connection.get_model(model)
+ except Exception as e:
+ log.error(
+ f"Failed to connect to Odoo or get model '{model}'. "
+ f"Please check your configuration. Error: {e}"
+ )
+ return None, None
+
+ rpc_thread = RPCThreadExport(max_connection, model_obj, header, context)
+ start_time = time()
+
+ log.info(f"Searching for records in model '{model}' to export...")
+ ids = model_obj.search(domain, context=context)
+ total_ids = len(ids)
+ log.info(
+ f"Found {total_ids} records to export. Splitting into batches of {batch_size}."
+ )
+
+ i = 0
+ for id_batch in batch(ids, batch_size):
+ rpc_thread.launch_batch(list(id_batch), i)
+ i += 1
+
+ all_exported_data = rpc_thread.get_data()
+
+ log.info(
+ f"Exported {len(all_exported_data)} records in total. Total time: "
+ f"{time() - start_time:.2f}s."
+ )
+
+ if output:
+ log.info(f"Writing exported data to file: {output}")
+ try:
+ with open(output, "w", newline="", encoding=encoding) as f:
+ writer = csv.writer(f, delimiter=separator, quoting=csv.QUOTE_ALL)
+ writer.writerow(header)
+ writer.writerows(all_exported_data)
+ log.info("File writing complete.")
+ except OSError as e:
+ log.error(f"Failed to write to output file {output}: {e}")
+ return None, None
+ else:
+ log.info("Returning exported data in-memory.")
+ return header, all_exported_data
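# Illustrative only: export_data() above splits the searched ids into fixed-size
# batches with the batch() helper from lib.internal.tools (not shown in this
# diff). A minimal equivalent, assuming the helper simply yields successive
# chunks in order:
from collections.abc import Iterable, Iterator
from itertools import islice


def batch_ids(ids: Iterable[int], size: int) -> Iterator[list[int]]:
    """Yield lists of at most `size` ids, preserving the original order."""
    iterator = iter(ids)
    while chunk := list(islice(iterator, size)):
        yield chunk


assert list(batch_ids(range(7), 3)) == [[0, 1, 2], [3, 4, 5], [6]]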
diff --git a/src/odoo_data_flow/exporter.py b/src/odoo_data_flow/exporter.py
new file mode 100755
index 00000000..6999b53c
--- /dev/null
+++ b/src/odoo_data_flow/exporter.py
@@ -0,0 +1,121 @@
+"""This module contains the core logic for exporting data from Odoo."""
+
+import ast
+from typing import Any, Optional
+
+from . import export_threaded
+from .logging_config import log
+
+
+def run_export(
+ config: str,
+ filename: str,
+ model: str,
+ fields: str,
+ domain: str = "[]",
+ worker: int = 1,
+ batch_size: int = 10,
+ separator: str = ";",
+ context: str = "{'tracking_disable' : True}",
+ encoding: str = "utf-8",
+) -> None:
+ """Export runner.
+
+ Orchestrates the data export process, writing the output to a CSV file.
+ This function is designed to be called from the main CLI.
+ """
+ log.info("Starting data export process...")
+
+ # Safely evaluate the domain and context strings
+ try:
+ parsed_domain = ast.literal_eval(domain)
+ if not isinstance(parsed_domain, list):
+ raise TypeError("Domain must be a list of tuples.")
+ except Exception as e:
+ log.error(f"Invalid domain provided. Must be a valid Python list string. {e}")
+ return
+
+ try:
+ parsed_context = ast.literal_eval(context)
+ if not isinstance(parsed_context, dict):
+ raise TypeError("Context must be a dictionary.")
+ except Exception as e:
+ log.error(
+ f"Invalid context provided. Must be a valid Python dictionary string. {e}"
+ )
+ return
+
+ # Process the fields string into a list
+ header = fields.split(",")
+
+ log.info(f"Exporting from model: {model}")
+ log.info(f"Output file: {filename}")
+ log.info(f"Workers: {worker}, Batch Size: {batch_size}")
+
+ # Call the core export function with an output filename
+ export_threaded.export_data(
+ config,
+ model,
+ parsed_domain,
+ header,
+ context=parsed_context,
+ output=filename,
+ max_connection=int(worker),
+ batch_size=int(batch_size),
+ separator=separator,
+ encoding=encoding,
+ )
+
+ log.info("Export process finished.")
+
+
+def run_export_for_migration(
+ config: str,
+ model: str,
+ fields: list[str],
+ domain: str = "[]",
+ worker: int = 1,
+ batch_size: int = 10,
+ context: str = "{'tracking_disable' : True}",
+ encoding: str = "utf-8",
+) -> tuple[Optional[list[str]], Optional[list[list[Any]]]]:
+ """Migration exporter.
+
+ Orchestrates the data export process, returning the data in memory.
+ This function is designed to be called by the migration tool.
+ """
+ log.info(f"Starting in-memory export from model '{model}' for migration...")
+
+ try:
+ parsed_domain = ast.literal_eval(domain)
+ except Exception:
+ log.warning(
+ "Invalid domain string for migration export,"
+ "defaulting to empty domain '[]'."
+ )
+ parsed_domain = []
+
+ try:
+ parsed_context = ast.literal_eval(context)
+ except Exception:
+ parsed_context = {}
+
+ header, data = export_threaded.export_data(
+ config,
+ model,
+ parsed_domain,
+ fields,
+ context=parsed_context,
+ output=None, # This signals the function to return data
+ max_connection=int(worker),
+ batch_size=int(batch_size),
+ encoding=encoding,
+ separator=";", # Provide a default separator
+ )
+
+ if data:
+ log.info(f"In-memory export complete. Fetched {len(data)} records.")
+ else:
+ log.info("In-memory export complete. No records fetched.")
+
+ return header, data
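# Illustrative only: the CLI passes --domain and --context as strings, and the
# functions above turn them into Python objects with ast.literal_eval, which
# accepts literals only (no arbitrary code, unlike the eval() calls in the old
# scripts). The domain below is a hypothetical filter, not part of this diff.
import ast

domain = ast.literal_eval("[('customer_rank', '>', 0)]")
context = ast.literal_eval("{'tracking_disable': True}")

assert isinstance(domain, list) and isinstance(context, dict)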
diff --git a/src/odoo_data_flow/import_threaded.py b/src/odoo_data_flow/import_threaded.py
new file mode 100755
index 00000000..3f302596
--- /dev/null
+++ b/src/odoo_data_flow/import_threaded.py
@@ -0,0 +1,332 @@
+"""Import thread.
+
+This module contains the low-level, multi-threaded logic for importing
+data into an Odoo instance.
+"""
+
+import csv
+import sys
+from collections.abc import Generator
+from time import time
+from typing import Any, Optional
+
+from .lib import conf_lib
+from .lib.internal.rpc_thread import RpcThread
+from .lib.internal.tools import batch
+from .logging_config import log
+
+# --- Fix for csv.field_size_limit OverflowError ---
+# In newer Python versions (3.10+), especially on 64-bit systems,
+# sys.maxsize is too large for the C long that the csv module's
+# field_size_limit function expects. This causes an OverflowError.
+# The following code block finds the maximum possible value that works
+# by reducing it until it's accepted.
+max_int = sys.maxsize
+decrement = True
+while decrement:
+ decrement = False
+ try:
+ csv.field_size_limit(max_int)
+ except OverflowError:
+ max_int = int(max_int / 10)
+ decrement = True
+
+
+class RPCThreadImport(RpcThread):
+ """RPC Import Thread.
+
+ A specialized RpcThread for handling the import of data batches into Odoo.
+ It writes failed records to a file.
+ """
+
+ def __init__(
+ self,
+ max_connection: int,
+ model: Any,
+ header: list[str],
+ writer: Optional[Any] = None, # csv.writer is not a type, use Any
+ context: Optional[dict[str, Any]] = None,
+ add_error_reason: bool = False,
+ ) -> None:
+ """Initializes the import thread handler."""
+ super().__init__(max_connection)
+ self.model = model
+ self.header = header
+ self.writer = writer
+ self.context = context or {}
+ self.add_error_reason = add_error_reason
+
+ def _handle_odoo_messages(
+ self, messages: list[dict[str, Any]], original_lines: list[list[Any]]
+ ) -> list[list[Any]]:
+ """Processes error messages from an Odoo load response."""
+ failed_lines = []
+ full_error_message = ""
+ for msg in messages:
+ message = msg.get("message", "Unknown Odoo error")
+ full_error_message += message + "\n"
+ record_index = msg.get("record", -1)
+ if record_index >= 0 and record_index < len(original_lines):
+ failed_line = original_lines[record_index]
+ if self.add_error_reason:
+ failed_line.append(message.replace("\n", " | "))
+ failed_lines.append(failed_line)
+
+ # If Odoo sends a generic message without record details, assume all failed.
+ if not failed_lines:
+ if self.add_error_reason:
+ for line in original_lines:
+ line.append(full_error_message.replace("\n", " | "))
+ failed_lines.extend(original_lines)
+ return failed_lines
+
+ def _handle_rpc_error(
+ self, error: Exception, lines: list[list[Any]]
+ ) -> list[list[Any]]:
+ """Handles a general RPC exception, marking all lines as failed."""
+ error_message = str(error).replace("\n", " | ")
+ if self.add_error_reason:
+ for line in lines:
+ line.append(error_message)
+ return lines
+
+ def _handle_record_mismatch(
+ self, response: dict[str, Any], lines: list[list[Any]]
+ ) -> list[list[Any]]:
+ """Handles the case where imported records don't match sent lines."""
+ error_message = (
+ f"Record count mismatch. Expected {len(lines)}, "
+ f"got {len(response.get('ids', []))}. "
+ "Probably a duplicate XML ID."
+ )
+ log.error(error_message)
+ if self.add_error_reason:
+ for line in lines:
+ line.append(error_message)
+ return lines
+
+ def launch_batch(
+ self,
+ data_lines: list[list[Any]],
+ batch_number: Any,
+ check: bool = False,
+ ) -> None:
+ """Submits a batch of data lines to be imported by a worker thread."""
+
+ def launch_batch_fun(lines: list[list[Any]], num: Any, do_check: bool) -> None:
+ """The actual function executed by the worker thread."""
+ start_time = time()
+ failed_lines = []
+ try:
+ log.debug(f"Importing batch {num} with {len(lines)} records...")
+ res = self.model.load(self.header, lines, context=self.context)
+
+ if res.get("messages"):
+ failed_lines = self._handle_odoo_messages(res["messages"], lines)
+ elif do_check and len(res.get("ids", [])) != len(lines):
+ failed_lines = self._handle_record_mismatch(res, lines)
+
+ except Exception as e:
+ log.error(f"RPC call for batch {num} failed: {e}", exc_info=True)
+ failed_lines = self._handle_rpc_error(e, lines)
+
+ if failed_lines and self.writer:
+ self.writer.writerows(failed_lines)
+
+ success = not bool(failed_lines)
+ log.info(
+ f"Time for batch {num}: {time() - start_time:.2f}s. Success: {success}"
+ )
+
+ self.spawn_thread(
+ launch_batch_fun, [data_lines, batch_number], {"do_check": check}
+ )
+
+
+def _filter_ignored_columns(
+ ignore: list[str], header: list[str], data: list[list[Any]]
+) -> tuple[list[str], list[list[Any]]]:
+ """Removes ignored columns from header and data."""
+ if not ignore:
+ return header, data
+
+ indices_to_keep = [i for i, h in enumerate(header) if h not in ignore]
+ new_header = [header[i] for i in indices_to_keep]
+ new_data = [[row[i] for i in indices_to_keep] for row in data]
+
+ return new_header, new_data
+
+
+def _read_data_file(
+ file_path: str, separator: str, encoding: str, skip: int
+) -> tuple[list[str], list[list[Any]]]:
+ """Reads a CSV file and returns its header and data."""
+ log.info(f"Reading data from file: {file_path}")
+ try:
+ with open(file_path, encoding=encoding, newline="") as f:
+ reader = csv.reader(f, delimiter=separator)
+ header = next(reader)
+
+ if "id" not in header:
+ raise ValueError(
+ "Source file must contain an 'id' column for external IDs."
+ )
+
+ if skip > 0:
+ log.info(f"Skipping first {skip} lines...")
+ for _ in range(skip):
+ next(reader)
+
+ return header, [row for row in reader]
+ except FileNotFoundError:
+ log.error(f"Source file not found: {file_path}")
+ return [], []
+ except Exception as e:
+ log.error(f"Failed to read file {file_path}: {e}")
+ return [], []
+
+
+def _create_batches(
+ data: list[list[Any]],
+ split_by_col: Optional[str],
+ header: list[str],
+ batch_size: int,
+ o2m: bool,
+) -> Generator[tuple[Any, list[list[Any]]], None, None]:
+ """A generator that yields batches of data.
+
+ If split_by_col is provided, it
+ groups records with the same value in that column into the same batch.
+ """
+ if not split_by_col:
+ # Simple batching without grouping
+ for i, data_batch in enumerate(batch(data, batch_size)):
+ yield i, list(data_batch)
+ return
+
+ try:
+ split_index = header.index(split_by_col)
+ id_index = header.index("id")
+ except ValueError as e:
+ log.error(f"Grouping column '{e}' not found in header. Cannot use --groupby.")
+ return
+
+ data.sort(key=lambda row: row[split_index])
+
+ current_batch: list[list[Any]] = []
+ current_split_value: Optional[str] = None
+ batch_num = 0
+
+ for row in data:
+ is_o2m_line = o2m and not row[id_index]
+ row_split_value = row[split_index]
+
+ if (
+ current_batch
+ and not is_o2m_line
+ and (
+ row_split_value != current_split_value
+ or len(current_batch) >= batch_size
+ )
+ ):
+ yield f"{batch_num}-{current_split_value}", current_batch
+ current_batch = []
+ batch_num += 1
+
+ current_batch.append(row)
+ current_split_value = row_split_value
+
+ if current_batch:
+ yield f"{batch_num}-{current_split_value}", current_batch
+
+
+def import_data(
+ config_file: str,
+ model: str,
+ header: Optional[list[str]] = None,
+ data: Optional[list[list[Any]]] = None,
+ file_csv: Optional[str] = None,
+ context: Optional[dict[str, Any]] = None,
+ fail_file: Optional[str] = None,
+ encoding: str = "utf-8",
+ separator: str = ";",
+ ignore: Optional[list[str]] = None,
+ split: Optional[str] = None,
+ check: bool = True,
+ max_connection: int = 1,
+ batch_size: int = 10,
+ skip: int = 0,
+ o2m: bool = False,
+ is_fail_run: bool = False,
+) -> None:
+ """Main function to orchestrate the import process.
+
+ Can be run from a file or from in-memory data.
+ """
+ _ignore = ignore or []
+ _context = context or {}
+
+ if file_csv:
+ header, data = _read_data_file(file_csv, separator, encoding, skip)
+ if not data:
+ return # Stop if file reading failed
+
+ if header is None or data is None:
+ raise ValueError(
+ "Please provide either a data file or both 'header' and 'data'."
+ )
+
+ # Filter out ignored columns from both header and data
+ final_header, final_data = _filter_ignored_columns(_ignore, header, data)
+
+ try:
+ connection = conf_lib.get_connection_from_config(config_file)
+ model_obj = connection.get_model(model)
+ except Exception as e:
+ log.error(f"Failed to connect to Odoo: {e}")
+ return
+
+ # Set up the writer for the fail file
+ fail_file_writer: Optional[Any] = None
+ fail_file_handle = None
+ if fail_file:
+ try:
+ fail_file_handle = open(fail_file, "w", newline="", encoding=encoding)
+ fail_file_writer = csv.writer(
+ fail_file_handle, delimiter=separator, quoting=csv.QUOTE_ALL
+ )
+            # Only a --fail retry run adds the error-reason column to the header
+ header_to_write = list(final_header)
+ if is_fail_run:
+ header_to_write.append("_ERROR_REASON")
+ fail_file_writer.writerow(header_to_write)
+ except OSError as e:
+ log.error(f"Could not open fail file for writing: {fail_file}. Error: {e}")
+ return
+
+ rpc_thread = RPCThreadImport(
+ max_connection,
+ model_obj,
+ final_header,
+ fail_file_writer,
+ _context,
+ add_error_reason=is_fail_run,
+ )
+ start_time = time()
+
+ # Create batches and launch them in threads
+ for batch_number, lines_batch in _create_batches(
+ final_data, split, final_header, batch_size, o2m
+ ):
+ rpc_thread.launch_batch(lines_batch, batch_number, check)
+
+ # Wait for all threads to complete
+ rpc_thread.wait()
+
+ if fail_file_handle:
+ fail_file_handle.close()
+
+ log.info(
+ f"{len(final_data)} records processed for model '{model}'. "
+ f"Total time: {time() - start_time:.2f}s."
+ )
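# Illustrative only: a simplified version of what _create_batches() above does
# when --groupby is used. Rows are sorted on the grouping column, and a new
# batch starts when the grouped value changes or the batch is full; the o2m
# special case from the real implementation is left out of this sketch.
from collections.abc import Iterator
from typing import Any


def grouped_batches(
    rows: list[list[Any]], group_index: int, batch_size: int
) -> Iterator[list[list[Any]]]:
    """Yield batches that never mix two values of the grouping column."""
    ordered = sorted(rows, key=lambda r: r[group_index])
    current: list[list[Any]] = []
    current_value: Any = None
    for row in ordered:
        value_changed = row[group_index] != current_value
        if current and (value_changed or len(current) >= batch_size):
            yield current
            current = []
        current.append(row)
        current_value = row[group_index]
    if current:
        yield current


rows = [["p1", "A"], ["p2", "B"], ["p3", "A"], ["p4", "B"]]
assert list(grouped_batches(rows, group_index=1, batch_size=10)) == [
    [["p1", "A"], ["p3", "A"]],
    [["p2", "B"], ["p4", "B"]],
]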
diff --git a/src/odoo_data_flow/importer.py b/src/odoo_data_flow/importer.py
new file mode 100755
index 00000000..7aa3bdbd
--- /dev/null
+++ b/src/odoo_data_flow/importer.py
@@ -0,0 +1,162 @@
+"""This module contains the core logic for importing data into Odoo."""
+
+import ast
+import os
+from datetime import datetime
+from typing import Any, Optional
+
+from . import import_threaded
+from .logging_config import log
+
+
+def run_import(
+ config: str,
+ filename: str,
+ model: Optional[str] = None,
+ worker: int = 1,
+ batch_size: int = 10,
+ skip: int = 0,
+ fail: bool = False,
+ separator: str = ";",
+ split: Optional[str] = None,
+ ignore: Optional[str] = None,
+ check: bool = False,
+ context: str = "{'tracking_disable' : True}",
+ o2m: bool = False,
+ encoding: str = "utf-8",
+) -> None:
+ """Orchestrates the data import process from a CSV file.
+
+ Args:
+ config: Path to the connection configuration file.
+ filename: Path to the source CSV file to import.
+ model: The Odoo model to import data into. If not provided, it's inferred
+ from the filename.
+ worker: The number of simultaneous connections to use.
+ batch_size: The number of records to process in each batch.
+ skip: The number of initial lines to skip in the source file.
+ fail: If True, runs in fail mode, retrying records from the .fail file.
+ separator: The delimiter used in the CSV file.
+ split: The column name to group records by to avoid concurrent updates.
+ ignore: A comma-separated string of column names to ignore.
+ check: If True, checks if records were successfully imported.
+ context: A string representation of the Odoo context dictionary.
+ o2m: If True, enables special handling for one-to-many imports.
+ encoding: The file encoding of the source file.
+ """
+ log.info("Starting data import process from file...")
+
+ final_model = model
+ if not final_model:
+ base_name = os.path.basename(filename)
+ inferred_model = os.path.splitext(base_name)[0].replace("_", ".")
+ # Add a check for invalid inferred names (like hidden files)
+ if not inferred_model or inferred_model.startswith("."):
+ log.error(
+ "Model not specified and could not be inferred from filename "
+ f"'{base_name}'. Please use the --model option."
+ )
+ return
+ final_model = inferred_model
+ log.info(f"No model provided. Inferred model '{final_model}' from filename.")
+
+ try:
+ parsed_context = ast.literal_eval(context)
+ if not isinstance(parsed_context, dict):
+ raise TypeError("Context must be a dictionary.")
+ except Exception as e:
+ log.error(
+ f"Invalid context provided. Must be a valid Python dictionary string. {e}"
+ )
+ return
+
+ ignore_list = ignore.split(",") if ignore else []
+
+ file_dir = os.path.dirname(filename)
+
+ file_to_process: str
+ fail_output_file: str
+ is_fail_run: bool
+ batch_size_run: int
+ max_connection_run: int
+
+ if fail:
+ log.info("Running in --fail mode. Retrying failed records...")
+ file_to_process = os.path.join(file_dir, f"{final_model}.fail.csv")
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ original_basename = os.path.splitext(os.path.basename(filename))[0]
+ fail_output_file = os.path.join(
+ file_dir, f"{original_basename}_{timestamp}_failed.csv"
+ )
+ batch_size_run = 1
+ max_connection_run = 1
+ is_fail_run = True
+ else:
+ file_to_process = filename
+ fail_output_file = os.path.join(file_dir, f"{final_model}.fail.csv")
+ batch_size_run = int(batch_size)
+ max_connection_run = int(worker)
+ is_fail_run = False
+
+ log.info(f"Importing file: {file_to_process}")
+ log.info(f"Target model: {final_model}")
+ log.info(f"Workers: {max_connection_run}, Batch Size: {batch_size_run}")
+ log.info(f"Failed records will be saved to: {fail_output_file}")
+
+ import_threaded.import_data(
+ config,
+ final_model,
+ file_csv=file_to_process,
+ context=parsed_context,
+ fail_file=fail_output_file,
+ encoding=encoding,
+ separator=separator,
+ ignore=ignore_list,
+ split=split,
+ check=check,
+ max_connection=max_connection_run,
+ batch_size=batch_size_run,
+ skip=int(skip),
+ o2m=o2m,
+ is_fail_run=is_fail_run,
+ )
+
+ log.info("Import process finished.")
+
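+# Illustrative call (hedged sketch; the config path, file name and model are
+# assumptions):
+#
+#     run_import(
+#         config="conf/connection.conf",
+#         filename="data/res_partner.csv",
+#         model="res.partner",
+#         worker=4,
+#         batch_size=100,
+#     )
+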
+
+def run_import_for_migration(
+ config: str,
+ model: str,
+ header: list[str],
+ data: list[list[Any]],
+ worker: int = 1,
+ batch_size: int = 10,
+) -> None:
+ """Orchestrates the data import process from in-memory data.
+
+ Args:
+ config: Path to the connection configuration file.
+ model: The Odoo model to import data into.
+ header: A list of strings representing the column headers.
+ data: A list of lists representing the data rows.
+ worker: The number of simultaneous connections to use.
+ batch_size: The number of records to process in each batch.
+ """
+ log.info("Starting data import from in-memory data...")
+
+ parsed_context = {"tracking_disable": True}
+
+ log.info(f"Importing {len(data)} records into model: {model}")
+ log.info(f"Workers: {worker}, Batch Size: {batch_size}")
+
+ import_threaded.import_data(
+ config,
+ model,
+ header=header,
+ data=data,
+ context=parsed_context,
+ max_connection=int(worker),
+ batch_size=int(batch_size),
+ )
+
+ log.info("In-memory import process finished.")
diff --git a/src/odoo_data_flow/lib/__init__.py b/src/odoo_data_flow/lib/__init__.py
new file mode 100644
index 00000000..9009a51b
--- /dev/null
+++ b/src/odoo_data_flow/lib/__init__.py
@@ -0,0 +1,19 @@
+"initialize Library."
+
+from . import (
+ checker,
+ conf_lib,
+ internal,
+ mapper,
+ transform,
+ workflow,
+)
+
+__all__ = [
+ "checker",
+ "conf_lib",
+ "internal",
+ "mapper",
+ "transform",
+ "workflow",
+]
diff --git a/src/odoo_data_flow/lib/checker.py b/src/odoo_data_flow/lib/checker.py
new file mode 100644
index 00000000..512662bb
--- /dev/null
+++ b/src/odoo_data_flow/lib/checker.py
@@ -0,0 +1,116 @@
+"""This module provides a library of "checker" functions.
+
+Each function is a factory that returns a new function designed to be passed
+to the Processor's `.check()` method to perform data quality validations
+before the transformation process begins.
+"""
+
+import re
+from typing import Callable, Optional
+
+from ..logging_config import log
+
+# Type aliases for clarity
+Header = list[str]
+Data = list[list[str]]
+CheckFunc = Callable[[Header, Data], bool]
+
+
+def id_validity_checker(
+ id_field: str, pattern: str, null_values: Optional[list[str]] = None
+) -> CheckFunc:
+ """ID Validity checker.
+
+ Returns a checker that validates a specific column
+ against a regex pattern.
+ """
+ if null_values is None:
+ null_values = ["NULL"]
+
+ def check_id_validity(header: Header, data: Data) -> bool:
+ try:
+ regex = re.compile(pattern)
+ except re.error as e:
+ log.error(f"Invalid regex pattern provided to id_validity_checker: {e}")
+ return False
+
+ is_valid = True
+ for i, line in enumerate(data, start=1):
+ line_dict = dict(zip(header, line))
+ id_value = line_dict.get(id_field, "")
+
+ # Skip check if the value is considered null
+ if id_value in null_values or not id_value:
+ continue
+
+ if not regex.match(id_value):
+ log.warning(
+ f"Check Failed (ID Validity) on line {i}: Value "
+ f"'{id_value}' in column '{id_field}' "
+ f"does not match pattern '{pattern}'."
+ )
+ is_valid = False
+ return is_valid
+
+ return check_id_validity
+
+
+def line_length_checker(expected_length: int) -> CheckFunc:
+ """Line Length Checker.
+
+ Returns a checker that verifies each row has an exact number of columns.
+ """
+
+ def check_line_length(header: Header, data: Data) -> bool:
+ is_valid = True
+ for i, line in enumerate(data, start=2): # Start from 2 to account for header
+ if len(line) != expected_length:
+ log.warning(
+ f"Check Failed (Line Length) on line {i}: "
+ f"Expected {expected_length} columns, but found "
+ f"{len(line)}."
+ )
+ is_valid = False
+ return is_valid
+
+ return check_line_length
+
+
+def line_number_checker(expected_line_count: int) -> CheckFunc:
+ """Returns a checker that verifies the total number of data rows."""
+
+ def check_line_number(header: Header, data: Data) -> bool:
+ actual_line_count = len(data)
+ if actual_line_count != expected_line_count:
+ log.warning(
+ f"Check Failed (Line Count): Expected {expected_line_count} "
+ f"data rows, but found {actual_line_count}."
+ )
+ return False
+ return True
+
+ return check_line_number
+
+
+def cell_len_checker(max_cell_len: int) -> CheckFunc:
+ """Cell Length Checker.
+
+ Returns a checker that verifies no cell exceeds a maximum character length.
+ """
+
+ def check_max_cell_len(header: Header, data: Data) -> bool:
+ is_valid = True
+ for i, line in enumerate(data, start=2):
+ # Start from 2 to account for header
+ for j, cell in enumerate(line):
+ if len(cell) > max_cell_len:
+ column_name = header[j] if j < len(header) else f"column {j + 1}"
+ log.warning(
+ f"Check Failed (Cell Length) on line {i}, column "
+ f"'{column_name}': Cell length is {len(cell)}, "
+ f"which exceeds the max of {max_cell_len}."
+ )
+ is_valid = False
+ return is_valid
+
+ return check_max_cell_len
diff --git a/src/odoo_data_flow/lib/conf_lib.py b/src/odoo_data_flow/lib/conf_lib.py
new file mode 100644
index 00000000..14ad0d7e
--- /dev/null
+++ b/src/odoo_data_flow/lib/conf_lib.py
@@ -0,0 +1,67 @@
+"""Config File Handler.
+
+This module handles reading the connection configuration file and
+establishing a connection to the Odoo server using odoo-client-lib.
+"""
+
+import configparser
+from typing import Any
+
+import odoolib # type: ignore[import-untyped]
+
+from ..logging_config import log
+
+
+def get_connection_from_config(config_file: str) -> Any:
+ """Get connection from config.
+
+ Reads an Odoo connection configuration file and returns an
+ initialized OdooClient object.
+
+ Args:
+ config_file: The path to the connection.conf file.
+
+ Returns:
+        An initialized and connected Odoo client object, as returned by
+        odoolib.get_connection(). Raises an exception on failure.
+ """
+ config = configparser.ConfigParser()
+ if not config.read(config_file):
+ log.error(f"Configuration file not found or is empty: {config_file}")
+ raise FileNotFoundError(f"Configuration file not found: {config_file}")
+
+ try:
+ conn_details: dict[str, Any] = dict(config["Connection"])
+
+ # Explicitly check for required keys before proceeding.
+ # This loop is the crucial fix.
+ required_keys = ["hostname", "database", "login", "password"]
+ for key in required_keys:
+ if key not in conn_details:
+ raise KeyError(f"Required key '{key}' not found in config file.")
+
+ # Ensure port and uid are integers if they exist
+ if "port" in conn_details:
+ conn_details["port"] = int(conn_details["port"])
+ if "uid" in conn_details:
+ # The OdooClient expects the user ID as 'user_id'
+ conn_details["user_id"] = int(conn_details.pop("uid"))
+
+ log.info(f"Connecting to Odoo server at {conn_details.get('hostname')}...")
+
+ # Use odoo-client-lib to establish the connection
+ connection = odoolib.get_connection(**conn_details)
+
+ log.info("Connection successful.")
+ return connection
+
+ except (KeyError, ValueError) as e:
+ log.error(
+ f"Configuration file '{config_file}' is missing a required key "
+ f"or has a malformed value: {e}"
+ )
+ raise
+ except Exception as e:
+ log.error(f"An unexpected error occurred while connecting to Odoo: {e}")
+ raise
diff --git a/src/odoo_data_flow/lib/internal/__init__.py b/src/odoo_data_flow/lib/internal/__init__.py
new file mode 100644
index 00000000..730b15e4
--- /dev/null
+++ b/src/odoo_data_flow/lib/internal/__init__.py
@@ -0,0 +1,17 @@
+"""Internal helper tools for odoo-data-flow.
+
+This __init__.py file makes the internal modules available under the
+'internal' namespace and defines the public API of this sub-package.
+"""
+
+from . import exceptions, io, rpc_thread, tools
+
+# By defining __all__, we explicitly state which names are part of the
+# public API of this package. This also signals to linters like ruff
+# that the imports above are intentional, which resolves the F401 error.
+__all__ = [
+ "exceptions",
+ "io",
+ "rpc_thread",
+ "tools",
+]
diff --git a/src/odoo_data_flow/lib/internal/exceptions.py b/src/odoo_data_flow/lib/internal/exceptions.py
new file mode 100644
index 00000000..1b828330
--- /dev/null
+++ b/src/odoo_data_flow/lib/internal/exceptions.py
@@ -0,0 +1,26 @@
+"""Excpention handler.
+
+This module defines custom exceptions used throughout the library.
+"""
+
+from typing import Any
+
+
+class SkippingError(Exception):
+ """An exception raised to signal that the current row should be skipped.
+
+ This is used within mappers to control the data processing flow and
+ intentionally filter out certain records without causing the entire
+ process to fail.
+ """
+
+ def __init__(self, message: str, *args: Any):
+ """Initializes the exception with a descriptive message.
+
+        Args:
+            message: The reason why the row is being skipped.
+            *args: Additional arguments forwarded to the base Exception.
+ """
+ self.message = message
+ # Call the parent Exception's __init__ to ensure it behaves
+ # like a standard Python exception.
+ super().__init__(message, *args)
diff --git a/src/odoo_data_flow/lib/internal/io.py b/src/odoo_data_flow/lib/internal/io.py
new file mode 100644
index 00000000..f67935e5
--- /dev/null
+++ b/src/odoo_data_flow/lib/internal/io.py
@@ -0,0 +1,119 @@
+"""IO helpers.
+
+This module contains low-level helper functions for file I/O,
+including writing CSV data and generating shell scripts.
+"""
+
+import csv
+import os
+import shlex
+from typing import Any, Optional
+
+from ...logging_config import log
+
+
+def write_csv(
+ filename: str,
+ header: list[str],
+ data: list[list[Any]],
+ encoding: str = "utf-8",
+) -> None:
+ """Writes data to a CSV file with a semicolon separator.
+
+ Args:
+ filename: The path to the output CSV file.
+ header: A list of strings for the header row.
+ data: A list of lists representing the data rows.
+ encoding: The file encoding to use.
+ """
+ try:
+ with open(filename, "w", newline="", encoding=encoding) as f:
+ writer = csv.writer(f, delimiter=";", quoting=csv.QUOTE_ALL)
+ writer.writerow(header)
+ writer.writerows(data)
+ except OSError as e:
+ log.error(f"Failed to write to file {filename}: {e}")
+
+
+def write_file(
+ filename: Optional[str] = None,
+ header: Optional[list[str]] = None,
+ data: Optional[list[list[Any]]] = None,
+ fail: bool = False,
+ model: str = "auto",
+ launchfile: str = "import_auto.sh",
+ worker: int = 1,
+ batch_size: int = 10,
+ init: bool = False,
+ encoding: str = "utf-8",
+ groupby: str = "",
+ sep: str = ";",
+ context: Optional[dict[str, Any]] = None,
+ ignore: str = "",
+ **kwargs: Any, # to catch other unused params
+) -> None:
+ """Filewriter.
+
+ Writes data to a CSV file and generates a corresponding shell script
+ to import that file using the odoo-data-flow CLI.
+ """
+ # Step 1: Write the actual data file
+ if filename and header is not None and data is not None:
+ write_csv(filename, header, data, encoding=encoding)
+
+ # Step 2: If no launchfile is specified, we are done.
+ if not launchfile:
+ return
+
+ # Step 3: Only generate the import script if a filename was provided.
+ if filename:
+ # Determine the target model name
+ if model == "auto":
+ model_name = os.path.basename(filename).replace(".csv", "")
+ else:
+ model_name = model
+
+ # Build the base command with its arguments
+ # We use shlex.quote to ensure all arguments
+ # are safely escaped for the shell.
+ command_parts = [
+ "odoo-data-flow",
+ "import",
+ "--config",
+ shlex.quote(kwargs.get("conf_file", "conf/connection.conf")),
+ "--file",
+ shlex.quote(filename),
+ "--model",
+ shlex.quote(model_name),
+ "--encoding",
+ shlex.quote(encoding),
+ "--worker",
+ str(worker),
+ "--size",
+ str(batch_size),
+ "--sep",
+ shlex.quote(sep),
+ ]
+
+ # Add optional arguments if they have a value
+ if groupby:
+ command_parts.extend(["--groupby", shlex.quote(groupby)])
+ if ignore:
+ command_parts.extend(["--ignore", shlex.quote(ignore)])
+ if context:
+ command_parts.extend(["--context", shlex.quote(str(context))])
+
+ # Write the command(s) to the shell script
+ mode = "w" if init else "a"
+ try:
+ with open(launchfile, mode, encoding="utf-8") as f:
+ # Write the main import command
+ f.write(" ".join(command_parts) + "\n")
+
+ # If fail mode is enabled,
+ # write the second command with the --fail flag
+ if fail:
+ fail_command_parts = [*command_parts, "--fail"]
+ f.write(" ".join(fail_command_parts) + "\n")
+ except OSError as e:
+ log.error(f"Failed to write to launch file {launchfile}: {e}")
diff --git a/src/odoo_data_flow/lib/internal/rpc_thread.py b/src/odoo_data_flow/lib/internal/rpc_thread.py
new file mode 100644
index 00000000..c5469541
--- /dev/null
+++ b/src/odoo_data_flow/lib/internal/rpc_thread.py
@@ -0,0 +1,78 @@
+"""RPC Threads.
+
+This module provides a robust, thread-safe mechanism for executing
+RPC calls to Odoo in parallel.
+"""
+
+import concurrent.futures
+from typing import Any, Callable, Optional
+
+from ...logging_config import log
+
+
+class RpcThread:
+ """A wrapper around ThreadPoolExecutor to manage parallel RPC calls to Odoo.
+
+ This class simplifies running multiple functions concurrently while limiting
+ the number of simultaneous connections to the server.
+ """
+
+ def __init__(self, max_connection: int) -> None:
+ """Initializes the thread pool.
+
+ Args:
+ max_connection: The maximum number of threads to run in parallel.
+ """
+ if not isinstance(max_connection, int) or max_connection < 1:
+ raise ValueError("max_connection must be a positive integer.")
+
+ self.executor = concurrent.futures.ThreadPoolExecutor(
+ max_workers=max_connection
+ )
+ self.futures: list[concurrent.futures.Future[Any]] = []
+
+ def spawn_thread(
+ self,
+ fun: Callable[..., Any],
+ args: list[Any],
+ kwargs: Optional[dict[str, Any]] = None,
+ ) -> None:
+ """Submits a function to be executed by a worker thread in the pool.
+
+ Args:
+ fun: The function to execute.
+ args: A list of positional arguments to pass to the function.
+ kwargs: A dictionary of keyword arguments to pass to the function.
+ """
+ if kwargs is None:
+ kwargs = {}
+
+ future = self.executor.submit(fun, *args, **kwargs)
+ self.futures.append(future)
+
+ def wait(self) -> None:
+ """Waits for all submitted tasks to complete.
+
+ This method will block until every task has finished. If any task
+ raised an exception during its execution, that exception will be logged.
+ """
+ log.info(f"Waiting for {len(self.futures)} tasks to complete...")
+
+ # Use as_completed to process results as they finish,
+ # which is memory efficient.
+ for future in concurrent.futures.as_completed(self.futures):
+ try:
+ # Calling .result() will re-raise any exception that occurred
+ # in the worker thread. We catch it to log it.
+ future.result()
+ except Exception as e:
+ # Log the exception from the failed thread.
+ log.error(f"A task in a worker thread failed: {e}", exc_info=True)
+
+ # Shutdown the executor gracefully.
+ self.executor.shutdown(wait=True)
+ log.info("All tasks have completed.")
+
+ def thread_number(self) -> int:
+ """Returns the total number of tasks submitted to the pool."""
+ return len(self.futures)
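+
+
+# Illustrative usage (hedged sketch; ``model`` is assumed to be a model proxy
+# obtained from an odoo-client-lib connection, ``batches`` an iterable of
+# record batches):
+#
+#     rpc = RpcThread(max_connection=4)
+#     for data_batch in batches:
+#         rpc.spawn_thread(model.load, [header, data_batch])
+#     rpc.wait()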
diff --git a/src/odoo_data_flow/lib/internal/tools.py b/src/odoo_data_flow/lib/internal/tools.py
new file mode 100644
index 00000000..e1ea5177
--- /dev/null
+++ b/src/odoo_data_flow/lib/internal/tools.py
@@ -0,0 +1,172 @@
+"""Internal odoo-data-flow Tools.
+
+This module provides low-level utility functions for data formatting and
+iteration, primarily used by the mapper and processor modules.
+"""
+
+from collections.abc import Iterable, Iterator
+from itertools import islice
+from typing import Any, Callable
+
+
+def batch(iterable: Iterable[Any], size: int) -> Iterator[list[Any]]:
+ """Splits an iterable into batches of a specified size.
+
+ Args:
+ iterable: The iterable to process.
+ size: The desired size of each batch.
+
+ Yields:
+ A list containing the next batch of items.
+ """
+ source_iterator = iter(iterable)
+ while True:
+ batch_iterator = islice(source_iterator, size)
+ # Get the first item to check if the iterator is exhausted
+ try:
+ first_item = next(batch_iterator)
+ except StopIteration:
+ return
+
+ # Chain the first item back with the rest of the batch iterator
+ # and yield the complete batch as a list.
+ yield [first_item, *list(batch_iterator)]
+
+
+# --- Data Formatting Tools ---
+
+
+def to_xmlid(name: str) -> str:
+ """Create valid xmlid.
+
+ Sanitizes a string to make it a valid XML ID, replacing special
+ characters with underscores.
+ """
+ # A mapping of characters to replace.
+ replacements = {".": "_", ",": "_", "\n": "_", "|": "_", " ": "_"}
+ for old, new in replacements.items():
+ name = name.replace(old, new)
+ return name.strip()
+
+
+def to_m2o(prefix: str, value: Any, default: str = "") -> str:
+ """Creates a full external ID for a Many2one relationship.
+
+ Creates a full external ID for a Many2one relationship by combining
+ a prefix and a sanitized value.
+
+ Args:
+ prefix: The XML ID prefix (e.g., 'my_module').
+ value: The value to be sanitized and appended to the prefix.
+ default: The value to return if the input value is empty.
+
+    Returns:
+ The formatted external ID (e.g., 'my_module.sanitized_value').
+ """
+ if not value:
+ return default
+
+ # Ensure the prefix ends with a dot,
+ # but don't add one if it's already there.
+ if not prefix.endswith("."):
+ prefix += "."
+
+ return f"{prefix}{to_xmlid(str(value))}"
+
+
+def to_m2m(prefix: str, value: str) -> str:
+ """Creates a comma-separated list of external IDs .
+
+ Creates a comma-separated list of external IDs for a Many2many relationship.
+ It takes a string of comma-separated values, sanitizes each one, and
+ prepends the prefix.
+
+ Args:
+ prefix: The XML ID prefix to apply to each value.
+ value: A single string containing one or more values,
+ separated by commas.
+
+    Returns:
+ A comma-separated string of formatted external IDs.
+ """
+ if not value:
+ return ""
+
+ ids = [to_m2o(prefix, val.strip()) for val in value.split(",") if val.strip()]
+ return ",".join(ids)
+
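+# Illustrative results (prefixes and values are assumptions):
+#   to_m2o("my_import", "Blue Large") -> "my_import.Blue_Large"
+#   to_m2m("res_partner_category", "Gold, VIP")
+#     -> "res_partner_category.Gold,res_partner_category.VIP"
+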
+
+class AttributeLineDict:
+ """Aggregates attribute line data for product templates."""
+
+ def __init__(
+ self,
+ attribute_list_ids: list[list[str]],
+ id_gen_fun: Callable[..., str],
+ ) -> None:
+ """Initializes the aggregator."""
+ self.data: dict[str, dict[str, list[str]]] = {}
+ self.att_list: list[list[str]] = attribute_list_ids
+ self.id_gen: Callable[..., str] = id_gen_fun
+
+ def add_line(self, line: list[Any], header: list[str]) -> None:
+ """Add line.
+
+ Processes a single line of attribute data and aggregates it
+ by product template ID.
+
+ `line` is expected to contain:
+ - 'product_tmpl_id/id': The template's external ID.
+ - 'attribute_id/id': A dict mapping attribute name to its ID.
+ - 'value_ids/id': A dict mapping attribute name to the value's ID.
+ """
+ line_dict = dict(zip(header, line))
+ template_id = line_dict.get("product_tmpl_id/id")
+ if not template_id:
+ return
+
+ if self.data.get(template_id):
+ # Template already exists, add new attribute values
+ template_info = self.data[template_id]
+ for att_id, att_name in self.att_list:
+ # Check if the current line contains this attribute
+ if line_dict.get("attribute_id/id", {}).get(att_name):
+ value = line_dict["value_ids/id"][att_name]
+ # Ensure value is unique before adding
+ if value not in template_info.setdefault(att_id, []):
+ template_info[att_id].append(value)
+ else:
+ # This is a new template
+ d: dict[str, list[str]] = {}
+ for att_id, att_name in self.att_list:
+ if line_dict.get("attribute_id/id", {}).get(att_name):
+ d[att_id] = [line_dict["value_ids/id"][att_name]]
+ self.data[template_id] = d
+
+ def generate_line(self) -> tuple[list[str], list[list[str]]]:
+ """Generate line.
+
+ Generates the final list of attribute lines for the CSV file,
+ one line per attribute per product template.
+ """
+ lines_header = [
+ "id",
+ "product_tmpl_id/id",
+ "attribute_id/id",
+ "value_ids/id",
+ ]
+ lines_out: list[list[str]] = []
+ for template_id, attributes in self.data.items():
+ if not template_id:
+ continue
+ # Create a unique line for each attribute associated with the template
+ for attribute_id, values in attributes.items():
+ line = [
+ self.id_gen(template_id, attributes),
+ template_id,
+ attribute_id,
+ ",".join(values), # Odoo m2m/o2m often use comma-separated IDs
+ ]
+ lines_out.append(line)
+ return lines_header, lines_out
diff --git a/src/odoo_data_flow/lib/mapper.py b/src/odoo_data_flow/lib/mapper.py
new file mode 100644
index 00000000..2339e33c
--- /dev/null
+++ b/src/odoo_data_flow/lib/mapper.py
@@ -0,0 +1,673 @@
+"""This module contains a library of mapper functions.
+
+Mappers are the core building blocks for data transformations. Each function
+in this module is a "mapper factory" - it is a function that you call to
+configure and return another function, which will then be executed by the
+Processor for each row of the source data.
+"""
+
+import base64
+import inspect
+import os
+from typing import Any, Callable, cast
+
+import requests # type: ignore[import-untyped]
+
+from ..logging_config import log
+from .internal.exceptions import SkippingError
+from .internal.tools import to_m2m, to_m2o
+
+__all__ = [
+ "binary",
+ "binary_url_map",
+ "bool_val",
+ "concat",
+ "concat_field_value_m2m",
+ "concat_mapper_all",
+ "cond",
+ "const",
+ "field",
+ "m2m",
+ "m2m_attribute_value",
+ "m2m_id_list",
+ "m2m_map",
+ "m2m_template_attribute_value",
+ "m2m_value_list",
+ "m2o",
+ "m2o_att",
+ "m2o_att_name",
+ "m2o_map",
+ "map_val",
+ "num",
+ "record",
+ "split_file_number",
+ "split_line_number",
+ "to_m2m",
+ "to_m2o",
+ "val",
+ "val_att",
+]
+
+# Type alias for clarity
+LineDict = dict[str, Any]
+StateDict = dict[str, Any]
+MapperFunc = Callable[[LineDict, StateDict], Any]
+
+
+def _get_field_value(line: LineDict, field: str, default: Any = "") -> Any:
+ """Safely retrieves a value from the current data row."""
+ return line.get(field, default) or default
+
+
+def _str_to_mapper(field: Any) -> MapperFunc:
+ """Converts a string field name into a basic val mapper.
+
+ If the input is not a string, it is assumed to be a valid mapper function.
+ """
+ if isinstance(field, str):
+ return val(field)
+ return cast(MapperFunc, field)
+
+
+def _list_to_mappers(args: tuple[Any, ...]) -> list[MapperFunc]:
+ """Converts a list of strings or mappers into a list of mappers."""
+ return [_str_to_mapper(f) for f in args]
+
+
+def const(value: Any) -> MapperFunc:
+ """Returns a mapper that always provides a constant value."""
+
+ def const_fun(line: LineDict, state: StateDict) -> Any:
+ return value
+
+ return const_fun
+
+
+def val(
+ field: str,
+ default: Any = "",
+ postprocess: Callable[..., Any] = lambda x, s: x,
+ skip: bool = False,
+) -> MapperFunc:
+ """Returns a mapper that gets a value from a specific field in the row."""
+
+ def val_fun(line: LineDict, state: StateDict) -> Any:
+ value = _get_field_value(line, field)
+ if not value and skip:
+ raise SkippingError(f"Missing required value for field '{field}'")
+
+ final_value = value or default
+ try:
+ sig = inspect.signature(postprocess)
+ if len(sig.parameters) == 1:
+ return postprocess(final_value)
+ else:
+ return postprocess(final_value, state)
+ except (ValueError, TypeError):
+ try:
+ return postprocess(final_value, state)
+ except TypeError:
+ return postprocess(final_value)
+
+ return val_fun
+
+
+def concat(separator: str, *fields: Any, skip: bool = False) -> MapperFunc:
+ """Returns a mapper that joins values from multiple fields or static strings.
+
+ Args:
+ separator: The string to place between each value.
+ *fields: A variable number of source column names or static strings.
+ skip: If True, raises SkippingError if the final result is empty.
+
+ Returns:
+ A mapper function that returns the concatenated string.
+ """
+ mappers = _list_to_mappers(fields)
+
+ def concat_fun(line: LineDict, state: StateDict) -> str:
+ values = [str(m(line, state)) for m in mappers]
+ result = separator.join([v for v in values if v])
+ if not result and skip:
+ raise SkippingError(f"Concatenated value for fields {fields} is empty.")
+ return result
+
+ return concat_fun
+
+
+def concat_mapper_all(separator: str, *fields: Any) -> MapperFunc:
+ """Returns a mapper that joins values, but only if all values exist.
+
+ If any of the values from the specified fields is empty, this mapper
+ returns an empty string.
+
+ Args:
+ separator: The string to place between each value.
+ *fields: A variable number of source column names or static strings.
+
+ Returns:
+ A mapper function that returns the concatenated string or an empty string.
+ """
+ mappers = _list_to_mappers(fields)
+
+ def concat_all_fun(line: LineDict, state: StateDict) -> str:
+ values = [str(m(line, state)) for m in mappers]
+ if not all(values):
+ return ""
+ return separator.join(values)
+
+ return concat_all_fun
+
+
+def cond(field: str, true_mapper: Any, false_mapper: Any) -> MapperFunc:
+ """Returns a mapper that applies one of two mappers based on a condition.
+
+ Args:
+ field: The source column to check for a truthy value.
+ true_mapper: The mapper to apply if the value in `field` is truthy.
+ false_mapper: The mapper to apply if the value in `field` is falsy.
+
+ Returns:
+ A mapper function that returns the result of the chosen mapper.
+ """
+ true_m = _str_to_mapper(true_mapper)
+ false_m = _str_to_mapper(false_mapper)
+
+ def cond_fun(line: LineDict, state: StateDict) -> Any:
+ if _get_field_value(line, field):
+ return true_m(line, state)
+ else:
+ return false_m(line, state)
+
+ return cond_fun
+
+
+def bool_val(field: str, true_values: list[str]) -> MapperFunc:
+ """Returns a mapper that converts a field value to a boolean '1' or '0'.
+
+ Args:
+ field: The source column to check.
+ true_values: A list of strings that should be considered `True`.
+
+ Returns:
+ A mapper function that returns "1" or "0".
+ """
+
+ def bool_val_fun(line: LineDict, state: StateDict) -> str:
+ return "1" if _get_field_value(line, field) in true_values else "0"
+
+ return bool_val_fun
+
+
+def num(field: str, default: str = "0.0") -> MapperFunc:
+ """Returns a mapper that standardizes a numeric string.
+
+ It replaces all commas with dots.
+
+ Args:
+ field: The source column containing the numeric string.
+ default: The default value to use if the source value is empty.
+
+ Returns:
+ A mapper function that returns the standardized numeric string.
+ """
+
+ def num_fun(line: LineDict, state: StateDict) -> str:
+ value = _get_field_value(line, field, default)
+ return str(value).replace(",", ".")
+
+ return num_fun
+
+
+def field(col: str) -> MapperFunc:
+ """Returns the column name itself if the column has a value.
+
+ This is useful for some dynamic product attribute mappings.
+
+ Args:
+ col: The name of the column to check.
+
+ Returns:
+ A mapper function that returns the column name or an empty string.
+ """
+
+ def field_fun(line: LineDict, state: StateDict) -> str:
+ return col if _get_field_value(line, col) else ""
+
+ return field_fun
+
+
+def m2o(prefix: str, field: str, default: str = "", skip: bool = False) -> MapperFunc:
+ """Returns a mapper that creates a Many2one external ID from a field's value.
+
+ Args:
+ prefix: The XML ID prefix (e.g., 'my_module').
+ field: The source column containing the value for the ID.
+ default: The value to return if the source value is empty.
+ skip: If True, raises SkippingError if the source value is empty.
+
+ Returns:
+ A mapper function that returns the formatted external ID.
+ """
+
+ def m2o_fun(line: LineDict, state: StateDict) -> str:
+ value = _get_field_value(line, field)
+ if skip and not value:
+ raise SkippingError(f"Missing Value for {field}")
+ return to_m2o(prefix, value, default=default)
+
+ return m2o_fun
+
+
+def m2o_map(
+ prefix: str, *fields: Any, default: str = "", skip: bool = False
+) -> MapperFunc:
+ """Returns a mapper that creates a Many2one external ID by concatenating fields.
+
+ This is useful when the unique identifier for a record is spread across
+ multiple columns.
+
+ Args:
+ prefix: The XML ID prefix (e.g., 'my_module').
+ *fields: A variable number of source column names or static strings to join.
+ default: The value to return if the final concatenated value is empty.
+ skip: If True, raises SkippingError if the final result is empty.
+
+ Returns:
+ A mapper function that returns the formatted external ID.
+ """
+ concat_mapper = concat("_", *fields)
+
+ def m2o_fun(line: LineDict, state: StateDict) -> str:
+ value = concat_mapper(line, state)
+ if not value and skip:
+ raise SkippingError(f"Missing value for m2o_map with prefix '{prefix}'")
+ return to_m2o(prefix, value, default=default)
+
+ return m2o_fun
+
+
+def m2m(prefix: str, *fields: Any, sep: str = ",") -> MapperFunc:
+ """Returns a mapper that creates a comma-separated list of Many2many external IDs.
+
+ This mapper has two modes:
+ 1. **Multi-column**: If multiple fields are provided, it treats the value of
+ each field as a single ID.
+ 2. **Single-column**: If one field is provided, it splits the value of that
+ field by the separator `sep`.
+
+ Args:
+ prefix: The XML ID prefix to apply to each value.
+ *fields: One or more source column names.
+ sep: The separator to use when splitting a single field.
+
+ Returns:
+ A mapper function that returns a comma-separated string of external IDs.
+ """
+
+ def m2m_fun(line: LineDict, state: StateDict) -> str:
+ all_values = []
+ if len(fields) > 1: # Mode 1: Multiple columns
+ for field_name in fields:
+ value = _get_field_value(line, field_name)
+ if value:
+ all_values.append(to_m2o(prefix, value))
+ elif len(fields) == 1: # Mode 2: Single column with separator
+ field_name = fields[0]
+ value = _get_field_value(line, field_name)
+ if value and isinstance(value, str):
+ all_values.extend(to_m2o(prefix, v.strip()) for v in value.split(sep))
+
+ return ",".join(all_values)
+
+ return m2m_fun
+
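+# Illustrative behaviour (column names are assumptions):
+#   m2m("tag", "tags")         -> splits the single 'tags' column on ','
+#   m2m("tag", "tag1", "tag2") -> takes one value from each listed column
+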
+
+def m2m_map(prefix: str, mapper_func: MapperFunc) -> MapperFunc:
+ """Returns a mapper that wraps another mapper for Many2many fields.
+
+ It takes the comma-separated string result of another mapper and applies
+ the `to_m2m` formatting to it.
+
+ Args:
+ prefix: The XML ID prefix to apply.
+ mapper_func: The inner mapper function to execute first.
+
+ Returns:
+ A mapper function that returns a formatted m2m external ID list.
+ """
+
+ def m2m_map_fun(line: LineDict, state: StateDict) -> str:
+ value = mapper_func(line, state)
+ return to_m2m(prefix, value)
+
+ return m2m_map_fun
+
+
+def m2o_att_name(prefix: str, att_list: list[str]) -> MapperFunc:
+ """Returns a mapper that creates a dictionary of attribute-to-ID mappings.
+
+ This is used in legacy product import workflows.
+
+ Args:
+ prefix: The XML ID prefix to use for the attribute IDs.
+ att_list: A list of attribute column names to check for.
+
+ Returns:
+ A mapper function that returns a dictionary.
+ """
+
+ def m2o_att_fun(line: LineDict, state: StateDict) -> dict[str, str]:
+ return {
+ att: to_m2o(prefix, att) for att in att_list if _get_field_value(line, att)
+ }
+
+ return m2o_att_fun
+
+
+def m2m_id_list(prefix: str, *fields: Any, sep: str = ",") -> MapperFunc:
+ """Returns a mapper for creating a list of M2M external IDs.
+
+ This is primarily used when creating the related records for a M2M field,
+ such as creating all unique `res.partner.category` records.
+
+ Args:
+ prefix: The XML ID prefix to apply to each value.
+ *fields: One or more source fields to read values from.
+ sep: The separator to use when splitting values.
+
+ Returns:
+ A mapper function that returns a comma-separated string of external IDs.
+ """
+ concat_m = concat("", *fields)
+
+ def m2m_id_list_fun(line: LineDict, state: StateDict) -> str:
+ value = concat_m(line, state)
+ if not value:
+ return ""
+ values = [v.strip() for v in value.split(sep)]
+ return ",".join(to_m2o(prefix, v) for v in values if v)
+
+ return m2m_id_list_fun
+
+
+def m2m_value_list(*fields: Any, sep: str = ",") -> MapperFunc:
+ """Returns a mapper that creates a Python list of unique values.
+
+ This is used in conjunction with `m2m_id_list` when creating related
+ records for a M2M field.
+
+ Args:
+ *fields: One or more source fields to read values from.
+ sep: The separator to use when splitting values.
+
+ Returns:
+ A mapper function that returns a list of strings.
+ """
+ concat_m = concat("", *fields)
+
+ def m2m_value_list_fun(line: LineDict, state: StateDict) -> list[str]:
+ value = concat_m(line, state)
+ if not value:
+ return []
+ return [v.strip() for v in value.split(sep) if v.strip()]
+
+ return m2m_value_list_fun
+
+
+def map_val(
+ mapping_dict: dict[Any, Any],
+ key_mapper: Any,
+ default: Any = "",
+ m2m: bool = False,
+) -> MapperFunc:
+ """Returns a mapper that translates a value using a provided dictionary.
+
+ Args:
+ mapping_dict: The dictionary to use as a translation table.
+ key_mapper: A mapper that provides the key to look up.
+ default: A default value to return if the key is not found.
+ m2m: If True, splits the key by commas and translates each part.
+
+ Returns:
+ A mapper function that returns the translated value.
+ """
+ key_m = _str_to_mapper(key_mapper)
+
+ def map_val_fun(line: LineDict, state: StateDict) -> Any:
+ key = key_m(line, state)
+ if m2m and isinstance(key, str):
+ keys = [k.strip() for k in key.split(",")]
+ return ",".join([str(mapping_dict.get(k, default)) for k in keys])
+ return mapping_dict.get(key, default)
+
+ return map_val_fun
+
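+# Illustrative usage (mapping table and column name are assumptions):
+#   map_val({"Gold": "cat.gold", "VIP": "cat.vip"}, "tags", m2m=True)
+#   translates each comma-separated value found in the 'tags' column.
+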
+
+def record(mapping: dict[str, MapperFunc]) -> MapperFunc:
+ """Returns a mapper that processes a sub-mapping for a related record.
+
+ Used for creating one-to-many records (e.g., sales order lines).
+
+ Args:
+ mapping: A mapping dictionary for the related record.
+
+ Returns:
+ A mapper function that returns a dictionary of the processed sub-record.
+ """
+
+ def record_fun(line: LineDict, state: StateDict) -> dict[str, Any]:
+ return {key: mapper_func(line, state) for key, mapper_func in mapping.items()}
+
+ return record_fun
+
+
+def binary(field: str, path_prefix: str = "", skip: bool = False) -> MapperFunc:
+ """Returns a mapper that converts a local file to a base64 string.
+
+ Args:
+ field: The source column containing the path to the file.
+ path_prefix: An optional prefix to prepend to the file path.
+ skip: If True, raises SkippingError if the file is not found.
+
+ Returns:
+ A mapper function that returns the base64 encoded string.
+ """
+
+ def binary_fun(line: LineDict, state: StateDict) -> str:
+ filepath = _get_field_value(line, field)
+ if not filepath:
+ return ""
+
+ full_path = os.path.join(path_prefix, filepath)
+ try:
+ with open(full_path, "rb") as f:
+ return base64.b64encode(f.read()).decode("utf-8")
+ except FileNotFoundError as e:
+ if skip:
+ raise SkippingError(f"File not found at '{full_path}'") from e
+ log.warning(f"File not found at '{full_path}', skipping.")
+ return ""
+
+ return binary_fun
+
+
+def binary_url_map(field: str, skip: bool = False) -> MapperFunc:
+ """Returns a mapper that downloads a file from a URL and converts to base64.
+
+ Args:
+ field: The source column containing the URL.
+ skip: If True, raises SkippingError if the URL cannot be fetched.
+
+ Returns:
+ A mapper function that returns the base64 encoded string.
+ """
+
+ def binary_url_fun(line: LineDict, state: StateDict) -> str:
+ url = _get_field_value(line, field)
+ if not url:
+ return ""
+
+ try:
+ res = requests.get(url, timeout=10)
+ res.raise_for_status()
+ return base64.b64encode(res.content).decode("utf-8")
+ except requests.exceptions.RequestException as e:
+ if skip:
+ raise SkippingError(f"Cannot fetch file at URL '{url}': {e}") from e
+ log.warning(f"Cannot fetch file at URL '{url}': {e}")
+ return ""
+
+ return binary_url_fun
+
+
+def val_att(att_list: list[str]) -> MapperFunc:
+ """(Legacy V9-V12) Returns a dictionary of attributes that have a value.
+
+ This is a helper for legacy product attribute workflows.
+
+ Args:
+ att_list: A list of attribute column names to check.
+
+ Returns:
+ A mapper function that returns a dictionary.
+ """
+
+ def val_att_fun(line: LineDict, state: StateDict) -> dict[str, Any]:
+ return {
+ att: _get_field_value(line, att)
+ for att in att_list
+ if _get_field_value(line, att)
+ }
+
+ return val_att_fun
+
+
+def m2o_att(prefix: str, att_list: list[str]) -> MapperFunc:
+ """(Legacy V9-V12) Returns a dictionary of attribute-to-ID mappings.
+
+ This is a helper for legacy product attribute workflows where IDs for
+ attribute values were manually constructed.
+
+ Args:
+ prefix: The XML ID prefix to use for the attribute value IDs.
+ att_list: A list of attribute column names to process.
+
+ Returns:
+ A mapper function that returns a dictionary.
+ """
+
+ def m2o_att_fun(line: LineDict, state: StateDict) -> dict[str, str]:
+ result = {}
+ for att in att_list:
+ value = _get_field_value(line, att)
+ if value:
+ id_value = f"{att}_{value}"
+ result[att] = to_m2o(prefix, id_value)
+ return result
+
+ return m2o_att_fun
+
+
+def concat_field_value_m2m(separator: str, *fields: str) -> MapperFunc:
+ """(Legacy V9-V12) Specialized concat for attribute value IDs.
+
+ Joins each field name with its value (e.g., 'Color' + 'Blue' -> 'Color_Blue'),
+ then joins all resulting parts with a comma. This was used to create
+ unique external IDs for `product.attribute.value` records.
+
+ Args:
+ separator: The character to join the field name and value with.
+ *fields: The attribute columns to process.
+
+ Returns:
+ A mapper function that returns the concatenated string.
+ """
+
+ def concat_fun(line: LineDict, state: StateDict) -> str:
+ parts = []
+ for field in fields:
+ value = _get_field_value(line, field)
+ if value:
+ parts.append(f"{field}{separator}{value}")
+ return ",".join(parts)
+
+ return concat_fun
+
+
+def m2m_attribute_value(prefix: str, *fields: str) -> MapperFunc:
+ """(Legacy V9-V12) Creates a list of external IDs for attribute values.
+
+ This is a composite mapper for the legacy product attribute workflow.
+
+ Args:
+ prefix: The XML ID prefix.
+ *fields: The attribute columns to process.
+
+ Returns:
+ A mapper that returns a comma-separated string of external IDs.
+ """
+ return m2m_map(prefix, concat_field_value_m2m("_", *fields))
+
+
+def m2m_template_attribute_value(prefix: str, *fields: Any) -> MapperFunc:
+ """(Modern V13+) Creates a comma-separated list of attribute values.
+
+ This mapper concatenates the *values* of the given fields. This is used for
+ the modern product attribute system where Odoo automatically
+ creates the `product.attribute.value` records from the raw value names.
+
+ It will return an empty string if the `template_id` is missing from the
+ source line, preventing the creation of orphaned attribute lines.
+
+ Args:
+ prefix: (Unused) Kept for backward compatibility.
+ *fields: The attribute columns (e.g. 'Color', 'Size') to get values from.
+
+ Returns:
+ A mapper that returns a comma-separated string of attribute values.
+ """
+ concat_m = concat(",", *fields)
+
+ def m2m_attribute_fun(line: LineDict, state: StateDict) -> str:
+ # This check is critical for the modern workflow.
+ if not line.get("template_id"):
+ return ""
+ return cast(str, concat_m(line, state))
+
+ return m2m_attribute_fun
+
+
+def split_line_number(line_nb: int) -> Callable[[LineDict, int], int]:
+ """Returns a function to split data into chunks of a specific line count.
+
+ Args:
+ line_nb: The number of lines per chunk.
+
+ Returns:
+ A function compatible with the `Processor.split` method.
+ """
+
+ def split(line: LineDict, i: int) -> int:
+ return i // line_nb
+
+ return split
+
+
+def split_file_number(file_nb: int) -> Callable[[LineDict, int], int]:
+ """Returns a function to split data across a fixed number of chunks.
+
+ Args:
+ file_nb: The total number of chunks to create.
+
+ Returns:
+ A function compatible with the `Processor.split` method.
+ """
+
+ def split(line: LineDict, i: int) -> int:
+ return i % file_nb
+
+ return split
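+
+
+# Illustrative usage with Processor.split (hedged sketch; ``processor`` is a
+# Processor instance from odoo_data_flow.lib.transform):
+#
+#     chunks = processor.split(split_file_number(4))  # dict of up to 4 chunks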
diff --git a/src/odoo_data_flow/lib/transform.py b/src/odoo_data_flow/lib/transform.py
new file mode 100644
index 00000000..c063ce45
--- /dev/null
+++ b/src/odoo_data_flow/lib/transform.py
@@ -0,0 +1,543 @@
+"""This module contains the core Processor class for transforming data."""
+
+import csv
+import os
+from collections import OrderedDict
+from typing import Any, Callable, Optional, Union
+
+from lxml import etree # type: ignore[import-untyped]
+
+from ..logging_config import log
+from . import mapper
+from .internal.exceptions import SkippingError
+from .internal.io import write_file
+from .internal.tools import AttributeLineDict
+
+
+class MapperRepr:
+ """A wrapper to provide a useful string representation for mapper functions."""
+
+ def __init__(self, repr_string: str, func: Callable[..., Any]) -> None:
+ """Initializes the MapperRepr.
+
+ Args:
+ repr_string: The string representation to use for the mapper.
+ func: The actual callable mapper function.
+ """
+ self._repr_string = repr_string
+ self.func = func
+
+ def __call__(self, *args: Any, **kwargs: Any) -> Any:
+ """Calls the wrapped mapper function."""
+ return self.func(*args, **kwargs)
+
+ def __repr__(self) -> str:
+ """Returns the custom string representation."""
+ return self._repr_string
+
+
+class Processor:
+ """Core class for reading, transforming, and preparing data for Odoo."""
+
+ def __init__(
+ self,
+ filename: Optional[str] = None,
+ separator: str = ";",
+ encoding: str = "utf-8",
+ header: Optional[list[str]] = None,
+ data: Optional[list[list[Any]]] = None,
+ preprocess: Callable[
+ [list[str], list[list[Any]]], tuple[list[str], list[list[Any]]]
+ ] = lambda h, d: (h, d),
+ **kwargs: Any,
+ ) -> None:
+ """Initializes the Processor.
+
+ The Processor can be initialized either by providing a `filename` to read
+ from disk, or by providing `header` and `data` lists to work with
+ in-memory data.
+
+ Args:
+ filename: The path to the source CSV or XML file.
+ separator: The column delimiter for CSV files.
+ encoding: The character encoding of the source file.
+ header: A list of strings for the header row (for in-memory data).
+ data: A list of lists representing the data rows (for in-memory data).
+ preprocess: A function to modify the raw data before mapping begins.
+ **kwargs: Catches other arguments, primarily for XML processing.
+ """
+ self.file_to_write: OrderedDict[str, dict[str, Any]] = OrderedDict()
+ self.header: list[str]
+ self.data: list[list[Any]]
+
+ # Determine if initializing from a file or in-memory data
+ if filename:
+ self.header, self.data = self._read_file(
+ filename, separator, encoding, **kwargs
+ )
+ elif header is not None and data is not None:
+ self.header = header
+ self.data = data
+ else:
+ raise ValueError(
+ "Processor must be initialized with either a 'filename' or both"
+ " 'header' and 'data'."
+ )
+
+ # Apply any pre-processing hooks
+ self.header, self.data = preprocess(self.header, self.data)
+
+ def _read_file(
+ self, filename: str, separator: str, encoding: str, **kwargs: Any
+ ) -> tuple[list[str], list[list[Any]]]:
+ """Reads a CSV or XML file and returns its header and data."""
+ xml_root_path = kwargs.get("xml_root_tag")
+
+ if xml_root_path:
+ log.info(f"Reading XML file: {filename}")
+ try:
+ parser = etree.XMLParser(
+ resolve_entities=False,
+ no_network=True,
+ dtd_validation=False,
+ load_dtd=False,
+ )
+ tree = etree.parse(filename, parser=parser)
+ nodes = tree.xpath(xml_root_path)
+
+ if not nodes:
+ log.warning(f"No nodes found for root path '{xml_root_path}'")
+ return [], []
+
+ header = [elem.tag for elem in nodes[0]]
+ data = []
+ for node in nodes:
+ row = [
+ (node.find(col).text if node.find(col) is not None else "")
+ for col in header
+ ]
+ data.append(row)
+ return header, data
+ except etree.XMLSyntaxError as e:
+ log.error(f"Failed to parse XML file {filename}: {e}")
+ return [], []
+ except Exception as e:
+ log.error(
+ "An unexpected error occurred while reading XML file "
+ f"{filename}: {e}"
+ )
+ return [], []
+ else:
+ log.info(f"Reading CSV file: {filename}")
+ try:
+ with open(filename, encoding=encoding, newline="") as f:
+ reader = csv.reader(f, delimiter=separator)
+ header = next(reader)
+ data = [row for row in reader]
+ return header, data
+ except FileNotFoundError:
+ log.error(f"Source file not found at: {filename}")
+ return [], []
+ except Exception as e:
+ log.error(f"Failed to read file {filename}: {e}")
+ return [], []
+
+ def check(
+ self, check_fun: Callable[..., bool], message: Optional[str] = None
+ ) -> bool:
+ """Runs a data quality check function against the loaded data.
+
+ Args:
+ check_fun: The checker function to execute.
+ message: An optional custom error message to display on failure.
+
+ Returns:
+ True if the check passes, False otherwise.
+ """
+ res = check_fun(self.header, self.data)
+ if not res:
+ error_message = (
+ message or f"Data quality check '{check_fun.__name__}' failed."
+ )
+ log.warning(error_message)
+ return res
+
+ def split(self, split_fun: Callable[..., Any]) -> dict[Any, "Processor"]:
+ """Splits the processor's data into multiple new Processor objects.
+
+ Args:
+ split_fun: A function that takes a row dictionary and index, and
+ returns a key to group the row by.
+
+ Returns:
+ A dictionary where keys are the grouping keys and values are new
+ Processor instances containing the grouped data.
+ """
+ grouped_data: OrderedDict[Any, list[list[Any]]] = OrderedDict()
+ for i, row in enumerate(self.data):
+ row_dict = dict(zip(self.header, row))
+ key = split_fun(row_dict, i)
+ if key not in grouped_data:
+ grouped_data[key] = []
+ grouped_data[key].append(row)
+
+ return {
+ key: Processor(header=list(self.header), data=data)
+ for key, data in grouped_data.items()
+ }
+
+ def get_o2o_mapping(self) -> dict[str, MapperRepr]:
+ """Generates a direct 1-to-1 mapping dictionary."""
+ return {
+ str(column): MapperRepr(f"mapper.val('{column}')", mapper.val(column))
+ for column in self.header
+ if column
+ }
+
+ def process(
+ self,
+ mapping: dict[str, Callable[..., Any]],
+ filename_out: str,
+ params: Optional[dict[str, Any]] = None,
+ t: str = "list",
+ null_values: Optional[list[Any]] = None,
+ m2m: bool = False,
+ dry_run: bool = False,
+ ) -> tuple[list[str], Union[list[Any], set[tuple[Any, ...]]]]:
+ """Processes the data using a mapping and prepares it for writing.
+
+ Args:
+ mapping: The mapping dictionary defining the transformation rules.
+ filename_out: The path where the output CSV file will be saved.
+ params: A dictionary of parameters for the `odoo-data-flow import`
+ command, used when generating the load script.
+ t: The type of collection to return data in ('list' or 'set').
+ null_values: A list of values to be treated as empty.
+ m2m: If True, activates special processing for many-to-many data.
+ dry_run: If True, prints a sample of the output to the console
+ instead of writing files.
+
+ Returns:
+ A tuple containing the header list and the transformed data.
+ """
+ if null_values is None:
+ null_values = ["NULL", False]
+ if params is None:
+ params = {}
+
+ head: list[str]
+ data: Union[list[Any], set[tuple[Any, ...]]]
+ if m2m:
+ head, data = self._process_mapping_m2m(mapping, null_values=null_values)
+ else:
+ head, data = self._process_mapping(mapping, t=t, null_values=null_values)
+
+ if dry_run:
+ log.info("--- DRY RUN MODE ---")
+ log.info("No files will be written.")
+ log.info(f"Header: {head}")
+ data_list = list(data)
+ log.info(f"Total rows that would be generated: {len(data_list)}")
+ log.info("Sample of first 5 rows:")
+ for row in data_list[:5]:
+ log.info(row)
+ return head, data
+
+ self._add_data(head, data, filename_out, params)
+ return head, data
+
+ def write_to_file(
+ self,
+ script_filename: str,
+ fail: bool = True,
+ append: bool = False,
+ python_exe: str = "python",
+ path: str = "",
+ ) -> None:
+ """Generates the .sh script for the import.
+
+ Args:
+ script_filename: The path where the shell script will be saved.
+ fail: If True, includes a second command with the --fail flag.
+ append: If True, appends to the script file instead of overwriting.
+ python_exe: The python executable to use in the script.
+ path: The path to prepend to the odoo-data-flow command.
+ """
+ init = not append
+ for _, info in self.file_to_write.items():
+ info_copy = info.copy()
+ info_copy.update(
+ {
+ "model": info.get("model", "auto"),
+ "init": init,
+ "launchfile": script_filename,
+ "fail": fail,
+ "python_exe": python_exe,
+ "path": path,
+ }
+ )
+ write_file(**info_copy)
+ init = False
+
+ def join_file(
+ self,
+ filename: str,
+ master_key: str,
+ child_key: str,
+ header_prefix: str = "child",
+ separator: str = ";",
+ encoding: str = "utf-8",
+ ) -> None:
+ """Joins data from a secondary file into the processor's main data.
+
+ Args:
+ filename: The path to the secondary file to join.
+ master_key: The column name in the main data to join on.
+ child_key: The column name in the secondary data to join on.
+ header_prefix: A prefix to add to the headers from the child file.
+ separator: The column separator for the child CSV file.
+ encoding: The character encoding of the child file.
+ """
+ child_header, child_data = self._read_file(filename, separator, encoding)
+
+ try:
+ child_key_pos = child_header.index(child_key)
+ master_key_pos = self.header.index(master_key)
+ except ValueError as e:
+ log.error(
+ f"Join key error: {e}. Check if '{master_key}' and "
+ f"'{child_key}' exist in their respective files."
+ )
+ return
+
+ child_data_map = {row[child_key_pos]: row for row in child_data}
+
+ empty_child_row = [""] * len(child_header)
+ for master_row in self.data:
+ key_value = master_row[master_key_pos]
+ row_to_join = child_data_map.get(key_value, empty_child_row)
+ master_row.extend(row_to_join)
+
+ self.header.extend([f"{header_prefix}_{h}" for h in child_header])
+
+ def _add_data(
+ self,
+ head: list[str],
+ data: Union[list[Any], set[tuple[Any, ...]]],
+ filename_out: str,
+ params: dict[str, Any],
+ ) -> None:
+ """Adds data to the internal write queue."""
+ params_copy = params.copy()
+ params_copy["filename"] = (
+ os.path.abspath(filename_out) if filename_out else False
+ )
+ params_copy["header"] = head
+ params_copy["data"] = data
+ self.file_to_write[filename_out] = params_copy
+
+ def _process_mapping(
+ self,
+ mapping: dict[str, Callable[..., Any]],
+ t: str,
+ null_values: list[Any],
+ ) -> tuple[list[str], Union[list[Any], set[tuple[Any, ...]]]]:
+ """The core transformation loop."""
+ lines_out: Union[list[Any], set[tuple[Any, ...]]] = [] if t == "list" else set()
+ state: dict[str, Any] = {}
+
+ for i, line in enumerate(self.data):
+ cleaned_line = [
+ s.strip() if s and s.strip() not in null_values else "" for s in line
+ ]
+ line_dict = dict(zip(self.header, cleaned_line))
+
+ try:
+ line_out = [mapping[k](line_dict, state) for k in mapping.keys()]
+ except SkippingError as e:
+ log.debug(f"Skipping line {i}: {e.message}")
+ continue
+ except TypeError:
+ line_out = [mapping[k](line_dict) for k in mapping.keys()]
+
+ if isinstance(lines_out, list):
+ lines_out.append(line_out)
+ else:
+ lines_out.add(tuple(line_out))
+ return list(mapping.keys()), lines_out
+
+ def _process_mapping_m2m(
+ self,
+ mapping: dict[str, Callable[..., Any]],
+ null_values: list[Any],
+ ) -> tuple[list[str], list[Any]]:
+ """Handles special m2m mapping by expanding list values into unique rows."""
+ head, data_unioned = self._process_mapping(mapping, "list", null_values)
+ data: list[Any]
+ if isinstance(data_unioned, set):
+ data = list(data_unioned)
+ else:
+ data = data_unioned
+
+ lines_out: list[Any] = []
+
+ for line_out in data:
+ index_list, zip_list = [], []
+ for index, value in enumerate(line_out):
+ if isinstance(value, list):
+ index_list.append(index)
+ zip_list.append(value)
+
+ if not zip_list:
+ if line_out not in lines_out:
+ lines_out.append(line_out)
+ continue
+
+ values_list = zip(*zip_list)
+ for values in values_list:
+ new_line = list(line_out)
+ for i, val in enumerate(values):
+ new_line[index_list[i]] = val
+ if new_line not in lines_out:
+ lines_out.append(new_line)
+
+ return head, lines_out
+
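+# Illustrative end-to-end usage (hedged sketch; file names and the mapping
+# are assumptions):
+#
+#     processor = Processor("origin/res_partner.csv", separator=";")
+#     processor.process(my_mapping, "data/res.partner.csv",
+#                       params={"model": "res.partner"})
+#     processor.write_to_file("load_partner.sh")
+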
+
+class ProductProcessorV10(Processor):
+ """Processor for the modern (Odoo v13+) product attribute model."""
+
+ def process_attribute_data(
+ self,
+ attributes_list: list[str],
+ attribute_prefix: str,
+ filename_out: str,
+ import_args: dict[str, Any],
+ ) -> None:
+ """Creates and registers the `product.attribute.csv` file.
+
+ Args:
+ attributes_list: A list of attribute names (e.g., ['Color', 'Size']).
+ attribute_prefix: The prefix for generating external IDs.
+ filename_out: The output path for the CSV file.
+ import_args: A dictionary of parameters for the import script.
+ """
+ attr_header = ["id", "name", "create_variant"]
+ attr_data = [
+ [mapper.to_m2o(attribute_prefix, att), att, "Dynamically"]
+ for att in attributes_list
+ ]
+ self._add_data(attr_header, attr_data, filename_out, import_args)
+
+
+class ProductProcessorV9(Processor):
+ """Processor for the legacy (Odoo v9-v12) product attribute model."""
+
+ def _generate_attribute_file_data(
+ self, attributes_list: list[str], prefix: str
+ ) -> tuple[list[str], list[list[str]]]:
+ """Generates header and data for 'product.attribute.csv'."""
+ header = ["id", "name"]
+ data = [[mapper.to_m2o(prefix, attr), attr] for attr in attributes_list]
+ return header, data
+
+ def _extract_attribute_value_data(
+ self,
+ mapping: dict[str, Callable[..., Any]],
+ attributes_list: list[str],
+ processed_rows: list[dict[str, Any]],
+ ) -> set[tuple[Any, ...]]:
+ """Extracts and transforms data for 'product.attribute.value.csv'."""
+ attribute_values: set[tuple[Any, ...]] = set()
+ name_key = "name"
+
+ for row_dict in processed_rows:
+ try:
+ line_out_results = [mapping[k](row_dict) for k in mapping.keys()]
+ except TypeError:
+ line_out_results = [mapping[k](row_dict, {}) for k in mapping.keys()]
+
+ name_mapping_index = list(mapping.keys()).index(name_key)
+ values_dict = line_out_results[name_mapping_index]
+
+ if not isinstance(values_dict, dict):
+ continue
+
+ for attr_name in attributes_list:
+ if values_dict.get(attr_name):
+ value_line = tuple(
+ res[attr_name] if isinstance(res, dict) else res
+ for res in line_out_results
+ )
+ attribute_values.add(value_line)
+
+ return attribute_values
+
+ def process_attribute_mapping(
+ self,
+ mapping: dict[str, Callable[..., Any]],
+ line_mapping: dict[str, Callable[..., Any]],
+ attributes_list: list[str],
+ attribute_prefix: str,
+ path: str,
+ import_args: dict[str, Any],
+ id_gen_fun: Optional[Callable[..., str]] = None,
+ null_values: Optional[list[str]] = None,
+ ) -> None:
+ """Orchestrates the processing of legacy product attributes.
+
+ This method generates three CSV files required for the legacy workflow.
+ """
+ _null_values = null_values if null_values is not None else ["NULL"]
+ attr_header, attr_data = self._generate_attribute_file_data(
+ attributes_list, attribute_prefix
+ )
+
+ processed_rows: list[dict[str, Any]] = []
+ for line in self.data:
+ cleaned_line = [
+ s.strip() if s and s.strip() not in _null_values else "" for s in line
+ ]
+ processed_rows.append(dict(zip(self.header, cleaned_line)))
+
+ values_header = list(mapping.keys())
+ values_data = self._extract_attribute_value_data(
+ mapping, attributes_list, processed_rows
+ )
+
+ _id_gen_fun = id_gen_fun or (
+ lambda tmpl_id, vals: mapper.to_m2o(
+ tmpl_id.split(".")[0] + "_LINE", tmpl_id
+ )
+ )
+ line_aggregator = AttributeLineDict(attr_data, _id_gen_fun)
+ for row_dict in processed_rows:
+ try:
+ values_lines = [line_mapping[k](row_dict) for k in line_mapping.keys()]
+ except TypeError:
+ values_lines = [
+ line_mapping[k](row_dict, {}) for k in line_mapping.keys()
+ ]
+ line_aggregator.add_line(values_lines, list(line_mapping.keys()))
+ line_header, line_data = line_aggregator.generate_line()
+
+ context = import_args.setdefault("context", {})
+ context["create_product_variant"] = True
+
+ self._add_data(
+ attr_header, attr_data, path + "product.attribute.csv", import_args
+ )
+ self._add_data(
+ values_header,
+ values_data,
+ path + "product.attribute.value.csv",
+ import_args,
+ )
+
+ line_import_args = dict(import_args, groupby="product_tmpl_id/id")
+ self._add_data(
+ line_header,
+ line_data,
+ path + "product.attribute.line.csv",
+ line_import_args,
+ )
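+
+
+# A minimal usage sketch of the legacy workflow above (hypothetical source
+# file, attribute names and mapper-built mappings):
+#
+#     processor = ProductProcessorV9("origin/product.csv")
+#     processor.process_attribute_mapping(
+#         values_mapping,
+#         line_mapping,
+#         ["Color", "Size"],
+#         "PRODUCT_ATTRIBUTE",
+#         "data/",
+#         {"worker": 1, "batch_size": 10},
+#     )
+#
+# This writes product.attribute.csv, product.attribute.value.csv and
+# product.attribute.line.csv under data/.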
diff --git a/src/odoo_data_flow/lib/workflow/__init__.py b/src/odoo_data_flow/lib/workflow/__init__.py
new file mode 100644
index 00000000..466b78a6
--- /dev/null
+++ b/src/odoo_data_flow/lib/workflow/__init__.py
@@ -0,0 +1,7 @@
+"""Workflow tools for pre-post processing data."""
+
+from . import invoice_v9
+
+__all__ = [
+ "invoice_v9",
+]
diff --git a/src/odoo_data_flow/lib/workflow/invoice_v9.py b/src/odoo_data_flow/lib/workflow/invoice_v9.py
new file mode 100644
index 00000000..3ee37f36
--- /dev/null
+++ b/src/odoo_data_flow/lib/workflow/invoice_v9.py
@@ -0,0 +1,241 @@
+"""Invoice helper for Odoo version 9.
+
+This module contains a legacy workflow helper for processing imported
+invoices in Odoo v9. It is preserved for reference but will need to be
+updated to work with modern Odoo versions.
+"""
+
+from time import time
+from typing import Any
+from xmlrpc.client import Fault
+
+from ...logging_config import log
+from ..internal.rpc_thread import RpcThread
+
+
+class InvoiceWorkflowV9:
+    """Automate the Odoo 9 invoice workflow.
+
+ A class to automate the lifecycle of imported invoices in Odoo v9,
+ such as validating, paying, and setting taxes.
+ """
+
+ def __init__(
+ self,
+ connection: Any,
+ field: str,
+ status_map: dict[str, list[str]],
+ paid_date_field: str,
+ payment_journal: int,
+ max_connection: int = 4,
+ ) -> None:
+ """Initializes the workflow processor.
+
+ Args:
+ connection: An active odoo-client-lib connection object.
+            field: The field containing the legacy status in the source data.
+ status_map: A dict mapping Odoo states to lists of legacy states.
+ e.g., {'open': ['status1'], 'paid': ['status2']}
+ paid_date_field: The field containing the payment date.
+ payment_journal: The database ID of the payment journal to use.
+ max_connection: The number of parallel threads to use.
+ """
+ self.connection = connection
+ self.invoice_obj = connection.get_model("account.invoice")
+ self.payment_obj = connection.get_model("account.payment")
+ self.account_invoice_tax = self.connection.get_model("account.invoice.tax")
+ self.field = field
+ self.status_map = status_map
+ self.paid_date = paid_date_field
+ self.payment_journal = payment_journal
+ self.max_connection = max_connection
+ self.time = time()
+
+ def _display_percent(self, i: int, percent_step: int, total: int) -> None:
+ if i % percent_step == 0:
+ percentage = round(i / float(total) * 100, 2)
+ elapsed_time = time() - self.time
+ log.info(f"{percentage}% : {i}/{total} time {elapsed_time:.2f} sec")
+
+ def set_tax(self) -> None:
+ """Finds draft invoices and computes their taxes."""
+
+ def create_tax(invoice_id: int) -> None:
+ taxes = self.invoice_obj.get_taxes_values(invoice_id)
+ for tax in taxes.values():
+ self.account_invoice_tax.create(tax)
+
+ invoices: list[int] = self.invoice_obj.search(
+ [
+ ("state", "=", "draft"),
+ ("type", "=", "out_invoice"),
+ ("tax_line_ids", "=", False),
+ ]
+ )
+ total = len(invoices)
+ percent_step = int(total / 5000) or 1
+ self.time = time()
+ rpc_thread = RpcThread(self.max_connection)
+ log.info(f"Computing tax for {total} invoices...")
+ for i, invoice_id in enumerate(invoices):
+ self._display_percent(i, percent_step, total)
+ rpc_thread.spawn_thread(create_tax, [invoice_id])
+ rpc_thread.wait()
+
+ def validate_invoice(self) -> None:
+ """Finds and validates invoices that should be open or paid."""
+ statuses_to_validate = self.status_map.get("open", []) + self.status_map.get(
+ "paid", []
+ )
+ invoice_to_validate: list[int] = self.invoice_obj.search(
+ [
+ (self.field, "in", statuses_to_validate),
+ ("state", "=", "draft"),
+ ("type", "=", "out_invoice"),
+ ]
+ )
+ total = len(invoice_to_validate)
+ percent_step = int(total / 5000) or 1
+ rpc_thread = RpcThread(1) # Validation should be single-threaded
+ log.info(f"Validating {total} invoices...")
+ self.time = time()
+ for i, invoice_id in enumerate(invoice_to_validate):
+ self._display_percent(i, percent_step, total)
+ fun = self.connection.get_service("object").exec_workflow
+ rpc_thread.spawn_thread(
+ fun,
+ [
+ self.connection.database,
+ self.connection.user_id,
+ self.connection.password,
+ "account.invoice",
+ "invoice_open",
+ invoice_id,
+ ],
+ )
+ rpc_thread.wait()
+
+ def proforma_invoice(self) -> None:
+ """Finds and moves invoices to the pro-forma state."""
+ invoice_to_proforma: list[int] = self.invoice_obj.search(
+ [
+ (self.field, "in", self.status_map.get("proforma", [])),
+ ("state", "=", "draft"),
+ ("type", "=", "out_invoice"),
+ ]
+ )
+ total = len(invoice_to_proforma)
+ percent_step = int(total / 100) or 1
+ self.time = time()
+ rpc_thread = RpcThread(self.max_connection)
+ log.info(f"Setting {total} invoices to pro-forma...")
+ for i, invoice_id in enumerate(invoice_to_proforma):
+ self._display_percent(i, percent_step, total)
+ fun = self.connection.get_service("object").exec_workflow
+ rpc_thread.spawn_thread(
+ fun,
+ [
+ self.connection.database,
+ self.connection.user_id,
+ self.connection.password,
+ "account.invoice",
+ "invoice_proforma2",
+ invoice_id,
+ ],
+ {},
+ )
+ rpc_thread.wait()
+
+ def paid_invoice(self) -> None:
+ """Finds open invoices and registers payments for them."""
+
+ def pay_single_invoice(
+ data_update: dict[str, Any], wizard_context: dict[str, Any]
+ ) -> None:
+ fields_to_get = [
+ "communication",
+ "currency_id",
+ "invoice_ids",
+ "payment_difference",
+ "partner_id",
+ "payment_method_id",
+ "payment_difference_handling",
+ "journal_id",
+ "state",
+ "writeoff_account_id",
+ "payment_date",
+ "partner_type",
+ "hide_payment_method",
+ "payment_method_code",
+ "partner_bank_account_id",
+ "amount",
+ "payment_type",
+ ]
+ data = self.payment_obj.default_get(fields_to_get, context=wizard_context)
+ data.update(data_update)
+ wizard_id = self.payment_obj.create(data, context=wizard_context)
+ try:
+ self.payment_obj.post([wizard_id], context=wizard_context)
+ except Fault:
+ # Odoo may raise a fault for various reasons
+ # (e.g., already paid),
+ # which can be ignored in a batch process.
+ pass
+
+ invoices_to_paid: list[dict[str, Any]] = self.invoice_obj.search_read(
+ domain=[
+ (self.field, "in", self.status_map.get("paid", [])),
+ ("state", "=", "open"),
+ ("type", "=", "out_invoice"),
+ ],
+ fields=[self.paid_date, "date_invoice"],
+ )
+ total = len(invoices_to_paid)
+ percent_step = int(total / 1000) or 1
+ self.time = time()
+ rpc_thread = RpcThread(self.max_connection)
+ log.info(f"Registering payment for {total} invoices...")
+ for i, invoice in enumerate(invoices_to_paid):
+ self._display_percent(i, percent_step, total)
+ wizard_context = {
+ "active_id": invoice["id"],
+ "active_ids": [invoice["id"]],
+                "active_model": "account.invoice",
+ "default_invoice_ids": [(4, invoice["id"], 0)],
+ "type": "out_invoice",
+ "journal_type": "sale",
+ }
+ data_update = {
+ "journal_id": self.payment_journal,
+ "payment_date": invoice.get(self.paid_date)
+ or invoice.get("date_invoice"),
+ "payment_method_id": 1, # Manual
+ }
+ rpc_thread.spawn_thread(
+ pay_single_invoice, [data_update, wizard_context], {}
+ )
+ rpc_thread.wait()
+
+ def rename(self, name_field: str) -> None:
+ """Utility to move a value from a custom field to the invoice number."""
+ invoices_to_rename: list[dict[str, Any]] = self.invoice_obj.search_read(
+ domain=[
+ (name_field, "!=", False),
+ (name_field, "!=", "0.0"),
+ ("state", "!=", "draft"),
+ ("type", "=", "out_invoice"),
+ ],
+ fields=[name_field],
+ )
+ total = len(invoices_to_rename)
+ percent_step = int(total / 1000) or 1
+ self.time = time()
+ rpc_thread = RpcThread(int(self.max_connection * 1.5))
+ log.info(f"Renaming {total} invoices...")
+ for i, invoice in enumerate(invoices_to_rename):
+ self._display_percent(i, percent_step, total)
+ update_vals = {"number": invoice[name_field], name_field: False}
+ rpc_thread.spawn_thread(
+ self.invoice_obj.write, [invoice["id"], update_vals], {}
+ )
+ rpc_thread.wait()
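+
+
+# A minimal usage sketch (hypothetical field names and journal ID; the
+# connection object comes from odoo-client-lib):
+#
+#     from odoo_data_flow.lib.conf_lib import get_connection_from_config
+#
+#     connection = get_connection_from_config("conf/connection.conf")
+#     wf = InvoiceWorkflowV9(
+#         connection,
+#         field="x_legacy_status",
+#         status_map={"open": ["OPEN"], "paid": ["PAID"]},
+#         paid_date_field="x_paid_date",
+#         payment_journal=7,
+#     )
+#     wf.set_tax()
+#     wf.validate_invoice()
+#     wf.paid_invoice()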
diff --git a/src/odoo_data_flow/logging_config.py b/src/odoo_data_flow/logging_config.py
new file mode 100755
index 00000000..be63200d
--- /dev/null
+++ b/src/odoo_data_flow/logging_config.py
@@ -0,0 +1,47 @@
+"""Centralized logging configuration for the odoo-data-flow application."""
+
+import logging
+import sys
+from typing import Optional
+
+# Get the root logger for the application package
+log = logging.getLogger("odoo_data_flow")
+
+
+def setup_logging(verbose: bool = False, log_file: Optional[str] = None) -> None:
+ """Configures the root logger for the application.
+
+ This function sets up handlers to print logs to the console and optionally
+ to a specified file.
+
+ Args:
+ verbose: If True, the logging level is set to DEBUG.
+ Otherwise, it's set to INFO.
+ log_file: If provided, logs will also be written to this file path.
+ """
+ # Determine the logging level
+ level = logging.DEBUG if verbose else logging.INFO
+ log.setLevel(level)
+
+ # Clear any existing handlers to avoid duplicate logs if this is called
+ # multiple times
+ if log.hasHandlers():
+ log.handlers.clear()
+
+ # Create a formatter to be used by all handlers
+ formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+
+ # Always create a handler to print to the console
+ console_handler = logging.StreamHandler(sys.stdout)
+ console_handler.setFormatter(formatter)
+ log.addHandler(console_handler)
+
+ # If a log file is specified, create a file handler as well
+ if log_file:
+ try:
+ file_handler = logging.FileHandler(log_file)
+ file_handler.setFormatter(formatter)
+ log.addHandler(file_handler)
+ log.info(f"Logging to file: {log_file}")
+ except Exception as e:
+ log.error(f"Failed to set up log file at {log_file}: {e}")
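+
+
+# Usage sketch (hypothetical log file name); call this once from an entry
+# point before doing any work:
+#
+#     setup_logging(verbose=True, log_file="odoo-data-flow.log")
+#     log.debug("Debug output is now printed and mirrored to the log file.")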
diff --git a/src/odoo_data_flow/migrator.py b/src/odoo_data_flow/migrator.py
new file mode 100644
index 00000000..3b97a5af
--- /dev/null
+++ b/src/odoo_data_flow/migrator.py
@@ -0,0 +1,86 @@
+"""Migrate data between two Odoo databases.
+
+This module contains the logic for performing a direct, in-memory
+migration of data from one Odoo instance to another.
+"""
+
+from typing import Any, Callable, Optional
+
+from .exporter import run_export_for_migration
+from .importer import run_import_for_migration
+from .lib.transform import Processor
+from .logging_config import log
+
+
+def run_migration(
+ config_export: str,
+ config_import: str,
+ model: str,
+ domain: str = "[]",
+ fields: Optional[list[str]] = None,
+ mapping: Optional[dict[str, Callable[..., Any]]] = None,
+ export_worker: int = 1,
+ export_batch_size: int = 100,
+ import_worker: int = 1,
+ import_batch_size: int = 10,
+) -> None:
+ """Performs a server-to-server data migration.
+
+ This function chains together the export, transform, and import processes
+ without creating intermediate files.
+ """
+ log.info("--- Starting Server-to-Server Migration ---")
+
+ # Step 1: Export data from the source database
+ log.info(f"Exporting data from model '{model}'...")
+ header, data = run_export_for_migration(
+ config=config_export,
+ model=model,
+ domain=domain,
+ fields=fields or [],
+ worker=export_worker,
+ batch_size=export_batch_size,
+ )
+
+ if not header or not data:
+ log.warning("No data exported. Migration finished.")
+ return
+
+ log.info(f"Successfully exported {len(data)} records.")
+
+ # Step 2: Transform the data in memory
+ log.info("Transforming data in memory...")
+ processor = Processor(header=header, data=data)
+
+ final_mapping: dict[str, Callable[..., Any]]
+ if not mapping:
+ log.info("No mapping provided, using 1-to-1 mapping.")
+ # Convert the MapperRepr dict to a callable dict for the process method
+ final_mapping = {k: v.func for k, v in processor.get_o2o_mapping().items()}
+ else:
+ final_mapping = mapping
+
+ # The process method returns the transformed header and data
+ to_import_header, to_import_data_unioned = processor.process(
+ final_mapping, filename_out=""
+ )
+
+ # Ensure to_import_data is a list of lists
+ to_import_data_list: list[list[Any]]
+ if isinstance(to_import_data_unioned, set):
+ to_import_data_list = [list(row) for row in to_import_data_unioned]
+ else:
+ to_import_data_list = to_import_data_unioned
+
+ # Step 3: Import the transformed data into the destination database
+ log.info(f"Importing {len(to_import_data_list)} records into destination...")
+ run_import_for_migration(
+ config=config_import,
+ model=model,
+ header=to_import_header,
+ data=to_import_data_list,
+ worker=import_worker,
+ batch_size=import_batch_size,
+ )
+
+ log.info("--- Migration Finished Successfully ---")
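+
+
+# A minimal usage sketch (hypothetical config paths; with no mapping the
+# exported columns are imported one-to-one):
+#
+#     run_migration(
+#         config_export="conf/source.conf",
+#         config_import="conf/destination.conf",
+#         model="res.partner",
+#         domain="[('is_company', '=', True)]",
+#         fields=["id", "name", "email"],
+#     )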
diff --git a/src/odoo_data_flow/py.typed b/src/odoo_data_flow/py.typed
new file mode 100644
index 00000000..e69de29b
diff --git a/src/odoo_data_flow/workflow_runner.py b/src/odoo_data_flow/workflow_runner.py
new file mode 100644
index 00000000..241db3b8
--- /dev/null
+++ b/src/odoo_data_flow/workflow_runner.py
@@ -0,0 +1,86 @@
+"""Workflow runner for post-import workflows.
+
+This module acts as a dispatcher for running post-import workflows
+from the command line.
+"""
+
+import ast
+from typing import Any
+
+from .lib.conf_lib import get_connection_from_config
+from .lib.workflow.invoice_v9 import InvoiceWorkflowV9
+from .logging_config import log
+
+
+def run_invoice_v9_workflow(
+ actions: list[str],
+ config: str,
+ field: str,
+ status_map_str: str,
+ paid_date_field: str,
+ payment_journal: int,
+ max_connection: int,
+) -> None:
+ """Initializes and runs the requested actions for the InvoiceWorkflowV9.
+
+ Args:
+ actions: A list of workflow actions to perform (e.g., ['tax', 'validate']).
+ config: The path to the connection configuration file.
+ field: The source field containing the legacy invoice status.
+ status_map_str: A string representation of the dictionary mapping Odoo
+ states to legacy states.
+ paid_date_field: The source field containing the payment date.
+ payment_journal: The database ID of the payment journal.
+ max_connection: The number of parallel threads to use.
+ """
+ log.info("--- Initializing Invoice Workflow for Odoo v9 ---")
+
+ try:
+ connection: Any = get_connection_from_config(config_file=config)
+
+ # Safely evaluate the status map string into a dictionary
+ status_map = ast.literal_eval(status_map_str)
+
+ if not isinstance(status_map, dict):
+ raise TypeError("Status map must be a dictionary.")
+
+ except Exception as e:
+ log.error(f"Failed to initialize workflow: {e}")
+ return
+
+ # Instantiate the legacy workflow class
+ wf = InvoiceWorkflowV9(
+ connection,
+ field=field,
+ status_map=status_map,
+ paid_date_field=paid_date_field,
+ payment_journal=payment_journal,
+ max_connection=max_connection,
+ )
+
+ # Run the requested actions in a specific order
+ final_actions = actions
+ if not final_actions or "all" in final_actions:
+ final_actions = ["tax", "validate", "pay", "proforma", "rename"]
+
+ log.info(f"Executing workflow actions: {', '.join(final_actions)}")
+
+ if "tax" in final_actions:
+ wf.set_tax()
+ if "validate" in final_actions:
+ wf.validate_invoice()
+ if "pay" in final_actions:
+ wf.paid_invoice()
+ if "proforma" in final_actions:
+ wf.proforma_invoice()
+ if "rename" in final_actions:
+ rename_field = "x_legacy_number"
+ log.info(f"Note: 'rename' action is using a placeholder field: {rename_field}")
+ wf.rename(rename_field)
+
+ log.info("--- Invoice Workflow Finished ---")
+
+
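+# Usage sketch of the dispatcher above (hypothetical config path, field
+# names and journal ID; the status map is passed as a string and parsed
+# with ast.literal_eval):
+#
+#     run_invoice_v9_workflow(
+#         actions=["tax", "validate", "pay"],
+#         config="conf/connection.conf",
+#         field="x_legacy_status",
+#         status_map_str="{'open': ['OPEN'], 'paid': ['PAID']}",
+#         paid_date_field="x_paid_date",
+#         payment_journal=7,
+#         max_connection=4,
+#     )
+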
+# We can add runners for other workflows here in the future
+# def run_sale_order_workflow(...):
+# pass
diff --git a/tests/.coveragerc b/tests/.coveragerc
deleted file mode 100644
index 6e0a1720..00000000
--- a/tests/.coveragerc
+++ /dev/null
@@ -1,3 +0,0 @@
-[run]
-branch = True
-source = .,..
diff --git a/tests/5_partner_export.sh b/tests/5_partner_export.sh
deleted file mode 100644
index d0123f8d..00000000
--- a/tests/5_partner_export.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env bash
-$1 ../odoo_export_thread.py -c conf/connection.conf --file=data/res.partner.exported.csv --model=res.partner --worker=4 --size=200 --domain="[]" --field="id,name,phone,website,street,city,country_id/id" --sep=";" --encoding=utf-8-sig
diff --git a/tests/6_o2m_import.sh b/tests/6_o2m_import.sh
deleted file mode 100644
index 70904c14..00000000
--- a/tests/6_o2m_import.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env bash
-$1 ../odoo_import_thread.py --file=origin/res.partner_o2m.csv --model='res.partner' --size=1 --worker=1 --conf=conf/connection.conf --o2m
diff --git a/tests/7_convert_binary.sh b/tests/7_convert_binary.sh
deleted file mode 100644
index 7673552d..00000000
--- a/tests/7_convert_binary.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/usr/bin/env bash
-$1 ../odoo_convert_path_to_image.py --path=./origin/img/ -f Image origin/contact.csv
-$1 ../odoo_convert_url_to_image.py -f Image origin/contact_url.csv
-
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..895256c7
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+"""Test suite for the odoo-data-flow package."""
diff --git a/tests/clean.sh b/tests/clean.sh
index 5960daa8..32fad191 100755
--- a/tests/clean.sh
+++ b/tests/clean.sh
@@ -1,14 +1,24 @@
#!/usr/bin/env bash
-#Need to launch odoo database accessible with the configuration given in conf/connection.conf
-#Modules contacts need to be installed
-
-rm -rf data
-rm -rf htmlcov
-rm 0_partner_generated.sh
-rm 1_partner_split.sh
-rm 2_contact_import.sh
-rm 3_product_import.sh
-rm 4_product_import.sh
-rm .coverage
-rm error.log
-rm out.csv
+#
+# Cleans up all artifacts generated by the test suite.
+# This script should be run from the root of the repository.
+
+echo "Cleaning up test artifacts..."
+
+# Remove the main data output directory
+rm -rf data/
+
+# Remove coverage report files and database
+rm -rf htmlcov/
+rm -f .coverage
+
+# Remove specific log files and default outputs
+rm -f error.log
+rm -f out.csv
+
+# Recursively remove all intermediate (.fail.csv) and final (_failed.csv)
+# failure files from any directory.
+find . -name "*.fail.csv" -type f -delete
+find . -name "*_failed.csv" -type f -delete
+
+echo "Cleanup complete."
diff --git a/tests/conf/connection.conf b/tests/conf/connection.conf
index 4e8ba3b6..378380ae 100644
--- a/tests/conf/connection.conf
+++ b/tests/conf/connection.conf
@@ -6,5 +6,3 @@ password = admin
protocol = jsonrpc
port = 8069
uid = 1
-
-
diff --git a/tests/const.py b/tests/const.py
deleted file mode 100644
index a8821437..00000000
--- a/tests/const.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# EXEC = 'coverage run -a'
-EXEC = 'python3'
diff --git a/tests/launch_test.sh b/tests/launch_test.sh
deleted file mode 100755
index 552c4592..00000000
--- a/tests/launch_test.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env bash
-#Need to launch odoo database accessible with the configuration given in conf/connection.conf
-#test works well on V11
-#Modules contacts need to be installed
-#EXEC="python2"
-for EXEC in "python2" "python3" "python3.7" "coverage run -a"
-do
- echo "============== Test $EXEC =============="
- rm -rf data
- mkdir data
- export PYTHONPATH=../
- echo "> Erase"
- coverage erase
- echo "> Generate data for import"
- $EXEC test_import.py "$EXEC"
- echo "> Run test import"
- sh 0_partner_generated.sh
- echo "> Run test split file"
- $EXEC test_split.py "$EXEC"
- echo "> Test mapping from file"
- $EXEC test_from_file.py "$EXEC"
- echo "> Import data with error"
- sh 2_contact_import.sh 2> error.log
- echo "> Import Product"
- $EXEC test_product_v9.py "$EXEC"
- sh 3_product_import.sh
- echo "> Import Product v10"
- $EXEC test_product_v10.py "$EXEC"
- sh 4_product_import.sh
- sh 5_partner_export.sh "$EXEC"
- echo "> Import One2Many"
- sh 6_o2m_import.sh "$EXEC"
- echo "> Convert Binary"
- sh 7_convert_binary.sh "$EXEC"
- echo "Test join"
- $EXEC test_merge.py
- coverage html
-done
diff --git a/tests/origin/data.xml b/tests/origin/data.xml
index e05f3c50..35ea0e29 100644
--- a/tests/origin/data.xml
+++ b/tests/origin/data.xml
@@ -20,4 +20,4 @@
-
\ No newline at end of file
+
diff --git a/tests/origin/test_merge1.csv b/tests/origin/test_merge1.csv
index 1f652237..5abb8318 100644
--- a/tests/origin/test_merge1.csv
+++ b/tests/origin/test_merge1.csv
@@ -3,4 +3,4 @@ A;A
B;B
C;A
D;B
-E;C
\ No newline at end of file
+E;C
diff --git a/tests/origin/test_merge2.csv b/tests/origin/test_merge2.csv
index 87706bbb..9ae3c8d0 100644
--- a/tests/origin/test_merge2.csv
+++ b/tests/origin/test_merge2.csv
@@ -1,3 +1,3 @@
name;color
A;Red
-B;Blue
\ No newline at end of file
+B;Blue
diff --git a/tests/test_conf_lib.py b/tests/test_conf_lib.py
new file mode 100644
index 00000000..b5a16f99
--- /dev/null
+++ b/tests/test_conf_lib.py
@@ -0,0 +1,93 @@
+"""Test the connection configuration loader.
+
+This test suite verifies that connection settings are read from a
+configuration file and passed correctly to the Odoo client library.
+"""
+
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from odoo_data_flow.lib.conf_lib import get_connection_from_config
+
+
+@patch("odoo_data_flow.lib.conf_lib.odoolib.get_connection")
+def test_get_connection_from_config_success(
+ mock_get_connection: MagicMock, tmp_path: Path
+) -> None:
+ """Tests successful connection configuration parsing.
+
+ Verifies that it reads a valid config file and calls the underlying
+ connection library with correctly parsed and typed parameters.
+ """
+ # 1. Setup: Create a valid temporary config file
+ config_file = tmp_path / "connection.conf"
+ config_content = """
+[Connection]
+hostname = test-server
+port = 8070
+database = test-db
+login = test-user
+password = test-pass
+uid = 2
+"""
+ config_file.write_text(config_content)
+
+ # 2. Action: Call the function we are testing
+ get_connection_from_config(str(config_file))
+
+ # 3. Assertions: Check that the connection function was called correctly
+ mock_get_connection.assert_called_once()
+ call_kwargs = mock_get_connection.call_args.kwargs
+
+ assert call_kwargs.get("hostname") == "test-server"
+ assert call_kwargs.get("port") == 8070 # Should be converted to int
+ assert call_kwargs.get("database") == "test-db"
+ assert call_kwargs.get("login") == "test-user"
+ assert call_kwargs.get("password") == "test-pass"
+ # 'uid' should be popped and renamed to 'user_id'
+ assert "uid" not in call_kwargs
+ assert call_kwargs.get("user_id") == 2 # Should be converted to int
+
+
+def test_get_connection_file_not_found() -> None:
+ """Tests that a FileNotFoundError is raised if the config file does not exist."""
+ with pytest.raises(FileNotFoundError):
+ get_connection_from_config("non_existent_file.conf")
+
+
+def test_get_connection_missing_key(tmp_path: Path) -> None:
+ """Tests that a KeyError is raised if a required key is missing."""
+ config_file = tmp_path / "missing_key.conf"
+ # This config is missing the 'database' key
+ config_content = """
+[Connection]
+hostname = test-server
+port = 8069
+login = admin
+password = admin
+"""
+ config_file.write_text(config_content)
+
+ with pytest.raises(KeyError):
+ get_connection_from_config(str(config_file))
+
+
+def test_get_connection_malformed_value(tmp_path: Path) -> None:
+ """Tests that a ValueError is raised if a value cannot be converted to int."""
+ config_file = tmp_path / "malformed.conf"
+ # 'port' is not a valid integer
+ config_content = """
+[Connection]
+hostname = test-server
+port = not-a-number
+database = test-db
+login = admin
+password = admin
+uid = 2
+"""
+ config_file.write_text(config_content)
+
+ with pytest.raises(ValueError):
+ get_connection_from_config(str(config_file))
diff --git a/tests/test_converter.py b/tests/test_converter.py
new file mode 100644
index 00000000..29e27e30
--- /dev/null
+++ b/tests/test_converter.py
@@ -0,0 +1,140 @@
+"""Test the converter.
+
+This test suite verifies the path-to-image and URL-to-image converters,
+which replace local file paths and URLs with base64-encoded content.
+"""
+
+import base64
+import csv
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import requests # type: ignore[import-untyped]
+
+from odoo_data_flow.converter import run_path_to_image, run_url_to_image
+
+
+def test_run_path_to_image(tmp_path: Path) -> None:
+ """Tests the run_path_to_image function.
+
+ This test verifies that:
+ 1. It correctly reads a source CSV.
+ 2. It finds local image files and converts them to base64.
+ 3. It writes the correct data to the output CSV.
+ 4. It handles cases where image files are not found.
+ """
+ # 1. Setup: Create source CSV and dummy image files
+ source_dir = tmp_path
+ image_dir = source_dir / "images"
+ image_dir.mkdir()
+
+ source_csv = source_dir / "source.csv"
+ output_csv = source_dir / "output.csv"
+
+ # Create a dummy image file
+ image_file_path = image_dir / "test_image.png"
+ image_content = b"fake-image-data"
+ image_file_path.write_bytes(image_content)
+ expected_base64 = base64.b64encode(image_content).decode("utf-8")
+
+ source_header = ["id", "name", "image_path"]
+ source_data = [
+ ["1", "Product A", "images/test_image.png"],
+ ["2", "Product B", "images/not_found.png"], # This file does not exist
+ ["3", "Product C", ""], # Empty path
+ ]
+
+ with open(source_csv, "w", newline="", encoding="utf-8") as f:
+ # Use semicolon as the delimiter to match the Processor's default
+ writer = csv.writer(f, delimiter=";")
+ writer.writerow(source_header)
+ writer.writerows(source_data)
+
+ # 2. Action: Run the converter function
+ run_path_to_image(
+ file=str(source_csv),
+ fields="image_path",
+ out=str(output_csv),
+ path=str(source_dir),
+ )
+
+ # 3. Assertions
+ assert output_csv.exists()
+ with open(output_csv, encoding="utf-8") as f:
+ reader = csv.DictReader(f, delimiter=";")
+ result_data = list(reader)
+
+ assert len(result_data) == 3
+ assert result_data[0]["name"] == "Product A"
+ assert result_data[0]["image_path"] == expected_base64
+
+ assert result_data[1]["name"] == "Product B"
+ assert result_data[1]["image_path"] == "", "Path for missing file should be empty"
+
+ assert result_data[2]["name"] == "Product C"
+ assert result_data[2]["image_path"] == "", "Empty path should result in empty"
+
+
+# Patch the target where it is looked up: in the mapper module
+@patch("odoo_data_flow.lib.mapper.requests.get")
+def test_run_url_to_image(mock_requests_get: MagicMock, tmp_path: Path) -> None:
+ """Tests the run_url_to_image function.
+
+ This test verifies that:
+ 1. It correctly reads a source CSV.
+ 2. It "downloads" content from a URL and converts it to base64.
+ 3. It handles cases where a URL download fails.
+ """
+ # 1. Setup: Mock the requests library and create a source file
+ source_csv = tmp_path / "source_urls.csv"
+ output_csv = tmp_path / "output_urls.csv"
+
+ # Configure the mock to simulate a successful and a failed request
+ mock_response_success = MagicMock()
+ mock_response_success.content = b"fake-url-image-data"
+ mock_response_success.raise_for_status.return_value = None
+
+ mock_response_fail = MagicMock()
+ # Raise the correct exception type that the code expects to catch
+ mock_response_fail.raise_for_status.side_effect = (
+ requests.exceptions.RequestException("404 Not Found")
+ )
+
+ # The side_effect will return these values in order for each call to get()
+ mock_requests_get.side_effect = [
+ mock_response_success,
+ mock_response_fail,
+ ]
+ expected_base64 = base64.b64encode(b"fake-url-image-data").decode("utf-8")
+
+ source_header = ["id", "name", "image_url"]
+ source_data = [
+ ["10", "Product D", "http://example.com/image.png"],
+ ["20", "Product E", "http://example.com/not_found.png"],
+ ]
+
+ with open(source_csv, "w", newline="", encoding="utf-8") as f:
+ # Use semicolon as the delimiter
+ writer = csv.writer(f, delimiter=";")
+ writer.writerow(source_header)
+ writer.writerows(source_data)
+
+ # 2. Action: Run the converter function
+ run_url_to_image(
+ file=str(source_csv),
+ fields="image_url",
+ out=str(output_csv),
+ )
+
+ # 3. Assertions
+ assert output_csv.exists()
+ with open(output_csv, encoding="utf-8") as f:
+ reader = csv.DictReader(f, delimiter=";")
+ result_data = list(reader)
+
+ assert len(result_data) == 2
+ assert result_data[0]["name"] == "Product D"
+ assert result_data[0]["image_url"] == expected_base64
+
+ assert result_data[1]["name"] == "Product E"
+ assert result_data[1]["image_url"] == "", "URL for failed download should be empty"
diff --git a/tests/test_export_threaded.py b/tests/test_export_threaded.py
new file mode 100644
index 00000000..4c4cb7ed
--- /dev/null
+++ b/tests/test_export_threaded.py
@@ -0,0 +1,122 @@
+"""Test the Export Handling mechanism."""
+
+import csv
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+from odoo_data_flow.export_threaded import export_data
+
+
+def test_export_data_to_file(tmp_path: Path) -> None:
+ """Tests the main export_data function when writing to a file.
+
+ This test verifies that:
+ 1. It correctly connects and searches for records.
+ 2. It processes multiple batches.
+ 3. It writes the header and all data rows to the output CSV file.
+ """
+ # 1. Setup: Mock the Odoo connection and define test data
+ output_file = tmp_path / "export_output.csv"
+ model_name = "res.partner"
+ header = ["id", "name"]
+
+ # This mock simulates the odoo-client-lib connection
+ mock_connection = MagicMock()
+ mock_model_obj = MagicMock()
+
+ # Simulate Odoo's search method returning a list of IDs
+ mock_model_obj.search.return_value = [1, 2, 3, 4, 5]
+
+ # Simulate Odoo's export_data method returning different data for each call
+ mock_model_obj.export_data.side_effect = [
+ {"datas": [["1", "Partner A"], ["2", "Partner B"]]}, # Batch 1
+ {"datas": [["3", "Partner C"], ["4", "Partner D"]]}, # Batch 2
+ {"datas": [["5", "Partner E"]]}, # Batch 3
+ ]
+ mock_connection.get_model.return_value = mock_model_obj
+
+ # 2. Action: Run the export function
+ with patch(
+ "odoo_data_flow.export_threaded.conf_lib.get_connection_from_config",
+ return_value=mock_connection,
+ ):
+ export_data(
+ config_file="dummy.conf",
+ model=model_name,
+ domain=[("is_company", "=", True)],
+ header=header,
+ output=str(output_file),
+ batch_size=2, # Use a small batch size to test batching logic
+ separator=",",
+ )
+
+ # 3. Assertions
+ assert output_file.exists(), "Output file was not created."
+
+ with open(output_file, encoding="utf-8") as f:
+ reader = csv.reader(f)
+ result_header = next(reader)
+ result_data = list(reader)
+
+ assert result_header == header
+ assert len(result_data) == 5
+ assert result_data[0] == ["1", "Partner A"]
+ assert result_data[4] == ["5", "Partner E"]
+
+ # Verify that search and export_data were called correctly
+ mock_model_obj.search.assert_called_once()
+ assert mock_model_obj.export_data.call_count == 3
+
+
+def test_export_data_in_memory() -> None:
+ """Tests the main export_data function when returning data in-memory."""
+ # 1. Setup
+ mock_connection = MagicMock()
+ mock_model_obj = MagicMock()
+ mock_model_obj.search.return_value = [1, 2]
+ mock_model_obj.export_data.return_value = {
+ "datas": [["1", "Mem Partner"], ["2", "Mem Partner 2"]]
+ }
+ mock_connection.get_model.return_value = mock_model_obj
+
+ # 2. Action
+ with patch(
+ "odoo_data_flow.export_threaded.conf_lib.get_connection_from_config",
+ return_value=mock_connection,
+ ):
+ header, data = export_data(
+ config_file="dummy.conf",
+ model="res.partner",
+ domain=[],
+ header=["id", "name"],
+ output=None, # This signals the function to return data
+ )
+
+ # 3. Assertions
+ assert header == ["id", "name"]
+ assert data is not None
+ assert len(data) == 2
+ assert data[0] == ["1", "Mem Partner"]
+
+
+def test_export_data_connection_failure() -> None:
+ """Tests that the export function handles a connection failure gracefully."""
+ # 1. Setup: This time, the get_connection call will raise an exception
+ with patch(
+ "odoo_data_flow.export_threaded.conf_lib.get_connection_from_config",
+ side_effect=Exception("Connection failed"),
+ ) as mock_get_conn:
+ # 2. Action
+ header, data = export_data(
+ config_file="bad.conf",
+ model="res.partner",
+ domain=[],
+ header=["name"],
+ )
+
+ # 3. Assertions
+ mock_get_conn.assert_called_once()
+ assert header is None
+ assert data is None
diff --git a/tests/test_exporter.py b/tests/test_exporter.py
new file mode 100644
index 00000000..4294145b
--- /dev/null
+++ b/tests/test_exporter.py
@@ -0,0 +1,149 @@
+"""Test the exporter orchestrator."""
+
+from unittest.mock import MagicMock, patch
+
+from odoo_data_flow.exporter import run_export, run_export_for_migration
+
+
+@patch("odoo_data_flow.exporter.export_threaded.export_data")
+def test_run_export(mock_export_data: MagicMock) -> None:
+ """Tests the main `run_export` function.
+
+ Verifies that it correctly parses string arguments and calls the underlying
+ `export_threaded.export_data` function with the correct parameters.
+ """
+ # 1. Setup
+ config_file = "conf/test.conf"
+ filename = "output.csv"
+ model = "res.partner"
+ fields_str = "id,name,email"
+ domain_str = "[('is_company', '=', True)]"
+ context_str = "{'lang': 'fr_FR'}"
+
+ # 2. Action: Call the function we want to test
+ run_export(
+ config=config_file,
+ filename=filename,
+ model=model,
+ fields=fields_str,
+ domain=domain_str,
+ context=context_str,
+ worker=2,
+ batch_size=50,
+ separator="|",
+ encoding="latin1",
+ )
+
+ # 3. Assertions: Check that the mocked function was called correctly
+ mock_export_data.assert_called_once()
+
+ # Correctly inspect positional and keyword arguments
+ pos_args, kw_args = mock_export_data.call_args
+
+ assert pos_args[0] == config_file
+ assert pos_args[1] == model
+ assert pos_args[2] == [("is_company", "=", True)] # parsed domain
+ assert pos_args[3] == ["id", "name", "email"] # parsed fields
+
+ assert kw_args.get("context") == {"lang": "fr_FR"}
+ assert kw_args.get("output") == filename
+ assert kw_args.get("max_connection") == 2
+ assert kw_args.get("batch_size") == 50
+ assert kw_args.get("separator") == "|"
+ assert kw_args.get("encoding") == "latin1"
+
+
+@patch("odoo_data_flow.exporter.export_threaded.export_data")
+def test_run_export_for_migration(mock_export_data: MagicMock) -> None:
+ """Tests the `run_export_for_migration` function.
+
+ Verifies that it correctly prepares arguments for an in-memory data export.
+ """
+ # 1. Setup
+ # Simulate the return value from the mocked function
+ mock_export_data.return_value = (["id", "name"], [["1", "Test Partner"]])
+ fields_list = ["id", "name"]
+
+ # 2. Action
+ header, data = run_export_for_migration(
+ config="conf/test.conf",
+ model="res.partner",
+ fields=fields_list,
+ )
+
+ # 3. Assertions
+ mock_export_data.assert_called_once()
+
+ pos_args, kw_args = mock_export_data.call_args
+
+ assert pos_args[0] == "conf/test.conf"
+ assert pos_args[1] == "res.partner"
+ assert pos_args[3] == fields_list # Correctly check positional argument
+
+ assert kw_args.get("output") is None, "Output should be None for in-memory return"
+
+ assert header == ["id", "name"]
+ assert data == [["1", "Test Partner"]]
+
+
+@patch("odoo_data_flow.exporter.log.error")
+def test_run_export_invalid_domain(mock_log_error: MagicMock) -> None:
+ """Tests that `run_export` logs an error for a malformed domain string."""
+ # 1. Action
+ run_export(
+ config="dummy.conf",
+ filename="dummy.csv",
+ model="dummy.model",
+ fields="id",
+ domain="this-is-not-a-list",
+ )
+
+ # 2. Assertions
+ mock_log_error.assert_called_once()
+ assert "Invalid domain provided" in mock_log_error.call_args[0][0]
+
+
+@patch("odoo_data_flow.exporter.log.error")
+def test_run_export_invalid_context(mock_log_error: MagicMock) -> None:
+ """Tests that `run_export` logs an error for a malformed context string."""
+ # 1. Action
+ run_export(
+ config="dummy.conf",
+ filename="dummy.csv",
+ model="dummy.model",
+ fields="id",
+ context="this-is-not-a-dict",
+ )
+
+ # 2. Assertions
+ mock_log_error.assert_called_once()
+ assert "Invalid context provided" in mock_log_error.call_args[0][0]
+
+
+@patch("odoo_data_flow.exporter.export_threaded.export_data")
+def test_run_export_for_migration_bad_domain(
+ mock_export_data: MagicMock,
+) -> None:
+ """Tests that `run_export_for_migration` handles a bad domain string."""
+ mock_export_data.return_value = ([], [])
+ run_export_for_migration(
+ config="dummy.conf",
+ model="res.partner",
+ fields=["id"],
+ domain="bad-domain",
+ )
+ # Assert that the domain passed to the core function is an empty list
+ assert mock_export_data.call_args.args[2] == []
+
+
+@patch("odoo_data_flow.exporter.export_threaded.export_data")
+def test_run_export_for_migration_no_data(mock_export_data: MagicMock) -> None:
+    """Tests that `run_export_for_migration` handles an empty export.
+
+    Verifies the case where the underlying export returns no data.
+    """
+ mock_export_data.return_value = (["id"], None)
+ _header, data = run_export_for_migration(
+ config="dummy.conf", model="res.partner", fields=["id"]
+ )
+ assert data is None
diff --git a/tests/test_failure_handling.py b/tests/test_failure_handling.py
new file mode 100644
index 00000000..3fbbab10
--- /dev/null
+++ b/tests/test_failure_handling.py
@@ -0,0 +1,142 @@
+"""Test the Failure Handling mechanism."""
+
+import csv
+from pathlib import Path
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+from odoo_data_flow.importer import run_import
+
+
+def test_two_step_failure_handling(tmp_path: Path) -> None:
+ """Tests the complete two-tier failure handling process.
+
+ This test verifies that:
+ 1. A normal run writes the entire failed batch to a `.fail.csv` file.
+ 2. A `--fail` run processes the `.fail.csv` file.
+ 3. Records that still fail are written to a final, timestamped `_failed.csv`
+ file with an added `_ERROR_REASON` column.
+ """
+ # --- 1. Setup: Create mock data and a mock Odoo connection ---
+
+ source_file = tmp_path / "source_data.csv"
+ model_name = "my.test.model"
+ intermediate_fail_file = tmp_path / f"{model_name}.fail.csv"
+
+ header = ["id", "name", "value"]
+ # We will make the record with id='my_import.rec_02' fail on the second pass
+ source_data = [
+ ["my_import.rec_01", "Record 1", "100"],
+ ["my_import.rec_02", "Record 2 (will fail again)", "200"],
+ ["my_import.rec_03", "Record 3", "300"],
+ ]
+
+ with open(source_file, "w", newline="", encoding="utf-8") as f:
+ writer = csv.writer(f)
+ writer.writerow(header)
+ writer.writerows(source_data)
+
+ # This mock simulates the Odoo model's `load` method
+ mock_model_load = MagicMock()
+
+ # Define the behavior for the mock `load` method
+ def load_side_effect(
+ header: list[str], data: list[list[Any]], **kwargs: Any
+ ) -> dict[str, Any]:
+ # First pass: fail if it's a batch import
+ if len(data) > 1:
+ return {"messages": [{"message": "Generic batch import error"}]}
+ # Second pass: succeed for some, fail for a specific record
+ else:
+ record_id = data[0][0]
+ if record_id == "my_import.rec_02":
+ return {
+ "messages": [
+ {
+ "record": 0,
+ "message": "Validation Error: "
+ "The value '200' is not valid for this field.",
+ }
+ ]
+ }
+ # Simulate success for other records
+ return {"ids": [123]}
+
+ mock_model_load.side_effect = load_side_effect
+
+ # This mock simulates the odoo-client-lib connection
+ mock_connection = MagicMock()
+ mock_model_obj = MagicMock()
+ mock_model_obj.load = mock_model_load
+ mock_connection.get_model.return_value = mock_model_obj
+
+ # --- 2. First Pass: Run the initial import ---
+
+ with patch(
+ "odoo_data_flow.import_threaded.conf_lib.get_connection_from_config",
+ return_value=mock_connection,
+ ):
+ run_import(
+ config="dummy_config.conf",
+ filename=str(source_file),
+ model=model_name,
+ fail=False,
+ separator=",",
+ )
+
+ # --- Assertions for the First Pass ---
+ assert intermediate_fail_file.exists(), "Intermediate .fail.csv was not created"
+
+ with open(intermediate_fail_file, encoding="utf-8") as f:
+ reader = csv.reader(f)
+ header_fail1 = next(reader)
+ data_fail1 = list(reader)
+
+ assert header_fail1 == header
+ assert len(data_fail1) == 3, (
+ "The entire failed batch should be in the .fail.csv file"
+ )
+ assert data_fail1[1][1] == "Record 2 (will fail again)" # Check content integrity
+
+ # --- 3. Second Pass: Run the import with the --fail flag ---
+
+ with patch(
+ "odoo_data_flow.import_threaded.conf_lib.get_connection_from_config",
+ return_value=mock_connection,
+ ):
+ run_import(
+ config="dummy_config.conf",
+ filename=str(
+ source_file
+ ), # The original filename is still used to derive paths
+ model=model_name,
+ fail=True,
+ separator=",",
+ )
+
+ # --- Assertions for the Second Pass ---
+ # Find the final, timestamped failure file
+ final_fail_files = list(tmp_path.glob("*_failed.csv"))
+ assert len(final_fail_files) == 1, (
+ "The final timestamped _failed.csv file was not created"
+ )
+ final_fail_file = final_fail_files[0]
+
+ with open(final_fail_file, encoding="utf-8") as f:
+ reader = csv.reader(f)
+ header_fail2 = next(reader)
+ data_fail2 = list(reader)
+
+ assert "_ERROR_REASON" in header_fail2, "The _ERROR_REASON column is missing"
+ assert len(data_fail2) == 1, (
+ "Only the single permanently failing record should be in the final file"
+ )
+
+ failed_record = data_fail2[0]
+ error_reason_index = header_fail2.index("_ERROR_REASON")
+
+ assert failed_record[0] == "my_import.rec_02"
+ assert (
+ failed_record[error_reason_index]
+ == "Validation Error: The value '200' is not valid for this field."
+ )
diff --git a/tests/test_from_file.py b/tests/test_from_file.py
index f7b3774c..7d8741a4 100644
--- a/tests/test_from_file.py
+++ b/tests/test_from_file.py
@@ -1,65 +1,83 @@
-# -*- coding: utf-8 -*-
-import sys
+"""Test the mapper from file.
-import os
-from const import EXEC
+This test script reads a CSV file from the 'origin' directory,
+applies a mapping with various mappers, checks data quality,
+and generates a clean CSV file ready for import.
+"""
-from odoo_csv_tools.lib import mapper, checker
-from odoo_csv_tools.lib.transform import Processor
+import os
+import pprint
-if len(sys.argv) == 2:
- EXEC = sys.argv[1]
+from odoo_data_flow.lib import checker, mapper
+from odoo_data_flow.lib.transform import Processor
+# --- Configuration ---
+# Define translation maps and prefixes
lang_map = {
- '': '',
- 'French': u'French (BE) / Français (BE)',
- 'English': u'English',
- 'Dutch': u'Dutch / Nederlands',
+ "": "",
+ "French": "French (BE) / Français (BE)",
+ "English": "English",
+ "Dutch": "Dutch / Nederlands",
}
country_map = {
- 'Belgique': 'base.be',
- 'BE': 'base.be',
- 'FR': 'base.fr',
- 'U.S': 'base.us',
- 'US': 'base.us',
- 'NL': 'base.nl',
+ "Belgique": "base.be",
+ "BE": "base.be",
+ "FR": "base.fr",
+ "U.S": "base.us",
+ "US": "base.us",
+ "NL": "base.nl",
}
PARTNER_PREFIX = "TEST_PARTNER"
+IMAGE_PATH_PREFIX = "tests/origin/img/"
-# STEP 1 : read the needed file(s)
-processor = Processor('origin%scontact.csv' % os.sep)
-# Print o2o mapping
-import pprint
+# --- Main Logic ---
+
+# STEP 1: Initialize the processor with the source file
+source_file = os.path.join("tests", "origin", "contact.csv")
+processor = Processor(source_file)
+# Print the 1-to-1 mapping for debugging purposes
+print("--- Auto-detected o2o Mapping ---")
pprint.pprint(processor.get_o2o_mapping())
+print("---------------------------------")
+
-# STEP 2 : Define the mapping for every object to import
+# STEP 2: Define the mapping for every object to import
mapping = {
- 'id': mapper.m2o(PARTNER_PREFIX, 'Company_ID', skip=True),
- 'name': mapper.val('Company_Name', skip=True),
- 'phone': mapper.val('Phone'),
- 'website': mapper.val('www'),
- 'street': mapper.val('address1'),
- 'city': mapper.val('city'),
- 'zip': mapper.val('zip code'),
- 'country_id/id': mapper.map_val('country', country_map),
- 'company_type': mapper.const('company'),
- 'customer': mapper.bool_val('IsCustomer', ['1'], ['0']),
- 'supplier': mapper.bool_val('IsSupplier', ['1'], ['0']),
- 'lang': mapper.map_val('Language', lang_map),
- 'image': mapper.binary("Image", "origin/img/"),
+ "id": mapper.concat(PARTNER_PREFIX, "_", "Company_ID", skip=True),
+ "name": mapper.val("Company_Name", skip=True),
+ "phone": mapper.val("Phone"),
+ "website": mapper.val("www"),
+ "street": mapper.val("address1"),
+ "city": mapper.val("city"),
+ "zip": mapper.val("zip code"),
+ "country_id/id": mapper.map_val(country_map, mapper.val("country")),
+ "company_type": mapper.const("company"),
+ # CORRECTED: bool_val now only takes a list of true values.
+ "customer_rank": mapper.bool_val("IsCustomer", ["1"]),
+ "supplier_rank": mapper.bool_val("IsSupplier", ["1"]),
+ "lang": mapper.map_val(lang_map, mapper.val("Language")),
+ # CORRECTED: Prepend the image path prefix using a postprocess function.
+ # "image_1920": mapper.binary(
+ # "Image",
+ # postprocess=lambda p: os.path.join(IMAGE_PATH_PREFIX, p) if p else "",
+ # ), TODO
+ "image_1920": mapper.binary("Image", "origin/img/"),
}
# Step 3: Check data quality (Optional)
+print("Running data quality checks...")
processor.check(checker.cell_len_checker(30))
-processor.check(checker.id_validity_checker('Company_ID', "COM\d"))
+processor.check(checker.id_validity_checker("Company_ID", r"COM\d"))
processor.check(checker.line_length_checker(13))
processor.check(checker.line_number_checker(21))
# Step 4: Process data
-processor.process(mapping, 'data%sres.partner.csv' % os.sep, {'worker': 2, 'batch_size': 5}, 'set')
+print("Processing data transformation...")
+output_file = os.path.join("data", "res.partner.from_file.csv")
+params = {"model": "res.partner", "worker": 2, "batch_size": 5}
+processor.process(mapping, output_file, params)
-# Step 5: Define output and import parameter
-processor.write_to_file("2_contact_import.sh", python_exe=EXEC, path='../')
+print(f"File transformation complete. Output at: {output_file}")
diff --git a/tests/test_import.py b/tests/test_import.py
index d8e2bf64..5e5a3a73 100644
--- a/tests/test_import.py
+++ b/tests/test_import.py
@@ -1,64 +1,80 @@
-"""
-Created on 14 sept. 2016
+"""Generate test data.
-@author: mythrys
+This test script generates data for partner categories and partners
+to be used in the main test suite.
"""
-import random
-import sys
-
-from const import EXEC
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib import transform
+import random
-if sys.version_info < (3, 0, 0):
- from builtins import range
+from odoo_data_flow.lib import mapper, transform
-if len(sys.argv) == 2:
- EXEC = sys.argv[1]
+# --- Configuration ---
+PARTNER_PREFIX = "partner_generated"
+TAG_PREFIX = "partner_tag"
+PARTNER_OUTPUT = "data/res.partner.generated.csv"
+TAG_OUTPUT = "data/res.partner.category.csv"
-PARTNER_PREFIX = 'partner_generated'
-TAG_PREFIX = 'partner_tag'
-output = 'data/res.partner.generated.csv'
-tag_output = 'data/res.partner.category.csv'
-script = '0_partner_generated.sh'
+# --- Test Data Generation ---
+# Create 100 unique tags
+tags = [f"Tag {i}" for i in range(100)]
-tags = ["Tag %s" % i for i in range(0, 100)]
+# Create a dataset for 200 partners, each assigned 5 random tags
+header = ["id", "tags"]
+data = [
+ [str(i), ",".join(random.choice(tags) for _ in range(5))] # noqa
+ for i in range(200)
+]
-header = ['id', 'tags']
-data = [[str(i), ','.join(tags[random.randint(0, 99)] for i in range(0, 5))] for i in range(0, 200)]
+# --- Mapping Definitions ---
-mapping = {
- 'id': mapper.m2o(PARTNER_PREFIX, 'id'),
- 'name': mapper.val('id', postprocess=lambda x: "Partner %s" % x),
- 'phone': mapper.val('id', postprocess=lambda x: "0032%s" % (int(x) * 11)),
- 'website': mapper.val('id', postprocess=lambda x: "http://website-%s.com" % x),
- 'street': mapper.val('id', postprocess=lambda x: "Street %s" % x),
- 'city': mapper.val('id', postprocess=lambda x: "City %s" % x),
- 'zip': mapper.val('id', postprocess=lambda x: ("%s" % x).zfill(6)),
- 'country_id/id': mapper.const('base.be'),
- 'company_type': mapper.const('company'),
- 'customer': mapper.val('id', postprocess=lambda x: str(int(x) % 2)),
- 'supplier': mapper.val('id', postprocess=lambda x: str((int(x) + 1) % 2)),
- 'lang': mapper.const('English'),
- 'category_id/id': mapper.m2m(TAG_PREFIX, 'tags')
+# Mapping to create the partner category records.
+# This will be processed in a special m2m mode to create one record
+# per unique tag.
+tag_mapping = {
+ "id": mapper.m2m_id_list(TAG_PREFIX, "tags"),
+ "name": mapper.m2m("tags", sep=","),
+ "parent_id/id": mapper.const("base.res_partner_category_0"),
}
-tag_mapping = {
- 'id': mapper.m2m_id_list(TAG_PREFIX, 'tags'),
- 'name': mapper.m2m_value_list('tags'),
- 'parent_id/id': mapper.const('base.res_partner_category_0'),
+# Mapping to create the partner records, linking them to the tags created above.
+partner_mapping = {
+ "id": mapper.concat(PARTNER_PREFIX, "_", "id"),
+ "name": mapper.val("id", postprocess=lambda x: f"Partner {x}"),
+ "phone": mapper.val("id", postprocess=lambda x: f"0032{int(x) * 11}"),
+ "website": mapper.val("id", postprocess=lambda x: f"http://website-{x}.com"),
+ "street": mapper.val("id", postprocess=lambda x: f"Street {x}"),
+ "city": mapper.val("id", postprocess=lambda x: f"City {x}"),
+ "zip": mapper.val("id", postprocess=lambda x: str(x).zfill(6)),
+ "country_id/id": mapper.const("base.be"),
+ "company_type": mapper.const("company"),
+ "customer_rank": mapper.val("id", postprocess=lambda x: int(x) % 2),
+ "supplier_rank": mapper.val("id", postprocess=lambda x: (int(x) + 1) % 2),
+ "lang": mapper.const("en_US"),
+ "category_id/id": mapper.m2m(TAG_PREFIX, "tags"),
}
+# --- Processing ---
+
+# Initialize the processor with the in-memory data
processor = transform.Processor(header=header, data=data)
-processor.process(tag_mapping, tag_output, {
- 'worker': 1,
- 'batch_size': 10,
- 'model': 'res.partner.category',
-}, m2m=True)
-processor.process(mapping, output, {
- 'worker': 4,
- 'batch_size': 100,
- 'model': 'res.partner',
-})
-processor.write_to_file(script, python_exe=EXEC, path='../', encoding="utf-8-sig")
+
+# Process the tags first, using the special m2m=True mode.
+# This will find all unique tags from the 'tags' column and create a clean
+# CSV file with one row for each unique tag.
+print(f"Generating partner category data at: {TAG_OUTPUT}")
+processor.process(
+ tag_mapping,
+ TAG_OUTPUT,
+ {"model": "res.partner.category"},
+ m2m=True,
+)
+
+# Next, process the main partner records.
+print(f"Generating partner data at: {PARTNER_OUTPUT}")
+processor.process(
+ partner_mapping,
+ PARTNER_OUTPUT,
+ {"model": "res.partner"},
+)
+
+print("Test data generation complete.")
diff --git a/tests/test_import_threaded.py b/tests/test_import_threaded.py
new file mode 100644
index 00000000..4e43c3d7
--- /dev/null
+++ b/tests/test_import_threaded.py
@@ -0,0 +1,174 @@
+"""Test the low-level, multi-threaded import logic."""
+
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from odoo_data_flow.import_threaded import (
+ RPCThreadImport,
+ _create_batches,
+ _read_data_file,
+ import_data,
+)
+
+
+class TestRPCThreadImport:
+ """Tests for the RPCThreadImport class."""
+
+ def test_handle_odoo_messages_with_error_reason(self) -> None:
+ """Tests that when add_error_reason is True, the reason is appended."""
+ header = ["id", "name"]
+ lines = [["1", "A"], ["2", "B"]]
+ mock_writer = MagicMock()
+ rpc_thread = RPCThreadImport(
+ 1, None, header, mock_writer, add_error_reason=True
+ )
+ messages = [{"message": "Generic Error"}]
+ failed_lines = rpc_thread._handle_odoo_messages(messages, lines)
+ assert failed_lines[0][-1] == "Generic Error | "
+
+ def test_handle_odoo_messages_no_error_reason(self) -> None:
+ """Tests that when add_error_reason is False, the reason is not appended."""
+ header = ["id", "name"]
+ lines = [["1", "A"], ["2", "B"]]
+ mock_writer = MagicMock()
+ rpc_thread = RPCThreadImport(
+ 1, None, header, mock_writer, add_error_reason=False
+ )
+ messages = [{"message": "Generic Error", "record": 0}]
+ failed_lines = rpc_thread._handle_odoo_messages(messages, lines)
+ assert len(failed_lines[0]) == 2 # No extra column added
+
+ def test_handle_record_mismatch(self) -> None:
+ """Tests the logic for handling a record count mismatch."""
+ header = ["id", "name"]
+ lines = [["1", "A"], ["2", "B"]]
+ mock_writer = MagicMock()
+ rpc_thread = RPCThreadImport(
+ 1, None, header, mock_writer, add_error_reason=True
+ )
+ response = {"ids": [123]}
+ failed_lines = rpc_thread._handle_record_mismatch(response, lines)
+ assert len(failed_lines) == 2
+ assert "Record count mismatch" in failed_lines[0][2]
+
+ def test_handle_rpc_error(self) -> None:
+ """Tests the logic for handling a general RPC exception."""
+ header = ["id", "name"]
+ lines = [["1", "A"], ["2", "B"]]
+ mock_writer = MagicMock()
+ rpc_thread = RPCThreadImport(
+ 1, None, header, mock_writer, add_error_reason=True
+ )
+ error = Exception("Connection Timed Out")
+ failed_lines = rpc_thread._handle_rpc_error(error, lines)
+ assert len(failed_lines) == 2
+ assert failed_lines[0][2] == "Connection Timed Out"
+
+
+class TestHelperFunctions:
+ """Tests for the standalone helper functions in the module."""
+
+ def test_read_data_file_not_found(self) -> None:
+ """Tests that _read_data_file returns empty lists for a non-existent file."""
+ header, data = _read_data_file("non_existent_file.csv", ";", "utf-8", 0)
+ assert header == []
+ assert data == []
+
+ @patch("odoo_data_flow.import_threaded.open")
+ def test_read_data_file_generic_exception(self, mock_open: MagicMock) -> None:
+ """Tests that _read_data_file handles generic exceptions during read."""
+ mock_open.side_effect = Exception("A generic read error")
+ header, data = _read_data_file("any_file.csv", ";", "utf-8", 0)
+ assert header == []
+ assert data == []
+
+ @patch("odoo_data_flow.import_threaded.log.error")
+ def test_read_data_file_no_id_column(
+ self, mock_log_error: MagicMock, tmp_path: Path
+ ) -> None:
+ """Tests that _read_data_file logs an error if 'id' column is missing."""
+ source_file = tmp_path / "no_id.csv"
+ source_file.write_text("name,value\nTest,100")
+ header, data = _read_data_file(str(source_file), ",", "utf-8", 0)
+ assert header == []
+ assert data == []
+ mock_log_error.assert_called_once()
+ assert "Failed to read file" in mock_log_error.call_args[0][0]
+
+ def test_create_batches_split_by_size(self) -> None:
+ """Tests that batches are created by size when the group value is the same."""
+ header = ["id", "group_id"]
+ data = [
+ ["1", "A"],
+ ["2", "A"],
+ ["3", "A"],
+ ["4", "A"],
+ ["5", "A"],
+ ]
+ # Batch size of 3 should create two batches for group A
+ batches = list(_create_batches(data, "group_id", header, 3, False))
+ assert len(batches) == 2
+ assert len(batches[0][1]) == 3
+ assert len(batches[1][1]) == 2
+
+
+class TestImportData:
+ """Tests for the main import_data orchestrator function."""
+
+ def test_import_data_no_header_or_data(self) -> None:
+ """Tests that import_data raises ValueError if no data is provided."""
+ with pytest.raises(ValueError, match="Please provide either a data file"):
+ import_data(config_file="dummy.conf", model="dummy.model")
+
+ @patch("odoo_data_flow.import_threaded.conf_lib.get_connection_from_config")
+ def test_import_data_connection_fails(self, mock_get_conn: MagicMock) -> None:
+ """Tests that the function exits gracefully if the connection fails."""
+ mock_get_conn.side_effect = Exception("Cannot connect")
+ import_data(
+ config_file="bad.conf",
+ model="dummy.model",
+ header=["id"],
+ data=[["1"]],
+ )
+ mock_get_conn.assert_called_once()
+
+ @patch("odoo_data_flow.import_threaded.conf_lib.get_connection_from_config")
+ @patch("odoo_data_flow.import_threaded.open")
+ def test_import_data_fail_file_oserror(
+ self, mock_open: MagicMock, mock_get_conn: MagicMock
+ ) -> None:
+ """Tests that the function handles an OSError when opening the fail file."""
+ mock_get_conn.return_value = MagicMock()
+ mock_open.side_effect = OSError("Permission denied")
+ import_data(
+ config_file="dummy.conf",
+ model="dummy.model",
+ header=["id"],
+ data=[["1"]],
+ fail_file="protected/fail.csv",
+ )
+ mock_open.assert_called_once()
+
+ @patch("odoo_data_flow.import_threaded.RPCThreadImport")
+ def test_import_data_ignore_columns(self, mock_rpc_thread: MagicMock) -> None:
+ """Tests that the 'ignore' parameter correctly filters columns."""
+ header = ["id", "name", "field_to_ignore"]
+ data = [["1", "A", "ignore_me"]]
+
+ with patch(
+ "odoo_data_flow.import_threaded.conf_lib.get_connection_from_config"
+ ):
+ import_data(
+ config_file="dummy.conf",
+ model="dummy.model",
+ header=header,
+ data=data,
+ ignore=["field_to_ignore"],
+ )
+
+ # Assert that the header passed to RPCThreadImport was filtered
+ init_args = mock_rpc_thread.call_args.args
+ filtered_header = init_args[2]
+ assert filtered_header == ["id", "name"]
diff --git a/tests/test_importer.py b/tests/test_importer.py
new file mode 100644
index 00000000..c580b26b
--- /dev/null
+++ b/tests/test_importer.py
@@ -0,0 +1,120 @@
+"""Test the high-level import orchestrator."""
+
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+from odoo_data_flow.importer import run_import, run_import_for_migration
+
+
+@patch("odoo_data_flow.importer.import_threaded.import_data")
+def test_run_import_infers_model_from_filename(
+ mock_import_data: MagicMock, tmp_path: Path
+) -> None:
+ """Test infer model name.
+
+ Tests that the model name is correctly inferred from the filename
+ when the --model argument is not provided.
+ """
+ # 1. Setup: Create a dummy file for the function to read.
+ source_file = tmp_path / "res_partner.csv"
+ source_file.write_text("id,name\n1,test")
+
+ # 2. Action
+ run_import(config="dummy.conf", filename=str(source_file), separator=",")
+
+ # 3. Assertions
+ mock_import_data.assert_called_once()
+ # The second positional argument passed to import_data should be the model name.
+ called_model = mock_import_data.call_args.args[1]
+ assert called_model == "res.partner"
+
+
+@patch("odoo_data_flow.importer.import_threaded.import_data")
+@patch("odoo_data_flow.importer.log.error")
+def test_run_import_no_model_fails(
+ mock_log_error: MagicMock, mock_import_data: MagicMock, tmp_path: Path
+) -> None:
+ """Tests that the import fails if no model can be inferred from the filename."""
+ # 1. Setup: A filename starting with a dot will result in an invalid model name.
+ bad_file = tmp_path / ".badfilename"
+ bad_file.touch()
+
+ # 2. Action
+ run_import(config="dummy.conf", filename=str(bad_file))
+
+ # 3. Assertions
+ mock_log_error.assert_called_once()
+ assert "could not be inferred" in mock_log_error.call_args[0][0]
+ # Ensure the import process was stopped and the threaded import was not called
+ mock_import_data.assert_not_called()
+
+
+@patch("odoo_data_flow.importer.import_threaded.import_data")
+def test_run_import_fail_mode(mock_import_data: MagicMock, tmp_path: Path) -> None:
+ """Test import in fail mode.
+
+ Tests that when --fail is True, the correct parameters for a fail run
+ are passed down to the core import function.
+ """
+ # 1. Setup
+ source_file = tmp_path / "res_partner.csv"
+ source_file.touch() # Ensure the source file exists
+
+ # 2. Action
+ run_import(
+ config="dummy.conf",
+ filename=str(source_file),
+ model="res.partner",
+ fail=True,
+ )
+
+ # 3. Assertions
+ mock_import_data.assert_called_once()
+ call_kwargs = mock_import_data.call_args.kwargs
+
+ # Check that the file paths and flags are set correctly for a fail run
+ assert call_kwargs["file_csv"].endswith("res.partner.fail.csv")
+ assert call_kwargs["fail_file"].endswith("_failed.csv")
+ assert call_kwargs["is_fail_run"] is True
+ assert call_kwargs["batch_size"] == 1
+ assert call_kwargs["max_connection"] == 1
+
+
+@patch("odoo_data_flow.importer.log.error")
+def test_run_import_bad_context_string(
+ mock_log_error: MagicMock, tmp_path: Path
+) -> None:
+ """Tests that a malformed context string is handled gracefully."""
+ # Setup: Create a dummy file to get past the file-read stage
+ source_file = tmp_path / "data.csv"
+ source_file.write_text("id,name\n1,test")
+
+ run_import(
+ config="dummy.conf",
+ filename=str(source_file),
+ model="res.partner",
+ context="this-is-not-a-dict",
+ )
+ mock_log_error.assert_called_once()
+ assert "Invalid context provided" in mock_log_error.call_args[0][0]
+
+
+@patch("odoo_data_flow.importer.import_threaded.import_data")
+def test_run_import_for_migration(mock_import_data: MagicMock) -> None:
+ """Tests the in-memory import runner used for migrations."""
+ # 1. Action
+ run_import_for_migration(
+ config="dummy.conf",
+ model="res.partner",
+ header=["id", "name"],
+ data=[["1", "Test"]],
+ worker=2,
+ batch_size=50,
+ )
+
+ # 2. Assertions
+ mock_import_data.assert_called_once()
+ call_kwargs = mock_import_data.call_args.kwargs
+ assert call_kwargs["max_connection"] == 2
+ assert call_kwargs["batch_size"] == 50
+ assert "tracking_disable" in call_kwargs["context"]
diff --git a/tests/test_io.py b/tests/test_io.py
new file mode 100644
index 00000000..8a16efbb
--- /dev/null
+++ b/tests/test_io.py
@@ -0,0 +1,156 @@
+"""Test the IO Handling functionalities."""
+
+# tests/test_io.py
+
+import shlex
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+from odoo_data_flow.lib.internal.io import write_csv, write_file
+
+# --- Tests for write_csv ---
+
+
+@patch("odoo_data_flow.lib.internal.io.open")
+@patch("odoo_data_flow.lib.internal.io.log.error")
+def test_write_csv_oserror(mock_log_error: MagicMock, mock_open: MagicMock) -> None:
+ """Tests that write_csv logs an error if an OSError occurs."""
+ # 1. Setup: Make the open call raise an OSError
+ mock_open.side_effect = OSError("Permission denied")
+
+ # 2. Action
+ write_csv("protected/file.csv", ["h1"], [["d1"]])
+
+ # 3. Assertions
+ mock_log_error.assert_called_once()
+ assert "Failed to write to file" in mock_log_error.call_args[0][0]
+
+
+# --- Tests for write_file ---
+
+
+def test_write_file_writes_csv_data(tmp_path: Path) -> None:
+ """Tests that write_file correctly calls write_csv to create the data file."""
+ data_file = tmp_path / "data.csv"
+
+ with patch("odoo_data_flow.lib.internal.io.write_csv") as mock_write_csv:
+ write_file(
+ filename=str(data_file),
+ header=["id", "name"],
+ data=[["1", "test"]],
+ launchfile="", # Correctly pass an empty string instead of None
+ )
+ mock_write_csv.assert_called_once_with(
+ str(data_file), ["id", "name"], [["1", "test"]], encoding="utf-8"
+ )
+
+
+@patch("odoo_data_flow.lib.internal.io.write_csv") # Mock the CSV writing part
+@patch("odoo_data_flow.lib.internal.io.open")
+def test_write_file_no_launchfile(
+ mock_open: MagicMock, mock_write_csv: MagicMock, tmp_path: Path
+) -> None:
+ """Tests that write_file exits early if no launchfile is specified."""
+ data_file = tmp_path / "data.csv"
+
+ write_file(
+ filename=str(data_file),
+ header=["id"],
+ data=[["1"]],
+ launchfile="", # Empty string means no script
+ )
+
+ # Assert that write_csv was called, but open was not (for the launchfile)
+ mock_write_csv.assert_called_once()
+ mock_open.assert_not_called()
+
+
+def test_write_file_full_script_generation(tmp_path: Path) -> None:
+ """Tests that write_file generates a complete shell script with all options."""
+ # 1. Setup
+ script_file = tmp_path / "load.sh"
+ data_file = tmp_path / "my_model.csv"
+
+ # 2. Action
+ write_file(
+ filename=str(data_file),
+ header=["id", "name"],
+ data=[["1", "test"]],
+ launchfile=str(script_file),
+ model="my.model",
+ fail=True,
+ init=True,
+ worker=4,
+ batch_size=50,
+ groupby="parent_id/id",
+ ignore="field_to_ignore",
+ context={"active_test": False}, # Correctly pass a dict instead of a string
+ conf_file="conf/custom.conf",
+ )
+
+ # 3. Assertions
+ assert script_file.exists()
+ content = script_file.read_text()
+
+ # Check for the main command
+ assert "odoo-data-flow import" in content
+ assert f"--config {shlex.quote('conf/custom.conf')}" in content
+ assert f"--file {shlex.quote(str(data_file))}" in content
+ assert f"--model {shlex.quote('my.model')}" in content
+ assert "--worker 4" in content
+ assert "--size 50" in content
+ assert f"--groupby {shlex.quote('parent_id/id')}" in content
+ assert f"--ignore {shlex.quote('field_to_ignore')}" in content
+ assert f"--context {shlex.quote(str({'active_test': False}))}" in content
+
+ # Check for the second command with the --fail flag
+ assert "--fail" in content
+ # Count occurrences to ensure both commands are present
+ assert content.count("odoo-data-flow import") == 2
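+    # For reference, the generated script is expected to contain two commands of
+    # roughly the following shape; the exact option order and quoting are an
+    # assumption based on the assertions above, not a verbatim template:
+    #
+    #   odoo-data-flow import --config conf/custom.conf --file .../my_model.csv \
+    #       --model my.model --worker 4 --size 50 --groupby parent_id/id ...
+    #   odoo-data-flow import ... --fail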
+
+
+def test_write_file_auto_model_name(tmp_path: Path) -> None:
+ """Tests that the model name is correctly inferred when model='auto'."""
+ script_file = tmp_path / "load_auto.sh"
+ data_file = tmp_path / "res.partner.csv"
+
+ write_file(
+ filename=str(data_file),
+ header=["id"],
+ data=[["1"]],
+ launchfile=str(script_file),
+ model="auto",
+ init=True,
+ )
+
+ content = script_file.read_text()
+ # The model name should be inferred from 'res.partner.csv' -> 'res.partner'
+ assert f"--model {shlex.quote('res.partner')}" in content
+
+
+@patch("odoo_data_flow.lib.internal.io.write_csv") # Mock the CSV part
+@patch("odoo_data_flow.lib.internal.io.open")
+@patch("odoo_data_flow.lib.internal.io.log.error")
+def test_write_file_oserror(
+ mock_log_error: MagicMock, mock_open: MagicMock, mock_write_csv: MagicMock
+) -> None:
+ """Test write fle os error.
+
+ Tests that write_file logs an error if an OSError occurs during script writing.
+ """
+ # 1. Setup: This time, the 'open' for the launchfile will fail
+ mock_open.side_effect = OSError("Permission denied on script file")
+
+ # 2. Action
+ write_file(
+ filename="data.csv",
+ header=["id"],
+ data=[["1"]],
+ launchfile="protected/load.sh",
+ init=True,
+ )
+
+ # 3. Assertions
+ mock_write_csv.assert_called_once() # Ensure the CSV part was attempted
+ mock_log_error.assert_called_once()
+ assert "Failed to write to launch file" in mock_log_error.call_args[0][0]
diff --git a/tests/test_logging.py b/tests/test_logging.py
new file mode 100644
index 00000000..772e1c36
--- /dev/null
+++ b/tests/test_logging.py
@@ -0,0 +1,72 @@
+"""Test Logging functionality."""
+
+import logging
+from pathlib import Path
+
+from odoo_data_flow.logging_config import log, setup_logging
+
+
+def test_setup_logging_console_only() -> None:
+ """Tests that logging is set up correctly for console-only output."""
+ # 1. Setup: Ensure logger is in a clean state
+ log.handlers.clear()
+
+ # 2. Action: Configure logging without a file path
+ setup_logging(verbose=True)
+
+ # 3. Assertions
+ assert len(log.handlers) == 1, (
+ "There should be exactly one handler for the console."
+ )
+ assert isinstance(log.handlers[0], logging.StreamHandler)
+ assert not isinstance(log.handlers[0], logging.FileHandler)
+
+
+def test_setup_logging_with_file(tmp_path: Path) -> None:
+ """Test log file writing.
+
+ Tests that logging is set up with both console and file handlers
+ when a log file path is provided.
+ """
+ # 1. Setup
+ log.handlers.clear()
+ log_file = tmp_path / "test.log"
+
+ # 2. Action
+ setup_logging(verbose=True, log_file=str(log_file))
+
+ # 3. Assertions
+ assert len(log.handlers) == 2, "There should be two handlers: console and file."
+
+ # Check that we have one of each type of handler
+ handler_types = [type(h) for h in log.handlers]
+ assert logging.StreamHandler in handler_types
+ assert logging.FileHandler in handler_types
+
+ # Find the file handler and check its path
+ file_handler = next(
+ (h for h in log.handlers if isinstance(h, logging.FileHandler)), None
+ )
+ assert file_handler is not None
+ assert file_handler.baseFilename == str(log_file)
+
+
+def test_log_output_is_written_to_file(tmp_path: Path) -> None:
+ """Tests that log messages are correctly written to the specified log file."""
+ # 1. Setup
+ log.handlers.clear()
+ log_file = tmp_path / "output.log"
+ test_message = "This is a test message for the log file."
+
+ # 2. Action
+ setup_logging(verbose=False, log_file=str(log_file))
+ log.info(test_message)
+
+ # To ensure the log is written, we need to shut down the logging system
+ # This closes the file handle.
+ logging.shutdown()
+
+ # 3. Assertions
+ assert log_file.exists(), "Log file was not created."
+ log_content = log_file.read_text()
+ assert test_message in log_content
diff --git a/tests/test_main.py b/tests/test_main.py
new file mode 100644
index 00000000..4f79ddcf
--- /dev/null
+++ b/tests/test_main.py
@@ -0,0 +1,51 @@
+"""Test cases for the __main__ module."""
+
+import pytest
+from click.testing import CliRunner
+
+# CORRECTED: Use an underscore for the package name in the import.
+from odoo_data_flow import __main__
+
+
+@pytest.fixture
+def runner() -> CliRunner:
+ """Fixture for invoking command-line interfaces."""
+ return CliRunner()
+
+
+def test_main_succeeds_without_command(runner: CliRunner) -> None:
+ """Test main Succeeds.
+
+ It exits with a status code of 0 when no command is provided
+ and should show the main help message.
+ """
+ # CORRECTED: The entry point function from our __main__.py is now 'cli'.
+ result = runner.invoke(__main__.cli)
+ assert result.exit_code == 0
+ # A good basic test is to ensure the main commands are listed in the help output.
+ assert "import" in result.output
+ assert "export" in result.output
+ assert "path-to-image" in result.output
+ assert "url-to-image" in result.output
+
+
+def test_main_shows_version(runner: CliRunner) -> None:
+ """It shows the version of the package when --version is used."""
+ result = runner.invoke(__main__.cli, ["--version"])
+ assert result.exit_code == 0
+ # This checks that the command runs and that the word 'version'
+ # appears in the output, which is a robust check for the --version flag.
+ assert "version" in result.output
+
+
+# You can also add more specific tests for each command.
+# For example, testing that the 'import' command fails without required options:
+def test_import_fails_without_options(runner: CliRunner) -> None:
+ """The import command should fail if required options are missing."""
+ # We invoke the 'import' sub-command directly.
+ result = runner.invoke(__main__.cli, ["import"])
+ # It should exit with a non-zero status code because options are missing.
+ assert result.exit_code != 0
+ # Click's error message should mention the missing options.
+ assert "Missing option" in result.output
+ assert "--file" in result.output
diff --git a/tests/test_mapper.py b/tests/test_mapper.py
new file mode 100644
index 00000000..1f964470
--- /dev/null
+++ b/tests/test_mapper.py
@@ -0,0 +1,253 @@
+"""Test the core mapper functions."""
+
+import inspect
+from unittest.mock import MagicMock, patch
+
+import pytest
+import requests # type: ignore[import-untyped]
+
+from odoo_data_flow.lib import mapper
+from odoo_data_flow.lib.internal.exceptions import SkippingError
+
+# --- Test Data ---
+LINE_SIMPLE = {"col1": "A", "col2": "B", "col3": "C", "empty_col": ""}
+LINE_NUMERIC = {"price": "12,50", "qty": "100"}
+LINE_M2M = {"tags": "T1, T2", "other_tags": "T3", "empty_tags": ""}
+LINE_BOOL = {"is_active": "yes", "is_vip": "no"}
+LINE_HIERARCHY = {
+ "order_ref": "SO001",
+ "product_sku": "PROD-A",
+ "product_qty": "5",
+}
+
+
+def test_val_postprocess_builtin() -> None:
+ """Post Process val tester.
+
+ Tests the val mapper's postprocess with a built-in function that
+ cannot be inspected, covering the try/except block.
+ """
+ mapper_func = mapper.val("col1", postprocess=str.lower)
+ assert mapper_func(LINE_SIMPLE, {}) == "a"
+
+
+def test_val_postprocess_fallback() -> None:
+ """Test post process fallback.
+
+ Tests the val mapper's fallback from a 2-arg to a 1-arg postprocess call.
+ This simulates a callable that doesn't accept the 'state' argument.
+ """
+
+ def one_arg_lambda(x: str) -> str:
+ return x.lower()
+
+ # Force the two-argument call to ensure the fallback to one argument is tested
+ with patch("inspect.signature") as mock_signature:
+ # Pretend the signature check passed, forcing a TypeError on the call.
+ # The parameters attribute must be a dictionary-like object.
+ mock_signature.return_value.parameters = {
+ "arg1": MagicMock(kind=inspect.Parameter.POSITIONAL_OR_KEYWORD),
+ "arg2": MagicMock(kind=inspect.Parameter.POSITIONAL_OR_KEYWORD),
+ }
+ mapper_func = mapper.val("col1", postprocess=one_arg_lambda)
+ assert mapper_func(LINE_SIMPLE, {}) == "a"
+
+
+def test_concat_mapper_all() -> None:
+ """Tests that concat_mapper_all returns an empty string if any value is empty."""
+ mapper_func = mapper.concat_mapper_all("_", "col1", "col2")
+ assert mapper_func(LINE_SIMPLE, {}) == "A_B"
+ mapper_func_fail = mapper.concat_mapper_all("_", "col1", "empty_col")
+ assert mapper_func_fail(LINE_SIMPLE, {}) == ""
+
+
+def test_concat_skip_on_empty() -> None:
+ """Tests that concat raises SkippingError when skip=True and result is empty."""
+ mapper_func = mapper.concat("_", "empty_col", skip=True)
+ with pytest.raises(SkippingError):
+ mapper_func(LINE_SIMPLE, {})
+
+
+def test_num_mapper() -> None:
+ """Tests the num mapper for comma replacement."""
+ mapper_func = mapper.num("price")
+ assert mapper_func(LINE_NUMERIC, {}) == "12.50"
+
+
+def test_m2o_map_success() -> None:
+ """Tests a successful m2o_map operation."""
+ mapper_func = mapper.m2o_map("prefix", "col1", "col2")
+ assert mapper_func(LINE_SIMPLE, {}) == "prefix.A_B"
+
+
+def test_m2m_multi_column() -> None:
+ """Tests the m2m mapper in multi-column mode."""
+ mapper_func = mapper.m2m("tag_prefix", "tags", "other_tags")
+ result = mapper_func(LINE_M2M, {})
+ assert "tag_prefix.T1__T2" in result
+ assert "tag_prefix.T3" in result
+
+
+def test_m2m_multi_column_with_missing_field() -> None:
+ """Tests the m2m mapper in multi-column mode with a non-existent field."""
+ mapper_func = mapper.m2m("tag_prefix", "tags", "non_existent_field")
+ result = mapper_func(LINE_M2M, {})
+ assert result == "tag_prefix.T1__T2"
+
+
+def test_m2m_multi_column_with_empty_value() -> None:
+ """Tests the m2m mapper in multi-column mode with an empty field value."""
+ line_with_empty = {"f1": "val1", "f2": ""}
+ mapper_func = mapper.m2m("p", "f1", "f2")
+ result = mapper_func(line_with_empty, {})
+ assert result == "p.val1"
+
+
+def test_m2m_single_empty_field() -> None:
+ """Tests the m2m mapper in single-column mode with an empty field."""
+ mapper_func = mapper.m2m("tag_prefix", "empty_tags", sep=",")
+ assert mapper_func(LINE_M2M, {}) == ""
+
+
+def test_m2m_map_with_concat() -> None:
+ """Tests m2m_map wrapping another mapper."""
+ concat_mapper = mapper.concat(",", "tags", "other_tags")
+ m2m_mapper = mapper.m2m_map("tag_prefix", concat_mapper)
+ result = m2m_mapper(LINE_M2M, {})
+ assert "tag_prefix.T1" in result
+ assert "tag_prefix.T2" in result
+ assert "tag_prefix.T3" in result
+
+
+def test_m2m_map_with_empty_result() -> None:
+ """Tests m2m_map when the wrapped mapper returns an empty value."""
+ empty_mapper = mapper.val("empty_col")
+ m2m_mapper = mapper.m2m_map("tag_prefix", empty_mapper)
+ assert m2m_mapper(LINE_SIMPLE, {}) == ""
+
+
+def test_m2m_id_list_empty() -> None:
+ """Tests that m2m_id_list returns an empty string for empty input."""
+ mapper_func = mapper.m2m_id_list("prefix", "empty_col")
+ assert mapper_func(LINE_SIMPLE, {}) == ""
+
+
+def test_m2m_value_list_empty() -> None:
+ """Tests that m2m_value_list returns an empty list for empty input."""
+ mapper_func = mapper.m2m_value_list("empty_col")
+ assert mapper_func(LINE_SIMPLE, {}) == []
+
+
+def test_map_val_m2m() -> None:
+ """Tests the map_val mapper in m2m mode."""
+ translation_map = {"T1": "Tag One", "T2": "Tag Two"}
+ mapper_func = mapper.map_val(translation_map, mapper.val("tags"), m2m=True)
+ assert mapper_func(LINE_M2M, {}) == "Tag One,Tag Two"
+
+
+def test_record_mapper() -> None:
+ """Tests that the record mapper correctly creates a dictionary of results."""
+ line_mapping = {
+ "product_id/id": mapper.m2o_map("prod_", "product_sku"),
+ "product_uom_qty": mapper.num("product_qty"),
+ }
+ record_mapper = mapper.record(line_mapping)
+ result = record_mapper(LINE_HIERARCHY, {})
+ assert isinstance(result, dict)
+ assert result.get("product_id/id") == "prod_.PROD-A"
+ assert result.get("product_uom_qty") == "5"
+
+
+def test_binary_empty_path() -> None:
+ """Tests that the binary mapper returns an empty string for an empty path."""
+ mapper_func = mapper.binary("empty_col")
+ assert mapper_func(LINE_SIMPLE, {}) == ""
+
+
+def test_binary_skip_on_not_found() -> None:
+ """Tests that binary raises SkippingError when skip=True and file not found."""
+ mapper_func = mapper.binary("col1", skip=True)
+ with pytest.raises(SkippingError):
+ mapper_func(LINE_SIMPLE, {})
+
+
+@patch("odoo_data_flow.lib.mapper.log.warning")
+def test_binary_file_not_found_no_skip(mock_log_warning: MagicMock) -> None:
+ """Tests that a warning is logged when a file is not found and skip=False."""
+ mapper_func = mapper.binary("col1", skip=False)
+ assert mapper_func(LINE_SIMPLE, {}) == ""
+ mock_log_warning.assert_called_once()
+ assert "File not found" in mock_log_warning.call_args[0][0]
+
+
+def test_binary_url_map_empty() -> None:
+ """Tests that binary_url_map returns empty string for an empty URL."""
+ mapper_func = mapper.binary_url_map("empty_col")
+ assert mapper_func(LINE_SIMPLE, {}) == ""
+
+
+@patch("odoo_data_flow.lib.mapper.requests.get")
+def test_binary_url_map_skip_on_not_found(mock_requests_get: MagicMock) -> None:
+ """Tests that binary_url_map raises SkippingError when request fails."""
+ mock_requests_get.side_effect = requests.exceptions.RequestException("Timeout")
+ mapper_func = mapper.binary_url_map("col1", skip=True)
+ with pytest.raises(SkippingError):
+ mapper_func(LINE_SIMPLE, {})
+
+
+@patch("odoo_data_flow.lib.mapper.requests.get")
+@patch("odoo_data_flow.lib.mapper.log.warning")
+def test_binary_url_map_request_exception(
+ mock_log_warning: MagicMock, mock_requests_get: MagicMock
+) -> None:
+ """Tests that a warning is logged when a URL request fails and skip=False."""
+ mock_requests_get.side_effect = requests.exceptions.RequestException("Timeout")
+ mapper_func = mapper.binary_url_map("col1", skip=False)
+ assert mapper_func(LINE_SIMPLE, {}) == ""
+ mock_log_warning.assert_called_once()
+ assert "Cannot fetch file" in mock_log_warning.call_args[0][0]
+
+
+def test_legacy_mappers() -> None:
+ """Tests the legacy attribute mappers."""
+ line = {"Color": "Blue", "Size": "L", "Finish": ""}
+
+ val_att_mapper = mapper.val_att(["Color", "Size", "Finish"])
+ assert val_att_mapper(line, {}) == {"Color": "Blue", "Size": "L"}
+
+ m2o_att_mapper = mapper.m2o_att("ATT", ["Color", "Size"])
+ assert m2o_att_mapper(line, {}) == {
+ "Color": "ATT.Color_Blue",
+ "Size": "ATT.Size_L",
+ }
+
+ concat_legacy_mapper = mapper.concat_field_value_m2m("_", "Color", "Size")
+ assert concat_legacy_mapper(line, {}) == "Color_Blue,Size_L"
+
+ m2m_att_val_mapper = mapper.m2m_attribute_value("PREFIX", "Color", "Size")
+ assert "PREFIX.Color_Blue" in m2m_att_val_mapper(line, {})
+ assert "PREFIX.Size_L" in m2m_att_val_mapper(line, {})
+
+
+def test_modern_template_attribute_mapper() -> None:
+ """Tests the m2m_template_attribute_value mapper for modern Odoo versions."""
+ # Case 1: template_id exists, should return concatenated values
+ line_with_template = {"template_id": "TPL1", "Color": "Blue", "Size": "L"}
+ mapper_func = mapper.m2m_template_attribute_value("PREFIX", "Color", "Size")
+ assert mapper_func(line_with_template, {}) == "Blue,L"
+
+ # Case 2: template_id is missing, should return an empty string
+ line_without_template = {"Color": "Blue", "Size": "L"}
+ assert mapper_func(line_without_template, {}) == ""
+
+
+def test_split_mappers() -> None:
+ """Tests the split helper functions."""
+ split_line_func = mapper.split_line_number(100)
+ assert split_line_func({}, 0) == 0
+ assert split_line_func({}, 99) == 0
+ assert split_line_func({}, 100) == 1
+
+ split_file_func = mapper.split_file_number(8)
+ assert split_file_func({}, 7) == 7
+ assert split_file_func({}, 8) == 0
diff --git a/tests/test_merge.py b/tests/test_merge.py
index 8c1650f7..b0748855 100644
--- a/tests/test_merge.py
+++ b/tests/test_merge.py
@@ -1,12 +1,30 @@
-'''
-Created on 10 dec. 2019
+"""This test script checks the file joining functionality of the Processor.
-@author: Thibault Francois
-'''
-import random
+It merges two source CSV files based on a common key.
+"""
-from odoo_csv_tools.lib import transform
+import os
+from odoo_data_flow.lib import transform
-processor = transform.Processor(filename='origin/test_merge1.csv')
-processor.join_file("origin/test_merge2.csv", "category", "name")
\ No newline at end of file
+# --- Configuration ---
+SOURCE_FILE_1 = os.path.join("tests", "origin", "test_merge1.csv")
+SOURCE_FILE_2 = os.path.join("tests", "origin", "test_merge2.csv")
+
+# --- Main Logic ---
+print(f"Initializing processor with primary file: {SOURCE_FILE_1}")
+# The 'filename' argument is deprecated, but we keep it for now
+# to match the existing test file structure.
+# A future refactor could update the Processor to use a more explicit name.
+processor = transform.Processor(filename=SOURCE_FILE_1)
+
+print(f"Joining with secondary file: {SOURCE_FILE_2}")
+# Join the second file into the processor's data buffer.
+# The join happens where the value in the 'category' column of file 1
+# matches the value in the 'name' column of file 2.
+processor.join_file(SOURCE_FILE_2, "category", "name")
+
+print("File join complete. The processor now holds the merged data in memory.")
+# Note: This test script only performs the in-memory join.
+# A subsequent step in a test runner would be needed to process
+# this merged data into a final output file.
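+#
+# For illustration, such a step could look like the sketch below. The mapping
+# keys and the output path are assumptions based on the merged columns, and
+# `mapper` would additionally need to be imported from odoo_data_flow.lib:
+#
+#     merged_mapping = {
+#         "id": mapper.val("id"),
+#         "name": mapper.val("name"),
+#         "category": mapper.val("category"),
+#     }
+#     processor.process(
+#         merged_mapping,
+#         "data/test_merge_output.csv",
+#         {"model": "res.partner"},  # hypothetical target model
+#     )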
diff --git a/tests/test_migrator.py b/tests/test_migrator.py
new file mode 100644
index 00000000..e54b1443
--- /dev/null
+++ b/tests/test_migrator.py
@@ -0,0 +1,120 @@
+"""Test the high-level data migration orchestrator."""
+
+from unittest.mock import MagicMock, patch
+
+from odoo_data_flow.lib import mapper
+from odoo_data_flow.migrator import run_migration
+
+
+@patch("odoo_data_flow.migrator.run_import_for_migration")
+@patch("odoo_data_flow.migrator.run_export_for_migration")
+@patch("odoo_data_flow.migrator.Processor")
+def test_run_migration_success_with_mapping(
+ mock_processor: MagicMock,
+ mock_run_export: MagicMock,
+ mock_run_import: MagicMock,
+) -> None:
+ """Tests the successful end-to-end migration workflow with a custom mapping."""
+ # 1. Setup
+ # Mock the return value of the export function
+ mock_run_export.return_value = (["id", "name"], [["1", "Source Name"]])
+
+ # Mock the processor and its process method
+ mock_processor_instance = MagicMock()
+ mock_processor_instance.process.return_value = (
+ ["id", "name"],
+ [["1", "Transformed Name"]],
+ )
+ mock_processor.return_value = mock_processor_instance
+
+ # Define a valid custom mapping where the value is a callable mapper function
+ custom_mapping = {
+ "name": mapper.val("name", postprocess=lambda x, s: f"Transformed {x}")
+ }
+
+ # 2. Action
+ run_migration(
+ config_export="src.conf",
+ config_import="dest.conf",
+ model="res.partner",
+ fields=["id", "name"],
+ mapping=custom_mapping,
+ )
+
+ # 3. Assertions
+ mock_run_export.assert_called_once()
+ mock_processor.assert_called_once_with(
+ header=["id", "name"], data=[["1", "Source Name"]]
+ )
+ mock_processor_instance.process.assert_called_once_with(
+ custom_mapping, filename_out=""
+ )
+ mock_run_import.assert_called_once_with(
+ config="dest.conf",
+ model="res.partner",
+ header=["id", "name"],
+ data=[["1", "Transformed Name"]],
+ worker=1,
+ batch_size=10,
+ )
+
+
+@patch("odoo_data_flow.migrator.run_import_for_migration")
+@patch("odoo_data_flow.migrator.run_export_for_migration")
+@patch("odoo_data_flow.migrator.Processor")
+def test_run_migration_success_no_mapping(
+ mock_processor: MagicMock,
+ mock_run_export: MagicMock,
+ mock_run_import: MagicMock,
+) -> None:
+ """Tests that a 1-to-1 mapping is generated if none is provided."""
+ # 1. Setup
+ mock_run_export.return_value = (["id", "name"], [["1", "Source Name"]])
+
+ # Mock the processor and its methods
+ mock_processor_instance = MagicMock()
+ # Simulate get_o2o_mapping returning a simple callable mapping
+ mock_processor_instance.get_o2o_mapping.return_value = {
+ "id": MagicMock(func=lambda line, state: line["id"]),
+ "name": MagicMock(func=lambda line, state: line["name"]),
+ }
+ mock_processor_instance.process.return_value = (
+ ["id", "name"],
+ [["1", "Source Name"]],
+ )
+ mock_processor.return_value = mock_processor_instance
+
+ # 2. Action
+ run_migration(
+ config_export="src.conf", config_import="dest.conf", model="res.partner"
+ )
+
+ # 3. Assertions
+ mock_run_export.assert_called_once()
+ mock_processor_instance.get_o2o_mapping.assert_called_once()
+ mock_processor_instance.process.assert_called_once()
+ mock_run_import.assert_called_once()
+
+
+@patch("odoo_data_flow.migrator.run_import_for_migration")
+@patch("odoo_data_flow.migrator.run_export_for_migration")
+@patch("odoo_data_flow.migrator.log.warning")
+def test_run_migration_no_data_exported(
+ mock_log_warning: MagicMock,
+ mock_run_export: MagicMock,
+ mock_run_import: MagicMock,
+) -> None:
+ """Tests that the migration stops gracefully if no data is exported."""
+ # 1. Setup: Simulate the export function returning no data
+ mock_run_export.return_value = ([], [])
+
+ # 2. Action
+ run_migration(
+ config_export="src.conf", config_import="dest.conf", model="res.partner"
+ )
+
+ # 3. Assertions
+ mock_run_export.assert_called_once()
+ mock_log_warning.assert_called_once_with("No data exported. Migration finished.")
+ # The import function should never be called
+ mock_run_import.assert_not_called()
diff --git a/tests/test_product_v10.py b/tests/test_product_v10.py
index f706e1a8..d7e9a350 100644
--- a/tests/test_product_v10.py
+++ b/tests/test_product_v10.py
@@ -1,102 +1,179 @@
-# -*- coding: utf-8 -*-
-import sys
+"""Test Odoo version 10 product import.
+
+This test script generates a complete set of data files for importing
+products with variants, including categories, attributes, and attribute lines.
+This is based on the v10 product structure.
+"""
import os
-from const import EXEC
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import ProductProcessorV10
+from odoo_data_flow.lib import mapper
-if len(sys.argv) == 2:
- EXEC = sys.argv[1]
+# We assume ProductProcessorV10 is a custom class inheriting from Processor
+from odoo_data_flow.lib.transform import ProductProcessorV10
+# --- Configuration ---
TEMPLATE_PREFIX = "PRODUCT_TEMPLATE"
PRODUCT_PREFIX = "PRODUCT_PRODUCT"
CATEGORY_PREFIX = "PRODUCT_CATEGORY"
-
ATTRIBUTE_PREFIX = "PRODUCT_ATTRIBUTE"
ATTRIBUTE_VALUE_PREFIX = "PRODUCT_ATTRIBUTE_VALUE"
ATTRIBUTE_LINE_PREFIX = "PRODUCT_ATTRIBUTE_LINE"
-context = {'create_product_variant': True, 'tracking_disable': True}
+# Define the attributes to be processed from the source file
+attribute_list = ["Color", "Gender", "Size_H", "Size_W"]
+source_file = os.path.join("tests", "origin", "product.csv")
+context = {"create_product_variant": True, "tracking_disable": True}
+
-# STEP 1 : read the needed file(s)
-processor = ProductProcessorV10('origin%sproduct.csv' % os.sep, delimiter=',')
+# --- Main Logic ---
+# STEP 1: Initialize the custom processor with the source file
+print(f"Initializing processor for product import from: {source_file}")
+processor = ProductProcessorV10(source_file, separator=",")
-# STEP 2 : Category and Parent Category
+# STEP 2: Generate data for Parent and Child Categories
+print("Generating data for product categories...")
categ_parent_map = {
- 'id': mapper.m2o(CATEGORY_PREFIX, 'categoy'),
- 'name': mapper.val('categoy'),
+ "id": mapper.m2o_map(CATEGORY_PREFIX, "categoy"),
+ "name": mapper.val("categoy"),
}
-
categ_map = {
- 'id': mapper.m2o(CATEGORY_PREFIX, 'Sub Category'),
- 'parent_id/id': mapper.m2o(CATEGORY_PREFIX, 'categoy'),
- 'name': mapper.val('Sub Category'),
+ "id": mapper.m2o_map(CATEGORY_PREFIX, "Sub Category"),
+ "parent_id/id": mapper.m2o_map(CATEGORY_PREFIX, "categoy"),
+ "name": mapper.val("Sub Category"),
}
+processor.process(
+ categ_parent_map,
+ os.path.join("data", "product.category.parent.csv"),
+ {"model": "product.category", "worker": 1, "batch_size": 5},
+ "set",
+ m2m=True, # Use m2m=True to get a unique set of parent categories
+)
+processor.process(
+ categ_map,
+ os.path.join("data", "product.category.csv"),
+ {"model": "product.category", "worker": 1, "batch_size": 20},
+ "set",
+ m2m=True, # Use m2m=True to get a unique set of child categories
+)
-processor.process(categ_parent_map, 'data%sproduct.category.parent.csv' % os.sep, {'worker': 1, 'batch_size': 5,
- 'model': 'product.category'}, 'set')
-processor.process(categ_map, 'data%sproduct.category.csv' % os.sep, {'worker': 1, 'batch_size': 20}, 'set')
-
-# STEP 3 : Product Template mapping
+# STEP 3: Generate data for Product Templates
+print("Generating data for product templates...")
template_map = {
- 'id': mapper.m2o(TEMPLATE_PREFIX, 'ref'),
- 'categ_id/id': mapper.m2o(CATEGORY_PREFIX, 'Sub Category'),
- 'standard_price': mapper.num('cost'),
- 'lst_price': mapper.num('public_price'),
- 'default_code': mapper.val('ref'),
- 'name': mapper.val('name'),
+ "id": mapper.m2o_map(TEMPLATE_PREFIX, "ref"),
+ "categ_id/id": mapper.m2o_map(CATEGORY_PREFIX, "Sub Category"),
+ "standard_price": mapper.num("cost"),
+ "list_price": mapper.num("public_price"),
+ "default_code": mapper.val("ref"),
+ "name": mapper.val("name"),
+ "type": mapper.const("product"),
}
-processor.process(template_map, 'data%sproduct.template.csv' % os.sep, {'worker': 4, 'batch_size': 10,
- 'context': context}, 'set')
+processor.process(
+ template_map,
+ os.path.join("data", "product.template.csv"),
+ {
+ "model": "product.template",
+ "worker": 4,
+ "batch_size": 10,
+ "context": context,
+ },
+ m2m=True, # A product template should only be created once per ref
+)
+
+# STEP 4: Generate data for Attributes
+print("Generating data for product attributes...")
+# The custom processor method handles creating a simple list of attributes
+processor.process_attribute_data(
+ attribute_list,
+ ATTRIBUTE_PREFIX,
+ os.path.join("data", "product.attribute.csv"),
+ {
+ "model": "product.attribute",
+ "worker": 4,
+ "batch_size": 10,
+ "context": context,
+ },
+)
-# STEP 4: Attribute List
-attribute_list = ['Color', 'Gender', 'Size_H', 'Size_W']
-processor.process_attribute_data(attribute_list, ATTRIBUTE_PREFIX, 'data%sproduct.attribute.csv' % os.sep,
- {'worker': 4, 'batch_size': 10,
- 'context': context})
-# STEP 5: Attribute Value
-attribue_value_mapping = {
- 'id': mapper.m2m_id_list(ATTRIBUTE_VALUE_PREFIX, *[mapper.concat_field_value_m2m('_', f) for f in attribute_list]),
- 'name': mapper.m2m_value_list(*attribute_list),
- 'attribute_id/id': mapper.m2m_id_list(ATTRIBUTE_PREFIX, *[mapper.field(f) for f in attribute_list]),
+# STEP 5: Generate data for Attribute Values
+print("Generating data for product attribute values...")
+attribute_value_mapping = {
+ "id": mapper.m2m_template_attribute_value(ATTRIBUTE_VALUE_PREFIX, *attribute_list),
+ "name": mapper.m2m_value_list(*attribute_list),
+ "attribute_id/id": mapper.m2m_id_list(
+ ATTRIBUTE_PREFIX, *[mapper.field(f) for f in attribute_list]
+ ),
}
-processor.process(attribue_value_mapping, 'data%sproduct.attribute.value.csv' % os.sep, {'worker': 3, 'batch_size': 50,
- 'context': context,
- 'groupby': 'attribute_id/id'},
- m2m=True)
+processor.process(
+ attribute_value_mapping,
+ os.path.join("data", "product.attribute.value.csv"),
+ {
+ "model": "product.attribute.value",
+ "worker": 3,
+ "batch_size": 50,
+ "context": context,
+ "groupby": "attribute_id/id",
+ },
+ m2m=True,
+)
-# STEP 6: Attribute Value Line
+# STEP 6: Generate data for Attribute Lines (linking attributes to templates)
+print("Generating data for product attribute lines...")
line_mapping = {
- 'id': mapper.m2m_id_list(ATTRIBUTE_LINE_PREFIX,
- *[mapper.concat_mapper_all('_', mapper.field(f), mapper.val('ref')) for f in
- attribute_list]),
- 'product_tmpl_id/id': mapper.m2o(TEMPLATE_PREFIX, 'ref'),
- 'attribute_id/id': mapper.m2m_id_list(ATTRIBUTE_PREFIX, *[mapper.field(f) for f in attribute_list]),
- 'value_ids/id': mapper.m2m_id_list(ATTRIBUTE_VALUE_PREFIX,
- *[mapper.concat_field_value_m2m('_', f) for f in attribute_list]),
+ "id": mapper.m2m_id_list(
+ ATTRIBUTE_LINE_PREFIX,
+ *[
+ mapper.concat_mapper_all("_", mapper.field(f), mapper.val("ref"))
+ for f in attribute_list
+ ],
+ ),
+ "product_tmpl_id/id": mapper.m2o_map(TEMPLATE_PREFIX, "ref"),
+ "attribute_id/id": mapper.m2m_id_list(
+ ATTRIBUTE_PREFIX, *[mapper.field(f) for f in attribute_list]
+ ),
+ "value_ids/id": mapper.m2m_template_attribute_value(
+ ATTRIBUTE_VALUE_PREFIX, *attribute_list
+ ),
}
-context['update_many2many'] = True
-processor.process(line_mapping, 'data%sproduct.attribute.line.csv' % os.sep, {'worker': 3, 'batch_size': 50,
- 'context': dict(context),
- 'groupby': 'product_tmpl_id/id'},
- m2m=True)
-context.pop('update_many2many')
+context_with_update = context.copy()
+context_with_update["update_many2many"] = True
+processor.process(
+ line_mapping,
+ os.path.join("data", "product.attribute.line.csv"),
+ {
+ "model": "product.attribute.line",
+ "worker": 3,
+ "batch_size": 50,
+ "context": context_with_update,
+ "groupby": "product_tmpl_id/id",
+ },
+ m2m=True,
+)
-# STEP 7: Product Variant
+# STEP 7: Generate data for final Product Variants (product.product)
+print("Generating data for product variants...")
product_mapping = {
- 'id': mapper.m2o_map(PRODUCT_PREFIX, mapper.concat('_', 'barcode', 'Color', 'Gender', 'Size_H', 'Size_W'),
- skip=True),
- 'barcode': mapper.val('barcode'),
- 'product_tmpl_id/id': mapper.m2o(TEMPLATE_PREFIX, 'ref'),
- 'attribute_value_ids/id': mapper.m2m_attribute_value(ATTRIBUTE_VALUE_PREFIX, 'Color', 'Gender', 'Size_H', 'Size_W'),
- 'default_code': mapper.val('ref'),
- 'standard_price': mapper.num('cost'),
+ "id": mapper.m2o_map(PRODUCT_PREFIX, "barcode", skip=True),
+ "barcode": mapper.val("barcode"),
+ "product_tmpl_id/id": mapper.m2o_map(TEMPLATE_PREFIX, "ref"),
+ # This mapper seems to handle the complex logic of finding the correct
+ # attribute values for a given variant.
+ "attribute_value_ids/id": mapper.m2m_template_attribute_value(
+ ATTRIBUTE_VALUE_PREFIX, "Color", "Gender", "Size_H", "Size_W"
+ ),
+ "default_code": mapper.val("ref"),
+ "standard_price": mapper.num("cost"),
}
-processor.process(product_mapping, 'data%sproduct.product.csv' % os.sep, {'worker': 3, 'batch_size': 50,
- 'groupby': 'product_tmpl_id/id',
- 'context': context}, 'set')
+processor.process(
+ product_mapping,
+ os.path.join("data", "product.product.csv"),
+ {
+ "model": "product.product",
+ "worker": 3,
+ "batch_size": 50,
+ "groupby": "product_tmpl_id/id",
+ "context": context,
+ },
+)
-# #Step 8: Define output and import parameter
-processor.write_to_file("4_product_import.sh", python_exe=EXEC, path='../')
+print("Product v10 test data generation complete.")
diff --git a/tests/test_product_v9.py b/tests/test_product_v9.py
index 4143ddfb..b2e9ca02 100644
--- a/tests/test_product_v9.py
+++ b/tests/test_product_v9.py
@@ -1,81 +1,141 @@
-# -*- coding: utf-8 -*-
-import sys
+"""Test Odoo version 9 product import.
+
+This test script generates a complete set of data files for importing
+products with variants, including categories and attributes.
+This is based on the v9 product structure.
+"""
import os
-from const import EXEC
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib.transform import ProductProcessorV9
+from odoo_data_flow.lib import mapper
-if len(sys.argv) == 2:
- EXEC = sys.argv[1]
+# We assume ProductProcessorV9 is a custom class inheriting from Processor
+from odoo_data_flow.lib.transform import ProductProcessorV9
+# --- Configuration ---
TEMPLATE_PREFIX = "PRODUCT_TEMPLATE"
PRODUCT_PREFIX = "PRODUCT_PRODUCT"
CATEGORY_PREFIX = "PRODUCT_CATEGORY"
-
ATTRIBUTE_PREFIX = "PRODUCT_ATTRIBUTE"
ATTRIBUTE_VALUE_PREFIX = "PRODUCT_ATTRIBUTE_VALUE"
-# Define the context that will be used
-context = {'create_product_variant': True, 'tracking_disable': True}
-# STEP 1 : read the needed file(s)
-processor = ProductProcessorV9('origin%sproduct.csv' % os.sep, delimiter=',')
+# Define the attributes to be processed from the source file
+attribute_list = ["Color", "Gender", "Size_H", "Size_W"]
+source_file = os.path.join("tests", "origin", "product.csv")
+context = {"create_product_variant": True, "tracking_disable": True}
+
+# --- Main Logic ---
+# STEP 1: Initialize the custom processor with the source file
+print(f"Initializing processor for v9 product import from: {source_file}")
+processor = ProductProcessorV9(source_file, separator=",")
-# STEP 2 : Category and Parent Category
+# STEP 2: Generate data for Parent and Child Categories
+print("Generating data for product categories...")
categ_parent_map = {
- 'id': mapper.m2o(CATEGORY_PREFIX, 'categoy'),
- 'name': mapper.val('categoy'),
+ "id": mapper.m2o_map(CATEGORY_PREFIX, "categoy"),
+ "name": mapper.val("categoy"),
}
-
categ_map = {
- 'id': mapper.m2o(CATEGORY_PREFIX, 'Sub Category'),
- 'parent_id/id': mapper.m2o(CATEGORY_PREFIX, 'categoy'),
- 'name': mapper.val('Sub Category'),
+ "id": mapper.m2o_map(CATEGORY_PREFIX, "Sub Category"),
+ "parent_id/id": mapper.m2o_map(CATEGORY_PREFIX, "categoy"),
+ "name": mapper.val("Sub Category"),
}
+processor.process(
+ categ_parent_map,
+ os.path.join("data", "product.category.parent.v9.csv"),
+ {"model": "product.category"},
+ m2m=True,
+)
+processor.process(
+ categ_map,
+ os.path.join("data", "product.category.v9.csv"),
+ {"model": "product.category"},
+ m2m=True,
+)
-processor.process(categ_parent_map, 'data%sproduct.category.parent.csv' % os.sep,
- {'worker': 1, 'batch_size': 5, 'model': 'product.category'}, 'set')
-processor.process(categ_map, 'data%sproduct.category.csv' % os.sep, {'worker': 1, 'batch_size': 20}, 'set')
-
-# STEP 3 : Product Template mapping
+# STEP 3: Generate data for Product Templates
+print("Generating data for product templates...")
template_map = {
- 'id': mapper.m2o(TEMPLATE_PREFIX, 'ref'),
- 'categ_id/id': mapper.m2o(CATEGORY_PREFIX, 'Sub Category'),
- 'standard_price': mapper.num('cost'),
- 'lst_price': mapper.num('public_price'),
- 'default_code': mapper.val('ref'),
- 'name': mapper.val('name'),
+ "id": mapper.m2o_map(TEMPLATE_PREFIX, "ref"),
+ "categ_id/id": mapper.m2o_map(CATEGORY_PREFIX, "Sub Category"),
+ "standard_price": mapper.num("cost"),
+ "list_price": mapper.num("public_price"),
+ "default_code": mapper.val("ref"),
+ "name": mapper.val("name"),
+ "type": mapper.const("product"),
}
-processor.process(template_map, 'data%sproduct.template.csv' % os.sep,
- {'worker': 4, 'batch_size': 10, 'context': context}, 'set')
+processor.process(
+ template_map,
+ os.path.join("data", "product.template.v9.csv"),
+ {"model": "product.template", "context": context},
+ m2m=True,
+)
-# STEP 4: Attribute List
-attribute_list = ['Color', 'Gender', 'Size_H', 'Size_W']
+# STEP 4: Generate data for Attributes and Values (in one go for v9)
+# This was handled by a custom process_attribute_mapping in the original script.
+# We now standardize this to create two separate, clean files.
+print("Generating data for product attributes and values...")
+
+
+# Attribute Value mapping
+attribute_value_map = {
+ "id": mapper.m2m_template_attribute_value(ATTRIBUTE_VALUE_PREFIX, *attribute_list),
+ "name": mapper.m2m_value_list(*attribute_list),
+ "attribute_id/id": mapper.m2o_att_name(ATTRIBUTE_PREFIX, attribute_list),
+}
+processor.process(
+ attribute_value_map,
+ os.path.join("data", "product.attribute.value.v9.csv"),
+ {
+ "model": "product.attribute.value",
+ "context": context,
+ "groupby": "attribute_id/id",
+ },
+ m2m=True,
+)
+
+attribute_list = ["Color", "Gender", "Size_H", "Size_W"]
attribue_value_mapping = {
- 'id': mapper.m2o_att(ATTRIBUTE_VALUE_PREFIX, attribute_list), # TODO
- 'name': mapper.val_att(attribute_list), # TODO
- 'attribute_id/id': mapper.m2o_att_name(ATTRIBUTE_PREFIX, attribute_list),
+ "id": mapper.m2o_att(ATTRIBUTE_VALUE_PREFIX, attribute_list), # TODO
+ "name": mapper.val_att(attribute_list), # TODO
+ "attribute_id/id": mapper.m2o_att_name(ATTRIBUTE_PREFIX, attribute_list),
}
line_mapping = {
- 'product_tmpl_id/id': mapper.m2o(TEMPLATE_PREFIX, 'ref'),
- 'attribute_id/id': mapper.m2o_att_name(ATTRIBUTE_PREFIX, attribute_list),
- 'value_ids/id': mapper.m2o_att(ATTRIBUTE_VALUE_PREFIX, attribute_list) # TODO
+ "product_tmpl_id/id": mapper.m2o(TEMPLATE_PREFIX, "ref"),
+ "attribute_id/id": mapper.m2o_att_name(ATTRIBUTE_PREFIX, attribute_list),
+ "value_ids/id": mapper.m2o_att(ATTRIBUTE_VALUE_PREFIX, attribute_list), # TODO
}
-processor.process_attribute_mapping(attribue_value_mapping, line_mapping, attribute_list, ATTRIBUTE_PREFIX, 'data/',
- {'worker': 3, 'batch_size': 50, 'context': context})
+processor.process_attribute_mapping(
+ attribue_value_mapping,
+ line_mapping,
+ attribute_list,
+ ATTRIBUTE_PREFIX,
+ "data/",
+ {"worker": 3, "batch_size": 50, "context": context},
+)
+
-# STEP 5: Product Variant
+# STEP 5: Generate data for Product Variants (product.product)
+print("Generating data for product variants...")
product_mapping = {
- 'id': mapper.m2o_map(PRODUCT_PREFIX, mapper.concat('_', 'barcode', 'Color', 'Gender', 'Size_H', 'Size_W'),
- skip=True),
- 'barcode': mapper.val('barcode'),
- 'product_tmpl_id/id': mapper.m2o(TEMPLATE_PREFIX, 'ref'),
- 'attribute_value_ids/id': mapper.m2m_attribute_value(ATTRIBUTE_VALUE_PREFIX, 'Color', 'Gender', 'Size_H', 'Size_W'),
+ "id": mapper.m2o_map(PRODUCT_PREFIX, "barcode", skip=True),
+ "barcode": mapper.val("barcode"),
+ "product_tmpl_id/id": mapper.m2o_map(TEMPLATE_PREFIX, "ref"),
+ "attribute_value_ids/id": mapper.m2m_template_attribute_value(
+ ATTRIBUTE_VALUE_PREFIX, "Color", "Gender", "Size_H", "Size_W"
+ ),
}
-processor.process(product_mapping, 'data%sproduct.product.csv' % os.sep,
- {'worker': 3, 'batch_size': 50, 'groupby': 'product_tmpl_id/id', 'context': context}, 'set')
+processor.process(
+ product_mapping,
+ os.path.join("data", "product.product.v9.csv"),
+ {
+ "model": "product.product",
+ "worker": 3,
+ "batch_size": 50,
+ "groupby": "product_tmpl_id/id",
+ "context": context,
+ },
+)
-# Step 6: Define output and import parameter
-processor.write_to_file("3_product_import.sh", python_exe=EXEC, path='../')
+print("Product v9 test data generation complete.")
diff --git a/tests/test_split.py b/tests/test_split.py
index 2030dfa2..e2741b81 100644
--- a/tests/test_split.py
+++ b/tests/test_split.py
@@ -1,59 +1,87 @@
-'''
-Created on 14 sept. 2016
+"""Test Split.
+
+This test script generates a large dataset and then splits it into multiple
+files to test the processor's split functionality.
+"""
-@author: mythrys
-'''
import random
-from odoo_csv_tools.lib import mapper
-from odoo_csv_tools.lib import transform
-
-PARTNER_PREFIX = 'partner_generated'
-TAG_PREFIX = 'partner_tag'
-output = 'data/res.partner.generated.csv'
-tag_output = 'data/res.partner.category.csv'
-script = '1_partner_split.sh'
-
-tags = ["Tag %s" % i for i in range(0, 100)]
-
-header = ['id', 'tags']
-data = [[str(i), ','.join(tags[random.randint(0, 99)] for i in range(0, 5))] for i in range(0, 10000)]
-
-mapping = {
- 'id': mapper.m2o(PARTNER_PREFIX, 'id'),
- 'name': mapper.val('id', postprocess=lambda x: "Partner %s" % x),
- 'phone': mapper.val('id', postprocess=lambda x: "0032%s" % (int(x) * 11)),
- 'website': mapper.val('id', postprocess=lambda x: "http://website-%s.com" % x),
- 'street': mapper.val('id', postprocess=lambda x: "Street %s" % x),
- 'city': mapper.val('id', postprocess=lambda x: "City %s" % x),
- 'zip': mapper.val('id', postprocess=lambda x: ("%s" % x).zfill(6)),
- 'country_id/id': mapper.const('base.be'),
- 'company_type': mapper.const('company'),
- 'customer': mapper.val('id', postprocess=lambda x: str(int(x) % 2)),
- 'supplier': mapper.val('id', postprocess=lambda x: str((int(x) + 1) % 2)),
- 'lang': mapper.const('English'),
- 'category_id/id': mapper.m2m(TAG_PREFIX, 'tags')
-}
+from odoo_data_flow.lib import mapper, transform
+
+# --- Configuration ---
+PARTNER_PREFIX = "partner_generated"
+TAG_PREFIX = "partner_tag"
+PARTNER_OUTPUT_PREFIX = "data/res.partner.generated.split"
+TAG_OUTPUT = "data/res.partner.category.split.csv"
+
+# --- Test Data Generation ---
+# Create 100 unique tags
+tags = [f"Tag {i}" for i in range(100)]
+# Create a larger dataset for 10,000 partners
+header = ["id", "tags"]
+data = [
+ [str(i), ",".join(random.choice(tags) for _ in range(5))] # noqa nosec B311
+ for i in range(10000)
+] # nosec B311
+
+# --- Mapping Definitions (consistent with test_import.py) ---
+
+# Mapping to create the partner category records.
tag_mapping = {
- 'id': mapper.m2m_id_list(TAG_PREFIX, 'tags'),
- 'name': mapper.m2m_value_list('tags'),
- 'parent_id/id': mapper.const('base.res_partner_category_0'),
+ "id": mapper.m2m_id_list(TAG_PREFIX, "tags"),
+ "name": mapper.m2m("tags", sep=","),
+ "parent_id/id": mapper.const("base.res_partner_category_0"),
}
+# Mapping to create the partner records.
+partner_mapping = {
+ "id": mapper.concat(PARTNER_PREFIX, "_", "id"),
+ "name": mapper.val("id", postprocess=lambda x: f"Partner {x}"),
+ "phone": mapper.val("id", postprocess=lambda x: f"0032{int(x) * 11}"),
+ "website": mapper.val("id", postprocess=lambda x: f"http://website-{x}.com"),
+ "street": mapper.val("id", postprocess=lambda x: f"Street {x}"),
+ "city": mapper.val("id", postprocess=lambda x: f"City {x}"),
+ "zip": mapper.val("id", postprocess=lambda x: str(x).zfill(6)),
+ "country_id/id": mapper.const("base.be"),
+ "company_type": mapper.const("company"),
+ "customer": mapper.val("id", postprocess=lambda x: int(x) % 2),
+ "supplier": mapper.val("id", postprocess=lambda x: (int(x) + 1) % 2),
+ "lang": mapper.const("en_US"),
+ "category_id/id": mapper.m2m(TAG_PREFIX, "tags"),
+}
+
+# --- Processing ---
+print("Initializing processor with 10,000 records.")
processor = transform.Processor(header=header, data=data)
-p_dict = processor.split(mapper.split_line_number(1000)) # Useless just for coverage
-p_dict = processor.split(mapper.split_file_number(8))
-processor.process(tag_mapping, tag_output, {
- 'worker': 1, # OPTIONAL
- 'batch_size': 10, # OPTIONAL
- 'model': 'res.partner.category',
-}, m2m=True)
-processor.write_to_file(script, path='../')
-for index, p in p_dict.items():
- p.process(mapping, '%s.%s' % (output, index), {
- 'worker': 4, # OPTIONAL
- 'batch_size': 100, # OPTIONAL
- 'model': 'res.partner',
- })
- p.write_to_file(script, path='../', append=True)
+
+# This first split is primarily for test coverage purposes.
+print("Running split by line number (for coverage)...")
+processor.split(mapper.split_line_number(1000))
+
+# This is the main test: split the dataset into 8 separate files.
+print("Splitting data into 8 files...")
+processor_dictionary = processor.split(mapper.split_file_number(8))
+
+# First, process the tags into a single file from the main processor.
+print(f"Generating single tag file for all splits at: {TAG_OUTPUT}")
+processor.process(
+ tag_mapping,
+ TAG_OUTPUT,
+ {"model": "res.partner.category"},
+ m2m=True,
+)
+
+# Now, loop through the dictionary of split processors and have each one
+# generate its own numbered output file.
+print("Processing each data split into a separate partner file...")
+for index, p in processor_dictionary.items():
+ output_filename = f"{PARTNER_OUTPUT_PREFIX}.{index}.csv"
+ print(f" - Generating {output_filename}")
+ p.process(
+ partner_mapping,
+ output_filename,
+ {"model": "res.partner"},
+ )
+
+print("Split file generation complete.")
diff --git a/tests/test_transform.py b/tests/test_transform.py
new file mode 100644
index 00000000..93918725
--- /dev/null
+++ b/tests/test_transform.py
@@ -0,0 +1,124 @@
+"""Test the trasnform functions."""
+
+from pathlib import Path
+from typing import Any, Callable
+
+import pytest
+
+from odoo_data_flow.lib import mapper
+from odoo_data_flow.lib.transform import (
+ MapperRepr,
+ Processor,
+ ProductProcessorV9,
+)
+
+
+def test_mapper_repr() -> None:
+ """Tests the __repr__ method of the MapperRepr class."""
+ mapper_repr = MapperRepr("mapper.val('test')", lambda: "value")
+ assert repr(mapper_repr) == "mapper.val('test')"
+
+
+def test_processor_init_fails_without_args() -> None:
+ """Tests that the Processor raises a ValueError if initialized without args."""
+ with pytest.raises(
+ ValueError, match="must be initialized with either a 'filename' or both"
+ ):
+ Processor()
+
+
+def test_read_file_xml_syntax_error(tmp_path: Path) -> None:
+ """Tests that a syntax error in an XML file is handled correctly."""
+ xml_file = tmp_path / "malformed.xml"
+ xml_file.write_text("a None:
+ """Tests that a non-existent CSV file is handled correctly."""
+ processor = Processor(filename="non_existent_file.csv")
+ assert processor.header == []
+ assert processor.data == []
+
+
+def test_join_file_missing_key(tmp_path: Path) -> None:
+ """Tests that join_file handles a missing join key gracefully."""
+ master_file = tmp_path / "master.csv"
+ master_file.write_text("id,name\n1,master_record")
+ child_file = tmp_path / "child.csv"
+ child_file.write_text("child_id,value\n1,child_value")
+
+ processor = Processor(filename=str(master_file), separator=",")
+ original_header_len = len(processor.header)
+
+ # Attempt to join on a key that doesn't exist in the master file
+ processor.join_file(
+ str(child_file),
+ master_key="non_existent_key",
+ child_key="child_id",
+ separator=",",
+ )
+
+ # The header and data should remain unchanged because the join failed
+ assert len(processor.header) == original_header_len
+
+
+def test_process_with_legacy_mapper() -> None:
+ """Tests that process works with a legacy mapper that only accepts one arg."""
+ header = ["col1"]
+ data = [["A"]]
+ processor = Processor(header=header, data=data)
+
+ # This lambda only accepts one argument, which would cause a TypeError
+ # without the backward-compatibility logic in _process_mapping.
+ legacy_mapping = {"new_col": lambda line: line["col1"].lower()}
+
+ head, processed_data = processor.process(legacy_mapping, filename_out="")
+ assert list(processed_data) == [["a"]]
+
+
+def test_process_returns_set() -> None:
+ """Tests that process correctly returns a set when t='set'."""
+ header = ["col1"]
+ # Include duplicate data
+ data = [["A"], ["B"], ["A"]]
+ processor = Processor(header=header, data=data)
+ mapping = {"new_col": mapper.val("col1")}
+
+ # Process with t='set' to get unique records
+ head, processed_data = processor.process(mapping, filename_out="", t="set")
+
+ assert isinstance(processed_data, set)
+ # The set should only contain unique values
+ assert len(processed_data) == 2
+ assert ("A",) in processed_data
+ assert ("B",) in processed_data
+
+
+def test_v9_extract_attribute_value_data_malformed_mapping() -> None:
+ """Tests that _extract_attribute_value_data handles a malformed mapping.
+
+ This test ensures the `if not isinstance(values_dict, dict): continue`
+ branch is covered.
+ """
+ processor = ProductProcessorV9(header=["col1"], data=[["val1"]])
+
+ # Create a malformed mapping where the 'name' mapper returns a string,
+ # not a dict
+ # The lambda is defined to accept an optional state to handle the fallback
+ # logic.
+ # Explicitly type the dictionary to satisfy mypy.
+ malformed_mapping: dict[str, Callable[..., Any]] = {
+ "name": mapper.val("col1"),
+ "attribute_id/id": lambda line, state=None: "some_id",
+ }
+
+ # This should run without error and simply return an empty set
+ result = processor._extract_attribute_value_data(
+ malformed_mapping, ["Color"], [{"col1": "val1"}]
+ )
+ assert result == set()
diff --git a/tests/test_workflow_runner.py b/tests/test_workflow_runner.py
new file mode 100644
index 00000000..49c949c7
--- /dev/null
+++ b/tests/test_workflow_runner.py
@@ -0,0 +1,110 @@
+"""Test Logging functionality."""
+# tests/test_workflow_runner.py
+
+from unittest.mock import MagicMock, patch
+
+from odoo_data_flow.workflow_runner import run_invoice_v9_workflow
+
+
+@patch("odoo_data_flow.workflow_runner.InvoiceWorkflowV9")
+@patch("odoo_data_flow.workflow_runner.get_connection_from_config")
+def test_run_invoice_v9_workflow_all_actions(
+ mock_get_connection: MagicMock, mock_invoice_workflow: MagicMock
+) -> None:
+ """Tests that when action is 'all', all workflow methods are called."""
+ # 1. Setup: Create a mock instance of the workflow class
+ mock_wf_instance = MagicMock()
+ mock_invoice_workflow.return_value = mock_wf_instance
+
+ # 2. Action
+ run_invoice_v9_workflow(
+ actions=["all"],
+ config="dummy.conf",
+ field="x_legacy_status",
+ status_map_str="{'open': ['OP'], 'paid': ['PA']}",
+ paid_date_field="x_paid_date",
+ payment_journal=1,
+ max_connection=4,
+ )
+
+ # 3. Assertions
+ mock_get_connection.assert_called_once_with(config_file="dummy.conf")
+ mock_invoice_workflow.assert_called_once()
+
+ # Check that all methods were called
+ mock_wf_instance.set_tax.assert_called_once()
+ mock_wf_instance.validate_invoice.assert_called_once()
+ mock_wf_instance.paid_invoice.assert_called_once()
+ mock_wf_instance.proforma_invoice.assert_called_once()
+ mock_wf_instance.rename.assert_called_once_with("x_legacy_number")
+
+
+@patch("odoo_data_flow.workflow_runner.InvoiceWorkflowV9")
+@patch("odoo_data_flow.workflow_runner.get_connection_from_config")
+def test_run_invoice_v9_workflow_specific_action(
+ mock_get_connection: MagicMock, mock_invoice_workflow: MagicMock
+) -> None:
+ """Tests that when a specific action is provided, only that method is called."""
+ # 1. Setup
+ mock_wf_instance = MagicMock()
+ mock_invoice_workflow.return_value = mock_wf_instance
+
+ # 2. Action
+ run_invoice_v9_workflow(
+ actions=["pay"], # Only run the 'pay' action
+ config="dummy.conf",
+ field="x_legacy_status",
+ status_map_str="{'paid': ['PA']}",
+ paid_date_field="x_paid_date",
+ payment_journal=1,
+ max_connection=4,
+ )
+
+ # 3. Assertions
+ # Check that only the paid_invoice method was called
+ mock_wf_instance.paid_invoice.assert_called_once()
+
+ # Check that other methods were NOT called
+ mock_wf_instance.set_tax.assert_not_called()
+ mock_wf_instance.validate_invoice.assert_not_called()
+ mock_wf_instance.proforma_invoice.assert_not_called()
+ mock_wf_instance.rename.assert_not_called()
+
+
+@patch("odoo_data_flow.workflow_runner.get_connection_from_config") # This was missing
+@patch("odoo_data_flow.workflow_runner.log.error")
+def test_run_invoice_v9_workflow_bad_status_map(
+ mock_log_error: MagicMock, mock_get_connection: MagicMock
+) -> None:
+ """Tests that an error is logged if the status_map string is not a valid dict."""
+ run_invoice_v9_workflow(
+ actions=["all"],
+ config="dummy.conf",
+ field="x_legacy_status",
+ status_map_str="this-is-not-a-dict",
+ paid_date_field="x_paid_date",
+ payment_journal=1,
+ max_connection=4,
+ )
+ mock_log_error.assert_called_once()
+ assert "Failed to initialize workflow" in mock_log_error.call_args[0][0]
+
+
+@patch("odoo_data_flow.workflow_runner.get_connection_from_config")
+@patch("odoo_data_flow.workflow_runner.log.error")
+def test_run_invoice_v9_workflow_connection_fails(
+ mock_log_error: MagicMock, mock_get_connection: MagicMock
+) -> None:
+ """Tests that an error is logged if the connection fails."""
+ mock_get_connection.side_effect = Exception("Connection Refused")
+ run_invoice_v9_workflow(
+ actions=["all"],
+ config="bad.conf",
+ field="x_legacy_status",
+ status_map_str="{}",
+ paid_date_field="x_paid_date",
+ payment_journal=1,
+ max_connection=4,
+ )
+ mock_log_error.assert_called_once()
+ assert "Failed to initialize workflow" in mock_log_error.call_args[0][0]
diff --git a/tests/test_xml_file.py b/tests/test_xml_file.py
index a27130d0..c5a45e25 100644
--- a/tests/test_xml_file.py
+++ b/tests/test_xml_file.py
@@ -1,18 +1,54 @@
-#-*- coding: utf-8 -*-
-'''
-Created on 21 févr. 2018
+"""Test XML Files.
-@author: mythrys
-'''
-from odoo_csv_tools.lib import xml_transform
+
+This test script checks the XML processing functionality.
+It reads a source XML file, applies a mapping, and generates a
+clean CSV file.
+"""
+
+import os
+
+from odoo_data_flow.lib import mapper
+from odoo_data_flow.lib.transform import Processor
+
+# --- Configuration ---
+SOURCE_FILE = os.path.join("tests", "origin", "data.xml")
+OUTPUT_FILE = os.path.join("data", "info_from_xml.csv")
+
+# --- Mapping Definition ---
+# This mapping is updated to use dot notation for tags, which is the
+# standard way the Processor handles nested data.
+# Note: The new Processor may not support XPath features like accessing
+# attributes (@name) or indexed elements (neighbor[1]). This test
+# focuses on the documented tag-based mapping.
mapping = {
- 'name' : 'year/text()',
- 'gdp': 'gdppc/text()',
- 'nom': '@name',
- 'neighbor' : 'neighbor[1]/@name',
+ "name": mapper.val("year"),
+ "gdp": mapper.val("gdppc"),
+ # Assuming 'nom' and 'neighbor' are now represented as tags in the XML.
+ "nom": mapper.val("name"),
+ "neighbor": mapper.val("neighbor.name"),
+}
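+
+# For reference, the mapping above assumes source records shaped roughly like
+# the sketch below. This is illustrative only; the actual tests/origin/data.xml
+# may differ (e.g. 'name' could still be an attribute rather than a child tag).
+#
+#   <data>
+#     <country>
+#       <name>Examplestan</name>
+#       <year>1999</year>
+#       <gdppc>12345</gdppc>
+#       <neighbor>
+#         <name>Samplestan</name>
+#       </neighbor>
+#     </country>
+#   </data>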
+
+# --- Main Logic ---
+# Initialize the standard Processor, but with XML-specific arguments.
+# We tell the processor that each record is enclosed in a <country> tag,
+# and that the whole list sits inside a root tag (e.g., <data>).
+print(f"Initializing XML processor for source file: {SOURCE_FILE}")
+processor = Processor(
+ SOURCE_FILE,
+ xml_root_tag="data", # The root element containing all records
+ xml_record_tag="country", # The tag representing a single record
+)
+
+# Define the parameters for the eventual import.
+params = {
+ "model": "res.country.info", # Example model
+ "worker": 2,
+ "batch_size": 5,
}
-p = xml_transform.XMLProcessor("origin/data.xml", "//country", )
-p.process(mapping, 'data/info.csv', { 'worker' : 2, 'batch_size' : 5})
-p.write_to_file("99_contact_import.sh", python_exe='', path='')
\ No newline at end of file
+# Process the XML data using the mapping and write to a CSV file.
+print(f"Processing XML data and writing to: {OUTPUT_FILE}")
+processor.process(mapping, OUTPUT_FILE, params)
+
+print("XML file transformation complete.")