diff --git a/.dockerignore b/.dockerignore
index c58bdf48f..6102c73d6 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -11,4 +11,7 @@ dist/
 htmlcov/
 tests/
 utils/
-data/
\ No newline at end of file
+data/
+
+**/__pycache__
+*.pyc
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/add_language.md b/.github/ISSUE_TEMPLATE/add_language.md
new file mode 100644
index 000000000..6791045bf
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/add_language.md
@@ -0,0 +1,36 @@
+---
+name: Add a language
+about: Fill in a form with data to request adding a new language to Lute's demos
+title: 'Add language: [name]'
+labels: 'enhancement'
+assignees: ''
+
+---
+
+**NOTE: this form is for you to share your language settings with other new users. If you're a GitHub user, it would be super if you could instead create a Pull Request with your language settings, using the files in this repo's `lute/db/demo/language` and `/stories` as references. Thanks!**
+
+If your language settings are working well for you, please share them with other new users by providing the following data:
+
+* name:
+* show_romanization:
+* right_to_left:
+* parser_type: spacedel or mecab (probably "spacedel", i.e. "space delimited", is the correct one)
+* split_sentences: characters to split sentences on, if the defaults aren't good
+* split_sentence_exceptions:
+* word_chars:
+* character_substitutions: (if there are any special characters)
+
+Dictionaries. A list of one or more entries:
+
+* use_for: terms or sentences
+* type: embedded or popup
+* url
+
+It's good to have a short demonstration story available as well. Please paste a family-friendly (!) story below:
+
+```
+Title: [story-title]
+Source: [url source or similar]
+
+[content here]
+```
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md
index 1f12796fc..8c5e1a01a 100644
--- a/.github/ISSUE_TEMPLATE/documentation.md
+++ b/.github/ISSUE_TEMPLATE/documentation.md
@@ -9,11 +9,11 @@ assignees: ''
 
 **Description**
 
-Brief description of documentation edit/creation requirement for https://jzohrab.github.io/lute-manual/.
+Brief description of documentation edit/creation requirement for https://luteorg.github.io/lute-manual/.
 
 **For broken/incorrect documentation:**
 
-* specify page in https://jzohrab.github.io/lute-manual/, section, and link
+* specify page in https://luteorg.github.io/lute-manual/, section, and link
 * Take a first shot at correcting the issue. I'll edit it for tone/consistency.
 
 **For new documentation:**
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e4940ec9d..32a7970dc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,9 +2,9 @@ name: ci
 
 on:
   push:
-    # A branch github-ci-updates can be created and used for ci
+    # A branch github-ci can be created and used for ci
     # experiments and tweaks.
-    branches: [ "develop", "master", "github-ci-updates" ]
+    branches: [ "develop", "master", "github-ci", "windows" ]
   pull_request:
     branches: [ "develop", "master" ]
 
@@ -13,13 +13,14 @@ permissions:
 
 jobs:
 
-  # Lint, format check, and all tests.
+  # Run all tests.
   base-ci:
     runs-on: ubuntu-latest
+    timeout-minutes: 30
 
     strategy:
       matrix:
-        python_version: [ '3.8', '3.9', '3.10', '3.11' ]
+        python_version: [ '3.8', '3.9', '3.10', '3.11', '3.12', '3.13' ]
 
     steps:
 
@@ -35,14 +36,21 @@ jobs:
         # actual=`readlink -f /usr/bin/mecab`
         # ldd $actual
 
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
 
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python_version }}
           cache: 'pip' # caching pip dependencies
 
-      - run: pip install -r requirements.txt
+      - name: Install packages using flit
+        run: |
+          pip install flit
+          # --only-deps: Installs only the dependencies, not Lute itself
+          # --deps develop: both regular and optional dependencies.
+          flit install --only-deps --deps develop
 
       - name: Setup config
         run: |
@@ -53,39 +61,57 @@ jobs:
           ls ${{ github.workspace }}
           cat ${{ github.workspace }}/lute/config/config.yml
 
-      - name: Lint
-        run: inv lint
-
-      - name: Coding style
-        run: |
-          black --check .
-
       - name: Test
         run: |
+          set -e
           # Have to explicitly set MECAB_PATH for natto-py.
           export MECAB_PATH=/lib/x86_64-linux-gnu/libmecab.so.2
           pytest
 
-      # Note that these seem to *pass* even if an acceptance
-      # test fails in GitHub actions ... which is annoying,
-      # because everything passes/fails correctly locally.
+      # Skipping acceptance tests for 3.8, just to save some time.
       - name: Acceptance testing
+        if: matrix.python_version != '3.8'
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 3
+          timeout_minutes: 30
+          command: |
+            # handle errors manually, so that the retry works.
+            exit_code=0
+            # Have to explicitly set MECAB_PATH for natto-py.
+            export MECAB_PATH=/lib/x86_64-linux-gnu/libmecab.so.2
+            inv accept --show || exit_code=$?
+            if [ "$exit_code" -ne 0 ]; then
+              echo "Command failed with exit code $exit_code, retrying..."
+            fi
+            exit $exit_code
+
+      # Skipping acceptance tests for 3.8, just to save some time.
+      - name: Mobile acceptance testing
+        if: matrix.python_version != '3.8'
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 3
+          timeout_minutes: 30
+          command: |
+            # handle errors manually, so that the retry works.
+            exit_code=0
+            # Have to explicitly set MECAB_PATH for natto-py.
+            export MECAB_PATH=/lib/x86_64-linux-gnu/libmecab.so.2
+            inv acceptmobile --show || exit_code=$?
+            if [ "$exit_code" -ne 0 ]; then
+              echo "Command failed with exit code $exit_code, retrying..."
+            fi
+            exit $exit_code
+
+      - name: Playwright install
+        run: playwright install
+
+      - name: Playwright smoke test
         run: |
-          export MECAB_PATH=/lib/x86_64-linux-gnu/libmecab.so.2
-          inv accept --headless -s
-
-      - name: Remove mecab for smoke test without mecab
-        run: |
-          sudo apt-get remove -y mecab mecab-ipadic-utf8
-
-      # Run one particular sanity check.
-      #
-      # Note this isn't _completely_ valid because the test loads
-      # only supported language stories, whereas a prod release
-      # comes with _all_ stories pre-loaded and the invalid ones
-      # are deleted ...
-      - name: Smoke test no mecab
-        run: inv accept --headless -s -k test_unsupported_language_not_shown
+          set -x
+          set -e
+          inv playwright
 
       - name: Check flit package
         run: |
@@ -94,72 +120,42 @@ jobs:
           flit -f ${{ github.workspace }}/pyproject.toml install
           python -m lute.main & # Start in background process
-          sleep 2
+          sleep 10
           # Verify with utils script back in the workspace
           # (verify.py is not included in flit install)
           pushd ${{ github.workspace }}
-          python -m utils.verify 5000
+          python -m utils.verify 5001
           popd
           pkill -f "python -m lute.main" # Kill that process.
 
-  # Test build docker container and try running.
-  # Slightly wasteful re-setup of node.
-  docker-build:
+  # Lute should still work if the user doesn't have mecab installed.
+  no-mecab-check:
     runs-on: ubuntu-latest
-    needs: base-ci
-    steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-          cache: 'pip' # caching pip dependencies
-      - run: pip install -r requirements.txt
-      - name: Test docker build
-        run: |
-          docker build -f docker/Dockerfile --build-arg INSTALL_MECAB=false -t lute3 .
-          # Run container in the background, and check.
-          docker run -d -p 5000:5000 -v ./my_data:/lute_data -v ./my_backups:/lute_backup --name my-lute lute3:latest
-          sleep 10 # Give it a moment to start.
-          python -m utils.verify 5000
-          docker stop my-lute
-          docker rm my-lute
-
+    timeout-minutes: 10
 
-  # Generate a coverage badge, don't worry if it fails.
-  # Uses https://github.com/Schneegans/dynamic-badges-action to update a secret gist
-  # (ID a15001ec2ff889f7be0b553df9881566) and an API token, per notes at
-  # https://nedbatchelder.com/blog/202209/making_a_coverage_badge.html.
-  # Slightly wasteful in that it re-runs the tests, but it's fastest.
-  coverage:
-    needs: base-ci
-    continue-on-error: true
-    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Only checking early and late versions.
+        python_version: [ '3.8', '3.11' ]
 
     steps:
 
-      - name: Install Dependencies
-        run: |
-          sudo apt-get update -y
-          sudo apt-get install -y mecab mecab-ipadic-utf8
-          # Helper checks to find the mecab library path,
-          # so it can be exported before running tests.
-          # Without the export, natto-py fails on github.
-          # echo FIND THE LIB:
-          # which mecab
-          # actual=`readlink -f /usr/bin/mecab`
-          # ldd $actual
-
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
 
       - uses: actions/setup-python@v4
         with:
-          python-version: '3.8'
+          python-version: ${{ matrix.python_version }}
           cache: 'pip' # caching pip dependencies
 
-      - run: pip install -r requirements.txt
+      - name: Install packages using flit
+        run: |
+          pip install flit
+          flit install --only-deps --deps develop
 
       - name: Setup config
         run: |
@@ -170,36 +166,154 @@ jobs:
           ls ${{ github.workspace }}
           cat ${{ github.workspace }}/lute/config/config.yml
 
-      - name: Generate coverage
+      # Run one particular sanity check.
+      #
+      # Note this isn't _completely_ valid because the test loads
+      # only supported language stories, whereas a prod release
+      # comes with _all_ stories pre-loaded and the invalid ones
+      # are deleted ...
+      - name: Smoke test no mecab
+        run: inv accept -s -k disabled_data_is_hidden
+
+
+  # TODO - reenable plugin tests.
+  # Mandarin plugin tests started failing in ci,
+  # but the plugin still worked locally,
+  # and the tests also passed locally.
+  #
+  # Disabling for now ... should re-enable.
+  ### # Run all plugin tests.
+  ### #
+  ### # For each plugin:
+  ### # - install Lute requirements
+  ### # - install plugin reqs
+  ### # - run tests.
+  ### #
+  ### # The Lute requirements are installed first b/c the plugins may come
+  ### # with their own conflicting requirements. Doing a full req install
+  ### # will (hopefully) uncover conflicts.
+  ### plugins:
+  ###   runs-on: ubuntu-latest
+  ###   timeout-minutes: 30
+
+  ###   strategy:
+  ###     matrix:
+  ###       python_version: [ '3.10', '3.11' ]
+
+  ###   steps:
+
+  ###     - uses: actions/checkout@v4
+  ###       with:
+  ###         submodules: true
+
+  ###     - uses: actions/setup-python@v4
+  ###       with:
+  ###         python-version: ${{ matrix.python_version }}
+  ###         cache: 'pip' # caching pip dependencies
+
+  ###     # Plugins likely won't need this config file, but just in case ...
+  ###     - name: Setup config
+  ###       run: |
+  ###         mkdir ${{ github.workspace }}/data
+  ###         echo "ENV: dev" > ${{ github.workspace }}/lute/config/config.yml
+  ###         echo "DATAPATH: ${{ github.workspace }}/data" >> ${{ github.workspace }}/lute/config/config.yml
+  ###         echo "DBNAME: test_lute.db" >> ${{ github.workspace }}/lute/config/config.yml
+  ###         ls ${{ github.workspace }}
+  ###         cat ${{ github.workspace }}/lute/config/config.yml
+
+  ###     - name: test all plugins
+  ###       run: |
+  ###         # Lute reqs, such as pytest.
+  ###         # This also installs Lute itself,
+  ###         # so that it can be found
+  ###         # by each plugin's own "pip install ."
+  ###         pip install flit
+  ###         flit install --deps develop
+
+  ###         for plugin in $(ls plugins); do
+  ###           pushd plugins/$plugin
+  ###           flit install
+  ###           # Note for future: some plugins may have extra reqs not covered by pip
+  ###           # (e.g. mecab uses apt-get and exports etc). Idea for future: plugin
+  ###           # could have a .github folder as well with additional setup scripts.
+
+  ###           pytest tests
+
+  ###           # pip uninstall $plugin -y
+  ###           # NOTE: Not bothering to do an uninstall!
+  ###           # if multiple plugins have different/clashing version requirements,
+  ###           # perhaps it is best to run into problems in ci.
+  ###           # This may ultimately come back to haunt me, but it will do for now.
+  ###           popd
+  ###         done
+
+
+  code-quality:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    strategy:
+      matrix:
+        python_version: [ '3.11' ]
+
+    steps:
+
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python_version }}
+          cache: 'pip' # caching pip dependencies
+
+      - run: |
+          pip install flit
+          flit install --only-deps --deps develop
+
+      - name: Lint
+        run: inv lint
+
+      - name: Coding style
         run: |
-          # Have to explicitly set MECAB_PATH for natto-py.
-          export MECAB_PATH=/lib/x86_64-linux-gnu/libmecab.so.2
-          coverage run -m pytest tests/
-          coverage json --omit="tests/*"
-          export TOTAL=$(python -c "import json;print(json.load(open('coverage.json'))['totals']['percent_covered_display'])")
-          echo "total=$TOTAL" >> $GITHUB_ENV
-          echo "### Total coverage: ${TOTAL}%" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Make badge"
-        uses: schneegans/dynamic-badges-action@v1.4.0
+          black --check .
+
+
+  # Test build docker container and try running.
+  # Slightly wasteful re-setup of node.
+  docker-build:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    needs: base-ci
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-python@v4
         with:
-          # GIST_TOKEN is a GitHub personal access token with scope "gist".
-          auth: ${{ secrets.GIST_TOKEN }}
-          gistID: a15001ec2ff889f7be0b553df9881566
-          filename: covbadge.json
-          label: coverage
-          message: ${{ env.total }}%
-          valColorRange: ${{ env.total }}
-          minColorRange: 50
-          maxColorRange: 80
+          python-version: 3.11
+          cache: 'pip' # caching pip dependencies
+      - run: |
+          pip install flit
+          flit install --only-deps --deps develop
+      - name: Test docker build
+        run: |
+          docker build -f docker/Dockerfile --build-arg INSTALL_EVERYTHING=false -t lute3 .
+          # Run container in the background, and check.
+          docker run -d -p 5001:5001 -v ./my_data:/lute_data -v ./my_backups:/lute_backup --name my-lute lute3:latest
+          sleep 10 # Give it a moment to start.
+          python -m utils.verify 5001
+          docker stop my-lute
+          docker rm my-lute
 
 
   # Ensure that basic things work correctly on Windows,
   # particularly PlatformDirs package.
   windows-ci:
-    needs: base-ci
     runs-on: windows-latest
+    timeout-minutes: 30
 
     strategy:
       matrix:
@@ -207,7 +321,10 @@ jobs:
 
     steps:
 
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
 
       - uses: actions/setup-python@v4
         with:
@@ -228,11 +345,34 @@ jobs:
           [System.IO.File]::WriteAllText("${{ github.workspace }}\lute\config\config.yml", $content)
           Get-Content -Path "${{ github.workspace }}\lute\config\config.yml"
 
-      - run: pip install -r requirements.txt
+      - run: |
+          pip install flit
+          flit install --only-deps --deps develop
 
       # Run one particular sanity check.
-      - name: Smoke test no mecab
-        run: inv accept --headless -s -k test_unsupported_language_not_shown
+      # Can't get playwright to run reliably on Windows.
+      # - name: Playwright install
+      #   run: playwright install
+      # - name: Playwright smoke test
+      #   run: inv playwright || exit /b
+
+      # Now having problems with tests not working on windows ...
+      # getting failure message:
+      #   javascript error: clear_datatable_state is not defined
+      #
+      # The above message is called from lute_test_client to clear book
+      # datatables state. This _used_ to work (e.g. in v3.3.0), and
+      # I can't track it down at the moment!!!!!!
+      #
+      # TODO ci: RESTORE AT LEAST ONE SANITY CHECK TEST ON WINDOWS.
+      #
+      # Run specific sanity check.
+      # Old tests no longer run -- datatables may have timing issues on Windows tests,
+      # tests were far too flaky.
+      #   inv accept -s -k test_unsupported_language_not_shown || exit /b
+      #   inv accept -s -k import_a_valid_term_file || exit /b
+      # - name: Smoke tests
+      #   run: |
+      #     inv accept -s -k test_updating_term_status_updates_the_reading_frame || exit /b
 
       - name: Remove config to force using prod config
         run: del ${{ github.workspace }}\lute\config\config.yml
 
@@ -256,4 +396,4 @@ jobs:
       - name: Verify flit install
         run: |
           cd ${{ github.workspace }}
-          python -m utils.verify 5000
+          python -m utils.verify 5001
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
new file mode 100644
index 000000000..1dcdf3fb4
--- /dev/null
+++ b/.github/workflows/coverage.yml
@@ -0,0 +1,82 @@
+name: coverage
+
+on:
+  push:
+    # A branch coverage can be created and used for ci
+    # experiments and tweaks.
+    branches: [ "master", "coverage" ]
+
+permissions:
+  contents: read
+
+jobs:
+
+  # Generate a coverage badge, don't worry if it fails.
+  # Uses https://github.com/Schneegans/dynamic-badges-action to update a secret gist
+  # (ID a15001ec2ff889f7be0b553df9881566) and an API token, per notes at
+  # https://nedbatchelder.com/blog/202209/making_a_coverage_badge.html.
+  # Slightly wasteful in that it re-runs the tests, but it's fastest.
+  coverage:
+    continue-on-error: true
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    steps:
+
+      - name: Install Dependencies
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install -y mecab mecab-ipadic-utf8
+          # Helper checks to find the mecab library path,
+          # so it can be exported before running tests.
+          # Without the export, natto-py fails on github.
+          # echo FIND THE LIB:
+          # which mecab
+          # actual=`readlink -f /usr/bin/mecab`
+          # ldd $actual
+
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.8'
+          cache: 'pip' # caching pip dependencies
+
+      - name: Install packages using flit
+        run: |
+          pip install flit
+          flit install --only-deps --deps develop
+
+      - name: Setup config
+        run: |
+          mkdir ${{ github.workspace }}/data
+          echo "ENV: dev" > ${{ github.workspace }}/lute/config/config.yml
+          echo "DATAPATH: ${{ github.workspace }}/data" >> ${{ github.workspace }}/lute/config/config.yml
+          echo "DBNAME: test_lute.db" >> ${{ github.workspace }}/lute/config/config.yml
+          ls ${{ github.workspace }}
+          cat ${{ github.workspace }}/lute/config/config.yml
+
+      - name: Generate coverage
+        run: |
+          # Have to explicitly set MECAB_PATH for natto-py.
+          export MECAB_PATH=/lib/x86_64-linux-gnu/libmecab.so.2
+          coverage run -m pytest tests/
+          coverage json --omit="tests/*"
+          export TOTAL=$(python -c "import json;print(json.load(open('coverage.json'))['totals']['percent_covered_display'])")
+          echo "total=$TOTAL" >> $GITHUB_ENV
+          echo "### Total coverage: ${TOTAL}%" >> $GITHUB_STEP_SUMMARY
+
+      - name: "Make badge"
+        uses: schneegans/dynamic-badges-action@v1.4.0
+        with:
+          # GIST_TOKEN is a GitHub personal access token with scope "gist".
+          auth: ${{ secrets.GIST_TOKEN }}
+          gistID: a15001ec2ff889f7be0b553df9881566
+          filename: covbadge.json
+          label: coverage
+          message: ${{ env.total }}%
+          valColorRange: ${{ env.total }}
+          minColorRange: 50
+          maxColorRange: 80
diff --git a/.gitignore b/.gitignore
index 0a21f42c4..d5843b61e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,9 +2,12 @@
 # Ignores.
 
 # Configs
+
+# Override config the user can have at root.
+config.yml
+
+# Regular app config
 lute/config/config.yml
-*.ini
-*.ini.bkp
 docker/docker-compose.yml
 docker-compose.yml
 
@@ -106,14 +109,17 @@ celerybeat.pid
 *.sage.py
 
 # Environments
-.env
-.venv
+.env*
+.venv*
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 
+# VS Code config
+.vscode/
+
 # Spyder project settings
 .spyderproject
 .spyproject
@@ -121,6 +127,9 @@ venv.bak/
 # Rope project settings
 .ropeproject
 
+# PyCharm project settings
+.idea/
+
 # mkdocs documentation
 /site
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..5d6f7249b
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "lute/db/language_defs"]
+	path = lute/db/language_defs
+	url = git@github.com:LuteOrg/lute-language-defs.git
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fc24efde6..46609b16d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,9 @@
 # pre-commit config, per https://pre-commit.com/
 #
 # These hooks only use local commands.
+
+fail_fast: true
+
 repos:
   - repo: local
     hooks:
@@ -10,9 +13,9 @@ repos:
       language: system
      types: [python]
       require_serial: true
-    - id: pylint
-      name: pylint
-      entry: pylint
-      language: system
-      types: [python]
-      require_serial: true
+    # - id: pylint
+    #   name: pylint
+    #   entry: pylint
+    #   language: system
+    #   types: [python]
+    #   require_serial: true
diff --git a/.pytest.ini b/.pytest.ini
index 107325cdd..d21927d5d 100644
--- a/.pytest.ini
+++ b/.pytest.ini
@@ -4,7 +4,7 @@ testpaths =
 
 # Acceptance tests are ignored because they are
 # slow. Run them using "inv accept".
-addopts = --ignore=tests/acceptance/
+addopts = --ignore=tests/acceptance/ --ignore=tests/playwright/
 
 # Acceptance tests were raising FutureWarning:
 # FutureWarning: Deleting all cookies via CookieManager.delete()
@@ -29,5 +29,6 @@ markers =
 # Rather than sorting out how to add a flask cli command
 # that has access to the configured app and context,
 # I'm just using some markers to reset/wipe the dev db.
-    dbdemoload: cli hack to load the dev db with demo data
-    dbwipe: cli hack to wipe the dev db
+    dbreset: cli hack to wipe the dev db and set the LoadDemoData flag
+
+    mobile: acceptance tests using a mobile browser
\ No newline at end of file
diff --git a/README.md b/README.md
index c4e1892bd..c0966244d 100644
--- a/README.md
+++ b/README.md
@@ -7,23 +7,25 @@
 [![Discord Server](https://badgen.net/badge/icon/discord?icon=discord&label)](https://discord.gg/CzFUQP5m8u)
 
-This repo contains the source code for Lute v3. Lute (Learning Using Texts) v3 is a full rewrite in Python and Flask of the original [Lute v1 and v2](https://github.com/jzohrab/lute) PHP project.
+This repo contains the source code for Lute (Learning Using Texts) v3, a Python/Flask tool for learning foreign languages through reading.
 
-![Lute v3 demo](https://github.com/jzohrab/lute-manual/assets/1637133/7e7f5f66-20bb-4e94-a11c-7b7ffc43255a)
+To learn more about Lute v3, or to install it for your own use and study, please see the [Lute v3 manual](https://luteorg.github.io/lute-manual/).
+
+![Lute v3 demo](https://luteorg.github.io/lute-manual/assets/intro.gif)
 
 # Getting Started
 
 ## Users
 
-To learn more about Lute v3, or to install it for your own use and study, please see the [Lute v3 manual](https://jzohrab.github.io/lute-manual/). Hop onto the [Discord](https://badgen.net/badge/icon/discord?icon=discord&label) too.
+See the [Lute v3 manual](https://luteorg.github.io/lute-manual/). Hop onto the [Discord](https://discord.gg/CzFUQP5m8u) too.
 
 ## Developing
 
-For more information on building and developing, please see [Development](./docs/development.md).
+For more information on building and developing, please see [Development](../../wiki/Development).
 
 ## Contributing
 
-If you'd like to contribute code to Lute (hooray!), check out the [Contribution Guidelines](./docs/contributing.md). And with every repo star, an angel gets its wings.
+If you'd like to contribute code to Lute (hooray!), check out the [Contribution Guidelines](../../wiki/Contributing). And with every repo star, an angel gets its wings.
 
 # License
diff --git a/README_PyPi.md b/README_PyPi.md
index 62c545c1e..2b713937f 100644
--- a/README_PyPi.md
+++ b/README_PyPi.md
@@ -2,8 +2,7 @@
 Learning Using Texts v3.
 
-Lute is for learning foreign languages through reading. `lute3` is a rewrite of the original Lute PHP application in Python and Flask.
-
+Lute is for learning foreign languages through reading. `lute3` is a rewrite of the original Lute PHP application in Python and Flask.
 See the [Lute manual](https://luteorg.github.io/lute-manual/) for more detail, and notes about installation.
 
 ## Requirements
@@ -33,7 +32,7 @@ pip install lute3
 
 # Start lute
 python -m lute.main
 
-# Open your web browser to http://localhost:5000
+# Open your web browser to http://localhost:5001
 # When done, hit Ctrl-C
 
 # Stop the virtual environment.
diff --git a/devstart.py b/devstart.py
index 0b173da06..43e9809dd 100644
--- a/devstart.py
+++ b/devstart.py
@@ -5,7 +5,7 @@
 
 You can run with:
 
-inv devstart
+inv start
 python -m devstart
 
 If you want to run this with "python", then for some _extremely odd_
@@ -21,8 +21,10 @@
 import os
 import argparse
 import logging
-from lute.app_factory import create_app
+from lute import __version__
+from lute.app_factory import create_app, data_initialization
 from lute.config.app_config import AppConfig
+from lute.db import db
 
 log = logging.getLogger("werkzeug")
 log.setLevel(logging.ERROR)
@@ -32,34 +34,35 @@ def start(port):
     """
     Start the dev server with reloads on port.
     """
-    ac = AppConfig.create_from_config()
-    # https://stackoverflow.com/questions/25504149/
-    # why-does-running-the-flask-dev-server-run-itself-twice
-    if os.environ.get("WERKZEUG_RUN_MAIN") == "true":
-        # Reloading.
-        pass
-    else:
-        # First run
-        msg = f"""
-        db name: {ac.dbname}
-        data: {ac.datapath}
 
+    def dev_print(s):
+        "Print info on first load only."
+        if os.environ.get("WERKZEUG_RUN_MAIN") == "true":
+            # https://stackoverflow.com/questions/25504149/
+            # why-does-running-the-flask-dev-server-run-itself-twice
+            # Reloading, do nothing.
+            return
+        print(s, flush=True)
+
+    config_file = AppConfig.default_config_filename()
+    dev_print("")
+    app = create_app(config_file, output_func=dev_print)
+    with app.app_context():
+        data_initialization(db.session, dev_print)
+
+    ac = AppConfig(config_file)
+    dev_print(f"\nversion {__version__}")
+    dev_print(f"db name: {ac.dbname}")
+    dev_print(f"data: {ac.datapath}")
+    dev_print(f"Running at: http://localhost:{port}\n")
 
-        Running at:
-
-        http://localhost:{port}
-
-        """
-        print(msg)
-
-        app = create_app(ac, output_func=print)
     app.run(debug=True, port=port)
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Start dev server lute.")
     parser.add_argument(
-        "--port", type=int, default=5000, help="Port number (default: 5000)"
+        "--port", type=int, default=5001, help="Port number (default: 5001)"
     )
     args = parser.parse_args()
     start(args.port)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 7f66ab112..5877b102d 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,32 +1,34 @@
 # The Dockerfile can be used to create two variants
-# by using "--build-arg INSTALL_MECAB=[true|false]":
-# - "true": with mecab and dictionary (800+ MB)
-# - "false": without (300 MB)
+# by using "--build-arg INSTALL_EVERYTHING=[true|false]":
+# - "true": with mecab and dictionary, mandarin parser (800+ MB)
+# - "false": without (230 MB)
+#
+# e.g. docker build --build-arg INSTALL_EVERYTHING=true -t lute3 .
 
 # Official python base image.
 FROM python:3.11-slim-bookworm
 
-# Define a build argument with a default value.
-ARG INSTALL_MECAB=false
+# Build args
+ARG INSTALL_EVERYTHING=false
 
-# Install mecab for Japanese support if INSTALL_MECAB is true, e.g.
-# docker build --build-arg INSTALL_MECAB=true -t lute3 .
-RUN if [ "$INSTALL_MECAB" = "true" ]; then \
-    apt-get update -y && \
-    apt-get install -y mecab mecab-ipadic-utf8 && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*; \
-    fi
-
-# Lute code and config.
-COPY requirements.txt .
-ENV PIP_ROOT_USER_ACTION=ignore
-RUN pip install -r requirements.txt
+# Install base.
+COPY pyproject.toml .
+COPY README_PyPi.md .
 COPY lute /lute
-RUN mv /lute/config/config.yml.docker /lute/config/config.yml
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV FLIT_ROOT_INSTALL=1
+RUN pip install flit
+RUN flit install --only-deps --deps=production
+COPY lute/config/config.yml.docker /lute/config/config.yml
+
+COPY docker/Dockerfile_scripts/install_everything.sh /lute/install_all.sh
+RUN chmod +x /lute/install_all.sh
+
+RUN if [ "$INSTALL_EVERYTHING" = "true" ]; then /lute/install_all.sh; fi
 
-EXPOSE 5000
+EXPOSE 5001
 
 # Start script.
-COPY docker/check_mounts_and_start.sh /lute/start.sh
+COPY docker/Dockerfile_scripts/start.sh /lute/start.sh
 RUN chmod +x /lute/start.sh
 ENTRYPOINT ["/lute/start.sh"]
diff --git a/docker/Dockerfile_scripts/install_everything.sh b/docker/Dockerfile_scripts/install_everything.sh
new file mode 100644
index 000000000..802eae3fa
--- /dev/null
+++ b/docker/Dockerfile_scripts/install_everything.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+#
+# Install all the extra stuff when using INSTALL_EVERYTHING
+# in the Dockerfile.
+
+# Mecab
+apt-get update -y
+apt-get install -y mecab mecab-ipadic-utf8
+apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Plugins
+pip install lute3-mandarin
+pip install lute3-thai
+pip install lute3-khmer
diff --git a/docker/check_mounts_and_start.sh b/docker/Dockerfile_scripts/start.sh
similarity index 100%
rename from docker/check_mounts_and_start.sh
rename to docker/Dockerfile_scripts/start.sh
diff --git a/docker/README.md b/docker/README.md
index 441977372..58886c9bc 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -12,6 +12,14 @@ The `docker-compose.yml.example` in this directory works with the `build_test.sh`
 ./docker/build_test.sh && docker compose up
 ```
 
+## Checking
+
+If the image doesn't start correctly, override the entrypoint to log in and check it out:
+
+```
+docker run -it --entrypoint /bin/bash lute3-lean:latest
+```
+
 ## Building and pushing
 
 Build and push all variants:
diff --git a/docker/build_all.sh b/docker/build_all.sh
index 06b9ff944..241e72e87 100755
--- a/docker/build_all.sh
+++ b/docker/build_all.sh
@@ -46,7 +46,7 @@ docker buildx build \
   --push \
   --platform linux/amd64,linux/arm64 \
   -f docker/Dockerfile "$@" \
-  --build-arg INSTALL_MECAB=true \
+  --build-arg INSTALL_EVERYTHING=true \
   -t $TAG -t $LATEST .
 
 echo
@@ -57,7 +57,7 @@ docker buildx build \
   --push \
   --platform linux/amd64,linux/arm64 \
   -f docker/Dockerfile "$@" \
-  --build-arg INSTALL_MECAB=false \
+  --build-arg INSTALL_EVERYTHING=false \
   -t $LEANTAG -t $LEANLATEST .
 
 echo
diff --git a/docker/build_test.sh b/docker/build_test.sh
index 8ed850637..a2d10ddaf 100755
--- a/docker/build_test.sh
+++ b/docker/build_test.sh
@@ -1,3 +1,5 @@
 #!/bin/bash
 
-docker build -f docker/Dockerfile --build-arg INSTALL_MECAB=true -t lute3 .
+docker build -f docker/Dockerfile --build-arg INSTALL_EVERYTHING=true -t lute3 .
+
+docker build -f docker/Dockerfile --build-arg INSTALL_EVERYTHING=false -t lute3-lean .
diff --git a/docker/docker-compose.yml.example b/docker/docker-compose.yml.example
index 7011f42cb..255ffbb00 100644
--- a/docker/docker-compose.yml.example
+++ b/docker/docker-compose.yml.example
@@ -5,7 +5,7 @@ services:
   lute:
     image: lute3:latest
     ports:
-      - 5000:5000
+      - 5001:5001
     volumes:
       # Note: you should change these directories
       # to absolute paths (e.g. "/usr/yourname/etc/my_data")
diff --git a/docker/docker_hub_overview.md b/docker/docker_hub_overview.md
index 05a352c30..89f816bd7 100644
--- a/docker/docker_hub_overview.md
+++ b/docker/docker_hub_overview.md
@@ -4,14 +4,14 @@
 LUTE (Learning Using Texts) is an application for learning foreign languages through reading.
 
-
+
 
-For more information, see the [Lute manual](https://jzohrab.github.io/lute-manual/).
+For more information, see the [Lute manual](https://luteorg.github.io/lute-manual/).
 
 # How to use this image
 
 ```
-docker run -p 5000:5000 -v :/lute_data -v :/lute_backup jzohrab/lute3:latest
+docker run -p 5001:5001 -v :/lute_data -v :/lute_backup jzohrab/lute3:latest
 ```
 
 Docker containers using this image write to container directories which must be mounted from the host:
@@ -26,13 +26,13 @@ Example:
 ```
 mkdir -p ~/lute/data
 mkdir -p ~/lute/backups
-docker run -p 5000:5000 -v ~/lute/data:/lute_data -v ~/lute/backups:/lute_backup --name my-lute jzohrab/lute3:latest
+docker run -p 5001:5001 -v ~/lute/data:/lute_data -v ~/lute/backups:/lute_backup --name my-lute jzohrab/lute3:latest
 ```
 
 The above:
 
 * runs the container from the `jzohrab/lute3:latest` image
-* exposes port 5000 on the host (so http://localhost:5000 works)
+* exposes port 5001 on the host (so http://localhost:5001 works)
 * mounts the necessary directories
 * names the running container "my-lute"
 
@@ -48,7 +48,7 @@ and it prints:
 
   Running at:
 
-  http://localhost:5000
+  http://localhost:5001
 
   When you're finished reading, stop this container
 
@@ -56,7 +56,7 @@ and it prints:
   as appropriate.
 ```
 
-(You can now open your browser to `http://localhost:5000` and start working through the Lute demo.)
+(You can now open your browser to `http://localhost:5001` and start working through the Lute demo.)
 
 With the above command, the `lutev3` process takes over that console window, so start a new console window and enter
 
@@ -81,7 +81,7 @@ services:
   lute:
     image: jzohrab/lute3:latest
     ports:
-      - 5000:5000
+      - 5001:5001
     volumes:
       - ~/lute/data:/lute_data
       - ./lute/backups:/lute_backup
@@ -98,19 +98,19 @@ docker compose stop lute
 
 `lute3` has two variants:
 
-* `lute3:` (or `lute3:latest`): Lute v3 and all requirements, plus [MeCab](https://en.wikipedia.org/wiki/MeCab) and a MeCab dictionary for Japanese. ~830 MB.
-* `lute3:-lean` (or `lute3:latest-lean`): The same as the above, but without MeCab (Japanese parsing and the Japanese demo are disabled). ~300 MB
+* `lute3:` (or `lute3:latest`): Lute v3 and all extra supported parsers (Japanese and Mecab, Mandarin). ~1 GB.
+* `lute3:-lean` (or `lute3:latest-lean`): Lute, without extra parsers. If you're not studying Japanese or Mandarin, this will suffice. ~450 MB
 
 # Source code and building your own images
 
 Lute v3 is on [GitHub](https://github.com/luteorg/lute-v3/).
 
 The Dockerfile used to build the images, and docs, are in `/docker` in that repo.
 
 # Help
 
 If you encounter any issues or have questions, please check the [GitHub Issues](https://github.com/luteorg/lute-v3/issues) or join the [Discord](https://discord.gg/CzFUQP5m8u).
 
 # License
 
-Lute v3 and its Docker image are under the [MIT license](https://github.com/jzohrab/lute-v3/blob/master/LICENSE.txt).
+Lute v3 and its Docker image are under the [MIT license](https://github.com/luteorg/lute-v3/blob/master/LICENSE.txt).
diff --git a/docker/try_build_multi.sh b/docker/try_build_multi.sh
index caf7296ec..3c3376ce1 100755
--- a/docker/try_build_multi.sh
+++ b/docker/try_build_multi.sh
@@ -42,7 +42,7 @@ docker buildx build \
   --push \
   --platform linux/amd64,linux/arm64 \
   -f docker/Dockerfile "$@" \
-  --build-arg INSTALL_MECAB=true \
+  --build-arg INSTALL_EVERYTHING=true \
   -t $TAG .
 
 # Remove the current builder
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 833a953ed..6f2c3f450 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,4 +1,736 @@
+# 3.10.1 (2025-03-10)
+
+* #594: add parents.pronunciation to anki mapping.
+* Update built-in language definitions (Turkish, wordreference dictionary as popup)
+* Add AAC, FLAC, and WebM support: by @imamcr in #589
+
+
+# 3.10.0 (2025-02-23)
+
+* #579: Add flexible Anki export.
+
+
+# 3.9.5 (2025-02-15)
+
+Tweaks:
+
+* #563: don't block image search page load if Bing is slow (asynchronous search). By @parradam in #570.
+* Fix how term image data is stored.
+* Allow reference lookup by Term ID.
+
+
+# 3.9.4 (2025-01-26)
+
+* #573: make term listing editable, with "term list" menu entry to show the terms on the current page. Brief usage notes are in [the manual](https://luteorg.github.io/lute-manual/usage/terms/edit-using-term-listing.html).
+* Add configurable hotkey for opening the Term list from the reading page.
+
+
+# 3.9.3 (2025-01-18)
+
+* add "Quick set status" mode for mobile - tap sets a word to status 1
+* clean up reading slide-out menu for mobile
+* Data fix for subsequent migration script.
+
+# 3.9.2 (2025-01-09)
+
+* Issue #475: allow split by sentences or by paragraphs.
+* Issue #555: add page navigation hotkeys.
+
+Back end changes:
+
+* Refactor book creation, break dependency on http/forms
+
+
+# 3.9.1 (2025-01-07)
+
+* Fix mobile javascript bug
+* Add ci for checking mobile
+
+
+# 3.9.0 (2025-01-04)
+
+Feature changes:
+
+* Add black and white theme. By @Christhomas17 in #557.
+* #543: track starting reading of each page, show in book listing.
+* #530: add 'peek' route to display page without tracking it, use for sentence lookups.
+
+
+# 3.8.1 (2024-12-26)
+
+Tweaks:
+
+* #547: add some basic "Term Popup" settings.
+
+Fixes:
+
+* #550: fix Bing sentence lookup.
+
+
+# 3.8.0 (2024-12-24)
+
+Tweaks:
+
+* #540: revise popup: include more data, but make layout more compact.
+* #541: change hotkeys to use "code" rather than "key letter pressed" so that people switching keyboard layouts can have constant shortcuts.
+* #544: allow showing "Unknown" Terms in filtered Term listing.
+* #527: Track words read in new wordsread table, allows for multiple re-reads of same page to be tracked in "words read" stats graph.
+
+Fixes:
+
+* #460: Set cascade delete database pragma on connect - ensures no orphaned data.
+* #546: fix Bing translation lookups, Bing can be embedded for Term and Sentence lookups.
+* Shift-drag copy shouldn't close form.
+
+
+# 3.7.0 (2024-12-20)
+
+Feature changes:
+
+* #537: add "bulk term actions" to reading screen and Term listing
+
+Fixes:
+
+* #328: apply existing term filters when reloading Term listing after editing Term
+* #531: term reference search should handle casing correctly.
+
+
+# 3.6.1 (2024-12-13)
+
+Feature changes:
+
+* Show term popup if term has tag.
+* #525: add Save Term customizable hotkey.
+* #517: show page fraction even if book completed.
+* #524: show popup for terms with data, ignoring status.
+* #522: allow term import with identical lines.
+* Update Arabic language definition.
+
+Back end changes:
+
+* use pyproject.toml and flit only for managing dependencies
+* #534: change how Lute loads demo data for new installs
+
+
+# 3.6.0 (2024-11-15)
+
+Feature changes:
+
+* New khmer parser plugin (`lute3-khmer`). By @jaydom28 in #513.
+* #302: change term/sentence translation placeholder from `###` to `[LUTE]`.
+* #302: `[LUTE]` placeholder not required, some dictionaries don't support extra arguments.
+
+Code changes:
+
+* #509: refactor all library code to use db.session argument
+
+
+# 3.5.9 (2024-11-02)
+
+Feature changes:
+
+* Add Thai plugin! By @jaydom28 in #510. Will be released to PyPi separately.
+* Add Thai definition and story, using the new Thai plugin.
+* #12: Allow bulk set status in Term listing.
+
+Tweaks:
+
+* #140: keyboard shortcuts to move to prev, next sentences.
+* #507: add `--local` startup flag to lute.main (disallows accessing from mobile devices)
+* Show 'text copied' message in tooltip only.
+* Only hold Shift down at start of text copy.
+
+Fixes:
+
+* #506: update stats if distribution is missing.
+* #114: allow / in multiword terms from reading screen.
+
+
+# 3.5.8 (2024-10-28)
+
+Tweaks:
+
+* #434: New hotkeys to move to prev/next unknown word.
+* #496: only propagate new child data to parent (saving an existing child doesn't re-update the parent)
+
+
+# 3.5.7 (2024-10-21)
+
+Feature changes:
+
+* #141: allow customized shortcuts
+* #495: add new shortcuts: translate page, edit page, copy page text, add bookmark, delete term
+
+Tweaks:
+
+* #497: Change default port to 5001.
+* #500: show popup for ignored terms with data
+* Add version to startup console.
+
+
+# 3.5.6 (2024-10-13)
+
+Feature changes:
+
+* Add CLI tool to bulk import books from a CSV file. See [the manual](https://luteorg.github.io/lute-manual/usage/command-line-jobs.html) for notes. By @bwkimmel in #491
+
+Tweaks:
+
+* #103: click parent term auto-saves child
+* #467: allow change of book audio file
+
+Misc code:
+
+* #492: split prod and dev requirements
+* #490: build docker images using source code, not pypi
+* Add template for language parser plugins
+
+
+# 3.5.5 (2024-10-06)
+
+Feature changes:
+
+* #405: add UserSetting for book stats sample size.
+
+There was a big refactor/rewrite of much of the book stats
+calculation code. The calculation isn't instantaneous,
+but as the graphs are ajaxed in now, it should suffice.
+
+Tweaks:
+
+* Show term popup if parent is set.
+* Ajax in book stats on home page for speed.
+* Simplify datatables rendering.
+
+Fixes:
+
+* #487: Use term IDs for deletes.
+* #488: Handle Japanese "repeat" kanji character.
+
+
+# 3.5.4 (2024-10-06)
+
+Botched release: forgot to update `pyproject.toml`, process updated.
+
+
+# 3.5.3 (2024-09-20)
+
+Tweaks:
+
+* #324: convert pending term parent text to tag on save, if not a real tag yet.
+* #480: persist user book table settings.
+* #482: don't rearrange whole reading page when adding multiword term.
+
+Fixes:
+
+* #483: docker build always uses latest pypi lute.
+* #479: fix pyproject.toml start script reference.
+* Interpret IS_DOCKER flag correctly.
+* #474: fix missing component term error.
+
+
+# 3.5.2 (2024-08-26)
+
+Tweaks:
+
+* Issue #472: add parents to term export.
+* Fail import terms if too many or few fields in data.
+* Disable CSRF check for form posts.
+
+Fixes:
+
+* #454: ignore duplicate tag added to Term.
+* Show underline for links in error screen, was confusing.
+* Allow offline images upload, don't throw 500 error.
+* Revert PR #459 (cblanken/issue-379-dictionary-tab-autofocus).
+
+
+# 3.5.1 (2024-08-15)
+
+Tweaks:
+
+* Never cache main page, always recalc book stats as needed.
+* #466: strip whitespace from jinja comment.
+* #497: allow .m4a audio files.
+* #437: exclude unknown terms from language term counts.
+
+Bug fixes:
+
+* #455: handle TermTag associations when tags are deleted.
+* #465: term import fix, only change statuses if explicitly specified.
+* #458: omit empty error messages on language save.
+* #464: prevent dictionary tabs from stealing focus. By @cblanken in #459.
+* #293: fix stats chart shrinking. By @cblanken in #456.
+
+
+# 3.5.0 (2024-06-12)
+
+Feature changes:
+
+* Feature #391: add "leave page" confirmation dialog to language edit form. By @cblanken in #444
+* Feature #86: add text bookmarks. By @cblanken in #439.
+* Issue #449: increase source_uri book form field max length.
+* Add preconfigured languages: Ainu, Bosnian, Kazakh, Macedonian, Nahuatl, Navajo, Okinawan
+
+
+# 3.4.3 (2024-06-08)
+
+Feature changes:
+
+* Add `book_term_export` cli job (see [the manual](https://luteorg.github.io/lute-manual/usage/command-line-jobs.html#book_term_export))
+* Issue #416: fix "sync parent" bugs, add more tests.
+
+Back end changes:
+
+* Refactoring get_paragraphs, slight simplifications and performance fixes
+
+
+# 3.4.2 (2024-05-28)
+
+Feature changes:
+
+* #430: add "parser exceptions file" for Mandarin. See the [Readme on pypi](https://pypi.org/project/lute3-mandarin/) for notes.
+
+Bug fix:
+
+* #436: popup not showing due to Term search error.
+
+
+# 3.4.1 (2024-05-19)
+
+Tweaks/fixes:
+
+* #424: Fix parser plugin loads for python 3.12. By @cghyzel in #426.
+* #414: Add better startup error message if port already in use. By @barash-asenov in #423.
+
+
+# 3.4.0 (2024-05-17)
+
+Feature changes:
+
+* Add (first) language parser plugin for Mandarin. By @cghyzel in #413. See [the manual](https://luteorg.github.io/lute-manual/install/plugins.html) for installation notes if you want to study Mandarin.
+* Issue #418: let users specify if on mobile or desktop. See [the faq](https://luteorg.github.io/lute-manual/faq/reading/click-not-working.html)
+* Allow Opus audio files. By @yue-dongchen in #420
+
+Back end changes:
+
+* Add language parser plugin capability! By @cghyzel in #413
+
+
+# 3.3.3 (2024-05-05)
+
+Feature changes:
+
+* Issue #287: Read custom theme .css files from `userthemes` data directory. See [the manual](https://luteorg.github.io/lute-manual/usage/themes/themes.html#custom-themes) for notes.
+
+Code tweaks:
+
+* Issue #409: tweak mobile screen interactions. See [the manual](https://luteorg.github.io/lute-manual/usage/reading-on-mobile.html) for notes.
+* Issue #410: include term tags in language export CLI job.
+* Term CSV import ignores "added" field, and field names are case-insensitive.
+* Issue #355: Remove component term images from hover detail (too many images were getting shown, was confusing)
+* Issue #372: Show component terms in hover in the (rough) order they appear in the multiword term.
+* Issue #349: Provide default values for sentence terminators and word characters. By @mzraly in #366.
+
+
+# 3.3.2 (2024-04-25)
+
+Feature changes:
+
+* Add many predefined languages: Afrikaans, Albanian, Amharic, Armenian, Azerbaijani, Basque, Belarusian, Bengali, Breton, Bulgarian, Catalan, Croatian, Danish, Dutch, Esperanto, Estonian, Farsi, Finnish, Galician, Georgian, Gothic, Hebrew, Hungarian, Icelandic, Indonesian, Italian, Latin, Latvian, Lithuanian, Norwegian, Polish, Portuguese, Punjabi, Romanian, Serbian, Slovak, Slovenian, Swahili, Swedish, Tibetan, Ukrainian, Vietnamese
+* Add "load predefined language and sample story" link.
+* Redesign audio playback rate control. By @webofpies in #388
+
+Bug fixes:
+
+* Issue #377: fix audio player style for smaller views. By @webofpies in #378
+* Issue #387: importing Term csv shouldn't update parent term status to "new".
+* Issue #344: bump openepub dependency to handle parsing error
+
+Back end changes:
+
+* Move all repos to new GitHub org, https://github.com/LuteOrg
+* Move language definitions to https://github.com/LuteOrg/lute-language-defs, include via git submodule
+* Change data loads to use language definitions submodule
+
+
+# 3.3.1 (2024-03-26)
+
+Fix for issue #375, Japanese production bug.
+
+# 3.3.0 (2024-03-25)
+
+This is a minor version bump (from 3.2.7 to 3.3.0) because Lute now
+creates "status 0" terms for any page opened for reading. These terms
+are effectively "pending terms" that the user processes as they read.
+This change fixes some parsing inconsistencies, and allows for import
+of "status 0" terms as unknown terms.
+
+Feature changes/tweaks:
+
+* #327: autofocus to term input box for new terms. By @imamcr in #368.
+* #335: prevent empty book creation.
+* #352: don't show empty component terms in popup.
+* #353: speed up parent search query.
+* #361: tighten up mobile CSS.
+* #364: change book listing actions from icons to drop down.
+* Allow importing new Terms as "unknown", to pre-populate vocab lists.
+
+Back end changes:
+
+* #99: create new terms on open page for reading.
+* #117: don't reparse terms created from reading screen.
+
+# 3.2.7 (2024-03-15)
+
+Feature changes:
+
+* #325: Speed up homepage.
+* #173: Speed up backups.
+* #251: Show component terms of multi-word terms.
+* Add zero-width joiners and non-joiners to some language definitions. By @mrzaly in #334, #340
+* #332: remove "bulk parent mapping"
+
+Bug fixes:
+
+* #329: fix term listing "select all" checkbox.
+* Fix title and tooltip overflow. By @imamcr in #323.
+
+# 3.2.6 (2024-03-15)
+
+Botched release: it included .mobi support from #338, subsequently pulled for 3.2.7.
+
+# 3.2.5 (2024-03-13)
+
+Feature changes:
+
+* #84: Add SRT, VTT file imports. By @imamcr in #320.
+* #89: Add "add/remove" page operations to reading menu. With nicer UI by @webofpies in #310.
+* #272: Get book title from filename. By @Jayanth-Parthsarathy in #322.
+* #301: Saving new term in term listing stays on term form entry page. By @Jayanth-Parthsarathy in #309.
+* #305: Show date created in term listing page, include in CSV export.
+* #312: Right-click on Lute logo to open new tab. By @Jayanth-Parthsarathy in #314.
+
+Bug fixes:
+
+* #318: Fix broken links to docs. By @mrzaly in #319.
+
+Back end:
+
+* #307: Move vendored css, js into separate folders in lute/static. By @Jayanth-Parthsarathy in #308.
+* Hacking at flaky tests.
+* Remove unused static/iui
+
+
+# 3.2.4 (2024-03-03)
+
+Feature changes:
+
+* #53: add "don't stop audio on term click" setting
+* #295: add "open popups in new tab" setting
+* #256: add "translate full page" reading menu link
+* #209: Ctrl+Enter hotkey saves Term form while reading
+* Fix Arabic and Chinese default dicts. By @imamcr in #296, #298
+* #199: Add delete audio button for book.
+* #250: allow hide some book columns in listing.
+* #288: open pop-up dictionary if it's the first dictionary specified
+
+Bugfixes:
+
+* #300: include pronunciation in csv export.
+
+Back end changes:
+
+* Update datatables to 2.0.1, include colvis.
+* #289: make global js vars' relation to class explicit.
+
+
+# 3.2.3 (2024-02-25)
+
+Feature changes:
+
+* #31: Using page break markers ("---") during new book creation only to force page breaks.
+* #133: Set "current language" filter (and setting) from home page.
+* #14: Allow term image uploads from keyboard or paste from clipboard.
+* Add LWT and LingQ themes. From @imamcr in #285.
+* Fix touch-drag problem for mobile. From @webofpies in #286.
+* Small bug fixes.
+
+
+# 3.2.2 (2024-02-21)
+
+Feature changes:
+
+* Move 'Export CSV' into term Actions menu.
+* Issue #271: Fix multiword select in some text locations
+* Issue #240: Use datatables for language listing.
+* Issue #221: Don't scroll reading pane on term delete.
+* Issue #269: Fix embedded translation dict.
+
+
+# 3.2.1 (2024-02-19)
+
+Feature changes:
+
+* #238: add "Focus mode". From @webofpies in #262, #268.
+* #266: Add backup file download link.
+* #237: Show last backup date, add listing. By @sakolkar in #227.
+* Improve dictionary UI, use tabs. With @webofpies in #264.
+* #5: Support variable number of dictionaries.
+* #261: fix rtl language controls for book add, edit, page edit.
+* #223: resize text areas horiz and vert.
+
+Back end changes:
+
+* schema, js changes for dictionary tabs.
+
+
+# 3.1.4 (2024-02-11)
+
+Feature changes:
+
+* Issue 25: click term image and Delete/Backspace to delete.
+* Issue 214: user must press Return to create parent tag.
+* Issue 215: arrow changes status for hovered.
+* Issue 213: no hovered elements if clicked.
+* Issue 216: parent paste should show dropdown for hints.
+* Show parent suggestions after single char input.
+
+
+# 3.1.3 (2024-02-07)
+
+Feature changes:
+
+* [#182](https://github.com/jzohrab/lute-v3/issues/182): Confirm book archive.
+* [#174](https://github.com/jzohrab/lute-v3/issues/174): Add bulk term deletion.
+* [#205](https://github.com/jzohrab/lute-v3/issues/205): Add Actions menu to term listing to simplify adding actions.
+* [#175](https://github.com/jzohrab/lute-v3/issues/175): Keep blank lines of imported texts when rendering page for reading.
+* [#202](https://github.com/jzohrab/lute-v3/issues/202): Include all books in cli export.
+* [#191](https://github.com/jzohrab/lute-v3/issues/191): Scroll back to top on "mark as read".
+* [#177](https://github.com/jzohrab/lute-v3/issues/177): Show word count on book listing stats bar hover.
+* [#164](https://github.com/jzohrab/lute-v3/issues/164): Hit backspace to edit pasted parent tag.
+* Add "(all)" to term status filter.
+* [#166](https://github.com/jzohrab/lute-v3/issues/166): Keep returns in term translation in Terms listing.
+
+Bug fixes:
+
+* [#170](https://github.com/jzohrab/lute-v3/issues/170): Fix arrow keys for RTL languages.
+* [#207](https://github.com/jzohrab/lute-v3/issues/207): Move title to right for RTL languages.
+ +Back end changes: + +* Simplify lute.js, remove state tracking + + +# 3.1.2 (2024-02-01) + +* Bugfix: only recalc texts.TxWordCount for valid parsers. + + +# 3.1.1 (2024-01-30) + +Feature changes: + +* Add book stats graph and refresh. By @webofpies in [154](https://github.com/jzohrab/lute-v3/pull/154) and [162](https://github.com/jzohrab/lute-v3/pull/162). +* [138](https://github.com/jzohrab/lute-v3/issues/138): Separate Word Count and % Known into separate columns and support sorting. +* Allow term listing search in translations. +* [155](https://github.com/jzohrab/lute-v3/issues/155): Add "words per page" field during book creation. By @fanyingfx. + +Bug fixes: + +* [112](https://github.com/jzohrab/lute-v3/issues/112): show different options if backup failed (retry, skip, adjust settings) +* Sort statuses properly in the term listing. +* [95](https://github.com/jzohrab/lute-v3/issues/95): editing pages updates book word count +* Shorten migration file names to prevent Windows file path length exceptions + +Misc back-end: + +* Add term Export CSV test. +* Calc book stats on at least 5 pages. +* Clean up some form styles. +* Speed up book stats calculation. + + +# 3.1.0 (2024-01-22) + +Feature changes: + +* [#66](https://github.com/jzohrab/lute-v3/issues/66): add "Link to parent" checkbox for child terms to follow/change parent status +* Restyle radio buttons for nicer layout. By @webofpies. + +Back end changes: + +* db schema and test changes for feature + + +# 3.0.12 (2024-01-18) + +Feature changes: + +* Improve term export: export all terms, change headings to be immediately importable. +* Add hotkeys to reading menu (pulled from manual). + +Bugfixes: + +* Fix sentences link. +* Fix spelling of "dismiss" in anchor tag for remove_demo_flag +* Fix scrolling bug on update. By @webofpies. +* Fix z-index of player and popup. By @webofpies in #127. + +Back end changes: + +* Change tagging library to tagify + + +# 3.0.11 (2024-01-11) + +* rework/optimize form and table styles + + +# 3.0.10 (2024-01-10) + +Feature changes: + +* Make reading screen responsive, handles smaller viewports. By @webofpies in #118. +* [#93](https://github.com/jzohrab/lute-v3/issues/93): add PDF imports. By @dgc08 in #119. +* [#107](https://github.com/jzohrab/lute-v3/issues/107): fix Windows file locking on imports. + + +# 3.0.9 (2024-01-04) + +Feature changes: + +* [#29](https://github.com/jzohrab/lute-v3/issues/29): Add reading screen slider to navigate pages. By @webofpies in #88. +* [#13](https://github.com/jzohrab/lute-v3/issues/13): Allow term deletion from reading screen. By @disfated in #85. +* [#90](https://github.com/jzohrab/lute-v3/issues/90): Add Sanskrit. + +Bug fixes: + +* Remove duplicate terms from list (multiple image records) (addresses [#105](https://github.com/jzohrab/lute-v3/issues/105)). +* Graceful failure for non-utf-8 files (addresses [#67](https://github.com/jzohrab/lute-v3/issues/67)). +* Fix arrow key increment (addresses [#96](https://github.com/jzohrab/lute-v3/issues/96)). + +Back end changes: + +* Fix/disable flaky CI tests for reliability. +* Stats distribution field in db. + + +# 3.0.8 (2023-12-28) + +Feature changes: + +* Add .epub import (feature [19](https://github.com/jzohrab/lute-v3/issues/19)). By @sakolkar in #82. +* Add resize frame option in reading pane. By @webofpies in #77. +* Add "dismiss" demo option, for users who don't want to wipe the db. By @dgc08 in #80. +* Nicer styling for the reading menu. By @webofpies in #72. +* Add Hindi and example to baseline. 
By @mzraly in #76. +* Update German sample story to new orthography. By @dgc08 in #81. + +Back end changes: + +* Fix javascript attribute names to standard. By @robby1066 in #79. +* Fix GitHub CI to really fail when things fail. +* Restructure book service to support epub import. + + +# 3.0.7 (2023-12-21) + +Feature changes: + +* Add slide-in menu for reading pane (issue [60](https://github.com/jzohrab/lute-v3/issues/60)). +* Add font/line spacing etc to slide-in menu (issue [45](https://github.com/jzohrab/lute-v3/issues/45)). +* Fix audio not loading reliably in Firefox. +* Keep book listing filter state on refresh (issue [46](https://github.com/jzohrab/lute-v3/issues/46)). +* Arrow keys only change status for clicked words, not hovered. +* Remove 'mark known' check on all pages, add 'make done' check on final page (issue [58](https://github.com/jzohrab/lute-v3/issues/58)). + + +# 3.0.6 (2023-12-13) + +Feature changes: + +* Add audio player to play mp3, wav, or ogg files. See [docs](https://jzohrab.github.io/lute-manual/usage/audio.html) +* Add up/down arrow hotkey to change term status. +* Tweak Greek character range. +* Add 'is completed' check to book title in listing. + +Supporting changes: + +* Page content is now ajaxed in. + + +# 3.0.5 (2023-12-13) + +(Skipped, botched release due to wifi problems.) + + +# 3.0.4 (2023-12-01) + +* Fix sentence lookup for new term. + + +# 3.0.3 (2023-11-30) + +Features and big fixes: + +* Add read word count stats page. +* Bugfix: Only return sentence refs in same language. +* Add japanese automatic reading choices (katakana, hiragana, romaji) +* Add cert verification failure workaround message on 500 error. + +Back-end changes: + +* Break setting->JapaneseParser dependency. +* UserSetting mecab_path sets environ MECAB_PATH. +* Break ci dependency. + + +# 3.0.2 (2023-11-27) + +* Add theming and highlight toggling. + + +# 3.0.1 (2023-11-23) + +* [Issue 23](https://github.com/jzohrab/lute-v3/issues/23): paragraphs not rendered in correct order. +* Inject custom styles into other pages. + + +# 3.0.0 (2023-11-21) + +Lute v3 launch. + + +# 3.0.0b11 (2023-11-19) + +Feature changes: + +* Add language term export command. +* Add czech language and demo story to baseline db. +* Bugfix: Respect the 'show pronunciation' lang setting in form. + +Back end changes + +* CLI command sketch. +* Use specified, root, or default config, in that order. +* Change create_app to take config file path, not object. +* Template for demo stories. + + +# 3.0.0b10 (2023-11-17) + +Feature changes: + +* Bugfix: Fix "archive book" broken link at end of book +* Issue 7: hotkey updates term form if displayed. +* Redirect to home if bad book id. +* Add custom 404 error handler. +* Change wiki refs on site to manual. +* Add Russian predefined language and story demo to baseline. + + # 3.0.0b9 (2023-11-17) * bugfix: parser type select box wasn't updating correctly. diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..43ee8eea8 --- /dev/null +++ b/docs/README.md @@ -0,0 +1 @@ +Docs are in the [GitHub wiki](https://github.com/jzohrab/lute-v3/wiki). \ No newline at end of file diff --git a/docs/architecture.md b/docs/architecture.md deleted file mode 100644 index 76e890a70..000000000 --- a/docs/architecture.md +++ /dev/null @@ -1,68 +0,0 @@ -# Architecture - -As I don't know if anyone will ever read this, I'll keep this very high level for now, where I feel it's necessary. - -# Directories - -`lute` is the app module. 
Each subfolder under it is either a blueprint or a utility/service package:
-
-* `backup`: the `/backup` routes and service
-* `bing`: the `/bing` routes and service
-... etc
-
-Special things:
-
-* `db`: database setup and migrations, and demo data management
-* `dev_api`: special/dangerous routes used during testing only (not loaded if app ENV = prod)
-* `models`: the Sqlalchemy models
-* `parse`: parsing
-* `utils`: utils
-
-## About `models`
-
-The classes in `lute.models` are Sqlalchemy classes. Most of these have simple methods for things like finding, loading, etc.
-
-Some DB models (e.g. `lute.models.language.Language`) are used throughout the application, as they're pretty much just data classes with little functionality.
-
-### Term and Book domain models
-
-There are two more useful domain models:
-
-* `lute.term.model.Term`
-* `lute.book.model.Book`
-
-These are used more frequently in the code as they provide useful domain-level abstractions. They both have corresponding `lute.X.model.Repository` objects that translate these models to and from the DB/Sqlalchemy models.
-
-### Datatables
-
-Lute shows some data as tabular data in datatables, which just use Sqlalchemy to query the db directly without models. The `lute.utils.data_tables` module helps with that.
-
-# Fifty-thousand-foot overview of architecture
-
-Except for parsing and rendering, the model for Lute is pretty simple:
-
-* a route in a blueprint's `routes` module receives an incoming request
-* the route delegates to some kind of `service` module in the blueprint
-* the `service` deals with either DB or domain models as needed, and commits to the `lute.db` via usual Flask-Sqlalchemy methods.
-
-## The models
-
-* The user studies `lute.models.language.Language`
-* The user creates `lute.book.model.Book` (domain object), which is saved as a `lute.models.Book` db object. A `DbBook` has one or more `lute.models.book.Text` objects, which are the pages in the `Book`.
-* The user reads the text in the `lute.read` routes, and creates `lute.term.model.Term` objects, which are saved in the database as `lute.models.term.Term` objects.
-
-# App setup
-
-* The main entry point is `lute.main` which initializes the db and app.
-* `lute.main` calls `lute.config.app_config.AppConfig` to get the configuration from the `lute/config/config.yml`. The config.yml file is pre-written for Docker, or the prod example is used. `AppConfig` is used in several places in the code to re-read the config file. Among other things, the config gives the name of the folder where the user's data will be stored; this is supplied by the library `PlatformDirs` if it's not configured.
-* `lute.main` calls `lute.app_setup`, the app factory. `app_setup`:
-  * calls `lute.db.setup.main` to run db migrations and backups as needed. Migrations are handled by `lute.db.setup.migrator`, using files in `lute/db/schema/`.
-  * loads all of the blueprints into the app.
-* `lute.main` hands the configured app off to waitress.
-
-# Parsing and Rendering
-
-* Parsers are defined in `lute.parse`, with a base `AbstractParser` subclassed by other parsers. The parsers are loaded into a `lute.parse.registry` which is consulted at runtime to determine which parsers are supported in the current environment.
-* Any time a page is requested or a new `Term` is created, the appropriate parser is found from the `parse.registry`. The parser uses the `Language` to get a list of `ParsedTokens`.
-* If rendering, the list of `ParsedTokens` is given to the `lute.read.render.renderable_calculator` to determine which tokens, and which parts of tokens, should actually be rendered.
-* When rendered, the `lute/static/js/lute.js` file adds javascript event handlers for each of the word elements in the rendered HTML. These handlers get/post back to various blueprint routes.
\ No newline at end of file
diff --git a/docs/contributing.md b/docs/contributing.md
deleted file mode 100644
index 2fe6e271a..000000000
--- a/docs/contributing.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# Contributing Code
-
-Hi there!
-
-Contributions are great, but I also need to manage them as sometimes
-they can create a lot of discussion and work.
-
-If you are planning to contribute any non-trivial changes, please
-reach out to me (jz) on [Lute Discord](https://discord.gg/CzFUQP5m8u)
-or via [GitHub issues](https://github.com/jzohrab/lute-v3/issues)
-before you begin work. We need to be sure that your changes fit
-within the Lute architecture, or make changes to the latter to support
-you, and I may have ideas about directions to go with changes but
-haven't yet had time to document them.
-
-# General coding considerations
-
-Lute has a few requirements for code:
-
-* code must be "black formatted" (https://github.com/psf/black)
-* pylint must pass
-* all unit and acceptance tests pass
-
-All of these are checked in CI and won't be relaxed. You can check
-all of these with `inv full`.
-
-The git pre-commit hooks check black and pylint.
-
-## Adding tests
-
-If your change is non-trivial and not covered by the existing unit
-tests, you'll need to add unit tests at the same time.
-
-The tests are written with pytest; there are many examples in the code.
-
-Some tests use pytest-bdd and features, which I found to be very
-expressive. If we need to create new step definitions to cover your
-cases, that's pretty quick.
-
-## Code Style
-
-`black`. done.
-
-## Do One Thing
-
-A patch or pull request should be the minimum necessary to address one issue.
-Please don't make a pull request for a bunch of unrelated changes, as they are
-difficult to review and will be rejected - split them up into separate
-requests instead.
-
-## Pull requests
-
-Pull requests should go to the `develop` branch of the repository.
-`lute-v3` uses "git flow" style branching, so all changes first go
-into the develop branch and then are packaged for release.
-
-## Slow PR merges hopefully never happen ...
-
-I apologize in advance if your PR doesn't get merged in right away.
-In software, there are few things as frustrating/disheartening as
-putting in time and effort, and having it go nowhere. If I don't
-merge it quickly, I'll try to follow up with a comment, but ping me if
-I don't.
\ No newline at end of file
diff --git a/docs/development.md b/docs/development.md
deleted file mode 100644
index e05507557..000000000
--- a/docs/development.md
+++ /dev/null
@@ -1,161 +0,0 @@
-# Development
-
-> This may need revision, ping me if you need clarification as I wrote it quickly.
-
-## Prereqs
-
-* To work on Lute v3, you'll need at least Python 3.8 and pip. You'll probably want to use some kind of virtual environment; I use venv and so will write that out here.
-* Note that GitHub CI tests Python versions 3.8 through 3.11, as we can't be sure what version of Python users have, so stay away from newer language features.
-
-## Dependencies
-
-Full (dev) dependencies are managed with pip:
-
-`pip install ; pip freeze > requirements.txt`
-
-
-## Setup and verify your dev environment
-
-1. Clone as usual, checking out `master` (the current production branch).
-
-2. Set up your virtual environment, install all dev dependencies from requirements.txt, and activate it:
-
-```
-python3.8 -m venv .venv
-source .venv/bin/activate
-
-# verify version
-python --version
-
-# Install requirements
-pip install -r requirements.txt
-
-# Install pre-commit hooks (optional, but recommended):
-pre-commit install
-
-deactivate
-```
-
-3. Copy `lute/config/config.yml.example` to `lute/config/config.yml`, making changes as you see fit.
-
-If you're going to work on Lute, you're going to want to run unit tests. The unit tests are **destructive**, in that they **wipe and reset the configured database.**
-
-To guard against mistakes, the `DBNAME` in your config.yml must start with `test_`, `DATAPATH` must be set, and the `ENV` must be `dev`. This *ensures* that you won't accidentally run the tests against your real Lute data. I work with this by having two completely separate environments: one for dev work, and one for real Lute usage. My prod data (actual data) stays in the latter.
-
-4. Start Lute up, and ensure it's configured correctly:
-
-```
-source .venv/bin/activate  # if necessary
-
-python -m lute.main
-
-# Open web browser to http://localhost:5000
-# ... work work work ...
-# When done, Ctrl-C then
-deactivate
-```
-
-5. Do an initial run of all tests.
-
-Shut down your dev instance of Lute if it's running, and then run
-
-```
-inv full
-```
-
-to do a full pylint, test, and acceptance test run. This should complete without errors, as the lute master and develop branches are always kept passing in CI.
-
-# Development
-
-You may/may not find the overview docs of [Lute's architecture](./architecture.md) useful ... let me know.
-
-## Commit hooks
-
-Pre-commit hooks are installed with the `pre-commit install` step, and are run on every commit. I find this useful, as it stops me from having to go back and clean up, but YMMV. You can skip a step, e.g.: `SKIP=pylint git commit -m "Some non-lint-compliant commit."`
-
-## Testing
-
-Testing is done with pytest and pytest-bdd. Run them as usual: `pytest`, `pytest -s`, `pytest -k test_setup`, `pytest -m somemark`, etc.
-
-## `inv` or `invoke` for tasks
-
-Lute3 uses [Invoke](https://docs.pyinvoke.org/en/stable/index.html) to run tasks. Tasks are in `tasks.py`. See `inv --list` for commands.
-
-Some useful tasks:
-
-| task | desc |
-| --- | --- |
-| inv start | start the app on a development Flask server in dev/debug mode |
-| inv lint | lint |
-| inv accept | start a running instance of the app server if needed, and run acceptance tests |
-
-## Database changes
-
-Database changes are _only_ managed through `lute.db.setup.migrator`. To create a script, run `inv db.newscript `, and edit the file to create a Sqlite-compliant change script. See the existing scripts for examples.
-
-## TODOs
-
-Todos are in the code as comments, e.g. `# TODO [:] detail`, ``.
-`inv todos` collects all of these in a simple report.
-
-## Docker
-
-Notes for building and running a Docker container are at ../docker/README.md.
-
-# Misc dev notes
-
-## Finding mecab.so for Docker
-
-This is much tougher than it needs to be ...
-
-To find the correct path, first build and run the container,
-then connect to it, and find libmecab.so.2 like this:
-
-```
-$ docker exec -it lute_v3-lute-1 bash
-root@cid:/# which mecab
-/usr/bin/mecab
-root@cid:/# ldd /usr/bin/mecab
-  ...
-  libmecab.so.2 => /lib/aarch64-linux-gnu/libmecab.so.2 (0x0000ffff9b540000)
-```
-
-Different platform architectures have this in different locations. :-/
-
-## datatables
-
-Datatables CSS and JS were downloaded from https://datatables.net/download/index
-
-Selected: DataTables, Buttons, HTML5 export; downloaded minified and concat'd files and committed to lute/static/ dirs.
-
-## read-only db during tests
-
-It _appears_ that killing acceptance tests mid-run results in a zombie (?) python process that keeps a handle on the db, causing it to get locked in read-only mode.
-
-I couldn't find a better way to kill this process than to do a full machine restart. Sledgehammer approach that works.
-
-
-## Acceptance tests suddenly failing
-
-Warning during run of tests with `inv accept --exitfail`:
-
-```
-WARNING selenium.webdriver.common.selenium_manager:selenium_manager.py:139 The chromedriver version (118.0.5993.70) detected in PATH at /opt/homebrew/bin/chromedriver might not be compatible with the detected chrome version (119.0.6045.105); currently, chromedriver 119.0.6045.105 is recommended for chrome 119.*, so it is advised to delete the driver in PATH and retry
-```
-
-
-```
-brew upgrade chromedriver
-```
-
-Then, on a Mac, have to "allow" it:
-
-```
-/opt/homebrew/bin/chromedriver --version
-```
-
-Will show message: "“chromedriver” can’t be opened because Apple cannot check it for malicious software." Click "Show in Finder", then in Finder, click "Open" and say "OK" when it can't be verified. Yes, this is a security risk.
-
-# Releases
-
-Covered in [releases](./releases.md).
\ No newline at end of file
diff --git a/docs/releases.md b/docs/releases.md
deleted file mode 100644
index 73d999ae8..000000000
--- a/docs/releases.md
+++ /dev/null
@@ -1,3 +0,0 @@
-MOVED to the wiki
-
-https://github.com/jzohrab/lute-v3/wiki/Releases
diff --git a/lute/__init__.py b/lute/__init__.py
index f754f154b..a3163283e 100644
--- a/lute/__init__.py
+++ b/lute/__init__.py
@@ -17,4 +17,4 @@
 Flit pulls into the pyproject.toml using "dynamic".
 """
 
-__version__ = "3.0.0b9"
+__version__ = "3.10.1"
diff --git a/lute/ankiexport/__init__.py b/lute/ankiexport/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/lute/ankiexport/criteria.py b/lute/ankiexport/criteria.py
new file mode 100644
index 000000000..7b9cd8498
--- /dev/null
+++ b/lute/ankiexport/criteria.py
@@ -0,0 +1,186 @@
+"""
+Criteria. Given a string with selection criteria, evaluates it
+with a term, returning True or False.
+"""
+
+from typing import Callable, Iterable
+import pyparsing as pp
+from pyparsing import (
+    infixNotation,
+    opAssoc,
+    Keyword,
+    Word,
+    # ParserElement,
+    nums,
+    one_of,
+    quotedString,
+    QuotedString,
+    Suppress,
+    Literal,
+)
+from pyparsing.exceptions import ParseException
+from lute.models.term import Term
+from lute.models.language import Language
+from lute.ankiexport.exceptions import AnkiExportConfigurationError
+
+
+def evaluate_criteria(s, term):
+    "Parse the criteria, return True or False for the given term."
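+    # Example criteria strings this grammar accepts (illustrative,
+    # built from the matchers defined below):
+    #   language:"German" and status<=3
+    #   tags:["masc", "fem"] or parents.count>=1
+    #   has:image and all.tags:"food"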
+ # pylint: disable=too-many-locals + + if (s or "").strip() == "": + return True + + def has_any_matching_tags(tagvals): + term_tags = [t.text for t in term.term_tags] + return any(e in term_tags for e in tagvals) + + def has_any_matching_parent_tags(tagvals): + ptags = [] + for p in term.parents: + ptags.extend([t.text for t in p.term_tags]) + return any(e in ptags for e in tagvals) + + def has_any_matching_all_tags(tagvals): + alltags = [t.text for t in term.term_tags] + for p in term.parents: + alltags.extend([t.text for t in p.term_tags]) + return any(e in alltags for e in tagvals) + + def matches_lang(lang): + return term.language.name == lang[0] + + def check_has_images(): + "True if term or any parent has image." + pi = [p.get_current_image() is not None for p in term.parents] + return term.get_current_image() is not None or any(pi) + + def check_has(args): + "Check has:x" + has_item = args[0] + if has_item == "image": + return check_has_images() + raise RuntimeError(f"Unhandled has check for {has_item}") + + def get_binary_operator(opstring): + "Return lambda matching op." + opMap = { + "<": lambda a, b: a < b, + "<=": lambda a, b: a <= b, + ">": lambda a, b: a > b, + ">=": lambda a, b: a >= b, + "!=": lambda a, b: a != b, + "=": lambda a, b: a == b, + "==": lambda a, b: a == b, + } + return opMap[opstring] + + def check_parent_count(args): + "Check parents." + opstring, val = args + oplambda = get_binary_operator(opstring) + pcount = len(term.parents) + return oplambda(pcount, val) + + def check_status_val(args): + "Check status." + opstring, val = args + oplambda = get_binary_operator(opstring) + return oplambda(term.status, val) + + ### class BoolNot: + ### "Not unary operator." + ### def __init__(self, t): + ### self.arg = t[0][1] + ### def __bool__(self) -> bool: + ### v = bool(self.arg) + ### return not v + ### def __str__(self) -> str: + ### return "~" + str(self.arg) + ### __repr__ = __str__ + + class BoolBinOp: + "Binary operation." 
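+        # infixNotation passes each grouped expression as
+        # [operand, op, operand, ...]; __init__ below keeps every
+        # second token, i.e. just the operands.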
+ repr_symbol: str = "" + eval_fn: Callable[[Iterable[bool]], bool] = lambda _: False + + def __init__(self, t): + self.args = t[0][0::2] + + def __str__(self) -> str: + sep = f" {self.repr_symbol} " + return f"({sep.join(map(str, self.args))})" + + def __bool__(self) -> bool: + return self.eval_fn(bool(a) for a in self.args) + + class BoolAnd(BoolBinOp): + repr_symbol = "&" + eval_fn = all + + class BoolOr(BoolBinOp): + repr_symbol = "|" + eval_fn = any + + quoteval = QuotedString(quoteChar='"') + quotedString.setParseAction(pp.removeQuotes) + list_of_values = pp.delimitedList(quotedString) + + tagvallist = Suppress("[") + list_of_values + Suppress("]") + tagcrit = tagvallist | quoteval + + tag_matcher = Suppress(Literal("tags") + Literal(":")) + tagcrit + parents_tag_matcher = Suppress(Literal("parents.tags") + Literal(":")) + tagcrit + all_tag_matcher = Suppress(Literal("all.tags") + Literal(":")) + tagcrit + + lang_matcher = Suppress("language") + Suppress(":") + quoteval + + has_options = Literal("image") + has_matcher = Suppress("has") + Suppress(":") + has_options + + comparison_op = one_of("< <= > >= != = == <>") + integer = Word(nums).setParseAction(lambda x: int(x[0])) + + parent_count_matcher = ( + Suppress("parents") + + Suppress(".") + + Suppress("count") + + comparison_op + + integer + ) + + status_matcher = Suppress("status") + comparison_op + integer + + and_keyword = Keyword("and") + or_keyword = Keyword("or") + + multi_check = infixNotation( + tag_matcher.set_parse_action(has_any_matching_tags) + | parents_tag_matcher.set_parse_action(has_any_matching_parent_tags) + | all_tag_matcher.set_parse_action(has_any_matching_all_tags) + | lang_matcher.set_parse_action(matches_lang) + | has_matcher.set_parse_action(check_has) + | parent_count_matcher.set_parse_action(check_parent_count) + | status_matcher.set_parse_action(check_status_val), + [ + (and_keyword, 2, opAssoc.LEFT, BoolAnd), + (or_keyword, 2, opAssoc.LEFT, BoolOr), + ], + ) + + try: + result = multi_check.parseString(s, parseAll=True) + return bool(result[0]) + except pp.ParseException as ex: + msg = f"Criteria syntax error at position {ex.loc} or later: {ex.line}" + raise AnkiExportConfigurationError(msg) from ex + + +def validate_criteria(criteria): + "Check criteria with a dummy Term." + term = Term(Language(), "") + try: + evaluate_criteria(criteria, term) + except ParseException as ex: + msg = f'Invalid criteria "{ex.line}"' + raise AnkiExportConfigurationError(msg) from ex diff --git a/lute/ankiexport/exceptions.py b/lute/ankiexport/exceptions.py new file mode 100644 index 000000000..50651e560 --- /dev/null +++ b/lute/ankiexport/exceptions.py @@ -0,0 +1,9 @@ +""" +Exceptions. +""" + + +class AnkiExportConfigurationError(Exception): + """ + Raised if the config for the export is bad. + """ diff --git a/lute/ankiexport/field_mapping.py b/lute/ankiexport/field_mapping.py new file mode 100644 index 000000000..7273b631d --- /dev/null +++ b/lute/ankiexport/field_mapping.py @@ -0,0 +1,215 @@ +"""Field to value mapper. + +e.g. given dict like + +{ + "lute_term_id": "{ id }", + "term": "{ term }", + "tags": "{ tags:["masc", "fem"] }" +} + +extracts data from the given term and generates a mapping of field to +actual values to send to AnkiConnect. 
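+
+For example (illustrative): for a term tagged ["fem", "noun"], the
+"tags" entry above resolves to "fem" -- the term's tags intersected
+with the requested list, joined with ", ".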
+""" + +import re +import pyparsing as pp +from pyparsing import ( + quotedString, + QuotedString, + Suppress, + Literal, +) +from pyparsing.exceptions import ParseException +from lute.models.term import Term +from lute.models.language import Language +from lute.ankiexport.exceptions import AnkiExportConfigurationError + + +class SentenceLookup: + "Sentence lookup, finds in a supplied dictionary or from db." + + def __init__(self, default_sentences_by_term_id, references_repo): + "init" + sdict = {} + for k, v in default_sentences_by_term_id.items(): + sdict[int(k)] = v + self.default_sentences_by_term_id = sdict + self.references_repo = references_repo + + def get_sentence_for_term(self, term_id): + "Get sentence from the dict, or do a lookup." + tid = int(term_id) + if tid in self.default_sentences_by_term_id: + return self.default_sentences_by_term_id[tid] + + refs = self.references_repo.find_references_by_id(term_id) + term_refs = refs["term"] or [] + if len(term_refs) == 0: + return "" + return term_refs[0].sentence + + +def _all_terms(term): + "Term and any parents." + ret = [term] + ret.extend(term.parents) + return ret + + +def _all_tags(term): + "Tags for term and all parents." + ret = [tt.text for t in _all_terms(term) for tt in t.term_tags] + return sorted(list(set(ret))) + + +def get_values_and_media_mapping(term, sentence_lookup, mapping): + """ + Get the value replacements to be put in the mapping, and build + dict of new filenames to original filenames. + """ + + def all_translations(): + ret = [term.translation or ""] + for p in term.parents: + if p.translation not in ret: + ret.append(p.translation or "") + return [r for r in ret if r.strip() != ""] + + def parse_keys_needing_calculation(calculate_keys, media_mappings): + """ + Build a parser for some keys in the mapping string, return + calculated value to use in the mapping. SIDE EFFECT: + adds ankiconnect post actions to post_actions if needed + (e.g. for image uploads). + + e.g. the mapping "article: { tags:["der", "die", "das"] }" + needs to be parsed to extract certain tags from the current + term. + """ + + def _filtered_tags_in_term_list(term_list, tagvals): + "Get all unique tags." + # tagvals is a pyparsing ParseResults, use list() to convert to strings. + ttext = [tt.text for t in term_list for tt in t.term_tags] + ttext = sorted(list(set(ttext))) + ftags = [tt for tt in ttext if tt in list(tagvals)] + return ", ".join(ftags) + + def get_filtered_tags(tagvals): + "Get term tags matching the list." + return _filtered_tags_in_term_list([term], tagvals) + + def get_filtered_parents_tags(tagvals): + "Get term tags matching the list." + return _filtered_tags_in_term_list(term.parents, tagvals) + + def handle_image(_): + id_images = [ + (t, t.get_current_image()) + for t in _all_terms(term) + if t.get_current_image() is not None + ] + image_srcs = [] + for t, imgfilename in id_images: + new_filename = f"LUTE_TERM_{t.id}.jpg" + image_url = f"/userimages/{t.language.id}/{imgfilename}" + media_mappings[new_filename] = image_url + image_srcs.append(f'') + + return "".join(image_srcs) + + def handle_sentences(_): + "Get sample sentence for term." + if term.id is None: + # Dummy parse. 
+                return ""
+            return sentence_lookup.get_sentence_for_term(term.id)
+
+        quotedString.setParseAction(pp.removeQuotes)
+        tagvallist = Suppress("[") + pp.delimitedList(quotedString) + Suppress("]")
+        tagcrit = tagvallist | QuotedString(quoteChar='"')
+        tag_matcher = Suppress(Literal("tags") + Literal(":")) + tagcrit
+        parents_tag_matcher = Suppress(Literal("parents.tags") + Literal(":")) + tagcrit
+
+        image = Suppress("image")
+        sentence = Suppress("sentence")
+
+        matcher = (
+            tag_matcher.set_parse_action(get_filtered_tags)
+            | parents_tag_matcher.set_parse_action(get_filtered_parents_tags)
+            | image.set_parse_action(handle_image)
+            | sentence.set_parse_action(handle_sentences)
+        )
+
+        calc_replacements = {
+            # Matchers return the value that should be used as the
+            # replacement value for the given mapping string. e.g.
+            # tags["der", "die"] returns "der" if term.tags = ["der", "x"]
+            k: matcher.parseString(k).asList()[0]
+            for k in calculate_keys
+        }
+
+        return calc_replacements
+
+    def remove_zws(replacements_dict):
+        cleaned = {}
+        for key, value in replacements_dict.items():
+            if isinstance(value, str):
+                cleaned[key] = value.replace("\u200B", "")
+            else:
+                cleaned[key] = value
+        return cleaned
+
+    # One-for-one replacements in the mapping string.
+    # e.g. "{ id }" is replaced by term.id.
+    replacements = {
+        "id": term.id,
+        "term": term.text,
+        "language": term.language.name,
+        "parents": ", ".join([p.text for p in term.parents]),
+        "tags": ", ".join(sorted({tt.text for tt in term.term_tags})),
+        "translation": "<br>
".join(all_translations()), + "pronunciation": term.romanization, + "parents.pronunciation": ", ".join( + [p.romanization or "" for p in term.parents] + ), + } + + mapping_string = "; ".join(mapping.values()) + calc_keys = [ + k + for k in set(re.findall(r"{\s*(.*?)\s*}", mapping_string)) + if k not in replacements + ] + + media_mappings = {} + calc_replacements = parse_keys_needing_calculation(calc_keys, media_mappings) + + final_replacements = {**replacements, **calc_replacements} + cleaned = remove_zws(final_replacements) + return (cleaned, media_mappings) + + +def validate_mapping(mapping): + "Check mapping with a dummy Term." + t = Term(Language(), "") + refsrepo = None + try: + get_values_and_media_mapping(t, refsrepo, mapping) + except ParseException as ex: + msg = f'Invalid field mapping "{ex.line}"' + raise AnkiExportConfigurationError(msg) from ex + + +def get_fields_and_final_values(mapping, replacements): + "Break mapping string into fields, apply replacements." + ret = {} + for fieldname, value in mapping.items(): + subbed_value = value + for k, v in replacements.items(): + pattern = rf"{{\s*{re.escape(k)}\s*}}" + subbed_value = re.sub(pattern, f"{v}", subbed_value) + if subbed_value.strip() != "": + ret[fieldname.strip()] = subbed_value.strip() + return ret diff --git a/lute/ankiexport/forms.py b/lute/ankiexport/forms.py new file mode 100644 index 000000000..801da98c9 --- /dev/null +++ b/lute/ankiexport/forms.py @@ -0,0 +1,52 @@ +""" +SrsExportSpec form. +""" + +from flask_wtf import FlaskForm +from wtforms import StringField, SelectField, BooleanField, TextAreaField, HiddenField +from wtforms.validators import DataRequired, Length +from lute.ankiexport.service import Service +from lute.models.srsexport import SrsExportSpec + + +class SrsExportSpecForm(FlaskForm): + "Srs export spec." + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # Flask doesn't allow for "general" errors, so handle those + # specially. + self.general_errors = [] + + # Extra data added to the form during POST + # to simplify validation. + self.anki_deck_names = None + self.anki_note_types = None + + export_name = StringField( + "Export Name", validators=[DataRequired(), Length(max=200)] + ) + criteria = TextAreaField( + "Criteria", render_kw={"placeholder": "Leave blank to always export"} + ) + deck_name = SelectField("Deck Name", validators=[DataRequired(), Length(max=200)]) + note_type = SelectField("Note Type", validators=[DataRequired(), Length(max=200)]) + field_mapping = HiddenField( + "Field Mapping", validators=[DataRequired(), Length(max=1000)] + ) + active = BooleanField("Active", default=True) + + def validate(self, extra_validators=None): + """Custom validation logic.""" + if not super().validate(extra_validators): + return False # Return early if standard validation fails + + temp_spec = SrsExportSpec() + self.populate_obj(temp_spec) + svc = Service(self.anki_deck_names, self.anki_note_types, [temp_spec]) + self.general_errors = svc.validate_spec(temp_spec) + if len(self.general_errors) > 0: + return False + + return True diff --git a/lute/ankiexport/routes.py b/lute/ankiexport/routes.py new file mode 100644 index 000000000..6f97fb60a --- /dev/null +++ b/lute/ankiexport/routes.py @@ -0,0 +1,138 @@ +""" +Anki export. 
+""" + +import json +from flask import ( + Blueprint, + request, + jsonify, + render_template, + redirect, + flash, +) +from lute.ankiexport.service import Service +from lute.models.srsexport import SrsExportSpec +from lute.ankiexport.forms import SrsExportSpecForm +from lute.ankiexport.exceptions import AnkiExportConfigurationError +from lute.db import db + + +bp = Blueprint("ankiexport", __name__, url_prefix="/ankiexport") + + +@bp.route("/index", methods=["GET", "POST"]) +def anki_index(): + "List the exports." + export_specs = db.session.query(SrsExportSpec).all() + export_specs_json = [ + { + "id": spec.id, + "export_name": spec.export_name, + "criteria": spec.criteria, + "deck_name": spec.deck_name, + "note_type": spec.note_type, + "field_mapping": spec.field_mapping, + "active": "yes" if spec.active else "no", + } + for spec in export_specs + ] + + return render_template( + "/ankiexport/index.html", + export_specs_json=export_specs_json, + ) + + +def _handle_form(spec, form_template_name): + """ + Handle a form post. + """ + form = SrsExportSpecForm(obj=spec) + + if request.method == "POST": + anki_settings_json = request.form.get("ankisettings") + anki_settings = json.loads(anki_settings_json) + + form.anki_deck_names = anki_settings.get("deck_names") + form.anki_note_types = anki_settings.get("note_types") + + # Have to load the option choices or flask-wtf complains ... + # ouch. + form.deck_name.choices = [(f, f) for f in form.anki_deck_names] + form.note_type.choices = [(f, f) for f in form.anki_note_types.keys()] + + if form.validate_on_submit(): + form.populate_obj(spec) + db.session.add(spec) + db.session.commit() + return redirect("/ankiexport/index", 302) + + return render_template(form_template_name, form=form, spec=spec) + + +@bp.route("/spec/edit/", methods=["GET", "POST"]) +def edit_spec(spec_id): + "Edit a spec." + spec = db.session.query(SrsExportSpec).filter(SrsExportSpec.id == spec_id).first() + return _handle_form(spec, "/ankiexport/edit.html") + + +@bp.route("/spec/new", methods=["GET", "POST"]) +def new_spec(): + "Make a new spec." + spec = SrsExportSpec() + # Hack ... not sure why this was necessary, given that the model + # and form both have the default as True. + if spec.active is None: + spec.active = True + return _handle_form(spec, "/ankiexport/new.html") + + +@bp.route("/spec/delete/", methods=["GET", "POST"]) +def delete_spec(spec_id): + "Delete a spec." 
+ spec = db.session.query(SrsExportSpec).filter(SrsExportSpec.id == spec_id).first() + db.session.delete(spec) + db.session.commit() + flash("Export mapping deleted.") + return redirect("/ankiexport/index", 302) + + +@bp.route("/get_card_post_data", methods=["POST"]) +def get_ankiconnect_post_data(): + """Get data that the client javascript will post.""" + data = request.get_json() + word_ids = data["term_ids"] + termid_sentences = data["termid_sentences"] + base_url = data["base_url"] + anki_deck_names = data["deck_names"] + anki_note_types = data["note_types"] + export_specs = db.session.query(SrsExportSpec).all() + svc = Service(anki_deck_names, anki_note_types, export_specs) + try: + ret = svc.get_ankiconnect_post_data( + word_ids, termid_sentences, base_url, db.session + ) + return jsonify(ret) + except AnkiExportConfigurationError as ex: + response = jsonify({"error": str(ex)}) + response.status_code = 400 # Bad Request + return response + + +@bp.route("/validate_export_specs", methods=["POST"]) +def validate_export_specs(): + """Get data that the client javascript will post.""" + data = request.get_json() + anki_deck_names = data["deck_names"] + anki_note_types = data["note_types"] + export_specs = db.session.query(SrsExportSpec).all() + svc = Service(anki_deck_names, anki_note_types, export_specs) + try: + ret = svc.validate_specs() + return jsonify(ret) + except AnkiExportConfigurationError as ex: + response = jsonify({"error": str(ex)}) + response.status_code = 400 # Bad Request + return response diff --git a/lute/ankiexport/service.py b/lute/ankiexport/service.py new file mode 100644 index 000000000..2b3d5f7d9 --- /dev/null +++ b/lute/ankiexport/service.py @@ -0,0 +1,213 @@ +""" +Service, validates and posts. +""" + +import json +from lute.models.repositories import TermRepository +from lute.term.model import ReferencesRepository +from lute.ankiexport.exceptions import AnkiExportConfigurationError +from lute.ankiexport.field_mapping import ( + get_values_and_media_mapping, + validate_mapping, + get_fields_and_final_values, + SentenceLookup, +) +from lute.ankiexport.criteria import ( + evaluate_criteria, + validate_criteria, +) + + +class Service: + "Srs export service." + + def __init__( + self, + anki_deck_names, + anki_note_types_and_fields, + export_specs, + ): + "init" + self.anki_deck_names = anki_deck_names + self.anki_note_types_and_fields = anki_note_types_and_fields + self.export_specs = export_specs + + def validate_spec(self, spec): + """ + Returns array of errors if any for the given spec. 
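+
+        e.g. (illustrative) a spec pointing at an unknown deck yields
+        ['No deck name "MyDeck"']; an empty list means the spec is OK.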
+ """ + if not spec.active: + return [] + + errors = [] + + try: + validate_criteria(spec.criteria) + except AnkiExportConfigurationError as ex: + errors.append(str(ex)) + + if spec.deck_name not in self.anki_deck_names: + errors.append(f'No deck name "{spec.deck_name}"') + + valid_note_type = spec.note_type in self.anki_note_types_and_fields + if not valid_note_type: + errors.append(f'No note type "{spec.note_type}"') + + mapping = None + try: + mapping = json.loads(spec.field_mapping) + except json.decoder.JSONDecodeError: + errors.append("Mapping is not valid json") + + if valid_note_type and mapping: + note_fields = self.anki_note_types_and_fields.get(spec.note_type, {}) + bad_fields = [f for f in mapping.keys() if f not in note_fields] + if len(bad_fields) > 0: + bad_fields = ", ".join(bad_fields) + msg = f"Note type {spec.note_type} does not have field(s): {bad_fields}" + errors.append(msg) + + if mapping: + try: + validate_mapping(json.loads(spec.field_mapping)) + except AnkiExportConfigurationError as ex: + errors.append(str(ex)) + + return errors + + def validate_specs(self): + """ + Return hash of spec ids and any config errors. + """ + failures = {} + for spec in self.export_specs: + v = self.validate_spec(spec) + if len(v) != 0: + failures[spec.id] = "; ".join(v) + return failures + + def validate_specs_failure_message(self): + "Failure message for alerts." + failures = self.validate_specs() + msgs = [] + for k, v in failures.items(): + spec = next(s for s in self.export_specs if s.id == k) + msgs.append(f"{spec.export_name}: {v}") + return msgs + + def _all_terms(self, term): + "Term and any parents." + ret = [term] + ret.extend(term.parents) + return ret + + def _all_tags(self, term): + "Tags for term and all parents." + ret = [tt.text for t in self._all_terms(term) for tt in t.term_tags] + return sorted(list(set(ret))) + + # pylint: disable=too-many-arguments,too-many-positional-arguments + def _build_ankiconnect_post_json( + self, + mapping, + media_mappings, + lute_and_term_tags, + deck_name, + model_name, + ): + "Build post json for term using the mappings." + + post_actions = [] + for new_filename, original_url in media_mappings.items(): + hsh = { + "action": "storeMediaFile", + "params": { + "filename": new_filename, + "url": original_url, + }, + } + post_actions.append(hsh) + + post_actions.append( + { + "action": "addNote", + "params": { + "note": { + "deckName": deck_name, + "modelName": model_name, + "fields": mapping, + "tags": lute_and_term_tags, + } + }, + } + ) + + return {"action": "multi", "params": {"actions": post_actions}} + + def get_ankiconnect_post_data_for_term(self, term, base_url, sentence_lookup): + """ + Get post data for a single term. + This assumes that all the specs are valid! + Separate method for unit testing. 
+ """ + use_exports = [ + spec + for spec in self.export_specs + if spec.active and evaluate_criteria(spec.criteria, term) + ] + # print(f"Using {len(use_exports)} exports") + + ret = {} + for export in use_exports: + mapping = json.loads(export.field_mapping) + replacements, mmap = get_values_and_media_mapping( + term, sentence_lookup, mapping + ) + for k, v in mmap.items(): + mmap[k] = base_url + v + updated_mapping = get_fields_and_final_values(mapping, replacements) + tags = ["lute"] + self._all_tags(term) + + p = self._build_ankiconnect_post_json( + updated_mapping, + mmap, + tags, + export.deck_name, + export.note_type, + ) + ret[export.export_name] = p + + return ret + + def get_ankiconnect_post_data( + self, term_ids, termid_sentences, base_url, db_session + ): + """ + Build data to be posted. + + Throws if any validation failure or mapping failure, as it's + annoying to handle partial failures. + """ + + msgs = self.validate_specs_failure_message() + if len(msgs) > 0: + show_msgs = [f"* {m}" for m in msgs] + show_msgs = "\n".join(show_msgs) + err_msg = "Anki export configuration errors:\n" + show_msgs + raise AnkiExportConfigurationError(err_msg) + + repo = TermRepository(db_session) + + refsrepo = ReferencesRepository(db_session) + sentence_lookup = SentenceLookup(termid_sentences, refsrepo) + + ret = {} + for tid in term_ids: + term = repo.find(tid) + pd = self.get_ankiconnect_post_data_for_term( + term, base_url, sentence_lookup + ) + if len(pd) > 0: + ret[tid] = pd + + return ret diff --git a/lute/app_factory.py b/lute/app_factory.py index 31bf78fb9..8546fa502 100644 --- a/lute/app_factory.py +++ b/lute/app_factory.py @@ -5,11 +5,13 @@ """ import os +import json import platform import traceback from flask import ( Flask, render_template, + request, redirect, flash, current_app, @@ -17,28 +19,57 @@ send_from_directory, jsonify, ) +from sqlalchemy.event import listens_for +from sqlalchemy.pool import Pool + +from lute.config.app_config import AppConfig from lute.db import db from lute.db.setup.main import setup_db -import lute.backup.service as backupservice -import lute.db.demo +from lute.db.management import add_default_user_settings +from lute.db.data_cleanup import clean_data +from lute.backup.service import Service as BackupService +from lute.db.demo import Service as DemoService +import lute.utils.formutils + +from lute.parse.registry import init_parser_plugins, supported_parsers from lute.models.book import Book from lute.models.language import Language -from lute.models.setting import BackupSettings, UserSetting -from lute.book.stats import refresh_stats +from lute.settings.current import ( + refresh_global_settings, + current_settings, + current_hotkeys, +) +from lute.models.repositories import UserSettingRepository +from lute.book.stats import Service as StatsService +from lute.ankiexport.routes import bp as anki_bp from lute.book.routes import bp as book_bp +from lute.bookmarks.routes import bp as bookmarks_bp from lute.language.routes import bp as language_bp from lute.term.routes import bp as term_bp from lute.termtag.routes import bp as termtag_bp from lute.read.routes import bp as read_bp from lute.bing.routes import bp as bing_bp from lute.userimage.routes import bp as userimage_bp +from lute.useraudio.routes import bp as useraudio_bp from lute.termimport.routes import bp as termimport_bp -from lute.term_parent_map.routes import bp as term_parent_map_bp from lute.backup.routes import bp as backup_bp from lute.dev_api.routes import bp as dev_api_bp from 
lute.settings.routes import bp as settings_bp +from lute.themes.routes import bp as themes_bp +from lute.stats.routes import bp as stats_bp +from lute.cli.commands import bp as cli_bp + + +def _setup_app_dir(dirname, readme_content): + "Create one app directory." + if not os.path.exists(dirname): + os.makedirs(dirname) + readme = os.path.join(dirname, "README.md") + if not os.path.exists(readme): + with open(readme, "w", encoding="utf-8") as f: + f.write(readme_content) def _setup_app_dirs(app_config): @@ -47,28 +78,34 @@ def _setup_app_dirs(app_config): """ dp = app_config.datapath required_dirs = [ - {"d": dp, "readme": "Lute data folder."}, - { - "d": app_config.default_user_backup_path, - "readme": "Default path for user backups, can be overridden in settings.", - }, - { - "d": app_config.system_backup_path, - "readme": "Database backups created by Lute at app start, just in case.", - }, - { - "d": app_config.userimagespath, - "readme": "User images. Each subfolder is a language's ID.", - }, + [dp, "Lute data folder."], + [ + app_config.default_user_backup_path, + "Default path for user backups, can be overridden in settings.", + ], + [ + app_config.system_backup_path, + "Database backups created by Lute at app start, just in case.", + ], + [ + app_config.userimagespath, + "User images. Each subfolder is a language's ID.", + ], + [ + app_config.userthemespath, + "User themes. .css files for your personal themes.", + ], + [ + app_config.useraudiopath, + "User audio. Each file is a book's audio.", + ], + [ + app_config.temppath, + "Temp directory for export file writes, to avoid permissions issues.", + ], ] for rec in required_dirs: - d = rec["d"] - if not os.path.exists(d): - os.makedirs(d) - readme = os.path.join(d, "README.md") - if not os.path.exists(readme): - with open(readme, "w", encoding="utf-8") as f: - f.write(rec["readme"]) + _setup_app_dir(rec[0], rec[1]) def _add_base_routes(app, app_config): @@ -81,59 +118,80 @@ def inject_menu_bar_vars(): """ Inject backup settings into the all templates for the menu bar. """ - bs = BackupSettings.get_backup_settings() + us_repo = UserSettingRepository(db.session) + bs = us_repo.get_backup_settings() have_languages = len(db.session.query(Language).all()) > 0 ret = { "have_languages": have_languages, "backup_enabled": bs.backup_enabled, "backup_directory": bs.backup_dir, "backup_last_display_date": bs.last_backup_display_date, + "backup_time_since": bs.time_since_last_backup, + "user_settings": json.dumps(current_settings), + "user_hotkeys": json.dumps(current_hotkeys), } return ret @app.route("/") def index(): - is_production = not lute.db.demo.contains_demo_data() - bkp_settings = BackupSettings.get_backup_settings() + demosvc = DemoService(db.session) + is_production = not demosvc.contains_demo_data() + us_repo = UserSettingRepository(db.session) + bkp_settings = us_repo.get_backup_settings() have_books = len(db.session.query(Book).all()) > 0 have_languages = len(db.session.query(Language).all()) > 0 + language_choices = lute.utils.formutils.language_choices( + db.session, "(all languages)" + ) + current_language_id = lute.utils.formutils.valid_current_language_id(db.session) + bs = BackupService(db.session) + should_run_auto_backup = bs.should_run_auto_backup(bkp_settings) # Only back up if we have books, otherwise the backup is # kicked off when the user empties the demo database. 
- if ( - is_production - and have_books - and backupservice.should_run_auto_backup(bkp_settings) - ): + if is_production and have_books and should_run_auto_backup: return redirect("/backup/backup", 302) - refresh_stats() - warning_msg = backupservice.backup_warning(bkp_settings) + warning_msg = bs.backup_warning(bkp_settings) backup_show_warning = ( bkp_settings.backup_warn and bkp_settings.backup_enabled and warning_msg != "" ) - return render_template( - "index.html", - hide_homelink=True, - dbname=app_config.dbname, - datapath=app_config.datapath, - tutorial_book_id=lute.db.demo.tutorial_book_id(), - have_books=have_books, - have_languages=have_languages, - is_production_data=is_production, - # Backup stats - backup_show_warning=backup_show_warning, - backup_warning_msg=warning_msg, + demosvc = DemoService(db.session) + response = make_response( + render_template( + "index.html", + hide_homelink=True, + dbname=app_config.dbname, + datapath=app_config.datapath, + tutorial_book_id=demosvc.tutorial_book_id(), + have_books=have_books, + have_languages=have_languages, + language_choices=language_choices, + current_language_id=current_language_id, + is_production_data=is_production, + backup_show_warning=backup_show_warning, + backup_warning_msg=warning_msg, + ) ) + return response + + @app.route("/refresh_all_stats") + def refresh_all_stats(): + books_to_update = db.session.query(Book).filter(Book.archived == 0).all() + svc = StatsService(db.session) + for book in books_to_update: + svc.mark_stale(book) + return redirect("/", 302) @app.route("/wipe_database") def wipe_db(): - if lute.db.demo.contains_demo_data(): - lute.db.demo.delete_demo_data() + demosvc = DemoService(db.session) + if demosvc.contains_demo_data(): + demosvc.delete_demo_data() msg = """ The database has been wiped clean. Have fun!
<br /><br />
(Lute has automatically enabled backups -- @@ -142,6 +200,19 @@ def wipe_db(): flash(msg) return redirect("/", 302) + @app.route("/remove_demo_flag") + def remove_demo(): + demosvc = DemoService(db.session) + if demosvc.contains_demo_data(): + demosvc.remove_flag() + msg = """ + Demo mode deactivated. Have fun!
+            <br /><br />
+ (Lute has automatically enabled backups -- + change your Settings as needed.) + """ + flash(msg) + return redirect("/", 302) + @app.route("/version") def show_version(): ac = current_app.env_config @@ -191,7 +262,7 @@ def _internal_server_error(e): # pylint: disable=unused-argument # app.logger.error(exception_info) return ( render_template( - "500_error.html", + "errors/500_error.html", exception_info=exception_info, version=lute.__version__, platform=platform.platform(), @@ -200,6 +271,19 @@ def _internal_server_error(e): # pylint: disable=unused-argument 500, ) + @app.errorhandler(404) + def _page_not_found(e): # pylint: disable=unused-argument + "Show custom error page on 404." + return ( + render_template( + "errors/404_error.html", + version=lute.__version__, + requested_url=request.url, + referring_page=request.referrer, + ), + 404, + ) + def _create_app(app_config, extra_config): """ @@ -218,6 +302,11 @@ def _create_app(app_config, extra_config): # ref https://flask-sqlalchemy.palletsprojects.com/en/2.x/config/ # Don't track mods. "SQLALCHEMY_TRACK_MODIFICATIONS": False, + # Disable CSRF -- this is a local app, and it's highly + # unlikely that a malicious site will try to hack anyone's Lute data. + # ref https://stackoverflow.com/questions/5207160/ + # what-is-a-csrf-token-what-is-its-importance-and-how-does-it-work + "WTF_CSRF_ENABLED": False, } final_config = {**config, **extra_config} @@ -227,41 +316,127 @@ def _create_app(app_config, extra_config): app.env_config = app_config db.init_app(app) + + @listens_for(Pool, "connect") + def _pragmas_on_connect(dbapi_con, con_record): # pylint: disable=unused-argument + dbapi_con.execute("pragma recursive_triggers = on;") + dbapi_con.execute("pragma foreign_keys = on;") + with app.app_context(): db.create_all() - UserSetting.load() + add_default_user_settings(db.session, app_config.default_user_backup_path) + refresh_global_settings(db.session) app.db = db _add_base_routes(app, app_config) app.register_blueprint(language_bp) + app.register_blueprint(anki_bp) app.register_blueprint(book_bp) + app.register_blueprint(bookmarks_bp) app.register_blueprint(term_bp) app.register_blueprint(termtag_bp) app.register_blueprint(read_bp) app.register_blueprint(bing_bp) app.register_blueprint(userimage_bp) + app.register_blueprint(useraudio_bp) app.register_blueprint(termimport_bp) - app.register_blueprint(term_parent_map_bp) app.register_blueprint(backup_bp) app.register_blueprint(settings_bp) + app.register_blueprint(themes_bp) + app.register_blueprint(stats_bp) + app.register_blueprint(cli_bp) if app_config.is_test_db: app.register_blueprint(dev_api_bp) return app -def create_app(app_config, extra_config=None, output_func=None): +def _init_parser_plugins(plugin_data_path, outfunc): + "Load and init plugins." + outfunc("Initializing parsers from plugins ...") + init_parser_plugins() + + parsers = supported_parsers() + parsers_with_extra_data = [ + (typename, klass) for typename, klass in parsers if klass.uses_data_directory() + ] + if len(parsers_with_extra_data) > 0: + # outfunc("Creating data folders for plugins ...") + _setup_app_dir(plugin_data_path, "Data files for plugins.") + for pair in parsers_with_extra_data: + typename, klass = pair + dirname = os.path.join(plugin_data_path, typename) + klass.data_directory = dirname + + readme_content = f"Extra data for {klass.name()} plugin." 
+            _setup_app_dir(dirname, readme_content)
+            klass.init_data_directory()
+            # outfunc(f"  * {klass.name()}: {dirname}")
+
+    outfunc("Enabled parsers:")
+    for _, v in supported_parsers():
+        outfunc(f"  * {v.name()}")
+
+
+def create_app(
+    app_config_path=None,
+    extra_config=None,
+    output_func=None,
+):
     """
     App factory.  Calls dbsetup, and returns Flask app.
 
-    Use extra_config to pass { 'TESTING': True } during unit tests.
+    Args:
+    - app_config_path: path to yml file.  If None, use root config or default.
+    - extra_config: dict, e.g. pass { 'TESTING': True } during unit tests.
     """
 
+    def null_print(s):  # pylint: disable=unused-argument
+        pass
+
+    outfunc = output_func or null_print
+
+    if app_config_path is None:
+        if os.path.exists("config.yml"):
+            app_config_path = "config.yml"
+        else:
+            app_config_path = AppConfig.default_config_filename()
+
+    app_config = AppConfig(app_config_path)
     _setup_app_dirs(app_config)
     setup_db(app_config, output_func)
     if extra_config is None:
         extra_config = {}
+    outfunc("Initializing app.")
     app = _create_app(app_config, extra_config)
+
+    # Plugins are loaded after the app, as they may use settings etc.
+    _init_parser_plugins(app_config.plugin_datapath, outfunc)
+
     return app
+
+
+def data_initialization(session, output_func=None):
+    """
+    Any extra data setup.
+
+    TODO: rework data initialization.  The DB setup can be handled
+    outside of the application context, as IMO it's clearer to manage
+    the data separately from the thing that uses the data.  This
+    requires moving from flask-sqlalchemy to plain sqlalchemy.
+    """
+
+    def _null_print(s):  # pylint: disable=unused-argument
+        pass
+
+    outfunc = output_func or _null_print
+
+    demosvc = DemoService(session)
+    if demosvc.should_load_demo_data():
+        outfunc("Loading demo data.")
+        demosvc.load_demo_data()
+
+    # TODO valid parsers: do parser check, mark valid as active, invalid as inactive.
+
+    clean_data(session, outfunc)
diff --git a/lute/backup/routes.py b/lute/backup/routes.py
index 2b6a9a478..1fc10992b 100644
--- a/lute/backup/routes.py
+++ b/lute/backup/routes.py
@@ -4,15 +4,55 @@
 Backup settings form management, and running backups.
 """
 
+import os
 import traceback
-from flask import Blueprint, current_app, render_template, request, jsonify
-from lute.models.setting import BackupSettings
-from lute.backup.service import create_backup
+from flask import (
+    Blueprint,
+    current_app,
+    render_template,
+    request,
+    jsonify,
+    redirect,
+    send_file,
+    flash,
+)
+from lute.db import db
+from lute.models.repositories import UserSettingRepository
+from lute.backup.service import Service
 
 
 bp = Blueprint("backup", __name__, url_prefix="/backup")
 
 
+def _get_settings():
+    "Get backup settings."
+    repo = UserSettingRepository(db.session)
+    return repo.get_backup_settings()
+
+
+@bp.route("/index")
+def index():
+    """
+    List all backups.
+    """
+    settings = _get_settings()
+    service = Service(db.session)
+    backups = service.list_backups(settings.backup_dir)
+    backups.sort(reverse=True)
+
+    return render_template(
+        "backup/index.html", backup_dir=settings.backup_dir, backups=backups
+    )
+
+
+@bp.route("/download/<filename>")
+def download_backup(filename):
+    "Download the given backup file."
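+    # send_file(..., as_attachment=True) below sets Content-Disposition,
+    # so the browser downloads the file instead of rendering it.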
+ settings = _get_settings() + fullpath = os.path.join(settings.backup_dir, filename) + return send_file(fullpath, as_attachment=True) + + @bp.route("/backup", methods=["GET"]) def backup(): """ @@ -24,7 +64,7 @@ def backup(): if "type" in request.args: backuptype = "manual" - settings = BackupSettings.get_backup_settings() + settings = _get_settings() return render_template( "backup/backup.html", backup_folder=settings.backup_dir, backuptype=backuptype ) @@ -41,11 +81,21 @@ def do_backup(): backuptype = prms["type"] c = current_app.env_config - settings = BackupSettings.get_backup_settings() + settings = _get_settings() + service = Service(db.session) is_manual = backuptype.lower() == "manual" try: - f = create_backup(c, settings, is_manual=is_manual) + f = service.create_backup(c, settings, is_manual=is_manual) + flash(f"Backup created: {f}", "notice") return jsonify(f) except Exception as e: # pylint: disable=broad-exception-caught tb = traceback.format_exc() return jsonify({"errmsg": str(e) + " -- " + tb}), 500 + + +@bp.route("/skip_this_backup", methods=["GET"]) +def handle_skip_this_backup(): + "Update last backup date so backup not attempted again." + service = Service(db.session) + service.skip_this_backup() + return redirect("/", 302) diff --git a/lute/backup/service.py b/lute/backup/service.py index 99f46ac51..b8f8e4f5a 100644 --- a/lute/backup/service.py +++ b/lute/backup/service.py @@ -3,13 +3,14 @@ """ import os +import re import shutil import gzip from datetime import datetime import time +from typing import List, Union -from lute.db import db -from lute.models.setting import SystemSetting +from lute.models.repositories import UserSettingRepository from lute.models.book import Book from lute.models.term import Term @@ -20,105 +21,173 @@ class BackupException(Exception): """ -def create_backup(app_config, settings, is_manual=False, suffix=None): +class DatabaseBackupFile: """ - Create backup using current app config, settings. - - is_manual is True if this is a user-triggered manual - backup, otherwise is False. - - suffix can be specified for test. - - settings are from Setting.get_backup_settings(). - - backup_enabled - - backup_dir - - backup_auto - - backup_warn - - backup_count - - last_backup_datetime - """ - if not os.path.exists(settings.backup_dir): - raise BackupException("Missing directory " + settings.backup_dir) - - _mirror_images_dir(app_config.userimagespath, settings.backup_dir) - - prefix = "manual_" if is_manual else "" - if suffix is None: - suffix = datetime.now().strftime("%Y-%m-%d_%H%M%S") - fname = f"{prefix}lute_backup_{suffix}.db" - backupfile = os.path.join(settings.backup_dir, fname) - - f = _create_db_backup(app_config.dbfilename, backupfile) - _remove_excess_backups(settings.backup_count, settings.backup_dir) - return f - - -def should_run_auto_backup(backup_settings): + A representation of a lute backup file to hold metadata attributes. """ - True (if applicable) if last backup was old. - """ - bs = backup_settings - if bs.backup_enabled is False or bs.backup_auto is False: - return False - - last = bs.last_backup_datetime - if last is None: - return True - curr = int(time.time()) - diff = curr - last - return diff > 24 * 60 * 60 - - -def backup_warning(backup_settings): - "Get warning if needed." 
- if not backup_settings.backup_warn: - return "" + def __init__(self, filepath: Union[str, os.PathLike]): + if not os.path.exists(filepath): + raise BackupException(f"No backup file at {filepath}.") + + name = os.path.basename(filepath) + if not re.match(r"(manual_)?lute_backup_", name): + raise BackupException(f"Not a valid lute database backup at {filepath}.") + + self.filepath = filepath + self.name = name + self.is_manual = self.name.startswith("manual_") + + def __lt__(self, other): + return self.last_modified < other.last_modified + + @property + def last_modified(self) -> datetime: + return datetime.fromtimestamp(os.path.getmtime(self.filepath)).astimezone() + + @property + def size_bytes(self) -> int: + return os.path.getsize(self.filepath) + + @property + def size(self) -> str: + """ + A human-readable string representation of the size of the file. + + Eg. + 1746 bytes + 4 kB + 27 MB + """ + s = self.size_bytes + if s >= 1e9: + return f"{round(s * 1e-9)} GB" + if s >= 1e6: + return f"{round(s * 1e-6)} MB" + if s >= 1e3: + return f"{round(s * 1e-3)} KB" + return f"{s} bytes" + + +class Service: + "Service." + + def __init__(self, session): + self.session = session + + def create_backup(self, app_config, settings, is_manual=False, suffix=None): + """ + Create backup using current app config, settings. + + is_manual is True if this is a user-triggered manual + backup, otherwise is False. + + suffix can be specified for test. + + settings are from BackupSettings. + - backup_enabled + - backup_dir + - backup_auto + - backup_warn + - backup_count + - last_backup_datetime + """ + if not os.path.exists(settings.backup_dir): + raise BackupException("Missing directory " + settings.backup_dir) + + # def _print_now(msg): + # "Timing helper for when implement audio backup." + # now = datetime.now().strftime("%H-%M-%S") + # print(f"{now} - {msg}", flush=True) + + self._mirror_images_dir(app_config.userimagespath, settings.backup_dir) + + prefix = "manual_" if is_manual else "" + if suffix is None: + suffix = datetime.now().strftime("%Y-%m-%d_%H%M%S") + fname = f"{prefix}lute_backup_{suffix}.db" + backupfile = os.path.join(settings.backup_dir, fname) + + f = self._create_db_backup(app_config.dbfilename, backupfile) + self._remove_excess_backups(settings.backup_count, settings.backup_dir) + return f + + def should_run_auto_backup(self, backup_settings): + """ + True (if applicable) if last backup was old. + """ + bs = backup_settings + if bs.backup_enabled is False or bs.backup_auto is False: + return False + + last = bs.last_backup_datetime + if last is None: + return True + + curr = int(time.time()) + diff = curr - last + return diff > 24 * 60 * 60 + + def backup_warning(self, backup_settings): + "Get warning if needed." + if not backup_settings.backup_warn: + return "" + + have_books = self.session.query(self.session.query(Book).exists()).scalar() + have_terms = self.session.query(self.session.query(Term).exists()).scalar() + if have_books is False and have_terms is False: + return "" + + last = backup_settings.last_backup_datetime + if last is None: + return "Never backed up." + + curr = int(time.time()) + diff = curr - last + old_backup_msg = "Last backup was more than 1 week ago." + if diff > 7 * 24 * 60 * 60: + return old_backup_msg - have_books = len(db.session.query(Book).all()) > 0 - have_terms = len(db.session.query(Term).all()) > 0 - if have_books is False and have_terms is False: return "" - last = backup_settings.last_backup_datetime - if last is None: - return "Never backed up." 
-
-    curr = int(time.time())
-    diff = curr - last
-    old_backup_msg = "Last backup was more than 1 week ago."
-    if diff > 7 * 24 * 60 * 60:
-        return old_backup_msg
-
-    return ""
-
-
-def _create_db_backup(dbfilename, backupfile):
-    "Make a backup."
-    shutil.copy(dbfilename, backupfile)
-    f = f"{backupfile}.gz"
-    with open(backupfile, "rb") as in_file, gzip.open(
-        f, "wb", compresslevel=9
-    ) as out_file:
-        shutil.copyfileobj(in_file, out_file)
-    os.remove(backupfile)
-    SystemSetting.set_last_backup_datetime(int(time.time()))
-    return f
-
-
-def _remove_excess_backups(count, outdir):
-    "Remove old backups."
-    files = [f for f in os.listdir(outdir) if f.startswith("lute_backup_")]
-    files.sort(reverse=True)
-    to_remove = files[count:]
-    for f in to_remove:
-        os.remove(os.path.join(outdir, f))
-
-
-def _mirror_images_dir(userimagespath, outdir):
-    "Copy the images to backup."
-    target_dir = os.path.join(outdir, "userimages_backup")
-    target_dir = os.path.abspath(target_dir)
-    if not os.path.exists(target_dir):
-        os.mkdir(target_dir)
-    shutil.copytree(userimagespath, target_dir, dirs_exist_ok=True)
+    def _create_db_backup(self, dbfilename, backupfile):
+        "Make a backup."
+        shutil.copy(dbfilename, backupfile)
+        f = f"{backupfile}.gz"
+        with open(backupfile, "rb") as in_file, gzip.open(
+            f, "wb", compresslevel=4
+        ) as out_file:
+            shutil.copyfileobj(in_file, out_file)
+        os.remove(backupfile)
+        r = UserSettingRepository(self.session)
+        r.set_last_backup_datetime(int(time.time()))
+        return f
+
+    def skip_this_backup(self):
+        "Set the last backup time to today."
+        r = UserSettingRepository(self.session)
+        r.set_last_backup_datetime(int(time.time()))
+
+    def _remove_excess_backups(self, count, outdir):
+        "Remove old backups."
+        files = [f for f in self.list_backups(outdir) if not f.is_manual]
+        files.sort(reverse=True)
+        to_remove = files[count:]
+        for f in to_remove:
+            os.remove(f.filepath)
+
+    def _mirror_images_dir(self, userimagespath, outdir):
+        "Copy the images to backup."
+        target_dir = os.path.join(outdir, "userimages_backup")
+        target_dir = os.path.abspath(target_dir)
+        if not os.path.exists(target_dir):
+            os.mkdir(target_dir)
+        shutil.copytree(userimagespath, target_dir, dirs_exist_ok=True)
+
+    def list_backups(self, outdir) -> List[DatabaseBackupFile]:
+        "List all backup files."
+        return [
+            DatabaseBackupFile(os.path.join(outdir, f))
+            for f in os.listdir(outdir)
+            if re.match(r"(manual_)?lute_backup_", f)
+        ]
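The backup helpers are now instance methods on a `Service`, and each backup file is wrapped in `DatabaseBackupFile`. A minimal usage sketch of the new API above — assuming an active Flask app context and an existing backup directory (the path below is a placeholder):

```
# Sketch only: list existing backups, newest first, with human-readable sizes.
from lute.db import db
from lute.backup.service import Service

service = Service(db.session)
# DatabaseBackupFile defines __lt__ on last_modified, so sorted() works directly.
backups = sorted(service.list_backups("/path/to/backups"), reverse=True)
for b in backups:
    kind = "manual" if b.is_manual else "automatic"
    print(f"{b.name}  {b.size}  ({kind}, modified {b.last_modified:%Y-%m-%d %H:%M})")
```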
diff --git a/lute/bing/routes.py b/lute/bing/routes.py
index 68510f46b..fa60d2677 100644
--- a/lute/bing/routes.py
+++ b/lute/bing/routes.py
@@ -3,14 +3,47 @@
 """

 import os
+import datetime
+import hashlib
 import re
 import urllib.request

-from flask import Blueprint, request, render_template, jsonify, current_app
+from flask import (
+    Blueprint,
+    request,
+    Response,
+    render_template,
+    jsonify,
+    current_app,
+    url_for,
+)

 bp = Blueprint("bing", __name__, url_prefix="/bing")


+@bp.route(
+    "/search_page/<int:langid>/<text>/<searchstring>", methods=["GET"]
+)
+def bing_search_page(langid, text, searchstring):
+    """
+    Load the initial empty search page, passing the real URL for the subsequent ajax call to get images.
+
+    Sometimes Bing image searches block or fail, so providing the initial empty search page
+    lets the user know work is in progress.  The user can therefore interact with the page
+    immediately.  The template for this route then makes an ajax call to the "bing_search()"
+    method below, which actually does the search.
+    """
+
+    # Create URL for bing_search and pass into template.
+    search_url = url_for(
+        "bing.bing_search", langid=langid, text=text, searchstring=searchstring
+    )
+
+    return render_template(
+        "imagesearch/index.html", langid=langid, text=text, search_url=search_url
+    )
+
+
 @bp.route("/search/<int:langid>/<text>/<searchstring>", methods=["GET"])
 def bing_search(langid, text, searchstring):
     "Do an image search."
@@ -24,49 +57,66 @@ def bing_search(langid, text, searchstring):

     # dump("searching for " + text + " in " + language.getLgName())
     search = urllib.parse.quote(text)
-    searchparams = searchstring.replace("###", search)
-    url = "https://www.bing.com/images/search?" + searchparams
+    params = searchstring.replace("[LUTE]", search)
+    params = params.replace("###", search)  # TODO remove_old_###_placeholder: remove
+    url = "https://www.bing.com/images/search?" + params

     content = ""
-    with urllib.request.urlopen(url) as s:
-        content = s.read().decode("utf-8")
-
-    # Samples
+    error_msg = ""
+    try:
+        with urllib.request.urlopen(url) as s:
+            content = s.read().decode("utf-8")
+    except urllib.error.URLError as e:
+        content = ""
+        error_msg = str(e.reason)
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        content = ""
+        error_msg = str(e)
+
+    # Sample data returned by bing image search:
+    # <img class="... vimgld" data-src="https://..." />
     # or
+    # <img class="... rms_img" src="https://..." />

-    pattern = r"(<img .*?>)"
-    matches = re.findall(pattern, content, re.I)
-
-    images = list(matches)

     def is_search_img(img):
         return not ('src="/' in img) and ("rms_img" in img or "vimgld" in img)

-    def fix_data_src(img):
-        return img.replace("data-src=", "src=")
-
-    images = [fix_data_src(i) for i in images if is_search_img(i)]
-
-    # Reduce image load count so we don't kill subpage loading.
-    images = images[:25]
-
     def build_struct(image):
         src = "missing"
-        m = re.search(r'src="(.*?)"', image)
+        normalized_source = image.replace("data-src=", "src=")
+        m = re.search(r'src="(.*?)"', normalized_source)
         if m:
             src = m.group(1)
         return {"html": image, "src": src}

-    data = [build_struct(i) for i in images]
+    raw_images = list(re.findall(r"(<img .*?>)", content, re.I))

-    return render_template(
-        "imagesearch/index.html", langid=langid, text=text, images=data
-    )
+    images = [build_struct(i) for i in raw_images if is_search_img(i)]
+    # Reduce image load count so we don't kill subpage loading.
+    # Also bing seems to throttle images if the count is higher (??).
+ images = images[:25] + + ret = { + "langid": langid, + "text": text, + "images": images, + "error_message": error_msg, + } + return jsonify(ret) + + +def _get_dir_and_filename(langid, text): + "Make a directory if needed, return [dir, filename]" + datapath = current_app.config["DATAPATH"] + image_dir = os.path.join(datapath, "userimages", langid) + if not os.path.exists(image_dir): + os.makedirs(image_dir) -def make_filename(text): - return re.sub(r"\s+", "_", text) + ".jpeg" + now = datetime.datetime.now() + timestamp = now.strftime("%Y%m%d_%H%M%S%f")[:-3] + hash_part = hashlib.md5(text.encode()).hexdigest()[:8] + filename = f"{timestamp}_{hash_part}.jpeg" + return [image_dir, filename] @bp.route("/save", methods=["POST"]) @@ -79,15 +129,41 @@ def bing_save(): text = request.form["text"] langid = request.form["langid"] - datapath = current_app.config["DATAPATH"] - imgdir = os.path.join(datapath, "userimages", langid) - if not os.path.exists(imgdir): - os.makedirs(imgdir) - filename = make_filename(text) + imgdir, filename = _get_dir_and_filename(langid, text) destfile = os.path.join(imgdir, filename) with urllib.request.urlopen(src) as response, open(destfile, "wb") as out_file: out_file.write(response.read()) - # This is the format of legacy Lute v2 data. - image_url = f"/userimages/{langid}/{filename}" - return jsonify({"filename": image_url}) + ret = { + "url": f"/userimages/{langid}/{filename}", + "filename": filename, + } + return jsonify(ret) + + +@bp.route("/manual_image_post", methods=["POST"]) +def manual_image_post(): + """ + For manual posts of images (not bing image clicks). + Save the posted image data to DATAPATH/userimages, + returning the filename. + """ + text = request.form["text"] + langid = request.form["langid"] + + if "manual_image_file" not in request.files: + return Response("No file part in request", status=400) + + f = request.files["manual_image_file"] + if f.filename == "": + return Response("No selected file", status=400) + + imgdir, filename = _get_dir_and_filename(langid, text) + destfile = os.path.join(imgdir, filename) + f.save(destfile) + + ret = { + "url": f"/userimages/{langid}/{filename}", + "filename": filename, + } + return jsonify(ret) diff --git a/lute/book/datatables.py b/lute/book/datatables.py index 63efae257..b197639c5 100644 --- a/lute/book/datatables.py +++ b/lute/book/datatables.py @@ -2,11 +2,10 @@ Show books in datatables. """ -from lute.db import db from lute.utils.data_tables import DataTablesSqliteQuery, supported_parser_type_criteria -def get_data_tables_list(parameters, is_archived): +def get_data_tables_list(parameters, is_archived, session): "Book json data for datatables." 
archived = "true" if is_archived else "false" @@ -16,22 +15,30 @@ def get_data_tables_list(parameters, is_archived): LgName, BkTitle, case when currtext.TxID is null then 1 else currtext.TxOrder end as PageNum, - pagecnt.c as PageCount, + textcounts.pagecount AS PageCount, + booklastopened.lastopeneddate AS LastOpenedDate, BkArchived, tags.taglist AS TagList, - case when ifnull(b.BkWordCount, 0) = 0 then 'n/a' else b.BkWordCount end as WordCount, + textcounts.wc AS WordCount, c.distinctterms as DistinctCount, c.distinctunknowns as UnknownCount, - c.unknownpercent as UnknownPercent + c.unknownpercent as UnknownPercent, + c.status_distribution as StatusDistribution, + case when completed_books.BkID is null then 0 else 1 end as IsCompleted FROM books b INNER JOIN languages ON LgID = b.BkLgID LEFT OUTER JOIN texts currtext ON currtext.TxID = BkCurrentTxID INNER JOIN ( - SELECT TxBkID, COUNT(TxID) AS c FROM texts + select TxBkID, max(TxStartDate) as lastopeneddate from texts group by TxBkID + ) booklastopened on booklastopened.TxBkID = b.BkID + INNER JOIN ( + SELECT TxBkID, SUM(TxWordCount) as wc, COUNT(TxID) AS pagecount + FROM texts GROUP BY TxBkID - ) pagecnt on pagecnt.TxBkID = b.BkID + ) textcounts on textcounts.TxBkID = b.BkID LEFT OUTER JOIN bookstats c on c.BkID = b.BkID + LEFT OUTER JOIN ( SELECT BtBkID as BkID, GROUP_CONCAT(T2Text, ', ') AS taglist FROM @@ -44,12 +51,27 @@ def get_data_tables_list(parameters, is_archived): GROUP BY BtBkID ) AS tags ON tags.BkID = b.BkID + left outer join ( + select texts.TxBkID as BkID + from texts + inner join ( + /* last page in each book */ + select TxBkID, max(TxOrder) as maxTxOrder from texts group by TxBkID + ) last_page on last_page.TxBkID = texts.TxBkID and last_page.maxTxOrder = texts.TxOrder + where TxReadDate is not null + ) completed_books on completed_books.BkID = b.BkID + WHERE b.BkArchived = {archived} and languages.LgParserType in ({ supported_parser_type_criteria() }) """ - # print(base_sql) - session = db.session - connection = session.connection() + # Add "where" criteria for all the filters. + language_id = parameters["filtLanguage"] + if language_id == "null" or language_id == "undefined" or language_id is None: + language_id = "0" + language_id = int(language_id) + if language_id != 0: + base_sql += f" and LgID = {language_id}" + connection = session.connection() return DataTablesSqliteQuery.get_data(base_sql, parameters, connection) diff --git a/lute/book/forms.py b/lute/book/forms.py index 05f2455d1..233a692ba 100644 --- a/lute/book/forms.py +++ b/lute/book/forms.py @@ -2,13 +2,23 @@ Book create/edit forms. """ -from wtforms import StringField, SelectField, FieldList, TextAreaField +import json +from flask import request +from wtforms import StringField, SelectField, TextAreaField, IntegerField, HiddenField from wtforms import ValidationError -from wtforms.validators import DataRequired, Length +from wtforms.validators import DataRequired, Length, NumberRange from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileAllowed +def _tag_values(field_data): + "Convert field data to array." + ret = [] + if field_data: + ret = [h["value"] for h in json.loads(field_data)] + return ret + + class NewBookForm(FlaskForm): """ New book. All fields can be entered. @@ -20,16 +30,63 @@ class NewBookForm(FlaskForm): desc = ( "Use for short texts, e.g. up to a few thousand words. " - + 'For longer texts, use the "Text File" below.' + + 'For longer texts, use the "Text file" below.' 
) text = TextAreaField("Text", description=desc) textfile = FileField( "Text file", - description="Max file size 2048K", - validators=[FileAllowed(["txt"], "Please upload a valid text document")], + validators=[ + FileAllowed( + ["txt", "epub", "pdf", "srt", "vtt"], + "Please upload a valid text (txt, epub, pdf, srt, vtt)", + ) + ], + ) + split_by = SelectField( + "Split by", choices=[("paragraphs", "Paragraphs"), ("sentences", "Sentences")] + ) + threshold_page_tokens = IntegerField( + "Words per page", + validators=[NumberRange(min=1, max=1500)], + default=250, ) - source_uri = StringField("Source URI", validators=[Length(max=255)]) - book_tags = FieldList(StringField("book_tags")) + source_uri = StringField("Text source", validators=[Length(max=1000)]) + audiofile = FileField( + "Audio file", + validators=[ + FileAllowed( + ["mp3", "m4a", "wav", "ogg", "opus", "aac", "flac", "webm"], + "Please upload a valid audio file (mp3, m4a, wav, ogg, opus, aac, flac, webm)", + ) + ], + ) + book_tags = StringField("Tags") + + def __init__(self, *args, **kwargs): + "Call the constructor of the superclass (FlaskForm)" + super().__init__(*args, **kwargs) + book = kwargs.get("obj") + + def _data(arr): + "Get data in proper format for tagify." + return json.dumps([{"value": p} for p in arr]) + + self.book_tags.data = _data(book.book_tags) + if request.method == "POST": + self.book_tags.data = request.form.get("book_tags", "") + + def populate_obj(self, obj): + "Call the populate_obj method from the parent class, then mine." + super().populate_obj(obj) + obj.book_tags = _tag_values(self.book_tags.data) + tfd = self.textfile.data + if tfd: + obj.text_stream = tfd.stream + obj.text_stream_filename = tfd.filename + afd = self.audiofile.data + if afd: + obj.audio_stream = afd.stream + obj.audio_stream_filename = afd.filename def validate_language_id(self, field): # pylint: disable=unused-argument "Language must be set." @@ -54,5 +111,42 @@ class EditBookForm(FlaskForm): """ title = StringField("Title", validators=[DataRequired(), Length(max=255)]) - source_uri = StringField("Source URI", validators=[Length(max=255)]) - book_tags = FieldList(StringField("book_tags")) + source_uri = StringField("Source URI", validators=[Length(max=1000)]) + book_tags = StringField("Tags") + audiofile = FileField( + "Audio file", + validators=[ + FileAllowed( + ["mp3", "wav", "ogg", "opus", "aac", "flac", "webm"], + "Please upload a valid audio file (mp3, wav, ogg, opus, aac, flac, webm)", + ) + ], + ) + + # The current audio_filename can be removed from the current book. + audio_filename = HiddenField("Audio filename") + + def __init__(self, *args, **kwargs): + "Call the constructor of the superclass (FlaskForm)" + super().__init__(*args, **kwargs) + book = kwargs.get("obj") + + def _data(arr): + "Get data in proper format for tagify." + return json.dumps([{"value": p} for p in arr]) + + self.book_tags.data = _data(book.book_tags) + if request.method == "POST": + self.book_tags.data = request.form.get("book_tags", "") + + def populate_obj(self, obj): + "Call the populate_obj method from the parent class, then mine." 
+        super().populate_obj(obj)
+        obj.book_tags = _tag_values(self.book_tags.data)
+
+        afd = self.audiofile.data
+        if afd:
+            obj.audio_stream = afd.stream
+            obj.audio_stream_filename = afd.filename
+            obj.audio_bookmarks = None
+            obj.audio_current_pos = None
diff --git a/lute/book/model.py b/lute/book/model.py
index dd3b06e6d..07e22d1a1 100644
--- a/lute/book/model.py
+++ b/lute/book/model.py
@@ -2,23 +2,97 @@
 Book domain objects.
 """

-from lute.models.book import Book as DBBook, BookTag
-from lute.models.language import Language
+from lute.models.book import BookTag, Book as DBBook, Text as DBText
+from lute.models.repositories import (
+    BookRepository,
+    BookTagRepository,
+    LanguageRepository,
+)


-class Book:
+def token_group_generator(tokens, group_type, threshold=500):
+    """
+    A generator that yields groups of ParsedTokens grouped by sentence or
+    paragraph, with each yielded group (except possibly the last) containing
+    more than the threshold number of word tokens.
+    """
+    current_group = []
+    buff = []
+
+    def trim_paras(tok_array):
+        "Remove para tokens from beginning and end."
+        while tok_array and tok_array[0].is_end_of_paragraph:
+            tok_array.pop(0)
+        while tok_array and tok_array[-1].is_end_of_paragraph:
+            tok_array.pop()
+        return tok_array
+
+    def _matches_group_delimiter(tok):
+        if group_type == "sentences":
+            return tok.is_end_of_sentence
+        if group_type == "paragraphs":
+            return tok.is_end_of_paragraph
+        raise RuntimeError("Unhandled type " + group_type)
+
+    for token in tokens:
+        buff.append(token)
+        if _matches_group_delimiter(token):
+            current_group.extend(buff)
+            # pylint: disable=consider-using-generator
+            current_count = sum([1 for t in current_group if t.is_word])
+            buff = []
+
+            # Yield if threshold exceeded.
+            # Remove the final paragraph marker if it's there; it's not needed.
+            if current_count > threshold:
+                current_group = trim_paras(current_group)
+                yield current_group
+                current_group = []
+
+    # Add any remaining tokens
+    if buff:
+        current_group.extend(buff)
+    current_group = trim_paras(current_group)
+    if current_group:
+        yield current_group
+
+
+class Book:  # pylint: disable=too-many-instance-attributes
     """
     A book domain object, to create/edit lute.models.book.Books.
+
+    Book language can be specified either by language_id, or
+    language_name.  language_name is useful for loading books via
+    scripts/api.  language_id takes precedence.
     """

     def __init__(self):
         self.id = None
         self.language_id = None
+        self.language_name = None
         self.title = None
         self.text = None
         self.source_uri = None
+        self.audio_filename = None
+        self.audio_current_pos = None
+        self.audio_bookmarks = None
         self.book_tags = []
+        self.threshold_page_tokens = 250
+        self.split_by = "paragraphs"
+
+        # The source file used for the book text.
+        # Overrides the self.text if not None.
+        self.text_source_path = None
+
+        self.text_stream = None
+        self.text_stream_filename = None
+
+        # The source file used for audio.
+        self.audio_source_path = None
+
+        self.audio_stream = None
+        self.audio_stream_filename = None
+
     def __repr__(self):
         return f"<Book (id={self.id}, title='{self.title}')>"
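`token_group_generator` above drives the new page-splitting. A quick illustrative sketch of its grouping behaviour, using a minimal stand-in for lute's ParsedToken (the `Tok` class here is a test stub, not part of the diff):

```
# Sketch only: two sentences, threshold=5 -> two "pages".
from dataclasses import dataclass
from lute.book.model import token_group_generator

@dataclass
class Tok:  # stub with the attributes the generator reads
    token: str
    is_word: bool = True
    is_end_of_sentence: bool = False
    is_end_of_paragraph: bool = False

words = [Tok(f"w{i} ") for i in range(10)]
period = lambda: Tok(".", is_word=False, is_end_of_sentence=True)
tokens = words[:6] + [period()] + words[6:] + [period()]

# The first sentence (6 words) exceeds the threshold and is yielded as its
# own group; the remaining sentence is yielded as the final group.
for page in token_group_generator(tokens, "sentences", threshold=5):
    print("".join(t.token for t in page))
```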
@@ -31,20 +105,28 @@ class Repository:
     """
     Maps Book BO to and from lute.model.Book.
     """

-    def __init__(self, _db):
-        self.db = _db
+    def __init__(self, _session):
+        self.session = _session
+        self.book_repo = BookRepository(self.session)

     def load(self, book_id):
         "Loads a Book business object for the DBBook."
-        dbb = DBBook.find(book_id)
+        dbb = self.book_repo.find(book_id)
         if dbb is None:
             raise ValueError(f"No book with id {book_id} found")
         return self._build_business_book(dbb)

+    def find_by_title(self, book_title, language_id):
+        "Loads a Book business object for the book with a given title."
+        dbb = self.book_repo.find_by_title(book_title, language_id)
+        if dbb is None:
+            return None
+        return self._build_business_book(dbb)
+
     def get_book_tags(self):
         "Get all available book tags, helper method."
-        bts = self.db.session.query(BookTag).all()
-        return [t.text for t in bts]
+        bts = self.session.query(BookTag).all()
+        return sorted([t.text for t in bts])

     def add(self, book):
         """
@@ -53,7 +135,7 @@ def add(self, book):
         clients should not change it.
         """
         dbbook = self._build_db_book(book)
-        self.db.session.add(dbbook)
+        self.session.add(dbbook)
         return dbbook

     def delete(self, book):
@@ -62,31 +144,78 @@ def delete(self, book):
         """
         if book.id is None:
             raise ValueError(f"book {book.title} not saved")
-        b = DBBook.find(book.id)
-        self.db.session.delete(b)
+        b = self.book_repo.find(book.id)
+        self.session.delete(b)

     def commit(self):
         """
         Commit everything.
         """
-        self.db.session.commit()
+        self.session.commit()
+
+    def _split_text_at_page_breaks(self, txt):
+        "Break fulltext manually at lines consisting of '---' only."
+        # Tried doing this with a regex without success.
+        segments = []
+        current_segment = ""
+        for line in txt.split("\n"):
+            if line.strip() == "---":
+                segments.append(current_segment.strip())
+                current_segment = ""
+            else:
+                current_segment += line + "\n"
+        if current_segment:
+            segments.append(current_segment.strip())
+        return segments
+
+    def _split_pages(self, book, language):
+        "Split fulltext into pages, respecting sentences."
+
+        pages = []
+        for segment in self._split_text_at_page_breaks(book.text):
+            tokens = language.parser.get_parsed_tokens(segment, language)
+            for toks in token_group_generator(
+                tokens, book.split_by, book.threshold_page_tokens
+            ):
+                s = "".join([t.token for t in toks])
+                s = s.replace("\r", "").replace("¶", "\n")
+                pages.append(s.strip())
+        pages = [p for p in pages if p.strip() != ""]
+
+        return pages

     def _build_db_book(self, book):
         "Convert a book business object to a DBBook."
- lang = Language.find(book.language_id) + lang_repo = LanguageRepository(self.session) + lang = None + if book.language_id: + lang = lang_repo.find(book.language_id) + elif book.language_name: + lang = lang_repo.find_by_name(book.language_name) + if lang is None: + msg = f"No language matching id={book.language_id} or name={book.language_name}" + raise RuntimeError(msg) b = None if book.id is None: - b = DBBook.create_book(book.title, lang, book.text) + pages = self._split_pages(book, lang) + b = DBBook(book.title, lang) + for index, page in enumerate(pages): + _ = DBText(b, page, index + 1) else: - b = DBBook.find(book.id) + b = self.book_repo.find(book.id) + b.title = book.title b.source_uri = book.source_uri + b.audio_filename = book.audio_filename + b.audio_current_pos = book.audio_current_pos + b.audio_bookmarks = book.audio_bookmarks + btr = BookTagRepository(self.session) booktags = [] for s in book.book_tags: - booktags.append(BookTag.find_or_create_by_text(s)) + booktags.append(btr.find_or_create_by_text(s)) b.remove_all_book_tags() for tt in booktags: b.add_book_tag(tt) @@ -98,8 +227,12 @@ def _build_business_book(self, dbbook): b = Book() b.id = dbbook.id b.language_id = dbbook.language.id + b.language_name = dbbook.language.name b.title = dbbook.title b.text = None # Not returning this for now b.source_uri = dbbook.source_uri + b.audio_filename = dbbook.audio_filename + b.audio_current_pos = dbbook.audio_current_pos + b.audio_bookmarks = dbbook.audio_bookmarks b.book_tags = [t.text for t in dbbook.book_tags] return b diff --git a/lute/book/routes.py b/lute/book/routes.py index 29130912b..1cbe7fe05 100644 --- a/lute/book/routes.py +++ b/lute/book/routes.py @@ -2,27 +2,57 @@ /book routes. """ -import requests -from bs4 import BeautifulSoup - -from flask import Blueprint, request, jsonify, render_template, redirect, flash +import json +from flask import ( + Blueprint, + request, + jsonify, + render_template, + redirect, + flash, +) from lute.utils.data_tables import DataTablesFlaskParamParser +from lute.book.service import ( + Service as BookService, + BookImportException, + BookDataFromUrl, +) from lute.book.datatables import get_data_tables_list from lute.book.forms import NewBookForm, EditBookForm +from lute.book.stats import Service as StatsService import lute.utils.formutils from lute.db import db - -from lute.models.book import Book as DBBook +from lute.models.language import Language +from lute.models.repositories import ( + BookRepository, + UserSettingRepository, + LanguageRepository, +) from lute.book.model import Book, Repository bp = Blueprint("book", __name__, url_prefix="/book") +def _load_term_custom_filters(request_form, parameters): + "Manually add filters that the DataTablesFlaskParamParser doesn't know about." + filter_param_names = [ + "filtLanguage", + ] + request_params = request_form.to_dict(flat=True) + for p in filter_param_names: + parameters[p] = request_params.get(p) + + def datatables_source(is_archived): "Get datatables json for books." + # In the future, we might want to create an API such as + # get_books(sort_order, search_string, length, index, language_id). 
+ # See DataTablesFlaskParamParser.parse_params_2(request.form) + # (currently unused) parameters = DataTablesFlaskParamParser.parse_params(request.form) - data = get_data_tables_list(parameters, is_archived) + _load_term_custom_filters(request.form, parameters) + data = get_data_tables_list(parameters, is_archived, db.session) return jsonify(data) @@ -35,7 +65,17 @@ def datatables_active_source(): @bp.route("/archived", methods=["GET"]) def archived(): "List archived books." - return render_template("book/index.html", status="Archived") + language_choices = lute.utils.formutils.language_choices( + db.session, "(all languages)" + ) + current_language_id = lute.utils.formutils.valid_current_language_id(db.session) + + return render_template( + "book/index.html", + status="Archived", + language_choices=language_choices, + current_language_id=current_language_id, + ) # Archived must be capitalized, or the ajax call 404's. @@ -45,90 +85,90 @@ def datatables_archived_source(): return datatables_source(True) +def _book_from_url(url): + "Get data for a new book, or flash an error if can't parse." + service = BookService() + bd = None + try: + bd = service.book_data_from_url(url) + except BookImportException as e: + flash(e.message, "notice") + bd = BookDataFromUrl() + b = Book() + b.title = bd.title + b.source_uri = bd.source_uri + b.text = bd.text + return b + + +def _language_is_rtl_map(): + """ + Return language-id to is_rtl map, to be used during book creation. + """ + ret = {} + for lang in db.session.query(Language).all(): + ret[lang.id] = lang.right_to_left + return ret + + @bp.route("/new", methods=["GET", "POST"]) def new(): "Create a new book, either from text or from a file." b = Book() + import_url = request.args.get("importurl", "").strip() + if import_url != "": + b = _book_from_url(import_url) + form = NewBookForm(obj=b) - form.language_id.choices = lute.utils.formutils.language_choices() - repo = Repository(db) + form.language_id.choices = lute.utils.formutils.language_choices(db.session) + repo = Repository(db.session) if form.validate_on_submit(): - form.populate_obj(b) - if form.textfile.data: - content = form.textfile.data.read() - b.text = str(content, "utf-8") - book = repo.add(b) - repo.commit() - return redirect(f"/read/{book.id}/page/1", 302) - - parameters = request.args - import_url = parameters.get("importurl", "").strip() - if import_url != "": - b = load_book(import_url) - form = NewBookForm(obj=b) - form.language_id.choices = lute.utils.formutils.language_choices() + try: + form.populate_obj(b) + svc = BookService() + book = svc.import_book(b, db.session) + return redirect(f"/read/{book.id}/page/1", 302) + except BookImportException as e: + flash(e.message, "notice") + + # Don't set the current language before submit. + usrepo = UserSettingRepository(db.session) + current_language_id = int(usrepo.get_value("current_language_id")) + form.language_id.data = current_language_id return render_template( "book/create_new.html", book=b, form=form, tags=repo.get_book_tags(), + rtl_map=json.dumps(_language_is_rtl_map()), show_language_selector=True, ) -def load_book(url): - "Parse the url and load a new Book." 
-    s = None
-    try:
-        timeout = 20  # seconds
-        response = requests.get(url, timeout=timeout)
-        response.raise_for_status()
-        s = response.text
-    except requests.exceptions.RequestException as e:
-        msg = f"Could not parse {url} (error: {str(e)})"
-        flash(msg, "notice")
-        return Book()
-
-    soup = BeautifulSoup(s, "html.parser")
-    extracted_text = []
-
-    # Add elements in order found.
-    for element in soup.descendants:
-        if element.name in ("h1", "h2", "h3", "h4", "p"):
-            extracted_text.append(element.text)
-
-    title_node = soup.find("title")
-    orig_title = title_node.string if title_node else url
-
-    short_title = orig_title[:150]
-    if len(orig_title) > 150:
-        short_title += " ..."
-
-    b = Book()
-    b.title = short_title
-    b.source_uri = url
-    b.text = "\n\n".join(extracted_text)
-    return b
-
-
 @bp.route("/edit/<int:bookid>", methods=["GET", "POST"])
 def edit(bookid):
     "Edit a book - can only change a few fields."
-    repo = Repository(db)
+    repo = Repository(db.session)
     b = repo.load(bookid)
     form = EditBookForm(obj=b)
     if form.validate_on_submit():
         form.populate_obj(b)
-        repo.add(b)
-        repo.commit()
+        svc = BookService()
+        svc.import_book(b, db.session)
         flash(f"{b.title} updated.")
         return redirect("/", 302)

+    lang_repo = LanguageRepository(db.session)
+    lang = lang_repo.find(b.language_id)
     return render_template(
-        "book/edit.html", book=b, form=form, tags=repo.get_book_tags()
+        "book/edit.html",
+        book=b,
+        title_direction="rtl" if lang.right_to_left else "ltr",
+        form=form,
+        tags=repo.get_book_tags(),
     )


@@ -137,10 +177,16 @@ def import_webpage():
     return render_template("book/import_webpage.html")


+def _find_book(bookid):
+    "Find book from db."
+    br = BookRepository(db.session)
+    return br.find(bookid)
+
+
 @bp.route("/archive/<int:bookid>", methods=["POST"])
 def archive(bookid):
     "Archive a book."
-    b = DBBook.find(bookid)
+    b = _find_book(bookid)
     b.archived = True
     db.session.add(b)
     db.session.commit()
@@ -150,7 +196,7 @@
 @bp.route("/unarchive/<int:bookid>", methods=["POST"])
 def unarchive(bookid):
     "Unarchive a book."
-    b = DBBook.find(bookid)
+    b = _find_book(bookid)
     b.archived = False
     db.session.add(b)
     db.session.commit()
@@ -160,7 +206,28 @@
 @bp.route("/delete/<int:bookid>", methods=["POST"])
 def delete(bookid):
     "Delete a book."
-    b = DBBook.find(bookid)
+    b = _find_book(bookid)
     db.session.delete(b)
     db.session.commit()
     return redirect("/", 302)
+
+
+@bp.route("/table_stats/<int:bookid>", methods=["GET"])
+def table_stats(bookid):
+    "Get the stats, return ajax."
+    b = _find_book(bookid)
+    if b is None or b.language is None:
+        # Playwright tests were sometimes passing an id that didn't exist ...
+        # I believe this is due to page caching, i.e. the book listing
+        # is showing books and IDs that no longer exist after cache reset.
+        # TODO fix_hack: get rid of this hack.
+        return jsonify({})
+    svc = StatsService(db.session)
+    stats = svc.get_stats(b)
+    ret = {
+        "distinctterms": stats.distinctterms,
+        "distinctunknowns": stats.distinctunknowns,
+        "unknownpercent": stats.unknownpercent,
+        "status_distribution": stats.status_distribution,
+    }
+    return jsonify(ret)
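The routes above now delegate to the service layer defined in `lute/book/service.py` (next file). A minimal sketch of creating a book through it — assuming an active app context, an "English" language configured in your settings, and placeholder title/text:

```
# Sketch only: create a book via the new service layer.
from lute.db import db
from lute.book.model import Book
from lute.book.service import Service as BookService

b = Book()
b.language_name = "English"  # language_id also works, and takes precedence
b.title = "Example book"     # placeholder
b.text = "Hello there. This is a tiny example text."

# import_book parses the text into pages and commits the new DBBook.
dbbook = BookService().import_book(b, db.session)
print(dbbook.id)
```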
+""" + +import os +import shutil +from io import StringIO, TextIOWrapper, BytesIO +from datetime import datetime +import uuid +from dataclasses import dataclass +from tempfile import TemporaryFile +import requests +from bs4 import BeautifulSoup +from flask import current_app, flash +from openepub import Epub, EpubError +from pypdf import PdfReader +from subtitle_parser import SrtParser, WebVttParser +from lute.book.model import Repository + + +class BookImportException(Exception): + """ + Exception to throw on book import error. + """ + + def __init__(self, message="A custom error occurred", cause=None): + self.cause = cause + self.message = message + super().__init__(message) + + +@dataclass +class BookDataFromUrl: + "Data class" + title: str = None + source_uri: str = None + text: str = None + + +class FileTextExtraction: + "Utility to extract text from various file formats." + + def get_file_content(self, filename, filestream): + """ + Get the content of the file. + """ + _, ext = os.path.splitext(filename) + ext = (ext or "").lower() + + messages = { + ".pdf": """ + Note: pdf imports can be inaccurate, due to how PDFs are encoded. + Please be aware of this while reading. + """ + } + msg = messages.get(ext) + if msg is not None: + flash(msg, "notice") + + handlers = { + ".txt": self._get_textfile_content, + ".epub": self._get_epub_content, + ".pdf": self._get_pdf_content, + ".srt": self._get_srt_content, + ".vtt": self._get_vtt_content, + } + handler = handlers.get(ext) + if handler is None: + raise ValueError(f'Unknown file extension "{ext}"') + content = handler(filename, filestream).strip() + if content == "": + raise BookImportException(f"{filename} is empty.") + return content + + def _get_text_stream_content(self, fstream, encoding="utf-8"): + "Gets content from simple text stream." + + usestream = fstream + # May have to convert the fstream to a a BytesIO stream. + # GitHub CI caught this, and per ChatGPT: In Python 3.10, + # SpooledTemporaryFile no longer automatically gains all + # file-like methods when rolled over to a regular temporary + # file. Specifically, it seems that the object lacks the + # readable method required by TextIOWrapper to validate the + # stream ... + # + # I haven't looked into this deeply, but when running Python + # 3.10.16 on my mac, "inv accept -k bad_text_files" failed on + # line "with TextIOWrapper(fstream, encoding=encoding) as + # decoded:" with "AttributeError: 'SpooledTemporaryFile' + # object has no attribute 'readable'. Did you mean: + # 'readline'?".. Converting usestream to BytesIO fixed it. + if not hasattr(fstream, "readable"): + usestream = BytesIO(fstream.read()) # Wrap in BytesIO if needed + with TextIOWrapper(usestream, encoding=encoding) as decoded: + return decoded.read() + + def _get_textfile_content(self, filename, filestream): + "Get content as a single string." + try: + return self._get_text_stream_content(filestream) + except UnicodeDecodeError as e: + f = filename + msg = f"{f} is not utf-8 encoding, please convert it to utf-8 first (error: {str(e)})" + raise BookImportException(message=msg, cause=e) from e + + def _get_epub_content(self, filename, filestream): + """ + Get the content of the epub as a single string. + """ + content = "" + try: + if hasattr(filestream, "seekable"): + epub = Epub(stream=filestream) + content = epub.get_text() + else: + # We get a SpooledTemporaryFile from the form but this doesn't + # implement all file-like methods until python 3.11. 
+    def _get_epub_content(self, filename, filestream):
+        """
+        Get the content of the epub as a single string.
+        """
+        content = ""
+        try:
+            if hasattr(filestream, "seekable"):
+                epub = Epub(stream=filestream)
+                content = epub.get_text()
+            else:
+                # We get a SpooledTemporaryFile from the form, but this doesn't
+                # implement all file-like methods until python 3.11.  So we need
+                # to rewrite it into a TemporaryFile.
+                with TemporaryFile() as tf:
+                    filestream.seek(0)
+                    tf.write(filestream.read())
+                    epub = Epub(stream=tf)
+                    content = epub.get_text()
+        except EpubError as e:
+            msg = f"Could not parse {filename} (error: {str(e)})"
+            raise BookImportException(message=msg, cause=e) from e
+        return content
+
+    def _get_pdf_content(self, filename, filestream):
+        "Get content as a single string from a PDF file using pypdf."
+        content = ""
+        try:
+            pdf_reader = PdfReader(filestream)
+            for page in pdf_reader.pages:
+                content += page.extract_text()
+            return content
+        except Exception as e:
+            msg = f"Could not parse {filename} (error: {str(e)})"
+            raise BookImportException(message=msg, cause=e) from e
+
+    def _get_srt_content(self, filename, filestream):
+        """
+        Get the content of the srt as a single string.
+        """
+        content = ""
+        try:
+            srt_content = self._get_text_stream_content(filestream, "utf-8-sig")
+            parser = SrtParser(StringIO(srt_content))
+            parser.parse()
+            content = "\n".join(subtitle.text for subtitle in parser.subtitles)
+            return content
+        except Exception as e:
+            msg = f"Could not parse {filename} (error: {str(e)})"
+            raise BookImportException(message=msg, cause=e) from e
+
+    def _get_vtt_content(self, filename, filestream):
+        """
+        Get the content of the vtt as a single string.
+        """
+        content = ""
+        try:
+            vtt_content = self._get_text_stream_content(filestream, "utf-8-sig")
+            # Check if it is from YouTube
+            lines = vtt_content.split("\n")
+            if lines[1].startswith("Kind:") and lines[2].startswith("Language:"):
+                vtt_content = "\n".join(lines[:1] + lines[3:])
+            parser = WebVttParser(StringIO(vtt_content))
+            parser.parse()
+            content = "\n".join(subtitle.text for subtitle in parser.subtitles)
+            return content
+        except Exception as e:
+            msg = f"Could not parse {filename} (error: {str(e)})"
+            raise BookImportException(message=msg, cause=e) from e
+
+
+class Service:
+    "Service."
+
+    def _unique_fname(self, filename):
+        """
+        Return secure name pre-pended with datetime string.
+        """
+        current_datetime = datetime.now()
+        formatted_datetime = current_datetime.strftime("%Y%m%d_%H%M%S")
+        _, ext = os.path.splitext(filename)
+        ext = (ext or "").lower()
+        newfilename = uuid.uuid4().hex
+        return f"{formatted_datetime}_{newfilename}{ext}"
+
+    def save_audio_file(self, audio_file_field_data):
+        """
+        Save the file to disk, return its filename.
+        """
+        filename = self._unique_fname(audio_file_field_data.filename)
+        fp = os.path.join(current_app.env_config.useraudiopath, filename)
+        audio_file_field_data.save(fp)
+        return filename
+
+    def book_data_from_url(self, url):
+        """
+        Parse the url and load source data for a new Book.
+        This returns a domain object, as the book is still unparsed.
+        """
+        s = None
+        try:
+            timeout = 20  # seconds
+            response = requests.get(url, timeout=timeout)
+            response.raise_for_status()
+            s = response.text
+        except requests.exceptions.RequestException as e:
+            msg = f"Could not parse {url} (error: {str(e)})"
+            raise BookImportException(message=msg, cause=e) from e
+
+        soup = BeautifulSoup(s, "html.parser")
+        extracted_text = []
+
+        # Add elements in order found.
+        for element in soup.descendants:
+            if element.name in ("h1", "h2", "h3", "h4", "p"):
+                extracted_text.append(element.text)
+
+        title_node = soup.find("title")
+        orig_title = title_node.string if title_node else url
+
+        short_title = orig_title[:150]
+        if len(orig_title) > 150:
+            short_title += " ..."
+ + b = BookDataFromUrl() + b.title = short_title + b.source_uri = url + b.text = "\n\n".join(extracted_text) + return b + + def import_book(self, book, session): + """ + Save the book as a dbbook, parsing and saving files as needed. + Returns new book created. + """ + + def _raise_if_file_missing(p, fldname): + if not os.path.exists(p): + raise BookImportException(f"Missing file {p} given in {fldname}") + + def _raise_if_none(p, fldname): + if p is None: + raise BookImportException(f"Must set {fldname}") + + fte = FileTextExtraction() + if book.text_source_path: + _raise_if_file_missing(book.text_source_path, "text_source_path") + tsp = book.text_source_path + with open(tsp, mode="rb") as stream: + book.text = fte.get_file_content(tsp, stream) + + if book.text_stream: + _raise_if_none(book.text_stream_filename, "text_stream_filename") + book.text = fte.get_file_content( + book.text_stream_filename, book.text_stream + ) + + if book.audio_source_path: + _raise_if_file_missing(book.audio_source_path, "audio_source_path") + newname = self._unique_fname(book.audio_source_path) + fp = os.path.join(current_app.env_config.useraudiopath, newname) + shutil.copy(book.audio_source_path, fp) + book.audio_filename = newname + + if book.audio_stream: + _raise_if_none(book.audio_stream_filename, "audio_stream_filename") + newname = self._unique_fname(book.audio_stream_filename) + fp = os.path.join(current_app.env_config.useraudiopath, newname) + with open(fp, mode="wb") as fcopy: # Use "wb" to write in binary mode + while chunk := book.audio_stream.read( + 8192 + ): # Read the stream in chunks (e.g., 8 KB) + fcopy.write(chunk) + book.audio_filename = newname + + repo = Repository(session) + dbbook = repo.add(book) + repo.commit() + return dbbook diff --git a/lute/book/stats.py b/lute/book/stats.py index 585c86995..217cc0c02 100644 --- a/lute/book/stats.py +++ b/lute/book/stats.py @@ -2,120 +2,137 @@ Book statistics. """ -from lute.read.service import get_paragraphs -from lute.db import db -from lute.models.book import Book - - -def get_status_distribution(book): - """ - Return statuses and count of unique words per status. - - Does a full render of the next 20 pages in a book - to calculate the distribution. - """ - txindex = 0 - - if (book.current_tx_id or 0) != 0: - for t in book.texts: - if t.id == book.current_tx_id: - break - txindex += 1 - - paras = [ - get_paragraphs(t) - for t in - # Next 20 pages, a good enough sample. - book.texts[txindex : txindex + 20] - ] - - def flatten_list(nested_list): - result = [] - for item in nested_list: - if isinstance(item, list): - result.extend(flatten_list(item)) - else: - result.append(item) - return result - - text_items = [] - for s in flatten_list(paras): - text_items.extend(s.textitems) - text_items = [ti for ti in text_items if ti.is_word] - - statterms = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 98: [], 99: []} - - for ti in text_items: - statterms[ti.wo_status or 0].append(ti.text_lc) - - stats = {} - for statusval, allterms in statterms.items(): - uniques = list(set(allterms)) - statterms[statusval] = uniques - stats[statusval] = len(uniques) - - return stats - - -################################################## -# Stats table refresh. - - -class BookStats(db.Model): - "The stats table." 
- __tablename__ = "bookstats" - - id = db.Column(db.Integer, primary_key=True) - BkID = db.Column(db.Integer) - wordcount = db.Column(db.Integer) - distinctterms = db.Column(db.Integer) - distinctunknowns = db.Column(db.Integer) - unknownpercent = db.Column(db.Integer) - - -def refresh_stats(): - "Refresh stats for all books requiring update." - books_to_update = ( - db.session.query(Book) - .filter(~Book.id.in_(db.session.query(BookStats.BkID))) - .all() - ) - books = [b for b in books_to_update if b.is_supported] - for book in books: - stats = _get_stats(book) - _update_stats(book, stats) - - -def mark_stale(book): - "Mark a book's stats as stale to force refresh." - bk_id = book.id - db.session.query(BookStats).filter_by(BkID=bk_id).delete() - db.session.commit() - - -def _get_stats(book): - "Calc stats for the book using the status distribution." - status_distribution = get_status_distribution(book) - unknowns = status_distribution[0] - allunique = sum(status_distribution.values()) - - percent = 0 - if allunique > 0: # In case not parsed. - percent = round(100.0 * unknowns / allunique) - - # Any change in the below fields requires a change to - # update_stats as well, query insert doesn't check field order. - return [book.word_count or 0, allunique, unknowns, percent] - - -def _update_stats(book, stats): - "Update BookStats for the given book." - new_stats = BookStats( - BkID=book.id, - wordcount=stats[0], - distinctterms=stats[1], - distinctunknowns=stats[2], - unknownpercent=stats[3], - ) - db.session.add(new_stats) - db.session.commit() +import json +from sqlalchemy import select, text +from lute.read.render.service import Service as RenderService +from lute.models.book import Book, BookStats +from lute.models.repositories import UserSettingRepository + +# from lute.utils.debug_helpers import DebugTimer + + +class Service: + "Service." + + def __init__(self, session): + self.session = session + + def _last_n_pages(self, book, txindex, n): + "Get next n pages, or at least n pages." + start_index = max(0, txindex - n) + end_index = txindex + n + texts = book.texts[start_index:end_index] + return texts[-n:] + + def _get_sample_texts(self, book): + "Get texts to use as sample." + txindex = 0 + if (book.current_tx_id or 0) != 0: + for t in book.texts: + if t.id == book.current_tx_id: + break + txindex += 1 + + repo = UserSettingRepository(self.session) + sample_size = int(repo.get_value("stats_calc_sample_size") or 5) + texts = self._last_n_pages(book, txindex, sample_size) + return texts + + def calc_status_distribution(self, book): + """ + Calculate statuses and count of unique words per status. + + Does a full render of a small number of pages + to calculate the distribution. + """ + + # DebugTimer.clear_total_summary() + # dt = DebugTimer("get_status_distribution", display=False) + texts = self._get_sample_texts(book) + + # Getting the individual paragraphs per page, and then combining, + # is much faster than combining all pages into one giant page. + service = RenderService(self.session) + mw = service.get_multiword_indexer(book.language) + textitems = [] + for tx in texts: + textitems.extend(service.get_textitems(tx.text, book.language, mw)) + # # Old slower code: + # text_sample = "\n".join([t.text for t in texts]) + # paras = get_paragraphs(text_sample, book.language) ... etc. 
+ # dt.step("get_paragraphs") + + textitems = [ti for ti in textitems if ti.is_word] + statterms = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 98: [], 99: []} + for ti in textitems: + statterms[ti.wo_status or 0].append(ti.text_lc) + + stats = {} + for statusval, allterms in statterms.items(): + uniques = list(set(allterms)) + statterms[statusval] = uniques + stats[statusval] = len(uniques) + + # dt.step("compiled") + # DebugTimer.total_summary() + + return stats + + def refresh_stats(self): + "Refresh stats for all books requiring update." + sql = "delete from bookstats where status_distribution is null" + self.session.execute(text(sql)) + self.session.commit() + book_ids_with_stats = select(BookStats.BkID).scalar_subquery() + books_to_update = ( + self.session.query(Book).filter(~Book.id.in_(book_ids_with_stats)).all() + ) + books = [b for b in books_to_update if b.is_supported] + for book in books: + stats = self._calculate_stats(book) + self._update_stats(book, stats) + + def mark_stale(self, book): + "Mark a book's stats as stale to force refresh." + bk_id = book.id + self.session.query(BookStats).filter_by(BkID=bk_id).delete() + self.session.commit() + + def get_stats(self, book): + "Gets stats from the cache if available, or calculates." + bk_id = book.id + stats = self.session.query(BookStats).filter_by(BkID=bk_id).first() + if stats is None or stats.status_distribution is None: + newstats = self._calculate_stats(book) + self._update_stats(book, newstats) + stats = self.session.query(BookStats).filter_by(BkID=bk_id).first() + return stats + + def _calculate_stats(self, book): + "Calc stats for the book using the status distribution." + status_distribution = self.calc_status_distribution(book) + unknowns = status_distribution[0] + allunique = sum(status_distribution.values()) + + percent = 0 + if allunique > 0: # In case not parsed. + percent = round(100.0 * unknowns / allunique) + + return { + "allunique": allunique, + "unknowns": unknowns, + "percent": percent, + "distribution": json.dumps(status_distribution), + } + + def _update_stats(self, book, stats): + "Update BookStats for the given book." + s = self.session.query(BookStats).filter_by(BkID=book.id).first() + if s is None: + s = BookStats(BkID=book.id) + s.distinctterms = stats["allunique"] + s.distinctunknowns = stats["unknowns"] + s.unknownpercent = stats["percent"] + s.status_distribution = stats["distribution"] + self.session.add(s) + self.session.commit() diff --git a/lute/bookmarks/__init__.py b/lute/bookmarks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lute/bookmarks/datatables.py b/lute/bookmarks/datatables.py new file mode 100644 index 000000000..6dcdeac20 --- /dev/null +++ b/lute/bookmarks/datatables.py @@ -0,0 +1,19 @@ +""" +Show bookmarks in datatables. +""" + +from lute.utils.data_tables import DataTablesSqliteQuery + + +def get_data_tables_list(parameters, book_id, session): + "Bookmark json data for datatables." 
+
+    base_sql = f"""
+        SELECT tb.TbID, tb.TbTxID, tb.TbTitle, tx.TxOrder
+        FROM textbookmarks as tb
+        INNER JOIN texts as tx ON tb.TbTxID = tx.TxID
+        WHERE tx.TxBkID = { book_id }
+    """
+
+    connection = session.connection()
+    return DataTablesSqliteQuery.get_data(base_sql, parameters, connection)
diff --git a/lute/bookmarks/routes.py b/lute/bookmarks/routes.py
new file mode 100644
index 000000000..e9b9b00ff
--- /dev/null
+++ b/lute/bookmarks/routes.py
@@ -0,0 +1,116 @@
+"""
+/bookmarks endpoint
+"""
+
+from flask import Blueprint, request, render_template, jsonify
+from lute.bookmarks.datatables import get_data_tables_list
+from lute.models.book import Text, TextBookmark
+from lute.models.repositories import BookRepository
+from lute.utils.data_tables import DataTablesFlaskParamParser
+from lute.db import db
+
+bp = Blueprint("bookmarks", __name__, url_prefix="/bookmarks")
+
+
+@bp.route("/<int:bookid>/datatables", methods=["POST"])
+def datatables_bookmarks(bookid):
+    "Get datatables json for bookmarks."
+    parameters = DataTablesFlaskParamParser.parse_params(request.form)
+    data = get_data_tables_list(parameters, bookid, db.session)
+    return jsonify(data)
+
+
+@bp.route("/<int:bookid>", methods=["GET"])
+def bookmarks(bookid):
+    "Get all bookmarks for given bookid."
+    br = BookRepository(db.session)
+    book = br.find(bookid)
+
+    text_dir = "rtl" if book.language.right_to_left else "ltr"
+    return render_template("bookmarks/list.html", book=book, text_dir=text_dir)
+
+
+@bp.route("/add", methods=["POST"])
+def add_bookmark():
+    "Add bookmark"
+    data = request.json
+    title = data.get("title")
+    try:
+        book_id = int(data.get("book_id"))
+        page_num = int(data.get("page_num"))
+    except (TypeError, ValueError):
+        return jsonify(
+            success=False, reason="Invalid book_id or page_num provided.", status=200
+        )
+
+    if book_id is None or page_num is None or title is None:
+        return jsonify(
+            success=False,
+            reason="Missing value for required parameter 'title' or 'book_id' or page_num.",
+            status=200,
+        )
+
+    tx = (
+        db.session.query(Text)
+        .filter(Text.bk_id == book_id)
+        .filter(Text.order == page_num)
+        .first()
+    )
+    bookmark = TextBookmark(title=title, text=tx)
+
+    db.session.add(bookmark)
+    db.session.commit()
+    return jsonify(success=True, status=200)
+
+
+@bp.route("/delete", methods=["POST"])
+def delete_bookmark():
+    "Delete bookmark"
+    data = request.json
+    bookmark_id = None
+    try:
+        bookmark_id = int(data.get("bookmark_id"))
+    except (TypeError, ValueError):
+        return jsonify(
+            success=False,
+            reason=f"Invalid bookmark_id ({data.get('bookmark_id')}) provided.",
+            status=200,
+        )
+
+    if bookmark_id is None:
+        return jsonify(
+            success=False,
+            reason="Missing required parameter 'bookmark_id'.",
+            status=200,
+        )
+
+    db.session.query(TextBookmark).filter(TextBookmark.id == bookmark_id).delete()
+    db.session.commit()
+    return jsonify(success=True, status=200)
+
+
+@bp.route("/edit", methods=["POST"])
+def edit_bookmark():
+    "Edit bookmark"
+    data = request.json
+    bookmark_id = None
+    try:
+        bookmark_id = int(data.get("bookmark_id"))
+    except (TypeError, ValueError):
+        return jsonify(
+            success=False, reason="Invalid bookmark_id provided.", status=200
+        )
+    new_title = data.get("new_title", "").strip()
+
+    if bookmark_id is None or new_title == "":
+        return jsonify(
+            success=False,
+            reason="Missing value for required parameter 'new_title' or 'bookmark_id'.",
+            status=200,
+        )
+
+    db.session.query(TextBookmark).filter(TextBookmark.id == bookmark_id).update(
+        {"title": new_title}
+    )
+    db.session.commit()
+    return jsonify(success=True, status=200)
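The new bookmark endpoints take and return JSON. A minimal sketch of exercising them from a client — assuming a local Lute instance (host, port, and ids here are illustrative):

```
# Sketch only: add and rename a bookmark via the JSON API.
import requests

base = "http://localhost:5001/bookmarks"

r = requests.post(
    f"{base}/add",
    json={"book_id": 1, "page_num": 2, "title": "Chapter 2"},
    timeout=10,
)
print(r.json())  # e.g. {"success": true, "status": 200}

r = requests.post(
    f"{base}/edit",
    json={"bookmark_id": 1, "new_title": "Ch. 2 start"},
    timeout=10,
)
print(r.json())
```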
diff --git a/lute/cli/README.md b/lute/cli/README.md
new file mode 100644
index 000000000..9c6917899
--- /dev/null
+++ b/lute/cli/README.md
@@ -0,0 +1,22 @@
+CLI commands.
+
+Note that `lute.app_factory` has to be specified as the `--app` option.
+
+Samples:
+
+```
+flask --app lute.app_factory cli --help
+
+flask --app lute.app_factory cli language_export English ./hello.csv
+```
+
+See the help for a command:
+
+```
+flask --app lute.app_factory cli language_export --help
+
+Usage: flask cli language_export [OPTIONS] LANGUAGE OUTPUT_PATH
+
+  Get all terms from all books in the language, and write a data file of
+  term frequencies and children.
+```
\ No newline at end of file
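The bulk CSV import defined in `commands.py` below follows the same invocation pattern. A hypothetical dry run (file names here are made up; the `--commit` flag, documented below, applies the changes):

```
# books.csv needs a header row, e.g.: title,language,text,tags,url
flask --app lute.app_factory cli import_books_from_csv --language English books.csv

# Apply the changes for real:
flask --app lute.app_factory cli import_books_from_csv --language English --commit books.csv
```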
diff --git a/lute/cli/__init__.py b/lute/cli/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/lute/cli/commands.py b/lute/cli/commands.py
new file mode 100644
index 000000000..c8b7fadd0
--- /dev/null
+++ b/lute/cli/commands.py
@@ -0,0 +1,107 @@
+"""
+Simple CLI commands.
+"""
+
+import click
+from flask import Blueprint
+
+from lute.cli.language_term_export import generate_language_file, generate_book_file
+from lute.cli.import_books import import_books_from_csv
+
+bp = Blueprint("cli", __name__)
+
+
+@bp.cli.command("hello")
+def hello():
+    "Say hello -- proof-of-concept CLI command only."
+    msg = """
+    Hello there!
+
+    This is the Lute cli.
+
+    There may be some experimental scripts here ...
+    nothing that will change or damage your Lute data,
+    but the CLI may change.
+
+    Thanks for looking.
+    """
+    print(msg)
+
+
+@bp.cli.command("language_export")
+@click.argument("language")
+@click.argument("output_path")
+def language_export(language, output_path):
+    """
+    Get all terms from all books in the language, and write a
+    data file of term frequencies and children.
+    """
+    generate_language_file(language, output_path)
+
+
+@bp.cli.command("book_term_export")
+@click.argument("bookid")
+@click.argument("output_path")
+def book_term_export(bookid, output_path):
+    """
+    Get all terms for the given book, and write a
+    data file of term frequencies and children.
+    """
+    generate_book_file(bookid, output_path)
+
+
+@bp.cli.command("import_books_from_csv")
+@click.option(
+    "--commit",
+    is_flag=True,
+    help="""
+    Commit the changes to the database.  If not set, import in dry-run mode.  A
+    list of changes will be printed out but not applied.
+""",
+)
+@click.option(
+    "--tags",
+    default="",
+    help="""
+    A comma-separated list of tags to apply to all books.
+""",
+)
+@click.option(
+    "--language",
+    default="",
+    help="""
+    The name of the default language to apply to each book, as it appears in
+    your language settings.  If unset, the language must be indicated in the
+    "language" column of the CSV file.
+""",
+)
+@click.argument("file")
+def import_books_from_csv_cmd(language, file, tags, commit):
+    """
+    Import books from a CSV file.
+
+    The CSV file must have a header row with the following, case-sensitive,
+    column names.  The order of the columns does not matter.  The CSV file may
+    include additional columns, which will be ignored.
+
+    - title: the title of the book
+
+    - text: the text of the book
+
+    - language: [optional] the name of the language of the book, as it appears
+      in your language settings.  If unspecified, the language specified on the
+      command line (using the --language option) will be used.
+
+    - url: [optional] the source URL for the book
+
+    - tags: [optional] a comma-separated list of tags to apply to the book
+      (e.g., "audiobook,beginner")
+
+    - audio: [optional] the path to the audio file of the book.  This should
+      either be an absolute path, or a path relative to the CSV file.
+
+    - bookmarks: [optional] a semicolon-separated list of audio bookmark
+      positions, in seconds (decimals permitted; e.g., "12.34;42.89;89.00").
+    """
+    tags = list(tags.split(",")) if tags else []
+    import_books_from_csv(file, language, tags, commit)
diff --git a/lute/cli/import_books.py b/lute/cli/import_books.py
new file mode 100644
index 000000000..bd582c66b
--- /dev/null
+++ b/lute/cli/import_books.py
@@ -0,0 +1,81 @@
+"""
+Bulk import books.
+"""
+
+import csv
+import os
+import sys
+
+from lute.book.model import Book, Repository
+from lute.db import db
+from lute.models.repositories import LanguageRepository
+
+
+def import_books_from_csv(file, language, tags, commit):
+    """
+    Bulk import books from a CSV file.
+
+    Args:
+
+      file: the path to the CSV file to import (see lute/cli/commands.py
+            for the requirements for this file).
+      language: the name of the language to use by default, as it appears in
+            your languages settings
+      tags: a list of tags to apply to all books
+      commit: a boolean value indicating whether to commit the changes to the
+            database.  If false, a list of books to be imported will be
+            printed out, but no changes will be made.
+    """
+    repo = Repository(db.session)
+    lang_repo = LanguageRepository(db.session)
+
+    count = 0
+    with open(file, newline="", encoding="utf-8") as f:
+        r = csv.DictReader(f)
+        for row in r:
+            book = Book()
+            book.title = row["title"]
+            book.language_name = row.get("language") or language
+            if not book.language_name:
+                print(f"Skipping book with unspecified language: {book.title}")
+                continue
+            lang = lang_repo.find_by_name(book.language_name)
+            if not lang:
+                print(
+                    f"Skipping book with unknown language ({book.language_name}): {book.title}"
+                )
+                continue
+            if repo.find_by_title(book.title, lang.id) is not None:
+                print(f"Already exists in {book.language_name}: {book.title}")
+                continue
+            count += 1
+            all_tags = []
+            if tags:
+                all_tags.extend(tags)
+            if "tags" in row and row["tags"]:
+                for tag in row["tags"].split(","):
+                    if tag and tag not in all_tags:
+                        all_tags.append(tag)
+            book.book_tags = all_tags
+            book.text = row["text"]
+            book.source_uri = row.get("url") or None
+            if "audio" in row and row["audio"]:
+                book.audio_filename = os.path.join(os.path.dirname(file), row["audio"])
+            book.audio_bookmarks = row.get("bookmarks") or None
+            repo.add(book)
+            print(
+                f"Added {book.language_name} book (tags={','.join(all_tags)}): {book.title}"
+            )
+
+    print()
+    print(f"Added {count} books")
+    print()
+
+    if not commit:
+        db.session.rollback()
+        print("Dry run, no changes made.")
+        return
+
+    print("Committing...")
+    sys.stdout.flush()
+    repo.commit()
diff --git a/lute/cli/language_term_export.py b/lute/cli/language_term_export.py
new file mode 100644
index 000000000..47fde2f33
--- /dev/null
+++ b/lute/cli/language_term_export.py
@@ -0,0 +1,174 @@
+"""
+Generate a rough data file for a given language for all terms.
+
+Gets *all* books for a given language, and writes a data file to
+the specified csv output file name.
+
+Generates csv with headings:
+term; count; familycount; books; definition; status; children
+
+e.g.
+term; count; familycount; books; definition; status; children +haber; 100; 1500; book1,book2; to exist; 99; hay (500), he (200), has (150) ... +""" + +import csv +from lute.db import db +from lute.models.book import Book +from lute.read.render.service import Service + + +def _add_term_to_dict(t, terms): + "Add term to dictionary and return it." + key = t.text_lc + if key in terms: + return terms[key] + + tag_list = ", ".join([tg.text for tg in t.term_tags]) + if tag_list == "": + tag_list = "-" + + parents_text = sorted([p.text_lc for p in t.parents]) + parents_text = "; ".join(parents_text) + if parents_text == "": + parents_text = "-" + + zws = "\u200B" + hsh = { + "sourceterm": t, + "term": t.text.replace(zws, ""), + "count": 0, + "familycount": 0, + "books": [], + "definition": t.translation or "-", + "status": t.status, + "parents": parents_text, + "children": [], + "tags": tag_list, + } + terms[key] = hsh + return hsh + + +def _process_book(b, terms, multiword_indexer): + "Process pages in book, add to output." + print(f"Processing {b.title} ...") + i = 0 + service = Service(db.session) + for text in b.texts: + i += 1 + if i % 10 == 0: + print(f" page {i} of {b.page_count}", end="\r") + textitems = service.get_textitems(text.text, b.language, multiword_indexer) + displayed_terms = [ + ti.term for ti in textitems if ti.is_word and ti.term is not None + ] + for t in displayed_terms: + e = _add_term_to_dict(t, terms) + e["count"] += 1 + e["familycount"] += 1 + if b.title not in e["books"]: + e["books"].append(b.title) + + for parent in t.parents: + p = _add_term_to_dict(parent, terms) + p["familycount"] += 1 + if b.title not in p["books"]: + p["books"].append(b.title) + if t.text_lc not in p["children"]: + p["children"].append(t.text_lc) + + +def _book_list_truncated(title_array): + "Return first 5 books, + count of rest." + titles = list(set(title_array)) + first_5 = titles[:5] + ret = ", ".join(first_5) + count_rest = len(titles) - len(first_5) + if count_rest > 0: + ret += f" [... +{count_rest} more]" + return ret + + +def _finalize_output(terms): + "Convert terms hash to usable output." + for _, hsh in terms.items(): + hsh["books"] = _book_list_truncated(hsh["books"]) + + # children to child (count) + children = [] + for key in hsh["children"]: + t = terms[key] + children.append({"count": t["count"], "term": t["sourceterm"].text}) + csorted = sorted(children, key=lambda c: c["count"], reverse=True) + children_string = "; ".join([f"{c['term']} ({c['count']})" for c in csorted]) + if children_string == "": + children_string = "-" + hsh["children"] = children_string + + ret = terms.values() + return sorted(ret, key=lambda x: (-x["familycount"], x["term"])) + + +def _load_indexers(books): + "Load multiword indexers for book languages." + service = Service(db.session) + ret = {} + lang_map = {book.language.id: book.language for book in books} + for langid, lang in lang_map.items(): + ret[langid] = service.get_multiword_indexer(lang) + return ret + + +def _generate_file(books, outfile_name): + "Write data file for books to outfile_name." 
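+    # One multiword indexer is built per language (in _load_indexers)
+    # and reused for every book in that language, rather than being
+    # rebuilt per book.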
+ indexers = _load_indexers(books) + terms = {} + for b in books: + _process_book(b, terms, indexers[b.language.id]) + outdata = _finalize_output(terms) + + with open(outfile_name, "w", newline="", encoding="utf-8") as outfile: + keys = [ + "term", + "count", + "familycount", + "books", + "definition", + "status", + "parents", + "children", + "tags", + ] + writer = csv.DictWriter(outfile, fieldnames=keys, extrasaction="ignore") + writer.writeheader() + for r in outdata: + writer.writerow(r) + + +def generate_language_file(language_name, outfile_name): + """ + Generate the datafile for the language. + """ + books = db.session.query(Book).all() + books = [b for b in books if b.language.name == language_name] + if len(books) == 0: + print(f"No books for given language {language_name}, quitting.") + else: + print(f"Writing to {outfile_name}") + _generate_file(books, outfile_name) + print("Done. ") # extra space overwrites old output. + + +def generate_book_file(bookid, outfile_name): + """ + Generate the datafile for the book. + """ + books = db.session.query(Book).all() + books = [b for b in books if f"{b.id}" == f"{bookid}"] + if len(books) == 0: + print(f"No book with id = {bookid}.") + else: + print(f"Writing to {outfile_name}") + _generate_file(books, outfile_name) + print("Done. ") # extra space overwrites old output. diff --git a/lute/config/app_config.py b/lute/config/app_config.py index 939732947..e7aec2973 100644 --- a/lute/config/app_config.py +++ b/lute/config/app_config.py @@ -24,14 +24,19 @@ def _load_config(self, config_file_path): """ Load and validate the config file. """ - with open(config_file_path, "r", encoding="utf-8") as file: - config = yaml.safe_load(file) + with open(config_file_path, "r", encoding="utf-8") as cf: + config = yaml.safe_load(cf) + + if not isinstance(config, dict): + raise RuntimeError( + f"File at {config_file_path} is invalid or is not a yaml dictionary." + ) self.env = config.get("ENV", None) if self.env not in ["prod", "dev"]: raise ValueError(f"ENV must be prod or dev, was {self.env}.") - self.is_docker = "IS_DOCKER" in config + self.is_docker = bool(config.get("IS_DOCKER", False)) # Database name. self.dbname = config.get("DBNAME", None) @@ -44,7 +49,11 @@ def _load_config(self, config_file_path): # Path to user data. self.datapath = config.get("DATAPATH", self._get_appdata_dir()) + self.plugin_datapath = os.path.join(self.datapath, "plugins") self.userimagespath = os.path.join(self.datapath, "userimages") + self.useraudiopath = os.path.join(self.datapath, "useraudio") + self.userthemespath = os.path.join(self.datapath, "userthemes") + self.temppath = os.path.join(self.datapath, "temp") self.dbfilename = os.path.join(self.datapath, self.dbname) # Path to db backup. @@ -69,14 +78,13 @@ def sqliteconnstring(self): "Full sqlite connection string." return f"sqlite:///{self.dbfilename}" - @staticmethod - def create_from_config(): - "Create an AppConfig from the config file." - thisdir = AppConfig.configdir() - configfile = os.path.join(thisdir, "config.yml") - return AppConfig(configfile) - @staticmethod def configdir(): "Return the path to the configuration file directory." return os.path.dirname(os.path.realpath(__file__)) + + @staticmethod + def default_config_filename(): + "Return the path to the default configuration file." 
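+        # The default config.yml ships in the same directory as this
+        # module.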
+        thisdir = AppConfig.configdir()
+        return os.path.join(thisdir, "config.yml")
diff --git a/lute/db/data_cleanup.py b/lute/db/data_cleanup.py
new file mode 100644
index 000000000..65947cf6e
--- /dev/null
+++ b/lute/db/data_cleanup.py
@@ -0,0 +1,191 @@
+"""
+Data cleanup routines.
+
+Sometimes required as data management changes.
+These cleanup routines will be called by the app_factory.
+"""
+
+from sqlalchemy import select, text as sqltext
+from lute.models.language import Language
+from lute.models.book import Text, Sentence
+from lute.models.term import TermImage
+
+
+class ProgressReporter:
+    "Report progress to an output function."
+
+    def __init__(self, total_count, output_func, report_every=100):
+        "Setup counters."
+        self.current = 0
+        self.last_output = 0
+        self.total_count = total_count
+        self.report_every = report_every
+        self.output_func = output_func
+
+    def increment(self):
+        "Increment counter, and if past threshold, output."
+        if self.total_count == 0:
+            return
+        self.current += 1
+        if self.current - self.last_output < self.report_every:
+            return
+        self.output_func(f" {self.current} of {self.total_count}")
+        self.last_output = self.current
+
+
+def _set_texts_word_count(session, output_function):
+    """
+    texts.TxWordCount should be set for all texts.
+
+    Fixing a design error: the counts should have been stored here,
+    instead of only in books.BkWordCount.
+
+    Ref https://github.com/jzohrab/lute-v3/issues/95
+    """
+    calc_counts = session.query(Text).filter(Text.word_count.is_(None)).all()
+
+    # Don't recalc with invalid parsers!!!!
+    recalc = [t for t in calc_counts if t.book.language.is_supported]
+
+    if len(recalc) == 0:
+        # Nothing to calculate, quit.
+        return
+
+    output_function(f"Fixing word counts for {len(recalc)} Texts.")
+    pr = ProgressReporter(len(recalc), output_function)
+    for t in recalc:
+        pr.increment()
+        pt = t.book.language.get_parsed_tokens(t.text)
+        words = [w for w in pt if w.is_word]
+        t.word_count = len(words)
+        session.add(t)
+    session.commit()
+    output_function("Done.")
+
+
+def _load_sentence_textlc(session, output_function):
+    """
+    sentences.SeTextLC was added after deployment; it needs to be loaded
+    to fix issue 531. Ref https://github.com/LuteOrg/lute-v3/issues/531
+
+    Only update sentences where the language is supported. e.g. the
+    user may have installed mecab, done some Japanese, and then
+    uninstalled mecab: the data will be hidden, but it's still
+    present, and the sentences cannot be updated as the parser can't
+    be loaded.
+    """
+
+    supported_langs = {
+        lang.id: lang for lang in session.query(Language).all() if lang.is_supported
+    }
+    langids = [f"{k}" for k in list(supported_langs.keys())]
+    if len(langids) == 0:
+        langids = ["-999"]  # dummy to ensure good base sql
+
+    base_sql = f"""
+        select SeID, BkLgID
+        from sentences
+        inner join texts on SeTxID = TxID
+        inner join books on BkID = TxBkID
+        where BkLgID in ({','.join(langids)})
+        and SeTextLC is null
+    """
+
+    count = session.execute(sqltext(f"select count(*) from ({base_sql}) src")).scalar()
+    if count == 0:
+        # Do nothing, don't print messages.
+        return
+
+    def _get_next_batch(batch_size):
+        # Query for the next batch of sentences where SeTextLC is null.
+        sql = f"{base_sql} limit {batch_size}"
+        recs = session.execute(sqltext(sql)).all()
+        seids = [int(rec[0]) for rec in recs]
+        if len(seids) == 0:
+            return []
+
+        sentences = session.query(Sentence).filter(Sentence.id.in_(seids)).all()
+        se_map = {se.id: se for se in sentences}
+        return [
+            {"sentence": se_map[int(rec[0])], "langid": int(rec[1])} for rec in recs
+        ]
+
+    # Guard against infinite loop.
+    last_batch_ids = []
+
+    output_function(f"Updating data for {count} sentences.")
+    batch_size = 1000
+    pr = ProgressReporter(count, output_function, report_every=batch_size)
+    batch = _get_next_batch(batch_size)
+    while len(batch) > 0:
+        curr_batch_ids = [se_langid["sentence"].id for se_langid in batch]
+        if last_batch_ids == curr_batch_ids:
+            raise RuntimeError("Sentences not getting updated correctly.")
+
+        for se_langid in batch:
+            pr.increment()
+            sentence = se_langid["sentence"]
+            # Use .get() so the missing-langid guard below can actually fire.
+            lang = supported_langs.get(se_langid["langid"])
+            if lang is None:
+                raise RuntimeError(f"Logic err: Missing langid={se_langid['langid']}")
+            sentence.set_lowercase_text(lang.parser)
+            session.add(sentence)
+        session.commit()
+
+        last_batch_ids = curr_batch_ids
+        batch = _get_next_batch(batch_size)
+
+    session.commit()
+    output_function("Done.")
+
+
+def _update_term_images(session, output_function):
+    """
+    Fix TermImage sources (ref https://github.com/LuteOrg/lute-v3/issues/582)
+
+    Prior to issue 582, images were stored in the db as url-like items,
+    "/userimages/{language_id}/{term}.jpeg".
+
+    e.g. wordimages.wisource = "/userimages/2/thiết_kế_nội_thất.jpeg", including
+    zero-width spaces. This routine removes the "/userimages/{language_id}/"
+    from the start of the strings.
+
+    Also, some images didn't have ".jpeg" at the end ... this adds that.
+    """
+
+    def _fix_source(s):
+        "Remove the leading userimages and languageid, add .jpeg if needed."
+        parts = s.split("/", 3)
+        ret = parts[-1]
+        if not ret.endswith(".jpeg"):
+            ret = f"{ret}.jpeg"
+        return ret
+
+    stmt = select(TermImage).where(TermImage.source.contains("userimages"))
+    recalc = session.execute(stmt).scalars().all()
+    if len(recalc) == 0:
+        # Nothing to calculate, quit.
+        return
+
+    batch_size = 1000
+    output_function(f"Fixing image sources for {len(recalc)} word images.")
+    pr = ProgressReporter(len(recalc), output_function, report_every=batch_size)
+    n = 0
+    for ti in recalc:
+        pr.increment()
+        ti.source = _fix_source(ti.source)
+        session.add(ti)
+        n += 1
+        if n % batch_size == 0:
+            session.commit()
+
+    # Any remaining.
+    session.commit()
+    output_function("Done.")
+
+
+def clean_data(session, output_function):
+    "Clean all data as required, sending messages to output_function."
+    _set_texts_word_count(session, output_function)
+    _load_sentence_textlc(session, output_function)
+    _update_term_images(session, output_function)
diff --git a/lute/db/demo.py b/lute/db/demo.py
index 2bb46f43a..b02eaa27b 100644
--- a/lute/db/demo.py
+++ b/lute/db/demo.py
@@ -8,169 +8,168 @@
 data is demo.
""" -import os -import re -from glob import glob -import yaml from sqlalchemy import text - -from lute.models.language import Language -from lute.models.book import Book -from lute.book.stats import refresh_stats -from lute.models.setting import SystemSetting -from lute.db import db +from lute.language.service import Service as LanguageService +from lute.book.model import Repository +from lute.book.stats import Service as StatsService +from lute.models.repositories import SystemSettingRepository, LanguageRepository import lute.db.management -def contains_demo_data(): - """ - True if IsDemoData setting is present. - """ - ss = SystemSetting.get_value("IsDemoData") - if ss is None: - return False - return True - - -def remove_flag(): - """ - Remove IsDemoData setting. - """ - SystemSetting.delete_key("IsDemoData") - db.session.commit() - - -def tutorial_book_id(): - """ - Return the book id of the tutorial. - """ - if not contains_demo_data(): - return None - sql = """select BkID from books - inner join languages on LgID = BkLgID - where LgName = 'English' and BkTitle = 'Tutorial' - """ - r = db.session.execute(text(sql)).first() - if r is None: - return None - return int(r[0]) - - -def delete_demo_data(): - """ - If this is a demo, wipe everything. - """ - if not contains_demo_data(): - raise RuntimeError("Can't delete non-demo data.") - remove_flag() - lute.db.management.delete_all_data() - - -# Loading demo data. - - -def demo_data_path(): - """ - Path to the demo data yaml files. - """ - thisdir = os.path.dirname(__file__) - demo_dir = os.path.join(thisdir, "demo") - return os.path.abspath(demo_dir) - - -def get_demo_language(filename): - """ - Create a new Language object from a yaml definition. - """ - with open(filename, "r", encoding="utf-8") as file: - d = yaml.safe_load(file) - - lang = Language() - - def load(key, method): - if key in d: - val = d[key] - # Handle boolean values - if isinstance(val, str): - temp = val.lower() - if temp == "true": - val = True - elif temp == "false": - val = False - setattr(lang, method, val) - - # Define mappings for fields - mappings = { - "name": "name", - "dict_1": "dict_1_uri", - "dict_2": "dict_2_uri", - "sentence_translation": "sentence_translate_uri", - "show_romanization": "show_romanization", - "right_to_left": "right_to_left", - "parser_type": "parser_type", - "character_substitutions": "character_substitutions", - "split_sentences": "regexp_split_sentences", - "split_sentence_exceptions": "exceptions_split_sentences", - "word_chars": "word_characters", - } - - for key in d.keys(): - funcname = mappings.get(key, "") - if funcname: - load(key, funcname) - - return lang - - -def predefined_languages(): - "Languages that have yaml files." - demo_glob = os.path.join(demo_data_path(), "languages", "*.yaml") - langs = [get_demo_language(f) for f in glob(demo_glob)] - langs.sort(key=lambda x: x.name) - return langs - - -def load_demo_languages(): - """ - Load predefined languages. Assume everything is supported. - - This method will also be called during acceptance tests, so it's "public". - """ - supported = [lang for lang in predefined_languages() if lang.is_supported] - for lang in supported: - db.session.add(lang) - db.session.commit() - - -def load_demo_stories(): - "Load the stories." 
-    demo_glob = os.path.join(demo_data_path(), "stories", "*.txt")
-    for filename in glob(demo_glob):
-        with open(filename, "r", encoding="utf-8") as f:
-            content = f.read()
-
-        langpatt = r"language:\s*(.*)\n"
-        lang = re.search(langpatt, content).group(1).strip()
-        lang = Language.find_by_name(lang)
-
-        if lang is None or not lang.is_supported:
-            pass
-        else:
-            title_match = re.search(r"title:\s*(.*)\n", content)
-            title = title_match.group(1).strip()
-            content = re.sub(r"#.*\n", "", content)
-            b = Book.create_book(title, lang, content)
-            db.session.add(b)
-    SystemSetting.set_value("IsDemoData", True)
-    db.session.commit()
-    refresh_stats()
-
-
-def load_demo_data():
-    """
-    Load the data.
-    """
-    load_demo_languages()
-    load_demo_stories()
-    SystemSetting.set_value("IsDemoData", True)
-    db.session.commit()
+class Service:
+    "Demo database service."
+
+    def __init__(self, session):
+        self.session = session
+
+    def _demo_languages(self):
+        """
+        Demo languages to be loaded for new users.
+        Also loaded during tests.
+        """
+        return [
+            "Arabic",
+            "Classical Chinese",
+            "Czech",
+            "English",
+            "French",
+            "German",
+            "Greek",
+            "Hindi",
+            "Japanese",
+            "Russian",
+            "Sanskrit",
+            "Spanish",
+            "Turkish",
+        ]
+
+    def set_load_demo_flag(self):
+        "Set the flag."
+        repo = SystemSettingRepository(self.session)
+        repo.set_value("LoadDemoData", True)
+        self.session.commit()
+
+    def remove_load_demo_flag(self):
+        "Remove the flag."
+        repo = SystemSettingRepository(self.session)
+        repo.delete_key("LoadDemoData")
+        self.session.commit()
+
+    def _flag_exists(self, flagname):
+        "True if flag exists, else false."
+        repo = SystemSettingRepository(self.session)
+        return repo.key_exists(flagname)
+
+    def should_load_demo_data(self):
+        "True if the LoadDemoData flag is set."
+        return self._flag_exists("LoadDemoData")
+
+    def contains_demo_data(self):
+        "True if the IsDemoData flag is set."
+        return self._flag_exists("IsDemoData")
+
+    def remove_flag(self):
+        """
+        Remove IsDemoData setting.
+        """
+        if not self.contains_demo_data():
+            raise RuntimeError("Can't delete non-demo data.")
+
+        repo = SystemSettingRepository(self.session)
+        repo.delete_key("IsDemoData")
+        self.session.commit()
+
+    def tutorial_book_id(self):
+        """
+        Return the book id of the tutorial.
+        """
+        if not self.contains_demo_data():
+            return None
+        sql = """select BkID from books
+            inner join languages on LgID = BkLgID
+            where LgName = 'English' and BkTitle = 'Tutorial'
+        """
+        r = self.session.execute(text(sql)).first()
+        if r is None:
+            return None
+        return int(r[0])
+
+    def delete_demo_data(self):
+        """
+        If this is a demo, wipe everything.
+        """
+        if not self.contains_demo_data():
+            raise RuntimeError("Can't delete non-demo data.")
+        self.remove_flag()
+        lute.db.management.delete_all_data(self.session)
+
+    # Loading demo data.
+
+    def load_demo_languages(self):
+        """
+        Load selected predefined languages, if they're supported.
+
+        This method will also be called during acceptance tests, so it's public.
+        """
+        demo_langs = self._demo_languages()
+        service = LanguageService(self.session)
+        langs = [service.get_language_def(langname).language for langname in demo_langs]
+        supported = [lang for lang in langs if lang.is_supported]
+        for lang in supported:
+            self.session.add(lang)
+        self.session.commit()
+
+    def load_demo_stories(self):
+        "Load the stories for any languages already loaded."
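+        # Stories are added only for demo languages that are supported
+        # by this install *and* already present in the db (see the
+        # filtering below).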
+        demo_langs = self._demo_languages()
+        service = LanguageService(self.session)
+        langdefs = [service.get_language_def(langname) for langname in demo_langs]
+
+        langrepo = LanguageRepository(self.session)
+        langdefs = [
+            d
+            for d in langdefs
+            if d.language.is_supported
+            and langrepo.find_by_name(d.language.name) is not None
+        ]
+
+        r = Repository(self.session)
+        for d in langdefs:
+            for b in d.books:
+                r.add(b)
+        r.commit()
+
+        repo = SystemSettingRepository(self.session)
+        repo.set_value("IsDemoData", True)
+        self.session.commit()
+
+        svc = StatsService(self.session)
+        svc.refresh_stats()
+
+    def _db_has_data(self):
+        "True if the db contains any language data."
+        sql = "select LgID from languages limit 1"
+        r = self.session.execute(text(sql)).first()
+        return r is not None
+
+    def load_demo_data(self):
+        """
+        Load the data.
+        """
+        if self._db_has_data():
+            self.remove_load_demo_flag()
+            return
+
+        repo = SystemSettingRepository(self.session)
+        do_load = repo.get_value("LoadDemoData")
+        if do_load is None:
+            # Only load if flag is explicitly set.
+            return
+
+        do_load = bool(int(do_load))
+        if not do_load:
+            return
+
+        self.load_demo_languages()
+        self.load_demo_stories()
+        self.remove_load_demo_flag()
+        repo.set_value("IsDemoData", True)
+        self.session.commit()
diff --git a/lute/db/demo/README.md b/lute/db/demo/README.md
deleted file mode 100644
index ccdf235ab..000000000
--- a/lute/db/demo/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Demo data.
-
-This should only be added to an empty database.
-
-Tested in: tests/src/Utils/DemoDataLoader_Test.php
-
-Copy _language.yaml.example to [langname].yaml, fill it in. Create a story .txt file for the lang too.
\ No newline at end of file
diff --git a/lute/db/demo/languages/_language.yaml.example b/lute/db/demo/languages/_language.yaml.example
deleted file mode 100644
index 3cd3982e7..000000000
--- a/lute/db/demo/languages/_language.yaml.example
+++ /dev/null
@@ -1,12 +0,0 @@
-name:
-dict_1:
-dict_2:
-sentence_translation:
-# show_romanization:
-# right_to_left:
-
-# parser_type: defaults to space delimited.
-# character_substitutions:
-# split_sentences:
-# split_sentence_exceptions:
-# word_chars:
diff --git a/lute/db/demo/languages/arabic.yaml b/lute/db/demo/languages/arabic.yaml
deleted file mode 100644
index 036fd126e..000000000
--- a/lute/db/demo/languages/arabic.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-name: Arabic
-dict_1: https://www.arabicstudentsdictionary.com/search?q=###
-dict_2: "*https://translate.google.com/?hl=es&sl=ar&tl=en&text=###&op=translate"
-sentence_translation: "*https://translate.google.com/?hl=es&sl=ar&tl=en&text=###"
-show_romanization: true
-right_to_left: true
-
-# parser_type: defaults to space delimited.
-# character_substitutions:
-split_sentences: .!?؟۔‎
-# split_sentence_exceptions:
-word_chars: \x{0600}-\x{06FF}\x{FE70}-\x{FEFC}
diff --git a/lute/db/demo/languages/classical_chinese.yaml b/lute/db/demo/languages/classical_chinese.yaml
deleted file mode 100644
index ac8a38893..000000000
--- a/lute/db/demo/languages/classical_chinese.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-name: Classical Chinese
-dict_1: https://ctext.org/dictionary.pl?if=en&char=###
-dict_2: https://www.bing.com/images/search?q=###&form=HDRSC2&first=1&tsc=ImageHoverTitle
-sentence_translation: "*https://www.deepl.com/translator#ch/en/###"
-show_romanization: true
-# right_to_left:
-
-parser_type: classicalchinese
-# character_substitutions:
-split_sentences: .!?。!?
-# split_sentence_exceptions: -word_chars: 一-龥 diff --git a/lute/db/demo/languages/english.yaml b/lute/db/demo/languages/english.yaml deleted file mode 100644 index f6a8d20ec..000000000 --- a/lute/db/demo/languages/english.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: English -dict_1: https://en.thefreedictionary.com/### -dict_2: https://www.bing.com/images/search?q=###&form=HDRSC2&first=1&tsc=ImageHoverTitle -sentence_translation: "*https://www.deepl.com/translator#en/en/###" -# show_romanization: -# right_to_left: - -# parser_type: defaults to space delimited. -# character_substitutions: -# split_sentences: -# split_sentence_exceptions: -# word_chars: diff --git a/lute/db/demo/languages/french.yaml b/lute/db/demo/languages/french.yaml deleted file mode 100644 index b8f58aee4..000000000 --- a/lute/db/demo/languages/french.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: French -dict_1: https://fr.thefreedictionary.com/### -dict_2: https://www.bing.com/images/search?q=###&form=HDRSC2&first=1&tsc=ImageHoverTitle -sentence_translation: "*https://www.deepl.com/translator#fr/en/###" -# show_romanization: -# right_to_left: - -# parser_type: defaults to space delimited. -# character_substitutions: -# split_sentences: -# split_sentence_exceptions: -# word_chars: diff --git a/lute/db/demo/languages/german.yaml b/lute/db/demo/languages/german.yaml deleted file mode 100644 index c28756b60..000000000 --- a/lute/db/demo/languages/german.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: German -dict_1: https://de.thefreedictionary.com/### -dict_2: https://www.wordreference.com/deen/### -sentence_translation: "*https://www.deepl.com/translator#de/en/###" -# show_romanization: -# right_to_left: - -# parser_type: defaults to space delimited. -# character_substitutions: -# split_sentences: -# split_sentence_exceptions: -# word_chars: diff --git a/lute/db/demo/languages/greek.yaml b/lute/db/demo/languages/greek.yaml deleted file mode 100644 index 7c313777c..000000000 --- a/lute/db/demo/languages/greek.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: Greek -dict_1: https://www.wordreference.com/gren/### -dict_2: https://en.wiktionary.org/wiki/### -sentence_translation: "*https://www.deepl.com/translator#el/en/###" -show_romanization: true -# right_to_left: - -# parser_type: defaults to space delimited. -# character_substitutions: -# split_sentences: -# split_sentence_exceptions: -word_chars: a-zA-ZÀ-ÖØ-öø-ȳͰ-Ͽἀ-ῼ diff --git a/lute/db/demo/languages/japanese.yaml b/lute/db/demo/languages/japanese.yaml deleted file mode 100644 index 56f3100e6..000000000 --- a/lute/db/demo/languages/japanese.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: Japanese -dict_1: https://jisho.org/search/### -dict_2: https://www.bing.com/images/search?q=###&form=HDRSC2&first=1&tsc=ImageHoverTitle -sentence_translation: "*https://www.deepl.com/translator#jp/en/###" -show_romanization: true -# right_to_left: - -parser_type: japanese -# character_substitutions: -split_sentences: .!?。?! -# split_sentence_exceptions: - -# Ref https://stackoverflow.com/questions/5797505/php-regex-expression-involving-japanese -# This shouldn't ever be used due to use of MeCab (parser_type). 
-word_chars: \p{Han}\p{Katakana}\p{Hiragana} diff --git a/lute/db/demo/languages/spanish.yaml b/lute/db/demo/languages/spanish.yaml deleted file mode 100644 index 3aa5f0c3b..000000000 --- a/lute/db/demo/languages/spanish.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: Spanish -dict_1: https://es.thefreedictionary.com/### -dict_2: https://www.wordreference.com/es/en/translation.asp?spen=### -sentence_translation: "*https://www.deepl.com/translator#es/en/###" -# show_romanization: false -# right_to_left: false - -# parser_type: defaults to space delimited. -# character_substitutions: -# split_sentences: .!?؟۔‎ -# split_sentence_exceptions: -# word_chars: \x{0600}-\x{06FF}\x{FE70}-\x{FEFC} diff --git a/lute/db/demo/languages/turkish.yaml b/lute/db/demo/languages/turkish.yaml deleted file mode 100644 index f6f094666..000000000 --- a/lute/db/demo/languages/turkish.yaml +++ /dev/null @@ -1,12 +0,0 @@ -name: Turkish -dict_1: https://www.wordreference.com/tren/### -dict_2: https://tr.wiktionary.org/### -sentence_translation: "*https://www.deepl.com/translator#tr/en/###" -show_romanization: true -# right_to_left: - -parser_type: turkish -# character_substitutions: -# split_sentences: -# split_sentence_exceptions: -word_chars: a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑğĞıİöÖüÜşŞçÇ diff --git a/lute/db/demo/stories/ar_demo.txt b/lute/db/demo/stories/ar_demo.txt deleted file mode 100644 index 96cfb0d91..000000000 --- a/lute/db/demo/stories/ar_demo.txt +++ /dev/null @@ -1,13 +0,0 @@ -# title: Examples -# language: Arabic -# - -مرحبا، كيف حالك ؟ -مرحبا, أنا بخير -هل انت جديدٌ هنا؟ لم أراك من قبل -انا طالب جديد.لقد وصلت البارحة -انا محمد, تشرفت بلقائك - -شجرة الحياة - -تحكي هذه القصة عن ولد صغير يُدعى «يوسف»، يعيش مع أمه الأرملة الفقيرة، يساعدها ويحنو عليها ويحبها حبًا جمًا. وفي يوم من الأيام يصيب المرض أم يوسف ويشتد عليها، ولا يعرف يوسف ماذا يفعل لإنقاذها، فلا يجد أمامه سوى اللجوء إلى الجِنِّيَّة «وِداد» التي تدله على شجرة فيها الشفاء لأمه، هذه الشجرة تقع في أعلى الجبل المقابل لمنزلهم، وعلى يوسف أن يتسلق هذا الجبل ويواجه المخاطر من أجل أن يأتي لأمه بالدواء الموجود في أوراق هذه الشجرة، فهل سينجح يوسف في ذلك؟ وماذا ينتظره من مخاطر وأهوال؟ diff --git a/lute/db/demo/stories/cc_demo.txt b/lute/db/demo/stories/cc_demo.txt deleted file mode 100644 index 696216121..000000000 --- a/lute/db/demo/stories/cc_demo.txt +++ /dev/null @@ -1,4 +0,0 @@ -# title: 逍遙遊 -# language: Classical Chinese -# -北冥有魚,其名為鯤。鯤之大,不知其幾千里也。化而為鳥,其名為鵬。鵬之背,不知其幾千里也;怒而飛,其翼若垂天之雲。是鳥也,海運則將徙於南冥。南冥者,天池也。齊諧者,志怪者也。諧之言曰:「鵬之徙於南冥也,水擊三千里,摶扶搖而上者九萬里,去以六月息者也。」野馬也,塵埃也,生物之以息相吹也。天之蒼蒼,其正色邪?其遠而無所至極邪?其視下也亦若是,則已矣。且夫水之積也不厚,則負大舟也無力。覆杯水於坳堂之上,則芥為之舟,置杯焉則膠,水淺而舟大也。風之積也不厚,則其負大翼也無力。故九萬里則風斯在下矣,而後乃今培風;背負青天而莫之夭閼者,而後乃今將圖南。蜩與學鳩笑之曰:「我決起而飛,槍1榆、枋,時則不至而控於地而已矣,奚以之九萬里而南為?」適莽蒼者三湌而反,腹猶果然;適百里者宿舂糧;適千里者三月聚糧。之二蟲又何知!小知不及大知,小年不及大年。奚以知其然也?朝菌不知晦朔,蟪蛄不知春秋,此小年也。楚之南有冥靈者,以五百歲為春,五百歲為秋;上古有大椿者,以八千歲為春,八千歲為秋。而彭祖乃今以久特聞,眾人匹之,不亦悲乎! diff --git a/lute/db/demo/stories/de_Stadtmusikanten.txt b/lute/db/demo/stories/de_Stadtmusikanten.txt deleted file mode 100644 index d40be9ae3..000000000 --- a/lute/db/demo/stories/de_Stadtmusikanten.txt +++ /dev/null @@ -1,6 +0,0 @@ -# title: Die Bremer Stadtmusikanten -# language: German -# -Es hatte ein Mann einen Esel, der schon lange Jahre die Säcke unverdrossen zur Mühle getragen hatte, dessen Kräfte aber nun zu Ende gingen, so daß er zur Arbeit immer untauglicher ward. Da dachte der Herr daran, ihn aus dem Futter zu schaffen, aber der Esel merkte, daß kein guter Wind wehte, lief fort und machte sich auf den Weg nach Bremen; dort, meinte er, könnte er ja Stadtmusikant werden. 
- -Als er ein Weilchen fortgegangen war, fand er einen Jagdhund auf dem Wege liegen, der jappte wie einer, der sich müde gelaufen hat. "Nun, was jappst du so, Packan?" fragte der Esel. "Ach," sagte der Hund, "weil ich alt bin und jeden Tag schwächer werde, auch auf der Jagd nicht mehr fort kann, hat mich mein Herr wollen totschlagen, da hab ich Reißaus genommen; aber womit soll ich nun mein Brot verdienen?" - "Weißt du was?" sprach der Esel, "ich gehe nach Bremen und werde dort Stadtmusikant, geh mit und laß dich auch bei der Musik annehmen. Ich spiele die Laute und du schlägst die Pauken." diff --git a/lute/db/demo/stories/es_aladino.txt b/lute/db/demo/stories/es_aladino.txt deleted file mode 100644 index 72ccc1573..000000000 --- a/lute/db/demo/stories/es_aladino.txt +++ /dev/null @@ -1,6 +0,0 @@ -# title: Aladino y la lámpara maravillosa -# language: Spanish -# -Érase una vez un muchacho llamado Aladino que vivía en el lejano Oriente con su madre, en una casa sencilla y humilde. Tenían lo justo para vivir, así que cada día, Aladino recorría el centro de la ciudad en busca de algún alimento que llevarse a la boca. - -En una ocasión paseaba entre los puestos de fruta del mercado, cuando se cruzó con un hombre muy extraño con pinta de extranjero. Aladino se quedó sorprendido al escuchar que le llamaba por su nombre. diff --git a/lute/db/demo/stories/fr_goldilocks.txt b/lute/db/demo/stories/fr_goldilocks.txt deleted file mode 100644 index 076008780..000000000 --- a/lute/db/demo/stories/fr_goldilocks.txt +++ /dev/null @@ -1,6 +0,0 @@ -# title: Boucles d’or et les trois ours -# language: French -# -Il était une fois trois ours: un papa ours, une maman ours et un bébé ours. Ils habitaient tous ensemble dans une maison jaune au milieu d'une grande forêt. - -Un jour, Maman Ours prépara une grande marmite de porridge délicieux et fumant pour le petit déjeuner. Il était trop chaud pour pouvoir être mangé, alors les ours décidèrent d'aller se promener en attendant que le porridge refroidisse. diff --git a/lute/db/demo/stories/gr_demo.txt b/lute/db/demo/stories/gr_demo.txt deleted file mode 100644 index 51f8f0b53..000000000 --- a/lute/db/demo/stories/gr_demo.txt +++ /dev/null @@ -1,18 +0,0 @@ -# title: Γεια σου, Νίκη. Ο Πέτρος είμαι. -# language: Greek -# -Πέτρος: Γεια σου, Νίκη. Ο Πέτρος είμαι. -Νίκη: Α, γεια σου Πέτρο. Τι κάνεις; -Πέτρος: Μια χαρά. Σε παίρνω για να πάμε καμιά βόλτα αργότερα. Τι λες; -Νίκη: Α, ωραία. Κι εγώ θέλω να βγω λίγο. Συνέχεια διαβάζω για τις εξετάσεις… κουράστηκα πια. Πού λες να πάμε; -Πέτρος: Στη γνωστή καφετέρια στην πλατεία. Θα είναι και άλλα παιδιά από την τάξη μας εκεί. -Νίκη: Ναι; Ποιοι θα είναι; -Πέτρος: Ο Γιάννης, ο Αντρέας και η Ελπίδα. -Νίκη: Ωραία. Θα πάτε και πουθενά αλλού μετά; -Πέτρος: Ναι, λέμε να πάμε στον κινηματογράφο που είναι κοντά στην καφετέρια. Παίζει μια κωμωδία. -Νίκη: Α, δεν μπορώ να καθίσω έξω μέχρι τόσο αργά. Πρέπει να γυρίσω σπίτι για να διαβάσω. -Πέτρος: Έλα τώρα. Διαβάζεις αύριο… -Νίκη: Όχι, όχι, αδύνατον. Είμαι πολύ πίσω στο διάβασμά μου. -Πέτρος: Καλά, έλα μόνο στην καφετέρια τότε. Θα περάσω να σε πάρω γύρω στις έξι να πάμε μαζί. Εντάξει; -Νίκη: Εντάξει. Γεια. -Πέτρο: Τα λέμε. Γεια. 
diff --git a/lute/db/demo/stories/jp_kitakaze_to_taiyou.txt b/lute/db/demo/stories/jp_kitakaze_to_taiyou.txt deleted file mode 100644 index 4e1bf6839..000000000 --- a/lute/db/demo/stories/jp_kitakaze_to_taiyou.txt +++ /dev/null @@ -1,8 +0,0 @@ -# title: 北風と太陽 - きたかぜたいよう -# language: Japanese -# -北風と太陽 - -「おれの方が強い。」「いいや、ぼくの方が強い。」 -北風と太陽の声が聞こえます。二人はどちらの力が強いかでケンカをしているようです。 -「太陽が毎日元気だから、暑くてみんな困っているよ。おれが涼しい風を吹くと、みんな嬉しそうだ。」 diff --git a/lute/db/demo/stories/tr_demo.txt b/lute/db/demo/stories/tr_demo.txt deleted file mode 100644 index 6c6e98a5a..000000000 --- a/lute/db/demo/stories/tr_demo.txt +++ /dev/null @@ -1,8 +0,0 @@ -# title: Büyük ağaç -# language: Turkish -# -Büyük ağaç eskiden aşılanmış ve her yıl güzel, iri, pembe şeftaliler verirmiş, insanın eline sığmazmış bu şeftaliler. Öyle güzelmişler ki insan yemeye kıyamazmış onları. Bahçıvan, bu büyük ağacı yabancı bir uzmanın kendi ülkesinden getirdiği bir tohumla aşıladığını söylermiş. Belli ki böyle masraf edilen bir ağaçta yetişen şeftaliler oldukça değerliymiş. - -İki ağacın da gövdelerine nazar değmesin diye birer nazarlık asılıymış. - -Ağaçlardan küçük olanında her yıl bin tane çiçek açarmış ama bir tek şeftali bile yetişmezmiş üzerinde. Ya çiçekleri dökülürmüş, ya da ham şeftaliler kuruyup dallardan düşermiş. Bahçıvan küçük ağaç için elinden geleni yapmış ama değişen bir şey olmamış. Yıllar geçtikçe dalları ve yaprakları çoğalmış ama bir tek şeftali bile görünmemiş üzerinde. diff --git a/lute/db/demo/stories/tutorial.txt b/lute/db/demo/stories/tutorial.txt deleted file mode 100644 index edc4af09a..000000000 --- a/lute/db/demo/stories/tutorial.txt +++ /dev/null @@ -1,84 +0,0 @@ -# title: Tutorial -# language: English -# -Welcome to Lute! This short guide should get you going. - -Navigation - -This tutorial has multiple pages. Above the title are some arrows to navigate forwards and backwards. In longer texts, you can jump forward or back ten pages at a time as well. - -1. The Basics - -All of these words are blue because they are "unknown" - according to Lute, this is the first time you're seeing these words. - -You can click on a word, and create a definition. For example, click on this word: elephant. - -When the form pops up in the right-hand frame, a dictionary is loaded below. Copy-paste something from the dictionary into the translation, or make up your own, mark the status, add some tags if you want (eg, type "noun" in the tags field), and click save. From now on, every English text that you read that contains the word "elephant" will show the status. If you hover over any "elephant", you'll see some information. - -1.1 Multiple dictionaries. - -Next to the term is a small arrow, "Lookup". If you click on this repeatedly, Lute cycles through the dictionaries that you configure for the language in the "Languages" link on the homepage. - -1.2 Images - -For this demo, English has been configured to do an image search for the second English dictionary, so if you click on the arrow, you'll see some happy elephants (if you clicked on elephant!). - -You can also click on the little "eye icon" next to the term, and it opens up a common image search URL. - -In either case, if you click on one of the images shown in the list, that image is saved in your public/media/images folder. When you hover over the word in the reading pane, that picture is included in the word hover. Try adding an image for your elephant by clicking on the term, clicking the eye icon, and clicking a picture you like. Then hover over your elephant. 
- -Note: sometimes these images make _no sense_ -- it's using Bing image search, and it does the best it can with the limited context it has. - -2. Multi-word Terms - -You can create multi-word terms by clicking and dragging across multiple words, then release the mouse. Try creating a term for the phrase "the cat's pyjamas", and add a translation and set the status. - -(A brief side note: Lute keeps track of where you are in any text. If you click the Home link above to leave this tutorial, and later click the Tutorial link from the Text listing, Lute will open the last page you were at.) - -3. Parent Terms - -Sometimes it helps to associate terms with a "parent". For example, the verb "to have" is conjugated in various forms as "I have a cold", "he has a dog", "they had dinner". First create a Term for "have". Then create a Term for "has", and in the Parent field start typing "have". Lute will show the existing Term "have" in the drop down, and if you select it, "has" will be associated with that on save, and when you hover over "has" you'll see the parent's information as well. - -If you enter a non-existent Parent word, Lute will create a placeholder Term for that Parent, copying some content from your term. For example, try creating a Term for the word "dogs", associating it with the non-existent Term "dog". When you save "dogs", both will be updated. - -Terms can have multiple parents, too. Hit the Enter (or Return) key after each parent. For example, if you wanted to associate the Term "puppies" with both "puppy" and "dog", click on "puppies", and in the Parents text box type "puppy", hit Enter, type "dog", and hit Enter. Sometimes this is necessary: for example, in Spanish, "se sienta" can either be a conjugation of "sentirse" (to feel) or "sentarse" (to sit), depending on the context. - -4. Mark the remainder as "Well Known" - -When you're done creating Terms on a page, you will likely still have a bunch of blue words, or "unknowns", left over, even though you really know these words. You can set all of these to "Well Known" in one shot with the green checkmark at the bottom of the page ("Mark rest as known"). Try that now to see what happens. This button only affects words on the current page, so when you go to the next page, you'll see that some words are still unknown. - -There are other buttons at the bottom of the page. The green checkmark with the arrow sets the remaining unknowns on the current page, and also goes to the next page. - -5. Keyboard shortcuts - -The small blue question mark in the header shows some keyboard shortcuts. - -5.1 Updating Status - -If you've worked through the tutorial, you'll have noted that words are underlined in blue when you move the mouse over them. You can quickly change the status of the current word by hitting 1, 2, 3, 4, 5, w (for Well-Known), or i (for Ignore). Try hovering over the following words and hit the status buttons: apple, banana, cranberry, donut. - -If you click on a word, it's underlined in red, and the Term edit form is shown. (Before you switch over to the Term editing form, you can still update its status.) You can jump over to the edit form by hitting Tab, and then start editing. - -When a word has been clicked, it's "active", so it keeps the focus. Hovering the mouse over other words won't underline them in blue anymore, and hitting status update hotkeys (1 - 5, w, i) will only update the active word. To "un-click" a word underlined in red, click it again, or hit Escape or Return. Then you'll be back in "Hover mode". 
Try clicking and un-clicking or Escaping any word in this paragraph to get a feel for it. - -Note that for the keyboard shortcuts to work, the reading pane (where the text is) must have the "focus". Click anywhere on the reading pane to re-establish focus. - -5.1 Bulk updates - -If you hold down Shift and click a bunch of words, you can bulk update their statuses. - -5.2 Arrow keys - -The Right and Left arrow keys click the next and previous words. Hit Escape or Return to get back to "hover mode". - -5.3 Copying text - -When a word is hovered over or clicked, hit "c" to copy that word's sentence to your clipboard. Hit "C" to copy the word's full paragraph (multiple sentences). You can also copy arbitrary sections of text by holding down the Shift key while highlighting the text with your mouse. - -6. Next steps - -All done this text! - -Lute keeps track of all of this in your database, so any time you create or import a new Book, all the info you've created is carried forward. - -There's a tutorial follow-up: go to the Home screen, and click the "Tutorial follow-up" in the table. diff --git a/lute/db/demo/stories/tutorial_follow_up.txt b/lute/db/demo/stories/tutorial_follow_up.txt deleted file mode 100644 index 4a6460f82..000000000 --- a/lute/db/demo/stories/tutorial_follow_up.txt +++ /dev/null @@ -1,39 +0,0 @@ -# title: Tutorial follow-up -# language: English -# -Hopefully you've gone through the Tutorial, and created some Terms. - -From the Tutorial, you've already told Lute that you know most of the words on this page. You can hover over words to see information about them, such as your information you might have added about dogs. - -There are still a few blue words, which according to Lute are still "unknown" to you. You can process them like you did on the last text. - -(fyi - If a text has a spelling mikstaske, you can edit it by clicking the small Edit icon next to the title. If you'd like, correct the mistake now, and resave this text.) - - -Appendix: A few other things - -A0. In case you missed it, on the Home screen there are some menu bar items on the top right. Go back there and hover over them to see what you can do. This is all demo data, so you can do what you want. (But don't delete the tutorials until you've gone through them.) - -A1. Term sentences - -In the "Term" edit form, you can click on the "Sentences" link to see where that term or its relations have been used. Click on "elephant", and then click the Sentences link shown to see where that term has been used. - -A2. Archiving, Unarchiving, and Deleting Texts - -When you're done reading a text, you can either Archive it, or Delete it. Archiving clears out the parsing data for a given text, but the text is still available and can be unarchived and re-read. The sentences are also available for searching with the Term "Sentences" link. Deletion completely removes a text, the parsing data, and its sentences. Neither archiving nor deleting touch any Terms you've created, it just clears out the texts. - -On the last page of every book, Lute shows a link for you to archive the book. You can also delete it from the Home screen by clicking on the "Archive" action (the image with the little down arrow) in the right-most column. - -To unarchive the text, go to Home, Text Archive, and click the "Unarchive" action (the little up arrow). - - - -=== - -Those are the the core feature of Lute! There are some sample stories for other languages. Try those out or create your own. 
- -When you're done with the demo, go back to the Home screen and click the link to clear out the database. Lute will delete all of the demo data, and you can get started. You'll be prompted to create your first language, and then you can create your first book. Lute will then ask you to specify your backup preferences, and with that all done, you'll be off and running. - -There is a Lute Discord and Wiki as well -- see the "About" menu bar. - -I hope that you find Lute a fun tool to use for learning languages. Cheers and best wishes! diff --git a/lute/db/language_defs b/lute/db/language_defs new file mode 160000 index 000000000..2dfa1ad2f --- /dev/null +++ b/lute/db/language_defs @@ -0,0 +1 @@ +Subproject commit 2dfa1ad2fc55f5e87de925f30ef3415876e9fa0a diff --git a/lute/db/management.py b/lute/db/management.py index 81bd3ccf4..172fac9b1 100644 --- a/lute/db/management.py +++ b/lute/db/management.py @@ -2,12 +2,15 @@ Db management. """ +import os from sqlalchemy import text -from lute.db import db +from flask import current_app from lute.models.setting import UserSetting +from lute.settings.hotkey_data import initial_hotkey_defaults +from lute.models.repositories import UserSettingRepository -def delete_all_data(): +def delete_all_data(session): """ DANGEROUS! Delete everything, restore user settings, clear sys settings. @@ -23,6 +26,97 @@ def delete_all_data(): "delete from settings", ] for s in statements: - db.session.execute(text(s)) - db.session.commit() - UserSetting.load() + session.execute(text(s)) + session.commit() + add_default_user_settings(session, current_app.env_config.default_user_backup_path) + + +def _revised_mecab_path(repo): + """ + Change the mecab_path if it's not found, and a + replacement is found. + + Lute Docker images are built to be multi-arch, and + interestingly (annoyingly), mecab libraries are installed into + different locations depending on the architecture, even with + the same Dockerfile and base image. + + Returns: new mecab path if old one is missing _and_ + new one found, otherwise just return the old one. + """ + + mp = repo.get_value("mecab_path") + if mp is not None and os.path.exists(mp): + return mp + + # See develop docs for notes on how to find the libmecab path! + candidates = [ + # linux/arm64 + "/lib/aarch64-linux-gnu/libmecab.so.2", + # linux/amd64 + "/lib/x86_64-linux-gnu/libmecab.so.2", + # github CI, ubuntu-latest + "/lib/x86_64-linux-gnu/libmecab.so.2", + ] + replacements = [p for p in candidates if os.path.exists(p)] + if len(replacements) > 0: + return replacements[0] + # Replacement not found, leave current value as-is. + return mp + + +def add_default_user_settings(session, default_user_backup_path): + """ + Load missing user settings with default values. + """ + repo = UserSettingRepository(session) + + def add_initial_vals_if_needed(hsh): + "Add settings as required." + for k, v in hsh.items(): + if not repo.key_exists(k): + s = UserSetting() + s.key = k + s.value = v + session.add(s) + session.commit() + + # These keys are rendered into the global javascript namespace var + # LUTE_USER_SETTINGS, so if any of these keys change, check the usage + # of that variable as well. + keys_and_defaults = { + "backup_enabled": True, + "backup_auto": True, + "backup_warn": True, + "backup_dir": default_user_backup_path, + "backup_count": 5, + "lastbackup": None, + "mecab_path": None, + "japanese_reading": "hiragana", + "current_theme": "-", + "custom_styles": "/* Custom css to modify Lute's appearance. 
*/", + "show_highlights": True, + "current_language_id": 0, + # Behaviour: + "open_popup_in_new_tab": False, + "stop_audio_on_term_form_open": True, + "stats_calc_sample_size": 5, + # Term popups: + "term_popup_promote_parent_translation": True, + "term_popup_show_components": True, + # Anki: + "use_ankiconnect": False, + "ankiconnect_url": "http://127.0.0.1:8765", + } + add_initial_vals_if_needed(keys_and_defaults) + + # Revise the mecab path if necessary. + # Note this is done _after_ the defaults are loaded, + # because the user may have already loaded the defaults + # (e.g. on machine upgrade) and stored them in the db, + # so we may have to _update_ the existing setting. + revised_mecab_path = _revised_mecab_path(repo) + repo.set_value("mecab_path", revised_mecab_path) + session.commit() + + add_initial_vals_if_needed(initial_hotkey_defaults()) diff --git a/lute/db/schema/baseline.sql b/lute/db/schema/baseline.sql index 2018c6fbc..65f061780 100644 --- a/lute/db/schema/baseline.sql +++ b/lute/db/schema/baseline.sql @@ -1,5 +1,5 @@ -- ------------------------------------------ --- Baseline db with demo data. +-- Baseline db with flag to load demo data. -- Migrations tracked in _migrations, settings reset. -- Generated from 'inv db.export.baseline' -- ------------------------------------------ @@ -36,32 +36,24 @@ INSERT INTO _migrations VALUES('20230827_052154_allow_multiple_word_parents.sql' INSERT INTO _migrations VALUES('20231018_211236_remove_excess_texts_fields.sql'); INSERT INTO _migrations VALUES('20231029_092851_create_migration_settings.sql'); INSERT INTO _migrations VALUES('20231101_203811_modify_settings_schema.sql'); -CREATE TABLE IF NOT EXISTS "languages" ( - "LgID" INTEGER NOT NULL , - "LgName" VARCHAR(40) NOT NULL , - "LgDict1URI" VARCHAR(200) NOT NULL , - "LgDict2URI" VARCHAR(200) NULL , - "LgGoogleTranslateURI" VARCHAR(200) NULL , - "LgCharacterSubstitutions" VARCHAR(500) NOT NULL , - "LgRegexpSplitSentences" VARCHAR(500) NOT NULL , - "LgExceptionsSplitSentences" VARCHAR(500) NOT NULL , - "LgRegexpWordCharacters" VARCHAR(500) NOT NULL , - "LgRemoveSpaces" TINYINT NOT NULL , - "LgSplitEachChar" TINYINT NOT NULL , - "LgRightToLeft" TINYINT NOT NULL , - "LgShowRomanization" TINYINT NOT NULL DEFAULT '0' , - "LgParserType" VARCHAR(20) NOT NULL DEFAULT 'spacedel' , - PRIMARY KEY ("LgID") -); -INSERT INTO languages VALUES(1,'Arabic','https://www.arabicstudentsdictionary.com/search?q=###','*https://translate.google.com/?hl=es&sl=ar&tl=en&text=###&op=translate','*https://translate.google.com/?hl=es&sl=ar&tl=en&text=###','´=''|`=''|’=''|‘=''|...=…|..=‥','.!?؟۔‎','Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.','\u0600-\u06FF\uFE70-\uFEFC',0,0,1,1,'spacedel'); -INSERT INTO languages VALUES(2,'Classical Chinese','https://ctext.org/dictionary.pl?if=en&char=###','https://www.bing.com/images/search?q=###&form=HDRSC2&first=1&tsc=ImageHoverTitle','*https://www.deepl.com/translator#ch/en/###','´=''|`=''|’=''|‘=''|...=…|..=‥','.!?。!?','Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.','一-龥',0,0,0,1,'classicalchinese'); -INSERT INTO languages VALUES(3,'English','https://en.thefreedictionary.com/###','https://www.bing.com/images/search?q=###&form=HDRSC2&first=1&tsc=ImageHoverTitle','*https://www.deepl.com/translator#en/en/###','´=''|`=''|’=''|‘=''|...=…|..=‥','.!?','Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.','a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ',0,0,0,0,'spacedel'); -INSERT INTO languages 
VALUES(4,'French','https://fr.thefreedictionary.com/###','https://www.bing.com/images/search?q=###&form=HDRSC2&first=1&tsc=ImageHoverTitle','*https://www.deepl.com/translator#fr/en/###','´=''|`=''|’=''|‘=''|...=…|..=‥','.!?','Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.','a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ',0,0,0,0,'spacedel'); -INSERT INTO languages VALUES(5,'German','https://de.thefreedictionary.com/###','https://www.wordreference.com/deen/###','*https://www.deepl.com/translator#de/en/###','´=''|`=''|’=''|‘=''|...=…|..=‥','.!?','Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.','a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ',0,0,0,0,'spacedel'); -INSERT INTO languages VALUES(6,'Greek','https://www.wordreference.com/gren/###','https://en.wiktionary.org/wiki/###','*https://www.deepl.com/translator#el/en/###','´=''|`=''|’=''|‘=''|...=…|..=‥','.!?','Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.','a-zA-ZÀ-ÖØ-öø-ȳͰ-Ͽἀ-ῼ',0,0,0,1,'spacedel'); -INSERT INTO languages VALUES(7,'Japanese','https://jisho.org/search/###','https://www.bing.com/images/search?q=###&form=HDRSC2&first=1&tsc=ImageHoverTitle','*https://www.deepl.com/translator#jp/en/###','´=''|`=''|’=''|‘=''|...=…|..=‥','.!?。?!','Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.','\p{Han}\p{Katakana}\p{Hiragana}',0,0,0,1,'japanese'); -INSERT INTO languages VALUES(8,'Spanish','https://es.thefreedictionary.com/###','https://www.wordreference.com/es/en/translation.asp?spen=###','*https://www.deepl.com/translator#es/en/###','´=''|`=''|’=''|‘=''|...=…|..=‥','.!?','Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.','a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ',0,0,0,0,'spacedel'); -INSERT INTO languages VALUES(9,'Turkish','https://www.wordreference.com/tren/###','https://tr.wiktionary.org/###','*https://www.deepl.com/translator#tr/en/###','´=''|`=''|’=''|‘=''|...=…|..=‥','.!?','Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds.','a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑğĞıİöÖüÜşŞçÇ',0,0,0,1,'turkish'); +INSERT INTO _migrations VALUES('20231130_141236_add_TxWordCount.sql'); +INSERT INTO _migrations VALUES('20231210_103924_add_book_audio_fields.sql'); +INSERT INTO _migrations VALUES('20240101_122610_add_bookstats_status_distribution.sql'); +INSERT INTO _migrations VALUES('20240118_154258_change_status_abbrev.sql'); +INSERT INTO _migrations VALUES('20240113_215142_add_term_follow_parent_bool.sql'); +INSERT INTO _migrations VALUES('20240125_drop_BkWordCount.sql'); +INSERT INTO _migrations VALUES('20240125_drop_bookstats_wordcount.sql'); +INSERT INTO _migrations VALUES('20240207_01_create_languagedicts.sql'); +INSERT INTO _migrations VALUES('20240207_02_drop_old_language_fields.sql'); +INSERT INTO _migrations VALUES('20240525_create_textbookmarks.sql'); +INSERT INTO _migrations VALUES('20240815_clean_up_bad_wordtags.sql'); +INSERT INTO _migrations VALUES('20241103_change_lastbackup_to_user_setting.sql'); +INSERT INTO _migrations VALUES('20241214_add_SeTextLC.sql'); +INSERT INTO _migrations VALUES('20241221_add_wordsread_table.sql'); +INSERT INTO _migrations VALUES('20241221_clean_up_missing_relationships.sql'); +INSERT INTO _migrations VALUES('20250102_add_TxStartDate.sql'); +INSERT INTO _migrations VALUES('20241220_fix_for_wordsread_table_load.sql'); +INSERT INTO _migrations VALUES('20250206_create_srsexportspecs.sql'); CREATE TABLE IF NOT EXISTS "statuses" ( "StID" INTEGER NOT NULL , "StText" VARCHAR(20) NOT NULL , @@ -74,8 +66,8 @@ INSERT INTO statuses VALUES(2,'New (2)','2'); INSERT INTO statuses VALUES(3,'Learning (3)','3'); INSERT INTO statuses VALUES(4,'Learning (4)','4'); INSERT INTO statuses VALUES(5,'Learned','5'); -INSERT INTO statuses VALUES(98,'Ignored','Ign'); -INSERT INTO statuses VALUES(99,'Well 
Known','WKn'); +INSERT INTO statuses VALUES(98,'Ignored','I'); +INSERT INTO statuses VALUES(99,'Well Known','W'); CREATE TABLE IF NOT EXISTS "tags" ( "TgID" INTEGER NOT NULL , "TgText" VARCHAR(20) NOT NULL , @@ -106,29 +98,10 @@ CREATE TABLE IF NOT EXISTS "sentences" ( "SeID" INTEGER NOT NULL , "SeTxID" INTEGER NOT NULL , "SeOrder" SMALLINT NOT NULL , - "SeText" TEXT NULL , + "SeText" TEXT NULL , SeTextLC TEXT null, PRIMARY KEY ("SeID"), FOREIGN KEY("SeTxID") REFERENCES "texts" ("TxID") ON UPDATE NO ACTION ON DELETE CASCADE ); -CREATE TABLE IF NOT EXISTS "bookstats" ( - "BkID" INTEGER NOT NULL , - "wordcount" INTEGER NULL , - "distinctterms" INTEGER NULL , - "distinctunknowns" INTEGER NULL , - "unknownpercent" INTEGER NULL , - PRIMARY KEY ("BkID"), - FOREIGN KEY("BkID") REFERENCES "books" ("BkID") ON UPDATE NO ACTION ON DELETE CASCADE -); -INSERT INTO bookstats VALUES(1,382,170,170,100); -INSERT INTO bookstats VALUES(2,83,63,63,100); -INSERT INTO bookstats VALUES(3,1162,357,357,100); -INSERT INTO bookstats VALUES(4,489,191,191,100); -INSERT INTO bookstats VALUES(5,157,99,99,100); -INSERT INTO bookstats VALUES(6,115,100,100,100); -INSERT INTO bookstats VALUES(7,110,85,85,100); -INSERT INTO bookstats VALUES(8,175,120,120,100); -INSERT INTO bookstats VALUES(9,69,49,49,100); -INSERT INTO bookstats VALUES(10,64,41,41,100); CREATE TABLE IF NOT EXISTS "wordimages" ( "WiID" INTEGER NOT NULL , "WiWoID" INTEGER NOT NULL , @@ -153,29 +126,9 @@ CREATE TABLE IF NOT EXISTS "words" ( "WoRomanization" VARCHAR(100) NULL , "WoTokenCount" TINYINT NOT NULL DEFAULT '0' , "WoCreated" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP , - "WoStatusChanged" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + "WoStatusChanged" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, WoSyncStatus INTEGER NOT NULL DEFAULT 0, FOREIGN KEY("WoLgID") REFERENCES "languages" ("LgID") ON UPDATE NO ACTION ON DELETE CASCADE ); -CREATE TABLE IF NOT EXISTS "books" ( - "BkID" INTEGER NOT NULL , - "BkLgID" INTEGER NOT NULL , - "BkTitle" VARCHAR(200) NOT NULL , - "BkSourceURI" VARCHAR(1000) NULL , - "BkArchived" TINYINT NOT NULL DEFAULT '0' , - "BkCurrentTxID" INTEGER NOT NULL DEFAULT '0' , BkWordCount INT, - PRIMARY KEY ("BkID"), - FOREIGN KEY("BkLgID") REFERENCES "languages" ("LgID") ON UPDATE NO ACTION ON DELETE CASCADE -); -INSERT INTO books VALUES(1,2,'逍遙遊',NULL,0,0,382); -INSERT INTO books VALUES(2,8,'Aladino y la lámpara maravillosa',NULL,0,0,83); -INSERT INTO books VALUES(3,3,'Tutorial',NULL,0,0,1162); -INSERT INTO books VALUES(4,3,'Tutorial follow-up',NULL,0,0,489); -INSERT INTO books VALUES(5,6,'Γεια σου, Νίκη. 
Ο Πέτρος είμαι.',NULL,0,0,157); -INSERT INTO books VALUES(6,1,'Examples',NULL,0,0,115); -INSERT INTO books VALUES(7,9,'Büyük ağaç',NULL,0,0,110); -INSERT INTO books VALUES(8,5,'Die Bremer Stadtmusikanten',NULL,0,0,175); -INSERT INTO books VALUES(9,4,'Boucles d’or et les trois ours',NULL,0,0,69); -INSERT INTO books VALUES(10,7,'北風と太陽 - きたかぜたいよう',NULL,0,0,64); CREATE TABLE IF NOT EXISTS "wordparents" ( "WpWoID" INTEGER NOT NULL , "WpParentWoID" INTEGER NOT NULL , @@ -187,35 +140,89 @@ CREATE TABLE IF NOT EXISTS "texts" ( "TxBkID" INTEGER NOT NULL , "TxOrder" INTEGER NOT NULL , "TxText" TEXT NOT NULL , - TxReadDate datetime null, + TxReadDate datetime null, TxWordCount INTEGER null, TxStartDate datetime null, PRIMARY KEY ("TxID"), FOREIGN KEY("TxBkID") REFERENCES "books" ("BkID") ON UPDATE NO ACTION ON DELETE CASCADE ); -INSERT INTO texts VALUES(1,1,1,'北冥有魚,其名為鯤。鯤之大,不知其幾千里也。化而為鳥,其名為鵬。鵬之背,不知其幾千里也;怒而飛,其翼若垂天之雲。是鳥也,海運則將徙於南冥。南冥者,天池也。齊諧者,志怪者也。諧之言曰:「鵬之徙於南冥也,水擊三千里,摶扶搖而上者九萬里,去以六月息者也。」野馬也,塵埃也,生物之以息相吹也。天之蒼蒼,其正色邪?其遠而無所至極邪?其視下也亦若是,則已矣。且夫水之積也不厚,則負大舟也無力。覆杯水於坳堂之上,則芥為之舟,置杯焉則膠,水淺而舟大也。風之積也不厚,則其負大翼也無力。故九萬里則風斯在下矣,而後乃今培風;背負青天而莫之夭閼者,而後乃今將圖南。',NULL); -INSERT INTO texts VALUES(2,1,2,'蜩與學鳩笑之曰:「我決起而飛,槍1榆、枋,時則不至而控於地而已矣,奚以之九萬里而南為?」適莽蒼者三湌而反,腹猶果然;適百里者宿舂糧;適千里者三月聚糧。之二蟲又何知!小知不及大知,小年不及大年。奚以知其然也?朝菌不知晦朔,蟪蛄不知春秋,此小年也。楚之南有冥靈者,以五百歲為春,五百歲為秋;上古有大椿者,以八千歲為春,八千歲為秋。而彭祖乃今以久特聞,眾人匹之,不亦悲乎!',NULL); -INSERT INTO texts VALUES(3,2,1,replace('Érase una vez un muchacho llamado Aladino que vivía en el lejano Oriente con su madre, en una casa sencilla y humilde. Tenían lo justo para vivir, así que cada día, Aladino recorría el centro de la ciudad en busca de algún alimento que llevarse a la boca.\n\nEn una ocasión paseaba entre los puestos de fruta del mercado, cuando se cruzó con un hombre muy extraño con pinta de extranjero. Aladino se quedó sorprendido al escuchar que le llamaba por su nombre.','\n',char(10)),NULL); -INSERT INTO texts VALUES(4,3,1,replace('Welcome to Lute! This short guide should get you going.\n\nNavigation\n\nThis tutorial has multiple pages. Above the title are some arrows to navigate forwards and backwards. In longer texts, you can jump forward or back ten pages at a time as well.\n\n1. The Basics\n\nAll of these words are blue because they are "unknown" - according to Lute, this is the first time you''re seeing these words.\n\nYou can click on a word, and create a definition. For example, click on this word: elephant.\n\nWhen the form pops up in the right-hand frame, a dictionary is loaded below. Copy-paste something from the dictionary into the translation, or make up your own, mark the status, add some tags if you want (eg, type "noun" in the tags field), and click save. From now on, every English text that you read that contains the word "elephant" will show the status. If you hover over any "elephant", you''ll see some information.\n\n1.1 Multiple dictionaries.\n\nNext to the term is a small arrow, "Lookup". 
If you click on this repeatedly, Lute cycles through the dictionaries that you configure for the language in the "Languages" link on the homepage.\n\n1.2 Images\n\nFor this demo, English has been configured to do an image search for the second English dictionary, so if you click on the arrow, you''ll see some happy elephants (if you clicked on elephant!).','\n',char(10)),NULL); -INSERT INTO texts VALUES(5,3,2,replace('You can also click on the little "eye icon" next to the term, and it opens up a common image search URL.\n\nIn either case, if you click on one of the images shown in the list, that image is saved in your public/media/images folder. When you hover over the word in the reading pane, that picture is included in the word hover. Try adding an image for your elephant by clicking on the term, clicking the eye icon, and clicking a picture you like. Then hover over your elephant.\n\nNote: sometimes these images make _no sense_ -- it''s using Bing image search, and it does the best it can with the limited context it has.\n\n2. Multi-word Terms\n\nYou can create multi-word terms by clicking and dragging across multiple words, then release the mouse. Try creating a term for the phrase "the cat''s pyjamas", and add a translation and set the status.\n\n(A brief side note: Lute keeps track of where you are in any text. If you click the Home link above to leave this tutorial, and later click the Tutorial link from the Text listing, Lute will open the last page you were at.)\n\n3. Parent Terms\n\nSometimes it helps to associate terms with a "parent". For example, the verb "to have" is conjugated in various forms as "I have a cold", "he has a dog", "they had dinner". First create a Term for "have".','\n',char(10)),NULL); -INSERT INTO texts VALUES(6,3,3,replace('Then create a Term for "has", and in the Parent field start typing "have". Lute will show the existing Term "have" in the drop down, and if you select it, "has" will be associated with that on save, and when you hover over "has" you''ll see the parent''s information as well.\n\nIf you enter a non-existent Parent word, Lute will create a placeholder Term for that Parent, copying some content from your term. For example, try creating a Term for the word "dogs", associating it with the non-existent Term "dog". When you save "dogs", both will be updated.\n\nTerms can have multiple parents, too. Hit the Enter (or Return) key after each parent. For example, if you wanted to associate the Term "puppies" with both "puppy" and "dog", click on "puppies", and in the Parents text box type "puppy", hit Enter, type "dog", and hit Enter. Sometimes this is necessary: for example, in Spanish, "se sienta" can either be a conjugation of "sentirse" (to feel) or "sentarse" (to sit), depending on the context.\n\n4. Mark the remainder as "Well Known"\n\nWhen you''re done creating Terms on a page, you will likely still have a bunch of blue words, or "unknowns", left over, even though you really know these words. You can set all of these to "Well Known" in one shot with the green checkmark at the bottom of the page ("Mark rest as known"). Try that now to see what happens.','\n',char(10)),NULL); -INSERT INTO texts VALUES(7,3,4,replace('This button only affects words on the current page, so when you go to the next page, you''ll see that some words are still unknown.\n\nThere are other buttons at the bottom of the page. The green checkmark with the arrow sets the remaining unknowns on the current page, and also goes to the next page.\n\n5. 
Keyboard shortcuts\n\nThe small blue question mark in the header shows some keyboard shortcuts.\n\n5.1 Updating Status\n\nIf you''ve worked through the tutorial, you''ll have noted that words are underlined in blue when you move the mouse over them. You can quickly change the status of the current word by hitting 1, 2, 3, 4, 5, w (for Well-Known), or i (for Ignore). Try hovering over the following words and hit the status buttons: apple, banana, cranberry, donut.\n\nIf you click on a word, it''s underlined in red, and the Term edit form is shown. (Before you switch over to the Term editing form, you can still update its status.) You can jump over to the edit form by hitting Tab, and then start editing.\n\nWhen a word has been clicked, it''s "active", so it keeps the focus. Hovering the mouse over other words won''t underline them in blue anymore, and hitting status update hotkeys (1 - 5, w, i) will only update the active word. To "un-click" a word underlined in red, click it again, or hit Escape or Return. Then you''ll be back in "Hover mode".','\n',char(10)),NULL); -INSERT INTO texts VALUES(8,3,5,replace('Try clicking and un-clicking or Escaping any word in this paragraph to get a feel for it.\n\nNote that for the keyboard shortcuts to work, the reading pane (where the text is) must have the "focus". Click anywhere on the reading pane to re-establish focus.\n\n5.1 Bulk updates\n\nIf you hold down Shift and click a bunch of words, you can bulk update their statuses.\n\n5.2 Arrow keys\n\nThe Right and Left arrow keys click the next and previous words. Hit Escape or Return to get back to "hover mode".\n\n5.3 Copying text\n\nWhen a word is hovered over or clicked, hit "c" to copy that word''s sentence to your clipboard. Hit "C" to copy the word''s full paragraph (multiple sentences). You can also copy arbitrary sections of text by holding down the Shift key while highlighting the text with your mouse.\n\n6. Next steps\n\nAll done this text!\n\nLute keeps track of all of this in your database, so any time you create or import a new Book, all the info you''ve created is carried forward.\n\nThere''s a tutorial follow-up: go to the Home screen, and click the "Tutorial follow-up" in the table.','\n',char(10)),NULL); -INSERT INTO texts VALUES(9,4,1,replace('Hopefully you''ve gone through the Tutorial, and created some Terms.\n\nFrom the Tutorial, you''ve already told Lute that you know most of the words on this page. You can hover over words to see information about them, such as your information you might have added about dogs.\n\nThere are still a few blue words, which according to Lute are still "unknown" to you. You can process them like you did on the last text.\n\n(fyi - If a text has a spelling mikstaske, you can edit it by clicking the small Edit icon next to the title. If you''d like, correct the mistake now, and resave this text.)\n\n\nAppendix: A few other things\n\nA0. In case you missed it, on the Home screen there are some menu bar items on the top right. Go back there and hover over them to see what you can do. This is all demo data, so you can do what you want. (But don''t delete the tutorials until you''ve gone through them.)\n\nA1. Term sentences\n\nIn the "Term" edit form, you can click on the "Sentences" link to see where that term or its relations have been used. Click on "elephant", and then click the Sentences link shown to see where that term has been used.\n\nA2. 
Archiving, Unarchiving, and Deleting Texts\n\nWhen you''re done reading a text, you can either Archive it, or Delete it.','\n',char(10)),NULL); -INSERT INTO texts VALUES(10,4,2,replace('Archiving clears out the parsing data for a given text, but the text is still available and can be unarchived and re-read. The sentences are also available for searching with the Term "Sentences" link. Deletion completely removes a text, the parsing data, and its sentences. Neither archiving nor deleting touch any Terms you''ve created, it just clears out the texts.\n\nOn the last page of every book, Lute shows a link for you to archive the book. You can also delete it from the Home screen by clicking on the "Archive" action (the image with the little down arrow) in the right-most column.\n\nTo unarchive the text, go to Home, Text Archive, and click the "Unarchive" action (the little up arrow).\n\n\n\n===\n\nThose are the the core feature of Lute! There are some sample stories for other languages. Try those out or create your own.\n\nWhen you''re done with the demo, go back to the Home screen and click the link to clear out the database. Lute will delete all of the demo data, and you can get started. You''ll be prompted to create your first language, and then you can create your first book. Lute will then ask you to specify your backup preferences, and with that all done, you''ll be off and running.\n\nThere is a Lute Discord and Wiki as well -- see the "About" menu bar.\n\nI hope that you find Lute a fun tool to use for learning languages.','\n',char(10)),NULL); -INSERT INTO texts VALUES(11,4,3,'Cheers and best wishes!',NULL); -INSERT INTO texts VALUES(12,5,1,replace('Πέτρος: Γεια σου, Νίκη. Ο Πέτρος είμαι.\nΝίκη: Α, γεια σου Πέτρο. Τι κάνεις;\nΠέτρος: Μια χαρά. Σε παίρνω για να πάμε καμιά βόλτα αργότερα. Τι λες;\nΝίκη: Α, ωραία. Κι εγώ θέλω να βγω λίγο. Συνέχεια διαβάζω για τις εξετάσεις… κουράστηκα πια. Πού λες να πάμε;\nΠέτρος: Στη γνωστή καφετέρια στην πλατεία. Θα είναι και άλλα παιδιά από την τάξη μας εκεί.\nΝίκη: Ναι; Ποιοι θα είναι;\nΠέτρος: Ο Γιάννης, ο Αντρέας και η Ελπίδα.\nΝίκη: Ωραία. Θα πάτε και πουθενά αλλού μετά;\nΠέτρος: Ναι, λέμε να πάμε στον κινηματογράφο που είναι κοντά στην καφετέρια. Παίζει μια κωμωδία.\nΝίκη: Α, δεν μπορώ να καθίσω έξω μέχρι τόσο αργά. Πρέπει να γυρίσω σπίτι για να διαβάσω.\nΠέτρος: Έλα τώρα. Διαβάζεις αύριο…\nΝίκη: Όχι, όχι, αδύνατον. Είμαι πολύ πίσω στο διάβασμά μου.\nΠέτρος: Καλά, έλα μόνο στην καφετέρια τότε. Θα περάσω να σε πάρω γύρω στις έξι να πάμε μαζί. Εντάξει;\nΝίκη: Εντάξει. Γεια.\nΠέτρο: Τα λέμε. Γεια.','\n',char(10)),NULL); -INSERT INTO texts VALUES(13,6,1,replace('مرحبا، كيف حالك ؟\nمرحبا, أنا بخير\nهل انت جديدٌ هنا؟ لم أراك من قبل\nانا طالب جديد.لقد وصلت البارحة\nانا محمد, تشرفت بلقائك\n\nشجرة الحياة\n\nتحكي هذه القصة عن ولد صغير يُدعى «يوسف»، يعيش مع أمه الأرملة الفقيرة، يساعدها ويحنو عليها ويحبها حبًا جمًا. وفي يوم من الأيام يصيب المرض أم يوسف ويشتد عليها، ولا يعرف يوسف ماذا يفعل لإنقاذها، فلا يجد أمامه سوى اللجوء إلى الجِنِّيَّة «وِداد» التي تدله على شجرة فيها الشفاء لأمه، هذه الشجرة تقع في أعلى الجبل المقابل لمنزلهم، وعلى يوسف أن يتسلق هذا الجبل ويواجه المخاطر من أجل أن يأتي لأمه بالدواء الموجود في أوراق هذه الشجرة، فهل سينجح يوسف في ذلك؟ وماذا ينتظره من مخاطر وأهوال؟','\n',char(10)),NULL); -INSERT INTO texts VALUES(14,7,1,replace('Büyük ağaç eskiden aşılanmış ve her yıl güzel, iri, pembe şeftaliler verirmiş, insanın eline sığmazmış bu şeftaliler. Öyle güzelmişler ki insan yemeye kıyamazmış onları. 
Bahçıvan, bu büyük ağacı yabancı bir uzmanın kendi ülkesinden getirdiği bir tohumla aşıladığını söylermiş. Belli ki böyle masraf edilen bir ağaçta yetişen şeftaliler oldukça değerliymiş.\n\nİki ağacın da gövdelerine nazar değmesin diye birer nazarlık asılıymış.\n\nAğaçlardan küçük olanında her yıl bin tane çiçek açarmış ama bir tek şeftali bile yetişmezmiş üzerinde. Ya çiçekleri dökülürmüş, ya da ham şeftaliler kuruyup dallardan düşermiş. Bahçıvan küçük ağaç için elinden geleni yapmış ama değişen bir şey olmamış. Yıllar geçtikçe dalları ve yaprakları çoğalmış ama bir tek şeftali bile görünmemiş üzerinde.','\n',char(10)),NULL); -INSERT INTO texts VALUES(15,8,1,replace('Es hatte ein Mann einen Esel, der schon lange Jahre die Säcke unverdrossen zur Mühle getragen hatte, dessen Kräfte aber nun zu Ende gingen, so daß er zur Arbeit immer untauglicher ward. Da dachte der Herr daran, ihn aus dem Futter zu schaffen, aber der Esel merkte, daß kein guter Wind wehte, lief fort und machte sich auf den Weg nach Bremen; dort, meinte er, könnte er ja Stadtmusikant werden.\n\nAls er ein Weilchen fortgegangen war, fand er einen Jagdhund auf dem Wege liegen, der jappte wie einer, der sich müde gelaufen hat. "Nun, was jappst du so, Packan?" fragte der Esel. "Ach," sagte der Hund, "weil ich alt bin und jeden Tag schwächer werde, auch auf der Jagd nicht mehr fort kann, hat mich mein Herr wollen totschlagen, da hab ich Reißaus genommen; aber womit soll ich nun mein Brot verdienen?" - "Weißt du was?" sprach der Esel, "ich gehe nach Bremen und werde dort Stadtmusikant, geh mit und laß dich auch bei der Musik annehmen. Ich spiele die Laute und du schlägst die Pauken."','\n',char(10)),NULL); -INSERT INTO texts VALUES(16,9,1,replace('Il était une fois trois ours: un papa ours, une maman ours et un bébé ours. Ils habitaient tous ensemble dans une maison jaune au milieu d''une grande forêt.\n\nUn jour, Maman Ours prépara une grande marmite de porridge délicieux et fumant pour le petit déjeuner. 
Il était trop chaud pour pouvoir être mangé, alors les ours décidèrent d''aller se promener en attendant que le porridge refroidisse.','\n',char(10)),NULL); -INSERT INTO texts VALUES(17,10,1,replace('北風と太陽\n\n「おれの方が強い。」「いいや、ぼくの方が強い。」\n北風と太陽の声が聞こえます。二人はどちらの力が強いかでケンカをしているようです。\n「太陽が毎日元気だから、暑くてみんな困っているよ。おれが涼しい風を吹くと、みんな嬉しそうだ。」','\n',char(10)),NULL); CREATE TABLE IF NOT EXISTS "settings" ( "StKey" VARCHAR(40) NOT NULL, "StKeyType" TEXT NOT NULL, "StValue" TEXT NULL, PRIMARY KEY ("StKey") ); -INSERT INTO settings VALUES('IsDemoData','system','1'); -CREATE UNIQUE INDEX "LgName" ON "languages" ("LgName"); +INSERT INTO settings VALUES('LoadDemoData','system','1'); +CREATE TABLE IF NOT EXISTS "books" ( + "BkID" INTEGER NOT NULL , + "BkLgID" INTEGER NOT NULL , + "BkTitle" VARCHAR(200) NOT NULL , + "BkSourceURI" VARCHAR(1000) NULL , + "BkArchived" TINYINT NOT NULL DEFAULT '0' , + "BkCurrentTxID" INTEGER NOT NULL DEFAULT '0', + BkAudioFilename TEXT NULL, + BkAudioCurrentPos REAL NULL, + BkAudioBookmarks TEXT NULL, + PRIMARY KEY ("BkID"), + FOREIGN KEY("BkLgID") REFERENCES "languages" ("LgID") ON UPDATE NO ACTION ON DELETE CASCADE +); +CREATE TABLE IF NOT EXISTS "bookstats" ( + "BkID" INTEGER NOT NULL , + "distinctterms" INTEGER NULL , + "distinctunknowns" INTEGER NULL , + "unknownpercent" INTEGER NULL , + status_distribution VARCHAR(100) NULL, + PRIMARY KEY ("BkID"), + FOREIGN KEY("BkID") REFERENCES "books" ("BkID") ON UPDATE NO ACTION ON DELETE CASCADE +); +CREATE TABLE languagedicts ( + "LdID" INTEGER NOT NULL, + "LdLgID" INTEGER NOT NULL, + "LdUseFor" VARCHAR(20) NOT NULL, + "LdType" VARCHAR(20) NOT NULL, + "LdDictURI" VARCHAR(200) NOT NULL, + "LdIsActive" TINYINT NOT NULL DEFAULT 1, + "LdSortOrder" INTEGER NOT NULL, + PRIMARY KEY ("LdID"), + FOREIGN KEY("LdLgID") REFERENCES "languages" ("LgID") ON UPDATE NO ACTION ON DELETE CASCADE +); +CREATE TABLE IF NOT EXISTS "languages" ( + "LgID" INTEGER NOT NULL , + "LgName" VARCHAR(40) NOT NULL , + "LgCharacterSubstitutions" VARCHAR(500) NOT NULL , + "LgRegexpSplitSentences" VARCHAR(500) NOT NULL , + "LgExceptionsSplitSentences" VARCHAR(500) NOT NULL , + "LgRegexpWordCharacters" VARCHAR(500) NOT NULL , + "LgRightToLeft" TINYINT NOT NULL , + "LgShowRomanization" TINYINT NOT NULL DEFAULT '0' , + "LgParserType" VARCHAR(20) NOT NULL DEFAULT 'spacedel' , + PRIMARY KEY ("LgID") +); +CREATE TABLE textbookmarks ( + "TbID" INTEGER NOT NULL, + "TbTxID" INTEGER NOT NULL, + "TbTitle" TEXT NOT NULL, + PRIMARY KEY ("TbID"), + FOREIGN KEY("TbTxID") REFERENCES texts ("TxID") ON DELETE CASCADE +); +CREATE TABLE IF NOT EXISTS "wordsread" ( + "WrID" INTEGER NOT NULL, + "WrLgID" INTEGER NOT NULL, + "WrTxID" INTEGER NULL, + "WrReadDate" DATETIME NOT NULL, + "WrWordCount" INTEGER NOT NULL, + PRIMARY KEY ("WrID"), + FOREIGN KEY("WrTxID") REFERENCES "texts" ("TxID") ON DELETE SET NULL, + FOREIGN KEY("WrLgID") REFERENCES "languages" ("LgID") ON UPDATE NO ACTION ON DELETE CASCADE +); +CREATE TABLE IF NOT EXISTS "srsexportspecs" ( + "SrsID" INTEGER NOT NULL, + "SrsExportName" VARCHAR(200) NOT NULL UNIQUE, + "SrsCriteria" VARCHAR(1000) NOT NULL, + "SrsDeckName" VARCHAR(200) NOT NULL, + "SrsNoteType" VARCHAR(200) NOT NULL, + "SrsFieldMapping" VARCHAR(1000) NOT NULL, + "SrsActive" TINYINT NOT NULL DEFAULT '1', + PRIMARY KEY ("SrsID") +); CREATE UNIQUE INDEX "TgText" ON "tags" ("TgText"); CREATE UNIQUE INDEX "T2Text" ON "tags2" ("T2Text"); CREATE INDEX "BtT2ID" ON "booktags" ("BtT2ID"); @@ -229,9 +236,61 @@ CREATE INDEX "WoStatusChanged" ON "words" ("WoStatusChanged"); CREATE INDEX 
"WoTextLC" ON "words" ("WoTextLC"); CREATE UNIQUE INDEX "WoTextLCLgID" ON "words" ("WoTextLC", "WoLgID"); CREATE INDEX "WoTokenCount" ON "words" ("WoTokenCount"); -CREATE INDEX "BkLgID" ON "books" ("BkLgID"); CREATE UNIQUE INDEX "wordparent_pair" ON "wordparents" ("WpWoID", "WpParentWoID"); +CREATE INDEX "BkLgID" ON "books" ("BkLgID"); +CREATE UNIQUE INDEX "LgName" ON "languages" ("LgName"); +CREATE TRIGGER trig_wordparents_after_insert_update_parent_WoStatus_if_following +-- created by db/schema/migrations_repeatable/trig_wordparents.sql +AFTER INSERT ON wordparents +BEGIN + UPDATE words + SET WoStatus = ( + select WoStatus from words where WoID = new.WpWoID + ) + WHERE WoID = new.WpParentWoID + AND 1 = ( + SELECT COUNT(*) + FROM wordparents + INNER JOIN words ON WoID = WpWoID + WHERE WoSyncStatus = 1 + AND WoID = new.WpWoID + ); +END +; +CREATE TRIGGER trig_words_after_update_WoStatus_if_following_parent +-- created by db/schema/migrations_repeatable/trig_words.sql +AFTER UPDATE OF WoStatus, WoSyncStatus ON words +FOR EACH ROW +WHEN (old.WoStatus <> new.WoStatus or (old.WoSyncStatus = 0 and new.WoSyncStatus = 1)) +BEGIN + UPDATE words + SET WoStatus = new.WoStatus + WHERE WoID in ( + -- single parent children that are following this term. + select WpWoID + from wordparents + inner join words on WoID = WpWoID + where WoSyncStatus = 1 + and WpParentWoID = old.WoID + group by WpWoID + having count(*) = 1 + + UNION + + -- The parent of this term, + -- if this term has a single parent and has "follow parent" + select WpParentWoID + from wordparents + inner join words on WoID = WpWoID + where WoSyncStatus = 1 + and WoID = old.WoID + group by WpWoID + having count(*) = 1 + ); +END +; CREATE TRIGGER trig_words_update_WoStatusChanged +-- created by db/schema/migrations_repeatable/trig_words.sql AFTER UPDATE OF WoStatus ON words FOR EACH ROW WHEN old.WoStatus <> new.WoStatus @@ -239,5 +298,29 @@ BEGIN UPDATE words SET WoStatusChanged = CURRENT_TIMESTAMP WHERE WoID = NEW.WoID; -END; +END +; +CREATE TRIGGER trig_words_update_WoCreated_if_no_longer_unknown +-- created by db/schema/migrations_repeatable/trig_words.sql +AFTER UPDATE OF WoStatus ON words +FOR EACH ROW +WHEN old.WoStatus <> new.WoStatus and old.WoStatus = 0 +BEGIN + UPDATE words + SET WoCreated = CURRENT_TIMESTAMP + WHERE WoID = NEW.WoID; +END +; +CREATE TRIGGER trig_word_after_delete_change_WoSyncStatus_for_orphans +-- created by db/schema/migrations_repeatable/trig_words.sql +-- +-- If a term is deleted, any orphaned children must +-- be updated to have WoSyncStatus = 0. +AFTER DELETE ON words +BEGIN + UPDATE words + SET WoSyncStatus = 0 + WHERE WoID NOT IN (SELECT WpWoID FROM wordparents); +END +; COMMIT; diff --git a/lute/db/schema/empty.sql b/lute/db/schema/empty.sql deleted file mode 100644 index a1add6aad..000000000 --- a/lute/db/schema/empty.sql +++ /dev/null @@ -1,203 +0,0 @@ --- ------------------------------------------ --- EMPTY DB. --- Migrations tracked in _migrations, settings reset. 
--- Generated from 'inv db.export.empty' --- ------------------------------------------ - -PRAGMA foreign_keys=OFF; -BEGIN TRANSACTION; -CREATE TABLE IF NOT EXISTS "_migrations" ( - "filename" VARCHAR(255) NOT NULL , - PRIMARY KEY ("filename") -); -INSERT INTO _migrations VALUES('20230409_224327_load_statuses.sql'); -INSERT INTO _migrations VALUES('20230414_225828_add_texttokens_TokTextLC.sql'); -INSERT INTO _migrations VALUES('20230428_224656_create_wordflashmessages_table.sql'); -INSERT INTO _migrations VALUES('20230518_190000_remove_old_words_fields.sql'); -INSERT INTO _migrations VALUES('20230519_194627_add_TxDateRead.sql'); -INSERT INTO _migrations VALUES('20230621_010000_drop_texttags_table.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_01_booktags.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_02_wordtags.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_03_sentences.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_04_texttokens.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_05_texts.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_06_bookstats.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_07_termimages.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_08_wordflashmessages.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_09_wordparents.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_10_words.sql'); -INSERT INTO _migrations VALUES('20230621_224416_fk_11_books.sql'); -INSERT INTO _migrations VALUES('20230623_234104_drop_TxTitle.sql'); -INSERT INTO _migrations VALUES('20230624_182104_drop_index_TxBkIDTxOrder.sql'); -INSERT INTO _migrations VALUES('20230818_201200_add_BkWordCount.sql'); -INSERT INTO _migrations VALUES('20230819_044107_drop_texttokens.sql'); -INSERT INTO _migrations VALUES('20230819_050036_vacuum.sql'); -INSERT INTO _migrations VALUES('20230827_052154_allow_multiple_word_parents.sql'); -INSERT INTO _migrations VALUES('20231018_211236_remove_excess_texts_fields.sql'); -INSERT INTO _migrations VALUES('20231029_092851_create_migration_settings.sql'); -INSERT INTO _migrations VALUES('20231101_203811_modify_settings_schema.sql'); -CREATE TABLE IF NOT EXISTS "languages" ( - "LgID" INTEGER NOT NULL , - "LgName" VARCHAR(40) NOT NULL , - "LgDict1URI" VARCHAR(200) NOT NULL , - "LgDict2URI" VARCHAR(200) NULL , - "LgGoogleTranslateURI" VARCHAR(200) NULL , - "LgCharacterSubstitutions" VARCHAR(500) NOT NULL , - "LgRegexpSplitSentences" VARCHAR(500) NOT NULL , - "LgExceptionsSplitSentences" VARCHAR(500) NOT NULL , - "LgRegexpWordCharacters" VARCHAR(500) NOT NULL , - "LgRemoveSpaces" TINYINT NOT NULL , - "LgSplitEachChar" TINYINT NOT NULL , - "LgRightToLeft" TINYINT NOT NULL , - "LgShowRomanization" TINYINT NOT NULL DEFAULT '0' , - "LgParserType" VARCHAR(20) NOT NULL DEFAULT 'spacedel' , - PRIMARY KEY ("LgID") -); -CREATE TABLE IF NOT EXISTS "statuses" ( - "StID" INTEGER NOT NULL , - "StText" VARCHAR(20) NOT NULL , - "StAbbreviation" VARCHAR(5) NOT NULL , - PRIMARY KEY ("StID") -); -INSERT INTO statuses VALUES(0,'Unknown','?'); -INSERT INTO statuses VALUES(1,'New (1)','1'); -INSERT INTO statuses VALUES(2,'New (2)','2'); -INSERT INTO statuses VALUES(3,'Learning (3)','3'); -INSERT INTO statuses VALUES(4,'Learning (4)','4'); -INSERT INTO statuses VALUES(5,'Learned','5'); -INSERT INTO statuses VALUES(98,'Ignored','Ign'); -INSERT INTO statuses VALUES(99,'Well Known','WKn'); -CREATE TABLE IF NOT EXISTS "tags" ( - "TgID" INTEGER NOT NULL , - "TgText" VARCHAR(20) NOT NULL , - "TgComment" 
VARCHAR(200) NOT NULL DEFAULT '' , - PRIMARY KEY ("TgID") -); -CREATE TABLE IF NOT EXISTS "tags2" ( - "T2ID" INTEGER NOT NULL , - "T2Text" VARCHAR(20) NOT NULL , - "T2Comment" VARCHAR(200) NOT NULL DEFAULT '' , - PRIMARY KEY ("T2ID") -); -CREATE TABLE IF NOT EXISTS "booktags" ( - "BtBkID" INTEGER NOT NULL , - "BtT2ID" INTEGER NOT NULL , - PRIMARY KEY ("BtBkID", "BtT2ID"), - FOREIGN KEY("BtT2ID") REFERENCES "tags2" ("T2ID") ON UPDATE NO ACTION ON DELETE CASCADE, - FOREIGN KEY("BtBkID") REFERENCES "books" ("BkID") ON UPDATE NO ACTION ON DELETE CASCADE -); -CREATE TABLE IF NOT EXISTS "wordtags" ( - "WtWoID" INTEGER NOT NULL , - "WtTgID" INTEGER NOT NULL , - PRIMARY KEY ("WtWoID", "WtTgID"), - FOREIGN KEY("WtWoID") REFERENCES "words" ("WoID") ON UPDATE NO ACTION ON DELETE CASCADE, - FOREIGN KEY("WtTgID") REFERENCES "tags" ("TgID") ON UPDATE NO ACTION ON DELETE CASCADE -); -CREATE TABLE IF NOT EXISTS "sentences" ( - "SeID" INTEGER NOT NULL , - "SeTxID" INTEGER NOT NULL , - "SeOrder" SMALLINT NOT NULL , - "SeText" TEXT NULL , - PRIMARY KEY ("SeID"), - FOREIGN KEY("SeTxID") REFERENCES "texts" ("TxID") ON UPDATE NO ACTION ON DELETE CASCADE -); -CREATE TABLE IF NOT EXISTS "bookstats" ( - "BkID" INTEGER NOT NULL , - "wordcount" INTEGER NULL , - "distinctterms" INTEGER NULL , - "distinctunknowns" INTEGER NULL , - "unknownpercent" INTEGER NULL , - PRIMARY KEY ("BkID"), - FOREIGN KEY("BkID") REFERENCES "books" ("BkID") ON UPDATE NO ACTION ON DELETE CASCADE -); -CREATE TABLE IF NOT EXISTS "wordimages" ( - "WiID" INTEGER NOT NULL , - "WiWoID" INTEGER NOT NULL , - "WiSource" VARCHAR(500) NOT NULL , - PRIMARY KEY ("WiID"), - FOREIGN KEY("WiWoID") REFERENCES "words" ("WoID") ON UPDATE NO ACTION ON DELETE CASCADE -); -CREATE TABLE IF NOT EXISTS "wordflashmessages" ( - "WfID" INTEGER NOT NULL, - "WfWoID" INTEGER NOT NULL, - "WfMessage" VARCHAR(200) NOT NULL, - PRIMARY KEY ("WfID"), - FOREIGN KEY("WfWoID") REFERENCES "words" ("WoID") ON UPDATE NO ACTION ON DELETE CASCADE -); -CREATE TABLE IF NOT EXISTS "words" ( - "WoID" INTEGER NOT NULL PRIMARY KEY , - "WoLgID" INTEGER NOT NULL , - "WoText" VARCHAR(250) NOT NULL , - "WoTextLC" VARCHAR(250) NOT NULL , - "WoStatus" TINYINT NOT NULL , - "WoTranslation" VARCHAR(500) NULL , - "WoRomanization" VARCHAR(100) NULL , - "WoTokenCount" TINYINT NOT NULL DEFAULT '0' , - "WoCreated" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP , - "WoStatusChanged" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, - FOREIGN KEY("WoLgID") REFERENCES "languages" ("LgID") ON UPDATE NO ACTION ON DELETE CASCADE -); -CREATE TABLE IF NOT EXISTS "books" ( - "BkID" INTEGER NOT NULL , - "BkLgID" INTEGER NOT NULL , - "BkTitle" VARCHAR(200) NOT NULL , - "BkSourceURI" VARCHAR(1000) NULL , - "BkArchived" TINYINT NOT NULL DEFAULT '0' , - "BkCurrentTxID" INTEGER NOT NULL DEFAULT '0' , BkWordCount INT, - PRIMARY KEY ("BkID"), - FOREIGN KEY("BkLgID") REFERENCES "languages" ("LgID") ON UPDATE NO ACTION ON DELETE CASCADE -); -CREATE TABLE IF NOT EXISTS "wordparents" ( - "WpWoID" INTEGER NOT NULL , - "WpParentWoID" INTEGER NOT NULL , - FOREIGN KEY("WpParentWoID") REFERENCES "words" ("WoID") ON UPDATE NO ACTION ON DELETE CASCADE, - FOREIGN KEY("WpWoID") REFERENCES "words" ("WoID") ON UPDATE NO ACTION ON DELETE CASCADE -); -CREATE TABLE IF NOT EXISTS "texts" ( - "TxID" INTEGER NOT NULL , - "TxBkID" INTEGER NOT NULL , - "TxOrder" INTEGER NOT NULL , - "TxText" TEXT NOT NULL , - TxReadDate datetime null, - PRIMARY KEY ("TxID"), - FOREIGN KEY("TxBkID") REFERENCES "books" ("BkID") ON UPDATE NO ACTION ON DELETE CASCADE -); 
-CREATE TABLE IF NOT EXISTS "settings" ( - "StKey" VARCHAR(40) NOT NULL, - "StKeyType" TEXT NOT NULL, - "StValue" TEXT NULL, - PRIMARY KEY ("StKey") -); -INSERT INTO settings VALUES('backup_enabled','user',NULL); -INSERT INTO settings VALUES('backup_auto','user','1'); -INSERT INTO settings VALUES('backup_warn','user','1'); -INSERT INTO settings VALUES('backup_dir','user',NULL); -INSERT INTO settings VALUES('backup_count','user','5'); -INSERT INTO settings VALUES('mecab_path','user',NULL); -INSERT INTO settings VALUES('custom_styles','user','/* Custom css to modify Lute''s appearance. */'); -CREATE UNIQUE INDEX "LgName" ON "languages" ("LgName"); -CREATE UNIQUE INDEX "TgText" ON "tags" ("TgText"); -CREATE UNIQUE INDEX "T2Text" ON "tags2" ("T2Text"); -CREATE INDEX "BtT2ID" ON "booktags" ("BtT2ID"); -CREATE INDEX "WtTgID" ON "wordtags" ("WtTgID"); -CREATE INDEX "SeOrder" ON "sentences" ("SeOrder"); -CREATE INDEX "SeTxID" ON "sentences" ("SeTxID"); -CREATE INDEX "WiWoID" ON "wordimages" ("WiWoID"); -CREATE INDEX "WoLgID" ON "words" ("WoLgID"); -CREATE INDEX "WoStatus" ON "words" ("WoStatus"); -CREATE INDEX "WoStatusChanged" ON "words" ("WoStatusChanged"); -CREATE INDEX "WoTextLC" ON "words" ("WoTextLC"); -CREATE UNIQUE INDEX "WoTextLCLgID" ON "words" ("WoTextLC", "WoLgID"); -CREATE INDEX "WoTokenCount" ON "words" ("WoTokenCount"); -CREATE INDEX "BkLgID" ON "books" ("BkLgID"); -CREATE UNIQUE INDEX "wordparent_pair" ON "wordparents" ("WpWoID", "WpParentWoID"); -CREATE TRIGGER trig_words_update_WoStatusChanged -AFTER UPDATE OF WoStatus ON words -FOR EACH ROW -WHEN old.WoStatus <> new.WoStatus -BEGIN - UPDATE words - SET WoStatusChanged = CURRENT_TIMESTAMP - WHERE WoID = NEW.WoID; -END; -COMMIT; diff --git a/lute/db/schema/migrations/20231130_141236_add_TxWordCount.sql b/lute/db/schema/migrations/20231130_141236_add_TxWordCount.sql new file mode 100644 index 000000000..a5bfa4c63 --- /dev/null +++ b/lute/db/schema/migrations/20231130_141236_add_TxWordCount.sql @@ -0,0 +1,2 @@ +-- lute.stats.service updates the TxWordCount when needed. +alter table texts add column TxWordCount INTEGER null; diff --git a/lute/db/schema/migrations/20231210_103924_add_book_audio_fields.sql b/lute/db/schema/migrations/20231210_103924_add_book_audio_fields.sql new file mode 100644 index 000000000..d71e51be3 --- /dev/null +++ b/lute/db/schema/migrations/20231210_103924_add_book_audio_fields.sql @@ -0,0 +1,5 @@ +-- book audio fields. 
+ +ALTER TABLE books ADD COLUMN BkAudioFilename TEXT NULL; +ALTER TABLE books ADD COLUMN BkAudioCurrentPos REAL NULL; +ALTER TABLE books ADD COLUMN BkAudioBookmarks TEXT NULL; diff --git a/lute/db/schema/migrations/20240101_122610_add_bookstats_status_distribution.sql b/lute/db/schema/migrations/20240101_122610_add_bookstats_status_distribution.sql new file mode 100644 index 000000000..7d31d31df --- /dev/null +++ b/lute/db/schema/migrations/20240101_122610_add_bookstats_status_distribution.sql @@ -0,0 +1 @@ +alter table bookstats add status_distribution VARCHAR(100) NULL; diff --git a/lute/db/schema/migrations/20240113_215142_add_term_follow_parent_bool.sql b/lute/db/schema/migrations/20240113_215142_add_term_follow_parent_bool.sql new file mode 100644 index 000000000..2b83b187b --- /dev/null +++ b/lute/db/schema/migrations/20240113_215142_add_term_follow_parent_bool.sql @@ -0,0 +1,2 @@ +ALTER TABLE words +ADD COLUMN WoSyncStatus INTEGER NOT NULL DEFAULT 0; diff --git a/lute/db/schema/migrations/20240118_154258_change_status_abbrev.sql b/lute/db/schema/migrations/20240118_154258_change_status_abbrev.sql new file mode 100644 index 000000000..1deacfd27 --- /dev/null +++ b/lute/db/schema/migrations/20240118_154258_change_status_abbrev.sql @@ -0,0 +1,4 @@ +-- Update statuses for datatables export, to match value required in import. + +update statuses set StAbbreviation = 'W' where StID = 99; +update statuses set StAbbreviation = 'I' where StID = 98; diff --git a/lute/db/schema/migrations/20240125_drop_BkWordCount.sql b/lute/db/schema/migrations/20240125_drop_BkWordCount.sql new file mode 100644 index 000000000..35aa4f514 --- /dev/null +++ b/lute/db/schema/migrations/20240125_drop_BkWordCount.sql @@ -0,0 +1,63 @@ +-- sqlite only started supporting "alter table drop column" as at v3.35 (I think). 
+-- for max compatibility with user systems, have to follow process outlined at +-- https://www.sqlitetutorial.net/sqlite-alter-table/ + +-- disable foreign key constraint check +PRAGMA foreign_keys=off; + +-- start a transaction +BEGIN TRANSACTION; + +-- Here you can drop column +CREATE TABLE IF NOT EXISTS "new_books" ( + "BkID" INTEGER NOT NULL , + "BkLgID" INTEGER NOT NULL , + "BkTitle" VARCHAR(200) NOT NULL , + "BkSourceURI" VARCHAR(1000) NULL , + "BkArchived" TINYINT NOT NULL DEFAULT '0' , + "BkCurrentTxID" INTEGER NOT NULL DEFAULT '0', + BkAudioFilename TEXT NULL, + BkAudioCurrentPos REAL NULL, + BkAudioBookmarks TEXT NULL, + PRIMARY KEY ("BkID"), + FOREIGN KEY("BkLgID") REFERENCES "languages" ("LgID") ON UPDATE NO ACTION ON DELETE CASCADE +); + + +-- copy data from the table to the new_table +INSERT INTO new_books( + BkID, + BkLgID, + BkTitle, + BkSourceURI, + BkArchived, + BkCurrentTxID, + BkAudioFilename, + BkAudioCurrentPos, + BkAudioBookmarks +) +SELECT + BkID, + BkLgID, + BkTitle, + BkSourceURI, + BkArchived, + BkCurrentTxID, + BkAudioFilename, + BkAudioCurrentPos, + BkAudioBookmarks +FROM books; + +-- drop the table +DROP TABLE books; + +-- rename the new_table to the table +ALTER TABLE new_books RENAME TO books; + +-- commit the transaction +COMMIT; + +CREATE INDEX "BkLgID" ON "books" ("BkLgID"); + +-- enable foreign key constraint check +PRAGMA foreign_keys=on; diff --git a/lute/db/schema/migrations/20240125_drop_bookstats_wordcount.sql b/lute/db/schema/migrations/20240125_drop_bookstats_wordcount.sql new file mode 100644 index 000000000..bc7c368b3 --- /dev/null +++ b/lute/db/schema/migrations/20240125_drop_bookstats_wordcount.sql @@ -0,0 +1,49 @@ +-- sqlite only started supporting "alter table drop column" as at v3.35 (I think). +-- for max compatibility with user systems, have to follow process outlined at +-- https://www.sqlitetutorial.net/sqlite-alter-table/ + +-- disable foreign key constraint check +PRAGMA foreign_keys=off; + +-- start a transaction +BEGIN TRANSACTION; + +-- Here you can drop column +CREATE TABLE IF NOT EXISTS "new_bookstats" ( + "BkID" INTEGER NOT NULL , + "distinctterms" INTEGER NULL , + "distinctunknowns" INTEGER NULL , + "unknownpercent" INTEGER NULL , + status_distribution VARCHAR(100) NULL, + PRIMARY KEY ("BkID"), + FOREIGN KEY("BkID") REFERENCES "books" ("BkID") ON UPDATE NO ACTION ON DELETE CASCADE +); + + +-- copy data from the table to the new_table +INSERT INTO new_bookstats( + BkID, + distinctterms, + distinctunknowns, + unknownpercent, + status_distribution +) +SELECT + BkID, + distinctterms, + distinctunknowns, + unknownpercent, + status_distribution +FROM bookstats; + +-- drop the table +DROP TABLE bookstats; + +-- rename the new_table to the table +ALTER TABLE new_bookstats RENAME TO bookstats; + +-- commit the transaction +COMMIT; + +-- enable foreign key constraint check +PRAGMA foreign_keys=on; diff --git a/lute/db/schema/migrations/20240207_01_create_languagedicts.sql b/lute/db/schema/migrations/20240207_01_create_languagedicts.sql new file mode 100644 index 000000000..04bd80566 --- /dev/null +++ b/lute/db/schema/migrations/20240207_01_create_languagedicts.sql @@ -0,0 +1,54 @@ +-- Language dict table + +CREATE TABLE languagedicts ( + "LdID" INTEGER NOT NULL, + "LdLgID" INTEGER NOT NULL, + "LdUseFor" VARCHAR(20) NOT NULL, + "LdType" VARCHAR(20) NOT NULL, + "LdDictURI" VARCHAR(200) NOT NULL, + "LdIsActive" TINYINT NOT NULL DEFAULT 1, + "LdSortOrder" INTEGER NOT NULL, + PRIMARY KEY ("LdID"), + FOREIGN KEY("LdLgID") REFERENCES 
"languages" ("LgID") ON UPDATE NO ACTION ON DELETE CASCADE +); + + +-- Copy existing dictionary data to new table. + +-- TERMS: dict 1 + +-- embedded +insert into languagedicts (LdLgID, LdUseFor, LdType, LdDictURI, LdSortOrder) +select lgid, "terms", "embeddedhtml", LgDict1URI, 1 +from languages where LgDict1URI is not null and LgDict1URI NOT LIKE '*%'; + +-- popup +insert into languagedicts (LdLgID, LdUseFor, LdType, LdDictURI, LdSortOrder) +select lgid, "terms", "popuphtml", substr(LgDict1URI,2), 2 +from languages where LgDict1URI is not null and LgDict1URI LIKE '*%'; + + +-- TERMS: dict 2 + +-- embedded +insert into languagedicts (LdLgID, LdUseFor, LdType, LdDictURI, LdSortOrder) +select lgid, "terms", "embeddedhtml", LgDict2URI, 2 +from languages where LgDict2URI is not null and LgDict2URI NOT LIKE '*%'; + +-- popup +insert into languagedicts (LdLgID, LdUseFor, LdType, LdDictURI, LdSortOrder) +select lgid, "terms", "popuphtml", substr(LgDict2URI,2), 2 +from languages where LgDict2URI is not null and LgDict2URI LIKE '*%'; + + +-- SENTENCES + +-- embedded +insert into languagedicts (LdLgID, LdUseFor, LdType, LdDictURI, LdSortOrder) +select lgid, "sentences", "embeddedhtml", LgGoogleTranslateURI, 3 +from languages where LgGoogleTranslateURI is not null and LgGoogleTranslateURI NOT LIKE '*%'; + +-- popup +insert into languagedicts (LdLgID, LdUseFor, LdType, LdDictURI, LdSortOrder) +select lgid, "sentences", "popuphtml", substr(LgGoogleTranslateURI,2), 3 +from languages where LgGoogleTranslateURI is not null and LgGoogleTranslateURI LIKE '*%'; diff --git a/lute/db/schema/migrations/20240207_02_drop_old_language_fields.sql b/lute/db/schema/migrations/20240207_02_drop_old_language_fields.sql new file mode 100644 index 000000000..9e38bfb00 --- /dev/null +++ b/lute/db/schema/migrations/20240207_02_drop_old_language_fields.sql @@ -0,0 +1,61 @@ +-- sqlite only started supporting "alter table drop column" as at v3.35 (I think). 
+-- for max compatibility with user systems, have to follow process outlined at
+-- https://www.sqlitetutorial.net/sqlite-alter-table/
+
+-- disable foreign key constraint check
+PRAGMA foreign_keys=off;
+
+-- start a transaction
+BEGIN TRANSACTION;
+
+-- Here you can drop column
+CREATE TABLE IF NOT EXISTS "new_languages" (
+    "LgID" INTEGER NOT NULL ,
+    "LgName" VARCHAR(40) NOT NULL ,
+    "LgCharacterSubstitutions" VARCHAR(500) NOT NULL ,
+    "LgRegexpSplitSentences" VARCHAR(500) NOT NULL ,
+    "LgExceptionsSplitSentences" VARCHAR(500) NOT NULL ,
+    "LgRegexpWordCharacters" VARCHAR(500) NOT NULL ,
+    "LgRightToLeft" TINYINT NOT NULL ,
+    "LgShowRomanization" TINYINT NOT NULL DEFAULT '0' ,
+    "LgParserType" VARCHAR(20) NOT NULL DEFAULT 'spacedel' ,
+    PRIMARY KEY ("LgID")
+);
+
+-- copy data from the table to the new_table
+INSERT INTO new_languages(
+    LgID,
+    LgName,
+    LgCharacterSubstitutions,
+    LgRegexpSplitSentences,
+    LgExceptionsSplitSentences,
+    LgRegexpWordCharacters,
+    LgRightToLeft,
+    LgShowRomanization,
+    LgParserType
+)
+SELECT
+    LgID,
+    LgName,
+    LgCharacterSubstitutions,
+    LgRegexpSplitSentences,
+    LgExceptionsSplitSentences,
+    LgRegexpWordCharacters,
+    LgRightToLeft,
+    LgShowRomanization,
+    LgParserType
+FROM languages;
+
+-- drop the table
+DROP TABLE languages;
+
+-- rename the new_table to the table
+ALTER TABLE new_languages RENAME TO languages;
+
+-- commit the transaction
+COMMIT;
+
+CREATE UNIQUE INDEX "LgName" ON "languages" ("LgName");
+
+-- enable foreign key constraint check
+PRAGMA foreign_keys=on;
diff --git a/lute/db/schema/migrations/20240525_create_textbookmarks.sql b/lute/db/schema/migrations/20240525_create_textbookmarks.sql
new file mode 100644
index 000000000..5aac15775
--- /dev/null
+++ b/lute/db/schema/migrations/20240525_create_textbookmarks.sql
@@ -0,0 +1,17 @@
+-- https://www.sqlitetutorial.net/sqlite-alter-table/
+
+BEGIN TRANSACTION;
+
+PRAGMA foreign_keys=on;
+
+-- Text bookmarks table
+CREATE TABLE textbookmarks (
+    "TbID" INTEGER PRIMARY KEY,
+    "TbTxID" INTEGER NOT NULL,
+    "TbTitle" VARCHAR(200) NOT NULL,
+    FOREIGN KEY("TbTxID") REFERENCES "texts" ("TxID") ON DELETE CASCADE
+);
+
+PRAGMA foreign_keys=off;
+
+COMMIT;
\ No newline at end of file
diff --git a/lute/db/schema/migrations/20240815_clean_up_bad_wordtags.sql b/lute/db/schema/migrations/20240815_clean_up_bad_wordtags.sql
new file mode 100644
index 000000000..ec7dd600b
--- /dev/null
+++ b/lute/db/schema/migrations/20240815_clean_up_bad_wordtags.sql
@@ -0,0 +1,7 @@
+-- Clean up job per GitHub issue https://github.com/LuteOrg/lute-v3/issues/455
+--
+-- When users deleted Term Tags through the UI, the records in the wordtags
+-- table weren't being deleted properly, leaving orphaned records that could
+-- cause problems later. This script deletes those orphaned wordtags records.
+
+delete from wordtags where WtTgID not in (select TgID from tags);
diff --git a/lute/db/schema/migrations/20241103_change_lastbackup_to_user_setting.sql b/lute/db/schema/migrations/20241103_change_lastbackup_to_user_setting.sql
new file mode 100644
index 000000000..df3463a03
--- /dev/null
+++ b/lute/db/schema/migrations/20241103_change_lastbackup_to_user_setting.sql
@@ -0,0 +1,3 @@
+-- Change lastbackup from system to user key.
+ +update settings set StKeyType = 'user' where StKey = 'lastbackup'; diff --git a/lute/db/schema/migrations/20241214_add_SeTextLC.sql b/lute/db/schema/migrations/20241214_add_SeTextLC.sql new file mode 100644 index 000000000..a247d5f2f --- /dev/null +++ b/lute/db/schema/migrations/20241214_add_SeTextLC.sql @@ -0,0 +1,2 @@ +-- add sentences.SeTextLC, for https://github.com/LuteOrg/lute-v3/issues/531 +alter table sentences add column SeTextLC TEXT null; diff --git a/lute/db/schema/migrations/20241220_fix_for_wordsread_table_load.sql b/lute/db/schema/migrations/20241220_fix_for_wordsread_table_load.sql new file mode 100644 index 000000000..2ba5ad6b1 --- /dev/null +++ b/lute/db/schema/migrations/20241220_fix_for_wordsread_table_load.sql @@ -0,0 +1,11 @@ +-- Manual fix for 20241221_add_wordsread_table.sql, which will be run after this script! +-- +-- Hacky fix: a user had a problem during startup and running of the above script +-- at query +-- insert into wordsread (WrLgID, WrTxID, WrReadDate, WrWordCount) +-- select bklgid, txid, txreaddate, txwordcount from texts inner join books on bkid=txbkid where txreaddate is not null; +-- b/c somehow a text had txreaddate not null, but txwordcount = null. +-- +-- SHOULD NEVER HAVE HAPPENED but what are you going to do. + +update texts set txwordcount = 0 where txwordcount is null and txreaddate is not null; diff --git a/lute/db/schema/migrations/20241221_add_wordsread_table.sql b/lute/db/schema/migrations/20241221_add_wordsread_table.sql new file mode 100644 index 000000000..214ce30af --- /dev/null +++ b/lute/db/schema/migrations/20241221_add_wordsread_table.sql @@ -0,0 +1,14 @@ +CREATE TABLE IF NOT EXISTS "wordsread" ( + "WrID" INTEGER NOT NULL, + "WrLgID" INTEGER NOT NULL, + "WrTxID" INTEGER NULL, + "WrReadDate" DATETIME NOT NULL, + "WrWordCount" INTEGER NOT NULL, + PRIMARY KEY ("WrID"), + FOREIGN KEY("WrTxID") REFERENCES "texts" ("TxID") ON DELETE SET NULL, + FOREIGN KEY("WrLgID") REFERENCES "languages" ("LgID") ON UPDATE NO ACTION ON DELETE CASCADE +); + +-- load initial data. +insert into wordsread (WrLgID, WrTxID, WrReadDate, WrWordCount) +select bklgid, txid, txreaddate, txwordcount from texts inner join books on bkid=txbkid where txreaddate is not null; diff --git a/lute/db/schema/migrations/20241221_clean_up_missing_relationships.sql b/lute/db/schema/migrations/20241221_clean_up_missing_relationships.sql new file mode 100644 index 000000000..563e5944f --- /dev/null +++ b/lute/db/schema/migrations/20241221_clean_up_missing_relationships.sql @@ -0,0 +1,25 @@ +-- Clean up bad data, where relationships are invalid. +-- +-- Per issue 460, the pragma foreign_keys was not ON, so it's possible +-- (though unlikely) that some data in the db is bad/unreachable. +-- +-- This is being done as a one-time fix, rather than as a repeatable +-- migration, as it would be very annoying if the data model changed +-- and I forgot to update the script!! 
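-- (Illustrative sanity check, mirroring the first delete below: the number
-- of rows that will be removed can be previewed with e.g.
--
--   select count(*) from languagedicts
--   where LdLgID not in (select LgID from languages);
--
-- and similarly for each of the other tables.)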
+ +DELETE FROM languagedicts WHERE LdLgID NOT IN (SELECT LgID FROM languages); +DELETE FROM wordsread WHERE WrLgID NOT IN (SELECT LgID FROM languages); + +DELETE FROM books WHERE BkLgID NOT IN (SELECT LgID FROM languages); +DELETE FROM bookstats WHERE BkID NOT IN (SELECT BkID FROM books); +DELETE FROM booktags WHERE BtBkID NOT IN (SELECT BkID FROM books) OR BtT2ID NOT IN (SELECT T2ID FROM tags2); + +DELETE FROM texts WHERE TxBkID NOT IN (SELECT BkID FROM books); +DELETE FROM textbookmarks WHERE TbTxID NOT IN (SELECT TxID FROM texts); +DELETE FROM sentences WHERE SeTxID NOT IN (SELECT TxID FROM texts); +DELETE FROM wordsread WHERE WrTxID IS NOT NULL AND WrTxID NOT IN (SELECT TxID FROM texts); + +DELETE FROM wordtags WHERE WtWoID NOT IN (SELECT WoID FROM words) OR WtTgID NOT IN (SELECT TgID FROM tags); +DELETE FROM wordimages WHERE WiWoID NOT IN (SELECT WoID FROM words); +DELETE FROM wordflashmessages WHERE WfWoID NOT IN (SELECT WoID FROM words); +DELETE FROM wordparents WHERE WpWoID NOT IN (SELECT WoID FROM words) OR WpParentWoID NOT IN (SELECT WoID FROM words); diff --git a/lute/db/schema/migrations/20250102_add_TxStartDate.sql b/lute/db/schema/migrations/20250102_add_TxStartDate.sql new file mode 100644 index 000000000..111d7a02b --- /dev/null +++ b/lute/db/schema/migrations/20250102_add_TxStartDate.sql @@ -0,0 +1,44 @@ +-- Add field +alter table texts add column TxStartDate datetime null; + +-- Originally I had set the TxStartDate using best guesses, but now +-- feel that that isn't justified. Too many assumptions, too much to +-- mess up. + +/* +-- Assume that pages were started 10 mins before the TxReadDate. +update texts set TxStartDate = datetime(TxReadDate, '-10 minutes') WHERE TxReadDate is not null; + +-- Set the start date for the current text in each book if needed (i.e. if any page has been marked +-- read in that book already). +-- This assumes e.g. that the user clicked "mark as read" and immediately started the next page. 
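-- (A concrete illustration, with an invented date: if the latest TxReadDate
-- in a book were 2024-06-01 10:00, the update below would have stamped the
-- book's current page with TxStartDate = 2024-06-01 10:00.)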
+UPDATE texts +SET TxStartDate = ( + SELECT MAX(T.TxReadDate) + FROM texts T + WHERE T.TxReadDate IS NOT NULL + AND T.TxBkID = texts.TxBkID +) +WHERE TxStartDate IS NULL +AND TxID IN ( + SELECT BkCurrentTxID + FROM books + WHERE BkCurrentTxID <> 0 +) +AND TxBkID IN ( + SELECT DISTINCT TxBkID + FROM texts + WHERE TxReadDate IS NOT NULL +); +*/ + +-- After check: +/* +select TxID, TxStartDate, TxReadDate from texts +WHERE TxID IN ( + SELECT BkCurrentTxID + FROM books + WHERE BkCurrentTxID <> 0 +) +AND TxReadDate IS NULL; +*/ diff --git a/lute/db/schema/migrations/20250206_create_srsexportspecs.sql b/lute/db/schema/migrations/20250206_create_srsexportspecs.sql new file mode 100644 index 000000000..c848e3361 --- /dev/null +++ b/lute/db/schema/migrations/20250206_create_srsexportspecs.sql @@ -0,0 +1,12 @@ +-- srs export specs + +CREATE TABLE IF NOT EXISTS "srsexportspecs" ( + "SrsID" INTEGER NOT NULL, + "SrsExportName" VARCHAR(200) NOT NULL UNIQUE, + "SrsCriteria" VARCHAR(1000) NOT NULL, + "SrsDeckName" VARCHAR(200) NOT NULL, + "SrsNoteType" VARCHAR(200) NOT NULL, + "SrsFieldMapping" VARCHAR(1000) NOT NULL, + "SrsActive" TINYINT NOT NULL DEFAULT '1', + PRIMARY KEY ("SrsID") +); diff --git a/lute/db/schema/migrations_repeatable/20230408_194151_create_trig_words_update_WoStatusChanged.sql b/lute/db/schema/migrations_repeatable/20230408_194151_create_trig_words_update_WoStatusChanged.sql deleted file mode 100644 index ef3115ab0..000000000 --- a/lute/db/schema/migrations_repeatable/20230408_194151_create_trig_words_update_WoStatusChanged.sql +++ /dev/null @@ -1,11 +0,0 @@ -DROP TRIGGER IF EXISTS trig_words_update_WoStatusChanged; - -CREATE TRIGGER trig_words_update_WoStatusChanged -AFTER UPDATE OF WoStatus ON words -FOR EACH ROW -WHEN old.WoStatus <> new.WoStatus -BEGIN - UPDATE words - SET WoStatusChanged = CURRENT_TIMESTAMP - WHERE WoID = NEW.WoID; -END; diff --git a/lute/db/schema/migrations_repeatable/trig_wordparents.sql b/lute/db/schema/migrations_repeatable/trig_wordparents.sql new file mode 100644 index 000000000..bdafa72fb --- /dev/null +++ b/lute/db/schema/migrations_repeatable/trig_wordparents.sql @@ -0,0 +1,57 @@ +DROP TRIGGER IF EXISTS trig_wordparents_after_insert_update_parent_WoStatus_if_following; + +CREATE TRIGGER trig_wordparents_after_insert_update_parent_WoStatus_if_following +-- created by db/schema/migrations_repeatable/trig_wordparents.sql +AFTER INSERT ON wordparents +BEGIN + UPDATE words + SET WoStatus = ( + select WoStatus from words where WoID = new.WpWoID + ) + WHERE WoID = new.WpParentWoID + AND 1 = ( + SELECT COUNT(*) + FROM wordparents + INNER JOIN words ON WoID = WpWoID + WHERE WoSyncStatus = 1 + AND WoID = new.WpWoID + ); +END; + + +-- Delete old bad trigger. +DROP TRIGGER IF EXISTS trig_wordparents_after_delete_change_WoSyncStatus; + +/* +-- This trigger isn't correct, per issue 416: When changing a term's +-- parents, the existing parent might be deleted first before adding +-- the new parent. If this trigger gets fired before the new parent +-- is assigned, the term will be set to WoSyncStatus = 0, even if the +-- user wants the term to follow the new parent's status. Since we +-- can't say for sure when sqlalchemy will actually make database +-- changes for child records (i.e., will adding new children happen +-- before deleting old?), this trigger on its own isn't good enough. 
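-- To illustrate the ordering problem with invented ids: reassigning term 10
-- from parent 20 to parent 30 may execute as
--
--   delete from wordparents where WpWoID = 10 and WpParentWoID = 20;
--   -- (a BEFORE DELETE trigger fires here, zeroing term 10's WoSyncStatus)
--   insert into wordparents (WpWoID, WpParentWoID) values (10, 30);
--
-- which would leave WoSyncStatus = 0 even though term 10 should now follow
-- parent 30.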
+ + +CREATE TRIGGER trig_wordparents_after_delete_change_WoSyncStatus +-- created by db/schema/migrations_repeatable/trig_wordparents.sql +-- +-- This is a data sanity method only: if all of a term's parents are deleted, +-- then the term must have WoSyncStatus = 0. +-- +-- Issue 416: We can't simply set WoSyncStatus = 0 on parent deletion, +-- because the user may have _changed_ the parents, but still want to +-- follow the status for the new parent. +BEFORE DELETE ON wordparents +FOR EACH ROW +BEGIN + UPDATE words + SET WoSyncStatus = 0 + WHERE WoID = old.WpWoID + AND NOT EXISTS ( + SELECT 1 FROM wordparents + WHERE WpWoID = OLD.WpWoID + AND WpParentWoID != OLD.WpParentWoID + ); +END; +*/ diff --git a/lute/db/schema/migrations_repeatable/trig_words.sql b/lute/db/schema/migrations_repeatable/trig_words.sql new file mode 100644 index 000000000..4f5c4c584 --- /dev/null +++ b/lute/db/schema/migrations_repeatable/trig_words.sql @@ -0,0 +1,76 @@ +DROP TRIGGER IF EXISTS trig_words_after_update_WoStatus_if_following_parent; + +CREATE TRIGGER trig_words_after_update_WoStatus_if_following_parent +-- created by db/schema/migrations_repeatable/trig_words.sql +AFTER UPDATE OF WoStatus, WoSyncStatus ON words +FOR EACH ROW +WHEN (old.WoStatus <> new.WoStatus or (old.WoSyncStatus = 0 and new.WoSyncStatus = 1)) +BEGIN + UPDATE words + SET WoStatus = new.WoStatus + WHERE WoID in ( + -- single parent children that are following this term. + select WpWoID + from wordparents + inner join words on WoID = WpWoID + where WoSyncStatus = 1 + and WpParentWoID = old.WoID + group by WpWoID + having count(*) = 1 + + UNION + + -- The parent of this term, + -- if this term has a single parent and has "follow parent" + select WpParentWoID + from wordparents + inner join words on WoID = WpWoID + where WoSyncStatus = 1 + and WoID = old.WoID + group by WpWoID + having count(*) = 1 + ); +END; + + +DROP TRIGGER IF EXISTS trig_words_update_WoStatusChanged; + +CREATE TRIGGER trig_words_update_WoStatusChanged +-- created by db/schema/migrations_repeatable/trig_words.sql +AFTER UPDATE OF WoStatus ON words +FOR EACH ROW +WHEN old.WoStatus <> new.WoStatus +BEGIN + UPDATE words + SET WoStatusChanged = CURRENT_TIMESTAMP + WHERE WoID = NEW.WoID; +END; + + +DROP TRIGGER IF EXISTS trig_words_update_WoCreated_if_no_longer_unknown; + +CREATE TRIGGER trig_words_update_WoCreated_if_no_longer_unknown +-- created by db/schema/migrations_repeatable/trig_words.sql +AFTER UPDATE OF WoStatus ON words +FOR EACH ROW +WHEN old.WoStatus <> new.WoStatus and old.WoStatus = 0 +BEGIN + UPDATE words + SET WoCreated = CURRENT_TIMESTAMP + WHERE WoID = NEW.WoID; +END; + + +DROP TRIGGER IF EXISTS trig_word_after_delete_change_WoSyncStatus_for_orphans; + +CREATE TRIGGER trig_word_after_delete_change_WoSyncStatus_for_orphans +-- created by db/schema/migrations_repeatable/trig_words.sql +-- +-- If a term is deleted, any orphaned children must +-- be updated to have WoSyncStatus = 0. +AFTER DELETE ON words +BEGIN + UPDATE words + SET WoSyncStatus = 0 + WHERE WoID NOT IN (SELECT WpWoID FROM wordparents); +END; diff --git a/lute/db/setup/main.py b/lute/db/setup/main.py index c4e0410c5..88b09b0c5 100644 --- a/lute/db/setup/main.py +++ b/lute/db/setup/main.py @@ -73,7 +73,7 @@ class Setup: # pylint: disable=too-few-public-methods Main setup class, coordinates other classes. 
""" - def __init__( # pylint: disable=too-many-arguments + def __init__( # pylint: disable=too-many-arguments,too-many-positional-arguments self, db_filename: str, baseline_schema_file: str, diff --git a/lute/dev_api/routes.py b/lute/dev_api/routes.py index 65f4b8782..056680551 100644 --- a/lute/dev_api/routes.py +++ b/lute/dev_api/routes.py @@ -14,14 +14,15 @@ safeguards, so they can only be run against a test_ db. """ +import os from sqlalchemy import text from flask import Blueprint, current_app, Response, jsonify, redirect, flash from lute.models.language import Language -from lute.models.setting import UserSetting +from lute.models.repositories import UserSettingRepository import lute.parse.registry from lute.db import db import lute.db.management -import lute.db.demo +from lute.db.demo import Service as DemoService bp = Blueprint("dev_api", __name__, url_prefix="/dev_api") @@ -37,7 +38,7 @@ def _ensure_is_test_db(): @bp.route("/wipe_db", methods=["GET"]) def wipe_db(): "Clean it all." - lute.db.management.delete_all_data() + lute.db.management.delete_all_data(db.session) flash("db wiped") return redirect("/", 302) @@ -45,8 +46,10 @@ def wipe_db(): @bp.route("/load_demo", methods=["GET"]) def load_demo(): "Clean out everything, and load the demo." - lute.db.management.delete_all_data() - lute.db.demo.load_demo_data() + lute.db.management.delete_all_data(db.session) + demosvc = DemoService(db.session) + demosvc.set_load_demo_flag() + demosvc.load_demo_data() flash("demo loaded") return redirect("/", 302) @@ -54,11 +57,14 @@ def load_demo(): @bp.route("/load_demo_languages", methods=["GET"]) def load_demo_languages(): "Clean out everything, and load the demo langs with dummy dictionaries." - lute.db.management.delete_all_data() - lute.db.demo.load_demo_languages() + lute.db.management.delete_all_data(db.session) + demosvc = DemoService(db.session) + demosvc.load_demo_languages() langs = db.session.query(Language).all() for lang in langs: - lang.dict_1_uri = f"/dev_api/dummy_dict/{lang.name}/###" + d = lang.dictionaries[0] + d.dicturi = f"/dev_api/dummy_dict/{lang.name}/[LUTE]" + d.dicttype = "embeddedhtml" # Ensure not pop-up db.session.add(lang) db.session.commit() return redirect("/", 302) @@ -67,7 +73,8 @@ def load_demo_languages(): @bp.route("/load_demo_stories", methods=["GET"]) def load_demo_stories(): "Stories only. No db wipe." - lute.db.demo.load_demo_stories() + demosvc = DemoService(db.session) + demosvc.load_demo_stories() flash("stories loaded") return redirect("/", 302) @@ -111,6 +118,14 @@ def clean_val(v): return jsonify(content) +@bp.route("/execsql/", methods=["GET"]) +def exec_sql(sql): + "Execute arbitrary sql!!! NO CHECKS ARE DONE!" + db.session.execute(text(sql)) + db.session.commit() + return jsonify("ok") + + @bp.route("/dummy_dict//", methods=["GET"]) def dummy_language_dict(langname, term): "Fake language dictionary/term lookup." @@ -120,7 +135,7 @@ def dummy_language_dict(langname, term): @bp.route("/disable_parser//", methods=["GET"]) def disable_parser(parsername, renameto): "Hack: rename a parser in the registry so that languages can't find it." - p = lute.parse.registry.parsers + p = lute.parse.registry.__LUTE_PARSERS__ if parsername in p: p[renameto] = p.pop(parsername) langs = db.session.query(Language).all() @@ -145,13 +160,39 @@ def disable_parser(parsername, renameto): @bp.route("/disable_backup", methods=["GET"]) def disable_backup(): "Disables backup -- tests don't need to back up." 
-    UserSetting.set_value("backup_enabled", False)
+    repo = UserSettingRepository(db.session)
+    repo.set_value("backup_enabled", False)
     db.session.commit()
     flash("backup disabled")
     return redirect("/", 302)
 
 
-@bp.route("/throw_error", methods=["GET"])
-def throw_error():
+@bp.route("/throw_error/<message>", methods=["GET"])
+def throw_error(message):
     "Throw an error to ensure handler works!"
-    raise RuntimeError("testing handler")
+    raise RuntimeError(message)
+
+
+@bp.route("/fake_story.html", methods=["GET"])
+def fake_story():
+    "Return a fake story for import book test."
+    return Response(
+        """
+        <html>
+        <head>
+        <title>Mi perro.</title>
+        </head>
+        <body>
+        <p>Hola. Tengo un perro.</p>
+        </body>
+        </html>
+        """
+    )
+
+
+@bp.route("/temp_file_content/<filename>", methods=["GET"])
+def temp_file_content(filename):
+    "Get the content of the file."
+    fpath = os.path.join(current_app.env_config.temppath, filename)
+    s = ""
+    with open(fpath, "r", encoding="utf-8") as f:
+        s = f.read()
+    return s
diff --git a/lute/language/forms.py b/lute/language/forms.py
index ad1770737..a3aa08023 100644
--- a/lute/language/forms.py
+++ b/lute/language/forms.py
@@ -3,8 +3,39 @@
 """
 
 from flask_wtf import FlaskForm
-from wtforms import StringField, BooleanField, SelectField
+from wtforms import (
+    StringField,
+    IntegerField,
+    BooleanField,
+    SelectField,
+    FormField,
+    FieldList,
+    Form,
+    ValidationError,
+)
 from wtforms.validators import DataRequired
+from lute.models.language import LanguageDictionary
+
+
+class LanguageDictionaryForm(Form):
+    """
+    Language dictionary form, nested in Language form.
+    """
+
+    usefor = SelectField(
+        choices=[("terms", "Terms"), ("sentences", "Sentences")],
+        render_kw={"title": "Use dictionary for"},
+    )
+    dicttype = SelectField(
+        choices=[
+            ("embeddedhtml", "Embedded"),
+            ("popuphtml", "Pop-up window"),
+        ],
+        render_kw={"title": "Show as"},
+    )
+    dicturi = StringField("URL", validators=[DataRequired()])
+    is_active = BooleanField("Is active", render_kw={"title": "Is active?"})
+    sort_order = IntegerField("Sort", render_kw={"style": "display: none"})
 
 
 class LanguageForm(FlaskForm):
@@ -13,12 +44,10 @@ class LanguageForm(FlaskForm):
     """
 
     name = StringField("Name", validators=[DataRequired()])
-    dict_1_uri = StringField("Dictionary 1", validators=[DataRequired()])
-    dict_2_uri = StringField("Dictionary 2")
-    sentence_translate_uri = StringField(
-        "Sentence translation", validators=[DataRequired()]
+    dictionaries = FieldList(
+        FormField(LanguageDictionaryForm, default=LanguageDictionary)
     )
-    show_romanization = BooleanField("Show Romanization field")
+    show_romanization = BooleanField("Show Pronunciation field")
     right_to_left = BooleanField("Right-to-left")
 
     # Note! The choices have to be set in the routes!
@@ -31,7 +60,28 @@ class LanguageForm(FlaskForm):
     character_substitutions = StringField("Character substitutions")
     regexp_split_sentences = StringField(
-        "Split sentences at", validators=[DataRequired()]
+        "Split sentences at (default: all Unicode sentence terminators)"
     )
     exceptions_split_sentences = StringField("Split sentence exceptions")
-    word_characters = StringField("Word characters", validators=[DataRequired()])
+    word_characters = StringField(
+        "Word characters (default: all Unicode letters and marks)"
+    )
+
+    def validate_dictionaries(self, field):  # pylint: disable=unused-argument
+        "Dictionaries must be valid."
+
+        # raise ValueError(self.dictionaries.data)  # debugging
+        def _get_actives(usefor):
+            "Return dictionaries."
+            return [
+                d
+                for d in self.dictionaries.data
+                if d.get("usefor", "") == usefor and d.get("is_active")
+            ]
+
+        term_dicts = _get_actives("terms")
+        sentence_dicts = _get_actives("sentences")
+        if len(term_dicts) == 0:
+            raise ValidationError("Please add an active Terms dictionary")
+        if len(sentence_dicts) == 0:
+            raise ValidationError("Please add an active Sentences dictionary")
diff --git a/lute/language/routes.py b/lute/language/routes.py
index 0fdd0cefb..d664abf09 100644
--- a/lute/language/routes.py
+++ b/lute/language/routes.py
@@ -2,15 +2,14 @@
 /language endpoints.
""" -from sqlalchemy import func +from sqlalchemy import text from sqlalchemy.exc import IntegrityError from flask import Blueprint, current_app, render_template, redirect, url_for, flash from lute.models.language import Language -from lute.models.book import Book -from lute.models.term import Term +from lute.models.repositories import UserSettingRepository +from lute.language.service import Service from lute.language.forms import LanguageForm from lute.db import db -from lute.db.demo import predefined_languages from lute.parse.registry import supported_parsers bp = Blueprint("language", __name__, url_prefix="/language") @@ -19,47 +18,29 @@ @bp.route("/index") def index(): """ - List all languages. - - This includes the Book and Term count for each Language. These - counts are pulled in by subqueries, because Language doesn't have - "books" and "terms" members ... I was having trouble with session - management when these were added, and they're only used here, so - this is good enough for now. + List all languages, with book and term counts. """ - def create_count_subquery(class_, count_column): - # Re the pylint disable, ref - # https://github.com/pylint-dev/pylint/issues/8138 ... - ret = ( - db.session.query( - class_.language_id, - # pylint: disable=not-callable - func.count(class_.id).label(count_column), - ) - .group_by(class_.language_id) - .subquery() - ) - return ret - - # Create subqueries for counting books and terms - book_subquery = create_count_subquery(Book, "book_count") - term_subquery = create_count_subquery(Term, "term_count") - - # Query to join Language with book and term counts - query = ( - db.session.query( - Language, book_subquery.c.book_count, term_subquery.c.term_count - ) - .outerjoin(book_subquery, Language.id == book_subquery.c.language_id) - .outerjoin(term_subquery, Language.id == term_subquery.c.language_id) - ) - - results = query.all() - - results = [rec for rec in results if rec[0].is_supported is True] - - return render_template("language/index.html", language_data=results) + # Using plain sql, easier to get bulk quantities. + sql = """ + select LgID, LgName, book_count, term_count from languages + left outer join ( + select BkLgID, count(BkLgID) as book_count from books + group by BkLgID + ) bc on bc.BkLgID = LgID + left outer join ( + select WoLgID, count(WoLgID) as term_count from words + where WoStatus != 0 + group by WoLgID + ) tc on tc.WoLgID = LgID + order by LgName + """ + result = db.session.execute(text(sql)).all() + languages = [ + {"LgID": row[0], "LgName": row[1], "book_count": row[2], "term_count": row[3]} + for row in result + ] + return render_template("language/index.html", language_data=languages) def _handle_form(language, form) -> bool: @@ -78,14 +59,35 @@ def _handle_form(language, form) -> bool: flash(f"Language {language.name} updated", "success") result = True except IntegrityError as e: + current_app.db.session.rollback() msg = e.orig if "languages.LgName" in f"{e.orig}": - msg = f"{language.name} already exists." + msg = f"Language {form.name.data} already exists." flash(msg, "error") return result +def _add_hidden_dictionary_template_entry(form): + "Add a dummy placeholder dictionary to be used as a template." + # Add a dummy dictionary entry with dicturi __TEMPLATE__. + # + # This entry is used as a "template" when adding a new dictionary + # to the list of dictionaries (see templates/language/_form.html). + # This is the easiest way to ensure that new dictionary entries + # have the correct controls. 
+ # + # This dummy entry is not rendered on the form, or submitted + # when the form is submitted. Search for __TEMPLATE__ in + # templates/language/_form.html to see where it is handled. + form.dictionaries.append_entry({"dicturi": "__TEMPLATE__"}) + + +def _dropdown_parser_choices(): + "Get dropdown list of parser type name to name." + return [(a[0], a[1].name()) for a in supported_parsers()] + + @bp.route("/edit/", methods=["GET", "POST"]) def edit(langid): """ @@ -98,10 +100,13 @@ def edit(langid): return redirect(url_for("language.index")) form = LanguageForm(obj=language) - form.parser_type.choices = supported_parsers() + form.parser_type.choices = _dropdown_parser_choices() if _handle_form(language, form): return redirect("/") + + _add_hidden_dictionary_template_entry(form) + return render_template("language/edit.html", form=form, language=language) @@ -111,7 +116,8 @@ def new(langname): """ Create a new language. """ - predefined = predefined_languages() + service = Service(db.session) + predefined = service.supported_predefined_languages() language = Language() if langname is not None: candidates = [lang for lang in predefined if lang.name == langname] @@ -119,11 +125,24 @@ def new(langname): language = candidates[0] form = LanguageForm(obj=language) - form.parser_type.choices = supported_parsers() + form.parser_type.choices = _dropdown_parser_choices() if _handle_form(language, form): + # New language, so show everything b/c user should re-choose + # the default. + # + # Reason for this: a user may start off with just language X, + # so the current_language_id is set to X.id. If the user then + # adds language Y, the filter stays on X, which may be + # disconcerting/confusing. Forcing a reselect is painless and + # unambiguous. + repo = UserSettingRepository(db.session) + repo.set_value("current_language_id", 0) + db.session.commit() return redirect("/") + _add_hidden_dictionary_template_entry(form) + return render_template( "language/new.html", form=form, language=language, predefined=predefined ) @@ -137,5 +156,29 @@ def delete(langid): language = db.session.get(Language, langid) if not language: flash(f"Language {langid} not found") - Language.delete(language) + db.session.delete(language) + db.session.commit() return redirect(url_for("language.index")) + + +@bp.route("/list_predefined", methods=["GET"]) +def list_predefined(): + "Show supported predefined languages that are not already in the db." + service = Service(db.session) + predefined = service.supported_predefined_languages() + existing_langs = db.session.query(Language).all() + existing_names = [l.name for l in existing_langs] + new_langs = [p for p in predefined if p.name not in existing_names] + return render_template("language/list_predefined.html", predefined=new_langs) + + +@bp.route("/load_predefined/", methods=["GET"]) +def load_predefined(langname): + "Load a predefined language and its stories." + service = Service(db.session) + lang_id = service.load_language_def(langname) + repo = UserSettingRepository(db.session) + repo.set_value("current_language_id", lang_id) + db.session.commit() + flash(f"Loaded {langname} and sample book(s)") + return redirect("/") diff --git a/lute/language/service.py b/lute/language/service.py new file mode 100644 index 000000000..cb9f43d4c --- /dev/null +++ b/lute/language/service.py @@ -0,0 +1,132 @@ +"Language helper methods." 
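+
+# Rough usage sketch (names are those defined in this module; "Spanish"
+# assumes a matching definition.yaml under lute/db/language_defs):
+#
+#   service = Service(db.session)
+#   predefined = service.supported_predefined_languages()
+#   new_lang_id = service.load_language_def("Spanish")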
+ +import os +import re +from glob import glob +import yaml +from lute.models.language import Language +from lute.book.model import Book, Repository + +# from lute.utils.debug_helpers import DebugTimer + + +class LangDef: + "Language, built from language definition.yml, and .txt book files." + + # Map of definition.yaml directory to the yaml.safe_load content. + # The definition files never change, and loading them takes time, so + # cache it for better unit test performance. + yaml_cache = {} + + @classmethod + def _get_loaded_yaml(cls, definition_file_path): + "Get from cache, or load it and return it." + if definition_file_path not in LangDef.yaml_cache: + with open(definition_file_path, "r", encoding="utf-8") as df: + d = yaml.safe_load(df) + LangDef.yaml_cache[definition_file_path] = d + return LangDef.yaml_cache[definition_file_path] + + def __init__(self, directory): + "Build from files." + self.directory = directory + self.language_name = self._get_name(directory) + + def _get_name(self, directory): + def_file = os.path.join(directory, "definition.yaml") + d = LangDef._get_loaded_yaml(def_file) + return d["name"] + + @property + def language(self): + return self._load_lang_def(self.directory) + + @property + def books(self): + return self._get_books(self.directory, self.language_name) + + def _load_lang_def(self, directory): + "Load from file, must exist." + def_file = os.path.join(directory, "definition.yaml") + d = LangDef._get_loaded_yaml(def_file) + return Language.from_dict(d) + + def _get_books(self, directory, language_name): + "Get the stories." + books = [] + story_glob = os.path.join(directory, "*.txt") + for filename in glob(story_glob): + with open(filename, "r", encoding="utf-8") as f: + content = f.read() + title_match = re.search(r"title:\s*(.*)\n", content) + title = title_match.group(1).strip() + content = re.sub(r"#.*\n", "", content) + b = Book() + b.language_name = language_name + b.title = title + b.text = content + books.append(b) + return books + + +class Service: + "Service." + + def __init__(self, session): + self.session = session + self.lang_defs_cache = self._get_langdefs_cache() + + def _get_langdefs_cache(self): + "Load cache." + # dt = DebugTimer("_get_langdefs_cache", False) + # dt.step("start") + thisdir = os.path.dirname(__file__) + langdefs_dir = os.path.join(thisdir, "..", "db", "language_defs") + langdefs_dir = os.path.abspath(langdefs_dir) + # dt.step("got base directory") + cache = [] + def_glob = os.path.join(langdefs_dir, "**", "definition.yaml") + def_list = glob(def_glob) + # dt.step("globbed") + def_list.sort() + for f in def_list: + lang_dir, _ = os.path.split(f) + ld = LangDef(lang_dir) + # dt.step(f"build ld {ld.language_name}".ljust(30)) + cache.append(ld) + # dt.summary() + return cache + + def get_supported_defs(self): + "Return supported language definitions." + ret = [ld for ld in self.lang_defs_cache if ld.language.is_supported] + ret.sort(key=lambda x: x.language_name) + return ret + + def supported_predefined_languages(self): + "Supported Languages defined in yaml files." + return [d.language for d in self.get_supported_defs()] + + def get_language_def(self, lang_name): + "Get a lang def and its stories." + ret = [ld for ld in self.lang_defs_cache if ld.language_name == lang_name] + if len(ret) == 0: + raise RuntimeError(f"Missing language def name {lang_name}") + return ret[0] + + def load_language_def(self, lang_name): + "Load a language def and its stories, save to database." 
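+        # Flow: resolve the cached LangDef, persist the Language (the
+        # commit assigns its id), then add each story via the book
+        # Repository.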
+ load_def = self.get_language_def(lang_name) + lang = load_def.language + if not lang.is_supported: + raise RuntimeError(f"{lang_name} not supported, can't be loaded.") + + self.session.add(lang) + self.session.commit() + + r = Repository(self.session) + for b in load_def.books: + r.add(b) + r.commit() + + return lang.id diff --git a/lute/main.py b/lute/main.py index 43df5b55a..615acadbd 100644 --- a/lute/main.py +++ b/lute/main.py @@ -1,20 +1,23 @@ """ User entry point. -Start lute running on given port, or 5000 if not set. +Start lute running on given port, or 5001 if not set. e.g. python -m lute.main --port 5001 """ - +import errno import os import argparse import shutil import logging +import textwrap from waitress import serve -from lute.app_factory import create_app +from lute import __version__ +from lute.app_factory import create_app, data_initialization from lute.config.app_config import AppConfig +from lute.db import db logging.getLogger("waitress.queue").setLevel(logging.ERROR) logging.getLogger("natto").setLevel(logging.CRITICAL) @@ -26,7 +29,7 @@ def _print(s): """ if isinstance(s, str): s = s.split("\n") - msg = "\n".join([" " + lin.strip() for lin in s]) + msg = "\n".join(f" {lin}" for lin in s) print(msg, flush=True) @@ -41,63 +44,114 @@ def _create_prod_config_if_needed(): _print(["", "Using new production config.", ""]) -def start(port, config_file_path=None): +def _get_config_file_path(config_file_path=None): """ - Main entry point: Configure and init the app, and start. + Get final config file to use. Uses config file if set (throws if doesn't exist); otherwise, uses the prod config, creating a prod config if necessary. """ - _print(["", "Starting Lute:"]) - - app_config = None - if config_file_path is None: - _create_prod_config_if_needed() - _print(["Using default config"]) - app_config = AppConfig.create_from_config() + use_config = config_file_path + if config_file_path is not None: + _print(f"Using specified config: {config_file_path}") + elif os.path.exists("config.yml"): + _print("Using config.yml found in root") + use_config = "config.yml" else: - app_config = AppConfig(config_file_path) - _print([f"Using config: {config_file_path}"]) + _print("Using default config") + _create_prod_config_if_needed() + use_config = AppConfig.default_config_filename() - _print(["", "Initializing app."]) - app = create_app(app_config, output_func=_print) - _print(f"data path: {app_config.datapath}") - _print(f"database: {app_config.dbfilename}") - if app_config.is_docker: - _print("(Note these are container paths, not host paths.)") + ac = AppConfig(use_config) + _print(f" data path: {ac.datapath}") + _print(f" database: {ac.dbfilename}") + if ac.is_docker: + _print(" (Note these are container paths, not host paths.)") + _print("") - _print( - f""" - Running at: + return use_config - http://localhost:{port} - """ - ) + +def _start(args): + "Configure and start the app." + _print(f"\nStarting Lute version {__version__}.\n") + + config_file_path = _get_config_file_path(args.config) + app = create_app(config_file_path, output_func=_print) + with app.app_context(): + data_initialization(db.session, _print) close_msg = """ When you're finished reading, stop this process with Ctrl-C or your system equivalent. """ - if app_config.is_docker: + if app.env_config.is_docker: close_msg = """ When you're finished reading, stop this container with Ctrl-C, docker compose stop, or docker stop as appropriate. 
""" - _print(close_msg) - - serve(app, host="0.0.0.0", port=port) + _print(textwrap.dedent(close_msg)) + host_ip = "127.0.0.1" if args.local else "0.0.0.0" + ip_port = f"{host_ip}:{args.port}" + msg = f"""Lute v{__version__} is running on {ip_port}. Open a web browser and go to: -if __name__ == "__main__": + http://localhost:{args.port} + """ + _print(textwrap.dedent(msg)) + + try: + serve(app, host=host_ip, port=args.port) + except OSError as err: + if err.errno == errno.EADDRINUSE: + msg = [ + f"ERROR: port {args.port} is already in use.", + "please try adding a --port parameter, e.g.:", + "", + " python -m lute.main --port 9876", + "", + ] + _print(msg) + else: + # Throw back up, to get general error message + raise + + +def start(): + "Main entry point. Called via scripts and pyproject.toml." parser = argparse.ArgumentParser(description="Start lute.") parser.add_argument( - "--port", type=int, default=5000, help="Port number (default: 5000)" + "--local", + action="store_true", + help="Run local only (not accessible on other devices on the same network)", + ) + parser.add_argument( + "--port", type=int, default=5001, help="Port number (default: 5001)" ) parser.add_argument( "--config", help="Path to override config file. Uses lute/config/config.yml if not set.", ) - args = parser.parse_args() - start(args.port, args.config) + try: + _start(parser.parse_args()) + except Exception as e: # pylint: disable=broad-exception-caught + dashes = "-" * 50 + failmsg = f""" + {dashes} + Error during startup: + Type: {type(e)} + {e} + + Please check your setup and try again. + Ask for help on Discord, or report an issue on GitHub. + Additionally, help is available with --help. + {dashes} + """ + + print(textwrap.dedent(failmsg)) + + +if __name__ == "__main__": + start() diff --git a/lute/models/book.py b/lute/models/book.py index 97dec376d..e0ddb4748 100644 --- a/lute/models/book.py +++ b/lute/models/book.py @@ -2,8 +2,9 @@ Book entity. """ +import sqlite3 +from contextlib import closing from lute.db import db -from lute.parse.base import SentenceGroupIterator booktags = db.Table( "booktags", @@ -29,19 +30,6 @@ def make_book_tag(text, comment=""): tt.comment = comment return tt - @staticmethod - def find_by_text(text): - "Find a tag by text, or None if not found." - return db.session.query(BookTag).filter(BookTag.text == text).first() - - @staticmethod - def find_or_create_by_text(text): - "Return tag or create one." - ret = BookTag.find_by_text(text) - if ret is not None: - return ret - return BookTag.make_book_tag(text) - class Book( db.Model @@ -57,11 +45,14 @@ class Book( language_id = db.Column( "BkLgID", db.Integer, db.ForeignKey("languages.LgID"), nullable=False ) - word_count = db.Column("BkWordCount", db.Integer) source_uri = db.Column("BkSourceURI", db.String(length=1000)) current_tx_id = db.Column("BkCurrentTxID", db.Integer, default=0) archived = db.Column("BkArchived", db.Boolean, default=False) + audio_filename = db.Column("BkAudioFilename", db.String) + audio_current_pos = db.Column("BkAudioCurrentPos", db.Float) + audio_bookmarks = db.Column("BkAudioBookmarks", db.String) + language = db.relationship("Language") texts = db.relationship( "Text", @@ -95,43 +86,57 @@ def remove_book_tag(self, book_tag): def page_count(self): return len(self.texts) - @property - def is_supported(self): - "True if the book's language's parser is supported." 
- return self.language.is_supported - - @staticmethod - def create_book(title, language, fulltext, max_word_tokens_per_text=250): - """ - Create a book with given fulltext content, - splitting the content into separate Text objects with max - token count. - """ - tokens = language.parser.get_parsed_tokens(fulltext, language) - - def token_string(toks): - a = [t.token for t in toks] - ret = "".join(a) - ret = ret.replace("\r", "") - ret = ret.replace("¶", "\n") - return ret.strip() + def page_in_range(self, n): + "Return page number that is in the book's page count." + ret = max(n, 1) + ret = min(ret, self.page_count) + return ret + + def text_at_page(self, n): + "Return the text object at page n." + pagenum = self.page_in_range(n) + return self.texts[pagenum - 1] + + def _add_page(self, new_pagenum): + "Add new page, increment other page orders." + pages_after = [t for t in self.texts if t.order >= new_pagenum] + for t in pages_after: + t.order = t.order + 1 + t = Text(None, "", new_pagenum) + # TODO fix_refs: None first arg is garbage code. Passing self + # as the text's book causes a "SAWarning: Object of type + # not in session, add operation along 'Book.texts' will + # not proceed" warning ... so adding the text to the book + # manually is needed. The book's language is required to + # correctly parse the Text's text though ... + self.texts.append(t) + return t - b = Book(title, language) - b.word_count = len([t for t in tokens if t.is_word]) + def add_page_before(self, pagenum): + "Add page before page n, renumber all subsequent pages, return new page." + return self._add_page(self.page_in_range(pagenum)) - page_number = 0 - it = SentenceGroupIterator(tokens, max_word_tokens_per_text) - while toks := it.next(): - page_number += 1 - # Note the text is automatically added to b.texts! - t = Text(b, token_string(toks), page_number) + def add_page_after(self, pagenum): + "Add page after page n, renumber all subsequent pages, return new page." + return self._add_page(self.page_in_range(pagenum) + 1) - return b + def remove_page(self, pagenum): + "Remove page, renumber all subsequent pages." + # Don't delete page of single-page books. + if len(self.texts) == 1: + return + texts = [t for t in self.texts if t.order == pagenum] + if len(texts) == 0: + return + texts[0].book = None + pages_after = [t for t in self.texts if t.order > pagenum] + for t in pages_after: + t.order = t.order - 1 - @staticmethod - def find(book_id): - "Get by ID." - return db.session.query(Book).filter(Book.id == book_id).first() + @property + def is_supported(self): + "True if the book's language's parser is supported." 
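+        # Delegates to the language; e.g. a Japanese book reports False
+        # when MeCab is unavailable, so callers can filter such books out.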
+ return self.language.is_supported # TODO zzfuture fix: rename class and table to Page/pages @@ -145,10 +150,17 @@ class Text(db.Model): id = db.Column("TxID", db.Integer, primary_key=True) _text = db.Column("TxText", db.String, nullable=False) order = db.Column("TxOrder", db.Integer) + start_date = db.Column("TxStartDate", db.DateTime, nullable=True) _read_date = db.Column("TxReadDate", db.DateTime, nullable=True) bk_id = db.Column("TxBkID", db.Integer, db.ForeignKey("books.BkID"), nullable=False) + word_count = db.Column("TxWordCount", db.Integer, nullable=True) book = db.relationship("Book", back_populates="texts") + bookmarks = db.relationship( + "TextBookmark", + back_populates="text", + cascade="all, delete-orphan", + ) sentences = db.relationship( "Sentence", back_populates="text", @@ -179,7 +191,13 @@ def text(self): @text.setter def text(self, s): self._text = s - self._load_sentences() + if s.strip() == "": + return + toks = self._get_parsed_tokens() + wordtoks = [t for t in toks if t.is_word] + self.word_count = len(wordtoks) + if self._read_date is not None: + self._load_sentences_from_tokens(toks) @property def read_date(self): @@ -188,56 +206,82 @@ def read_date(self): @read_date.setter def read_date(self, s): self._read_date = s - self._load_sentences() - - def _load_sentences(self): - """ - Parse the current text and create Sentence objects. - Sentences are only needed once the text has been read. - """ - self.remove_sentences() - - if self.read_date is None: - return + # Ensure loaded. + self.load_sentences() + def _get_parsed_tokens(self): + "Return the tokens." lang = self.book.language - parser = lang.parser - parsedtokens = parser.get_parsed_tokens(self.text, lang) + return lang.parser.get_parsed_tokens(self.text, lang) + def _load_sentences_from_tokens(self, parsedtokens): + "Save sentences using the tokens." + parser = self.book.language.parser + self._remove_sentences() curr_sentence_tokens = [] - sentence_number = 1 + sentence_num = 1 + + def _add_current(): + "Create and add sentence from current state." + if curr_sentence_tokens: + se = Sentence.from_tokens(curr_sentence_tokens, parser, sentence_num) + self._add_sentence(se) + # Reset for the next sentence. + curr_sentence_tokens.clear() for pt in parsedtokens: curr_sentence_tokens.append(pt) if pt.is_end_of_sentence: - se = Sentence.from_tokens(curr_sentence_tokens, sentence_number) - self.add_sentence(se) - - # Reset for the next sentence. - curr_sentence_tokens = [] - sentence_number += 1 + _add_current() + sentence_num += 1 # Add any stragglers. - if len(curr_sentence_tokens) > 0: - se = Sentence.from_tokens(curr_sentence_tokens, sentence_number) - self.add_sentence(se) + _add_current() - def add_sentence(self, sentence): + def load_sentences(self): + """ + Parse the current text and create Sentence objects. + """ + toks = self._get_parsed_tokens() + self._load_sentences_from_tokens(toks) + + def _add_sentence(self, sentence): "Add a sentence to the Text." if sentence not in self.sentences: self.sentences.append(sentence) sentence.text = self - def remove_sentences(self): + def _remove_sentences(self): "Remove all sentence from the Text." for sentence in self.sentences: sentence.text = None self.sentences = [] - @staticmethod - def find(text_id): - "Get by ID." - return db.session.query(Text).filter(Text.id == text_id).first() + +class WordsRead(db.Model): + """ + Tracks reading events for Text entities. 
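+
+    The tx_id foreign key declared below uses ondelete="SET NULL", so the
+    word-count history survives deletion of the text it points at.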
+ """ + + __tablename__ = "wordsread" + id = db.Column("WrID", db.Integer, primary_key=True) + language_id = db.Column( + "WrLgID", db.Integer, db.ForeignKey("languages.LgID"), nullable=False + ) + tx_id = db.Column( + "WrTxID", + db.Integer, + db.ForeignKey("texts.TxID", ondelete="SET NULL"), + nullable=True, + ) + read_date = db.Column("WrReadDate", db.DateTime, nullable=False) + word_count = db.Column("WrWordCount", db.Integer, nullable=False) + + def __init__(self, text, read_date, word_count): + self.tx_id = text.id + self.language_id = text.book.language.id + self.read_date = read_date + self.word_count = word_count class Sentence(db.Model): @@ -253,32 +297,87 @@ class Sentence(db.Model): tx_id = db.Column("SeTxID", db.Integer, db.ForeignKey("texts.TxID"), nullable=False) order = db.Column("SeOrder", db.Integer, default=1) text_content = db.Column("SeText", db.Text, default="") + textlc_content = db.Column("SeTextLC", db.Text) text = db.relationship("Text", back_populates="sentences") - def __init__(self, text_content="", text=None, order=1): - self.text_content = text_content - self.text = text - self.order = order + def set_lowercase_text(self, parser): + """ + Load textlc_content from text_content. + + If a call to sqlite's LOWER() function for the text_content + returns the same lowercase text as a call to the parser, + store '*' as the lowercase text. This seeming hack can save a + pile of space: for my ~30meg db of ~135K sentences, only 750 + sentences were different when lowercased by the LOWER() vs by + the parser. + + This method is public for use in the data_cleanup module. + """ + + def _get_sql_lower(input_string): + "Returns result of sqlite LOWER call of input_string." + if input_string is None: + return None + with sqlite3.connect(":memory:") as conn, closing(conn.cursor()) as cur: + cur.execute("SELECT LOWER(?)", (input_string,)) + result = cur.fetchone() + return result[0] + + lcased = parser.get_lowercase(self.text_content) + if lcased == _get_sql_lower(self.text_content): + lcased = "*" + self.textlc_content = lcased @staticmethod - def from_tokens(tokens, senumber): + def from_tokens(tokens, parser, senumber): """ Create a new Sentence from ParsedTokens. """ - ptstrings = [t.token for t in tokens] - - zws = chr(0x200B) # Zero-width space. - s = zws.join(ptstrings) - s = s.strip(" ") - - # The zws is added at the start and end of each - # sentence, to standardize the string search when - # looking for terms. - s = zws + s + zws + def _sentence_string(string_array): + "Create properly-zws-joined sentence string." + zws = chr(0x200B) # Zero-width space. + s = zws.join(string_array).strip(" ") + # The zws is added at the start and end of each + # sentence, to standardize the string search when + # looking for terms. + return zws + s + zws sentence = Sentence() sentence.order = senumber - sentence.text_content = s + sentence.text_content = _sentence_string([t.token for t in tokens]) + sentence.set_lowercase_text(parser) return sentence + + +class TextBookmark(db.Model): + """ + Bookmarks for a given Book page + + The TextBookmark includes a title + """ + + __tablename__ = "textbookmarks" + + id = db.Column("TbID", db.Integer, primary_key=True) + tx_id = db.Column( + "TbTxID", + db.Integer, + db.ForeignKey("texts.TxID", ondelete="CASCADE"), + nullable=False, + ) + title = db.Column("TbTitle", db.Text, nullable=False) + + text = db.relationship("Text", back_populates="bookmarks") + + +class BookStats(db.Model): + "The stats table." 
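+    # Presumably a denormalized per-book cache, computed elsewhere:
+    # e.g. distinctterms=500, distinctunknowns=120, unknownpercent=24
+    # (illustrative values only).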
+ __tablename__ = "bookstats" + + BkID = db.Column(db.Integer, primary_key=True) + distinctterms = db.Column(db.Integer) + distinctunknowns = db.Column(db.Integer) + unknownpercent = db.Column(db.Integer) + status_distribution = db.Column(db.String, nullable=True) diff --git a/lute/models/language.py b/lute/models/language.py index 7f2652479..8a927384e 100644 --- a/lute/models/language.py +++ b/lute/models/language.py @@ -3,11 +3,37 @@ """ import re -from sqlalchemy import text, func from lute.db import db from lute.parse.registry import get_parser, is_supported +class LanguageDictionary(db.Model): + """ + Language dictionary. + """ + + __tablename__ = "languagedicts" + + id = db.Column("LdID", db.SmallInteger, primary_key=True) + language_id = db.Column( + "LdLgID", db.Integer, db.ForeignKey("languages.LgID"), nullable=False + ) + language = db.relationship("Language", back_populates="dictionaries") + usefor = db.Column("LdUseFor", db.String(20), nullable=False) + dicttype = db.Column("LdType", db.String(20), nullable=False) + dicturi = db.Column("LdDictURI", db.String(200), nullable=False) + is_active = db.Column("LdIsActive", db.Boolean, default=True) + sort_order = db.Column("LdSortOrder", db.SmallInteger, nullable=False) + + # HACK: pre-pend '*' to URLs that need to open a new window. + # This is a relic of the original code, and should be changed. + # TODO remove-asterisk-hack: remove * from URL start. + def make_uri(self): + "Hack add asterisk." + prepend = "*" if self.dicttype == "popuphtml" else "" + return f"{prepend}{self.dicturi}" + + class Language( db.Model ): # pylint: disable=too-few-public-methods, too-many-instance-attributes @@ -19,15 +45,19 @@ class Language( id = db.Column("LgID", db.SmallInteger, primary_key=True) name = db.Column("LgName", db.String(40)) - dict_1_uri = db.Column("LgDict1URI", db.String(200)) - dict_2_uri = db.Column("LgDict2URI", db.String(200)) - sentence_translate_uri = db.Column("LgGoogleTranslateURI", db.String(200)) + + dictionaries = db.relationship( + "LanguageDictionary", + back_populates="language", + order_by="LanguageDictionary.sort_order", + lazy="subquery", + cascade="all, delete-orphan", + ) + character_substitutions = db.Column("LgCharacterSubstitutions", db.String(500)) regexp_split_sentences = db.Column("LgRegexpSplitSentences", db.String(500)) exceptions_split_sentences = db.Column("LgExceptionsSplitSentences", db.String(500)) _word_characters = db.Column("LgRegexpWordCharacters", db.String(500)) - remove_spaces = db.Column("LgRemoveSpaces", db.Boolean) - split_each_char = db.Column("LgSplitEachChar", db.Boolean) right_to_left = db.Column("LgRightToLeft", db.Boolean) show_romanization = db.Column("LgShowRomanization", db.Boolean) parser_type = db.Column("LgParserType", db.String(20)) @@ -37,11 +67,10 @@ def __init__(self): self.regexp_split_sentences = ".!?" self.exceptions_split_sentences = "Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds." self.word_characters = "a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ" - self.remove_spaces = False - self.split_each_char = False self.right_to_left = False self.show_romanization = False self.parser_type = "spacedel" + self.dictionaries = [] def __repr__(self): return f"" @@ -74,48 +103,19 @@ def word_characters(self): def word_characters(self, s): self._word_characters = self._get_python_regex_pattern(s) - @classmethod - def all_dictionaries(cls): - """ - All dictionaries for all languages. 
- """ - languages = Language.query.all() - language_data = {} - for language in languages: - term_dicts = [language.dict_1_uri, language.dict_2_uri] - term_dicts = [uri for uri in term_dicts if uri is not None] - - data = {"term": term_dicts, "sentence": language.sentence_translate_uri} - - language_data[language.id] = data - return language_data + def active_dict_uris(self, use_for): + "Get sorted uris for active dicts of correct type." + actives = [d for d in self.dictionaries if d.is_active and d.usefor == use_for] + sorted_actives = sorted(actives, key=lambda x: x.sort_order) + return [d.make_uri() for d in sorted_actives] - @staticmethod - def delete(language): - """ - Hacky method to delete language and all terms and books - associated with it. - - There is _certainly_ a better way to do this using - Sqlalchemy relationships and cascade deletes, but I - was running into problems with it (things not cascading, - or warnings ("SAWarning: Object of type not in - session, add operation along 'Language.terms' will not - proceed") during test runs. It would be nice to have - a "correct" mapping, but this is good enough for now. - - TODO zzfuture fix: fix Language-Book and -Term mappings. - """ - sqls = [ - "pragma foreign_keys = ON", - f"delete from languages where LgID = {language.id}", - ] - for s in sqls: - db.session.execute(text(s)) - db.session.commit() + @property + def sentence_dict_uris(self): + return self.active_dict_uris("sentences") @property def parser(self): + "Note: this throws if the parser is not supported!!!" return get_parser(self.parser_type) @property @@ -129,16 +129,81 @@ def get_parsed_tokens(self, s): def get_lowercase(self, s) -> str: return self.parser.get_lowercase(s) - @staticmethod - def find(language_id): - "Get by ID." - return db.session.query(Language).filter(Language.id == language_id).first() + def to_dict(self): + "Return dictionary of data, for serialization." + ret = {} + ret["name"] = self.name + ret["dictionaries"] = [] + for d in self.dictionaries: + dd = {} + dd["for"] = d.usefor + dd["type"] = d.dicttype.replace("html", "") + dd["url"] = d.dicturi + dd["active"] = d.is_active + ret["dictionaries"].append(dd) + ret["show_romanization"] = self.show_romanization + ret["right_to_left"] = self.right_to_left + ret["parser_type"] = self.parser_type + ret["character_substitutions"] = self.character_substitutions + ret["split_sentences"] = self.regexp_split_sentences + ret["split_sentence_exceptions"] = self.exceptions_split_sentences + ret["word_chars"] = self.word_characters + return ret @staticmethod - def find_by_name(name): - "Get by name." - return ( - db.session.query(Language) - .filter(func.lower(Language.name) == func.lower(name)) - .first() - ) + def from_dict(d): + "Create new Language from dictionary d." 
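+        # Example input shape (a sketch mirroring to_dict() above and the
+        # definition.yaml files; the URL is illustrative only):
+        #   {"name": "Spanish", "parser_type": "spacedel",
+        #    "split_sentences": ".!?",
+        #    "dictionaries": [{"for": "terms", "type": "embedded",
+        #                      "url": "https://example.com/define"}]}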
+ + lang = Language() + + def load(key, method): + if key in d: + val = d[key] + # Handle boolean values + if isinstance(val, str): + temp = val.lower() + if temp == "true": + val = True + elif temp == "false": + val = False + setattr(lang, method, val) + + # Define mappings for fields + mappings = { + "name": "name", + "show_romanization": "show_romanization", + "right_to_left": "right_to_left", + "parser_type": "parser_type", + "character_substitutions": "character_substitutions", + "split_sentences": "regexp_split_sentences", + "split_sentence_exceptions": "exceptions_split_sentences", + "word_chars": "word_characters", + } + + for key in d.keys(): + funcname = mappings.get(key, "") + if funcname: + load(key, funcname) + + ld_sort = 1 + for ld_data in d["dictionaries"]: + dtype = ld_data["type"] + if dtype == "embedded": + dtype = "embeddedhtml" + elif dtype == "popup": + dtype = "popuphtml" + else: + raise ValueError(f"Invalid dictionary type {dtype}") + + ld = LanguageDictionary() + # ld.language = lang -- if you do this, the dict is added twice. + ld.usefor = ld_data["for"] + ld.dicttype = dtype + ld.dicturi = ld_data["url"] + ld.is_active = ld_data.get("active", True) + + ld.sort_order = ld_sort + ld_sort += 1 + lang.dictionaries.append(ld) + + return lang diff --git a/lute/models/repositories.py b/lute/models/repositories.py new file mode 100644 index 000000000..ad075a9a4 --- /dev/null +++ b/lute/models/repositories.py @@ -0,0 +1,244 @@ +""" +Repositories. +""" + +from sqlalchemy import text as sqltext, and_, func +from lute.db import db +from lute.models.setting import UserSetting, BackupSettings, SystemSetting +from lute.models.language import Language +from lute.models.term import Term, TermTag +from lute.models.book import Book, BookTag + + +class SettingRepositoryBase: + "Repository." + + def __init__(self, session, classtype): + self.session = session + self.classtype = classtype + + def key_exists_precheck(self, keyname): + """ + Check key validity for certain actions. + """ + + def set_value(self, keyname, keyvalue): + "Set, but don't save, a setting." + self.key_exists_precheck(keyname) + s = ( + self.session.query(self.classtype) + .filter(self.classtype.key == keyname) + .first() + ) + if s is None: + s = self.classtype() + s.key = keyname + s.value = keyvalue + self.session.add(s) + + def key_exists(self, keyname): + "True if exists." + s = ( + self.session.query(self.classtype) + .filter(self.classtype.key == keyname) + .first() + ) + no_key = s is None + return not no_key + + def get_value(self, keyname): + "Get the saved key, or None if it doesn't exist." + self.key_exists_precheck(keyname) + s = ( + self.session.query(self.classtype) + .filter(self.classtype.key == keyname) + .first() + ) + if s is None: + return None + return s.value + + def delete_key(self, keyname): + "Delete a key." + s = ( + self.session.query(self.classtype) + .filter(self.classtype.key == keyname) + .first() + ) + if s is not None: + self.session.delete(s) + + +class MissingUserSettingKeyException(Exception): + """ + Cannot set or get unknown user keys. + """ + + +class UserSettingRepository(SettingRepositoryBase): + "Repository." + + def __init__(self, session): + super().__init__(session, UserSetting) + + def key_exists_precheck(self, keyname): + """ + User keys must exist. + """ + if not self.key_exists(keyname): + raise MissingUserSettingKeyException(keyname) + + def get_backup_settings(self): + "Convenience method." 
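+        # Assembles a BackupSettings value object from the individual
+        # user-setting keys; _bool() below tolerates the 1/"1"/"y"/True
+        # value representations seen in stored settings.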
+ bs = BackupSettings() + + def _bool(v): + return v in (1, "1", "y", True) + + bs.backup_enabled = _bool(self.get_value("backup_enabled")) + bs.backup_auto = _bool(self.get_value("backup_auto")) + bs.backup_warn = _bool(self.get_value("backup_warn")) + bs.backup_dir = self.get_value("backup_dir") + bs.backup_count = int(self.get_value("backup_count") or 5) + bs.last_backup_datetime = self.get_last_backup_datetime() + return bs + + def get_last_backup_datetime(self): + "Get the last_backup_datetime as int, or None." + v = self.get_value("lastbackup") + if v is None: + return None + return int(v) + + def set_last_backup_datetime(self, v): + "Set and save the last backup time." + self.set_value("lastbackup", v) + self.session.commit() + + +class SystemSettingRepository(SettingRepositoryBase): + "Repository." + + def __init__(self, session): + super().__init__(session, SystemSetting) + + +class LanguageRepository: + "Repository." + + def __init__(self, session): + self.session = session + + def find(self, language_id): + "Get by ID." + return self.session.query(Language).filter(Language.id == language_id).first() + + def find_by_name(self, name): + "Get by name." + return ( + self.session.query(Language) + .filter(func.lower(Language.name) == func.lower(name)) + .first() + ) + + def all_dictionaries(self): + "All dictionaries for all languages." + lang_dicts = {} + for lang in db.session.query(Language).all(): + lang_dicts[lang.id] = { + "term": lang.active_dict_uris("terms"), + "sentence": lang.active_dict_uris("sentences"), + } + return lang_dicts + + +class TermTagRepository: + "Repository." + + def __init__(self, session): + self.session = session + + def find(self, termtag_id): + "Get by ID." + return self.session.query(TermTag).filter(TermTag.id == termtag_id).first() + + def find_by_text(self, text): + "Find a tag by text, or None if not found." + return self.session.query(TermTag).filter(TermTag.text == text).first() + + def find_or_create_by_text(self, text): + "Return tag or create one." + ret = self.find_by_text(text) + if ret is not None: + return ret + return TermTag(text) + + +class TermRepository: + "Repository." + + def __init__(self, session): + self.session = session + + def find(self, term_id): + "Get by ID." + return self.session.query(Term).filter(Term.id == term_id).first() + + def find_by_spec(self, spec): + """ + Find by the given spec term's language ID and text. + Returns None if not found. + """ + langid = spec.language.id + text_lc = spec.text_lc + query = self.session.query(Term).filter( + and_(Term.language_id == langid, Term.text_lc == text_lc) + ) + terms = query.all() + if not terms: + return None + return terms[0] + + def delete_empty_images(self): + """ + Data clean-up: delete empty images. + + The code was leaving empty images in the db, which are obviously no good. + This is a hack to clean up the data. + """ + sql = "delete from wordimages where trim(WiSource) = ''" + self.session.execute(sqltext(sql)) + self.session.commit() + + +class BookTagRepository: + "Repository." + + def __init__(self, session): + self.session = session + + def find_or_create_by_text(self, text): + "Return tag or create one." + ret = db.session.query(BookTag).filter(BookTag.text == text).first() + if ret is not None: + return ret + return BookTag.make_book_tag(text) + + +class BookRepository: + "Repository." + + def __init__(self, session): + self.session = session + + def find(self, book_id): + "Get by ID." 
+ return self.session.query(Book).filter(Book.id == book_id).first() + + def find_by_title(self, book_title, language_id): + "Get by title." + return ( + self.session.query(Book) + .filter(and_(Book.title == book_title, Book.language_id == language_id)) + .first() + ) diff --git a/lute/models/setting.py b/lute/models/setting.py index 9a4e70cc1..dcb573d40 100644 --- a/lute/models/setting.py +++ b/lute/models/setting.py @@ -2,11 +2,9 @@ Lute settings, in settings key-value table. """ -import os import datetime -from flask import current_app +import time from lute.db import db -from lute.parse.mecab_parser import JapaneseParser class SettingBase(db.Model): @@ -23,159 +21,12 @@ class SettingBase(db.Model): value = db.Column("StValue", db.String, nullable=False) __mapper_args__ = {"polymorphic_on": keytype} - @classmethod - def key_exists_precheck(cls, keyname): - """ - Check key validity for certain actions. - """ - - @classmethod - def set_value(cls, keyname, keyvalue): - "Set, but don't save, a setting." - cls.key_exists_precheck(keyname) - s = db.session.query(cls).filter(cls.key == keyname).first() - if s is None: - s = cls() - s.key = keyname - s.value = keyvalue - db.session.add(s) - - @classmethod - def key_exists(cls, keyname): - "True if exists." - s = db.session.query(cls).filter(cls.key == keyname).first() - no_key = s is None - return not no_key - - @classmethod - def get_value(cls, keyname): - "Get the saved key, or None if it doesn't exist." - cls.key_exists_precheck(keyname) - s = db.session.query(cls).filter(cls.key == keyname).first() - if s is None: - return None - return s.value - - @classmethod - def delete_key(cls, keyname): - "Delete a key." - s = db.session.query(cls).filter(cls.key == keyname).first() - if s is not None: - db.session.delete(s) - - -class MissingUserSettingKeyException(Exception): - """ - Cannot set or get unknown user keys. - """ - class UserSetting(SettingBase): "User setting." __tablename__ = None __mapper_args__ = {"polymorphic_identity": "user"} - @classmethod - def key_exists_precheck(cls, keyname): - """ - User keys must exist. - """ - if not UserSetting.key_exists(keyname): - raise MissingUserSettingKeyException(keyname) - - @staticmethod - def _revised_mecab_path(): - """ - Change the mecab_path if it's not found, and a - replacement is found. - - Lute Docker images are built to be multi-arch, and - interestingly (annoyingly), mecab libraries are installed into - different locations depending on the architecture, even with - the same Dockerfile and base image. - - Returns: new mecab path if old one is missing _and_ - new one found, otherwise just return the old one. - """ - - mp = UserSetting.get_value("mecab_path") - if mp is not None and os.path.exists(mp): - return mp - - # See develop docs for notes on how to find the libmecab path! - candidates = [ - # linux/arm64 - "/lib/aarch64-linux-gnu/libmecab.so.2", - # linux/amd64 - "/lib/x86_64-linux-gnu/libmecab.so.2", - # github CI, ubuntu-latest - "/lib/x86_64-linux-gnu/libmecab.so.2", - ] - replacements = [p for p in candidates if os.path.exists(p)] - if len(replacements) > 0: - return replacements[0] - # Replacement not found, leave current value as-is. - return mp - - @staticmethod - def load(): - """ - Load missing user settings with default values. 
- """ - app_config = current_app.env_config - - keys_and_defaults = { - "backup_enabled": True, - "backup_auto": True, - "backup_warn": True, - "backup_dir": app_config.default_user_backup_path, - "backup_count": 5, - "mecab_path": None, - "custom_styles": "/* Custom css to modify Lute's appearance. */", - } - for k, v in keys_and_defaults.items(): - if not UserSetting.key_exists(k): - s = UserSetting() - s.key = k - s.value = v - db.session.add(s) - db.session.commit() - - # Revise the mecab path if necessary. - # Note this is done _after_ the defaults are loaded, - # because the user may have already loaded the defaults - # (e.g. on machine upgrade) and stored them in the db, - # so we may have to _update_ the existing setting. - revised_mecab_path = UserSetting._revised_mecab_path() - UserSetting.set_value("mecab_path", revised_mecab_path) - db.session.commit() - - # This feels wrong, somehow ... possibly could have an event - # bus that posts messages about the setting. - JapaneseParser.set_mecab_path_envkey(UserSetting.get_value("mecab_path")) - - -class SystemSetting(SettingBase): - "System setting." - __tablename__ = None - __mapper_args__ = {"polymorphic_identity": "system"} - - # Helpers for certain sys settings. - - @classmethod - def get_last_backup_datetime(cls): - "Get the last_backup_datetime as int, or None." - v = cls.get_value("lastbackup") - if v is None: - return None - return int(v) - - @classmethod - def set_last_backup_datetime(cls, v): - "Set and save the last backup time." - cls.set_value("lastbackup", v) - db.session.commit() - class BackupSettings: """ @@ -184,16 +35,12 @@ class BackupSettings: """ def __init__(self): - def _bool(k): - v = UserSetting.get_value(k) - return v in (1, "1", "y", True) - - self.backup_enabled = _bool("backup_enabled") - self.backup_auto = _bool("backup_auto") - self.backup_warn = _bool("backup_warn") - self.backup_dir = UserSetting.get_value("backup_dir") - self.backup_count = int(UserSetting.get_value("backup_count") or 5) - self.last_backup_datetime = SystemSetting.get_last_backup_datetime() + self.backup_enabled = None + self.backup_auto = None + self.backup_warn = None + self.backup_dir = None + self.backup_count = None + self.last_backup_datetime = None @property def last_backup_display_date(self): @@ -203,7 +50,43 @@ def last_backup_display_date(self): return None return datetime.datetime.fromtimestamp(t).strftime("%Y-%m-%d %H:%M:%S") - @staticmethod - def get_backup_settings(): - "Get BackupSettings." - return BackupSettings() + @property + def time_since_last_backup(self): + """ + Return the time since the last backup. Returns None either if not set or + it is in the future. + Eg. "3 days ago" + """ + t = self.last_backup_datetime + if t is None: + return None + + delta = int(time.time() - t) + if delta < 0: + return None + + thresholds = [ + ("week", 1 * 60 * 60 * 24 * 7), + ("day", 1 * 60 * 60 * 24), + ("hour", 1 * 60 * 60), + ("minute", 1 * 60), + ("second", 1), + ] + + for unit, seconds in thresholds: + multiples = abs(delta // seconds) + if multiples >= 1: + message = f"{multiples} {unit}" + if multiples > 1: + message += "s" + break + else: + message = f"{abs(delta)} seconds" + + return message + " ago" + + +class SystemSetting(SettingBase): + "System setting." 
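+    # Stored in the same settings table as UserSetting; rows are
+    # discriminated by the polymorphic keytype column on SettingBase.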
+ __tablename__ = None + __mapper_args__ = {"polymorphic_identity": "system"} diff --git a/lute/models/srsexport.py b/lute/models/srsexport.py new file mode 100644 index 000000000..b9fb43e9d --- /dev/null +++ b/lute/models/srsexport.py @@ -0,0 +1,23 @@ +""" +Srs export entity. +""" + +from lute.db import db + + +class SrsExportSpec(db.Model): + """ + Srs export spec entity. + """ + + __tablename__ = "srsexportspecs" + + id = db.Column("SrsID", db.Integer, primary_key=True) + export_name = db.Column( + "SrsExportName", db.String(200), nullable=False, unique=True + ) + criteria = db.Column("SrsCriteria", db.String(1000), nullable=False) + deck_name = db.Column("SrsDeckName", db.String(200), nullable=False) + note_type = db.Column("SrsNoteType", db.String(200), nullable=False) + field_mapping = db.Column("SrsFieldMapping", db.String(1000), nullable=False) + active = db.Column("SrsActive", db.Boolean, nullable=False, default=True) diff --git a/lute/models/term.py b/lute/models/term.py index 324706009..bfd1224f5 100644 --- a/lute/models/term.py +++ b/lute/models/term.py @@ -4,7 +4,6 @@ from lute.db import db - wordparents = db.Table( "wordparents", db.Model.metadata, @@ -72,24 +71,6 @@ def comment(self, c): "Set cleaned comment." self._comment = c if c is not None else "" - @staticmethod - def find(termtag_id): - "Get by ID." - return db.session.query(TermTag).filter(TermTag.id == termtag_id).first() - - @staticmethod - def find_by_text(text): - "Find a tag by text, or None if not found." - return db.session.query(TermTag).filter(TermTag.text == text).first() - - @staticmethod - def find_or_create_by_text(text): - "Return tag or create one." - ret = TermTag.find_by_text(text) - if ret is not None: - return ret - return TermTag(text) - class TermTextChangedException(Exception): """ @@ -123,6 +104,7 @@ class Term( translation = db.Column("WoTranslation", db.String(500)) romanization = db.Column("WoRomanization", db.String(100)) token_count = db.Column("WoTokenCount", db.Integer) + sync_status = db.Column("WoSyncStatus", db.Boolean) language = db.relationship("Language") term_tags = db.relationship("TermTag", secondary="wordtags") @@ -157,6 +139,7 @@ def __init__(self, language=None, text=None): self.status = 1 self.translation = None self.romanization = None + self.sync_status = False self.term_tags = [] self.parents = [] self.children = [] @@ -166,6 +149,27 @@ def __init__(self, language=None, text=None): if text is not None: self.text = text + @staticmethod + def create_term_no_parsing(language, text): + """ + Create a term, but do not reparse it during creation. + + This method is necessary because some parsers return + different parsed tokens for a given text string based + on its context. The general __init__() is used for + parsing without context, such as creating Terms from + the UI or during CSV import. This method is used + when new terms are created from an already-parsed + and already-tokenized page of text. + """ + t = Term() + t.language = language + t._text = text # pylint: disable=protected-access + t.text_lc = language.get_lowercase(text) + t.romanization = language.parser.get_reading(text) + t._calc_token_count() # pylint: disable=protected-access + return t + def __repr__(self): return f"" @@ -182,12 +186,8 @@ def text(self): "Get the text." return self._text - @text.setter - def text(self, textstring): - "Set the text, textlc, and token count." 
- if self.language is None: - raise RuntimeError("Must set term language before setting text") - + def _parse_string_add_zws(self, lang, textstring): + "Parse the string using the language." # Clean up encoding cruft. t = textstring.strip() zws = "\u200B" # zero-width space @@ -195,7 +195,6 @@ def text(self, textstring): nbsp = "\u00A0" # non-breaking space t = t.replace(nbsp, " ") - lang = self.language tokens = lang.get_parsed_tokens(t) # Terms can't contain paragraph markers. @@ -203,19 +202,35 @@ def text(self, textstring): tok_strings = [tok.token for tok in tokens] t = zws.join(tok_strings) - old_text_lc = self.text_lc - new_text_lc = lang.get_lowercase(t) + return t + + @text.setter + def text(self, textstring): + """ + Set the text, textlc, and token count. + + For new terms, just parse, downcase, and get the count. - text_changed = old_text_lc is not None and new_text_lc != old_text_lc - if self.id is not None and text_changed: - msg = ( - f"Cannot change text of term '{self.text}' (id = {self.id}) once saved." - ) - raise TermTextChangedException(msg) + For existing terms, ensure that the actual text content has + not changed. + """ + if self.language is None: + raise RuntimeError("Must set term language before setting text") + lang = self.language - self._text = t - self.text_lc = new_text_lc - self._calc_token_count() + if self.id is None: + t = self._parse_string_add_zws(lang, textstring) + self._text = t + self.text_lc = lang.get_lowercase(t) + self.romanization = lang.parser.get_reading(t) + self._calc_token_count() + else: + # new_lc = lang.get_lowercase(textstring) + # print(f"new lowercase = '{new_lc}', old = '{self.text_lc}'", flush=True) + if lang.get_lowercase(textstring) != self.text_lc: + msg = f'Cannot change text of saved term "{self._text}" (id {self.id}).' + raise TermTextChangedException(msg) + self._text = textstring def _calc_token_count(self): "Tokens are separated by zero-width space." @@ -234,7 +249,8 @@ def add_term_tag(self, term_tag): self.term_tags.append(term_tag) def remove_term_tag(self, term_tag): - self.term_tags.remove(term_tag) + if term_tag in self.term_tags: + self.term_tags.remove(term_tag) def remove_all_parents(self): self.parents = [] @@ -247,30 +263,24 @@ def add_parent(self, parent): return if parent not in self.parents: self.parents.append(parent) + if len(self.parents) > 1: + self.sync_status = False - def get_current_image(self, strip_jpeg=True): + def get_current_image(self): "Get the current (first) image for the term." if len(self.images) == 0: return None i = self.images[0] - - src = i.source - - if not strip_jpeg: - return src - - # Ugly hack: we have to remove the .jpeg at the end because - # Flask doesn't handle params with periods. - return src.replace(".jpeg", "") + return i.source def set_current_image(self, s): "Set the current image for this term." - if self.images: + while len(self.images) > 0: self.images.pop(0) - if s is not None: + if (s or "").strip() != "": ti = TermImage() ti.term = self - ti.source = s + ti.source = s.strip() self.images.append(ti) def get_flash_message(self): @@ -295,29 +305,8 @@ def pop_flash_message(self): self.term_flash_message = None return m - @staticmethod - def find(term_id): - "Get by ID." - return db.session.query(Term).filter(Term.id == term_id).first() - @staticmethod - def find_by_spec(spec): - """ - Find by the given spec term's language ID and text. - Returns None if not found. 
- """ - langid = spec.language.id - text_lc = spec.text_lc - query = db.session.query(Term).filter( - Term.language_id == langid, Term.text_lc == text_lc - ) - terms = query.all() - if not terms: - return None - return terms[0] - - -class Status: +class Status(db.Model): # pylint: disable=too-few-public-methods """ Term statuses. """ @@ -325,3 +314,10 @@ class Status: UNKNOWN = 0 WELLKNOWN = 99 IGNORED = 98 + ALLOWED = [UNKNOWN, 1, 2, 3, 4, 5, IGNORED, WELLKNOWN] + + __tablename__ = "statuses" + + id = db.Column("StID", db.SmallInteger, primary_key=True) + text = db.Column("StText", db.String(250)) + abbreviation = db.Column("StAbbreviation", db.String(250)) diff --git a/lute/parse/base.py b/lute/parse/base.py index acd6d89a8..7d44d98e9 100644 --- a/lute/parse/base.py +++ b/lute/parse/base.py @@ -17,7 +17,6 @@ class ParsedToken: # Class counters. cls_sentence_number = 0 - cls_paragraph_number = 0 cls_order = 0 @classmethod @@ -26,7 +25,6 @@ def reset_counters(cls): Reset all the counters. """ ParsedToken.cls_sentence_number = 0 - ParsedToken.cls_paragraph_number = 0 ParsedToken.cls_order = 0 def __init__(self, token: str, is_word: bool, is_end_of_sentence: bool = False): @@ -38,79 +36,51 @@ def __init__(self, token: str, is_word: bool, is_end_of_sentence: bool = False): self.order = ParsedToken.cls_order self.sentence_number = ParsedToken.cls_sentence_number - self.paragraph_number = ParsedToken.cls_paragraph_number # Increment counters after the TextToken has been # completed, so that it belongs to the correct - # sentence/paragraph. + # sentence. if self.is_end_of_sentence: ParsedToken.cls_sentence_number += 1 - if self.token == "¶": - ParsedToken.cls_paragraph_number += 1 + + @property + def is_end_of_paragraph(self): + return self.token.strip() == "¶" def __repr__(self): - return ( - f'<"{self.token}" (word: {self.is_word}, eos: {self.is_end_of_sentence})>' - ) + attrs = [ + f"word: {self.is_word}", + f"eos: {self.is_end_of_sentence}", + # f"sent: {self.sentence_number}", + ] + attrs = ", ".join(attrs) + return f'<"{self.token}" ({attrs})>' -class SentenceGroupIterator: +class AbstractParser(ABC): """ - An iterator of ParsedTokens that groups them by sentence, up - to a maximum number of tokens. + Abstract parser, inherited from by all parsers. + + Attributes: + data_directory: Optional. A full path to a + directory that the parser uses. Should be + initialized with init_data_directory(). """ - def __init__(self, tokens, maxcount=500): - self.tokens = tokens - self.maxcount = maxcount - self.currpos = 0 + data_directory = None - def count(self): - """ - Get count of groups that will be returned. - """ - old_currpos = self.currpos - c = 0 - while self.next(): - c += 1 - self.currpos = old_currpos - return c - - def next(self): + @classmethod + def uses_data_directory(cls): + "True if the parser needs user-supplied data." + return False + + @classmethod + def init_data_directory(cls): """ - Get next sentence group. + Initialize the data_directory if needed. Not + necessary for all parsers. 
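+
+        A parser plugin that ships user-editable data (say, a custom
+        dictionary file) would override this to create and populate
+        its data_directory.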
""" - if self.currpos >= len(self.tokens): - return False - - curr_tok_count = 0 - last_eos = -1 - i = self.currpos - - while (curr_tok_count <= self.maxcount or last_eos == -1) and i < len( - self.tokens - ): - tok = self.tokens[i] - if tok.is_end_of_sentence == 1: - last_eos = i - if tok.is_word == 1: - curr_tok_count += 1 - i += 1 - - if curr_tok_count <= self.maxcount or last_eos == -1: - ret = self.tokens[self.currpos : i] - self.currpos = i + 1 - else: - ret = self.tokens[self.currpos : last_eos + 1] - self.currpos = last_eos + 1 - - return ret - - -class AbstractParser(ABC): - """ - Abstract parser, inherited from by all parsers. - """ + return @classmethod def is_supported(cls): diff --git a/lute/parse/mecab_parser.py b/lute/parse/mecab_parser.py index de8bebcba..b3faf8a1b 100644 --- a/lute/parse/mecab_parser.py +++ b/lute/parse/mecab_parser.py @@ -16,7 +16,9 @@ import re from typing import List from natto import MeCab +import jaconv from lute.parse.base import ParsedToken, AbstractParser +from lute.settings.current import current_settings class JapaneseParser(AbstractParser): @@ -28,37 +30,33 @@ class JapaneseParser(AbstractParser): The parser uses natto-py library, and so should be able to find mecab automatically; if it can't, you may need to set the MECAB_PATH env variable, - managed here by the set_mecab_path_envkey() method. + managed by UserSettingRepository.set_value("mecab_path", p) """ _is_supported = None - - @staticmethod - def set_mecab_path_envkey(v): - """ - Sets the key MECAB_PATH key for natto-py. - Deletes if None or ''. - """ - if "MECAB_PATH" in os.environ: - del os.environ["MECAB_PATH"] - if v is not None and v.strip() != "": - os.environ["MECAB_PATH"] = v.strip() - JapaneseParser._is_supported = None - - @staticmethod - def get_mecab_path_envkey(): - return os.getenv("MECAB_PATH") + _old_mecab_path = None @classmethod def is_supported(cls): """ True if a natto MeCab can be instantiated, - otherwise false. The value is cached _just in case_, - thought that's probably premature optimization. + otherwise false. """ - if JapaneseParser._is_supported is not None: + + mecab_path = current_settings.get("mecab_path", "") or "" + mecab_path = mecab_path.strip() + path_unchanged = mecab_path == JapaneseParser._old_mecab_path + if path_unchanged and JapaneseParser._is_supported is not None: return JapaneseParser._is_supported - b = False + + # Natto uses the MECAB_PATH env key if it's set. + env_key = "MECAB_PATH" + if mecab_path != "": + os.environ[env_key] = mecab_path + else: + os.environ.pop(env_key, None) + + mecab_works = False # Calling MeCab() prints to stderr even if the # exception is caught. Suppress that output noise. @@ -66,14 +64,15 @@ def is_supported(cls): try: sys.stderr = temp_err MeCab() - b = True + mecab_works = True except: # pylint: disable=bare-except - b = False + mecab_works = False finally: sys.stderr = sys.__stderr__ - JapaneseParser._is_supported = b - return b + JapaneseParser._old_mecab_path = mecab_path + JapaneseParser._is_supported = mecab_works + return mecab_works @classmethod def name(cls): @@ -112,7 +111,14 @@ def line_to_token(lin): is_eos = term in language.regexp_split_sentences if term == "EOP" and third == "7": term = "¶" - is_word = node_type in "2678" + + # Node type values ref + # https://github.com/buruzaemon/natto-py/wiki/ + # Node-Parsing-char_type + # + # The repeat character is sometimes returned as a "symbol" + # (node type = 3), so handle that specifically. 
+            is_word = node_type in "2678" or term == "々"
             return ParsedToken(term, is_word, is_eos or term == "¶")
 
         tokens = [line_to_token(lin) for lin in lines]
@@ -137,6 +143,11 @@ def get_reading(self, text: str):
         if self._string_is_hiragana(text):
             return None
 
+        jp_reading_setting = current_settings.get("japanese_reading", "").strip()
+        if jp_reading_setting == "":
+            # Don't set reading if nothing specified.
+            return None
+
         flags = r"-O yomi"
         readings = []
         with MeCab(flags) as nm:
@@ -147,4 +158,11 @@ def get_reading(self, text: str):
         ret = "".join(readings).strip()
         if ret in ("", text):
             return None
-        return ret
+
+        if jp_reading_setting == "katakana":
+            return ret
+        if jp_reading_setting == "hiragana":
+            return jaconv.kata2hira(ret)
+        if jp_reading_setting == "alphabet":
+            return jaconv.kata2alphabet(ret)
+        raise RuntimeError(f"Bad reading type {jp_reading_setting}")
diff --git a/lute/parse/registry.py b/lute/parse/registry.py
index 1067f7b08..991c5ad4a 100644
--- a/lute/parse/registry.py
+++ b/lute/parse/registry.py
@@ -4,15 +4,16 @@
 List of available parsers.
 """
 
+from importlib.metadata import entry_points
+from sys import version_info
+
 from lute.parse.base import AbstractParser
 from lute.parse.space_delimited_parser import SpaceDelimitedParser, TurkishParser
 from lute.parse.mecab_parser import JapaneseParser
 from lute.parse.character_parser import ClassicalChineseParser
 
 
-# List of ALL parsers available, not necessarily all supported.
-# This design feels fishy, but it suffices for now.
-parsers = {
+__LUTE_PARSERS__ = {
     "spacedel": SpaceDelimitedParser,
     "turkish": TurkishParser,
     "japanese": JapaneseParser,
@@ -20,45 +21,67 @@
 }
 
 
-def _supported_parsers():
-    "Get the supported parsers."
-    ret = {}
-    for k, v in parsers.items():
-        if v.is_supported():
-            ret[k] = v
-    return ret
+def init_parser_plugins():
+    """
+    Initialize parsers from plugins.
+    """
+
+    # Handle API breakage of entry_points.
+    # pylint: disable=no-member
+    vmaj = version_info.major
+    vmin = version_info.minor
+    if vmaj == 3 and vmin in (8, 9, 10, 11):
+        custom_parser_eps = entry_points().get("lute.plugin.parse")
+    elif (vmaj == 3 and vmin >= 12) or (vmaj >= 4):
+        # Can't be sure this will always work, the API may change again,
+        # but we can't plan for the unforeseeable everywhere.
+        custom_parser_eps = entry_points().select(group="lute.plugin.parse")
+    else:
+        # Earlier version of python than 3.8?  What madness is this?
+        # Not going to throw, just print and hope the user sees it.
+        msg = f"Unable to load plugins for python {vmaj}.{vmin}, please upgrade to 3.8+"
+        print(msg, flush=True)
+        return
+
+    if custom_parser_eps is None:
+        return
+
+    for custom_parser_ep in custom_parser_eps:
+        name = custom_parser_ep.name
+        klass = custom_parser_ep.load()
+        if issubclass(klass, AbstractParser):
+            __LUTE_PARSERS__[name] = klass
+        else:
+            raise ValueError(f"{name} is not a subclass of AbstractParser")
 
 
 def get_parser(parser_name) -> AbstractParser:
     "Return the supported parser with the given name."
-    if parser_name in _supported_parsers():
-        pclass = parsers[parser_name]
-        return pclass()
-    raise ValueError(f"Unknown parser type '{parser_name}'")
+    if parser_name not in __LUTE_PARSERS__:
+        raise ValueError(f"Unknown parser type '{parser_name}'")
+    pclass = __LUTE_PARSERS__[parser_name]
+    if not pclass.is_supported():
+        raise ValueError(f"Unsupported parser type '{parser_name}'")
+    return pclass()
 
 
 def is_supported(parser_name) -> bool:
-    "Return True if the specified parser is supported, false otherwise or if not found."
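
For reference, `init_parser_plugins()` above discovers parsers through the `lute.plugin.parse` entry-point group. A minimal sketch of what a third-party parser plugin could look like; the package name `my_plugin` and the toy tokenizing rule are hypothetical, while `AbstractParser`, `ParsedToken`, and the entry-point group name come from the code above:

```
# my_plugin/parser.py -- hypothetical plugin module; assumes Lute v3 is installed.
from typing import List

from lute.parse.base import AbstractParser, ParsedToken


class EveryCharParser(AbstractParser):
    "Toy parser: every non-space character is a word; '.' ends a sentence."

    @classmethod
    def name(cls):
        return "Every Char"

    def get_parsed_tokens(self, text: str, language) -> List[ParsedToken]:
        # One ParsedToken per character; spaces are non-word tokens.
        return [
            ParsedToken(c, is_word=(c != " "), is_end_of_sentence=(c == "."))
            for c in text
        ]


# Exposed in the plugin package's pyproject.toml so that
# init_parser_plugins() can find it:
#
#   [project.entry-points."lute.plugin.parse"]
#   everychar = "my_plugin.parser:EveryCharParser"
```
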
-    if parser_name not in parsers:
+    "Return True if the specified parser is present and supported."
+    if parser_name not in __LUTE_PARSERS__:
         return False
-    p = parsers[parser_name]
+    p = __LUTE_PARSERS__[parser_name]
     return p.is_supported()
 
 
 def supported_parsers():
     """
-    Dictionary of supported parser strings and class names, for UI.
-
-    For select list entries, use supported_parsers().items().
+    List of supported parser strings and classes.
     """
-    ret = []
-    for k, v in _supported_parsers().items():
-        ret.append([k, v.name()])
-    return ret
+    return [(k, v) for k, v in __LUTE_PARSERS__.items() if v.is_supported()]
 
 
 def supported_parser_types():
     """
     List of supported Language.parser_types
     """
-    return list(_supported_parsers().keys())
+    return list(a[0] for a in supported_parsers())
diff --git a/lute/parse/space_delimited_parser.py b/lute/parse/space_delimited_parser.py
index 2b227689b..0ef26979d 100644
--- a/lute/parse/space_delimited_parser.py
+++ b/lute/parse/space_delimited_parser.py
@@ -7,11 +7,16 @@
 Includes classes:
 
 - SpaceDelimitedParser
-- Turkish
+- TurkishParser
 """
 
+import functools
 import re
+import sys
+import unicodedata
+
 from typing import List
+
 from lute.parse.base import ParsedToken, AbstractParser
 
 
@@ -25,11 +30,142 @@ class SpaceDelimitedParser(AbstractParser):
     def name(cls):
         return "Space Delimited"
 
+    @staticmethod
+    @functools.lru_cache
+    def compile_re_pattern(pattern: str, *args, **kwargs) -> re.Pattern:
+        """Compile regular expression pattern, cache result for fast re-use."""
+        return re.compile(pattern, *args, **kwargs)
+
+    @staticmethod
+    @functools.lru_cache
+    def get_default_word_characters() -> str:
+        """Return default value for lang.word_characters."""
+
+        # Unicode categories reference: https://www.compart.com/en/unicode/category
+        categories = set(["Cf", "Ll", "Lm", "Lo", "Lt", "Lu", "Mc", "Mn", "Sk"])
+
+        # There are more than 130,000 characters across all these categories.
+        # Expressing this one character at a time, mostly using unicode
+        # escape sequences like \u1234 or \U12345678, would require 1 megabyte.
+        # Converting to ranges like \u1234-\u1256 requires only 10K.
+        ranges = []
+        current = None
+
+        def add_current_to_ranges():
+            def ucode(n):
+                "Unicode point for integer."
+                fstring = r"\u{:04x}" if n < 0x10000 else r"\U{:08x}"
+                return (fstring).format(n)
+
+            start_code = ucode(current[0])
+            if current[0] == current[1]:
+                range_string = start_code
+            else:
+                endcode = ucode(current[1])
+                range_string = f"{start_code}-{endcode}"
+            ranges.append(range_string)
+
+        for i in range(1, sys.maxunicode):
+            if unicodedata.category(chr(i)) not in categories:
+                if current is not None:
+                    add_current_to_ranges()
+                current = None
+            elif current is None:
+                # Starting a new range.
+                current = [i, i]
+            else:
+                # Extending existing range.
+                current[1] = i
+
+        if current is not None:
+            add_current_to_ranges()
+
+        return "".join(ranges)
+
+    @staticmethod
+    @functools.lru_cache
+    def get_default_regexp_split_sentences() -> str:
+        """Return default value for lang.regexp_split_sentences."""
+
+        # Construct pattern from Unicode ATerm and STerm categories.
+        # See: https://www.unicode.org/Public/UNIDATA/auxiliary/SentenceBreakProperty.txt
+        # and: https://unicode.org/reports/tr29/
+
+        # Also include colon, since that is used to separate speakers
+        # and their dialog, and is a reasonable dividing point for
+        # sentence translations.
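
The range-compression idea in `get_default_word_characters()` can be tried standalone. This sketch caps the scan at a small code point instead of `sys.maxunicode` (an assumption purely to keep the demo fast) and uses a reduced category set:

```
# Standalone sketch of the range compression: walk code points, group
# consecutive members of the wanted Unicode categories into \uXXXX-\uYYYY runs.
import unicodedata


def build_ranges(categories, limit=0x300):
    "Compress matching code points below limit into \\u ranges."

    def ucode(n):
        return rf"\u{n:04x}" if n < 0x10000 else rf"\U{n:08x}"

    ranges, current = [], None
    for i in range(1, limit):
        if unicodedata.category(chr(i)) in categories:
            if current is None:
                current = [i, i]      # start a new run
            else:
                current[1] = i        # extend the current run
        elif current is not None:
            a, b = current
            ranges.append(ucode(a) if a == b else f"{ucode(a)}-{ucode(b)}")
            current = None
    if current is not None:
        a, b = current
        ranges.append(ucode(a) if a == b else f"{ucode(a)}-{ucode(b)}")
    return "".join(ranges)


print(build_ranges({"Ll", "Lu"}))  # starts \u0041-\u005a\u0061-\u007a...
```
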
+
+        return "".join(
+            [
+                re.escape(".!?:"),
+                # ATerm entries (other than ".", covered above):
+                r"\u2024\uFE52\uFF0E",
+                # STerm entries (other than "!" and "?", covered above):
+                r"\u0589",
+                r"\u061D-\u061F\u06D4",
+                r"\u0700-\u0702",
+                r"\u07F9",
+                r"\u0837\u0839\u083D\u083E",
+                r"\u0964\u0965",
+                r"\u104A\u104B",
+                r"\u1362\u1367\u1368",
+                r"\u166E",
+                r"\u1735\u1736",
+                r"\u17D4\u17D5",
+                r"\u1803\u1809",
+                r"\u1944\u1945",
+                r"\u1AA8-\u1AAB",
+                r"\u1B5A\u1B5B\u1B5E\u1B5F\u1B7D\u1B7E",
+                r"\u1C3B\u1C3C",
+                r"\u1C7E\u1C7F",
+                r"\u203C\u203D\u2047-\u2049\u2E2E\u2E3C\u2E53\u2E54\u3002",
+                r"\uA4FF",
+                r"\uA60E\uA60F",
+                r"\uA6F3\uA6F7",
+                r"\uA876\uA877",
+                r"\uA8CE\uA8CF",
+                r"\uA92F",
+                r"\uA9C8\uA9C9",
+                r"\uAA5D\uAA5F",
+                r"\uAAF0\uAAF1\uABEB",
+                r"\uFE56\uFE57\uFF01\uFF1F\uFF61",
+                r"\U00010A56\U00010A57",
+                r"\U00010F55-\U00010F59",
+                r"\U00010F86-\U00010F89",
+                r"\U00011047\U00011048",
+                r"\U000110BE-\U000110C1",
+                r"\U00011141-\U00011143",
+                r"\U000111C5\U000111C6\U000111CD\U000111DE\U000111DF",
+                r"\U00011238\U00011239\U0001123B\U0001123C",
+                r"\U000112A9",
+                r"\U0001144B\U0001144C",
+                r"\U000115C2\U000115C3\U000115C9-\U000115D7",
+                r"\U00011641\U00011642",
+                r"\U0001173C-\U0001173E",
+                r"\U00011944\U00011946",
+                r"\U00011A42\U00011A43",
+                r"\U00011A9B\U00011A9C",
+                r"\U00011C41\U00011C42",
+                r"\U00011EF7\U00011EF8",
+                r"\U00011F43\U00011F44",
+                r"\U00016A6E\U00016A6F",
+                r"\U00016AF5",
+                r"\U00016B37\U00016B38\U00016B44",
+                r"\U00016E98",
+                r"\U0001BC9F",
+                r"\U0001DA88",
+            ]
+        )
+
     def get_parsed_tokens(self, text: str, language) -> List[ParsedToken]:
         "Return parsed tokens."
+
+        # Remove extra spaces.
         clean_text = re.sub(r" +", " ", text)
-        zws = chr(0x200B)  # zero-width space
-        clean_text = clean_text.replace(zws, "")
+
+        # Remove zero-width spaces.
+        clean_text = clean_text.replace(chr(0x200B), "")
+
         return self._parse_to_tokens(clean_text, language)
 
     def preg_match_capture(self, pattern, subject):
@@ -39,7 +175,8 @@ def preg_match_capture(self, pattern, subject):
         E.g. search for r'cat' in "there is a CAT and a Cat" returns:
         [['CAT', 11], ['Cat', 21]]
         """
-        matches = re.finditer(pattern, subject, flags=re.IGNORECASE)
+        compiled = SpaceDelimitedParser.compile_re_pattern(pattern, flags=re.IGNORECASE)
+        matches = compiled.finditer(subject)
         result = [[match.group(), match.start()] for match in matches]
         return result
 
@@ -73,11 +210,9 @@ def parse_para(self, text: str, lang, tokens: List[ParsedToken]):
         """
         Parse a string, appending the tokens to the list of tokens.
         """
-        termchar = lang.word_characters
-        if termchar.strip() == "":
-            raise RuntimeError(
-                f"Language {lang.name} has invalid Word Characters specification."
-            )
+        termchar = lang.word_characters.strip()
+        if not termchar:
+            termchar = SpaceDelimitedParser.get_default_word_characters()
 
         splitex = lang.exceptions_split_sentences.replace(".", "\\.")
         pattern = rf"({splitex}|[{termchar}]*)"
@@ -95,7 +230,10 @@ def add_non_words(s):
             """
             if not s:
                 return
-            pattern = f"[{re.escape(lang.regexp_split_sentences)}]"
+            splitchar = lang.regexp_split_sentences.strip()
+            if not splitchar:
+                splitchar = SpaceDelimitedParser.get_default_regexp_split_sentences()
+            pattern = f"[{re.escape(splitchar)}]"
             has_eos = False
             if pattern != "[]":
                 # Should never happen, but ...
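
One subtlety in the list above, and the reason the `\U00011946` entry must use the 8-digit `\U` form: `\u` consumes exactly four hex digits, so `\u00011946` silently becomes U+0001 followed by the literal digits "1946". A quick standalone check:

```
# Why supplementary-plane code points need \U (8 hex digits).
import re

# Character class with two supplementary-plane sentence terminators.
good = re.compile("[\U00011944\U00011946]")
assert good.match("\U00011946")

# \u only reads 4 hex digits: this pattern is U+0001 followed by "1946".
bad = re.compile("\u00011946")
assert bad.match("\x011946")
assert not bad.match("\U00011946")
print("escape check passed")
```
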
                allmatches = self.preg_match_capture(pattern, s)
diff --git a/lute/read/render/calculate_textitems.py b/lute/read/render/calculate_textitems.py
new file mode 100644
index 000000000..eb1b4a0df
--- /dev/null
+++ b/lute/read/render/calculate_textitems.py
@@ -0,0 +1,261 @@
+"""
+Given text and Terms, determine what to render in the browser.
+
+For example, given the following TextTokens A-I:
+
+    A B C D E F G H I
+
+And the following terms:
+
+    "A" through "I" (single-word terms)
+    "B C" (term J)
+    "E F G H I" (K)
+    "F G" (L)
+    "C D E" (M)
+
+The following TextItems would be displayed on the reading screen,
+with some of the Terms overlapping:
+
+    [A][B C][-D E][-F G H I]
+"""
+
+import re
+from collections import Counter
+from lute.models.term import Term
+from lute.read.render.text_item import TextItem
+
+# from lute.utils.debug_helpers import DebugTimer
+
+zws = "\u200B"  # zero-width space
+
+
+def get_string_indexes(strings, content):
+    """
+    Returns a list of tuples: [(string, index), ...]
+
+    e.g., get_string_indexes(["is a", "cat"], "here is a cat")
+    returns [("is a", 1), ("cat", 3)].
+
+    strings and content must be lowercased!
+    """
+    searchcontent = zws + content + zws
+    zwsindexes = [index for index, letter in enumerate(searchcontent) if letter == zws]
+
+    ret = []
+
+    for s in strings:
+        # "(?=())" is required because sometimes the search pattern can
+        # overlap -- e.g. _b_b_ has _b_ *twice*.
+        # https://stackoverflow.com/questions/5616822/
+        # how-to-use-regex-to-find-all-overlapping-matches
+        pattern = rf"(?=({re.escape(zws + s + zws)}))"
+        add_matches = [
+            (s, zwsindexes.index(m.start()))
+            for m in re.finditer(pattern, searchcontent)
+        ]
+        ret.extend(add_matches)
+
+    return ret
+
+
+# pylint: disable=too-many-arguments,too-many-positional-arguments
+def _make_textitem(index, text, text_lc, count, sentence_number, term):
+    "Make a TextItem."
+    r = TextItem()
+    r.text = text
+    r.sentence_number = sentence_number
+    r.text_lc = text_lc
+    r.token_count = count
+    r.display_count = r.token_count
+    r.index = index
+    r.is_word = term is not None
+    r.term = term
+    return r
+
+
+def _create_missing_status_0_terms(tokens, terms, language):
+    "Make new terms as needed for all tokens, using case of last instance."
+
+    original_word_tokens = {t.token for t in tokens if t.is_word}
+    parser = language.parser
+    lc_word_tokens = {parser.get_lowercase(t): t for t in original_word_tokens}
+    term_text_lcs = {t.text_lc for t in terms}
+
+    missing_word_tokens = [
+        original for lc, original in lc_word_tokens.items() if lc not in term_text_lcs
+    ]
+
+    # Note: create the terms _without parsing_ because some parsers
+    # break up characters when the words are given out of context.
+    missing_word_tokens = list(set(missing_word_tokens))
+    new_terms = [Term.create_term_no_parsing(language, t) for t in missing_word_tokens]
+    for t in new_terms:
+        t.status = 0
+
+    return new_terms
+
+
+def get_textitems(tokens, terms, language, multiword_term_indexer=None):
+    """
+    Return TextItems that will **actually be rendered**.
+
+    Method to determine what should be rendered:
+
+    - Create TextItems for all of the tokens, finding their
+      starting index in the tokens.
+
+    - "Write" the TextItems to an array in correctly sorted
+      order, so that the correct TextItems take precedence
+      in the final rendering.
+
+    - Calculate any term overlaps.
+
+    - Return the final list of TextItems that will actually
+      be rendered.
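
Usage sketch for `get_string_indexes`, assuming the new `lute.read.render.calculate_textitems` module is importable. Note that the returned index counts tokens, not characters; that is what the zero-width-space bookkeeping buys:

```
from lute.read.render.calculate_textitems import get_string_indexes, zws

# Tokens (words and spaces) joined with zero-width spaces; all lowercased.
tokens = ["here", " ", "is", " ", "a", " ", "cat"]
content = zws.join(tokens)

# Multiword search strings must also be zws-joined.
print(get_string_indexes(["is", f"a{zws} {zws}cat"], content))
# -> [('is', 2), ('a\u200b \u200bcat', 4)]
```
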
+
+    ---
+
+    Applying the above algorithm to the example given in the class
+    header:
+
+    We have the following TextTokens A-I:
+
+       A B C D E F G H I
+
+    And given the following terms:
+      "A" through "I" (single-word terms)
+      "B C" (term J)
+      "E F G H I" (K)
+      "F G" (L)
+      "C D E" (M)
+
+    Creating TextItems for all of the terms, finding their starting
+    indices in the tokens:
+
+      TextToken    index   length
+      ----         -----   ------
+      [A]          0       1
+      [B]          1       1
+      ...
+      [I]          8       1
+      [B C]        1       2
+      [E F G H I]  4       5
+      [F G]        5       2
+      [C D E]      2       3
+
+    Sorting by index, then decreasing token count:
+
+      TextToken    index   length   ID (for later reference)
+      ----         -----   ------   ------------------------
+      [A]          0       1        t1
+      [B C]        1       2        t2
+      [B]          1       1        t3
+      [C D E]      2       3        t4
+      [C]          2       1        t5
+      [D]          3       1        t6
+      [E F G H I]  4       5        t7
+      [E]          4       1        t8
+      [F G]        5       2        t9
+      [F]          5       1        t10
+      [G]          6       1        t11
+      [H]          7       1        t12
+      [I]          8       1        t13
+
+    Starting at the bottom of the above list and
+    working upwards:
+
+    - ID of [I] is written to index 8: [] [] [] [] [] [] [] [] [t13]
+    - ID of [H] to index 7: [] [] [] [] [] [] [] [t12] [t13]
+    - ...
+    - [F G] to index 5 *and* 6: [] [] [] [] [] [t9] [t9] [t12] [t13]
+    - [E] to index 4: [] [] [] [] [t8] [t9] [t9] [t12] [t13]
+    - [E F G H I] to indexes 4-8: [] [] [] [] [t7] [t7] [t7] [t7] [t7]
+    - ... etc
+
+    Using the TextItem IDs, the resulting array would be:
+
+      output array: [t1] [t2] [t2] [t4] [t4] [t7] [t7] [t7] [t7]
+                    [A]  [B C]     [-D E]    [-F G H I]
+
+    The only TextItems that will be shown are therefore:
+      t1, t2, t4, t7
+
+    To calculate what text is actually displayed, the count
+    of each ID is used.  e.g.:
+    - ID t7 appears 4 times in the output array.  The last 4 tokens of
+      [E F G H I] are [F G H I], which will be used as t7's display text.
+    - ID t2 appears 2 times.  The last 2 tokens of [B C] are [B C],
+      so that will be the display text. etc.
+    """
+    # pylint: disable=too-many-locals
+
+    # dt = DebugTimer("get_textitems", display=False)
+
+    new_unknown_terms = _create_missing_status_0_terms(tokens, terms, language)
+    # dt.step("new_unknown_terms")
+
+    all_terms = terms + new_unknown_terms
+    text_to_term = {dt.text_lc: dt for dt in all_terms}
+
+    tokens_orig = [t.token for t in tokens]
+    tokens_lc = [language.parser.get_lowercase(t) for t in tokens_orig]
+
+    textitems = []
+
+    def _add_textitem(index, text_lc, count):
+        "Add a TextItem for position index in tokens."
+        text_orig = tokens_orig[index]
+        if count > 1:
+            text_orig = zws.join(tokens_orig[index : index + count])
+            text_lc = zws.join(tokens_lc[index : index + count])
+        sentence_number = tokens[index].sentence_number
+        term = text_to_term.get(text_lc, None)
+        ti = _make_textitem(index, text_orig, text_lc, count, sentence_number, term)
+        textitems.append(ti)
+
+    # Single-word terms.
+    for index, _ in enumerate(tokens):
+        _add_textitem(index, tokens_lc[index], 1)
+    # dt.step("single word textitems")
+
+    # Multiword terms.
+    if multiword_term_indexer is not None:
+        for r in multiword_term_indexer.search_all(tokens_lc):
+            mwt = text_to_term[r[0]]
+            count = mwt.token_count
+            _add_textitem(r[1], r[0], count)
+        # dt.step("get mw textitems w indexer")
+    else:
+        multiword_terms = [t.text_lc for t in all_terms if t.token_count > 1]
+        for e in get_string_indexes(multiword_terms, zws.join(tokens_lc)):
+            count = e[0].count(zws) + 1
+            _add_textitem(e[1], e[0], count)
+        # dt.step("mw textitems without indexer")
+
+    # Sorting by index, then decreasing token count.
+    textitems = sorted(textitems, key=lambda x: (x.index, -x.token_count))
+
+    # "Write out" TextItems to the output array.
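
The sort-then-write-in-reverse resolution described in the docstring is easy to model in isolation. This toy sketch reproduces the A-I example with plain tuples, using `id()` of each tuple the same way the real code uses `id(ti)`:

```
from collections import Counter

tokens = list("ABCDEFGHI")
items = [(i, 1, t) for i, t in enumerate(tokens)]                 # single words
items += [(1, 2, "B C"), (4, 5, "E F G H I"), (5, 2, "F G"), (2, 3, "C D E")]

# Sort by index, then decreasing token count.
items.sort(key=lambda x: (x[0], -x[1]))

# Walk the sorted list in reverse, writing each item's id into the slots
# it covers; earlier/longer items overwrite later/shorter ones.
slots = [None] * len(tokens)
for item in reversed(items):
    index, count, _ = item
    for c in range(index, index + count):
        slots[c] = id(item)

# display_count = number of slots an item kept; zero means hidden.
counts = Counter(slots)
visible = [(item[2], counts[id(item)]) for item in items if counts[id(item)] > 0]
print(visible)
# [('A', 1), ('B C', 2), ('C D E', 2), ('E F G H I', 4)]
```
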
+    output_textitem_ids = [None] * len(tokens)
+    for ti in reversed(textitems):
+        for c in range(ti.index, ti.index + ti.token_count):
+            output_textitem_ids[c] = id(ti)
+
+    # Calc display_counts; e.g. if a textitem's id shows up 3 times
+    # in the output_textitem_ids, it should display 3 tokens.
+    id_counts = dict(Counter(output_textitem_ids))
+    for ti in textitems:
+        ti.display_count = id_counts.get(id(ti), 0)
+    # dt.step("display_count")
+
+    textitems = [ti for ti in textitems if ti.display_count > 0]
+
+    current_paragraph = 0
+    for ti in textitems:
+        ti.paragraph_number = current_paragraph
+        if ti.text == "¶":
+            current_paragraph += 1
+    # dt.step("paragraphs")
+    # dt.step("done")
+
+    return textitems
diff --git a/lute/read/render/multiword_indexer.py b/lute/read/render/multiword_indexer.py
new file mode 100644
index 000000000..dbbc80087
--- /dev/null
+++ b/lute/read/render/multiword_indexer.py
@@ -0,0 +1,40 @@
+"""
+Find terms in content string using ahocorapy.
+"""
+
+from ahocorapy.keywordtree import KeywordTree
+
+
+class MultiwordTermIndexer:
+    """
+    Find terms in strings using ahocorapy.
+    """
+
+    zws = "\u200B"  # zero-width space
+
+    def __init__(self):
+        self.kwtree = KeywordTree(case_insensitive=True)
+        self.finalized = False
+
+    def add(self, t):
+        "Add zws-enclosed term to tree."
+        add_t = f"{self.zws}{t}{self.zws}"
+        self.kwtree.add(add_t)
+
+    def search_all(self, lc_tokens):
+        "Find all terms and starting token index."
+        if not self.finalized:
+            self.kwtree.finalize()
+            self.finalized = True
+
+        zws = self.zws
+        content = zws + zws.join(lc_tokens) + zws
+        zwsindexes = [i for i, char in enumerate(content) if char == zws]
+        results = self.kwtree.search_all(content)
+
+        for result in results:
+            # print(f"{result}\n", flush=True)
+            t = result[0].strip(zws)
+            charpos = result[1]
+            index = zwsindexes.index(charpos)
+            yield (t, index)
diff --git a/lute/read/render/renderable_calculator.py b/lute/read/render/renderable_calculator.py
deleted file mode 100644
index 3504a9db2..000000000
--- a/lute/read/render/renderable_calculator.py
+++ /dev/null
@@ -1,495 +0,0 @@
-"""
-Calculating what items should be rendered in the browser.
-"""
-
-import re
-import functools
-from lute.models.language import Language
-from lute.models.term import Term, Status
-
-
-class RenderableCalculator:
-    """
-    Calculating what TextTokens and Terms should be rendered.
-
-    Suppose we had the following TextTokens A-I, with spaces between:
-
-        A B C D E F G H I
-
-    Then suppose we had the following Terms:
-        "B C" (term J)
-        "E F G H I" (K)
-        "F G" (L)
-        "C D E" (M)
-
-    Stacking these:
-
-        A B C D E F G H I
-
-        "B C" (term J)
-        "E F G H I" (term K)
-        "F G" (term L)
-        "C D E" (term M)
-
-    We can say:
-
-    - term J "contains" TextTokens B and C, so B and C should not be rendered.
-    - K contains tokens E-I, and also L, so none of those should be rendered.
-    - M is _not_ contained by anything else, so it should be rendered.
-    """
-
-    def _assert_texttokens_are_contiguous(self, texttokens):
-        prevtok = None
-        for tok in texttokens:
-            if prevtok is not None and prevtok.order != (tok.order - 1):
-                mparts = [prevtok.token, prevtok.order, tok.token, tok.order]
-                msg = "; ".join(map(str, mparts))
-                raise RuntimeError(f"bad token ordering: {msg}")
-            prevtok = tok
-
-    def _get_renderable(self, tokenlocator, terms, texttokens):
-        """
-        Return RenderableCandidates that will **actually be rendered**.
-
-        Method to determine what should be rendered:
-
-        1. Create a "rendered array".
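
Usage sketch for `MultiwordTermIndexer`, assuming `ahocorapy` is installed and Lute is importable; terms are added zws-joined and already lowercased, and `search_all` yields `(term_text_lc, starting_token_index)` pairs (the output order shown is illustrative):

```
from lute.read.render.multiword_indexer import MultiwordTermIndexer

zws = "\u200B"
idx = MultiwordTermIndexer()
idx.add(f"is{zws} {zws}a")       # multiword terms are stored zws-joined
idx.add(f"a{zws} {zws}cat")

tokens = ["here", " ", "is", " ", "a", " ", "cat"]
for term_text_lc, token_index in idx.search_all(tokens):
    print(repr(term_text_lc), token_index)
# e.g. 'is\u200b \u200ba' 2
#      'a\u200b \u200bcat' 4
```

The tree is finalized lazily on the first `search_all` call, which is why adding terms after a search is not supported.
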
On completion of this algorithm, - each position in the array will be filled with the ID of the - RenderableCandidate that should actually appear there (and - which might hide other candidates). - - 2. Start by saying that all the original texttokens will be - rendered by writing each candidate ID in the rendered array. - - 3. Create candidates for all the terms. - - 4. Starting with the shortest terms first (fewest text tokens), - and starting _at the end_ of the string, "write" the candidate - ID to the output "rendered array", for each token in the candidate. - - At the end of this process, each position in the "rendered array" - should be filled with the ID of the corresponding candidate - that will actually appear in that position. By getting the - unique IDs and returning just their candidates, we should have - the list of candidates that would be "visible" on render. - - Applying the above algorithm to the example given in the class - header: - - We have the following TextTokens A-I, with spaces between: - - a b c d e f g h i - - And the following terms, arranged from shortest to longest: - "B C" - "F G" - "C D E" - "E F G H I" - - First, terms are created for each individual token in the - original string: - - A B C D E F G H I - - Then the positions for each of the terms are calculated: - - [A B C D E F G H I] - - "B C" - "F G" - "C D E" - "E F G H I" - - Then, "writing" terms order by their length, and then by their - distance from the *end* of the string: - - - "F G" is written first, because it's short, and is nearest - the end: - => "A B C D E [F-G] H I" - - "B C" is next: - => "A [B-C] D E [F-G] H I" - - then "C D E": - => "A [B-C][C-D-E] [F-G] H I" - then "E F G H I": - => "A [B-C][C-D-E][E-F-G-H-I]" - """ - - # All the candidates to be considered for rendering. - candidates = {} - - # Step 1. Map of the token position to the id of the - # candidate that should be rendered there. - rendered = {} - - # Step 2 - fill with the original texttokens. - for tok in texttokens: - rc = RenderableCandidate() - rc.display_text = tok.token - rc.text = tok.token - rc.pos = tok.order - rc.is_word = tok.is_word - candidates[rc.id] = rc - rendered[rc.pos] = rc.id - - # 3. Create candidates for all the terms. - termcandidates = [] - - for term in terms: - for loc in tokenlocator.locate_string(term.text_lc): - rc = RenderableCandidate() - rc.term = term - rc.display_text = loc["text"] - rc.text = loc["text"] - rc.pos = texttokens[0].order + loc["index"] - rc.length = term.token_count - rc.is_word = 1 - - termcandidates.append(rc) - candidates[rc.id] = rc - - # 4a. Sort the term candidates: first by length, then by position. - def compare(a, b): - # Longest sorts first. - if a.length != b.length: - return -1 if (a.length > b.length) else 1 - # Lowest position (closest to front of string) sorts first. - return -1 if (a.pos < b.pos) else 1 - - termcandidates.sort(key=functools.cmp_to_key(compare)) - - # The termcandidates should now be sorted such that longest - # are first, with items of equal length being sorted by - # position. By traversing this in reverse and "writing" - # their IDs to the "rendered" array, we should end up with - # the final IDs in each position. 
- termcandidates.reverse() - for tc in termcandidates: - for i in range(tc.length): - rendered[tc.pos + i] = tc.id - - rcids = list(set(rendered.values())) - return [candidates[rcid] for rcid in rcids] - - def _sort_by_order_and_tokencount(self, items): - items.sort(key=lambda x: (x.pos, -x.length)) - return items - - def _calc_overlaps(self, items): - for i in range(1, len(items)): - prev = items[i - 1] - curr = items[i] - - prevterm_last_token_pos = prev.pos + prev.length - 1 - overlap = prevterm_last_token_pos - curr.pos + 1 - - if overlap > 0: - zws = chr(0x200B) - curr_tokens = curr.text.split(zws) - show = curr_tokens[overlap:] - curr.display_text = zws.join(show) - - return items - - def main(self, language, words, texttokens): - """ - Main entrypoint. - - Given a language and some terms and texttokens, - return the RenderableCandidates to be rendered. - """ - texttokens.sort(key=lambda x: x.order) - self._assert_texttokens_are_contiguous(texttokens) - - subject = TokenLocator.make_string([t.token for t in texttokens]) - tocloc = TokenLocator(language, subject) - - renderable = self._get_renderable(tocloc, words, texttokens) - items = self._sort_by_order_and_tokencount(renderable) - items = self._calc_overlaps(items) - return items - - @staticmethod - def get_renderable(lang, words, texttokens): - "Convenience method, calls main." - rc = RenderableCalculator() - return rc.main(lang, words, texttokens) - - -class RenderableCandidate: # pylint: disable=too-many-instance-attributes - """ - An item that may or may not be rendered on the browser. - - Given some Terms contained in a text, the RenderableCalculator - creates RenderableCandidates for each Term in the text, as well as - the original text tokens. - - When the final set of actually rendered RenderableCandidates is - found (with self.render is True), these are convered into TextItems - for the final render. - """ - - # ID incremented for each instance. - class_id = 0 - - def __init__(self): - RenderableCandidate.class_id += 1 - - self.id: int = RenderableCandidate.class_id - self.term: Term = None - self.display_text: str # Text to show, if there is any overlap - self.text: str # Actual text of the term - self.pos: int - self.length: int = 1 - self.is_word: int - self.render: bool = True - - def __repr__(self): - parts = [f"pos {self.pos}", f"render {self.render}" f"(id {self.id})"] - parts = " ".join(parts) - return f'' - - @property - def term_id(self) -> int: - return self.term.id if self.term else None - - @property - def order_end(self) -> int: - return self.pos + self.length - 1 - - def make_text_item(self, p_num: int, se_id: int, text_id: int, lang: Language): - """ - Create a TextItem for final rendering. - """ - t = TextItem() - t.order = self.pos - t.text_id = text_id - t.lang_id = lang.id - t.display_text = self.display_text - t.text = self.text - t.token_count = self.length - t.text_lc = lang.get_lowercase(self.text) - t.para_id = p_num - t.se_id = se_id - t.is_word = self.is_word - t.text_length = len(self.text) - - t.load_term_data(self.term) - - return t - - -class TokenLocator: - """ - Helper class for finding tokens and positions in a subject string. - - Finds a given token (word) in a sentence, ignoring case, returning - the actual word in the sentence (its original case), and its index - or indices. 
- - For example, given: - - - $subject "/this/ /CAT/ /is/ /big/" - - $find_patt = "/cat/" - - (where "/" is the zero-width space to indicate word boundaries) - - this method would return [ { 'term': "CAT", 'index': 2 } ] - - the token "cat" is actually "CAT" (uppercase) in the sentence - - it's at index = 2 - - Note that the language of the string must also be provided, because - some languages (Turkish!) have unusual case requirements. - - See the test cases for more examples. - """ - - def __init__(self, language, subject): - self.language = language - self.subject = subject - - def locate_string(self, s): - """ - Find the string s in the subject self.subject. - """ - find_lc = self.language.get_lowercase(s) - find_lc = TokenLocator.make_string(find_lc) - - subjLC = self.language.get_lowercase(self.subject) - - matches = self.preg_match_capture(find_lc, subjLC) - - # The matches were performed with the lowercased subject, - # because some languages (Turkish!) have funny cases. - # We need to convert the matched text back to the _original_ - # subject string cases. - subj = self.subject - - def make_text_index_pair(match): - matchtext = match[0] # includes zws at start and end. - matchlen = len(matchtext) - matchpos = match[1] - - # print(f"found match \"{matchtext}\" len={matchlen} pos={matchpos}") - original_subject_text = subj[matchpos : matchpos + matchlen] - zws = "\u200B" - t = original_subject_text.lstrip(zws).rstrip(zws) - index = self.get_count_before(subj, matchpos) - return {"text": t, "index": index} - - termmatches = list(map(make_text_index_pair, matches)) - - return termmatches - - def get_count_before(self, string, pos): - """ - Count of tokens found in string before position pos. - """ - zws = "\u200B" - beforesubstr = string[:pos] - n = beforesubstr.count(zws) - return n - - def preg_match_capture(self, find_lc, subject): - """ - Return the matched text and their start positions in the subject. - - E.g. search for r'cat' in "there is a CAT and a Cat" returns: - [['CAT', 11], ['Cat', 21]] - """ - - # "(?=())" is required because sometimes the search pattern can - # overlap -- e.g. _b_b_ has _b_ *twice*. - # https://stackoverflow.com/questions/5616822/ - # how-to-use-regex-to-find-all-overlapping-matches - pattern = rf"(?=({re.escape(find_lc)}))" - - matches = re.finditer(pattern, subject, flags=re.IGNORECASE) - - # Use group(1) to get the match text, because group(0) is a - # zero-length string. - result = [[match.group(1), match.start()] for match in matches] - return result - - @staticmethod - def make_string(t): - """ - Append zero-width string to string to simplify/standardize searches. - """ - zws = "\u200B" - if isinstance(t, list): - t = zws.join(t) - return zws + t + zws - - -class TextItem: # pylint: disable=too-many-instance-attributes - """ - Unit to be rendered. - - Data structure for template read/textitem.html - """ - - def __init__(self): - self.text_id: int - self.lang_id: int - self.order: int - self.text: str # The original, un-overlapped text. - self.display_text: str # The actual text to display on screen. - # If part of the text has been overlapped by a - # prior token, this will be different from Text. - self.token_count: int - self.text_lc: str - self.para_id: int - self.se_id: int - self.is_word: int - self.text_length: int - # The tooltip should be shown for well-known/ignored TextItems - # that merit a tooltip. e.g., if there isn't any actual Term - # entity associated with this TextItem, nothing more is needed. 
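
The `(?=(...))` pattern relied on here (and again in the new `get_string_indexes`) is the standard trick for finding overlapping matches: the zero-width lookahead consumes nothing, while its capture group still records the text. A standalone demonstration:

```
# Overlapping matches via a zero-width lookahead with a capture group.
import re

zws = "\u200B"
subject = f"{zws}b{zws}b{zws}"                      # "_b_b_": "_b_" occurs twice
needle = re.escape(f"{zws}b{zws}")

plain = [m.group() for m in re.finditer(needle, subject)]
overlapping = [m.group(1) for m in re.finditer(rf"(?=({needle}))", subject)]

print(len(plain), len(overlapping))  # 1 2
```
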
- # Also, if there is a Term entity but it's mostly empty, a - # tooltip isn't useful. - self.show_tooltip: bool = False - self.wo_id: int = None - self.wo_status: int = None - self.flash_message: str = None - - def load_term_data(self, term): - """ - Load extra term data, if any. - """ - if term is None: - return - - self.wo_id = term.id - self.wo_status = term.status - self.flash_message = term.get_flash_message() - - def has_extra(cterm): - if cterm is None: - return False - no_extra = ( - cterm.translation is None - and cterm.romanization is None - and cterm.get_current_image() is None - ) - return not no_extra - - show_tooltip = has_extra(term) - for p in term.parents: - show_tooltip = show_tooltip or has_extra(p) - self.show_tooltip = show_tooltip - - @property - def html_display_text(self): - """ - Content to be rendered to browser. - """ - zws = chr(0x200B) - return self.display_text.replace(zws, "").replace(" ", " ") - - @property - def span_id(self): - """ - Each span gets a unique id. - Arbitrary format: ID-{order}-{tokencount}. - - This *might* not be necessary ... I don't think IDs are used anywhere. - """ - parts = ["ID", str(self.order), str(max(1, self.token_count))] - return "-".join(parts) - - @property - def html_class_string(self): - """ - Create class string for TextItem. - """ - if self.is_word == 0: - return "textitem" - - if self.wo_id is None: - classes = ["textitem", "click", "word", "status0"] - return " ".join(classes) - - st = self.wo_status - classes = [ - "textitem", - "click", - "word", - "word" + str(self.wo_id), - "status" + str(st), - ] - - tooltip = ( - st not in (Status.WELLKNOWN, Status.IGNORED) - or self.show_tooltip - or self.flash_message is not None - ) - if tooltip: - classes.append("showtooltip") - - if self.flash_message is not None: - classes.append("hasflash") - - if self.display_text != self.text: - classes.append("overlapped") - - return " ".join(classes) diff --git a/lute/read/render/service.py b/lute/read/render/service.py new file mode 100644 index 000000000..dc706398d --- /dev/null +++ b/lute/read/render/service.py @@ -0,0 +1,216 @@ +""" +Reading rendering helpers. +""" + +import itertools +import re +from sqlalchemy import text as sqltext + +from lute.models.term import Term +from lute.parse.base import ParsedToken +from lute.read.render.calculate_textitems import get_textitems as calc_get_textitems +from lute.read.render.multiword_indexer import MultiwordTermIndexer + +# from lute.utils.debug_helpers import DebugTimer + + +class Service: + "Service." + + def __init__(self, session): + self.session = session + + def find_all_Terms_in_string(self, s, language): # pylint: disable=too-many-locals + """ + Find all terms contained in the string s. + + For example + - given s = "Here is a cat" + - given terms in the db: [ "cat", "a cat", "dog" ] + + This would return the terms "cat" and "a cat". + """ + cleaned = re.sub(r" +", " ", s) + tokens = language.get_parsed_tokens(cleaned) + return self._find_all_terms_in_tokens(tokens, language) + + def _get_multiword_terms(self, language): + "Get all multiword terms." + sql = sqltext( + """ + SELECT WoID, WoTextLC FROM words + WHERE WoLgID=:language_id and WoTokenCount>1 + """ + ) + sql = sql.bindparams(language_id=language.id) + return self.session.execute(sql).all() + + def _find_all_multi_word_term_text_lcs_in_content(self, text_lcs, language): + "Find multiword terms, return list of text_lcs." 
+ + # There are a few ways of finding multi-word Terms + # (with token_count > 1) in the content: + # + # 1. load each mword term text_lc via sql and check. + # 2. using the model + # 3. SQL with "LIKE" + # + # During reasonable test runs with my data, the times in seconds + # for each are similar (~0.02, ~0.05, ~0.025). This method is + # only used for small amounts of data, and the user experience hit + # is negligible, so I'll use the first method which IMO is the clearest + # code. + + zws = "\u200B" # zero-width space + content = zws + zws.join(text_lcs) + zws + + # Method 1: + reclist = self._get_multiword_terms(language) + return [p[1] for p in reclist if f"{zws}{p[1]}{zws}" in content] + + ## # Method 2: use the model. + ## contained_term_qry = self.session.query(Term).filter( + ## Term.language == language, + ## Term.token_count > 1, + ## func.instr(content, Term.text_lc) > 0, + ## ) + ## return [r.text_lc for r in contained_term_qry.all()] + + ## # Method 3: Query with LIKE + ## sql = sqltext( + ## """ + ## SELECT WoTextLC FROM words + ## WHERE WoLgID=:lid and WoTokenCount>1 + ## AND :content LIKE '%' || :zws || WoTextLC || :zws || '%' + ## """ + ## ) + ## sql = sql.bindparams(lid=language.id, content=content, zws=zws) + ## recs = self.session.execute(sql).all() + ## return [r[0] for r in recs] + + def _find_all_terms_in_tokens(self, tokens, language, kwtree=None): + """ + Find all terms contained in the (ordered) parsed tokens tokens. + + For example + - given tokens = "Here", " ", "is", " ", "a", " ", "cat" + - given terms in the db: [ "cat", "a/ /cat", "dog" ] + + This would return the terms "cat" and "a/ /cat". + + Method: + - build list of lowercase text in the tokens + - append all multword term strings that exist in the content + - query for Terms that exist in the list + + Note: this method only uses indexes for multiword terms, as any + content analyzed is first parsed into tokens before being passed + to this routine. There's no need to search for single-word Terms + in the tokenized strings, they can be found by a simple query. + """ + + # Performance: About half of the time in this routine is spent in + # Step 1 (finding multiword terms), the rest in step 2 (the actual + # query). + # dt = DebugTimer("_find_all_terms_in_tokens", display=True) + + parser = language.parser + text_lcs = [parser.get_lowercase(t.token) for t in tokens] + + # Step 1: get the multiwords in the content. + if kwtree is None: + mword_terms = self._find_all_multi_word_term_text_lcs_in_content( + text_lcs, language + ) + else: + results = kwtree.search_all(text_lcs) + mword_terms = [r[0] for r in results] + # dt.step("filtered mword terms") + + # Step 2: load the Term objects. + # + # The Term fetch is actually performant -- there is no + # real difference between loading the Term objects versus + # loading raw data with SQL and getting dicts. 
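
Method 1 above reduces to a plain substring test once both the page content and the stored terms share the zws-joined form. A minimal sketch of that containment check, with the database rows stubbed out as tuples:

```
# Sketch of the zws-containment test used by
# _find_all_multi_word_term_text_lcs_in_content (db rows stubbed).
zws = "\u200B"

text_lcs = ["here", " ", "is", " ", "a", " ", "cat"]                # lowercased tokens
mword_rows = [(1, f"a{zws} {zws}cat"), (2, f"big{zws} {zws}dog")]   # (WoID, WoTextLC)

content = zws + zws.join(text_lcs) + zws
found = [text_lc for _, text_lc in mword_rows if f"{zws}{text_lc}{zws}" in content]
print(found == [f"a{zws} {zws}cat"])  # True
```
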
+        #
+        # Code for getting raw data:
+        # param_keys = [f"w{i}" for i, _ in enumerate(text_lcs)]
+        # keys_placeholders = ','.join([f":{k}" for k in param_keys])
+        # param_dict = dict(zip(param_keys, text_lcs))
+        # param_dict["langid"] = language.id
+        # sql = sqltext(f"""SELECT WoID, WoTextLC FROM words
+        #    WHERE WoLgID=:langid and WoTextLC in ({keys_placeholders})""")
+        # sql = sql.bindparams(language.id, *text_lcs)
+        # results = self.session.execute(sql, param_dict).fetchall()
+        text_lcs.extend(mword_terms)
+        tok_strings = list(set(text_lcs))
+        terms_matching_tokens_qry = self.session.query(Term).filter(
+            Term.text_lc.in_(tok_strings), Term.language == language
+        )
+        all_terms = terms_matching_tokens_qry.all()
+        # dt.step("exec query")
+
+        return all_terms
+
+    def get_textitems(self, s, language, multiword_term_indexer=None):
+        """
+        Get array of TextItems for the string s.
+
+        The multiword_term_indexer is a big performance boost, but takes
+        time to initialize.
+        """
+        # Hacky reset of ParsedToken state.
+        # _Shouldn't_ be needed but doesn't hurt, even if it's lame.
+        ParsedToken.reset_counters()
+
+        cleaned = re.sub(r" +", " ", s)
+        tokens = language.get_parsed_tokens(cleaned)
+        terms = self._find_all_terms_in_tokens(tokens, language, multiword_term_indexer)
+        textitems = calc_get_textitems(tokens, terms, language, multiword_term_indexer)
+        return textitems
+
+    def get_multiword_indexer(self, language):
+        "Return indexer loaded with all multiword terms."
+        mw = MultiwordTermIndexer()
+        for r in self._get_multiword_terms(language):
+            mw.add(r[1])
+        return mw
+
+    def get_paragraphs(self, s, language):
+        """
+        Get array of arrays of TextItems for the given string s.
+
+        This doesn't use an indexer, as it should only be used
+        for a single page of text!
+        """
+        textitems = self.get_textitems(s, language)
+
+        def _split_textitems_by_paragraph(textitems):
+            "Split by ¶"
+            ret = []
+            curr_para = []
+            for t in textitems:
+                if t.text == "¶":
+                    ret.append(curr_para)
+                    curr_para = []
+                else:
+                    curr_para.append(t)
+            if len(curr_para) > 0:
+                ret.append(curr_para)
+            return ret
+
+        def _split_by_sentence_number(p):
+            sentences = [
+                list(sentence)
+                for _, sentence in itertools.groupby(p, key=lambda t: t.sentence_number)
+            ]
+            for s in sentences:
+                s[0].add_html_class("sentencestart")
+            return sentences
+
+        paras = [
+            _split_by_sentence_number(list(sentences))
+            for sentences in _split_textitems_by_paragraph(textitems)
+        ]
+
+        return paras
diff --git a/lute/read/render/text_item.py b/lute/read/render/text_item.py
new file mode 100644
index 000000000..2cc7845b1
--- /dev/null
+++ b/lute/read/render/text_item.py
@@ -0,0 +1,134 @@
+"TextItem class."
+
+zws = "\u200B"  # zero-width space
+
+
+class TextItem:  # pylint: disable=too-many-instance-attributes
+    """
+    Unit to be rendered.
+
+    Data structure for template read/textitem.html
+
+    Some elements are lazy loaded, because they're only needed in
+    certain situations.
+    """
+
+    def __init__(self, term=None):
+        self.index: int
+        self.lang_id: int
+        self.text: str  # The original, un-overlapped text.
+        self.text_lc: str
+        self.is_word: int
+
+        # Number of tokens originally in the Text item.
+        self.token_count: int = 1
+
+        # Number of tokens that should be displayed, starting from the
+        # end of the string.
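
The paragraph/sentence nesting built by `get_paragraphs` can be sketched with bare `(text, sentence_number)` pairs: "¶" items close a paragraph, and `itertools.groupby` splits each paragraph on the sentence number:

```
# Toy model of get_paragraphs' two-level grouping.
import itertools

# (text, sentence_number) pairs; "¶" items close a paragraph.
items = [("Hi", 0), (".", 0), ("There", 1), (".", 1), ("¶", 1), ("Bye", 2), (".", 2)]

paragraphs, curr = [], []
for it in items:
    if it[0] == "¶":
        paragraphs.append(curr)
        curr = []
    else:
        curr.append(it)
if curr:
    paragraphs.append(curr)

# Within each paragraph, group items into sentences by sentence number.
nested = [
    [list(sent) for _, sent in itertools.groupby(para, key=lambda it: it[1])]
    for para in paragraphs
]
print(nested)
# [[[('Hi', 0), ('.', 0)], [('There', 1), ('.', 1)]], [[('Bye', 2), ('.', 2)]]]
```
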
+        self.display_count: int = 1
+
+        self.sentence_number: int = 0
+        self.paragraph_number: int = 0
+
+        # Calls setter
+        self.term = term
+
+        self.extra_html_classes = []
+
+        # TODO code
+        # # The flash message can be None, so we need an extra flag
+        # # to determine if it has been loaded or not.
+        # self._flash_message_loaded: bool = False
+        # self._flash_message: str = None
+
+    def __repr__(self):
+        return f'<TextItem "{self.text}">'
+
+    @property
+    def term(self):
+        return self._term
+
+    @property
+    def wo_id(self):
+        "The term id is the wo_id."
+        if self._term is None:
+            return None
+        return self._term.id
+
+    @term.setter
+    def term(self, t):
+        self.wo_status = None
+        self._term = t
+        if t is None:
+            return
+        self.lang_id = t.language.id
+        self.wo_status = t.status
+
+    # TODO - reactivate with non-lazy query results.
+    # @property
+    # def flash_message(self):
+    #     """
+    #     Return flash message if anything present.
+    #     Lazy loaded as needed.
+    #     """
+    #     if self._flash_message_loaded:
+    #         return self._flash_message
+    #     if self.term is None:
+    #         return None
+
+    #     self._flash_message = self.term.get_flash_message()
+    #     self._flash_message_loaded = True
+    #     return self._flash_message
+
+    @property
+    def display_text(self):
+        "Show last n tokens, if some of the textitem is covered."
+        toks = self.text.split(zws)
+        disp_toks = toks[-self.display_count :]
+        return zws.join(disp_toks)
+
+    @property
+    def html_display_text(self):
+        """
+        Content to be rendered to browser.
+        """
+        return self.display_text.replace(zws, "")
+
+    @property
+    def span_id(self):
+        """
+        Each span gets a unique id.
+        """
+        return f"ID-{self.sentence_number}-{self.index}"
+
+    @property
+    def status_class(self):
+        "Status class to apply."
+        if self.wo_id is None:
+            return "status0"
+        return f"status{self.wo_status}"
+
+    def add_html_class(self, c):
+        "Add extra class to term."
+        self.extra_html_classes.append(c)
+
+    @property
+    def html_class_string(self):
+        """
+        Create class string for TextItem.
+        """
+        if self.is_word == 0:
+            return "textitem"
+
+        classes = [
+            "textitem",
+            "click",
+            "word",
+            "word" + str(self.wo_id),
+        ]
+
+        if self.display_text != self.text:
+            classes.append("overlapped")
+        classes.extend(self.extra_html_classes)
+
+        return " ".join(classes)
diff --git a/lute/read/routes.py b/lute/read/routes.py
index 737695f33..24d7c85a2 100644
--- a/lute/read/routes.py
+++ b/lute/read/routes.py
@@ -2,105 +2,198 @@
 /read endpoints.
 """
 
-from datetime import datetime
-
-from flask import Blueprint, render_template, redirect
-from lute.read.service import get_paragraphs, set_unknowns_to_known
+from flask import Blueprint, flash, request, render_template, redirect, jsonify
+from lute.read.service import Service
 from lute.read.forms import TextForm
 from lute.term.model import Repository
 from lute.term.routes import handle_term_form
-from lute.models.book import Book, Text
-from lute.models.term import Term as DBTerm
-from lute.book.stats import mark_stale
+from lute.settings.current import current_settings
+from lute.models.book import Text
+from lute.models.repositories import BookRepository, LanguageRepository
 from lute.db import db
 
 bp = Blueprint("read", __name__, url_prefix="/read")
 
 
-def _page_in_range(book, n):
-    "Return the page number respecting the page range."
-    ret = max(n, 1)
-    ret = min(ret, book.page_count)
-    return ret
+def _render_book_page(book, pagenum, track_page_open=True):
+    """
+    Render a particular book page.
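
How `display_count` drives `display_text` for an overlapped item: only the trailing tokens survive. A standalone sketch of the property's logic (not an import of the class):

```
# TextItem.display_text, isolated: keep the last display_count zws-joined tokens.
zws = "\u200B"
text = zws.join(["C", " ", "D", " ", "E"])   # token_count = 5

display_count = 3                            # leading "C " hidden by an earlier term
toks = text.split(zws)
display_text = zws.join(toks[-display_count:])
print(display_text.replace(zws, ""))         # -> "D E"
```
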
+ """ + lang = book.language + show_highlights = current_settings["show_highlights"] + lang_repo = LanguageRepository(db.session) + term_dicts = lang_repo.all_dictionaries()[lang.id]["term"] + + return render_template( + "read/index.html", + hide_top_menu=True, + is_rtl=lang.right_to_left, + html_title=book.title, + book=book, + sentence_dict_uris=lang.sentence_dict_uris, + page_num=pagenum, + page_count=book.page_count, + show_highlights=show_highlights, + lang_id=lang.id, + track_page_open=track_page_open, + term_dicts=term_dicts, + ) + + +def _find_book(bookid): + "Find book from db." + br = BookRepository(db.session) + return br.find(bookid) + + +@bp.route("/", methods=["GET"]) +def read(bookid): + """ + Read a book, opening to its current page. + + This is called from the book listing, on Lute index. + """ + book = _find_book(bookid) + if book is None: + flash(f"No book matching id {bookid}") + return redirect("/", 302) + + page_num = 1 + text = book.texts[0] + if book.current_tx_id: + text = db.session.get(Text, book.current_tx_id) + page_num = text.order + + return _render_book_page(book, page_num) @bp.route("//page/", methods=["GET"]) -def read(bookid, pagenum): - "Display reading pane for book page." +def read_page(bookid, pagenum): + """ + Read a particular page of a book. + """ + book = _find_book(bookid) + if book is None: + flash(f"No book matching id {bookid}") + return redirect("/", 302) - book = Book.find(bookid) - lang = book.language + pagenum = book.page_in_range(pagenum) + return _render_book_page(book, pagenum) - pagenum = _page_in_range(book, pagenum) - text = book.texts[pagenum - 1] - book.current_tx_id = text.id - db.session.add(book) - db.session.commit() - paragraphs = get_paragraphs(text) +@bp.route("//peek/", methods=["GET"]) +def peek_page(bookid, pagenum): + """ + Peek at a page; i.e. render it, but don't set the current text or start date. + """ + book = _find_book(bookid) + if book is None: + flash(f"No book matching id {bookid}") + return redirect("/", 302) - prevpage = _page_in_range(book, pagenum - 1) - nextpage = _page_in_range(book, pagenum + 1) - prev10 = _page_in_range(book, pagenum - 10) - next10 = _page_in_range(book, pagenum + 10) + pagenum = book.page_in_range(pagenum) + return _render_book_page(book, pagenum, track_page_open=False) - mark_stale(book) - return render_template( - "read/index.html", - hide_top_menu=True, - text=text, - textid=text.id, - is_rtl=lang.right_to_left, - html_title=text.title, - book=book, - dictionary_url=lang.sentence_translate_uri, - pagenum=pagenum, - pagecount=book.page_count, - prevpage=prevpage, - prev10page=prev10, - nextpage=nextpage, - next10page=next10, - paragraphs=paragraphs, - ) +@bp.route("/page_done", methods=["post"]) +def page_done(): + "Handle POST when page is done." + data = request.json + bookid = int(data.get("bookid")) + pagenum = int(data.get("pagenum")) + restknown = data.get("restknown") + + service = Service(db.session) + service.mark_page_read(bookid, pagenum, restknown) + return jsonify("ok") -def _process_footer_action(bookid, pagenum, nextpage, set_to_known=True): - """ " - Mark as read, - optionally mark all terms as known on the current page, - and go to the next page. 
+@bp.route("/delete_page/<int:bookid>/<int:pagenum>", methods=["GET"])
+def delete_page(bookid, pagenum):
     """
-    book = Book.find(bookid)
-    pagenum = _page_in_range(book, pagenum)
-    text = book.texts[pagenum - 1]
-    text.read_date = datetime.now()
-    db.session.add(text)
-    db.session.commit()
-    if set_to_known:
-        set_unknowns_to_known(text)
-    return redirect(f"/read/{bookid}/page/{nextpage}", code=302)
+    Delete page.
+    """
+    book = _find_book(bookid)
+    if book is None:
+        flash(f"No book matching id {bookid}")
+        return redirect("/", 302)
+
+    if len(book.texts) == 1:
+        flash("Cannot delete only page in book.")
+    else:
+        book.remove_page(pagenum)
+        db.session.add(book)
+        db.session.commit()
+
+    url = f"/read/{bookid}/page/{pagenum}"
+    return redirect(url, 302)
 
 
-@bp.route("/<int:bookid>/page/<int:pagenum>/allknown/<int:nextpage>", methods=["post"])
-def allknown(bookid, pagenum, nextpage):
-    "Mark all as known, go to next page."
-    return _process_footer_action(bookid, pagenum, nextpage, True)
+@bp.route("/new_page/<int:bookid>/<string:position>/<int:pagenum>", methods=["GET", "POST"])
+def new_page(bookid, position, pagenum):
+    "Create a new page."
+    form = TextForm()
+    book = _find_book(bookid)
+    if form.validate_on_submit():
+        t = None
+        if position == "before":
+            t = book.add_page_before(pagenum)
+        else:
+            t = book.add_page_after(pagenum)
+        t.book = book
+        t.text = form.text.data
+        db.session.add(book)
+        db.session.commit()
 
-@bp.route("/<int:bookid>/page/<int:pagenum>/markread/<int:nextpage>", methods=["post"])
-def mark_read(bookid, pagenum, nextpage):
-    "Mark page as read, go to the next page."
-    return _process_footer_action(bookid, pagenum, nextpage, False)
+        book.current_tx_id = t.id
+        db.session.add(book)
+        db.session.commit()
+        return redirect(f"/read/{book.id}", 302)
 
-@bp.route("/sentences/<int:textid>", methods=["GET"])
-def sentences(textid):
-    "Display sentences for the given text."
-    text = db.session.query(Text).filter(Text.id == textid).first()
-    paragraphs = get_paragraphs(text)
-    return render_template("read/sentences.html", paragraphs=paragraphs)
+    text_dir = "rtl" if book.language.right_to_left else "ltr"
+    return render_template(
+        "read/page_edit_form.html", hide_top_menu=True, form=form, text_dir=text_dir
+    )
+
+
+@bp.route("/save_player_data", methods=["post"])
+def save_player_data():
+    "Save current player position, bookmarks.  Called on a loop by the player."
+    data = request.json
+    bookid = int(data.get("bookid"))
+    book = _find_book(bookid)
+    book.audio_current_pos = float(data.get("position"))
+    book.audio_bookmarks = data.get("bookmarks")
+    db.session.add(book)
+    db.session.commit()
+    return jsonify("ok")
+
+
+@bp.route("/start_reading/<int:bookid>/<int:pagenum>", methods=["GET"])
+def start_reading(bookid, pagenum):
+    "Called by ajax.  Update the text.start_date, and render page."
+    book = _find_book(bookid)
+    if book is None:
+        flash(f"No book matching id {bookid}")
+        return redirect("/", 302)
+    service = Service(db.session)
+    paragraphs = service.start_reading(book, pagenum)
+    return render_template("read/page_content.html", paragraphs=paragraphs)
+
+
+@bp.route("/refresh_page/<int:bookid>/<int:pagenum>", methods=["GET"])
+def refresh_page(bookid, pagenum):
+    "Refreshes the page content, but doesn't set the text's start_date."
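
The `page_done` and `save_player_data` handlers above read small JSON payloads. A sketch of exercising them with the `requests` package against a locally running Lute; the default port 5001 and the sample field values are assumptions, while the field names come from the handlers:

```
# Hypothetical client calls mirroring what the reading screen's JS posts.
import requests

base = "http://localhost:5001"

# Mark page 3 of book 1 as read, without marking remaining terms known.
requests.post(
    f"{base}/read/page_done",
    json={"bookid": 1, "pagenum": 3, "restknown": False},
    timeout=5,
)

# Persist the audio player position and bookmarks for book 1.
requests.post(
    f"{base}/read/save_player_data",
    json={"bookid": 1, "position": 12.5, "bookmarks": "[]"},
    timeout=5,
)
```
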
+    book = _find_book(bookid)
+    if book is None:
+        flash(f"No book matching id {bookid}")
+        return redirect("/", 302)
+    service = Service(db.session)
+    paragraphs = service.get_paragraphs(book, pagenum)
+    return render_template("read/page_content.html", paragraphs=paragraphs)
 
 
 @bp.route("/empty", methods=["GET"])
@@ -112,81 +205,80 @@ def empty():
 
 @bp.route("/termform/<int:langid>/<text>", methods=["GET", "POST"])
 def term_form(langid, text):
     """
-    Create or edit a term.
+    Create a multiword term for the given text, replacing the LUTESLASH hack.
     """
-    repo = Repository(db)
-    term = repo.find_or_new(langid, text)
-
+    usetext = text.replace("LUTESLASH", "/")
+    repo = Repository(db.session)
+    term = repo.find_or_new(langid, usetext)
+    if term.status == 0:
+        term.status = 1
     return handle_term_form(
         term,
         repo,
-        "/read/frameform.html",
+        db.session,
+        "/read/term_edit_form.html",
         render_template("/read/updated.html", term_text=term.text),
         embedded_in_reading_frame=True,
     )
 
 
-@bp.route("/termpopup/<int:termid>", methods=["GET"])
-def term_popup(termid):
+@bp.route("/edit_term/<int:term_id>", methods=["GET", "POST"])
+def edit_term_form(term_id):
     """
-    Show a term popup for the given DBTerm.
+    Edit a term.
     """
-    term = DBTerm.query.get(termid)
-
-    term_tags = [tt.text for tt in term.term_tags]
-
-    def make_array(t):
-        ret = {
-            "term": t.text,
-            "roman": t.romanization,
-            "trans": t.translation if t.translation else "-",
-            "tags": [tt.text for tt in t.term_tags],
-        }
-        return ret
-
-    parent_terms = [p.text for p in term.parents]
-    parent_terms = ", ".join(parent_terms)
+    repo = Repository(db.session)
+    term = repo.load(term_id)
+    # print(f"editing term {term_id}", flush=True)
+    if term.status == 0:
+        term.status = 1
+    return handle_term_form(
+        term,
+        repo,
+        db.session,
+        "/read/term_edit_form.html",
+        render_template("/read/updated.html", term_text=term.text),
+        embedded_in_reading_frame=True,
+    )
 
-    parent_data = []
-    if len(term.parents) == 1:
-        parent = term.parents[0]
-        if parent.translation != term.translation:
-            parent_data.append(make_array(parent))
-    else:
-        parent_data = [make_array(p) for p in term.parents]
 
-    images = [term.get_current_image()] if term.get_current_image() else []
-    for p in term.parents:
-        if p.get_current_image():
-            images.append(p.get_current_image())
+@bp.route("/term_bulk_edit_form", methods=["GET"])
+def term_bulk_edit_form():
+    """
+    Show the bulk term edit form.
+    """
+    repo = Repository(db.session)
+    return render_template(
+        "read/term_bulk_edit_form.html",
+        tags=repo.get_term_tags(),
+    )
 
-    images = list(set(images))
 
+@bp.route("/termpopup/<int:termid>", methods=["GET"])
+def term_popup(termid):
+    """
+    Get popup html for DBTerm, or None if nothing should be shown.
+    """
+    service = Service(db.session)
+    d = service.get_popup_data(termid)
+    if d is None:
+        return ""
     return render_template(
         "read/termpopup.html",
-        term=term,
-        flashmsg=term.get_flash_message(),
-        term_tags=term_tags,
-        term_images=images,
-        parentdata=parent_data,
-        parentterms=parent_terms,
+        data=d,
     )
 
 
-@bp.route("/keyboard_shortcuts", methods=["GET"])
-def keyboard_shortcuts():
-    return render_template("read/keyboard_shortcuts.html")
-
-
 @bp.route("/flashcopied", methods=["GET"])
 def flashcopied():
     return render_template("read/flashcopied.html")
 
 
-@bp.route("/editpage/<int:textid>", methods=["GET", "POST"])
-def edit_page(textid):
+@bp.route("/editpage/<int:bookid>/<int:pagenum>", methods=["GET", "POST"])
+def edit_page(bookid, pagenum):
     "Edit the text on a page."
- text = db.session.get(Text, textid) + book = _find_book(bookid) + text = book.text_at_page(pagenum) if text is None: return redirect("/", 302) form = TextForm(obj=text) @@ -195,6 +287,9 @@ def edit_page(textid): form.populate_obj(text) db.session.add(text) db.session.commit() - return redirect(f"/read/{text.book.id}/page/{text.order}", 302) + return redirect(f"/read/{book.id}", 302) - return render_template("read/page_edit_form.html", hide_top_menu=True, form=form) + text_dir = "rtl" if book.language.right_to_left else "ltr" + return render_template( + "read/page_edit_form.html", hide_top_menu=True, form=form, text_dir=text_dir + ) diff --git a/lute/read/service.py b/lute/read/service.py index d95f99530..a67896c41 100644 --- a/lute/read/service.py +++ b/lute/read/service.py @@ -2,189 +2,253 @@ Reading helpers. """ -import re -from sqlalchemy import func - +from collections import defaultdict +from datetime import datetime +import functools from lute.models.term import Term, Status -from lute.models.book import Text -from lute.parse.base import ParsedToken -from lute.read.render.renderable_calculator import RenderableCalculator +from lute.models.book import Text, WordsRead +from lute.models.repositories import BookRepository, UserSettingRepository +from lute.book.stats import Service as StatsService +from lute.read.render.service import Service as RenderService +from lute.read.render.calculate_textitems import get_string_indexes from lute.term.model import Repository -from lute.db import db - - -def find_all_Terms_in_string(s, language): - """ - Find all terms contained in the string s. - - For example - - given s = "Here is a cat" - - given terms in the db: [ "cat", "a cat", "dog" ] - - This would return the terms "cat" and "a cat". - - The code first queries for exact single-token matches, - and then multiword matches, because that's much faster - than querying for everthing at once. (This may no longer - be true, can change it later.) - """ - - # Extract word tokens from the input string - cleaned = re.sub(r"\s+", " ", s) - tokens = language.get_parsed_tokens(cleaned) - - parser = language.parser - - # Query for terms with a single token that match the unique word tokens - word_tokens = filter(lambda t: t.is_word, tokens) - tok_strings = [parser.get_lowercase(t.token) for t in word_tokens] - tok_strings = list(set(tok_strings)) - terms_matching_tokens = ( - db.session.query(Term) - .filter( - Term.language == language, - Term.text_lc.in_(tok_strings), - Term.token_count == 1, - ) - .all() - ) - - # Multiword terms have zws between all tokens. - # Create content string with zws between all tokens for the match. - zws = "\u200B" # zero-width space - lctokens = [parser.get_lowercase(t.token) for t in tokens] - content = zws + zws.join(lctokens) + zws - contained_term_query = db.session.query(Term).filter( - Term.language == language, - Term.token_count > 1, - func.instr(content, Term.text_lc) > 0, - ) - contained_terms = contained_term_query.all() - - return terms_matching_tokens + contained_terms - - -class RenderableSentence: - """ - A collection of TextItems to be rendered. - """ - - def __init__(self, sentence_id, textitems): - self.sentence_id = sentence_id - self.textitems = textitems - - def __repr__(self): - s = "".join([t.display_text for t in self.textitems]) - return f'' - - -def get_paragraphs(text): - """ - Get array of arrays of RenderableSentences for the given Text. 
- """ - if text.id is None: - return [] - - language = text.book.language - - # Hacky reset of ParsedToken state. - # _Shouldn't_ matter ... :-( - ParsedToken.reset_counters() - tokens = language.get_parsed_tokens(text.text) - tokens = [t for t in tokens if t.token != "¶"] - - terms = find_all_Terms_in_string(text.text, language) - - def make_RenderableSentence(pnum, sentence_num, tokens, terms): +# from lute.utils.debug_helpers import DebugTimer + + +class TermPopup: + "Popup data for a term." + + # pylint: disable=too-many-instance-attributes + def __init__(self, term): + self.term = term + self.term_text = self._clean(term.text) + self.parents_text = ", ".join([self._clean(p.text) for p in term.parents]) + self.translation = self._clean(term.translation) + self.romanization = self._clean(term.romanization) + self.tags = [tt.text for tt in term.term_tags] + self.flash = self._clean(term.get_flash_message()) + self.image = term.get_current_image() + self.popup_image_data = self._get_popup_image_data() + + # Final data to include in popup. + self.parents = [] + self.components = [] + + def _clean(self, t): + "Clean text for popup usage." + zws = "\u200B" + ret = (t or "").strip() + ret = ret.replace(zws, "") + ret = ret.replace("\n", "<br />")
+ return ret + + @property + def show(self): + "Calc if popup should be shown. Must be deferred as values can be changed." + checks = [self.romanization != "", self.translation != "", len(self.tags) > 0] + return len([b for b in checks if b]) > 0 + + def term_and_parents_text(self): + "Return term text with parents if any." + ret = self.term_text + if self.parents_text != "": + ret = f"{ret} ({self.parents_text})" + return ret + + def _get_popup_image_data(self): + "Get images." + # Don't include component images in the hover for now, + # it can get confusing! + # ref https://github.com/LuteOrg/lute-v3/issues/355 + terms = [self.term, *self.term.parents] + + def _make_image_url(t): + return f"/userimages/{t.language.id}/{t.get_current_image()}" + + images = [(_make_image_url(t), t.text) for t in terms if t.get_current_image()] + imageresult = defaultdict(list) + for key, value in images: + imageresult[key].append(self._clean(value)) + # Convert lists to comma-separated strings + return {k: ", ".join(v) for k, v in imageresult.items()} + + +class Service: + "Service." + + def __init__(self, session): + self.session = session + + def mark_page_read(self, bookid, pagenum, mark_rest_as_known): + "Mark page as read, record stats, rest as known." + br = BookRepository(self.session) + book = br.find(bookid) + text = book.text_at_page(pagenum) + d = datetime.now() + text.read_date = d + + w = WordsRead(text, d, text.word_count) + self.session.add(text) + self.session.add(w) + self.session.commit() + if mark_rest_as_known: + self.set_unknowns_to_known(text) + + def set_unknowns_to_known(self, text: Text): """ - Make a RenderableSentence using the tokens present in - that sentence. The current text and language are pulled - into the function from the closure. + Given a text, create new Terms with status Well-Known + for any new Terms. """ - sentence_tokens = [t for t in tokens if t.sentence_number == sentence_num] - renderable = RenderableCalculator.get_renderable( - language, terms, sentence_tokens - ) - textitems = [ - i.make_text_item(pnum, sentence_num, text.id, language) for i in renderable - ] - return RenderableSentence(sentence_num, textitems) - - def unique(arr): - return list(set(arr)) - - renderable_paragraphs = [] - paranums = unique([t.paragraph_number for t in tokens]) - for pnum in paranums: - paratokens = [t for t in tokens if t.paragraph_number == pnum] - senums = sorted(unique([t.sentence_number for t in paratokens])) - - # A renderable paragraph is a collection of - # RenderableSentences. - renderable_sentences = [ - make_RenderableSentence(pnum, senum, paratokens, terms) for senum in senums + rs = RenderService(self.session) + paragraphs = rs.get_paragraphs(text.text, text.book.language) + self._save_new_status_0_terms(paragraphs) + + unknowns = [ + ti.term + for para in paragraphs + for sentence in para + for ti in sentence + if ti.is_word and ti.term.status == 0 ] - renderable_paragraphs.append(renderable_sentences) - return renderable_paragraphs + batch_size = 100 + i = 0 + for t in unknowns: + t.status = Status.WELLKNOWN + self.session.add(t) + i += 1 + if i % batch_size == 0: + self.session.commit() -def set_unknowns_to_known(text: Text): - """ - Given a text, create new Terms with status Well-Known - for any new Terms. - """ - language = text.book.language - - sentences = sum(get_paragraphs(text), []) + # Commit any remaining.
+ self.session.commit() - tis = [] - for sentence in sentences: - for ti in sentence.textitems: - tis.append(ti) + def bulk_status_update(self, text: Text, terms_text_array, new_status): + """ + Given a text and list of terms, update or create new terms + and set the status. + """ + language = text.book.language + repo = Repository(self.session) + for term_text in terms_text_array: + t = repo.find_or_new(language.id, term_text) + t.status = new_status + repo.add(t) + repo.commit() + + def _save_new_status_0_terms(self, paragraphs): + "Add status 0 terms for new textitems in paragraph." + tis_with_new_terms = [ + ti + for para in paragraphs + for sentence in para + for ti in sentence + if ti.is_word and ti.term.id is None and ti.term.status == 0 + ] - def is_unknown(ti): - return ( - ti.is_word == 1 - and (ti.wo_id == 0 or ti.wo_id is None) - and ti.token_count == 1 + for ti in tis_with_new_terms: + self.session.add(ti.term) + self.session.commit() + + def _get_reading_data(self, dbbook, pagenum, track_page_open=False): + "Get paragraphs, set text.start_date if needed." + text = dbbook.text_at_page(pagenum) + text.load_sentences() + svc = StatsService(self.session) + svc.mark_stale(dbbook) + + if track_page_open: + text.start_date = datetime.now() + dbbook.current_tx_id = text.id + + self.session.add(dbbook) + self.session.add(text) + self.session.commit() + + lang = text.book.language + rs = RenderService(self.session) + paragraphs = rs.get_paragraphs(text.text, lang) + self._save_new_status_0_terms(paragraphs) + + return paragraphs + + def get_paragraphs(self, dbbook, pagenum): + "Get the paragraphs for the book." + return self._get_reading_data(dbbook, pagenum, False) + + def start_reading(self, dbbook, pagenum): + "Start reading a page in the book, getting paragraphs." + return self._get_reading_data(dbbook, pagenum, True) + + def _sort_components(self, term, components): + "Sort components by min position in string and length." + component_and_pos = [] + for c in components: + c_indices = [ + loc[1] for loc in get_string_indexes([c.text_lc], term.text_lc) + ] + + # Sometimes the components aren't found + # in the string, which makes no sense ... + # ref https://github.com/LuteOrg/lute-v3/issues/474 + if len(c_indices) > 0: + component_and_pos.append([c, min(c_indices)]) + + def compare(a, b): + # Lowest position (closest to front of string) sorts first. + if a[1] != b[1]: + return -1 if (a[1] < b[1]) else 1 + # Longest sorts first. + alen = len(a[0].text) + blen = len(b[0].text) + return -1 if (alen > blen) else 1 + + component_and_pos.sort(key=functools.cmp_to_key(compare)) + return [c[0] for c in component_and_pos] + + def get_popup_data(self, termid): + "Get popup data, or None if popup shouldn't be shown." + term = self.session.get(Term, termid) + if term is None: + return None + + repo = UserSettingRepository(self.session) + show_components = int(repo.get_value("term_popup_show_components")) == 1 + components = [] + if show_components: + rs = RenderService(self.session) + components = [ + c + for c in rs.find_all_Terms_in_string(term.text, term.language) + if c.id != term.id and c.status != Status.UNKNOWN + ] + + t = TermPopup(term) + if ( + t.show is False + and t.image is None + and len(term.parents) == 0 + and len(components) == 0 + ): + # Nothing to show.
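`_sort_components` above orders component terms by their first position in the parent term's text, breaking ties by putting longer components first. A short sketch of the same ordering expressed as a sort key rather than `cmp_to_key`, with `(text, min position)` tuples standing in for term objects:

```python
# (component text, min index found in the parent term's text_lc)
found = [("black cat", 6), ("cat", 6), ("a", 0)]

# Position ascending, then length descending -- the same order the
# comparator above produces.
ordered = sorted(found, key=lambda cp: (cp[1], -len(cp[0])))
print([text for text, _ in ordered])  # ['a', 'black cat', 'cat']
```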
+ return None + + parent_data = [TermPopup(p) for p in term.parents] + + promote_parent_trans = int( + repo.get_value("term_popup_promote_parent_translation") ) - - unknowns = list(filter(is_unknown, tis)) - words_lc = [ti.text_lc for ti in unknowns] - uniques = list(set(words_lc)) - uniques.sort() - - batch_size = 100 - i = 0 - - # There is likely a better way to write this using generators and - # yield. - for u in uniques: - candidate = Term(language, u) - t = Term.find_by_spec(candidate) - if t is None: - candidate.status = Status.WELLKNOWN - db.session.add(candidate) - i += 1 - - if i % batch_size == 0: - db.session.commit() - - # Commit any remaining. - db.session.commit() - - -def bulk_status_update(text: Text, terms_text_array, new_status): - """ - Given a text and list of terms, update or create new terms - and set the status. - """ - language = text.book.language - repo = Repository(db) - for term_text in terms_text_array: - t = repo.find_or_new(language.id, term_text) - t.status = new_status - repo.add(t) - repo.commit() + if (promote_parent_trans == 1) and len(term.parents) == 1: + ptrans = parent_data[0].translation + if t.translation == "": + t.translation = ptrans + if t.translation == ptrans: + parent_data[0].translation = "" + + component_data = [TermPopup(c) for c in self._sort_components(term, components)] + + t.parents = [p for p in parent_data if p.show] + t.components = [c for c in component_data if c.show] + return t diff --git a/lute/settings/current.py b/lute/settings/current.py new file mode 100644 index 000000000..1e5fe0938 --- /dev/null +++ b/lute/settings/current.py @@ -0,0 +1,46 @@ +""" +Current user settings stored in UserSettings. + +They're stored in a global dict to allow db-less access; they're +global settings, after all. + +They're written to at load (or when the settings change). +""" + +from lute.models.setting import UserSetting + +# The current user settings, key/value dict. +current_settings = {} + +# Current user hotkeys, keycombo-to-setting-name dict. +current_hotkeys = {} + + +def refresh_global_settings(session): + "Refresh the settings dictionaries." + # Have to reload to not mess up any references + # (e.g. during testing). + current_settings.clear() + current_hotkeys.clear() + + settings = session.query(UserSetting).all() + for s in settings: + current_settings[s.key] = s.value + + hotkeys = [ + s for s in settings if s.key.startswith("hotkey_") and (s.value or "") != "" + ] + for h in hotkeys: + current_hotkeys[h.value] = h.key + + # Convert some string values into bools. + boolkeys = [ + "open_popup_in_new_tab", + "stop_audio_on_term_form_open", + "show_highlights", + "term_popup_promote_parent_translation", + "term_popup_show_components", + "use_ankiconnect", + ] + for k in boolkeys: + current_settings[k] = current_settings[k] == "1" diff --git a/lute/settings/forms.py b/lute/settings/forms.py new file mode 100644 index 000000000..0ee02b3ae --- /dev/null +++ b/lute/settings/forms.py @@ -0,0 +1,88 @@ +""" +Settings form. +""" + +import os +from flask_wtf import FlaskForm +from wtforms import BooleanField, StringField, IntegerField, TextAreaField, SelectField +from wtforms.validators import InputRequired, NumberRange +from wtforms import ValidationError + + +class UserSettingsForm(FlaskForm): + """ + Settings. + + Note the field names here must match the keys in the settings table.
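In `current.py` above, settings come out of the db as strings, and `refresh_global_settings` coerces the listed boolean keys so that callers can test them directly. A minimal standalone sketch of that coercion:

```python
# Settings arrive from the db as strings; listed keys become real bools.
settings = {"show_highlights": "1", "use_ankiconnect": "0", "backup_dir": "/backups"}
boolkeys = ["show_highlights", "use_ankiconnect"]
for k in boolkeys:
    settings[k] = settings[k] == "1"

assert settings["show_highlights"] is True
assert settings["use_ankiconnect"] is False
```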
+ """ + + backup_enabled = BooleanField("Backup Enabled") + backup_dir = StringField("Backup directory") + backup_auto = BooleanField("Run backups automatically (daily)") + backup_warn = BooleanField("Warn if backup hasn't run in a week") + backup_count = IntegerField( + "Retain backup count", + validators=[InputRequired(), NumberRange(min=1)], + render_kw={ + "title": "Count of zipfiles to retain, oldest files are deleted first" + }, + ) + + current_theme = SelectField("Theme") + custom_styles = TextAreaField("Custom styles") + show_highlights = BooleanField("Highlight terms by status") + + open_popup_in_new_tab = BooleanField("Open popup in new tab") + stop_audio_on_term_form_open = BooleanField("Stop audio on term form open") + stats_calc_sample_size = IntegerField( + "Book stats page sample size", + validators=[InputRequired(), NumberRange(min=1, max=500)], + render_kw={"title": "Number of pages to use for book stats calculation."}, + ) + + term_popup_promote_parent_translation = BooleanField( + "Promote parent translation to term translation if possible" + ) + term_popup_show_components = BooleanField("Show component terms") + + mecab_path = StringField("MECAB_PATH environment variable") + reading_choices = [ + ("katakana", "Katakana"), + ("hiragana", "Hiragana"), + ("alphabet", "Romaji"), + ] + japanese_reading = SelectField("Pronunciation characters", choices=reading_choices) + + use_ankiconnect = BooleanField("Enable export using AnkiConnect") + ankiconnect_url = StringField("AnkiConnect URL", validators=[InputRequired()]) + + def validate_backup_dir(self, field): + "Field must be set if enabled." + if self.backup_enabled.data is False: + return + v = field.data + if (v or "").strip() == "": + raise ValidationError("Backup directory required") + + abspath = os.path.abspath(v) + if v != abspath: + msg = f'Backup dir must be absolute path. Did you mean "{abspath}"?' + raise ValidationError(msg) + if not os.path.exists(v): + raise ValidationError(f'Directory "{v}" does not exist.') + if not os.path.isdir(v): + raise ValidationError(f'"{v}" is not a directory.') + + +class UserShortcutsForm(FlaskForm): + """ + Shortcuts form. + + The route manages getting and storing the settings + from the db, as there's a variable number of settings, + and it's easier to just work with the data directly + rather than trying to create a variable number of fields. + + I'm only using this form to get the validate_on_submit()! + There's likely a better way to do this. + """ diff --git a/lute/settings/hotkey_data.py b/lute/settings/hotkey_data.py new file mode 100644 index 000000000..21939e2ad --- /dev/null +++ b/lute/settings/hotkey_data.py @@ -0,0 +1,174 @@ +""" +All customizable user hotkeys. +""" + +import pprint +import yaml + + +# Hotkeys and descriptions. 
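`validate_backup_dir` in `forms.py` above rejects blank, relative, missing, and non-directory paths. A standalone sketch of the same checks as a plain function (a hypothetical helper, outside WTForms):

```python
import os

def backup_dir_error(v):
    "Return an error message, or None if the directory is acceptable."
    if (v or "").strip() == "":
        return "Backup directory required"
    abspath = os.path.abspath(v)
    if v != abspath:
        return f'Backup dir must be absolute path. Did you mean "{abspath}"?'
    if not os.path.exists(v):
        return f'Directory "{v}" does not exist.'
    if not os.path.isdir(v):
        return f'"{v}" is not a directory.'
    return None

print(backup_dir_error("backups"))  # relative path -> suggests the absolute one
```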
+_ALL_HOTKEY_DATA = """ +Copy: +- hotkey: hotkey_CopySentence + desc: Copy the sentence of the current word +- hotkey: hotkey_CopyPara + desc: Copy the paragraph of the current word +- hotkey: hotkey_CopyPage + desc: Copy the full page + +Misc: +- hotkey: hotkey_PageTermList + desc: Show the term listing for the current page +- hotkey: hotkey_Bookmark + desc: Bookmark the current page +- hotkey: hotkey_EditPage + desc: Edit the current page +- hotkey: hotkey_NextTheme + desc: Change to the next theme +- hotkey: hotkey_ToggleHighlight + desc: Toggle highlights +- hotkey: hotkey_ToggleFocus + desc: Toggle focus mode +- hotkey: hotkey_SaveTerm + desc: Save term in term form +- hotkey: hotkey_PostTermsToAnki + desc: Post selected terms to Anki + +Navigation: +- hotkey: hotkey_StartHover + desc: Deselect all words +- hotkey: hotkey_PrevWord + desc: Move to previous word +- hotkey: hotkey_NextWord + desc: Move to next word +- hotkey: hotkey_PrevUnknownWord + desc: Move to previous unknown word +- hotkey: hotkey_NextUnknownWord + desc: Move to next unknown word +- hotkey: hotkey_PrevSentence + desc: Move to previous sentence +- hotkey: hotkey_NextSentence + desc: Move to next sentence + +Paging: +- hotkey: hotkey_PreviousPage + desc: Go to previous page, do not mark current page read +- hotkey: hotkey_NextPage + desc: Go to next page, do not mark current page read +- hotkey: hotkey_MarkReadWellKnown + desc: Set remaining unknown words to Well Known, mark page as read, go to next page +- hotkey: hotkey_MarkRead + desc: Mark page as read, go to next page + +Translate: +- hotkey: hotkey_TranslateSentence + desc: Translate the sentence of the current word +- hotkey: hotkey_TranslatePara + desc: Translate the paragraph of the current word +- hotkey: hotkey_TranslatePage + desc: Translate the full page + +Update status: +- hotkey: hotkey_Status1 + desc: Set status to 1 +- hotkey: hotkey_Status2 + desc: Set status to 2 +- hotkey: hotkey_Status3 + desc: Set status to 3 +- hotkey: hotkey_Status4 + desc: Set status to 4 +- hotkey: hotkey_Status5 + desc: Set status to 5 +- hotkey: hotkey_StatusIgnore + desc: Set status to Ignore +- hotkey: hotkey_StatusWellKnown + desc: Set status to Well Known +- hotkey: hotkey_StatusUp + desc: Bump the status up by 1 +- hotkey: hotkey_StatusDown + desc: Bump the status down by 1 +- hotkey: hotkey_DeleteTerm + desc: Delete term (set status to Unknown) +""" + + +# Initial hotkey values +# +# Only *some* of the hotkeys have default values assigned, as these +# hotkeys were initially set in the early releases of Lute.
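`initial_hotkey_defaults` (defined just below) flattens the categorized YAML above into a single `{hotkey: default}` dict, falling back to `""` for keys with no default. A small sketch of that flattening against a trimmed copy of the data:

```python
import yaml  # pyyaml, already a Lute dependency

data = yaml.safe_load("""
Copy:
- hotkey: hotkey_CopySentence
  desc: Copy the sentence of the current word
Navigation:
- hotkey: hotkey_PrevWord
  desc: Move to previous word
""")
defaults = {"hotkey_CopySentence": "KeyC"}  # a subset of _initial_values
flat = {h["hotkey"]: defaults.get(h["hotkey"], "")
        for keys in data.values() for h in keys}
print(flat)  # {'hotkey_CopySentence': 'KeyC', 'hotkey_PrevWord': ''}
```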
+# +# Any new hotkeys added *MUST NOT* have defaults assigned, as users +# may have already set up their hotkeys, and we can't assume that a +# given key combination is free: +_initial_values = { + "hotkey_StartHover": "Escape", + "hotkey_PrevWord": "ArrowLeft", + "hotkey_NextWord": "ArrowRight", + "hotkey_Status1": "Digit1", + "hotkey_Status2": "Digit2", + "hotkey_Status3": "Digit3", + "hotkey_Status4": "Digit4", + "hotkey_Status5": "Digit5", + "hotkey_StatusIgnore": "KeyI", + "hotkey_StatusWellKnown": "KeyW", + "hotkey_StatusUp": "ArrowUp", + "hotkey_StatusDown": "ArrowDown", + "hotkey_TranslateSentence": "KeyT", + "hotkey_TranslatePara": "shift+KeyT", + "hotkey_CopySentence": "KeyC", + "hotkey_CopyPara": "shift+KeyC", + "hotkey_Bookmark": "KeyB", + "hotkey_NextTheme": "KeyM", + "hotkey_ToggleHighlight": "KeyH", + "hotkey_ToggleFocus": "KeyF", + "hotkey_SaveTerm": "ctrl+Enter", +} + + +def initial_hotkey_defaults(): + """ + Get initial hotkeys and defaults (or empty string). + Used for db initialization. + """ + y = yaml.safe_load(_ALL_HOTKEY_DATA) + hks = [] + for _, keys in y.items(): + hks.extend([k["hotkey"] for k in keys]) + ret = {h: _initial_values.get(h, "") for h in hks} + return ret + + +def categorized_hotkeys(): + "Hotkeys by category. Used by routes." + y = yaml.safe_load(_ALL_HOTKEY_DATA) + ordered_keys = [ + "Navigation", + "Update status", + "Paging", + "Translate", + "Copy", + "Misc", + ] + if set(ordered_keys) != set(y.keys()): + raise RuntimeError("ordered_keys doesn't match expected") + ret = {k: [h["hotkey"] for h in y[k]] for k in ordered_keys} + return ret + + +def hotkey_descriptions(): + """ + Get hotkeys and descriptions. Used by routes. + """ + y = yaml.safe_load(_ALL_HOTKEY_DATA) + return {key["hotkey"]: key["desc"] for group in y.values() for key in group} + + +if __name__ == "__main__": + print("---") + pprint.pprint(categorized_hotkeys()) + print("---") + pprint.pprint(initial_hotkey_defaults()) + print("---") + pprint.pprint(hotkey_descriptions()) diff --git a/lute/settings/routes.py b/lute/settings/routes.py index 76dcb6437..1c6b08afd 100644 --- a/lute/settings/routes.py +++ b/lute/settings/routes.py @@ -2,68 +2,27 @@ Settings routes. """ -import os from flask import ( Blueprint, current_app, request, - Response, render_template, redirect, flash, jsonify, ) -from flask_wtf import FlaskForm -from wtforms import BooleanField, StringField, IntegerField, TextAreaField -from wtforms.validators import InputRequired, NumberRange -from wtforms import ValidationError +from wtforms import BooleanField from lute.models.language import Language from lute.models.setting import UserSetting +from lute.models.repositories import UserSettingRepository +from lute.themes.service import Service as ThemeService +from lute.settings.forms import UserSettingsForm, UserShortcutsForm +from lute.settings.current import refresh_global_settings +from lute.settings.hotkey_data import categorized_hotkeys, hotkey_descriptions from lute.db import db from lute.parse.mecab_parser import JapaneseParser -class UserSettingsForm(FlaskForm): - """ - Settings. - - Note the field names here must match the keys in the settings table.
- """ - - backup_enabled = BooleanField("Backup Enabled") - backup_dir = StringField("Backup directory") - backup_auto = BooleanField("Run backups automatically (daily)") - backup_warn = BooleanField("Warn if backup hasn't run in a week") - backup_count = IntegerField( - "Retain backup count", - validators=[InputRequired(), NumberRange(min=1)], - render_kw={ - "title": "Count of zipfiles to retain, oldest files are deleted first" - }, - ) - - custom_styles = TextAreaField("Custom styles") - - mecab_path = StringField("MECAB_PATH environment variable") - - def validate_backup_dir(self, field): - "Field must be set if enabled." - if self.backup_enabled.data is False: - return - v = field.data - if (v or "").strip() == "": - raise ValidationError("Backup directory required") - - abspath = os.path.abspath(v) - if v != abspath: - msg = f'Backup dir must be absolute path. Did you mean "{abspath}"?' - raise ValidationError(msg) - if not os.path.exists(v): - raise ValidationError(f'Directory "{v}" does not exist.') - if not os.path.isdir(v): - raise ValidationError(f'"{v}" is not a directory.') - - bp = Blueprint("settings", __name__, url_prefix="/settings") @@ -72,6 +31,10 @@ def edit_settings(): "Edit settings." form = UserSettingsForm() + with current_app.app_context(): + svc = ThemeService(db.session) + form.current_theme.choices = svc.list_themes() + ac = current_app.env_config if ac.is_docker: # User shouldn't change some things with docker. @@ -79,14 +42,14 @@ # Backup dir gets mounted from host. form.backup_dir.render_kw = kw + repo = UserSettingRepository(db.session) if form.validate_on_submit(): # Update the settings in the database for field in form: if field.id not in ("csrf_token", "submit"): - UserSetting.set_value(field.id, field.data) + repo.set_value(field.id, field.data) db.session.commit() - - JapaneseParser.set_mecab_path_envkey(form.mecab_path.data) + refresh_global_settings(db.session) flash("Settings updated", "success") return redirect("/") @@ -94,10 +57,10 @@ # Load current settings from the database for field in form: if field.id != "csrf_token": - field.data = UserSetting.get_value(field.id) - # Hack: set boolean settings to ints, otherwise they're always checked. - form.backup_warn.data = int(form.backup_warn.data or 0) - form.backup_auto.data = int(form.backup_auto.data or 0) + field.data = repo.get_value(field.id) + if isinstance(field, BooleanField): + # Hack: set boolean settings to ints, otherwise they're always checked. + field.data = int(field.data or 0) return render_template("settings/form.html", form=form) @@ -112,10 +75,11 @@ def test_parse(): """ mecab_path = request.args.get("mecab_path", None) - old_key = JapaneseParser.get_mecab_path_envkey() + repo = UserSettingRepository(db.session) + old_setting = repo.get_value("mecab_path") result = {"failure": "tbd"} try: - JapaneseParser.set_mecab_path_envkey(mecab_path) + repo.set_value("mecab_path", mecab_path) # Parsing requires a language, even if it's a dummy. lang = Language() p = JapaneseParser() @@ -128,17 +92,60 @@ message = f"{type(e).__name__}: { str(e) }" result = {"result": "failure", "message": message} finally: - JapaneseParser.set_mecab_path_envkey(old_key) + repo.set_value("mecab_path", old_setting) return jsonify(result) -@bp.route("/custom_styles", methods=["GET"]) -def custom_styles(): +@bp.route("/set/<key>/<value>", methods=["POST"]) +def set_key_value(key, value): + "Set a UserSetting key to value."
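The body of `set_key_value` (continued below) snapshots the old value and writes it back if the update raises, so a bad value can't stick. A minimal sketch of that save/restore shape, with a plain dict standing in for `UserSettingRepository` and a hypothetical `validate` callback standing in for whatever might throw:

```python
store = {"mecab_path": "/usr/lib/libmecab.so"}

def set_value(key, value, validate):
    old_value = store.get(key)
    try:
        validate(value)
        store[key] = value
        return {"result": "success", "message": "OK"}
    except Exception as e:  # broad catch, mirroring the route
        store[key] = old_value
        return {"result": "failure", "message": f"{type(e).__name__}: {e}"}

print(set_value("mecab_path", "", lambda v: 1 / len(v)))  # failure
print(store["mecab_path"])  # unchanged: /usr/lib/libmecab.so
```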
+ repo = UserSettingRepository(db.session) + old_value = repo.get_value(key) + try: + repo.set_value(key, value) + result = {"result": "success", "message": "OK"} + except Exception as e: # pylint: disable=broad-exception-caught + message = f"{type(e).__name__}: { str(e) }" + repo.set_value(key, old_value) + result = {"result": "failure", "message": message} + db.session.commit() + refresh_global_settings(db.session) + return jsonify(result) + + +def _get_categorized_hotkeys(): """ - Return the custom settings for inclusion in the base.html. + Return hotkey UserSetting keys and values, + grouped by category. """ - css = UserSetting.get_value("custom_styles") - response = Response(css, 200) - response.content_type = "text/css; charset=utf-8" - return response + categorized = categorized_hotkeys() + settings = {h.key: h.value for h in db.session.query(UserSetting).all()} + return { + category: {k: settings[k] for k in keylist} + for category, keylist in categorized.items() + } + + +@bp.route("/shortcuts", methods=["GET", "POST"]) +def edit_shortcuts(): + "Edit shortcuts." + repo = UserSettingRepository(db.session) + form = UserShortcutsForm() + if form.validate_on_submit(): + # print(request.form, flush=True) + # Update the settings in the database + for k, v in request.form.items(): + # print(f"{k} = {v}", flush=True) + repo.set_value(k, v) + db.session.commit() + refresh_global_settings(db.session) + flash("Shortcuts updated", "success") + return redirect("/") + + categorized_settings = _get_categorized_hotkeys() + return render_template( + "settings/shortcuts.html", + setting_descs=hotkey_descriptions(), + categorized_settings=categorized_settings, + ) diff --git a/lute/static/css/datatables.min.css b/lute/static/css/datatables.min.css deleted file mode 100644 index 27e5a2381..000000000 --- a/lute/static/css/datatables.min.css +++ /dev/null @@ -1,18 +0,0 @@ -/* - * This combined file was created by the DataTables downloader builder: - * https://datatables.net/download - * - * To rebuild or modify this file with the latest versions of the included - * software please visit: - * https://datatables.net/download/#dt/dt-1.13.7/b-2.4.2/b-html5-2.4.2 - * - * Included libraries: - * DataTables 1.13.7, Buttons 2.4.2, HTML5 export 2.4.2 - */ - -:root{--dt-row-selected: 13, 110, 253;--dt-row-selected-text: 255, 255, 255;--dt-row-selected-link: 9, 10, 11;--dt-row-stripe: 0, 0, 0;--dt-row-hover: 0, 0, 0;--dt-column-ordering: 0, 0, 0;--dt-html-background: white}:root.dark{--dt-html-background: rgb(33, 37, 41)}table.dataTable td.dt-control{text-align:center;cursor:pointer}table.dataTable td.dt-control:before{display:inline-block;color:rgba(0, 0, 0, 0.5);content:"▶"}table.dataTable tr.dt-hasChild td.dt-control:before{content:"▼"}html.dark table.dataTable td.dt-control:before{color:rgba(255, 255, 255, 0.5)}html.dark table.dataTable tr.dt-hasChild td.dt-control:before{color:rgba(255, 255, 255, 0.5)}table.dataTable thead>tr>th.sorting,table.dataTable thead>tr>th.sorting_asc,table.dataTable thead>tr>th.sorting_desc,table.dataTable thead>tr>th.sorting_asc_disabled,table.dataTable thead>tr>th.sorting_desc_disabled,table.dataTable thead>tr>td.sorting,table.dataTable thead>tr>td.sorting_asc,table.dataTable thead>tr>td.sorting_desc,table.dataTable thead>tr>td.sorting_asc_disabled,table.dataTable thead>tr>td.sorting_desc_disabled{cursor:pointer;position:relative;padding-right:26px}table.dataTable thead>tr>th.sorting:before,table.dataTable thead>tr>th.sorting:after,table.dataTable 
thead>tr>th.sorting_asc:before,table.dataTable thead>tr>th.sorting_asc:after,table.dataTable thead>tr>th.sorting_desc:before,table.dataTable thead>tr>th.sorting_desc:after,table.dataTable thead>tr>th.sorting_asc_disabled:before,table.dataTable thead>tr>th.sorting_asc_disabled:after,table.dataTable thead>tr>th.sorting_desc_disabled:before,table.dataTable thead>tr>th.sorting_desc_disabled:after,table.dataTable thead>tr>td.sorting:before,table.dataTable thead>tr>td.sorting:after,table.dataTable thead>tr>td.sorting_asc:before,table.dataTable thead>tr>td.sorting_asc:after,table.dataTable thead>tr>td.sorting_desc:before,table.dataTable thead>tr>td.sorting_desc:after,table.dataTable thead>tr>td.sorting_asc_disabled:before,table.dataTable thead>tr>td.sorting_asc_disabled:after,table.dataTable thead>tr>td.sorting_desc_disabled:before,table.dataTable thead>tr>td.sorting_desc_disabled:after{position:absolute;display:block;opacity:.125;right:10px;line-height:9px;font-size:.8em}table.dataTable thead>tr>th.sorting:before,table.dataTable thead>tr>th.sorting_asc:before,table.dataTable thead>tr>th.sorting_desc:before,table.dataTable thead>tr>th.sorting_asc_disabled:before,table.dataTable thead>tr>th.sorting_desc_disabled:before,table.dataTable thead>tr>td.sorting:before,table.dataTable thead>tr>td.sorting_asc:before,table.dataTable thead>tr>td.sorting_desc:before,table.dataTable thead>tr>td.sorting_asc_disabled:before,table.dataTable thead>tr>td.sorting_desc_disabled:before{bottom:50%;content:"▲";content:"▲"/""}table.dataTable thead>tr>th.sorting:after,table.dataTable thead>tr>th.sorting_asc:after,table.dataTable thead>tr>th.sorting_desc:after,table.dataTable thead>tr>th.sorting_asc_disabled:after,table.dataTable thead>tr>th.sorting_desc_disabled:after,table.dataTable thead>tr>td.sorting:after,table.dataTable thead>tr>td.sorting_asc:after,table.dataTable thead>tr>td.sorting_desc:after,table.dataTable thead>tr>td.sorting_asc_disabled:after,table.dataTable thead>tr>td.sorting_desc_disabled:after{top:50%;content:"▼";content:"▼"/""}table.dataTable thead>tr>th.sorting_asc:before,table.dataTable thead>tr>th.sorting_desc:after,table.dataTable thead>tr>td.sorting_asc:before,table.dataTable thead>tr>td.sorting_desc:after{opacity:.6}table.dataTable thead>tr>th.sorting_desc_disabled:after,table.dataTable thead>tr>th.sorting_asc_disabled:before,table.dataTable thead>tr>td.sorting_desc_disabled:after,table.dataTable thead>tr>td.sorting_asc_disabled:before{display:none}table.dataTable thead>tr>th:active,table.dataTable thead>tr>td:active{outline:none}div.dataTables_scrollBody>table.dataTable>thead>tr>th:before,div.dataTables_scrollBody>table.dataTable>thead>tr>th:after,div.dataTables_scrollBody>table.dataTable>thead>tr>td:before,div.dataTables_scrollBody>table.dataTable>thead>tr>td:after{display:none}div.dataTables_processing{position:absolute;top:50%;left:50%;width:200px;margin-left:-100px;margin-top:-26px;text-align:center;padding:2px;z-index:10}div.dataTables_processing>div:last-child{position:relative;width:80px;height:15px;margin:1em auto}div.dataTables_processing>div:last-child>div{position:absolute;top:0;width:13px;height:13px;border-radius:50%;background:rgb(13, 110, 253);background:rgb(var(--dt-row-selected));animation-timing-function:cubic-bezier(0, 1, 1, 0)}div.dataTables_processing>div:last-child>div:nth-child(1){left:8px;animation:datatables-loader-1 .6s infinite}div.dataTables_processing>div:last-child>div:nth-child(2){left:8px;animation:datatables-loader-2 .6s 
infinite}div.dataTables_processing>div:last-child>div:nth-child(3){left:32px;animation:datatables-loader-2 .6s infinite}div.dataTables_processing>div:last-child>div:nth-child(4){left:56px;animation:datatables-loader-3 .6s infinite}@keyframes datatables-loader-1{0%{transform:scale(0)}100%{transform:scale(1)}}@keyframes datatables-loader-3{0%{transform:scale(1)}100%{transform:scale(0)}}@keyframes datatables-loader-2{0%{transform:translate(0, 0)}100%{transform:translate(24px, 0)}}table.dataTable.nowrap th,table.dataTable.nowrap td{white-space:nowrap}table.dataTable th.dt-left,table.dataTable td.dt-left{text-align:left}table.dataTable th.dt-center,table.dataTable td.dt-center,table.dataTable td.dataTables_empty{text-align:center}table.dataTable th.dt-right,table.dataTable td.dt-right{text-align:right}table.dataTable th.dt-justify,table.dataTable td.dt-justify{text-align:justify}table.dataTable th.dt-nowrap,table.dataTable td.dt-nowrap{white-space:nowrap}table.dataTable thead th,table.dataTable thead td,table.dataTable tfoot th,table.dataTable tfoot td{text-align:left}table.dataTable thead th.dt-head-left,table.dataTable thead td.dt-head-left,table.dataTable tfoot th.dt-head-left,table.dataTable tfoot td.dt-head-left{text-align:left}table.dataTable thead th.dt-head-center,table.dataTable thead td.dt-head-center,table.dataTable tfoot th.dt-head-center,table.dataTable tfoot td.dt-head-center{text-align:center}table.dataTable thead th.dt-head-right,table.dataTable thead td.dt-head-right,table.dataTable tfoot th.dt-head-right,table.dataTable tfoot td.dt-head-right{text-align:right}table.dataTable thead th.dt-head-justify,table.dataTable thead td.dt-head-justify,table.dataTable tfoot th.dt-head-justify,table.dataTable tfoot td.dt-head-justify{text-align:justify}table.dataTable thead th.dt-head-nowrap,table.dataTable thead td.dt-head-nowrap,table.dataTable tfoot th.dt-head-nowrap,table.dataTable tfoot td.dt-head-nowrap{white-space:nowrap}table.dataTable tbody th.dt-body-left,table.dataTable tbody td.dt-body-left{text-align:left}table.dataTable tbody th.dt-body-center,table.dataTable tbody td.dt-body-center{text-align:center}table.dataTable tbody th.dt-body-right,table.dataTable tbody td.dt-body-right{text-align:right}table.dataTable tbody th.dt-body-justify,table.dataTable tbody td.dt-body-justify{text-align:justify}table.dataTable tbody th.dt-body-nowrap,table.dataTable tbody td.dt-body-nowrap{white-space:nowrap}table.dataTable{width:100%;margin:0 auto;clear:both;border-collapse:separate;border-spacing:0}table.dataTable thead th,table.dataTable tfoot th{font-weight:bold}table.dataTable>thead>tr>th,table.dataTable>thead>tr>td{padding:10px;border-bottom:1px solid rgba(0, 0, 0, 0.3)}table.dataTable>thead>tr>th:active,table.dataTable>thead>tr>td:active{outline:none}table.dataTable>tfoot>tr>th,table.dataTable>tfoot>tr>td{padding:10px 10px 6px 10px;border-top:1px solid rgba(0, 0, 0, 0.3)}table.dataTable tbody tr{background-color:transparent}table.dataTable tbody tr.selected>*{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.9);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.9);color:rgb(255, 255, 255);color:rgb(var(--dt-row-selected-text))}table.dataTable tbody tr.selected a{color:rgb(9, 10, 11);color:rgb(var(--dt-row-selected-link))}table.dataTable tbody th,table.dataTable tbody td{padding:8px 10px}table.dataTable.row-border>tbody>tr>th,table.dataTable.row-border>tbody>tr>td,table.dataTable.display>tbody>tr>th,table.dataTable.display>tbody>tr>td{border-top:1px solid rgba(0, 0, 0, 
0.15)}table.dataTable.row-border>tbody>tr:first-child>th,table.dataTable.row-border>tbody>tr:first-child>td,table.dataTable.display>tbody>tr:first-child>th,table.dataTable.display>tbody>tr:first-child>td{border-top:none}table.dataTable.row-border>tbody>tr.selected+tr.selected>td,table.dataTable.display>tbody>tr.selected+tr.selected>td{border-top-color:#0262ef}table.dataTable.cell-border>tbody>tr>th,table.dataTable.cell-border>tbody>tr>td{border-top:1px solid rgba(0, 0, 0, 0.15);border-right:1px solid rgba(0, 0, 0, 0.15)}table.dataTable.cell-border>tbody>tr>th:first-child,table.dataTable.cell-border>tbody>tr>td:first-child{border-left:1px solid rgba(0, 0, 0, 0.15)}table.dataTable.cell-border>tbody>tr:first-child>th,table.dataTable.cell-border>tbody>tr:first-child>td{border-top:none}table.dataTable.stripe>tbody>tr.odd>*,table.dataTable.display>tbody>tr.odd>*{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.023);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-stripe), 0.023)}table.dataTable.stripe>tbody>tr.odd.selected>*,table.dataTable.display>tbody>tr.odd.selected>*{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.923);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.923)}table.dataTable.hover>tbody>tr:hover>*,table.dataTable.display>tbody>tr:hover>*{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.035);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-hover), 0.035)}table.dataTable.hover>tbody>tr.selected:hover>*,table.dataTable.display>tbody>tr.selected:hover>*{box-shadow:inset 0 0 0 9999px #0d6efd !important;box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 1) !important}table.dataTable.order-column>tbody tr>.sorting_1,table.dataTable.order-column>tbody tr>.sorting_2,table.dataTable.order-column>tbody tr>.sorting_3,table.dataTable.display>tbody tr>.sorting_1,table.dataTable.display>tbody tr>.sorting_2,table.dataTable.display>tbody tr>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.019);box-shadow:inset 0 0 0 9999px rgba(var(--dt-column-ordering), 0.019)}table.dataTable.order-column>tbody tr.selected>.sorting_1,table.dataTable.order-column>tbody tr.selected>.sorting_2,table.dataTable.order-column>tbody tr.selected>.sorting_3,table.dataTable.display>tbody tr.selected>.sorting_1,table.dataTable.display>tbody tr.selected>.sorting_2,table.dataTable.display>tbody tr.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.919);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.919)}table.dataTable.display>tbody>tr.odd>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.odd>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.054);box-shadow:inset 0 0 0 9999px rgba(var(--dt-column-ordering), 0.054)}table.dataTable.display>tbody>tr.odd>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.odd>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.047);box-shadow:inset 0 0 0 9999px rgba(var(--dt-column-ordering), 0.047)}table.dataTable.display>tbody>tr.odd>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.odd>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.039);box-shadow:inset 0 0 0 9999px rgba(var(--dt-column-ordering), 0.039)}table.dataTable.display>tbody>tr.odd.selected>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.odd.selected>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.954);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.954)}table.dataTable.display>tbody>tr.odd.selected>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.odd.selected>.sorting_2{box-shadow:inset 0 0 0 
9999px rgba(13, 110, 253, 0.947);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.947)}table.dataTable.display>tbody>tr.odd.selected>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.odd.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.939);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.939)}table.dataTable.display>tbody>tr.even>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.even>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.019);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.019)}table.dataTable.display>tbody>tr.even>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.even>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.011);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.011)}table.dataTable.display>tbody>tr.even>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.even>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.003);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.003)}table.dataTable.display>tbody>tr.even.selected>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.even.selected>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.919);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.919)}table.dataTable.display>tbody>tr.even.selected>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.even.selected>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.911);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.911)}table.dataTable.display>tbody>tr.even.selected>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.even.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.903);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.903)}table.dataTable.display tbody tr:hover>.sorting_1,table.dataTable.order-column.hover tbody tr:hover>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.082);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-hover), 0.082)}table.dataTable.display tbody tr:hover>.sorting_2,table.dataTable.order-column.hover tbody tr:hover>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.074);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-hover), 0.074)}table.dataTable.display tbody tr:hover>.sorting_3,table.dataTable.order-column.hover tbody tr:hover>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.062);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-hover), 0.062)}table.dataTable.display tbody tr:hover.selected>.sorting_1,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.982);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.982)}table.dataTable.display tbody tr:hover.selected>.sorting_2,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.974);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.974)}table.dataTable.display tbody tr:hover.selected>.sorting_3,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.962);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.962)}table.dataTable.no-footer{border-bottom:1px solid rgba(0, 0, 0, 0.3)}table.dataTable.compact thead th,table.dataTable.compact thead td,table.dataTable.compact tfoot th,table.dataTable.compact tfoot td,table.dataTable.compact tbody th,table.dataTable.compact tbody td{padding:4px}table.dataTable th,table.dataTable 
td{box-sizing:content-box}.dataTables_wrapper{position:relative;clear:both}.dataTables_wrapper .dataTables_length{float:left}.dataTables_wrapper .dataTables_length select{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;color:inherit;padding:4px}.dataTables_wrapper .dataTables_filter{float:right;text-align:right}.dataTables_wrapper .dataTables_filter input{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;color:inherit;margin-left:3px}.dataTables_wrapper .dataTables_info{clear:both;float:left;padding-top:.755em}.dataTables_wrapper .dataTables_paginate{float:right;text-align:right;padding-top:.25em}.dataTables_wrapper .dataTables_paginate .paginate_button{box-sizing:border-box;display:inline-block;min-width:1.5em;padding:.5em 1em;margin-left:2px;text-align:center;text-decoration:none !important;cursor:pointer;color:inherit !important;border:1px solid transparent;border-radius:2px;background:transparent}.dataTables_wrapper .dataTables_paginate .paginate_button.current,.dataTables_wrapper .dataTables_paginate .paginate_button.current:hover{color:inherit !important;border:1px solid rgba(0, 0, 0, 0.3);background-color:rgba(0, 0, 0, 0.05);background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, rgba(230, 230, 230, 0.05)), color-stop(100%, rgba(0, 0, 0, 0.05)));background:-webkit-linear-gradient(top, rgba(230, 230, 230, 0.05) 0%, rgba(0, 0, 0, 0.05) 100%);background:-moz-linear-gradient(top, rgba(230, 230, 230, 0.05) 0%, rgba(0, 0, 0, 0.05) 100%);background:-ms-linear-gradient(top, rgba(230, 230, 230, 0.05) 0%, rgba(0, 0, 0, 0.05) 100%);background:-o-linear-gradient(top, rgba(230, 230, 230, 0.05) 0%, rgba(0, 0, 0, 0.05) 100%);background:linear-gradient(to bottom, rgba(230, 230, 230, 0.05) 0%, rgba(0, 0, 0, 0.05) 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button.disabled,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:hover,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:active{cursor:default;color:#666 !important;border:1px solid transparent;background:transparent;box-shadow:none}.dataTables_wrapper .dataTables_paginate .paginate_button:hover{color:white !important;border:1px solid #111;background-color:#111;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #585858), color-stop(100%, #111));background:-webkit-linear-gradient(top, #585858 0%, #111 100%);background:-moz-linear-gradient(top, #585858 0%, #111 100%);background:-ms-linear-gradient(top, #585858 0%, #111 100%);background:-o-linear-gradient(top, #585858 0%, #111 100%);background:linear-gradient(to bottom, #585858 0%, #111 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button:active{outline:none;background-color:#0c0c0c;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #2b2b2b), color-stop(100%, #0c0c0c));background:-webkit-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-moz-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-ms-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-o-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:linear-gradient(to bottom, #2b2b2b 0%, #0c0c0c 100%);box-shadow:inset 0 0 3px #111}.dataTables_wrapper .dataTables_paginate .ellipsis{padding:0 1em}.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter,.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_processing,.dataTables_wrapper .dataTables_paginate{color:inherit}.dataTables_wrapper 
.dataTables_scroll{clear:both}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody{-webkit-overflow-scrolling:touch}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td{vertical-align:middle}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td>div.dataTables_sizing{height:0;overflow:hidden;margin:0 !important;padding:0 !important}.dataTables_wrapper.no-footer .dataTables_scrollBody{border-bottom:1px solid rgba(0, 0, 0, 0.3)}.dataTables_wrapper.no-footer div.dataTables_scrollHead table.dataTable,.dataTables_wrapper.no-footer div.dataTables_scrollBody>table{border-bottom:none}.dataTables_wrapper:after{visibility:hidden;display:block;content:"";clear:both;height:0}@media screen and (max-width: 767px){.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_paginate{float:none;text-align:center}.dataTables_wrapper .dataTables_paginate{margin-top:.5em}}@media screen and (max-width: 640px){.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter{float:none;text-align:center}.dataTables_wrapper .dataTables_filter{margin-top:.5em}}html.dark{--dt-row-hover: 255, 255, 255;--dt-row-stripe: 255, 255, 255;--dt-column-ordering: 255, 255, 255}html.dark table.dataTable>thead>tr>th,html.dark table.dataTable>thead>tr>td{border-bottom:1px solid rgb(89, 91, 94)}html.dark table.dataTable>thead>tr>th:active,html.dark table.dataTable>thead>tr>td:active{outline:none}html.dark table.dataTable>tfoot>tr>th,html.dark table.dataTable>tfoot>tr>td{border-top:1px solid rgb(89, 91, 94)}html.dark table.dataTable.row-border>tbody>tr>th,html.dark table.dataTable.row-border>tbody>tr>td,html.dark table.dataTable.display>tbody>tr>th,html.dark table.dataTable.display>tbody>tr>td{border-top:1px solid rgb(64, 67, 70)}html.dark table.dataTable.row-border>tbody>tr.selected+tr.selected>td,html.dark table.dataTable.display>tbody>tr.selected+tr.selected>td{border-top-color:#0257d5}html.dark table.dataTable.cell-border>tbody>tr>th,html.dark table.dataTable.cell-border>tbody>tr>td{border-top:1px solid rgb(64, 67, 70);border-right:1px solid rgb(64, 67, 70)}html.dark table.dataTable.cell-border>tbody>tr>th:first-child,html.dark table.dataTable.cell-border>tbody>tr>td:first-child{border-left:1px solid rgb(64, 67, 70)}html.dark .dataTables_wrapper .dataTables_filter input,html.dark .dataTables_wrapper .dataTables_length select{border:1px solid rgba(255, 255, 255, 0.2);background-color:var(--dt-html-background)}html.dark .dataTables_wrapper .dataTables_paginate .paginate_button.current,html.dark .dataTables_wrapper .dataTables_paginate .paginate_button.current:hover{border:1px solid rgb(89, 91, 94);background:rgba(255, 255, 255, 0.15)}html.dark .dataTables_wrapper .dataTables_paginate .paginate_button.disabled,html.dark .dataTables_wrapper .dataTables_paginate .paginate_button.disabled:hover,html.dark .dataTables_wrapper .dataTables_paginate .paginate_button.disabled:active{color:#666 
!important}html.dark .dataTables_wrapper .dataTables_paginate .paginate_button:hover{border:1px solid rgb(53, 53, 53);background:rgb(53, 53, 53)}html.dark .dataTables_wrapper .dataTables_paginate .paginate_button:active{background:#3a3a3a} - - -@keyframes dtb-spinner{100%{transform:rotate(360deg)}}@-o-keyframes dtb-spinner{100%{-o-transform:rotate(360deg);transform:rotate(360deg)}}@-ms-keyframes dtb-spinner{100%{-ms-transform:rotate(360deg);transform:rotate(360deg)}}@-webkit-keyframes dtb-spinner{100%{-webkit-transform:rotate(360deg);transform:rotate(360deg)}}@-moz-keyframes dtb-spinner{100%{-moz-transform:rotate(360deg);transform:rotate(360deg)}}div.dataTables_wrapper{position:relative}div.dt-buttons{position:initial}div.dt-buttons .dt-button{overflow:hidden;text-overflow:ellipsis}div.dt-button-info{position:fixed;top:50%;left:50%;width:400px;margin-top:-100px;margin-left:-200px;background-color:white;border-radius:.75em;box-shadow:3px 4px 10px 1px rgba(0, 0, 0, 0.8);text-align:center;z-index:2003;overflow:hidden}div.dt-button-info h2{padding:2rem 2rem 1rem 2rem;margin:0;font-weight:normal}div.dt-button-info>div{padding:1em 2em 2em 2em}div.dtb-popover-close{position:absolute;top:6px;right:6px;width:22px;height:22px;text-align:center;border-radius:3px;cursor:pointer;z-index:2003}button.dtb-hide-drop{display:none !important}div.dt-button-collection-title{text-align:center;padding:.3em 0 .5em;margin-left:.5em;margin-right:.5em;font-size:.9em}div.dt-button-collection-title:empty{display:none}span.dt-button-spacer{display:inline-block;margin:.5em;white-space:nowrap}span.dt-button-spacer.bar{border-left:1px solid rgba(0, 0, 0, 0.3);vertical-align:middle;padding-left:.5em}span.dt-button-spacer.bar:empty{height:1em;width:1px;padding-left:0}div.dt-button-collection .dt-button-active{padding-right:3em}div.dt-button-collection .dt-button-active:after{position:absolute;top:50%;margin-top:-10px;right:1em;display:inline-block;content:"✓";color:inherit}div.dt-button-collection .dt-button-active.dt-button-split{padding-right:0}div.dt-button-collection .dt-button-active.dt-button-split:after{display:none}div.dt-button-collection .dt-button-active.dt-button-split>*:first-child{padding-right:3em}div.dt-button-collection .dt-button-active.dt-button-split>*:first-child:after{position:absolute;top:50%;margin-top:-10px;right:1em;display:inline-block;content:"✓";color:inherit}div.dt-button-collection .dt-button-active-a a{padding-right:3em}div.dt-button-collection .dt-button-active-a a:after{position:absolute;right:1em;display:inline-block;content:"✓";color:inherit}div.dt-button-collection span.dt-button-spacer{width:100%;font-size:.9em;text-align:center;margin:.5em 0}div.dt-button-collection span.dt-button-spacer:empty{height:0;width:100%}div.dt-button-collection span.dt-button-spacer.bar{border-left:none;border-bottom:1px solid rgba(0, 0, 0, 0.1);padding-left:0}html.dark div.dt-button-info{background-color:var(--dt-html-background);border:1px solid rgba(255, 255, 255, 0.15)}div.dt-buttons{float:left}div.dt-buttons.buttons-right{float:right}div.dataTables_layout_cell div.dt-buttons{float:none}div.dataTables_layout_cell div.dt-buttons.buttons-right{float:none}div.dt-buttons>.dt-button,div.dt-buttons>div.dt-button-split .dt-button{position:relative;display:inline-block;box-sizing:border-box;margin-left:.167em;margin-right:.167em;margin-bottom:.333em;padding:.5em 1em;border:1px solid rgba(0, 0, 0, 
0.3);border-radius:2px;cursor:pointer;font-size:.88em;line-height:1.6em;color:inherit;white-space:nowrap;overflow:hidden;background-color:rgba(0, 0, 0, 0.1);background:linear-gradient(to bottom, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);filter:progid:DXImageTransform.Microsoft.gradient(GradientType=0,StartColorStr="rgba(230, 230, 230, 0.1)", EndColorStr="rgba(0, 0, 0, 0.1)");-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;text-decoration:none;outline:none;text-overflow:ellipsis}div.dt-buttons>.dt-button:first-child,div.dt-buttons>div.dt-button-split .dt-button:first-child{margin-left:0}div.dt-buttons>.dt-button.disabled,div.dt-buttons>div.dt-button-split .dt-button.disabled{cursor:default;opacity:.4}div.dt-buttons>.dt-button.dt-button-active:not(.disabled),div.dt-buttons>div.dt-button-split .dt-button.dt-button-active:not(.disabled){background-color:rgba(0, 0, 0, 0.1);background:linear-gradient(to bottom, rgba(179, 179, 179, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);filter:progid:DXImageTransform.Microsoft.gradient(GradientType=0,StartColorStr="rgba(179, 179, 179, 0.1)", EndColorStr="rgba(0, 0, 0, 0.1)");box-shadow:inset 1px 1px 3px #999}div.dt-buttons>.dt-button.dt-button-active:not(.disabled):hover:not(.disabled),div.dt-buttons>div.dt-button-split .dt-button.dt-button-active:not(.disabled):hover:not(.disabled){box-shadow:inset 1px 1px 3px #999;background-color:rgba(0, 0, 0, 0.1);background:linear-gradient(to bottom, rgba(128, 128, 128, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);filter:progid:DXImageTransform.Microsoft.gradient(GradientType=0,StartColorStr="rgba(128, 128, 128, 0.1)", EndColorStr="rgba(0, 0, 0, 0.1)")}div.dt-buttons>.dt-button:hover,div.dt-buttons>div.dt-button-split .dt-button:hover{text-decoration:none}div.dt-buttons>.dt-button:hover:not(.disabled),div.dt-buttons>div.dt-button-split .dt-button:hover:not(.disabled){border:1px solid #666;background-color:rgba(0, 0, 0, 0.1);background:linear-gradient(to bottom, rgba(153, 153, 153, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);filter:progid:DXImageTransform.Microsoft.gradient(GradientType=0,StartColorStr="rgba(153, 153, 153, 0.1)", EndColorStr="rgba(0, 0, 0, 0.1)")}div.dt-buttons>.dt-button:focus:not(.disabled),div.dt-buttons>div.dt-button-split .dt-button:focus:not(.disabled){outline:2px solid rgb(53, 132, 228)}div.dt-buttons>.dt-button embed,div.dt-buttons>div.dt-button-split .dt-button embed{outline:none}div.dt-buttons>div.dt-button-split .dt-button:first-child{border-right:1px solid rgba(0, 0, 0, 0.15);border-top-right-radius:0;border-bottom-right-radius:0}div.dt-buttons>div.dt-button-split .dt-button:first-child:hover{border-right:1px solid #666}div.dt-buttons>div.dt-button-split .dt-button:last-child{border-left:1px solid transparent;border-top-left-radius:0;border-bottom-left-radius:0}div.dt-buttons>div.dt-button-split .dt-button:last-child:hover{border-left:1px solid #666}div.dt-buttons span.dt-button-down-arrow{position:relative;top:-2px;font-size:10px;padding-left:10px;line-height:1em;opacity:.6}div.dt-buttons div.dt-button-split{display:inline-block}div.dt-buttons div.dt-button-split .dt-button:first-child{margin-right:0}div.dt-buttons div.dt-button-split .dt-button:last-child{margin-left:-1px;padding-left:.75em;padding-right:.75em;z-index:2}div.dt-buttons div.dt-button-split .dt-button:last-child span{padding-left:0}div.dt-button-collection{position:absolute;top:0;left:0;width:200px;margin-top:3px;margin-bottom:3px;padding:.75em 0;border:1px solid rgba(0, 0, 0, 
0.4);background-color:white;overflow:hidden;z-index:2002;border-radius:5px;box-shadow:3px 4px 10px 1px rgba(0, 0, 0, 0.3);box-sizing:border-box}div.dt-button-collection .dt-button{position:relative;left:0;right:0;width:100%;display:block;float:none;background:none;margin:0;padding:.5em 1em;border:none;text-align:left;cursor:pointer;color:inherit}div.dt-button-collection .dt-button.dt-button-active{background:none;box-shadow:none}div.dt-button-collection .dt-button.disabled{cursor:default;opacity:.4}div.dt-button-collection .dt-button:hover:not(.disabled){border:none;background:rgba(153, 153, 153, 0.1);box-shadow:none}div.dt-button-collection div.dt-button-split{display:flex;flex-direction:row;flex-wrap:wrap;justify-content:flex-start;align-content:flex-start;align-items:stretch}div.dt-button-collection div.dt-button-split button.dt-button{margin:0;display:inline-block;width:0;flex-grow:1;flex-shrink:0;flex-basis:50px}div.dt-button-collection div.dt-button-split button.dt-button-split-drop{min-width:33px;flex:0}div.dt-button-collection.fixed .dt-button{border-radius:.25em;background:rgba(255, 255, 255, 0.1)}div.dt-button-collection.fixed{position:fixed;display:block;top:50%;left:50%;margin-left:-75px;border-radius:5px;background-color:white;padding:.5em}div.dt-button-collection.fixed.two-column{margin-left:-200px}div.dt-button-collection.fixed.three-column{margin-left:-225px}div.dt-button-collection.fixed.four-column{margin-left:-300px}div.dt-button-collection.fixed.columns{margin-left:-409px}@media screen and (max-width: 1024px){div.dt-button-collection.fixed.columns{margin-left:-308px}}@media screen and (max-width: 640px){div.dt-button-collection.fixed.columns{margin-left:-203px}}@media screen and (max-width: 460px){div.dt-button-collection.fixed.columns{margin-left:-100px}}div.dt-button-collection.fixed>:last-child{max-height:100vh;overflow:auto}div.dt-button-collection.two-column>:last-child,div.dt-button-collection.three-column>:last-child,div.dt-button-collection.four-column>:last-child{display:block !important;-webkit-column-gap:8px;-moz-column-gap:8px;-ms-column-gap:8px;-o-column-gap:8px;column-gap:8px}div.dt-button-collection.two-column>:last-child>*,div.dt-button-collection.three-column>:last-child>*,div.dt-button-collection.four-column>:last-child>*{-webkit-column-break-inside:avoid;break-inside:avoid}div.dt-button-collection.two-column{width:400px}div.dt-button-collection.two-column>:last-child{padding-bottom:1px;column-count:2}div.dt-button-collection.three-column{width:450px}div.dt-button-collection.three-column>:last-child{padding-bottom:1px;column-count:3}div.dt-button-collection.four-column{width:600px}div.dt-button-collection.four-column>:last-child{padding-bottom:1px;column-count:4}div.dt-button-collection .dt-button{border-radius:0}div.dt-button-collection.columns{width:auto}div.dt-button-collection.columns>:last-child{display:flex;flex-wrap:wrap;justify-content:flex-start;align-items:center;gap:6px;width:818px;padding-bottom:1px}div.dt-button-collection.columns>:last-child .dt-button{min-width:200px;flex:0 1;margin:0}div.dt-button-collection.columns.dtb-b3>:last-child,div.dt-button-collection.columns.dtb-b2>:last-child,div.dt-button-collection.columns.dtb-b1>:last-child{justify-content:space-between}div.dt-button-collection.columns.dtb-b3 .dt-button{flex:1 1 32%}div.dt-button-collection.columns.dtb-b2 .dt-button{flex:1 1 48%}div.dt-button-collection.columns.dtb-b1 .dt-button{flex:1 1 100%}@media screen and (max-width: 
1024px){div.dt-button-collection.columns>:last-child{width:612px}}@media screen and (max-width: 640px){div.dt-button-collection.columns>:last-child{width:406px}div.dt-button-collection.columns.dtb-b3 .dt-button{flex:0 1 32%}}@media screen and (max-width: 460px){div.dt-button-collection.columns>:last-child{width:200px}}div.dt-button-background{position:fixed;top:0;left:0;width:100%;height:100%;background:rgba(0, 0, 0, 0.7);background:radial-gradient(ellipse farthest-corner at center, rgba(0, 0, 0, 0.3) 0%, rgba(0, 0, 0, 0.7) 100%);z-index:2001}.dt-button.processing{color:rgba(0, 0, 0, 0.2)}.dt-button.processing:after{position:absolute;top:50%;left:50%;width:16px;height:16px;margin:-8px 0 0 -8px;box-sizing:border-box;display:block;content:" ";border:2px solid rgb(40, 40, 40);border-radius:50%;border-left-color:transparent;border-right-color:transparent;animation:dtb-spinner 1500ms infinite linear;-o-animation:dtb-spinner 1500ms infinite linear;-ms-animation:dtb-spinner 1500ms infinite linear;-webkit-animation:dtb-spinner 1500ms infinite linear;-moz-animation:dtb-spinner 1500ms infinite linear}@media screen and (max-width: 640px){div.dt-buttons{float:none !important;text-align:center}}html.dark div.dt-buttons>.dt-button,html.dark div.dt-buttons>div.dt-button-split .dt-button{border:1px solid rgb(89, 91, 94);background:rgba(255, 255, 255, 0.15)}html.dark div.dt-buttons>.dt-button.dt-button-active:not(.disabled),html.dark div.dt-buttons>div.dt-button-split .dt-button.dt-button-active:not(.disabled){background:rgba(179, 179, 179, 0.15);box-shadow:inset 1px 1px 2px black}html.dark div.dt-buttons>.dt-button.dt-button-active:not(.disabled):hover:not(.disabled),html.dark div.dt-buttons>div.dt-button-split .dt-button.dt-button-active:not(.disabled):hover:not(.disabled){background:rgba(128, 128, 128, 0.15);box-shadow:inset 1px 1px 3px black}html.dark div.dt-buttons>.dt-button:hover:not(.disabled),html.dark div.dt-buttons>div.dt-button-split .dt-button:hover:not(.disabled){background:rgba(179, 179, 179, 0.15)}html.dark div.dt-buttons>.dt-button:focus:not(.disabled),html.dark div.dt-buttons>div.dt-button-split .dt-button:focus:not(.disabled){outline:2px solid rgb(110, 168, 254)}html.dark div.dt-buttons>div.dt-button-split .dt-button:first-child{border-right:1px solid rgba(255, 255, 255, 0.1)}html.dark div.dt-buttons>div.dt-button-split .dt-button:first-child:hover{border-right:1px solid rgb(89, 91, 94)}html.dark div.dt-buttons>div.dt-button-split .dt-button:last-child:hover{border-left:1px solid rgb(89, 91, 94)}html.dark div.dt-button-collection{border:1px solid rgba(255, 255, 255, 0.15);background-color:rgb(33, 37, 41);box-shadow:3px 4px 10px 1px rgba(0, 0, 0, 0.8)} - - diff --git a/lute/static/css/jquery.tagit.css b/lute/static/css/jquery.tagit.css deleted file mode 100644 index 4cf76f2e9..000000000 --- a/lute/static/css/jquery.tagit.css +++ /dev/null @@ -1,62 +0,0 @@ -ul.tagit { - padding: 1px 5px; - overflow: auto; - margin-left: inherit; /* usually we don't want the regular ul margins. 
*/
- margin-right: inherit;
-}
-ul.tagit li {
- display: block;
- float: left;
- margin: 2px 5px 2px 0;
-}
-ul.tagit li.tagit-choice {
- position: relative;
- line-height: inherit;
-}
-
-ul.tagit li.tagit-choice-read-only {
- padding: .2em .5em .2em .5em;
-}
-
-ul.tagit li.tagit-choice-editable {
- padding: .2em 18px .2em .5em;
-}
-
-ul.tagit li.tagit-new {
- padding: .25em 4px .25em 0;
-}
-
-ul.tagit li.tagit-choice a.tagit-label {
- cursor: pointer;
- text-decoration: none;
-}
-ul.tagit li.tagit-choice .tagit-close {
- cursor: pointer;
- position: absolute;
- right: .1em;
- top: 50%;
- margin-top: -8px;
-}
-
-/* used for some custom themes that don't need image icons */
-ul.tagit li.tagit-choice .tagit-close .text-icon {
- display: none;
-}
-
-ul.tagit li.tagit-choice input {
- display: block;
- float: left;
- margin: 2px 5px 2px 0;
-}
-ul.tagit input[type="text"] {
- -moz-box-sizing: border-box;
- -webkit-box-sizing: border-box;
- box-sizing: border-box;
-
- border: none;
- margin: 0;
- padding: 0;
- width: inherit;
- background-color: inherit;
- outline: none;
-}
diff --git a/lute/static/css/player-styles.css b/lute/static/css/player-styles.css
new file mode 100644
index 000000000..4ce770289
--- /dev/null
+++ b/lute/static/css/player-styles.css
@@ -0,0 +1,430 @@
+#read_pane_container {
+ --player-height: 130px;
+}
+
+.audio-player-container {
+ border: 2px solid #d7e6f4;
+ border-radius: 5px;
+ padding: 0.9rem;
+ background-color: var(--audio-color-3);
+ width: 80%;
+ margin: 0 auto;
+ margin-bottom: 1.6rem;
+ min-width: fit-content;
+ box-sizing: border-box;
+ z-index: 999;
+
+ --audio-color-1: #6da9e9;
+ --audio-color-2: #ff5252;
+ --audio-color-3: aliceblue;
+ --audio-color-4: #79b7e7;
+ --audio-color-5: #d6edff;
+ --button-size-smaller: 22px;
+ --button-size-small: 28px;
+ --button-size-big: 64px;
+
+ /* use variables to fix marker placement in the bookmark container */
+ --padding-top-val: 0.2rem;
+ --padding-sides-val: 0.5rem;
+ --timeline-height: 0.7rem;
+
+ position: relative;
+}
+
+.audio-player-top-container {
+ display: grid;
+ grid-template-columns: min-content 2.2fr min-content;
+ align-items: center;
+
+ gap: .9rem;
+}
+
+.audio-player-timeline-container {
+ width: 100%;
+ /* for marker overlay */
+ position: relative;
+
+ background-color: var(--audio-color-3);
+ padding: var(--padding-top-val) var(--padding-sides-val);
+ border-radius: 5px;
+ box-sizing: border-box;
+ margin-bottom: 6px;
+}
+
+.audio-player-central-container {
+ background-color: var(--audio-color-5);
+ border-radius: 5px;
+ padding: 0.5rem;
+}
+
+.audio-player-controls-container {
+ display: grid;
+ grid-template-columns: auto auto auto;
+ justify-content: center;
+ align-items: center;
+ gap: 0.7rem;
+}
+
+.audio-button {
+ background-position: center;
+ background-repeat: no-repeat;
+ background-size: cover;
+ border: none;
+ padding: 0;
+ border-radius: 50%;
+
+ background-color: white;
+ cursor: pointer;
+ user-select: none;
+ -webkit-user-select: none;
+}
+
+#play-btn {
+ background-image: url("/static/icn/play.svg");
+ height: var(--button-size-big);
+ width: var(--button-size-big);
+ display: block;
+
+ transition: all 50ms;
+}
+
+#playback-rate-container {
+ display: flex;
+ gap: 0.3rem;
+ align-items: center;
+}
+
+.rate-btn {
+ width: var(--button-size-smaller);
+ height: var(--button-size-smaller);
+ display: block;
+ background-color: #79b7e7;
+ color: #fff;
+}
+
+#playback-rate-indicator {
+ font-size: 0.9rem;
+ cursor: pointer;
+}
+
+#skip-back-btn {
+ background-image:
url("/static/icn/skip-back.svg"); + height: var(--button-size-small); + width: var(--button-size-small); +} + +#rewind-btn { + background-image: url("/static/icn/rewind.svg"); + height: var(--button-size-small); + width: var(--button-size-small); +} + +#ff-btn { + background-image: url("/static/icn/ff.svg"); + height: var(--button-size-small); + width: var(--button-size-small); +} + +#bkm-save-btn { + background-image: url("/static/icn/bookmark-off.svg"); + height: var(--button-size-small); + width: var(--button-size-small); + + border-radius: unset; + background-size: 67%; + background-color: transparent; +} + +#bkm-prev-btn { + background-image: url("/static/icn/prev.svg"); + height: var(--button-size-small); + width: var(--button-size-small); +} + +#bkm-next-btn { + background-image: url("/static/icn/next.svg"); + height: var(--button-size-small); + width: var(--button-size-small); +} + +#pin { + /* background-image: url("/static/icn/pin.svg"); */ + position: absolute; + left: 0.6rem; + top: 0.6rem; + width: 14px; + height: 14px; + /* background-color: transparent; */ + background-color: #79b7e79c; + border-radius: 5px; +} + +.bookmark-markers-container { + position: absolute; + top: calc(var(--padding-top-val) + 4px); + left: var(--padding-sides-val); + width: calc(100% - var(--padding-sides-val)*2); + /* height: var(--timeline-height); */ + box-sizing: border-box; + user-select: none; + pointer-events: none; +} + +.timeline { + -webkit-appearance: none; + appearance: none; + width: 100%; + height: var(--timeline-height); + margin: 0; + background-color: #fff; + border-radius: 2px; + box-sizing: border-box; + background-size: 0% 100%; + background-image: linear-gradient(var(--audio-color-1), var(--audio-color-1)); + background-repeat: no-repeat; +} + +.volume { + -webkit-appearance: none; + appearance: none; + width: 100px; + height: 0.6rem; + margin: 0; + background-color: #fff; + border-radius: 5px; + background-size: 100% 100%; + background-image: linear-gradient(var(--audio-color-1), var(--audio-color-1)); + background-repeat: no-repeat; + + /* appearance: slider-vertical; */ + /* transform: scaleY(-1); */ + /* margin-bottom: 1.1rem; */ +} + +.timeline::-webkit-slider-runnable-track, +.volume::-webkit-slider-runnable-track { + -webkit-appearance: none; + box-shadow: none; + border: none; + /* background: transparent; */ +} + +.timeline::-moz-range-track, +.volume::-moz-range-track { + box-shadow: none; + border: none; + /* background: transparent; */ +} + +.rewind-container { + display: grid; + grid-template-columns: min-content 1fr; + gap: 0.8rem; + align-items: center; + justify-content: center; + + background-color: aliceblue; + border-radius: 5px; + padding: 0.3rem 0.4rem 0.3rem 0.4rem; +} + +.rewind-btn-container { + display: flex; + justify-content: center; + align-items: center; + gap: 0.3rem; +} + +#rewind-option { + background-color: #d6edff; + border: none; + border-radius: 5px; + /* width: 90%; */ + height: 1.6rem; + padding-left: 0.8rem; + font-family: inherit; +} + +.audio-player-right-container { + display: grid; + grid-template-columns: 1fr; + align-items: center; + justify-items: center; + row-gap: 0.8rem; +} + +.duration-container { + display: flex; + justify-content: space-between; +} + +.timeline::-moz-range-thumb { + width: 1px; + height: 1em; + opacity: 0; +} + +.timeline::-webkit-slider-thumb { + -webkit-appearance: none; + width: 1px; + height: 1em; + opacity: 0; +} + +.volume::-moz-range-thumb { + width: 16px; + height: 16px; + background-image: 
url("/static/icn/volume.svg"); + border: 2px solid #89c2f9; + background-size: 80%; + border-radius: 5px; + background-color: #fff; + background-size: 60%; + + background-position: center; + background-repeat: no-repeat; + /* background-size: cover; */ + /* border: none; */ + padding: 0; +} + +.volume::-webkit-slider-thumb { + -webkit-appearance: none; + width: 16px; + height: 16px; + background-image: url("/static/icn/volume.svg"); + border: 2px solid #89c2f9; + background-size: 80%; + border-radius: 5px; + background-color: #fff; + background-size: 60%; + + background-position: center; + background-repeat: no-repeat; + /* background-size: cover; */ + /* border: none; */ + padding: 0; +} + +.bookmark-buttons-container { + display: grid; + gap: 0.6rem; + grid-template-columns: min-content max-content; + align-items: center; + + background-color: var(--audio-color-5); + padding: 0.3rem 0.4rem 0.3rem 0.4rem; + border-radius: 5px; +} + +.bookmark-jump-container { + display: flex; + align-items: center; + gap: 0.3rem; +} + +/* sticky styles */ +.sticky-player .audio-player-container { + position: fixed; + width: 100%; + height: var(--player-height); + left: 0; + bottom: 0; + border: none; + margin-bottom: 0; +} + +.sticky-player #read_pane_right { + height: calc(100% - var(--player-height)); +} + +.sticky-player .btm-margin-container { + margin-top: calc(var(--player-height) + 10px); +} + +/* pseudo styles */ +.audio-button:hover { + transform: scale(1.04); + filter: brightness(110%); +} + +.audio-button:active { + filter: brightness(70%); +} + +@media screen and (max-width: 480px) { + #read_pane_container { + --player-height: 90px; + } + + .audio-player-container { + min-width: unset; + padding: 0.4rem; + --button-size-big: 36px; + --button-size-small: 22px; + --padding-top-val: 0px; + --padding-sides-val: 0px; + } + + .duration-container { + font-size: 0.8rem; + } + + .audio-player-timeline-container { + background-color: var(--audio-color-5); + } + + .audio-player-central-container { + padding: 0.3rem; + } + + .audio-player-top-container { + gap: 0.2rem; + } + + .audio-player-controls-container { + gap: 0.7rem; + } + + .rewind-container { + background-color: unset; + padding: 0; + gap: 0.4rem; + } + + .rewind-btn-container { + gap: 0.5rem; + } + + #playback-rate-indicator { + font-size: 0.8rem; + } + + .bookmark-buttons-container { + background-color: unset; + padding: 0; + column-gap: 0; + row-gap: 0.6rem; + } + + #bkm-save-btn { + grid-column: 1 / 3; + justify-self: center; + } + + #rewind-option { + background-color: aliceblue; + padding-left: 0.2rem; + min-width: 2.3rem; + font-size: 0.6rem; + } + + .volume { + display: none; + width: 90%; + } + + #pin { + display: none; + } +} \ No newline at end of file diff --git a/lute/static/css/script-fallbacks.css b/lute/static/css/script-fallbacks.css new file mode 100644 index 000000000..dbae01313 --- /dev/null +++ b/lute/static/css/script-fallbacks.css @@ -0,0 +1,31 @@ +/** + * Script-specific font fallback for Tibetan. + * + * The unicode-range scopes the face to the Tibetan block so other scripts + * continue to use the theme's font stack. + */ +@font-face { + font-family: "Lute Tibetan Fallback"; + src: + local("Noto Sans Tibetan"), + local("Noto Serif Tibetan"), + local("Kailasa"), + local("Microsoft Himalaya"), + local("Jomolhari"), + local("Tibetan Machine Uni"); + unicode-range: U+0F00-0FFF; + font-display: swap; +} + +/* Apply at body level; only Tibetan glyphs will use the Tibetan-capable fonts. 
*/ +body { + font-family: + "Lute Tibetan Fallback", + "Noto Sans Tibetan", + "Noto Serif Tibetan", + "Kailasa", + "Microsoft Himalaya", + "Jomolhari", + "Tibetan Machine Uni", + var(--lute-theme-font-stack, -apple-system, BlinkMacSystemFont, "Segoe UI", "Lucida Grande", Arial, sans-serif); +} diff --git a/lute/static/css/styles-compact.css b/lute/static/css/styles-compact.css deleted file mode 100644 index 270412b7a..000000000 --- a/lute/static/css/styles-compact.css +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Stylesheet for forms shown in right-hand frame in /read page. - */ - - body - { - background-color: #FFFFFF; - color: #000000; - font: 100%/1.25 "Lucida Grande",Arial,sans-serif,STHeiti,"Arial Unicode MS",MingLiu; - margin: 20px; - padding: 0px; - } - - input[type=text] - { - font: 85% "Lucida Grande",Arial,sans-serif,STHeiti,"Arial Unicode MS",MingLiu; - border: 1px solid #C6C6C6; - padding: 1px; - } - - th, td { - padding: 0px; - } - - table#term - { - width: 70%; - } - - input[type=text].form-text - { - width: 100%; - /* padding: 5px 5px; */ - margin: 0; - box-sizing: border-box; - } - - .textarea - { - font: 85% "Lucida Grande",Arial,sans-serif,STHeiti,"Arial Unicode MS",MingLiu; - border: 1px solid #C6C6C6; - padding: 3px; - } - - .nomarginblock { - margin-block-start: 0px; - margin-block-end: 0px; - } - - p - { - margin: 1px 0 1px 0; - padding: 0; - } - - img - { - border: 0pt none; - } - - .zoomableTermImage { - width: 25px; - transition: transform .2s; /* Animation */ - } - - .zoomableTermImage:hover { - transform: scale(10) translate(10px); - } diff --git a/lute/static/css/styles.css b/lute/static/css/styles.css index 343634962..1d1bb4d00 100644 --- a/lute/static/css/styles.css +++ b/lute/static/css/styles.css @@ -3,15 +3,40 @@ * \brief Main stylesheet for the default theme. */ -body -{ - background-color: #FFFFFF; - color: #000000; - font: 100%/1.25 "Lucida Grande",Arial,sans-serif,STHeiti,"Arial Unicode MS",MingLiu; +:root { + --lute-theme-font-stack: "Lucida Grande",Arial,sans-serif,STHeiti,"Arial Unicode MS",MingLiu; +} + +* { margin: 0; padding: 0; } +body { + --background-color: #fff; + --font-color: #000; + + --status-0-color: #D5FFFF; + --status-1-color: #F5B8A9; + --status-2-color: #F5CCA9; + --status-3-color: #F5E1A9; + --status-4-color: #F5F3A9; + --status-5-color: #DDFFDD; + + --status-98-color: #ee8577; + --status-99-color: #72da88; + + --form-border-color: #ddd; + --form-border-radius: 3px; + + background-color: var(--background-color); + color: var(--font-color); + + font-size: 100%; + line-height: 1.25; + font-family: var(--lute-theme-font-stack, "Lucida Grande", Arial, sans-serif, STHeiti, "Arial Unicode MS", MingLiu); +} + /* Main container div for Lute site content. 
*/ .container { width: 95%; @@ -19,13 +44,6 @@ body box-sizing: border-box; } -/* Media query for smaller screens */ -@media screen and (max-width: 600px) { - .container { - width: 95%; /* Adjust as needed */ - } -} - .lutelogo_small { margin-right: 2px; @@ -58,9 +76,6 @@ a.home-link:hover { flex: 1; /* Allow the title to take up remaining space */ } -#luteTitle { -} - p.version { margin: 0px; font-size: 0.6em; @@ -73,7 +88,7 @@ p.version { display: flex; justify-content: space-between; align-items: center; - padding: 10px; + padding: 1.7rem 0; } .title { @@ -83,11 +98,12 @@ p.version { .menu { display: flex; + justify-content: space-between; + gap: 2.2rem; } .menu-item { position: relative; /* Required for sub-menu positioning */ - margin-right: 40px; } /* main menu item text (not links) */ @@ -140,18 +156,129 @@ div#divbooktable { margin-left: auto; } +div.dt-search { + /* Change to inline so that it's shown on the same line as the #defaultLanguageSelect. */ + display: inline; +} + +#datatables_config_widget { + display: inline; + cursor: pointer; + margin-right: 0.25rem; +} + +div#booktable_wrapper div.dt-info { + display: inline; +} + +#defaultLanguageSelect { + text-align: left; + padding: 4px; /* copied padding from datatables css. */ + margin-right: 10px; +} + +div.toolbar { + float: right; + margin-right: 1rem; +} + +/* blank space to hang completed_book bullets while maintaining proper justification on the whole column */ +a.book-title:before { + content: ''; + display: inline-block; + width: 1rem; + height: 1rem; + flex-shrink: 0; +} +/* flex fixes wrapping issue */ +.book-title { + display: inline-flex; + gap: 0.5rem; + align-items: flex-start; +} + +/* show a check mark for completed books */ +a.completed_book:before { + content: url('/static/icn/tick.png'); +} + +/** + * when the books list contains NO completed books, get rid of the awkward indent + */ +table#booktable:not(:has(a.completed_book)) a.book-title:before { + display: none; +} + +.refresh { + display: inline-block; + width: 22px; + height: 22px; + background-image: url("../icn/reload.png"); + background-size: 80%; + border-radius: 2px; + background-repeat: no-repeat; + background-position: center; + + vertical-align: middle; + margin-left: 0.5rem; +} + +.refresh:hover { + background-color: #dadada; +} + +.refresh:active { + background-color: #9f9f9f; +} + +.refreshed { + background-image: url("../icn/waiting2.gif"); + background-repeat: no-repeat; +} + +.book-action-dropdown { + position: relative; + display: inline-block; +} + +.book-action-dropdown-content { + display: none; + position: absolute; + background-color: #f9f9f9; + min-width: 120px; + box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2); + z-index: 1; + border-radius: 5px; + padding: 5px; +} + +.book-action-dropdown-content a { + color: black; + padding: 5px 10px; + text-decoration: none; + display: block; +} + +.book-action-dropdown-content a:hover { + background-color: #f0f0f0; +} + +.book-action-dropdown:hover .book-action-dropdown-content { + display: block; +} + /* End book listing */ -/* Term listing CSV export. 
*/ -button.buttons-csv { +div.dt-buttons>.dt-button { padding: 3px !important; margin: 5px 0 0 10px !important; } .flash-notice { - margin: 20px; - padding: 20px; + margin: 1rem; + margin-bottom: 2rem; + padding: 0.5rem; background-color: #fff3cd; border-color: #ffeeba; } @@ -189,142 +316,428 @@ div#read_pane_left { } div#read_pane_right { - width: 50%; + display: grid; + grid-template-rows: 18rem 1fr; + position: static; + width: 100%; + height: 100%; + box-sizing: border-box; position: fixed; + /* different width and right values because #container width=95% */ + right: var(--read-grid-margin); + width: calc(50% - var(--read-grid-margin)); + height: 100%; top: 0; - right: 0; - height: 95%; } +.read-pane-right-close-btn { + display: none; +} + +.reading_header_container { + display: grid; + grid-template-columns: min-content 1fr; + gap: 2rem; + justify-items: center; + align-items: center; + padding: 1.5rem 2.2rem 0 2.2rem; +} + +.reading_header_mid { + width: 100%; +} + +.reading_header_page { + justify-self: end; +} + +#reading-header { + background-color: var(--background-color); + /* padding: 0 1.5rem 1.5rem 1.5rem; */ + margin-bottom: 2rem; +} + +#page_indicator { + font-weight: 500; + font-size: 0.9rem; +} + +.reading_header_mid_top { + display: grid; + grid-template-columns: auto 1fr; + /* display: flex; + justify-content: space-between; */ + margin: 0 1.6rem; + margin-bottom: 0.2rem; + gap: 0.9rem; + align-items: end; +} + /* End layouts *****************************************/ -input[type=text] -{ - font: 85% "Lucida Grande",Arial,sans-serif,STHeiti,"Arial Unicode MS",MingLiu; - border: 1px solid #C6C6C6; - padding: 3px; +/* Reader slide-in hamburger menu. *********************/ + +/* The hamburger slices. */ +span.hamburger { + width: 20px; + height: 4px; + /* margin-bottom: 3px; */ + background: #777; + border-radius: 3px; + display: block; } -::placeholder { /* Chrome, Firefox, Opera, Safari 10.1+ */ - color: #D8D8D8; - opacity: 1; /* Firefox */ +.hamburger-btn { + display: flex; + flex-direction: column; + gap: 0.2rem; + border: 1px solid #bcbcbc; + border-radius: 6.9px; + padding: 0.5rem; + box-sizing: border-box; + cursor: pointer; } -div#thetext { - margin-left: 20px; - margin-right: 20px; +#reading_menu.open-menu { + transform: translateX(0); } -div#thetext p { - line-height: 1.25; - margin-top: 1em; - margin-bottom: 1em; +#reading_menu { + display: flex; + flex-direction: column; + gap: 0.3rem; + + padding: 0; + margin: 0; + position: fixed; + left: 0; + top: 0; + z-index: 1004; /* higher than audio, header and right_pane (for mobile view) */ + height: 100vh; + border-right: 4px solid #a9cfef; + filter: drop-shadow(4px 0 4px #0000002e); + box-sizing: border-box; + background-color: var(--background-color); + + transform-origin: 0% 0%; + transform: translate(-110%, 0); /* 110 to hide the shadow */ + transition: transform 0.3s cubic-bezier(0.77,0.2,0.05,1.0); } -table#text -{ - width: 70%; +#reading_menu ul { + margin: 0; + list-style: none; } -th, td { - padding: 2px; +#reading_menu .reading-menu-item { + display: block; + padding: 0.5rem 1.6rem; + color: inherit; + text-decoration: none; } -table#text thead, table#text tr, table#text th:first-child, table#text tbody, table#text tr, table#text td:first-child { - width: 20%; - min-width: 20%; - max-width: 20%; - vertical-align: top; +.close-btn:hover, +#reading_menu .reading-menu-item:hover, +.text-options-button:hover { + background-color: #d5e9fa; + cursor: pointer; } -table#book -{ - width: 70%; +.close-btn { + 
background-image: url("/static/icn/close.svg"); + background-position: center; + background-repeat: no-repeat; + width: 20px; + height: 20px; + padding: 0.9rem; + border-radius: 5px; + border: none; + background-size: 42.4%; + background-color: transparent; + cursor: pointer; } -table#book thead, table#book tr, table#book th:first-child, table#book tbody, table#book tr, table#book td:first-child { - width: 20%; - min-width: 20%; - max-width: 20%; - vertical-align: top; +.reading-menu-close-btn { + margin-left: 5.4rem; } -table#termimport -{ - width: 70%; +#reading_menu .text-options-container { + display: grid; + grid-template-columns: max-content max-content; + justify-content: center; + gap: 0.2rem; + padding: 0; + margin-block: 1rem; } -table#termimport thead, table#termimport tr, table#termimport th:first-child, table#termimport tbody, table#termimport tr, table#termimport td:first-child { - width: 10%; - min-width: 10%; - max-width: 10%; - vertical-align: top; +.text-options-btn-container { + padding: 0.2rem; + border-radius: 5px; } -table.settingstable -{ - width: 80%; +.reading_header_left { + display: flex; + align-items: center; + gap: 1rem; } -table.settingstable thead, table.settingstable tr, table.settingstable th:first-child, table.settingstable tbody, table.settingstable tr, table.settingstable td:first-child { - width: 30%; - min-width: 30%; - max-width: 30%; - vertical-align: top; +.reading_menu_logo_container { + display: flex; + justify-content: space-between; + align-items: center; + gap: 0.4rem; + margin-bottom: 1.2rem; + background-color: aliceblue; + padding: 0.9rem 1.5rem; } -table#language -{ - width: 80%; +#focus-container, +#tap_sets_status-container { + display: flex; + justify-content: center; + gap: 1rem; + align-items: center; } -table#language thead, table#language tr, table#language th:first-child, table#language tbody, table#language tr, table#language td:first-child { - width: 30%; - min-width: 30%; - max-width: 30%; - vertical-align: top; +#tap_sets_status-container { + /* Toggled later in mobile media check. 
*/ + display:none; } -table#language ul -{ - color: red; - list-style-type: none; - margin-block-start: 0em; - margin-block-end: 0em; - margin-inline-start: 0px; - margin-inline-end: 0px; - padding-inline-start: 0px; +#focus, +#tap_sets_status { + appearance: none; + display: block; + background-color: #ffffff; + border: 1px solid #616161; + width: 34px; + height: 21px; + border-radius: 50px; + padding: 3px; + + transition: background-color 0.2s } -input[type=text].form-control -{ +#focus-container label, +#tap_sets_status-container label { + font-weight: bold; +} + +#focus::after, +#tap_sets_status::after { + content: ""; + display: block; + background-color: #616161; + width: 13px; + height: 13px; + border-radius: 50%; + + transition: transform 0.2s; +} + +.focus-mode-active #focus::after, +.tap_sets_status-active #tap_sets_status::after { + transform: translate(100%, 0); + background-color: #7950f2; +} + +.focus-mode-active #focus, +.tap_sets_status-active #tap_sets_status { + background-color: #b197fc; + border-color: #b197fc; +} + +#tap_sets_status-container label, +#tap_sets_status-container input { + display: inline-block; + vertical-align: middle; +} + +.focus-mode-active #read_pane_right { + display: none; +} + +.focus-mode-active #read_pane_left { + margin: 0 auto; +} + +.reading-menu-top-level { + display: block; + position: relative; width: 100%; - /* padding: 5px 5px; */ - margin: 2px 0; - box-sizing: border-box; } -textarea.form-largetextarea -{ - /* - 1em is /supposedly/ = the current font size, roughly - (ref https://stackoverflow.com/questions/2034544/ - textareas-rows-and-cols-attribute-in-css), - but on my machine at least it doesn't appear to be so! - */ - width:50em; - height:20em; - padding: 5px 5px; +.reading-menu-top-level::after { + content: "\232A"; /* right angle bracket code. 
*/ + position: absolute; + right: 0; +} + +.reading-menu-top-level-li { + display: flex; + align-items: center; + position: relative; +} + +.reading-menu-sublist { + display: none; + position: absolute; + left: 100%; + width: 100%; + background-color: #d5e9fa; + top: 0; + border-right: 4px solid #a9cfef; + border-left: none; + border-top-right-radius: 4px; + border-bottom-right-radius: 4px; +} + +#page-operations-li:hover #page-operations-menu, +#bookmark-operations-li:hover #bookmark-operations-menu, +#screen-interactions-li:hover #screen-interactions-menu { + display: block; +} + +#page-operations-menu .reading-menu-item:hover, +#bookmark-operations-menu .reading-menu-item:hover, +#screen-interactions-menu .reading-menu-item:hover { + background-color: #e4f2fe; +} + +/* Reading menu, mobile */ +@media screen and (max-width: 980px) { + + #reading_menu { + width: 90%; + } + + .reading_menu_logo_container { + margin-bottom: 2.2rem; + } + + #focus-container { + display: none; + } + + #tap_sets_status-container { + display: flex; + } + + .text-options-container { + gap: 0.3rem !important; + grid-template-columns: max-content max-content !important; + } + + .text-options-button { + width: 3rem; + height: 3rem; + } + + .text-options-btn-container:nth-child(3), + .text-options-btn-container:nth-child(4) { + display: none; + } + + .reading-menu-item { + font-size: 1.2rem; + padding: 0.5rem 0.5rem; + } + + .reading-menu-sublist { + position: relative; /* relative to the parent */ + left: 0; /* below the parent */ + top: 0; /* at the same level as the parent */ + width: 100%; /* Full width of the parent container */ + border-right: none; + border-top-right-radius: 0; + border-bottom-right-radius: 0; + border-top-left-radius: 4px; /* Optional: Adjust corners */ + border-bottom-left-radius: 4px; + } + + .reading-menu-sublist.active { + display: block; + } + + #page-operations-li:hover #page-operations-menu, + #screen-interactions-li:hover #screen-interactions-menu { + position: relative; /* Prevent overflow issues */ + } + +} +/* End reading menu, mobile */ + +/* End reader slide-in hamburger menu. 
*********************/
+
+.read_page_nav {
+ display: block;
+ font-size: 1.2rem;
+ line-height: 1; /* higher value adds unnecessary height */
+ cursor: pointer;
+ user-select: none;
+ color: var(--read-slider-color);
+}
+
+.read_page_disabled {
+ color: lightgrey;
+ cursor: default;
+}
+
+div#reading-footer span {
+ margin: 5px;
+}
+
+.texttitlecontainer {
+ overflow: hidden;
+ white-space: nowrap;
+}
+
+#headertexttitle {
+ font-size: 0.9rem;
+ font-weight: normal;
+ margin: 0;
+}
+
+#thetexttitle {
+ font-size: 1.5rem;
+ margin-bottom: 1rem;
+ word-wrap: break-word;
+}
+
+#thetexttitle,
+div#thetext {
+ padding: 0 2.2rem;
+}
+
+div#thetext p {
+ line-height: 1.25;
+ margin-top: 1em;
+ margin-bottom: 1em;
+ font-size: 0;
+}
+
+span.drag-handle {
+ cursor: pointer;
+ color: grey;
+}
+
+.valign {
+ display: flex;
+ align-items: center;
+ gap: 0.6rem;
+}
+
+input[type=text].form-control-narrower {
+ min-width: 8rem;
+ /* padding: 5px 5px; */
 margin: 2px 0;
 box-sizing: border-box;
 }
-div.help-text {
- font-size: 0.8em;
+.formfieldcomment {
+ font-size: 0.85em;
 font-style: italic;
- color: gray !important;
 }
 .nomarginblock {
@@ -337,7 +750,15 @@ p
 margin: 5px 0 5px 0;
 padding: 0;
 }
- 
+
+h1 {
+ margin: 1rem 0;
+}
+
+h2 {
+ margin: 0.8rem 0;
+}
+
 h3
 {
 margin: 0px 0 0px 0;
@@ -354,59 +775,31 @@ span.flashtextcopy {
 background-color: yellow !important;
 }
-span.status0
-{
- background-color: #D5FFFF;
- color: #000000;
+span.status0 {
+ background-color: var(--status-0-color);
 }
-span.status1
-{
- background-color: #F5B8A9;
- color: #000000;
+span.status1 {
+ background-color: var(--status-1-color);
 }
-span.status2
-{
- background-color: #F5CCA9;
- color: #000000;
+span.status2 {
+ background-color: var(--status-2-color);
 }
-span.status3
-{
- background-color: #F5E1A9;
- color: #000000;
+span.status3 {
+ background-color: var(--status-3-color);
 }
-span.status4
-{
- background-color: #F5F3A9;
- color: #000000;
+span.status4 {
+ background-color: var(--status-4-color);
 }
-span.status5
-{
- background-color: #DDFFDD;
- color: #000000;
+span.status5 {
+ background-color: var(--status-5-color);
 }
-span.status99
-{
- /* no styling, just regular text.
- background-color: #F8F8F8;
- border-bottom: solid 2px #CCFFCC;
- color: #000000;
- */
-}
-
-span.status98
-{
- /* no styling, just regular text.
- background-color: #F8F8F8;
- border-bottom: dashed 1px #000000;
- color: #000000;
- */
-}
+/* span.status99, span.status98 have no styles, just regular text. */
 span.hasflash:after {
 content: "*";
@@ -424,12 +817,10 @@ span.textsentence {
 font-size: 0%;
 }
-/** A word shown in the reading pane. */
-span.textitem
-{
+/* A word shown in the reading pane. */
+span.textitem {
 font-size: 16px;
- color: #000000;
- display:inline-block;
+ color: var(--font-color);
 /* disallow select, only allow mouse-down-drag-up
 to define multiword terms. */
@@ -439,19 +830,12 @@ style is added, things don't get pushed around.
*/ border-bottom: 1px solid transparent; } - -.textarea -{ - font: 85% "Lucida Grande",Arial,sans-serif,STHeiti,"Arial Unicode MS",MingLiu; - border: 1px solid #C6C6C6; - padding: 3px; -} -.click -{ - cursor: pointer; - color: #C00000; -} +/* .click */ +/* { */ + /* cursor: pointer; */ + /* color: #C00000; */ +/* } */ .hide { @@ -490,15 +874,6 @@ img border: 0pt none; } -.uwordmarked -{ - font-weight: bold; - border-top: 3px solid red; - border-bottom: 3px solid red; - border-right: 3px solid red; - border-left: 3px solid red; -} - .wordhover { border-bottom: 1px solid blue !important; @@ -536,9 +911,28 @@ img margin-left: 20pt; } +/* Tooltip shown on copy or anki card creation. */ +.manual-tooltip { + position: absolute; + background-color: white; + color: black; + border: 1px solid black; + padding: 5px; + border-radius: 3px; + font-size: 12pt; + white-space: pre-line; /* break if the text contains "\n" */ + z-index: 500; /* Initial z-index */ +} + +.manual-tooltip.hovered { + z-index: 10000; /* Higher z-index when hovered */ +} + /* Widening the tooltip. */ div.ui-tooltip { max-width: 400px !important; + z-index: 1000; /*higher than audio because of words on the bottom, but lower than header and side menu */ + word-wrap: break-word; } .tooltip-image { @@ -559,94 +953,1387 @@ ul.sentencelist { border: 1px solid darkgrey; } -/* */ +/* error handler */ -ul.shorttaglist { - margin: 0; - padding: 0; - font-size: 1em; - margin-block-start: 0em; - margin-block-end: 0em; +div.bug_report { + margin: 20px; + padding: 5px; + border: 1px solid grey; +} + +div.code { + font-family: 'Courier New', Courier, monospace; + line-height: 1.5; + padding: 20px; + background-color: #f8f9fa; +} + +div.code pre { + white-space: pre-wrap; + background-color: #f8f9fa; + border: 1px solid #dee2e6; + border-radius: 5px; + padding: 15px; + overflow: auto; + font-size: 14px; + color: #212529; +} + + +/******************************** + * Type controls menu + */ + +.text-options-button { + width: 36px; + height: 36px; + border-radius: 5px; + border: 2px solid #d5e9fa; + background-color: white; + background-position: center; + background-repeat: no-repeat; + background-size: cover; + cursor: pointer; } -ul.shorttaglist li { - padding: 0 18px 0 0.5em; - padding-left: 3px !important; - margin: 0 5px 0 0; - font-size: 85%; +.font-plus { + background-image: url("/static/icn/font-increase.svg"); } +.font-minus { + background-image: url("/static/icn/font-decrease.svg"); +} -ul.shorttaglist li.tagit-new { - padding: 0 18px 0 0.5em; - margin: 0 5px 0 0; - font-size: 85%; +.lh-plus { + background-image: url("/static/icn/line-spacing-increase.svg"); } -ul.shorttaglist li.tagit-choice { - padding: 0 18px 0 0.5em; - margin: 0 5px 0 0; - font-size: 85%; +.lh-minus { + background-image: url("/static/icn/line-spacing-decrease.svg"); } -ul.shorttaglist li.tagit-choice-editable { - padding: 0 18px 0 0.5em; - margin: 0 5px 0 0; - font-size: 85%; +.width-plus { + background-image: url("/static/icn/caret-right.svg"); } -/* Term form, status elements */ +.width-minus { + background-image: url("/static/icn/caret-left.svg"); +} -ul.form-control { - list-style: none; /* Remove the marker */ - padding: 0; /* Remove default padding */ - display: flex; /* Display items in a row */ - - /* Adjust spacing between items as needed */ - margin: 0; +.column-one { + background-image: url("/static/icn/text-column-one.svg"); +} + +.column-two { + background-image: url("/static/icn/text-column-two.svg"); +} + +/********************************* + * read 
page layout + */ + +#read_pane_container { + --read-grid-margin: 0; /*(100 - 95) / 2 -> do not use calc here. does not work*/ + width: 100%; +} + +#wordframeid { + height: 100% !important; +} + +/* resize */ + +/* border */ +#read_pane_right::after { + content: ''; + background-color: #dbefff; + position: absolute; + left: 0; + top: 0; + transform: translateX(-50%); + width: 4px; + height: 100%; + cursor: col-resize; +} + +/* border */ +.dictcontainer::after { + content: ''; + background-color: #dbefff; + /* background-image: linear-gradient(to right, #dbefff 50%, #dbefff 50%); */ + position: absolute; + top: 0; + left: 0; + height: 4px; + width: 100%; + cursor: row-resize; } -ul.form-control li { - margin-right: 8px; /* Adjust the spacing between items */ +.dictcontainer { + position: relative; + display: none; + height: 100%; } -/* Style radio buttons */ -ul.form-control input[type="radio"] { + +/* read slider */ +#read-slider { + -webkit-appearance: none; + appearance: none; + width: 100%; + height: 1rem; margin: 0; + background-color: #d7d7d7; + border-radius: 3px; + box-sizing: border-box; + background-size: 0% 100%; + background-image: linear-gradient(var(--read-slider-color), var(--read-slider-color)); + background-repeat: no-repeat; +} + +#read-slider::-moz-range-thumb { + width: 30px; + height: 1rem; + border: 2px solid var(--read-slider-color); + background-size: 80%; + border-radius: 5px; + background-color: #fff; + background-size: 60%; + + background-position: center; + background-repeat: no-repeat; padding: 0; - width: auto; /* Allow the content to determine the width */ + box-sizing: border-box; } -/* Style labels */ -ul.form-control label { - margin: 0; +#read-slider::-webkit-slider-thumb { + -webkit-appearance: none; + width: 30px; + height: 1rem; + border: 2px solid var(--read-slider-color); + background-size: 80%; + border-radius: 5px; + background-color: #fff; + background-size: 60%; + + background-position: center; + background-repeat: no-repeat; + padding: 0; - cursor: pointer; /* Add pointer cursor for better UX */ + box-sizing: border-box; } -/* error handler */ +#read-slider.read_page_disabled::-moz-range-thumb { + border: 2px solid #d7d7d7; +} -div.bug_report { - margin: 20px; - padding: 5px; - border: 1px solid grey; +#read-slider.read_page_disabled::-webkit-slider-thumb { + border: 2px solid #d7d7d7; } -div.code { - font-family: 'Courier New', Courier, monospace; - line-height: 1.5; - padding: 20px; - background-color: #f8f9fa; +#read-slider::-webkit-slider-runnable-track { + -webkit-appearance: none; + box-shadow: none; + border: none; } -div.code pre { - white-space: pre-wrap; - background-color: #f8f9fa; - border: 1px solid #dee2e6; - border-radius: 5px; - padding: 15px; - overflow: auto; - max-height: 500px; - font-size: 14px; - color: #212529; +#read-slider::-moz-range-track { + box-shadow: none; + border: none; +} + +.read-slide-container { + display: flex; + align-items: center; + gap: 0.4rem; + + --read-slider-color: #79b7e7; +} + + +/* STYLES FOR PAGE BOOKMARKS */ +.read-bkm-btn { + font-size: 1.6rem; + color: var(--read-slider-color); +} + +.read-bkm-btn { + background-position: center; + background-repeat: no-repeat; + background-size: cover; + border: none; + padding: 0; + border-radius: 50%; + + background-color: white; + cursor: pointer; + user-select: none; + --webkit-user-select: none; + + display: none; /*temporarily*/ +} + +/* .read-bookmark-buttons-container { */ + /* display: grid; */ + /* grid-template-columns: min-content min-content; */ + 
+ /* gap: 0.4rem; */
+/* } */
+
+/* #read-bkm-save-btn {
+ background-image: url("/static/icn/bookmark-off.svg");
+ height: 28px;
+ width: 28px;
+
+ border-radius: unset;
+ background-size: 67%;
+ background-color: transparent;
+} */
+
+/* #read-bkm-prev-btn {
+ background-image: url("/static/icn/prev.svg");
+ height: 28px;
+ width: 28px;
+}
+
+#read-bkm-next-btn {
+ background-image: url("/static/icn/next.svg");
+ height: 28px;
+ width: 28px;
+} */
+
+/* .read-bookmark-jump-container {
+ display: flex;
+ align-items: center;
+ gap: 0.3rem;
+} */
+
+/* .read-bookmark-buttons-container {
+ display: flex;
+} */
+
+
+/* COMMON FORM STYLES */
+#text,
+#translation,
+#romanization,
+#book #title,
+#book #source_uri,
+#backup_dir,
+#mecab_path,
+#custom_styles,
+#language input,
+#importurl,
+#comment {
+ font-size: 0.9rem;
+ font-family: inherit;
+ padding: 0.2rem 0.2rem;
+ border: 1px solid var(--form-border-color);
+ border-radius: var(--form-border-radius);
+ box-sizing: border-box;
+}
+
+/* GENERAL FORM STYLES */
+input,
+select,
+textarea {
+ font-family: inherit;
+}
+
+.arrow_only_dropdown {
+ appearance: none; /* Hides default styling */
+ width: 20px; /* Only show the dropdown arrow */
+ overflow: hidden;
+ border: none;
+ background: none;
+ cursor: pointer;
+}
+
+textarea {
+ width: 100%;
+ resize: both;
+}
+
+/* focus on fields makes border same as tags */
+form input:focus,
+form textarea:focus,
+form select:focus {
+ border: 1px solid var(--tagify__tag-bg) !important;
+ outline: none !important;
+}
+
+form input[type="checkbox"]:focus,
+form input[type="file"]:focus {
+ outline: 1px solid var(--tagify__tag-bg) !important;
+ border: none !important;
+}
+
+::placeholder {
+ font-family: inherit;
+ font-size: 0.8rem;
+ color: #D8D8D8;
+ opacity: 0.7;
+}
+
+:focus::placeholder {
+ opacity: 1;
+}
+
+.btn {
+ font-size: 0.8rem;
+ padding: 0.25rem 0.7rem;
+}
+
+/* TABLE STYLES */
 th, td {
 padding: 2px;
 }
+tbody {
+ vertical-align: top;
+}
+
+table input:not([type="checkbox"]) {
+ width: 100%;
+}
+
+div#mappingContainer input:not([type="checkbox"]) {
+ width: 40%;
+}
+
+.statsWordsRead th,
+.statsWordsRead td {
+ padding: 5px;
+}
+
+#book,
+#termimport,
+#language,
+.settingstable {
+ width: 80%;
+}
+
+#edit-page-table {
+ width: 50%;
+}
+
+#edit-page-table textarea {
+ height: 18rem;
+}
+
+#termimport td:first-child,
+.settingstable td:first-child {
+ width: 20rem;
+}
+
+#shortcutstable th {
+ text-align: left;
+}
+
+#shortcutstable .settingcategory {
+ font-size: 1.2em;
+ font-weight: bold;
+}
+
+#shortcutstable input[type=text] {
+ max-width: 8rem;
+}
+
+.dupShortcut {
+ background-color: #FFCCCB;
+}
+
+#book td:first-child,
+#language td:first-child {
+ width: 15rem;
+}
+
+#predefined-lang-container {
+ margin-left: 0.3rem;
+ margin-bottom: 0.4rem;
+}
+
+#predefined {
+ margin-left: 5.3rem;
+}
+
+.smallfilename {
+ font-size: 0.9em;
+}
+
+div.help-text {
+ font-size: 0.8em;
+ font-style: italic;
+ color: gray !important;
+}
+
+
+/* TERM FORM STYLES */
+#term-form-container {
+ padding: 1.5rem;
+ padding-bottom: 1rem;
+}
+
+#translation-container {
+ display: grid;
+ grid-template-columns: 1fr;
+ gap: 0 0.6rem;
+}
+
+#romanization {
+ width: 100%;
+}
+
+#translation {
+ height: 50px;
+ min-height: 50px;
+ vertical-align: top; /* removes annoying extra space below */
+}
+
+#term {
+ display: flex;
+ flex-direction: column;
+ gap: 0.2rem;
+}
+
+#term-bulk-form-fields {
+ display: flex;
+ flex-direction:
column; + gap: 0.2rem; +} + +#term-form #text { + width: 100%; +} + +#term #languageSel { + display: flex; + justify-content: space-between; +} + +#load-dicts-btn { + background: url("../icn/book-open-text.svg"); + background-position: center; + background-size: 80%; + background-repeat: no-repeat; + border: none; + width: 30px; + height: 30px; + cursor: pointer; + border-radius: 3px; +} + +#load-dicts-btn:hover { + background-color: #97531f; + filter: invert(); +} + +#status-container { + display: flex; + justify-content: flex-start; + gap: 1rem; + align-items: center; +} + +#sync-status-container { + text-align: right; +} + +input[type="checkbox"][disabled] + label { + color: #ccc; +} + +#term-button-container { + display: flex; + justify-content: flex-end; + align-items: center; + gap: 0.2rem; + margin-top: 0.6rem; +} + +.zoomableTermImage { + transition: transform 0.2s; /* Animation */ + border-radius: var(--form-border-radius); + border: 1px solid var(--form-border-color); + box-sizing: border-box; + width: 40px; + display: none; +} + +.clickedZoomableImage { + border: 1px solid red; +} + +.zoomableTermImage:hover { + transform: scale(4) translate(-17px, 5px); +} + +/* DICT TABS */ +#dicttabs { + display: flex; + justify-content: space-between; + background-color: #dbefff; + /* height: 1.6rem; */ + padding: 4px 0 0 2px; +} + +#dicttabslayout { + display: grid; +} + +#dictframes { + width: 100%; + /* height: 100%; */ + flex: 1; +} + +#dict-menu-container { + position: relative; + border: 1px solid #85c0ee; + border-top-left-radius: 3px; + border-top-right-radius: 3px; + /* overflow: hidden; */ + border-bottom: none; + left: 0.3rem; + /* flex fixes long button size */ + display: flex; +} + +#dict-menu-container .dict-btn{ + height: 100%; + border: none; +} + +.dict-btn { + position: relative; + padding: 0.2rem 1rem; + border: none; + color: #969696; + background-color: #dcebf8; + /* height: 100%; */ + box-sizing: border-box; + border-top-left-radius: 3px; + border-top-right-radius: 3px; + /* filter: brightness(0.8) grayscale(0.7); */ + z-index: 1; + overflow: hidden; + white-space: nowrap; + + display: flex; + align-items: center; + /* justify-content: center; */ + gap: 0.3rem; +} + +.dict-btn:not(#dict-image-btn) { + width: 100%; +} + +#dicttabsstatic { + display: flex; + margin-left: 1rem; + /* justify-content: end; */ +} + +/* after element is for hiding text at the edges */ +#dicttabslayout .dict-btn::after, +#dicttabslayout .dict-menu-item::after { + content: ""; + position: absolute; + width: 100%; + height: 100%; + top: 0; + left: 0; + border-inline: 4px solid #dcebf8; + box-sizing: border-box; +} + +#dicttabslayout .dict-menu-item::after { + border-color: aliceblue; +} + +#dicttabslayout .dict-btn-active::after { + border-color: #68ace0; +} + +#dicttabslayout .dict-btn.dict-btn-external::after, +#dicttabslayout .dict-menu-item.dict-btn-external::after { + border-right-width: 1.5rem; +} + +#dicttabslayout .dict-btn-select.dict-btn-external::after { + border-right-width: 2.6rem +} + +#dicttabslayout .dict-btn:not(.dict-btn-active):not(.dict-btn.dict-btn-external):hover::after, +#dicttabslayout .dict-btn:not(.dict-btn-active):hover::after { + border-color: aliceblue; +} + +.dict-btn-fav-img, +.dict-btn-list-img, +.dict-btn-external-img { + height: 60%; +} + +.dict-btn-list-img, +.dict-btn-external-img { + position: absolute; + top: 50%; + transform: translateY(-50%); + z-index: 2; + opacity: 0.4; +} + +.dict-btn-list-img { + /* src needs to be set here instead of js. 
explanation in createTabBtn */ + content: url("../icn/list.svg"); + right: 0; + opacity: 1; + /* add padding so clicking area is bigger */ + padding: 0.3rem; +} + +.dict-btn-external-img{ + content: url("../icn/open.svg"); + right: 0.3rem; +} + +.dict-btn-select .dict-btn-external-img { + right: 1.4rem; +} + +.dict-btn:not(.dict-btn-active):not(.dict-btn.dict-btn-external) img { + opacity: 0.5; +} + +.dict-btn:hover + #dict-select, +.dict-btn:not(.dict-btn-active):hover { + background-color: aliceblue; +} + +#dict-image-btn:hover { + background-color: #623006; + filter: invert(1); +} + +.dict-btn.dict-btn-active, +.dict-btn.dict-btn-active + #dict-select { + background-color: #68ace0; + color: #fff; + /* filter: brightness(1); */ + z-index: 2; +} + +.dict-btn:not(.dict-btn-select):not(#dict-image-btn) { + border: 1px solid #aaceea; + border-bottom: none; + border-right: none; +} + +#dicttabsstatic .dict-btn.dict-sentences-btn { + width: 6rem; + border: 1px solid #85c0ee; + border-bottom: none; +} + +.dict-btn:not(#dict-image-btn):not(.dict-btn-select):last-of-type { + border-right: 1px solid #aaceea; +} + +.dict-btn-active:not(.dict-btn-select):not(#dict-image-btn) { + /* border: 2px solid #68ace0; */ + box-shadow: inset 0 0 0 2px #68ace0; +} + +.dict-btn.dict-btn-external { + color: #5b5b5b; +} + +#dict-image-btn.dict-btn-active { + background-color: #97531f; + filter: invert(1); +} + +.dictframe { + display: none; + border: none; + border-top: 3px solid #68ace0; +} + +.dict-active { + display: block; + + width: 100%; + height: 100%; +} + +#dict-image-btn { + background: url("../icn/images.svg"); + background-repeat: no-repeat; + background-position: center; + background-size: 85%; + + /* width: 32px; */ + padding-top: 1.6rem; + border-top-left-radius: 3px; + border-top-right-radius: 3px; + /* height: unset; */ + /* margin-left: 0.5rem; */ + margin-inline: 0.8rem; + /* justify-self: center; */ + /* margin-bottom: 4px; */ + cursor: pointer; + + filter: invert(66%) sepia(33%) saturate(700%) hue-rotate(170deg) brightness(92%) contrast(90%); +} + +#dict-list-container.dict-list-hide { + opacity: 0; + pointer-events: none; + /* clip-path: rect(0 0 0 114%); */ +} + +#dict-list-container { + opacity: 1; + pointer-events: unset; + /* clip-path: rect(0 0 106% 114%); */ + + position: absolute; + background-color: aliceblue; + min-width: 100%; + top: 100%; + right: 0; + border-bottom-left-radius: 3px; + border-bottom-right-radius: 3px; + border-top: none; + box-sizing: border-box; + box-shadow: 0 3px 5px 0 #0000002b; + /* easier way to hide sharp edges of list elements + instead adding border-radius to them too */ + overflow: hidden; + + transition: opacity 0.1s; +} + +#dict-menu-container:hover .dict-btn-select:not(.dict-btn-active) { + background-color: aliceblue; +} + +#dict-menu-container:hover .dict-btn-select:not(.dict-btn-active)::after { + border-color: aliceblue; +} + +.dict-btn-select { + display: flex; + justify-content: flex-end; + padding-inline: 0.5rem; +} + +.dict-menu-item { + font-size: 0.9rem; + padding: 0.5rem 1rem; + margin: 0; + display: flex; + align-items: center; + gap: 0.3rem; + cursor: pointer; + border: none; + background: none; + width: 100%; + position: relative; +} + +.dict-menu-item:hover { + background-color: #68ace0; + color: #fff; +} + +#dict-menu-container .dict-menu-item:hover::after { + border-color: #68ace0; +} + +/* STATUS RADIO */ + +#status input { + display: none; +} + +#status { + display: flex; + align-items: center; + list-style: none; + + 
--status-select-color: #4e4f51c4; + --status-hover-color: #4e4f5156; +} + +#status label { + display: flex; + justify-content: center; + align-items: center; + + width: 30px; + height: 30px; + border: 1px solid #a5a5a5; + color: var(--font-color); + + box-sizing: border-box; + cursor: pointer; + user-select: none; +} + +/* thicker border for Ign */ +#status label[for="status-6"] { + border-width: 2px; + /* border-color: #a5a5a5; */ +} +/* same as checked, but more transparent */ +#status label:hover { + box-shadow: inset 0 0 0 2px var(--status-hover-color); + border: none; +} + +#status label[for="status-6"]:hover { + background-color: #f5c6c0; +} + +#status input:checked + label { + /* background-color: #6da8e8; */ + font-weight: 700; + box-shadow: inset 0 0 0 2px var(--status-select-color); + border: none; +} + +#status #status-6:checked + label { + background-color: var(--status-98-color); +} + +#status #status-5:checked + label::after, +#status #status-6:checked + label::after { + filter: invert(1); +} + +/* move label back(border width amount) */ +#status label[for="status-5"]:hover { + padding-right: 1px; +} + +#status label[for="status-0"]:hover { + padding-left: 1px; +} + +#status-0:checked + label { + padding-left: 1px; +} + +#status-5:checked + label { + padding-right: 1px; +} + +#status label[for="status-0"] { + background-color: var(--status-1-color); + + border-top-left-radius: var(--form-border-radius); + border-bottom-left-radius: var(--form-border-radius); + + border-right: none; +} + +#status label[for="status-1"], +#status label[for="status-2"], +#status label[for="status-3"], +#status label[for="status-4"] { + border-right: none; + border-left: none; +} + +#status label[for="status-1"] { + background-color: var(--status-2-color); +} + +#status label[for="status-2"] { + background-color: var(--status-3-color); +} + +#status label[for="status-3"] { + background-color: var(--status-4-color); +} + +#status label[for="status-4"] { + background-color: var(--status-5-color); +} + +#status label[for="status-5"] { + background-color: var(--status-99-color); + + border-top-right-radius: var(--form-border-radius); + border-bottom-right-radius: var(--form-border-radius); + + border-left: none; +} + +#status label[for="status-6"] { + background-color: var(--background-color); + + border-radius: var(--form-border-radius); + + margin-left: 10px; +} + +/* hide Wkn and Ign */ +#status label[for="status-5"], +#status label[for="status-6"] { + font-size: 0; +} + +#status label[for="status-5"]::after, +#status label[for="status-6"]::after { + content: ""; + background-size: cover; + background-repeat: no-repeat; + width: 1.2rem; + height: 1.2rem; + /* color black */ + filter: invert(0); +} + +#status label[for="status-5"]::after { + background-image: url('data:image/svg+xml,'); +} + +#status label[for="status-6"]::after { + background-image: url('data:image/svg+xml,'); +} + +input[name='status']:disabled + label { + color: grey; /* Make the label text grey */ + cursor: not-allowed; /* Change cursor to indicate non-interactivity */ + opacity: 0.6; /* Dim the appearance */ +} + +/* SETTINGS TABLE */ +#custom_styles { + height: 100px; + font-size: 1rem; + font-family: 'Courier New', Courier, monospace; +} + +#current_theme, +#japanese_reading, +#backup_count, +#stats_calc_sample_size, +#test_mecab_btn, +#parser_type, +#language_id, +#txtSetParent, +#predefined, +#threshold_page_tokens, +#dict_tabs, +.dict-type, +.dict-usefor { + width: 8rem; + box-sizing: border-box; + border: 1px solid 
var(--form-border-color);
+ border-radius: 3px;
+ padding: 0.2rem 0.2rem;
+}
+
+
+/* CREATE BOOK TABLE */
+#book #text {
+ height: 420px;
+}
+
+
+@media screen and (max-width: 980px) {
+
+ #thetexttitle,
+ div#thetext {
+ padding: 0 1rem;
+ }
+
+ div#reading-footer span {
+ margin-right: 30px;
+ }
+
+ .menu-icon {
+ width: 2rem;
+ height: 2rem;
+ }
+
+ .close-btn {
+ padding: 1.3rem;
+ }
+
+ .read-pane-right-close-btn {
+ position: absolute;
+ left: 0;
+ top: 0;
+ width: 50px;
+ transform: translateY(-100%);
+ background-image: url("/static/icn/close-white.svg");
+ background-color: #79b7e7;
+ border-top-left-radius: 0;
+
+ display: block;
+ }
+
+ .read-pane-right-close-btn:hover {
+ background-color: #79b7e7;
+ }
+
+ #reading-header {
+ position: fixed;
+ left: 0;
+ top: 0;
+ width: 100%;
+ background: var(--background-color);
+ height: 4rem; /* Matches the read_pane_left margin-top. */
+ z-index: 1001;
+ margin-bottom: 0.5rem;
+ }
+
+ div.ui-tooltip {
+ z-index: 1002;
+ }
+
+ #read_pane_left {
+ width: unset !important;
+ margin-top: 4rem;
+ }
+
+ .reading_header_container {
+ padding-right: 3.2em;
+ padding-left: 3.2em;
+ align-items: end;
+ gap: 0.5rem;
+ padding: 0.5rem 0.5rem 0 0.5rem;
+ }
+
+ .reading_header_left {
+ align-items: flex-end;
+ }
+
+ div#read_pane_right {
+ background-color: white;
+ z-index: 1003;
+ transform: translateY(100%);
+ /* transition: all creates issues for drag resizing (laggy, slow) */
+ transition: transform 0.2s cubic-bezier(0.77,0.2,0.05,1.0),
+ opacity 0.2s cubic-bezier(0.77,0.2,0.05,1.0);
+ width: 100% !important;
+ height: 100vh !important; /* unset the height that is set when the player is sticky */
+ bottom: 0;
+ left: 0;
+ opacity: 0;
+ /* for touch drag resize to work */
+ touch-action: none;
+ }
+
+ #read_pane_right::after {
+ content: '';
+ background-color: #79b7e7;
+ position: absolute;
+ left: 0;
+ top: 0;
+ transform: translateY(-50%);
+ width: 100%;
+ height: 6px;
+ cursor: row-resize;
+ }
+
+ /* drag button */
+ #read_pane_right::before,
+ .dictcontainer::before {
+ content: '';
+ background-color: #79b7e7;
+ position: absolute;
+ top: 0;
+ cursor: row-resize;
+
+ background-image: url("/static/icn/drag-handle.svg");
+ background-position: center;
+ background-repeat: no-repeat;
+ }
+
+ #read_pane_right::before {
+ right: 0;
+ transform: translate(0, -100%);
+ width: 50px;
+ height: 42px;
+ border-top-left-radius: 5px;
+ background-size: 91%;
+ }
+
+ .dictcontainer::before {
+ left: 50%;
+ width: 100px;
+ height: 25px;
+ transform: translate(-50%, -85%);
+ background-size: 30%;
+ border-top-left-radius: 5px;
+ border-top-right-radius: 5px;
+ }
+
+ .dictcontainer::after {
+ background-color: #79b7e7;
+ }
+
+ #dicttabs {
+ padding-top: 6px;
+ }
+
+ #read-slider {
+ height: 0.5rem;
+ }
+
+ #read-slider::-moz-range-thumb {
+ width: 2.2rem;
+ height: 0.5rem;
+ }
+
+ #read-slider::-webkit-slider-thumb {
+ width: 2.2rem;
+ height: 0.5rem;
+ }
+
+ .reading_header_mid_top {
+ margin-left: 2.3rem;
+ margin-right: 2.3rem;
+ margin-bottom: 0.2rem;
+ }
+
+ .read_page_nav {
+ font-size: 1.5rem;
+ }
+
+ #page_indicator,
+ #headertexttitle {
+ font-size: 1.1rem;
+ }
+
+ span.hamburger {
+ width: 24px;
+ height: 5px;
+ }
+
+ .lutelogo_small {
+ width: 50px;
+ }
+
+ #booktable td:nth-child(2),
+ #booktable th:nth-child(2),
+ #booktable td:nth-child(3),
+ #booktable th:nth-child(3) {
+ display: none;
+ }
+
+ table#term {
+ width: 100% !important;
+ }
+}
+
+
+/* status stats */
+
+.status-bar-container {
+ display: inline-flex;
+ align-items: center;
justify-content: center; + gap: 1px; + width: 100%; + border-radius: 3px; + box-sizing: border-box; + background-color: #787878; + border: 1px solid #787878; +} + +#booktable td:has(.status-bar-container) { + padding-right: 1.5rem; +} + +.book-stats-ajax-cell { + font-style: italic; +} + +.status-bar-container-empty { + border-color: #e6e6e6; + background-color: #fff; +} + +.status-bar { + display: flex; + align-items: center; + height: 0.7rem; + font-size: 0.8rem; + box-sizing: border-box; + /* saturate the colors */ + filter: saturate(2.4) brightness(0.95); +} + +.status-bar0 { + background-color: var(--status-0-color); +} + +.status-bar1 { + background-color: var(--status-1-color); +} + +.status-bar2 { + background-color: var(--status-2-color); +} + +.status-bar3 { + background-color: var(--status-3-color); +} + +.status-bar4 { + background-color: var(--status-4-color); +} + +.status-bar5 { + background-color: var(--status-5-color); +} + +.status-bar99 { + background-color: var(--status-99-color); + filter: saturate(1.4); +} + +#booktable td { + vertical-align: middle; +} + + +.term-listing-image { + max-width: 250px; + max-height: 250px; +} + +/** Term listing (/term) action dropdown. ********************/ + +table#termtable .tagify, +table#termtable .translationDiv { + border: 1px solid transparent; /* Prevent elements pushing others around on hover. */ + box-sizing: border-box; +} + +table#termtable .tagify:hover, +table#termtable .translationDiv:hover { + border: 1px solid; +} + +.term-action-container { + display: flex; +} + +#bulkEditDiv { + margin: 0.5rem; + padding: 1rem; + width: 50%; + border: 1px solid lightgrey; +} + +/* The container
to position the dropdown content */ +.term-action-dropdown { + position: relative; + display: inline-block; + margin-top: 10px; + margin-bottom: 10px; +} + +.term-action-button { + background-color: #04AA6D; + color: white; + padding: 8px; + font-size: 16px; + border: none; +} + +/* Dropdown Content (Hidden by Default) */ +.term-action-content { + display: none; + position: absolute; + background-color: #f1f1f1; + min-width: 160px; + box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2); + z-index: 1; +} + +.term-action-content a { + text-decoration: none; + padding: 12px 16px; + display: block; +} + +.term-action-content a:hover { + background-color: #ddd; +} + +.term-action-content a.actionDisabled { + color: grey; +} + +/* Hover shows content and changes button color. */ +.term-action-dropdown:hover .term-action-content { + display: block; +} +.term-action-dropdown:hover .term-action-button { + background-color: #3e8e41; +} + +div#termtable_wrapper div.dt-buttons { + display: none; +} + +.ajax-saved-checkmark { + position: absolute; + width: 1.5em; /* Define the size of the circle */ + height: 1.5em; /* Make it a perfect circle */ + background: green; + color: white; + border-radius: 50%; /* Turns the square into a circle */ + font-size: 1em; /* Adjust size of the checkmark */ + text-align: center; + line-height: 1.5em; /* Vertically center the checkmark */ + box-shadow: 0px 2px 5px rgba(0, 0, 0, 0.2); + z-index: 2004; +} + +/** Term image search ****************/ + +div#termimagesearch p.termimagesearchtitle { + margin-top: 1rem; + margin-bottom: 1rem; +} + +div#termimagesearch .initial { + border: solid 2px transparent; +} + +div#termimagesearch .highlight { + border: solid 2px red !important; +} + +div#termimagesearch .saved { + border: solid 2px green !important; +} + +div#termimagesearch span > img { + display:inline; +} + +div#termimagesearch span.imageAction { + text-decoration: underline; +} + +div#termimagesearch span.imageAction:hover { + color: #C00000; + cursor: pointer; } diff --git a/lute/static/icn/book-open-text.svg b/lute/static/icn/book-open-text.svg new file mode 100644 index 000000000..ddb051434 --- /dev/null +++ b/lute/static/icn/book-open-text.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/lute/static/icn/bookmark-off.svg b/lute/static/icn/bookmark-off.svg new file mode 100644 index 000000000..fb3eedcc9 --- /dev/null +++ b/lute/static/icn/bookmark-off.svg @@ -0,0 +1,15 @@ + + + + + bookmark-off + + + + + + diff --git a/lute/static/icn/bookmark-on.svg b/lute/static/icn/bookmark-on.svg new file mode 100644 index 000000000..8c7b2914c --- /dev/null +++ b/lute/static/icn/bookmark-on.svg @@ -0,0 +1,15 @@ + + + + + bookmark-on + + + + + + diff --git a/lute/static/icn/caret-left.svg b/lute/static/icn/caret-left.svg new file mode 100644 index 000000000..daaa43cdf --- /dev/null +++ b/lute/static/icn/caret-left.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/lute/static/icn/caret-right.svg b/lute/static/icn/caret-right.svg new file mode 100644 index 000000000..f6254b9fe --- /dev/null +++ b/lute/static/icn/caret-right.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/lute/static/icn/close-white.svg b/lute/static/icn/close-white.svg new file mode 100644 index 000000000..22530c8fd --- /dev/null +++ b/lute/static/icn/close-white.svg @@ -0,0 +1,18 @@ + + + + +ic_fluent_dismiss_28_filled +Created with Sketch. 
+ + + + + + diff --git a/lute/static/icn/close.svg b/lute/static/icn/close.svg new file mode 100644 index 000000000..6baec14bf --- /dev/null +++ b/lute/static/icn/close.svg @@ -0,0 +1,12 @@ + + + + ic_fluent_dismiss_28_filled + Created with Sketch. + + + + + + + \ No newline at end of file diff --git a/lute/static/icn/drag-handle.svg b/lute/static/icn/drag-handle.svg new file mode 100644 index 000000000..eb3edc52b --- /dev/null +++ b/lute/static/icn/drag-handle.svg @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/lute/static/icn/ff.svg b/lute/static/icn/ff.svg new file mode 100644 index 000000000..08e0626be --- /dev/null +++ b/lute/static/icn/ff.svg @@ -0,0 +1,15 @@ + + + + + ff + + + + + + diff --git a/lute/static/icn/font-decrease.svg b/lute/static/icn/font-decrease.svg new file mode 100644 index 000000000..690ec4ab0 --- /dev/null +++ b/lute/static/icn/font-decrease.svg @@ -0,0 +1,22 @@ + + + + +ic_fluent_font_decrease_24_regular +Created with Sketch. + + + + + + + + diff --git a/lute/static/icn/font-increase.svg b/lute/static/icn/font-increase.svg new file mode 100644 index 000000000..1895d8acd --- /dev/null +++ b/lute/static/icn/font-increase.svg @@ -0,0 +1,24 @@ + + + + +ic_fluent_font_increase_24_regular +Created with Sketch. + + + + + + + + + diff --git a/lute/static/icn/images.svg b/lute/static/icn/images.svg new file mode 100644 index 000000000..99229f0c4 --- /dev/null +++ b/lute/static/icn/images.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/lute/static/icn/line-spacing-decrease.svg b/lute/static/icn/line-spacing-decrease.svg new file mode 100644 index 000000000..702de95f7 --- /dev/null +++ b/lute/static/icn/line-spacing-decrease.svg @@ -0,0 +1,29 @@ + + + + + + + + +ic_fluent_text_line_spacing_24_regular +Created with Sketch. + + + + + + + + + + diff --git a/lute/static/icn/line-spacing-increase.svg b/lute/static/icn/line-spacing-increase.svg new file mode 100644 index 000000000..1ee19b3ea --- /dev/null +++ b/lute/static/icn/line-spacing-increase.svg @@ -0,0 +1,7 @@ + + + + + + ic_fluent_text_line_spacing_24_regular Created with Sketch. 
+ \ No newline at end of file diff --git a/lute/static/icn/list.svg b/lute/static/icn/list.svg new file mode 100644 index 000000000..6d915e0e8 --- /dev/null +++ b/lute/static/icn/list.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/lute/static/icn/next.svg b/lute/static/icn/next.svg new file mode 100644 index 000000000..cc524b5ea --- /dev/null +++ b/lute/static/icn/next.svg @@ -0,0 +1,15 @@ + + + + + next + + + + + + diff --git a/lute/static/icn/open.svg b/lute/static/icn/open.svg new file mode 100644 index 000000000..2cb612dec --- /dev/null +++ b/lute/static/icn/open.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/lute/static/icn/pause.svg b/lute/static/icn/pause.svg new file mode 100644 index 000000000..a521ade20 --- /dev/null +++ b/lute/static/icn/pause.svg @@ -0,0 +1,15 @@ + + + + + pause + + + + + + diff --git a/lute/static/icn/pin.svg b/lute/static/icn/pin.svg new file mode 100644 index 000000000..4ae65209a --- /dev/null +++ b/lute/static/icn/pin.svg @@ -0,0 +1,15 @@ + + + + + pin + + + + + + diff --git a/lute/static/icn/play.svg b/lute/static/icn/play.svg new file mode 100644 index 000000000..5b3f21b02 --- /dev/null +++ b/lute/static/icn/play.svg @@ -0,0 +1,15 @@ + + + + + play + + + + + + diff --git a/lute/static/icn/prev.svg b/lute/static/icn/prev.svg new file mode 100644 index 000000000..674ddb7c1 --- /dev/null +++ b/lute/static/icn/prev.svg @@ -0,0 +1,15 @@ + + + + + prev + + + + + + diff --git a/lute/static/icn/reload.png b/lute/static/icn/reload.png new file mode 100644 index 000000000..f67281511 Binary files /dev/null and b/lute/static/icn/reload.png differ diff --git a/lute/static/icn/rewind.svg b/lute/static/icn/rewind.svg new file mode 100644 index 000000000..f42ddc20a --- /dev/null +++ b/lute/static/icn/rewind.svg @@ -0,0 +1,15 @@ + + + + + rewind + + + + + + diff --git a/lute/static/icn/settings-gear-icon.svg b/lute/static/icn/settings-gear-icon.svg new file mode 100644 index 000000000..9072f875b --- /dev/null +++ b/lute/static/icn/settings-gear-icon.svg @@ -0,0 +1 @@ +gear \ No newline at end of file diff --git a/lute/static/icn/skip-back.svg b/lute/static/icn/skip-back.svg new file mode 100644 index 000000000..8cfb94d25 --- /dev/null +++ b/lute/static/icn/skip-back.svg @@ -0,0 +1,15 @@ + + + + + skip-back + + + + + + diff --git a/lute/static/icn/text-column-one.svg b/lute/static/icn/text-column-one.svg new file mode 100644 index 000000000..fe7d1f1e9 --- /dev/null +++ b/lute/static/icn/text-column-one.svg @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/lute/static/icn/text-column-two.svg b/lute/static/icn/text-column-two.svg new file mode 100644 index 000000000..82c779d9b --- /dev/null +++ b/lute/static/icn/text-column-two.svg @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/lute/static/icn/volume.svg b/lute/static/icn/volume.svg new file mode 100644 index 000000000..ba069058f --- /dev/null +++ b/lute/static/icn/volume.svg @@ -0,0 +1,15 @@ + + + + + volume + + + + + + diff --git a/lute/static/img/lute.png b/lute/static/img/lute.png index 3e42b62c4..ad9d247cc 100644 Binary files a/lute/static/img/lute.png and b/lute/static/img/lute.png differ diff --git a/lute/static/iui/backButton.png b/lute/static/iui/backButton.png deleted file mode 100644 index e27ea8cdf..000000000 Binary files a/lute/static/iui/backButton.png and /dev/null differ diff --git a/lute/static/iui/blueButton.png b/lute/static/iui/blueButton.png deleted file mode 100644 index 0f92dfd94..000000000 Binary files 
a/lute/static/iui/blueButton.png and /dev/null differ diff --git a/lute/static/iui/cancel.png b/lute/static/iui/cancel.png deleted file mode 100644 index 5f6dcc87d..000000000 Binary files a/lute/static/iui/cancel.png and /dev/null differ diff --git a/lute/static/iui/grayButton.png b/lute/static/iui/grayButton.png deleted file mode 100644 index 0ce6a30d4..000000000 Binary files a/lute/static/iui/grayButton.png and /dev/null differ diff --git a/lute/static/iui/iui-logo-touch-icon.png b/lute/static/iui/iui-logo-touch-icon.png deleted file mode 100644 index 8817b3022..000000000 Binary files a/lute/static/iui/iui-logo-touch-icon.png and /dev/null differ diff --git a/lute/static/iui/iui.css b/lute/static/iui/iui.css deleted file mode 100644 index ae7f595a5..000000000 --- a/lute/static/iui/iui.css +++ /dev/null @@ -1,386 +0,0 @@ -/* iui.css (c) 2007-9 by iUI Project Members, see LICENSE.txt for license */ -body { - margin: 0; - font-family: Helvetica; - background: #FFFFFF; - color: #000000; - overflow-x: hidden; - -webkit-user-select: none; - -webkit-text-size-adjust: none; -} - -body > *:not(.toolbar) { - display: none; - position: absolute; - margin: 0; - padding: 0; - left: 0; - top: 45px; - width: 100%; - min-height: 372px; -} - -body[orient="landscape"] > *:not(.toolbar) { - min-height: 268px; -} - -body > *[selected="true"] { - display: block; -} - -a[selected], a:active { - background-color: #194fdb !important; - background-image: url(listArrowSel.png), url(selection.png) !important; - background-repeat: no-repeat, repeat-x; - background-position: right center, left top; - color: #FFFFFF !important; -} - -a[selected="progress"] { - background-image: url(loading.gif), url(selection.png) !important; -} - -/************************************************************************************************/ - -body > .toolbar { - box-sizing: border-box; - -moz-box-sizing: border-box; - -webkit-box-sizing: border-box; - border-bottom: 1px solid #2d3642; - border-top: 1px solid #6d84a2; - padding: 10px; - height: 45px; - background: url(toolbar.png) #6d84a2 repeat-x; -} - -.toolbar > h1 { - position: absolute; - overflow: hidden; - left: 50%; - margin: 1px 0 0 -75px; - height: 45px; - font-size: 20px; - width: 150px; - font-weight: bold; - text-shadow: rgba(0, 0, 0, 0.4) 0px -1px 0; - text-align: center; - text-overflow: ellipsis; - white-space: nowrap; - color: #FFFFFF; -} - -body[orient="landscape"] > .toolbar > h1 { - margin-left: -125px; - width: 250px; -} - -.button { - position: absolute; - overflow: hidden; - top: 8px; - right: 6px; - margin: 0; - border-width: 0 5px; - padding: 0 3px; - width: auto; - height: 30px; - line-height: 30px; - font-family: inherit; - font-size: 12px; - font-weight: bold; - color: #FFFFFF; - text-shadow: rgba(0, 0, 0, 0.6) 0px -1px 0; - text-overflow: ellipsis; - text-decoration: none; - white-space: nowrap; - background: none; - -webkit-border-image: url(toolButton.png) 0 5 0 5; -} - -.blueButton { - -webkit-border-image: url(blueButton.png) 0 5 0 5; - border-width: 0 5px; -} - -.leftButton { - left: 6px; - right: auto; -} - -#backButton { - display: none; - left: 6px; - right: auto; - padding: 0; - max-width: 55px; - border-width: 0 8px 0 14px; - -webkit-border-image: url(backButton.png) 0 8 0 14; -} - -.whiteButton, -.grayButton { - display: block; - border-width: 0 12px; - padding: 10px; - text-align: center; - font-size: 20px; - font-weight: bold; - text-decoration: inherit; - color: inherit; -} - -.whiteButton { - -webkit-border-image: url(whiteButton.png) 
0 12 0 12; - text-shadow: rgba(255, 255, 255, 0.7) 0 1px 0; -} - -.grayButton { - -webkit-border-image: url(grayButton.png) 0 12 0 12; - color: #FFFFFF; -} - -/************************************************************************************************/ - -body > ul > li { - position: relative; - margin: 0; - border-bottom: 1px solid #E0E0E0; - padding: 8px 0 8px 10px; - font-size: 20px; - font-weight: bold; - list-style: none; -} - -body > ul > li.group { - position: relative; - top: -1px; - margin-bottom: -2px; - border-top: 1px solid #7d7d7d; - border-bottom: 1px solid #999999; - padding: 1px 10px; - background: url(listGroup.png) repeat-x; - font-size: 17px; - font-weight: bold; - text-shadow: rgba(0, 0, 0, 0.4) 0 1px 0; - color: #FFFFFF; -} - -body > ul > li.group:first-child { - top: 0; - border-top: none; -} - -body > ul > li > a { - display: block; - margin: -8px 0 -8px -10px; - padding: 8px 32px 8px 10px; - text-decoration: none; - color: inherit; - background: url(listArrow.png) no-repeat right center; -} - -a[target="_replace"] { - box-sizing: border-box; - -webkit-box-sizing: border-box; - padding-top: 25px; - padding-bottom: 25px; - font-size: 18px; - color: cornflowerblue; - background-color: #FFFFFF; - background-image: none; -} - -/************************************************************************************************/ - -body > .dialog { - top: 0; - width: 100%; - min-height: 417px; - z-index: 2; - background: rgba(0, 0, 0, 0.8); - padding: 0; - text-align: right; -} - -.dialog > fieldset { - box-sizing: border-box; - -webkit-box-sizing: border-box; - width: 100%; - margin: 0; - border: none; - border-top: 1px solid #6d84a2; - padding: 10px 6px; - background: url(toolbar.png) #7388a5 repeat-x; -} - -.dialog > fieldset > h1 { - margin: 0 10px 0 10px; - padding: 0; - font-size: 20px; - font-weight: bold; - color: #FFFFFF; - text-shadow: rgba(0, 0, 0, 0.4) 0px -1px 0; - text-align: center; -} - -.dialog > fieldset > label { - position: absolute; - margin: 16px 0 0 6px; - font-size: 14px; - color: #999999; -} - -input:not(input[type|=radio]):not(input[type|=checkbox]) { - box-sizing: border-box; - -webkit-box-sizing: border-box; - width: 100%; - margin: 8px 0 0 0; - padding: 6px 6px 6px 44px; - font-size: 16px; - font-weight: normal; -} - -/************************************************************************************************/ - -body > .panel { - box-sizing: border-box; - -moz-box-sizing: border-box; - -webkit-box-sizing: border-box; - padding: 10px; - background: #c8c8c8 url(pinstripes.png); -} - -.panel > fieldset { - position: relative; - margin: 0 0 20px 0; - padding: 0; - background: #FFFFFF; - -webkit-border-radius: 10px; - -moz-border-radius: 10px; - border: 1px solid #999999; - text-align: right; - font-size: 16px; -} - -.row { - position: relative; - min-height: 42px; - border-bottom: 1px solid #999999; - -webkit-border-radius: 0; - text-align: right; -} - -fieldset > .row:last-child { - border-bottom: none !important; -} - -.row > input:not(input[type|=radio]):not(input[type|=checkbox]) { - box-sizing: border-box; - -moz-box-sizing: border-box; - -webkit-box-sizing: border-box; - margin: 0; - border: none; - padding: 12px 10px 0 110px; - height: 42px; - background: none; -} -.row > input[type|=radio], .row > input[type|=checkbox] { - margin: 7px 7px 0 0; - height: 25px; - width: 25px; -} - -.row > label { - position: absolute; - margin: 0 0 0 14px; - line-height: 42px; - font-weight: bold; -} - -.row > span { - position: absolute; - padding: 
12px 10px 0 110px; - margin: 0; -} - -.row > .toggle { - position: absolute; - top: 6px; - right: 6px; - width: 100px; - height: 28px; -} - -.toggle { - border: 1px solid #888888; - -webkit-border-radius: 6px; - background: #FFFFFF url(toggle.png) repeat-x; - font-size: 19px; - font-weight: bold; - line-height: 30px; -} - -.toggle[toggled="true"] { - border: 1px solid #143fae; - background: #194fdb url(toggleOn.png) repeat-x; -} - -.toggleOn { - display: none; - position: absolute; - width: 60px; - text-align: center; - left: 0; - top: 0; - color: #FFFFFF; - text-shadow: rgba(0, 0, 0, 0.4) 0px -1px 0; -} - -.toggleOff { - position: absolute; - width: 60px; - text-align: center; - right: 0; - top: 0; - color: #666666; -} - -.toggle[toggled="true"] > .toggleOn { - display: block; -} - -.toggle[toggled="true"] > .toggleOff { - display: none; -} - -.thumb { - position: absolute; - top: -1px; - left: -1px; - width: 40px; - height: 28px; - border: 1px solid #888888; - -webkit-border-radius: 6px; - background: #ffffff url(thumb.png) repeat-x; -} - -.toggle[toggled="true"] > .thumb { - left: auto; - right: -1px; -} - -.panel > h2 { - margin: 0 0 8px 14px; - font-size: inherit; - font-weight: bold; - color: #4d4d70; - text-shadow: rgba(255, 255, 255, 0.75) 2px 2px 0; -} - -/************************************************************************************************/ - -#preloader { - display: none; - background-image: url(loading.gif), url(selection.png), - url(blueButton.png), url(listArrowSel.png), url(listGroup.png); -} diff --git a/lute/static/iui/iui.js b/lute/static/iui/iui.js deleted file mode 100644 index 7ae401a1c..000000000 --- a/lute/static/iui/iui.js +++ /dev/null @@ -1,442 +0,0 @@ -/* - Copyright (c) 2007-9, iUI Project Members - See LICENSE.txt for licensing terms - */ - - -(function() { - -var slideSpeed = 20; -var slideInterval = 0; - -var currentPage = null; -var currentDialog = null; -var currentWidth = 0; -var currentHash = location.hash; -var hashPrefix = "#_"; -var pageHistory = []; -var newPageCount = 0; -var checkTimer; -var hasOrientationEvent = false; -var portraitVal = "portrait"; -var landscapeVal = "landscape"; - -// ************************************************************************************************* - -window.iui = -{ - showPage: function(page, backwards) - { - if (page) - { - if (currentDialog) - { - currentDialog.removeAttribute("selected"); - currentDialog = null; - } - - if (hasClass(page, "dialog")) - showDialog(page); - else - { - var fromPage = currentPage; - currentPage = page; - - if (fromPage) - setTimeout(slidePages, 0, fromPage, page, backwards); - else - updatePage(page, fromPage); - } - } - }, - - showPageById: function(pageId) - { - var page = $(pageId); - if (page) - { - var index = pageHistory.indexOf(pageId); - var backwards = index != -1; - if (backwards) - pageHistory.splice(index, pageHistory.length); - - iui.showPage(page, backwards); - } - }, - - showPageByHref: function(href, args, method, replace, cb) - { - var req = new XMLHttpRequest(); - req.onerror = function() - { - if (cb) - cb(false); - }; - - req.onreadystatechange = function() - { - if (req.readyState == 4) - { - if (replace) - replaceElementWithSource(replace, req.responseText); - else - { - var frag = document.createElement("div"); - frag.innerHTML = req.responseText; - iui.insertPages(frag.childNodes); - } - if (cb) - setTimeout(cb, 1000, true); - } - }; - - if (args) - { - req.open(method || "GET", href, true); - req.setRequestHeader("Content-Type", 
"application/x-www-form-urlencoded"); - req.setRequestHeader("Content-Length", args.length); - req.send(args.join("&")); - } - else - { - req.open(method || "GET", href, true); - req.send(null); - } - }, - - insertPages: function(nodes) - { - var targetPage; - for (var i = 0; i < nodes.length; ++i) - { - var child = nodes[i]; - if (child.nodeType == 1) - { - if (!child.id) - child.id = "__" + (++newPageCount) + "__"; - - var clone = $(child.id); - if (clone) - clone.parentNode.replaceChild(child, clone); - else - document.body.appendChild(child); - - if (child.getAttribute("selected") == "true" || !targetPage) - targetPage = child; - - --i; - } - } - - if (targetPage) - iui.showPage(targetPage); - }, - - getSelectedPage: function() - { - for (var child = document.body.firstChild; child; child = child.nextSibling) - { - if (child.nodeType == 1 && child.getAttribute("selected") == "true") - return child; - } - }, - isNativeUrl: function(href) - { - for(var i = 0; i < iui.nativeUrlPatterns.length; i++) - { - if(href.match(iui.nativeUrlPatterns[i])) return true; - } - return false; - }, - nativeUrlPatterns: [ - new RegExp("^http:\/\/maps.google.com\/maps\?"), - new RegExp("^mailto:"), - new RegExp("^tel:"), - new RegExp("^http:\/\/www.youtube.com\/watch\\?v="), - new RegExp("^http:\/\/www.youtube.com\/v\/") - ] -}; - -// ************************************************************************************************* - -addEventListener("load", function(event) -{ - var page = iui.getSelectedPage(); - if (page) - iui.showPage(page); - - setTimeout(preloadImages, 0); - setTimeout(checkOrientAndLocation, 0); - checkTimer = setInterval(checkOrientAndLocation, 300); -}, false); - -addEventListener("unload", function(event) -{ - return; -}, false); - -addEventListener("click", function(event) -{ - var link = findParent(event.target, "a"); - if (link) - { - function unselect() { link.removeAttribute("selected"); } - - if (link.href && link.hash && link.hash != "#") - { - link.setAttribute("selected", "true"); - iui.showPage($(link.hash.substr(1))); - setTimeout(unselect, 500); - } - else if (link == $("backButton")) - history.back(); - else if (link.getAttribute("type") == "submit") - submitForm(findParent(link, "form")); - else if (link.getAttribute("type") == "cancel") - cancelDialog(findParent(link, "form")); - else if (link.target == "_replace") - { - link.setAttribute("selected", "progress"); - iui.showPageByHref(link.href, null, null, link, unselect); - } - else if (iui.isNativeUrl(link.href)) - { - return; - } - else if (!link.target) - { - link.setAttribute("selected", "progress"); - iui.showPageByHref(link.href, null, null, null, unselect); - } - else - return; - - event.preventDefault(); - } -}, true); - -addEventListener("click", function(event) -{ - var div = findParent(event.target, "div"); - if (div && hasClass(div, "toggle")) - { - div.setAttribute("toggled", div.getAttribute("toggled") != "true"); - event.preventDefault(); - } -}, true); - -function orientChangeHandler() -{ - var orientation=window.orientation; - switch(orientation) - { - case 0: - setOrientation(portraitVal); - break; - - case 90: - case -90: - setOrientation(landscapeVal); - break; - } -} - -if (typeof window.onorientationchange == "object") -{ - window.onorientationchange=orientChangeHandler; - hasOrientationEvent = true; - setTimeout(orientChangeHandler, 0); -} - -function checkOrientAndLocation() -{ - if (!hasOrientationEvent) - { - if (window.innerWidth != currentWidth) - { - currentWidth = window.innerWidth; - 
var orient = currentWidth == 320 ? portraitVal : landscapeVal; - setOrientation(orient); - } - } - - if (location.hash != currentHash) - { - var pageId = location.hash.substr(hashPrefix.length); - iui.showPageById(pageId); - } -} - -function setOrientation(orient) -{ - document.body.setAttribute("orient", orient); - setTimeout(scrollTo, 100, 0, 1); -} - -function showDialog(page) -{ - currentDialog = page; - page.setAttribute("selected", "true"); - - if (hasClass(page, "dialog") && !page.target) - showForm(page); -} - -function showForm(form) -{ - form.onsubmit = function(event) - { - event.preventDefault(); - submitForm(form); - }; - - form.onclick = function(event) - { - if (event.target == form && hasClass(form, "dialog")) - cancelDialog(form); - }; -} - -function cancelDialog(form) -{ - form.removeAttribute("selected"); -} - -function updatePage(page, fromPage) -{ - if (!page.id) - page.id = "__" + (++newPageCount) + "__"; - - location.href = currentHash = hashPrefix + page.id; - pageHistory.push(page.id); - - var pageTitle = $("pageTitle"); - if (page.title) - pageTitle.innerHTML = page.title; - - if (page.localName.toLowerCase() == "form" && !page.target) - showForm(page); - - var backButton = $("backButton"); - if (backButton) - { - var prevPage = $(pageHistory[pageHistory.length-2]); - if (prevPage && !page.getAttribute("hideBackButton")) - { - backButton.style.display = "inline"; - backButton.innerHTML = prevPage.title ? prevPage.title : "Back"; - } - else - backButton.style.display = "none"; - } -} - -function slidePages(fromPage, toPage, backwards) -{ - var axis = (backwards ? fromPage : toPage).getAttribute("axis"); - if (axis == "y") - (backwards ? fromPage : toPage).style.top = "100%"; - else - toPage.style.left = "100%"; - - toPage.setAttribute("selected", "true"); - scrollTo(0, 1); - clearInterval(checkTimer); - - var percent = 100; - slide(); - var timer = setInterval(slide, slideInterval); - - function slide() - { - percent -= slideSpeed; - if (percent <= 0) - { - percent = 0; - if (!hasClass(toPage, "dialog")) - fromPage.removeAttribute("selected"); - clearInterval(timer); - checkTimer = setInterval(checkOrientAndLocation, 300); - setTimeout(updatePage, 0, toPage, fromPage); - } - - if (axis == "y") - { - backwards - ? fromPage.style.top = (100-percent) + "%" - : toPage.style.top = percent + "%"; - } - else - { - fromPage.style.left = (backwards ? (100-percent) : (percent-100)) + "%"; - toPage.style.left = (backwards ? 
-percent : percent) + "%"; - } - } -} - -function preloadImages() -{ - var preloader = document.createElement("div"); - preloader.id = "preloader"; - document.body.appendChild(preloader); -} - -function submitForm(form) -{ - iui.showPageByHref(form.action || "POST", encodeForm(form), form.method); -} - -function encodeForm(form) -{ - function encode(inputs) - { - for (var i = 0; i < inputs.length; ++i) - { - if (inputs[i].name) - args.push(inputs[i].name + "=" + escape(inputs[i].value)); - } - } - - var args = []; - encode(form.getElementsByTagName("input")); - encode(form.getElementsByTagName("textarea")); - encode(form.getElementsByTagName("select")); - return args; -} - -function findParent(node, localName) -{ - while (node && (node.nodeType != 1 || node.localName.toLowerCase() != localName)) - node = node.parentNode; - return node; -} - -function hasClass(self, name) -{ - var re = new RegExp("(^|\\s)"+name+"($|\\s)"); - return re.exec(self.getAttribute("class")) != null; -} - -function replaceElementWithSource(replace, source) -{ - var page = replace.parentNode; - var parent = replace; - while (page.parentNode != document.body) - { - page = page.parentNode; - parent = parent.parentNode; - } - - var frag = document.createElement(parent.localName); - frag.innerHTML = source; - - page.removeChild(parent); - - while (frag.firstChild) - page.appendChild(frag.firstChild); -} - -function $(id) { return document.getElementById(id); } -function ddd() { console.log.apply(console, arguments); } - -})(); diff --git a/lute/static/iui/iuix.css b/lute/static/iui/iuix.css deleted file mode 100644 index 67eead37e..000000000 --- a/lute/static/iui/iuix.css +++ /dev/null @@ -1 +0,0 @@ -body{margin:0;font-family:Helvetica;background:#FFF;color:#000;overflow-x:hidden;-webkit-user-select:none;-webkit-text-size-adjust:none;}body>*:not(.toolbar){display:none;position:absolute;margin:0;padding:0;left:0;top:45px;width:100%;min-height:372px;}body[orient="landscape"]>*:not(.toolbar){min-height:268px;}body>*[selected="true"]{display:block;}a[selected],a:active{background-color:#194fdb!important;background-image:url(listArrowSel.png),url(selection.png)!important;background-repeat:no-repeat,repeat-x;background-position:right center,left top;color:#FFF!important;}a[selected="progress"]{background-image:url(loading.gif),url(selection.png)!important;}body>.toolbar{box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;border-bottom:1px solid #2d3642;border-top:1px solid #6d84a2;padding:10px;height:45px;background:url(toolbar.png) #6d84a2 repeat-x;}.toolbar>h1{position:absolute;overflow:hidden;left:50%;margin:1px 0 0 -75px;height:45px;font-size:20px;width:150px;font-weight:bold;text-shadow:rgba(0,0,0,0.4) 0 -1px 0;text-align:center;text-overflow:ellipsis;white-space:nowrap;color:#FFF;}body[orient="landscape"]>.toolbar>h1{margin-left:-125px;width:250px;}.button{position:absolute;overflow:hidden;top:8px;right:6px;margin:0;border-width:0 5px;padding:0 3px;width:auto;height:30px;line-height:30px;font-family:inherit;font-size:12px;font-weight:bold;color:#FFF;text-shadow:rgba(0,0,0,0.6) 0 -1px 0;text-overflow:ellipsis;text-decoration:none;white-space:nowrap;background:none;-webkit-border-image:url(toolButton.png) 0 5 0 5;}.blueButton{-webkit-border-image:url(blueButton.png) 0 5 0 5;border-width:0 5px;}.leftButton{left:6px;right:auto;}#backButton{display:none;left:6px;right:auto;padding:0;max-width:55px;border-width:0 8px 0 14px;-webkit-border-image:url(backButton.png) 0 8 0 
14;}.whiteButton,.grayButton{display:block;border-width:0 12px;padding:10px;text-align:center;font-size:20px;font-weight:bold;text-decoration:inherit;color:inherit;}.whiteButton{-webkit-border-image:url(whiteButton.png) 0 12 0 12;text-shadow:rgba(255,255,255,0.7) 0 1px 0;}.grayButton{-webkit-border-image:url(grayButton.png) 0 12 0 12;color:#FFF;}body>ul>li{position:relative;margin:0;border-bottom:1px solid #E0E0E0;padding:8px 0 8px 10px;font-size:20px;font-weight:bold;list-style:none;}body>ul>li.group{position:relative;top:-1px;margin-bottom:-2px;border-top:1px solid #7d7d7d;border-bottom:1px solid #999;padding:1px 10px;background:url(listGroup.png) repeat-x;font-size:17px;font-weight:bold;text-shadow:rgba(0,0,0,0.4) 0 1px 0;color:#FFF;}body>ul>li.group:first-child{top:0;border-top:none;}body>ul>li>a{display:block;margin:-8px 0 -8px -10px;padding:8px 32px 8px 10px;text-decoration:none;color:inherit;background:url(listArrow.png) no-repeat right center;}a[target="_replace"]{box-sizing:border-box;-webkit-box-sizing:border-box;padding-top:25px;padding-bottom:25px;font-size:18px;color:cornflowerblue;background-color:#FFF;background-image:none;}body>.dialog{top:0;width:100%;min-height:417px;z-index:2;background:rgba(0,0,0,0.8);padding:0;text-align:right;}.dialog>fieldset{box-sizing:border-box;-webkit-box-sizing:border-box;width:100%;margin:0;border:none;border-top:1px solid #6d84a2;padding:10px 6px;background:url(toolbar.png) #7388a5 repeat-x;}.dialog>fieldset>h1{margin:0 10px 0 10px;padding:0;font-size:20px;font-weight:bold;color:#FFF;text-shadow:rgba(0,0,0,0.4) 0 -1px 0;text-align:center;}.dialog>fieldset>label{position:absolute;margin:16px 0 0 6px;font-size:14px;color:#999;}input:not(input[type|=radio]):not(input[type|=checkbox]){box-sizing:border-box;-webkit-box-sizing:border-box;width:100%;margin:8px 0 0 0;padding:6px 6px 6px 44px;font-size:16px;font-weight:normal;}body>.panel{box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;padding:10px;background:#c8c8c8 url(pinstripes.png);}.panel>fieldset{position:relative;margin:0 0 20px 0;padding:0;background:#FFF;-webkit-border-radius:10px;-moz-border-radius:10px;border:1px solid #999;text-align:right;font-size:16px;}.row{position:relative;min-height:42px;border-bottom:1px solid #999;-webkit-border-radius:0;text-align:right;}fieldset>.row:last-child{border-bottom:none!important;}.row>input:not(input[type|=radio]):not(input[type|=checkbox]){box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;margin:0;border:none;padding:12px 10px 0 110px;height:42px;background:none;}.row>input[type|=radio],.row>input[type|=checkbox]{margin:7px 7px 0 0;height:25px;width:25px;}.row>label{position:absolute;margin:0 0 0 14px;line-height:42px;font-weight:bold;}.row>span{position:absolute;padding:12px 10px 0 110px;margin:0;}.row>.toggle{position:absolute;top:6px;right:6px;width:100px;height:28px;}.toggle{border:1px solid #888;-webkit-border-radius:6px;background:#FFF url(toggle.png) repeat-x;font-size:19px;font-weight:bold;line-height:30px;}.toggle[toggled="true"]{border:1px solid #143fae;background:#194fdb url(toggleOn.png) repeat-x;}.toggleOn{display:none;position:absolute;width:60px;text-align:center;left:0;top:0;color:#FFF;text-shadow:rgba(0,0,0,0.4) 0 -1px 
0;}.toggleOff{position:absolute;width:60px;text-align:center;right:0;top:0;color:#666;}.toggle[toggled="true"]>.toggleOn{display:block;}.toggle[toggled="true"]>.toggleOff{display:none;}.thumb{position:absolute;top:-1px;left:-1px;width:40px;height:28px;border:1px solid #888;-webkit-border-radius:6px;background:#fff url(thumb.png) repeat-x;}.toggle[toggled="true"]>.thumb{left:auto;right:-1px;}.panel>h2{margin:0 0 8px 14px;font-size:inherit;font-weight:bold;color:#4d4d70;text-shadow:rgba(255,255,255,0.75) 2px 2px 0;}#preloader{display:none;background-image:url(loading.gif),url(selection.png),url(blueButton.png),url(listArrowSel.png),url(listGroup.png);} \ No newline at end of file diff --git a/lute/static/iui/iuix.js b/lute/static/iui/iuix.js deleted file mode 100644 index 68e1cba10..000000000 --- a/lute/static/iui/iuix.js +++ /dev/null @@ -1 +0,0 @@ -(function(){var _1=20;var _2=0;var _3=null;var _4=null;var _5=0;var _6=location.hash;var _7="#_";var _8=[];var _9=0;var _a;var _b=false;var _c="portrait";var _d="landscape";window.iui={showPage:function(_e,_f){if(_e){if(_4){_4.removeAttribute("selected");_4=null;}if(hasClass(_e,"dialog")){showDialog(_e);}else{var _10=_3;_3=_e;if(_10){setTimeout(slidePages,0,_10,_e,_f);}else{updatePage(_e,_10);}}}},showPageById:function(_11){var _12=$(_11);if(_12){var _13=_8.indexOf(_11);var _14=_13!=-1;if(_14){_8.splice(_13,_8.length);}iui.showPage(_12,_14);}},showPageByHref:function(_15,_16,_17,_18,cb){var req=new XMLHttpRequest();req.onerror=function(){if(cb){cb(false);}};req.onreadystatechange=function(){if(req.readyState==4){if(_18){replaceElementWithSource(_18,req.responseText);}else{var _1b=document.createElement("div");_1b.innerHTML=req.responseText;iui.insertPages(_1b.childNodes);}if(cb){setTimeout(cb,1000,true);}}};if(_16){req.open(_17||"GET",_15,true);req.setRequestHeader("Content-Type","application/x-www-form-urlencoded");req.setRequestHeader("Content-Length",_16.length);req.send(_16.join("&"));}else{req.open(_17||"GET",_15,true);req.send(null);}},insertPages:function(_1c){var _1d;for(var i=0;i<_1c.length;++i){var _1f=_1c[i];if(_1f.nodeType==1){if(!_1f.id){_1f.id="__"+(++_9)+"__";}var _20=$(_1f.id);if(_20){_20.parentNode.replaceChild(_1f,_20);}else{document.body.appendChild(_1f);}if(_1f.getAttribute("selected")=="true"||!_1d){_1d=_1f;}--i;}}if(_1d){iui.showPage(_1d);}},getSelectedPage:function(){for(var _21=document.body.firstChild;_21;_21=_21.nextSibling){if(_21.nodeType==1&&_21.getAttribute("selected")=="true"){return _21;}}},isNativeUrl:function(_22){for(var i=0;i").appendTo(l)),h.nTHead=n[0],l.children("tbody")),n=(0===a.length&&(a=P("").insertAfter(n)),h.nTBody=a[0],l.children("tfoot"));if(0===(n=0===n.length&&0").appendTo(l):n).length||0===n.children().length?l.addClass(p.sNoFooter):0/g,X=/^\d{2,4}[\.\/\-]\d{1,2}[\.\/\-]\d{1,2}([T ]{1}\d{1,2}[:\.]\d{2}([\.:]\d{2})?)?$/,J=new RegExp("(\\"+["/",".","*","+","?","|","(",")","[","]","{","}","\\","$","^","-"].join("|\\")+")","g"),q=/['\u00A0,$£€¥%\u2009\u202F\u20BD\u20a9\u20BArfkɃΞ]/gi,h=function(t){return!t||!0===t||"-"===t},$=function(t,e){return c[e]||(c[e]=new RegExp(Ot(e),"g")),"string"==typeof t&&"."!==e?t.replace(/\./g,"").replace(c[e],"."):t},N=function(t,e,n){var a=[],r=0,o=t.length;if(n!==H)for(;r").css({position:"fixed",top:0,left:-1*P(j).scrollLeft(),height:1,width:1,overflow:"hidden"}).append(P("
").css({position:"absolute",top:1,left:1,width:100,overflow:"scroll"}).append(P("
").css({width:"100%",height:10}))).appendTo("body")).children()).children(),e.barWidth=a[0].offsetWidth-a[0].clientWidth,e.bScrollOversize=100===r[0].offsetWidth&&100!==a[0].clientWidth,e.bScrollbarLeft=1!==Math.round(r.offset().left),e.bBounding=!!n[0].getBoundingClientRect().width,n.remove()),P.extend(t.oBrowser,w.__browser),t.oScroll.iBarWidth=w.__browser.barWidth}function et(t,e,n,a,r,o){var i,l=a,s=!1;for(n!==H&&(i=n,s=!0);l!==r;)t.hasOwnProperty(l)&&(i=s?e(i,t[l],l,t):t[l],s=!0,l+=o);return i}function nt(t,e){var n=w.defaults.column,a=t.aoColumns.length,n=P.extend({},w.models.oColumn,n,{nTh:e||v.createElement("th"),sTitle:n.sTitle||(e?e.innerHTML:""),aDataSort:n.aDataSort||[a],mData:n.mData||a,idx:a}),n=(t.aoColumns.push(n),t.aoPreSearchCols);n[a]=P.extend({},w.models.oSearch,n[a]),at(t,a,P(e).data())}function at(t,e,n){function a(t){return"string"==typeof t&&-1!==t.indexOf("@")}var e=t.aoColumns[e],r=t.oClasses,o=P(e.nTh),i=(!e.sWidthOrig&&(e.sWidthOrig=o.attr("width")||null,u=(o.attr("style")||"").match(/width:\s*(\d+[pxem%]+)/))&&(e.sWidthOrig=u[1]),n!==H&&null!==n&&(Q(n),C(w.defaults.column,n,!0),n.mDataProp===H||n.mData||(n.mData=n.mDataProp),n.sType&&(e._sManualType=n.sType),n.className&&!n.sClass&&(n.sClass=n.className),n.sClass&&o.addClass(n.sClass),u=e.sClass,P.extend(e,n),F(e,n,"sWidth","sWidthOrig"),u!==e.sClass&&(e.sClass=u+" "+e.sClass),n.iDataSort!==H&&(e.aDataSort=[n.iDataSort]),F(e,n,"aDataSort"),e.ariaTitle||(e.ariaTitle=o.attr("aria-label"))),e.mData),l=A(i),s=e.mRender?A(e.mRender):null,u=(e._bAttrSrc=P.isPlainObject(i)&&(a(i.sort)||a(i.type)||a(i.filter)),e._setter=null,e.fnGetData=function(t,e,n){var a=l(t,e,H,n);return s&&e?s(a,e,t,n):a},e.fnSetData=function(t,e,n){return b(i)(t,e,n)},"number"==typeof i||e._isArrayHost||(t._rowReadObject=!0),t.oFeatures.bSort||(e.bSortable=!1,o.addClass(r.sSortableNone)),-1!==P.inArray("asc",e.asSorting)),n=-1!==P.inArray("desc",e.asSorting);e.bSortable&&(u||n)?u&&!n?(e.sSortingClass=r.sSortableAsc,e.sSortingClassJUI=r.sSortJUIAscAllowed):!u&&n?(e.sSortingClass=r.sSortableDesc,e.sSortingClassJUI=r.sSortJUIDescAllowed):(e.sSortingClass=r.sSortable,e.sSortingClassJUI=r.sSortJUI):(e.sSortingClass=r.sSortableNone,e.sSortingClassJUI="")}function O(t){if(!1!==t.oFeatures.bAutoWidth){var e=t.aoColumns;ee(t);for(var n=0,a=e.length;ne&&t[r]--;-1!=a&&n===H&&t.splice(a,1)}function bt(n,a,t,e){function r(t,e){for(;t.childNodes.length;)t.removeChild(t.firstChild);t.innerHTML=S(n,a,e,"display")}var o,i,l=n.aoData[a];if("dom"!==t&&(t&&"auto"!==t||"dom"!==l.src)){var s=l.anCells;if(s)if(e!==H)r(s[e],e);else for(o=0,i=s.length;o").appendTo(r)),c=0,f=s.length;c=s.fnRecordsDisplay()?0:l,s.iInitDisplayStart=-1);var n=R(t,"aoPreDrawCallback","preDraw",[t]);if(-1!==P.inArray(!1,n))D(t,!1);else{var a=[],r=0,o=t.asStripeClasses,i=o.length,l=t.oLanguage,s="ssp"==E(t),u=t.aiDisplay,n=t._iDisplayStart,c=t.fnDisplayEnd();if(t.bDrawing=!0,t.bDeferLoading)t.bDeferLoading=!1,t.iDraw++,D(t,!1);else if(s){if(!t.bDestroying&&!e)return void xt(t)}else t.iDraw++;if(0!==u.length)for(var f=s?t.aoData.length:c,d=s?0:n;d",{class:i?o[0]:""}).append(P("",{valign:"top",colSpan:T(t),class:t.oClasses.sRowEmpty}).html(e))[0]}R(t,"aoHeaderCallback","header",[P(t.nTHead).children("tr")[0],ht(t),n,c,u]),R(t,"aoFooterCallback","footer",[P(t.nTFoot).children("tr")[0],ht(t),n,c,u]);s=P(t.nTBody);s.children().detach(),s.append(P(a)),R(t,"aoDrawCallback","draw",[t]),t.bSorted=!1,t.bFiltered=!1,t.bDrawing=!1}}function u(t,e){var 
n=t.oFeatures,a=n.bSort,n=n.bFilter;a&&ie(t),n?Rt(t,t.oPreviousSearch):t.aiDisplay=t.aiDisplayMaster.slice(),!0!==e&&(t._iDisplayStart=0),t._drawHold=e,y(t),t._drawHold=!1}function _t(t){for(var e,n,a,r,o,i,l,s=t.oClasses,u=P(t.nTable),u=P("
").insertBefore(u),c=t.oFeatures,f=P("
",{id:t.sTableId+"_wrapper",class:s.sWrapper+(t.nTFoot?"":" "+s.sNoFooter)}),d=(t.nHolding=u[0],t.nTableWrapper=f[0],t.nTableReinsertBefore=t.nTable.nextSibling,t.sDom.split("")),h=0;h")[0],"'"==(r=d[h+1])||'"'==r){for(o="",i=2;d[h+i]!=r;)o+=d[h+i],i++;"H"==o?o=s.sJUIHeader:"F"==o&&(o=s.sJUIFooter),-1!=o.indexOf(".")?(l=o.split("."),a.id=l[0].substr(1,l[0].length-1),a.className=l[1]):"#"==o.charAt(0)?a.id=o.substr(1,o.length-1):a.className=o,h+=i}f.append(a),f=P(a)}else if(">"==n)f=f.parent();else if("l"==n&&c.bPaginate&&c.bLengthChange)e=Gt(t);else if("f"==n&&c.bFilter)e=Lt(t);else if("r"==n&&c.bProcessing)e=Zt(t);else if("t"==n)e=Kt(t);else if("i"==n&&c.bInfo)e=Ut(t);else if("p"==n&&c.bPaginate)e=zt(t);else if(0!==w.ext.feature.length)for(var p=w.ext.feature,g=0,b=p.length;g',s=(s=r.sSearch).match(/_INPUT_/)?s.replace("_INPUT_",l):s+l,l=P("
",{id:i.f?null:a+"_filter",class:t.sFilter}).append(P("
").addClass(t.sLength);return a.aanFeatures.l||(c[0].id=e+"_length"),c.children().append(a.oLanguage.sLengthMenu.replace("_MENU_",l[0].outerHTML)),P("select",c).val(a._iDisplayLength).on("change.DT",function(t){$t(a,P(this).val()),y(a)}),P(a.nTable).on("length.dt.DT",function(t,e,n){a===e&&P("select",c).val(n)}),c[0]}function zt(t){function c(t){y(t)}var e=t.sPaginationType,f=w.ext.pager[e],d="function"==typeof f,e=P("
").addClass(t.oClasses.sPaging+e)[0],h=t.aanFeatures;return d||f.fnInit(t,e,c),h.p||(e.id=t.sTableId+"_paginate",t.aoDrawCallback.push({fn:function(t){if(d)for(var e=t._iDisplayStart,n=t._iDisplayLength,a=t.fnRecordsDisplay(),r=-1===n,o=r?0:Math.ceil(e/n),i=r?1:Math.ceil(a/n),l=f(o,i),s=0,u=h.p.length;s",{id:t.aanFeatures.r?null:t.sTableId+"_processing",class:t.oClasses.sProcessing,role:"status"}).html(t.oLanguage.sProcessing).append("
").insertBefore(t.nTable)[0]}function D(t,e){t.oFeatures.bProcessing&&P(t.aanFeatures.r).css("display",e?"block":"none"),R(t,null,"processing",[t,e])}function Kt(t){var e,n,a,r,o,i,l,s,u,c,f,d,h=P(t.nTable),p=t.oScroll;return""===p.sX&&""===p.sY?t.nTable:(e=p.sX,n=p.sY,a=t.oClasses,o=(r=h.children("caption")).length?r[0]._captionSide:null,s=P(h[0].cloneNode(!1)),i=P(h[0].cloneNode(!1)),u=function(t){return t?M(t):null},(l=h.children("tfoot")).length||(l=null),s=P(f="
",{class:a.sScrollWrapper}).append(P(f,{class:a.sScrollHead}).css({overflow:"hidden",position:"relative",border:0,width:e?u(e):"100%"}).append(P(f,{class:a.sScrollHeadInner}).css({"box-sizing":"content-box",width:p.sXInner||"100%"}).append(s.removeAttr("id").css("margin-left",0).append("top"===o?r:null).append(h.children("thead"))))).append(P(f,{class:a.sScrollBody}).css({position:"relative",overflow:"auto",width:u(e)}).append(h)),l&&s.append(P(f,{class:a.sScrollFoot}).css({overflow:"hidden",border:0,width:e?u(e):"100%"}).append(P(f,{class:a.sScrollFootInner}).append(i.removeAttr("id").css("margin-left",0).append("bottom"===o?r:null).append(h.children("tfoot"))))),u=s.children(),c=u[0],f=u[1],d=l?u[2]:null,e&&P(f).on("scroll.DT",function(t){var e=this.scrollLeft;c.scrollLeft=e,l&&(d.scrollLeft=e)}),P(f).css("max-height",n),p.bCollapse||P(f).css("height",n),t.nScrollHead=c,t.nScrollBody=f,t.nScrollFoot=d,t.aoDrawCallback.push({fn:Qt,sName:"scrolling"}),s[0])}function Qt(n){function t(t){(t=t.style).paddingTop="0",t.paddingBottom="0",t.borderTopWidth="0",t.borderBottomWidth="0",t.height=0}var e,a,r,o,i,l=n.oScroll,s=l.sX,u=l.sXInner,c=l.sY,l=l.iBarWidth,f=P(n.nScrollHead),d=f[0].style,h=f.children("div"),p=h[0].style,h=h.children("table"),g=n.nScrollBody,b=P(g),m=g.style,S=P(n.nScrollFoot).children("div"),v=S.children("table"),y=P(n.nTHead),D=P(n.nTable),_=D[0],w=_.style,C=n.nTFoot?P(n.nTFoot):null,T=n.oBrowser,x=T.bScrollOversize,A=(N(n.aoColumns,"nTh"),[]),I=[],F=[],L=[],R=g.scrollHeight>g.clientHeight;n.scrollBarVis!==R&&n.scrollBarVis!==H?(n.scrollBarVis=R,O(n)):(n.scrollBarVis=R,D.children("thead, tfoot").remove(),C&&(R=C.clone().prependTo(D),i=C.find("tr"),a=R.find("tr"),R.find("[id]").removeAttr("id")),R=y.clone().prependTo(D),y=y.find("tr"),e=R.find("tr"),R.find("th, td").removeAttr("tabindex"),R.find("[id]").removeAttr("id"),s||(m.width="100%",f[0].style.width="100%"),P.each(Ct(n,R),function(t,e){r=rt(n,t),e.style.width=n.aoColumns[r].sWidth}),C&&k(function(t){t.style.width=""},a),f=D.outerWidth(),""===s?(w.width="100%",x&&(D.find("tbody").height()>g.offsetHeight||"scroll"==b.css("overflow-y"))&&(w.width=M(D.outerWidth()-l)),f=D.outerWidth()):""!==u&&(w.width=M(u),f=D.outerWidth()),k(t,e),k(function(t){var e=j.getComputedStyle?j.getComputedStyle(t).width:M(P(t).width());F.push(t.innerHTML),A.push(e)},e),k(function(t,e){t.style.width=A[e]},y),P(e).css("height",0),C&&(k(t,a),k(function(t){L.push(t.innerHTML),I.push(M(P(t).css("width")))},a),k(function(t,e){t.style.width=I[e]},i),P(a).height(0)),k(function(t,e){t.innerHTML='
'+F[e]+"
",t.childNodes[0].style.height="0",t.childNodes[0].style.overflow="hidden",t.style.width=A[e]},e),C&&k(function(t,e){t.innerHTML='
'+L[e]+"
",t.childNodes[0].style.height="0",t.childNodes[0].style.overflow="hidden",t.style.width=I[e]},a),Math.round(D.outerWidth())g.offsetHeight||"scroll"==b.css("overflow-y")?f+l:f,x&&(g.scrollHeight>g.offsetHeight||"scroll"==b.css("overflow-y"))&&(w.width=M(o-l)),""!==s&&""===u||W(n,1,"Possible column misalignment",6)):o="100%",m.width=M(o),d.width=M(o),C&&(n.nScrollFoot.style.width=M(o)),c||x&&(m.height=M(_.offsetHeight+l)),R=D.outerWidth(),h[0].style.width=M(R),p.width=M(R),y=D.height()>g.clientHeight||"scroll"==b.css("overflow-y"),p[i="padding"+(T.bScrollbarLeft?"Left":"Right")]=y?l+"px":"0px",C&&(v[0].style.width=M(R),S[0].style.width=M(R),S[0].style[i]=y?l+"px":"0px"),D.children("colgroup").insertBefore(D.children("thead")),b.trigger("scroll"),!n.bSorted&&!n.bFiltered||n._drawHold||(g.scrollTop=0))}function k(t,e,n){for(var a,r,o=0,i=0,l=e.length;i/g;function ee(t){var e,n,a=t.nTable,r=t.aoColumns,o=t.oScroll,i=o.sY,l=o.sX,o=o.sXInner,s=r.length,u=it(t,"bVisible"),c=P("th",t.nTHead),f=a.getAttribute("width"),d=a.parentNode,h=!1,p=t.oBrowser,g=p.bScrollOversize,b=a.style.width,m=(b&&-1!==b.indexOf("%")&&(f=b),ae(N(r,"sWidthOrig"),d));for(_=0;_").appendTo(b.find("tbody")));for(b.find("thead, tfoot").remove(),b.append(P(t.nTHead).clone()).append(P(t.nTFoot).clone()),b.find("tfoot th, tfoot td").css("width",""),c=Ct(t,b.find("thead")[0]),_=0;_").css({width:e.sWidthOrig,margin:0,padding:0,border:0,height:1}));if(t.aoData.length)for(_=0;_").css(l||i?{position:"absolute",top:0,left:0,height:1,right:0,overflow:"hidden"}:{}).append(b).appendTo(d),D=(l&&o?b.width(o):l?(b.css("width","auto"),b.removeAttr("width"),b.width()").css("width",M(t[r])).appendTo(e||v.body)):n.push(null);for(r=0;r").html(S(t,a,e,"display"))[0]}function oe(t,e){for(var n,a=-1,r=-1,o=0,i=t.aoData.length;oa&&(a=n.length,r=o);return r}function M(t){return null===t?"0px":"number"==typeof t?t<0?"0px":t+"px":t.match(/\d$/)?t+"px":t}function I(t){function e(t){t.length&&!Array.isArray(t[0])?h.push(t):P.merge(h,t)}var n,a,r,o,i,l,s,u=[],c=t.aoColumns,f=t.aaSortingFixed,d=P.isPlainObject(f),h=[];for(Array.isArray(f)&&e(f),d&&f.pre&&e(f.pre),e(t.aaSorting),d&&f.post&&e(f.post),n=0;n/g,""),u=i.nTh;u.removeAttribute("aria-sort"),i=i.bSortable?s+("asc"===(0=o.length?[0,e[1]]:e)})),t.search!==H&&P.extend(n.oPreviousSearch,Bt(t.search)),t.columns){for(a=0,r=t.columns.length;a").addClass(e),P("td",n).addClass(e).html(t)[0].colSpan=T(o),l.push(n[0]))}var l=[];i(e,n),t._details&&t._details.detach(),t._details=P(l),t._detailsShow&&t._details.insertAfter(t.nTr)}function xe(t,e){var n=t.context;if(n.length&&t.length){var a=n[0].aoData[t[0]];if(a._details){(a._detailsShow=e)?(a._details.insertAfter(a.nTr),P(a.nTr).addClass("dt-hasChild")):(a._details.detach(),P(a.nTr).removeClass("dt-hasChild")),R(n[0],null,"childRow",[e,t.row(t[0])]);var s=n[0],r=new B(s),a=".dt.DT_details",e="draw"+a,t="column-sizing"+a,a="destroy"+a,u=s.aoData;if(r.off(e+" "+t+" "+a),N(u,"_details").length>0){r.on(e,function(t,e){if(s!==e)return;r.rows({page:"current"}).eq(0).each(function(t){var e=u[t];if(e._detailsShow)e._details.insertAfter(e.nTr)})});r.on(t,function(t,e,n,a){if(s!==e)return;var r,o=T(e);for(var i=0,l=u.length;it?new B(e[t],this[t]):null},filter:function(t){var e=[];if(o.filter)e=o.filter.call(this,t,this);else for(var n=0,a=this.length;n").appendTo(t);h(l,n)}else{var 
s=!1;switch(p=null,g=n){case"ellipsis":t.append('');break;case"first":p=m.sFirst,0===f&&(s=!0);break;case"previous":p=m.sPrevious,0===f&&(s=!0);break;case"next":p=m.sNext,0!==d&&f!==d-1||(s=!0);break;case"last":p=m.sLast,0!==d&&f!==d-1||(s=!0);break;default:p=u.fnFormatNumber(n+1),g=f===n?b.sPageButtonActive:""}null!==p&&(l=u.oInit.pagingTag||"a",s&&(g+=" "+a),me(P("<"+l+">",{class:b.sPageButton+" "+g,"aria-controls":u.sTableId,"aria-disabled":s?"true":null,"aria-label":S[n],role:"link","aria-current":g===b.sPageButtonActive?"page":null,"data-dt-idx":n,tabindex:s?-1:u.iTabIndex,id:0===c&&"string"==typeof n?u.sTableId+"_"+n:null}).html(p).appendTo(t),{action:n},r))}}var p,g,n,b=u.oClasses,m=u.oLanguage.oPaginate,S=u.oLanguage.oAria.paginate||{};try{n=P(t).find(v.activeElement).data("dt-idx")}catch(t){}h(P(t).empty(),e),n!==H&&P(t).find("[data-dt-idx="+n+"]").trigger("focus")}}}),P.extend(w.ext.type.detect,[function(t,e){e=e.oLanguage.sDecimal;return l(t,e)?"num"+e:null},function(t,e){var n;return(!t||t instanceof Date||X.test(t))&&(null!==(n=Date.parse(t))&&!isNaN(n)||h(t))?"date":null},function(t,e){e=e.oLanguage.sDecimal;return l(t,e,!0)?"num-fmt"+e:null},function(t,e){e=e.oLanguage.sDecimal;return a(t,e)?"html-num"+e:null},function(t,e){e=e.oLanguage.sDecimal;return a(t,e,!0)?"html-num-fmt"+e:null},function(t,e){return h(t)||"string"==typeof t&&-1!==t.indexOf("<")?"html":null}]),P.extend(w.ext.type.search,{html:function(t){return h(t)?t:"string"==typeof t?t.replace(U," ").replace(V,""):""},string:function(t){return!h(t)&&"string"==typeof t?t.replace(U," "):t}});function ke(t,e,n,a){var r;return 0===t||t&&"-"!==t?"number"==(r=typeof t)||"bigint"==r?t:+(t=(t=e?$(t,e):t).replace&&(n&&(t=t.replace(n,"")),a)?t.replace(a,""):t):-1/0}function Me(n){P.each({num:function(t){return ke(t,n)},"num-fmt":function(t){return ke(t,n,q)},"html-num":function(t){return ke(t,n,V)},"html-num-fmt":function(t){return ke(t,n,V,q)}},function(t,e){p.type.order[t+n+"-pre"]=e,t.match(/^html\-/)&&(p.type.search[t+n]=p.type.search.html)})}P.extend(p.type.order,{"date-pre":function(t){t=Date.parse(t);return isNaN(t)?-1/0:t},"html-pre":function(t){return h(t)?"":t.replace?t.replace(/<.*?>/g,"").toLowerCase():t+""},"string-pre":function(t){return h(t)?"":"string"==typeof t?t.toLowerCase():t.toString?t.toString():""},"string-asc":function(t,e){return t").addClass(l.sSortJUIWrapper).append(o.contents()).append(P("").addClass(l.sSortIcon+" "+i.sSortingClassJUI)).appendTo(o),P(r.nTable).on("order.dt.DT",function(t,e,n,a){r===e&&(e=i.idx,o.removeClass(l.sSortAsc+" "+l.sSortDesc).addClass("asc"==a[e]?l.sSortAsc:"desc"==a[e]?l.sSortDesc:i.sSortingClass),o.find("span."+l.sSortIcon).removeClass(l.sSortJUIAsc+" "+l.sSortJUIDesc+" "+l.sSortJUI+" "+l.sSortJUIAscAllowed+" "+l.sSortJUIDescAllowed).addClass("asc"==a[e]?l.sSortJUIAsc:"desc"==a[e]?l.sSortJUIDesc:i.sSortingClassJUI))})}}});function We(t){return"string"==typeof(t=Array.isArray(t)?t.join(","):t)?t.replace(/&/g,"&").replace(//g,">").replace(/"/g,"""):t}function Ee(t,e,n,a,r){return j.moment?t[e](r):j.luxon?t[n](r):a?t[a](r):t}var Be=!1;function Ue(t,e,n){var a;if(j.moment){if(!(a=j.moment.utc(t,e,n,!0)).isValid())return null}else if(j.luxon){if(!(a=e&&"string"==typeof t?j.luxon.DateTime.fromFormat(t,e):j.luxon.DateTime.fromISO(t)).isValid)return null;a.setLocale(n)}else e?(Be||alert("DataTables warning: Formatted date without Moment.js or Luxon - https://datatables.net/tn/17"),Be=!0):a=new Date(t);return a}function Ve(s){return 
[Remainder of the minified vendor JavaScript bundle (DataTables Buttons, B.version "2.4.2") added by this diff; the bundle's embedded HTML strings were lost in extraction, so the minified content is not reproduced here.]

[A new template, lute/templates/ankiexport/_form.html, is also added; its markup was lost in extraction, leaving only Jinja fragments such as {% if spec.id %} ... {% endif %}.]

diff --git a/lute/templates/ankiexport/edit.html b/lute/templates/ankiexport/edit.html
new file mode 100644
index 000000000..357acb1cd
--- /dev/null
+++ b/lute/templates/ankiexport/edit.html
@@ -0,0 +1,8 @@
+{% extends 'base.html' %}
+
+{% block body %}
+
+<h1>Edit Anki export</h1>
+
+{% include "ankiexport/_form.html" %}
+
+{% endblock %}
diff --git a/lute/templates/ankiexport/index.html b/lute/templates/ankiexport/index.html
new file mode 100644
index 000000000..a8ba12bbe
--- /dev/null
+++ b/lute/templates/ankiexport/index.html
@@ -0,0 +1,115 @@
+{% extends 'base.html' %}
+
+{% block title %}AnkiConnect{% endblock %}
+{% block header %}AnkiConnect{% endblock %}
+
+{% block body %}
+
+&nbsp;
+

+Create new export specification
+
[The remainder of the 115-line ankiexport/index.html template, the export specification listing table and its scripts, was lost in extraction.]
+
+{% endblock %}
diff --git a/lute/templates/ankiexport/new.html b/lute/templates/ankiexport/new.html
new file mode 100644
index 000000000..bd6168db7
--- /dev/null
+++ b/lute/templates/ankiexport/new.html
@@ -0,0 +1,10 @@
+{% extends 'base.html' %}
+
+{% block title %}New Anki export{% endblock %}
+
+{% block body %}
+

+<h1>Create new Anki export</h1>
+
+{% include "ankiexport/_form.html" %}
+
+{% endblock %}
diff --git a/lute/templates/backup/backup.html b/lute/templates/backup/backup.html
index d49fb532f..9f21190c3 100644
--- a/lute/templates/backup/backup.html
+++ b/lute/templates/backup/backup.html
@@ -11,22 +11,30 @@
 Don't refresh this page, or another backup process will be kicked off!

-

- -

+ diff --git a/lute/templates/backup/index.html b/lute/templates/backup/index.html new file mode 100644 index 000000000..7b8d75d74 --- /dev/null +++ b/lute/templates/backup/index.html @@ -0,0 +1,42 @@ +{% extends 'base.html' %} + +{% block title %}Backups{% endblock %} + +{% block header %}Backups{% endblock %} + +{% block body %} + +{% if backups|length == 0 %} +

No backups have been made yet.

+{% else %} + +

Stored in: {{ backup_dir }}

+
+ + + + + + + + + + + + {% for backup in backups %} + + + + + + + {% endfor %} + + + +{% endif %} + +
+Create new + +{% endblock %} diff --git a/lute/templates/backup/settings.html b/lute/templates/backup/settings.html deleted file mode 100644 index 8e854ed0d..000000000 --- a/lute/templates/backup/settings.html +++ /dev/null @@ -1,39 +0,0 @@ -{% extends 'base.html' %} - -{% block title %}Backup settings{% endblock %} - -{% block body %} -

Backup settings

- -{% for field_name, field_errors in form.errors.items() %} -{% for error in field_errors %} -
{{ error }}
-{% endfor %} -{% endfor %} - -
- {{ form.hidden_tag() }} - {{ form.csrf_token }} - - {% for f in [ - form.backup_enabled, - form.backup_dir, - form.backup_auto, - form.backup_warn, - form.backup_count - ]%} - - - - - - {% endfor %} - -
{{ f.label }}{{ f(class="form-control") }}
- - -
- - - -{% endblock %} diff --git a/lute/templates/base.html b/lute/templates/base.html index 249b4f7c3..5f13a1486 100644 --- a/lute/templates/base.html +++ b/lute/templates/base.html @@ -11,30 +11,45 @@ {% endif %} - + - - - + + + + + - + + + - - - - - - - + {% block preloadassets %}{% endblock %} - + + + + + + + + + + + + + + + @@ -68,7 +83,7 @@

{% endif %} {% if have_languages %} @@ -90,6 +104,8 @@

{% if backup_enabled and backup_directory != '' %} @@ -97,17 +113,23 @@

Backup

{% endif %} @@ -125,4 +147,13 @@

+ + + diff --git a/lute/templates/book/create_new.html b/lute/templates/book/create_new.html index e3d709670..1f04abd2a 100644 --- a/lute/templates/book/create_new.html +++ b/lute/templates/book/create_new.html @@ -24,7 +24,7 @@ {{ form.title.label }} - {{ form.title(class="form-control") }} + {{ form.title(class="form-control", autofocus=True) }} @@ -33,8 +33,10 @@ - {{ form.textfile.label }} - {{ form.textfile(class="form-control") }} + {{ form.textfile.label }}
+ txt, epub, pdf, srt, vtt + + {{ form.textfile() }} @@ -42,56 +44,80 @@ {{ form.source_uri(class="form-control") }} - {# Tags li entries are managed by tagit. #} - Tags - -
    - {% for t in form.book_tags %} -
  • {{ t.object_data }}
  • - {% endfor %} -
+ {{ form.audiofile.label }}
+ mp3, m4a, wav, ogg, opus, aac, flac, webm + {{ form.audiofile() }} + + + + {{ form.book_tags.label }} + {{ form.book_tags(class="form-control") }} + + + + {{ form.split_by.label }} + {{ form.split_by(class="form-control") }} + + + + {{ form.threshold_page_tokens.label }} + {{ form.threshold_page_tokens(class="form-control") }} + - {% endblock %} diff --git a/lute/templates/book/edit.html b/lute/templates/book/edit.html index e962c76a2..0f36eab6b 100644 --- a/lute/templates/book/edit.html +++ b/lute/templates/book/edit.html @@ -13,7 +13,7 @@

Note: you can't change the language or text of a book after it has been created. If you need to do that, please create a new book instead, and delete the old one.

-
+ {{ form.hidden_tag() }} @@ -21,7 +21,7 @@ - + @@ -29,18 +29,28 @@ - {# Tags li entries are managed by tagit. #} - + + + + + +
{{ form.title.label }}{{ form.title(class="form-control") }}{{ form.title(class="form-control", dir=title_direction) }}
{{ form.source_uri(class="form-control") }}
Tags{{ form.audiofile.label }} (.mp3, .m4a, .wav, .ogg, .opus, .aac, .flac, .webm) -
    - {% for t in form.book_tags %} -
  • {{ t.object_data }}
  • - {% endfor %} -
+ {{ form.audiofile() }} + {{ form.audio_filename() }} + {% if book.audio_filename %} +
+ Current file: {{ book.audio_filename }} + +
+ {% endif %}
{{ form.book_tags.label }}{{ form.book_tags(class="form-control") }}
@@ -52,33 +62,31 @@ {% endblock %} diff --git a/lute/templates/book/import_webpage.html b/lute/templates/book/import_webpage.html index 0418de7cd..02dd9408b 100644 --- a/lute/templates/book/import_webpage.html +++ b/lute/templates/book/import_webpage.html @@ -13,8 +13,8 @@ -

- +

+
{% endblock %} diff --git a/lute/templates/book/tablelisting.html b/lute/templates/book/tablelisting.html index 21766bd35..74d3e4378 100644 --- a/lute/templates/book/tablelisting.html +++ b/lute/templates/book/tablelisting.html @@ -1,131 +1,201 @@ + + + - - - - - + + + + + + +
TitleLanguageTagsWord count (% known)ActionsTitleLanguageTagsWord countStatuses + + Last readActions
+ +
+ +
+ + + {# Hidden form for archive, unarchive, delete. #}
+ + + + diff --git a/lute/templates/bookmarks/list.html b/lute/templates/bookmarks/list.html new file mode 100644 index 000000000..846dcc9c9 --- /dev/null +++ b/lute/templates/bookmarks/list.html @@ -0,0 +1,194 @@ +{% extends 'base.html' %} + +{% block title %}Bookmarks{% endblock %} +{% block header %}Bookmarks{% endblock %} + +{% block body %} + +

{{ book.title }}

+ + + + + + + + + +
PageDescriptionActions
+ + +
+ +
+ + + +

Return to {{ book.title }}

+ + + +{% endblock %} diff --git a/lute/templates/errors/404_error.html b/lute/templates/errors/404_error.html new file mode 100644 index 000000000..515851331 --- /dev/null +++ b/lute/templates/errors/404_error.html @@ -0,0 +1,21 @@ +{% extends "base.html" %} + +{% block title %}Lute 404 error{% endblock %} +{% block header %}Missing page{% endblock %} + +{% block body %} + +

404 error: the page you were looking for does not exist.

+ +
+

Version: {{ version }}

+

You were on: {{ referring_page }}

+

You were trying to access: {{ requested_url }}

+
+ +

If this issue persists, please open a
+ GitHub Issue, including all of the above information.
+

+ +{% endblock %} diff --git a/lute/templates/errors/500_error.html b/lute/templates/errors/500_error.html new file mode 100644 index 000000000..30c06f1fc --- /dev/null +++ b/lute/templates/errors/500_error.html @@ -0,0 +1,46 @@ +{% extends "base.html" %} + +{% block title %}Lute error{% endblock %} +{% block header %}500 internal server error{% endblock %} + +{% block body %} + +

Hmm, something went wrong ...

+ +
+

Platform: {{ platform }}

+

Version: {{ version }}

+

In docker?: {{ is_docker }}

+
+
+ Stack trace +
+
{{ exception_info }}
+
+
+
+ +{% if "CERTIFICATE_VERIFY_FAILED" in exception_info %} +
+

This looks like an SSL certificate verification error.

+

Possible solutions are outlined in + this StackOverflow post ... see if any of these are effective. +

+

After applying any solution, restart Lute.

+
+{% endif %} + +

Please check
+the Lute manual
+to see if there are reports and solutions for this problem, and also
+check Discord to see if other users have encountered and solved it.

+ +

If this issue persists, please open a + GitHub Issue, including all of the above + information, plus the full stack trace. +

+ +{% endblock %} diff --git a/lute/templates/imagesearch/index.html b/lute/templates/imagesearch/index.html index 17f5de3f3..a91a92d46 100644 --- a/lute/templates/imagesearch/index.html +++ b/lute/templates/imagesearch/index.html @@ -3,29 +3,37 @@ Images - - - +

+ Click an image below, + upload an image, or + click here + then paste from your clipboard. +

- - -

Clicking an image will save it in your userimages/{{ langid }}/{{ text }}.jpeg

- - {% for image in images %} - - {{ image['html'] | safe }} - - {% endfor %} - + $(document).ready(function() { + $('#manual_image_upload').submit(function(event) { + event.preventDefault(); + var formData = new FormData(this); + $.ajax({ + url: '/bing/manual_image_post', + type: 'POST', + data: formData, + processData: false, + contentType: false, + success: function(data) { + // console.log("Saved: " + data + "\nStatus: " + status); + _update_term_form_image(data.filename, data.url); + }, + error: function(xhr, status, error) { + console.error(`Error submitting form. ${error}; ${status}; ${xhr.responseText}`); + } + }); + }); + + $(window).on('paste', function(event) { + const e = event.originalEvent; + if (e && e.clipboardData && e.clipboardData.files) { + $("#manual_image_file").prop('files', e.clipboardData.files); + $('#manual_image_upload').submit(); + } + }); + + $("#upload_image_link").on('click', function(event) { + $("#manual_image_file").click(); + }); + + $("#manual_image_file").on('change', function(event) { + event.preventDefault(); + $('#manual_image_upload').submit(); + }); + + }); + + $(document).ready(function() { + const searchUrl = '{{ search_url }}'; + const container = document.getElementById('termimagesearch'); + + $.ajax({ + url: searchUrl, + type: 'GET', + dataType: 'json', + success: function(data) { + if (data.error_message !== "") { + const msg = `Error: ${data.error_message}`; + console.error(msg); + $("#image_search_feedback").text(msg); + return; + } + + $("#image_search_feedback").hide(); + if (data.images.length === 0) { + const p = document.createElement('p'); + p.textContent = 'No images found.'; + container.appendChild(p); + return; + } + + // Make Bing API call for images asynchronously + // This avoids blocking the page load + data.images.forEach(function(item) { + const imgWrapper = create_imageWrapper_element(item.src, data.langid, data.text); + container.appendChild(imgWrapper); + }); + }, + error: function(xhr, status, error) { + const msg = `Error ${error}; ${status}; ${xhr.responseText}`; + console.error(msg); + $("#image_search_feedback").val(msg); + } + }); + }); + + function create_imageWrapper_element(src, langid, text) { + const imgWrapper = document.createElement('span'); + imgWrapper.classList.add('initial'); + imgWrapper.style.margin = '2px'; + imgWrapper.style.display = 'inline-block'; + imgWrapper.onmouseover = () => highlight_image(imgWrapper); + imgWrapper.onmouseout = () => un_highlight_image(imgWrapper); + imgWrapper.onclick = () => save_image_locally(src, langid, text); + + const img = document.createElement('img'); + img.src = src; + + imgWrapper.appendChild(img); + + return imgWrapper; + } + diff --git a/lute/templates/index.html b/lute/templates/index.html index 183e2ae56..bcdfb456a 100644 --- a/lute/templates/index.html +++ b/lute/templates/index.html @@ -2,6 +2,11 @@ {% block title %}Lute index{% endblock %} +{% block preloadassets %} + {# The book status load blocks the waiting2.gif from being served, so preload it. #} + +{% endblock %} + {# Must pass empty block to base, or an empty h1 is written. #} {% block header %}{% endblock %} @@ -16,19 +21,22 @@

When you're done trying out the demo, click here to clear out the database. Note: this removes everything in the db.

+

Or instead, + dismiss + this message. +

{% endif %} {% if is_production_data and backup_show_warning %}
-

Warning: {{ backup_warning_msg }} Create a backup.

+

Warning: {{ backup_warning_msg }} Create a backup.

{% endif %} -
- {% if not have_languages %} -

To get started using Lute, first create your language.

+

To get started using Lute, first load a predefined language and sample text, or create your language. +

{% else %} {% include "book/tablelisting.html" %} {% endif %} diff --git a/lute/templates/language/_form.html b/lute/templates/language/_form.html index 5f79996e9..87e8816e5 100644 --- a/lute/templates/language/_form.html +++ b/lute/templates/language/_form.html @@ -1,21 +1,69 @@ {% for field_name, field_errors in form.errors.items() %} -

{{ field_name }} -

    - {% for error in field_errors %} -
  • {{ error }}
  • - {% endfor %} -
-

+ {% for error in field_errors %} + {% if error is mapping %} + {% if error|length > 0 %} + {% for err_field, err_msg in error.items() %} +
+ {{ err_msg }} +
+ {% endfor %} + {% endif %} + {% else %} +
+ {{ error }} +
+ {% endif %} + {% endfor %} {% endfor %} -
+ {{ form.hidden_tag() }} + + + + + + + + + + {% for field in form %} - {% if field.name != 'csrf_token' %} + {% if field.name not in ('name', 'csrf_token', 'dictionaries') %} @@ -29,9 +77,176 @@ + + {% if language.id %} - - + + {% endif %} diff --git a/lute/templates/language/index.html b/lute/templates/language/index.html index 560183bd8..69f0dd70a 100644 --- a/lute/templates/language/index.html +++ b/lute/templates/language/index.html @@ -10,7 +10,8 @@

No languages defined.

{% else %} -
{{ form.name.label(class="form-control-label") }}{{ form.name(class="form-control") }}
Dictionaries +
+ {# + Render all the dictionaries, but don't show the __TEMPLATE__, + added in "_add_hidden_dictionary_template_entry()" in routes.py + #} + {% for dictionary in form.dictionaries %} + + {% endfor %} +
+ + + +
{{ field.label(class="form-control-label") }} {{ field(class="form-control") }}
+
+
@@ -19,23 +20,47 @@ - {% for language, book_count, term_count in language_data %} + {% for lang in language_data %} - - + + {% endfor %} +
{% endif %}
-Create new +

Load predefined language and sample stories

+

Create new

+ + {% endblock %} diff --git a/lute/templates/language/list_predefined.html b/lute/templates/language/list_predefined.html new file mode 100644 index 000000000..283041889 --- /dev/null +++ b/lute/templates/language/list_predefined.html @@ -0,0 +1,51 @@ +{% extends 'base.html' %} + +{% block title %}Predefined Languages{% endblock %} + +{% block header %}Predefined Languages{% endblock %} + +{% block body %} + +
+ + + + + + + + {% for language in predefined %} + + + + {% endfor %} + + +
+ + + +{% endblock %} diff --git a/lute/templates/language/new.html b/lute/templates/language/new.html index 921392e65..985adcb42 100644 --- a/lute/templates/language/new.html +++ b/lute/templates/language/new.html @@ -5,7 +5,8 @@ {% block body %}

Create new Language

-

Load from predefined: +

+ Load from predefined: -

+
{% include "language/_form.html" %} diff --git a/lute/templates/read/audio_player.html b/lute/templates/read/audio_player.html new file mode 100644 index 000000000..a634caa82 --- /dev/null +++ b/lute/templates/read/audio_player.html @@ -0,0 +1,56 @@ + diff --git a/lute/templates/read/flashcopied.html b/lute/templates/read/flashcopied.html index 4ddfe795d..718ade482 100644 --- a/lute/templates/read/flashcopied.html +++ b/lute/templates/read/flashcopied.html @@ -1,6 +1,6 @@ - + @@ -14,15 +22,27 @@ - {% for k, dtos in references.items() %} + {% if no_references %} +

No references found for "{{ text }}":

+
    +
  • This may be a new term.
  • +
  • The page containing this word may not be marked as "read" (see + the manual). +
  • +
  • Books containing this term may have been deleted.
  • +
+ {% endif %} + + {% for k, dtos in references %} {% if dtos|length > 0 %} -

{{ k }}

+

{{ k }}

    {% for dto in dtos %}
  • {{ dto.sentence | safe }}
    - {{ dto.title }} @@ -36,4 +56,3 @@ - diff --git a/lute/templates/term_parent_map/index.html b/lute/templates/term_parent_map/index.html index 4252b15d6..7a4f62a3e 100644 --- a/lute/templates/term_parent_map/index.html +++ b/lute/templates/term_parent_map/index.html @@ -7,59 +7,13 @@
    -

    You can semi-automatically create parent and child Terms, creating and importing an external "mapping" file that you create. See the wiki, Bulk Mapping Parent Terms, for information.

    - - -

    Export

    -

    Generate a file to help you make an import file.

    - -
    -
    -

    Books (export unknown terms)

    - -
    -
    -

    Language (all existing Terms without parents)

    - -
    -
    - -

    Import

    -

    Import your prepared .csv mapping file.

    - -{% for field_name, field_errors in form.errors.items() %} -{% for error in field_errors %} -
    {{ error }}
    -{% endfor %} -
    -{% endfor %} - -
    - {{ form.hidden_tag() }} - - - - - - - - - - - - - - -
    - -
    +

    These are soon-to-be obsolete helpers to export some term data.

    + +

    Export unknown terms from books.

    + {% endblock %} diff --git a/lute/templates/termimport/index.html b/lute/templates/termimport/index.html index 36280e753..5817598e4 100644 --- a/lute/templates/termimport/index.html +++ b/lute/templates/termimport/index.html @@ -5,7 +5,7 @@ {% block body %} -

    See "Bulk Term Import" on the Lute wiki for notes about the CSV file format and data.

    +

    See "Bulk Term Import" in the Lute manual for notes about the CSV file format and data.


    {% for field_name, field_errors in form.errors.items() %} @@ -23,6 +23,18 @@ CSV file {{ form.text_file }} + + {{ form.create_terms.label }} + {{ form.create_terms }} + + + {{ form.new_as_unknown.label }} + {{ form.new_as_unknown }} + + + {{ form.update_terms.label }} + {{ form.update_terms }} + diff --git a/lute/templates/termtag/_form.html b/lute/templates/termtag/_form.html index 6e67b641e..ebb73548b 100644 --- a/lute/templates/termtag/_form.html +++ b/lute/templates/termtag/_form.html @@ -16,7 +16,7 @@ {{ form.text.label(class="form-control-label") }} - {{ form.text(class="form-control") }} + {{ form.text(class="form-control", autofocus=True) }} diff --git a/lute/templates/termtag/index.html b/lute/templates/termtag/index.html index 74420f351..68479b5d8 100644 --- a/lute/templates/termtag/index.html +++ b/lute/templates/termtag/index.html @@ -29,7 +29,7 @@ let setup_datatable = function() { var table = $('#termtagtable').DataTable({ responsive: true, - select: true, + select: false, lengthMenu: [ 10, 25, 50 ], paging: true, info: true, @@ -37,35 +37,11 @@ processing: true, serverSide: true, stateSave: true, - columnDefs: [ - { - "name": "TgText", "targets": 0, - "render": function ( data, type, row, meta ) { - return `${row[0]}`; - } - }, - { "name": "TgComment", "targets": 1 }, - { "name": "TermCount", "targets": 2, - "render": function ( data, type, row, meta ) { - const count = parseInt(row[2]); - if (count == 0) - return '-'; - return `${count}`; - } - }, - { - "targets": 3, - "data": null, - "searchable": false, - "orderable": false, - "render": function ( data, type, row, meta ) { - // TODO zzfuture fix: security - add CSRF token - const tgid = row[3]; - return ``; - } - }, - /* Extra data that is returned in the row for rendering, but not shown. */ - { "name": "TgID", "targets": 4, "data": null, "visible": false } + columns: [ + { name: "TgText", render: render_tag_text }, + { name: "TgComment", data: "TgComment" }, + { name: "TermCount", render: render_term_count }, + { data: null, searchable: false, orderable: false, render: render_delete }, ], // Ajax call @@ -79,6 +55,22 @@ } // end setup datatable + let render_tag_text = function ( data, type, row, meta ) { + return `${row['TgText']}`; + } + + let render_term_count = function ( data, type, row, meta ) { + const count = parseInt(row['TermCount']); + if (count == 0) + return '-'; + return count; + } + + let render_delete = function ( data, type, row, meta ) { + // TODO zzfuture fix: security - add CSRF token + const tgid = row['TgID']; + return ``; + } $(document).ready(function () { setup_datatable(); diff --git a/lute/templates/version.html b/lute/templates/version.html index aeb055357..463cbd46d 100644 --- a/lute/templates/version.html +++ b/lute/templates/version.html @@ -17,7 +17,7 @@
-

Repository: lute-v3

+

Repository: lute-v3

Lute is released under the MIT License.

diff --git a/lute/term/datatables.py b/lute/term/datatables.py index 009ac17b0..abc95fb00 100644 --- a/lute/term/datatables.py +++ b/lute/term/datatables.py @@ -2,25 +2,29 @@ Show terms in datatables. """ -from lute.db import db from lute.utils.data_tables import DataTablesSqliteQuery, supported_parser_type_criteria -def get_data_tables_list(parameters): +def get_data_tables_list(parameters, session): "Term json data for datatables." base_sql = """SELECT - 0 as chk, w.WoID as WoID, LgName, L.LgID as LgID, w.WoText as WoText, parents.parentlist as ParentText, w.WoTranslation, - replace(wi.WiSource, '.jpeg', '') as WiSource, + w.WoID as WoID, LgName, L.LgID as LgID, w.WoText as WoText, parents.parentlist as ParentText, w.WoTranslation, + w.WoRomanization, + WiSource, ifnull(tags.taglist, '') as TagList, StText, - StID + StID, + StAbbreviation, + case w.WoSyncStatus when 1 then 'y' else '' end as SyncStatus, + datetime(WoCreated, 'localtime') as WoCreated FROM words w INNER JOIN languages L on L.LgID = w.WoLgID INNER JOIN statuses S on S.StID = w.WoStatus LEFT OUTER JOIN ( - SELECT WpWoID as WoID, GROUP_CONCAT(PText, ', ') AS parentlist + /* Special concat used for easy parsing on client. */ + SELECT WpWoID as WoID, GROUP_CONCAT(PText, ';;') AS parentlist FROM ( select WpWoID, WoText as PText @@ -31,7 +35,8 @@ def get_data_tables_list(parameters): GROUP BY WpWoID ) AS parents on parents.WoID = w.WoID LEFT OUTER JOIN ( - SELECT WtWoID as WoID, GROUP_CONCAT(TgText, ', ') AS taglist + /* Special concat used for easy parsing on client. */ + SELECT WtWoID as WoID, GROUP_CONCAT(TgText, ';;') AS taglist FROM ( select WtWoID, TgText @@ -44,43 +49,53 @@ def get_data_tables_list(parameters): LEFT OUTER JOIN wordimages wi on wi.WiWoID = w.WoID """ - filt_parents_only = parameters["filtParentsOnly"] - filt_age_min = parameters["filtAgeMin"].strip() - filt_age_max = parameters["filtAgeMax"].strip() - filt_status_min = int(parameters["filtStatusMin"]) - filt_status_max = int(parameters["filtStatusMax"]) - filt_include_ignored = parameters["filtIncludeIgnored"] - typecrit = supported_parser_type_criteria() wheres = [f"L.LgParserType in ({typecrit})"] - if filt_parents_only == "true": - wheres.append("parents.parentlist IS NULL") - if filt_age_min: - filt_age_min = int(filt_age_min) - wheres.append( - f"cast(julianday('now') - julianday(w.wocreated) as int) >= {filt_age_min}" - ) - if filt_age_max: - filt_age_max = int(filt_age_max) - wheres.append( - f"cast(julianday('now') - julianday(w.wocreated) as int) <= {filt_age_max}" - ) - status_wheres = ["StID <> 98"] - if filt_status_min > 0: - status_wheres.append(f"StID >= {filt_status_min}") - if filt_status_max > 0: - status_wheres.append(f"StID <= {filt_status_max}") + # Add "where" criteria for all the filters. + + # Have to check for 'null' for language filter. + # A new user may filter the language when the demo data is loaded, + # but on "wipe database" the filtLanguage value stored in localdata + # may be invalid, resulting in the filtLanguage form control actually + # sending the **string value** "null" here. + # The other filter values don't change with the data, + # so we don't need to check for null. + # Tricky tricky. 
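+    # (Concretely: the posted form can contain the literal four-character
+    # string "null" for filtLanguage, which int() would reject, hence the
+    # string comparison guard below.)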
+ language_id = parameters["filtLanguage"] + if language_id == "null" or language_id is None: + language_id = "0" + language_id = int(language_id) + if language_id != 0: + wheres.append(f"L.LgID == {language_id}") + + if parameters["filtParentsOnly"] == "true": + wheres.append("parents.parentlist IS NULL") - status_wheres = " AND ".join(status_wheres) - if filt_include_ignored == "true": - status_wheres = f"({status_wheres} OR StID = 98)" - wheres.append(status_wheres) + sql_age_calc = "cast(julianday('now') - julianday(w.wocreated) as int)" + age_min = parameters["filtAgeMin"].strip() + if age_min: + wheres.append(f"{sql_age_calc} >= {int(age_min)}") + age_max = parameters["filtAgeMax"].strip() + if age_max: + wheres.append(f"{sql_age_calc} <= {int(age_max)}") - where = " AND ".join(wheres) - full_base_sql = base_sql + " WHERE " + where + st_range = ["StID != 98"] + status_min = int(parameters.get("filtStatusMin", "0")) + status_max = int(parameters.get("filtStatusMax", "99")) + st_range.append(f"StID >= {status_min}") + st_range.append(f"StID <= {status_max}") + st_where = " AND ".join(st_range) + if parameters["filtIncludeIgnored"] == "true": + st_where = f"({st_where} OR StID = 98)" + wheres.append(st_where) - session = db.session - connection = session.connection() + termids = parameters["filtTermIDs"].strip() + if termids != "": + parentsql = f"select WpParentWoID from wordparents where WpWoID in ({termids})" + wheres.append(f"((w.WoID in ({termids})) OR (w.WoID in ({parentsql})))") - return DataTablesSqliteQuery.get_data(full_base_sql, parameters, connection) + # Phew. + return DataTablesSqliteQuery.get_data( + base_sql + " WHERE " + " AND ".join(wheres), parameters, session.connection() + ) diff --git a/lute/term/forms.py b/lute/term/forms.py index 2dba70430..c5f0e9d3e 100644 --- a/lute/term/forms.py +++ b/lute/term/forms.py @@ -2,20 +2,22 @@ Flask-wtf forms. """ +import json +from flask import request from flask_wtf import FlaskForm from wtforms import ( StringField, SelectField, - FieldList, RadioField, TextAreaField, HiddenField, + BooleanField, ) from wtforms import ValidationError from wtforms.validators import DataRequired -from lute.models.language import Language from lute.models.term import Term +from lute.models.repositories import LanguageRepository, TermRepository class TermForm(FlaskForm): @@ -29,7 +31,8 @@ class TermForm(FlaskForm): text = StringField( "Text", validators=[DataRequired()], render_kw={"placeholder": "Term"} ) - parents = FieldList(StringField("parent")) + parentslist = StringField("Parents") + translation = TextAreaField("Translation", render_kw={"placeholder": "Translation"}) romanization = StringField( "Romanization", render_kw={"placeholder": "Pronunciation"} @@ -37,11 +40,58 @@ class TermForm(FlaskForm): status_choices = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (99, "Wkn"), (98, "Ign")] status = RadioField("Status", choices=status_choices) + sync_status = BooleanField("Link to parent") - term_tags = FieldList(StringField("term_tags")) + termtagslist = StringField("term_tags") current_image = HiddenField("current_image") + # DUPLICATE_TERM_CHECK: an already existing term that a new form + # submission would duplicate. + # + # This is a "clever hack" (aka bad hack) that is populated on form + # validation check. If the form post would result in a duplicated + # term, this is set to the value of the original term. 
+ # + # I would have preferred to do some kind of try/catch check and + # have the original duplicated_term be included in the raised + # error's data, but this method works. + # + # See the DUPLICATE_TERM_CHECK comments in term/routes.py. + duplicated_term = None + + def __init__(self, *args, **kwargs): + "Call the constructor of the superclass (FlaskForm)" + super().__init__(*args, **kwargs) + term = kwargs["obj"] + self.session = kwargs["session"] + + def _data(arr): + "Get data in proper format for tagify." + return json.dumps([{"value": p} for p in arr]) + + self.parentslist.data = _data(term.parents) + self.termtagslist.data = _data(term.term_tags) + self.duplicated_term = None + + if request.method == "POST": + self.parentslist.data = request.form.get("parentslist", "") + self.termtagslist.data = request.form.get("termtagslist", "") + + def populate_obj(self, obj): + "Call the populate_obj method from the parent class, then mine." + super().populate_obj(obj) + + def _values(field_data): + "Convert field data to array." + ret = [] + if field_data: + ret = [h["value"] for h in json.loads(field_data)] + return ret + + obj.parents = _values(self.parentslist.data) + obj.term_tags = _values(self.termtagslist.data) + def validate_language_id(self, field): # pylint: disable=unused-argument "Language must be set." if self.language_id.data in (None, 0): @@ -53,24 +103,23 @@ def validate_text(self, field): # pylint: disable=unused-argument if self.language_id.data in (None, 0): return langid = int(self.language_id.data) - lang = Language.find(langid) + language_repo = LanguageRepository(self.session) + lang = language_repo.find(langid) if lang is None: return - if self.original_text.data in ("", None): - # New term. + orig_text = self.original_text.data + if orig_text in ("", None): + # New term - throw if already exists. spec = Term(lang, self.text.data) - checkdup = Term.find_by_spec(spec) - if checkdup is None: - # Not a dup. - return - # Is a dup. - raise ValidationError("Term already exists") - - if self.text.data == self.original_text.data: - return - langid = int(self.language_id.data) - newterm = Term(lang, self.text.data) - origterm = Term(lang, self.original_text.data) - if newterm.text_lc != origterm.text_lc: - raise ValidationError("Can only change term case") + term_repo = TermRepository(self.session) + self.duplicated_term = term_repo.find_by_spec(spec) + if self.duplicated_term is not None: + raise ValidationError("Term already exists") + + elif self.text.data != orig_text: + # Text may have changed. + newterm = Term(lang, self.text.data) + origterm = Term(lang, self.original_text.data) + if newterm.text_lc != origterm.text_lc: + raise ValidationError("Can only change term case") diff --git a/lute/term/model.py b/lute/term/model.py index 3cfb85266..30d97d43f 100644 --- a/lute/term/model.py +++ b/lute/term/model.py @@ -6,11 +6,14 @@ """ import re -import functools -from sqlalchemy import and_, text as sqlalchtext +import sqlalchemy from lute.models.term import Term as DBTerm, TermTag -from lute.models.language import Language +from lute.models.repositories import ( + LanguageRepository, + TermRepository, + TermTagRepository, +) class Term: # pylint: disable=too-many-instance-attributes @@ -21,19 +24,12 @@ class Term: # pylint: disable=too-many-instance-attributes def __init__(self): # The ID of the DBTerm. self.id = None - # A language object is required as the Term bus. object - # must downcase the text and the original_text to see - # if anything has changed. 
- self._language = None - # Ideally this wouldn't be needed, but the term form - # populates this field with the (primitive) language id. self.language_id = None # The text. self.text = None self.text_lc = None # The original text given to the DTO, to track changes. self.original_text = None - self.status = 1 self.translation = None self.romanization = None self.term_tags = [] @@ -41,47 +37,38 @@ def __init__(self): self.parents = [] self.current_image = None - def __repr__(self): - return ( - f'' - ) + # When loading the Term from a DBTerm, + # assign to properties starting with "_" directly. + self._status = 1 + self._status_explicitly_set = False + self._sync_status = False @property - def language(self): - "Use or get the language." - if self._language is not None: - return self._language - return Language.find(self.language_id) - - @language.setter - def language(self, lang): - if not isinstance(lang, Language): - raise ValueError("not a language") - self._language = lang - - def text_has_changed(self): - "Check the downcased original text with the current text." - # print(f'checking if changed, orig = "{self.original_text}", text = "{self.text}"') - if self.original_text in ("", None): - return False + def status(self): + return self._status - def get_lc(s): - return self.language.get_lowercase(s) - - return get_lc(self.original_text) != get_lc(self.text) + @status.setter + def status(self, v): + """ + If the status is specifically set, + any sync'd parent should get that status. + """ + self._status = v + self._status_explicitly_set = True + @property + def sync_status(self): + "Can only sync if one parent." + if len(self.parents) != 1: + return False + return self._sync_status -class TermReference: - "Where a Term has been used in books." + @sync_status.setter + def sync_status(self, v): + self._sync_status = v - def __init__( - self, bookid, txid, pgnum, title, sentence=None - ): # pylint: disable=too-many-arguments - self.book_id = bookid - self.text_id = txid - self.page_number = pgnum - self.title = title - self.sentence = sentence + def __repr__(self): + return f'' class Repository: @@ -89,8 +76,8 @@ class Repository: Maps Term BO to and from lute.model.Term. """ - def __init__(self, _db): - self.db = _db + def __init__(self, _session): + self.session = _session # Identity map for business lookup. # Note that the same term is stored @@ -116,13 +103,29 @@ def _search_identity_map(self, langid, txt): def load(self, term_id): "Loads a Term business object for the DBTerm with the id." - dbt = DBTerm.find(term_id) + dbt = self.session.get(DBTerm, term_id) if dbt is None: raise ValueError(f"No term with id {term_id} found") term = self._build_business_term(dbt) self._add_to_identity_map(term) return term + def _search_spec_term(self, langid, text): + """ + Make a term to get the correct text_lc to search for. + This ensures that the spec term is properly parsed + and downcased. + """ + lang_repo = LanguageRepository(self.session) + lang = lang_repo.find(langid) + return DBTerm(lang, text) + + def _find_by_spec(self, langid, text): + "Do a search using a spec term." + spec = self._search_spec_term(langid, text) + repo = TermRepository(self.session) + return repo.find_by_spec(spec) + def find(self, langid, text): """ Return a Term business object for the DBTerm with the langid and text. 
@@ -132,8 +135,7 @@ def find(self, langid, text):
         if term is not None:
             return term
-        spec = self._search_spec_term(langid, text)
-        dbt = DBTerm.find_by_spec(spec)
+        dbt = self._find_by_spec(langid, text)
         if dbt is None:
             return None
         term = self._build_business_term(dbt)
@@ -145,8 +147,33 @@ def find_or_new(self, langid, text):
         Return a Term business object for the DBTerm with the langid and text.
         If no match, return a new term with the text and language.
+
+        Note that this does a search by the **tokenized version** of the
+        text; i.e., first the text argument is converted into a "search
+        specification" (spec) using the language with the given id. The
+        db search is then done using this spec. In most cases, this will
+        suffice.
+
+        In some cases, though, it may cause errors. The parsing here is
+        done without a fuller context, which in some language parsers can
+        result in different results. For example, the Japanese "集めれ"
+        string can be parsed with mecab to return one unit ("集めれ") or
+        two ("集め/れ"), depending on context.
+
+        So what does this mean? It means that any context-less searches
+        for terms that have ambiguous parsing results will, themselves,
+        also be ambiguous. This impacts csv imports and term form usage.
+
+        For regular (reading screen) usage, it probably doesn't matter.
+        The terms in the reading screen are all created when the page is
+        opened, and so have ids assigned. With that, terms are not
+        searched by text match, they are only searched by id.
+
+        TODO verify_identity_map_comment: if the term is new, don't add
+        it to the identity map ... it's not saved yet, so a later search
+        for it should hit the db again. (That statement comes from old
+        code, and I'm not sure it's still a valid condition.)
         """
         t = self.find(langid, text)
         if t is not None:
             return t
@@ -154,12 +181,15 @@
         spec = self._search_spec_term(langid, text)
         t = Term()
-        t.language = spec.language
         t.language_id = langid
-        t.text = text
+        t.text = spec.text
         t.text_lc = spec.text_lc
         t.romanization = spec.language.parser.get_reading(text)
-        t.original_text = text
+        t.original_text = spec.text
+
+        # TODO verify_identity_map_comment
+        # Adding the term to the map, even though it's new.
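+        # (See the docstring note above: the spec is built by re-parsing
+        # the text without context, so ambiguous strings such as "集めれ"
+        # may produce a different text_lc here than they did when a full
+        # page was parsed.)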
+ self._add_to_identity_map(t) return t @@ -175,45 +205,46 @@ def find_matches(self, langid, text, max_results=50): if search == "": return [] - matches = ( - self.db.session.query(DBTerm) - .filter( - and_(DBTerm.language_id == langid, DBTerm.text_lc.like(search + "%")) - ) - .all() - ) + sql_query = """SELECT + t.WoID as id, + t.WoText as text, + t.WoTextLC as text_lc, + t.WoTranslation as translation, + t.WoStatus as status, + t.WoLgID as language_id, + CASE WHEN wp.WpParentWoID IS NOT NULL THEN 1 ELSE 0 END AS has_children, + CASE WHEN t.WoTextLC = :text_lc THEN 2 + WHEN t.WoTextLC LIKE :text_lc_starts_with THEN 1 + ELSE 0 + END as text_starts_with_search_string + + FROM words AS t + LEFT JOIN ( + select WpParentWoID from wordparents group by WpParentWoID + ) wp on wp.WpParentWoID = t.WoID + + WHERE t.WoLgID = :langid AND t.WoTextLC LIKE :text_lc_wildcard + + ORDER BY text_starts_with_search_string DESC, has_children DESC, t.WoTextLC + LIMIT :max_results + """ + # print(sql_query) + params = { + "text_lc": text_lc, + "text_lc_wildcard": f"%{text_lc}%", + "text_lc_starts_with": f"{text_lc}%", + "langid": langid, + "max_results": max_results, + } + # print(params) - exact = [t for t in matches if t.text_lc == text_lc] - - def compare(item1, item2): - c1 = len(item1.children) - c2 = len(item2.children) - if c1 > c2: - return -1 - if c1 < c2: - return 1 - t1 = item1.text_lc - t2 = item2.text_lc - if t1 < t2: - return -1 - if t1 > t2: - return 1 - return 0 - - remaining = [t for t in matches if t.text_lc != text_lc] - # for t in remaining: - # print(f'term: {t.text}; child count = {len(t.children)}') - remaining.sort(key=functools.cmp_to_key(compare)) - # print('remaining = ') - # print(remaining) - ret = exact + remaining - ret = ret[:max_results] - return [self._build_business_term(t) for t in ret] + alchsql = sqlalchemy.text(sql_query) + return self.session.execute(alchsql, params).fetchall() def get_term_tags(self): "Get all available term tags, helper method." - tags = self.db.session.query(TermTag).all() - return [t.text for t in tags] + tags = self.session.query(TermTag).all() + return sorted([t.text for t in tags]) def add(self, term): """ @@ -222,52 +253,51 @@ def add(self, term): clients should not change it. """ dbterm = self._build_db_term(term) - self.db.session.add(dbterm) + self.session.add(dbterm) return dbterm def delete(self, term): """ Add term to be deleted to session. """ - spec = self._search_spec_term(term.language_id, term.text) - dbt = DBTerm.find_by_spec(spec) - if dbt is None: - return - self.db.session.delete(dbt) + dbt = None + if term.id is not None: + dbt = self.session.get(DBTerm, term.id) + else: + dbt = self._find_by_spec(term.language_id, term.text) + if dbt is not None: + self.session.delete(dbt) def commit(self): """ - Commit everything. - """ - self.db.session.commit() - - def _search_spec_term(self, langid, text): - """ - Make a term to get the correct text_lc to search for. - - Creating a term does parsing and correct downcasing, - so term.language.id and term.text_lc match what the - db would contain. + Commit everything, flush the map to force refetches. """ - lang = Language.find(langid) - return DBTerm(lang, text) + self.identity_map = {} + self.session.commit() def _build_db_term(self, term): "Convert a term business object to a DBTerm." 
+        # pylint: disable=too-many-branches
         if term.text is None:
             raise ValueError("Text not set for term")
-        spec = self._search_spec_term(term.language_id, term.text)
-        t = DBTerm.find_by_spec(spec)
-        if t is None:
-            t = DBTerm()
+        t = None
+        if term.id is not None:
+            # This is an existing term, so use it directly.
+            t = self.session.get(DBTerm, term.id)
+        else:
+            # New term, or finding by text.
+            spec = self._search_spec_term(term.language_id, term.text)
+            term_repo = TermRepository(self.session)
+            t = term_repo.find_by_spec(spec) or DBTerm()
+            t.language = spec.language
-        t.language = spec.language
         t.text = term.text
         t.original_text = term.text
         t.status = term.status
         t.translation = term.translation
         t.romanization = term.romanization
+        t.sync_status = term.sync_status
         t.set_current_image(term.current_image)

         if term.flash_message is not None:
@@ -275,15 +305,16 @@
         else:
             t.pop_flash_message()

+        tt_repo = TermTagRepository(self.session)
         termtags = []
-        for s in term.term_tags:
-            termtags.append(TermTag.find_or_create_by_text(s))
+        for s in list(set(term.term_tags)):
+            termtags.append(tt_repo.find_or_create_by_text(s))
         t.remove_all_term_tags()
         for tt in termtags:
             t.add_term_tag(tt)

         termparents = []
-        lang = spec.language
+        lang = t.language
         create_parents = [
             p
             for p in term.parents
@@ -298,25 +329,43 @@
         for tp in termparents:
             t.add_parent(tp)

+        if len(termparents) != 1:
+            t.sync_status = False
+
+        if t.sync_status and len(termparents) == 1:
+            p = termparents[0]
+            # pylint: disable=protected-access
+            if term._status_explicitly_set or p.status == 0:
+                p.status = t.status
+            else:
+                t.status = p.status
+
         return t

     def _find_or_create_parent(self, pt, language, term, termtags) -> DBTerm:
-        spec = self._search_spec_term(language.id, pt)
-        p = DBTerm.find_by_spec(spec)
+        p = self._find_by_spec(language.id, pt)
+        new_or_unknown_parent = p is None or p.status == 0
+        new_term = term.id is None
+
+        if p is None:
+            p = DBTerm(language, pt)
+
+        if new_or_unknown_parent:
+            p.status = term.status

-        if p is not None:
+        # Copy translation, image if missing, but _not_ if we're just
+        # re-saving an existing term.
+        if new_or_unknown_parent or new_term:
             if (p.translation or "") == "":
                 p.translation = term.translation
             if (p.get_current_image() or "") == "":
                 p.set_current_image(term.current_image)
-            return p

-        p = DBTerm(language, pt)
-        p.status = term.status
-        p.translation = term.translation
-        p.set_current_image(term.current_image)
-        for tt in termtags:
-            p.add_term_tag(tt)
+        # Only copy tags if this is a new parent. New parents should
+        # _likely_ inherit the tags of the term.
+        if new_or_unknown_parent:
+            for tt in termtags:
+                p.add_term_tag(tt)

         return p

@@ -324,18 +373,24 @@
     def _build_business_term(self, dbterm):
         "Create a Term bus. object from a lute.model.term.Term."
         term = Term()
         term.id = dbterm.id
-        term.language = dbterm.language
         term.language_id = dbterm.language.id
-        # Remove zero-width spaces (zws) from strings for user forms.
         text = dbterm.text
+        ## Remove zero-width spaces (zws) from strings for user forms.
+        #
+        # NOTE: disabling this as it creates challenges for editing
+        # terms. In some cases, the same term may have a zws
+        # character as part of it; in other cases, it won't, e.g. "
+        # 集めれ" sometimes is parsed as one token, and sometimes
+        # two ("集め/れ"). If we strip the zws from the string, then
+        # when it's posted back, Lute will think that it has changed.
+ # ... it gets messy. + # zws = "\u200B" # zero-width space + # text = text.replace(zws, "") term.text_lc = dbterm.text_lc term.original_text = text term.text = text - term.status = dbterm.status term.translation = dbterm.translation term.romanization = dbterm.romanization term.current_image = dbterm.get_current_image() @@ -344,19 +399,69 @@ def _build_business_term(self, dbterm): term.romanization = dbterm.romanization term.term_tags = [tt.text for tt in dbterm.term_tags] + # pylint: disable=protected-access + term._status = dbterm.status + term._sync_status = dbterm.sync_status + return term - ## References. + +## References. + + +class TermReference: + "Where a Term has been used in books." + + def __init__( + self, bookid, txid, pgnum, title, sentence=None + ): # pylint: disable=too-many-arguments,too-many-positional-arguments + self.book_id = bookid + self.text_id = txid + self.page_number = pgnum + self.title = title + self.sentence = sentence + + +class ReferencesRepository: + """ + Lookup terms. + """ + + def __init__(self, _session, limit=20, include_unread=False): + "Init." + self.session = _session + self.limit = limit + self.include_unread = include_unread + + def _search_spec_term(self, langid, text): + """ + Make a term to get the correct text_lc to search for. + This ensures that the spec term is properly parsed + and downcased. + """ + lang_repo = LanguageRepository(self.session) + lang = lang_repo.find(langid) + return DBTerm(lang, text) def find_references(self, term): """ Return references of term, children, and parents. """ spec = self._search_spec_term(term.language_id, term.text) - searchterm = DBTerm.find_by_spec(spec) + term_repo = TermRepository(self.session) + searchterm = term_repo.find_by_spec(spec) if searchterm is None: searchterm = spec + return self._find_references(searchterm) + def find_references_by_id(self, term_id): + "Find references for the given term." + term_repo = TermRepository(self.session) + searchterm = term_repo.find(term_id) + return self._find_references(searchterm) + + def _find_references(self, searchterm): + "Find refs." references = { "term": self._get_references(searchterm), "children": self._get_child_references(searchterm), @@ -380,12 +485,24 @@ def replace_match(m): return ret def _get_references(self, term): + """ + Search the sentences.text_content (or textlc_content if needed). + + sentence.textlc_content is set to '*' if a call to sqlite's LOWER + returns the same data as using the sentence Language.parser. This + saves a pile of space, at least in my case with Spanish, as only + 0.5% of the lowercased sentences actually differ. + """ if term is None: return [] + only_include_read = "TxReadDate IS NOT NULL" + if self.include_unread: + only_include_read = "1=1" # include everything. 
+ term_lc = term.text_lc - query = sqlalchtext( - """ + query = sqlalchemy.text( + f""" SELECT DISTINCT texts.TxBkID, TxID, @@ -400,15 +517,19 @@ def _get_references(self, term): FROM texts GROUP BY TxBkID ) pc ON pc.TxBkID = texts.TxBkID - WHERE TxReadDate IS NOT NULL - AND LOWER(SeText) LIKE :pattern - LIMIT 20 + WHERE { only_include_read } + AND SeText IS NOT NULL + AND CASE WHEN SeTextLC == '*' THEN SeText ELSE SeTextLC END LIKE :pattern + AND BkLgID = {term.language.id} + ORDER BY TxReadDate desc, TxID desc + LIMIT {self.limit} """ ) + # print(query) pattern = f"%{chr(0x200B)}{term_lc}{chr(0x200B)}%" params = {"pattern": pattern} - result = self.db.session.execute(query, params) + result = self.session.execute(query, params) return self._build_term_references(term_lc, result) def _get_all_refs(self, terms): diff --git a/lute/term/routes.py b/lute/term/routes.py index 959f1fd9e..930de9096 100644 --- a/lute/term/routes.py +++ b/lute/term/routes.py @@ -2,11 +2,34 @@ /term routes. """ -from flask import Blueprint, request, jsonify, render_template, redirect +import os +import csv +import json +from flask import ( + Blueprint, + request, + jsonify, + render_template, + redirect, + current_app, + send_file, + flash, +) from lute.models.language import Language +from lute.models.term import Status +from lute.models.repositories import ( + LanguageRepository, + TermRepository, + UserSettingRepository, +) from lute.utils.data_tables import DataTablesFlaskParamParser from lute.term.datatables import get_data_tables_list -from lute.term.model import Repository, Term +from lute.term.model import Repository, Term, ReferencesRepository +from lute.term.service import ( + Service as TermService, + TermServiceException, + BulkTermUpdateData, +) from lute.db import db from lute.term.forms import TermForm import lute.utils.formutils @@ -18,35 +41,190 @@ @bp.route("/index/", methods=["GET"]) def index(search): "Index page." - return render_template("term/index.html", initial_search=search) + repo = TermRepository(db.session) + repo.delete_empty_images() + languages = db.session.query(Language).order_by(Language.name).all() + langopts = [(lang.id, lang.name) for lang in languages] + langopts = [(0, "(all)")] + langopts + all_statuses = db.session.query(Status).all() + filter_statuses = [s for s in all_statuses if s.id != Status.IGNORED] + # Add ignored to the end of the list ... annoying that the numbers + # are "out of order" (i.e., IGNORED comes before WELLKNOWN). + update_statuses = filter_statuses + [ + s for s in all_statuses if s.id == Status.IGNORED + ] + r = Repository(db.session) + return render_template( + "term/index.html", + initial_search=search, + language_options=langopts, + filter_statuses=filter_statuses, + update_statuses=update_statuses, + tags=r.get_term_tags(), + in_term_index_listing=True, + ) -@bp.route("/datatables", methods=["POST"]) -def datatables_active_source(): - "Datatables data for terms." - parameters = DataTablesFlaskParamParser.parse_params(request.form) - - # The DataTablesFlaskParamParser doesn't know about term-specific filters, - # add those manually. +def _load_term_custom_filters(request_form, parameters): + "Manually add filters that the DataTablesFlaskParamParser doesn't know about." 
    filter_param_names = [
+        "filtLanguage",
         "filtParentsOnly",
         "filtAgeMin",
         "filtAgeMax",
         "filtStatusMin",
         "filtStatusMax",
         "filtIncludeIgnored",
+        "filtTermIDs",
     ]
-    request_params = request.form.to_dict(flat=True)
+    request_params = request_form.to_dict(flat=True)
     for p in filter_param_names:
         parameters[p] = request_params.get(p)

-    data = get_data_tables_list(parameters)
+
+@bp.route("/datatables", methods=["POST"])
+def datatables_active_source():
+    "Datatables data for terms."
+    parameters = DataTablesFlaskParamParser.parse_params(request.form)
+    _load_term_custom_filters(request.form, parameters)
+    data = get_data_tables_list(parameters, db.session)
     return jsonify(data)


+def get_bulk_update_from_form(form):
+    "Load the BulkTermUpdateData from the _bulk_edit_form_fields.html form."
+    bud = BulkTermUpdateData()
+    term_ids = form.get("term_ids").strip()
+    if term_ids == "":
+        return bud
+    bud.term_ids = [int(tid.strip()) for tid in term_ids.split(",")]
+
+    bud.lowercase_terms = form.get("lowercase_terms", "off") == "on"
+    bud.remove_parents = form.get("remove_parents", "off") == "on"
+    pdata = []
+    if form.get("parent", "") != "":
+        pdata = json.loads(form.get("parent"))
+    if len(pdata) == 1:
+        pdata = pdata[0]
+        bud.parent_text = pdata.get("value")
+        if "id" in pdata:
+            bud.parent_id = int(pdata.get("id"))
+
+    bud.change_status = form.get("change_status", "off") == "on"
+    if "status" in form:
+        bud.status_value = int(form.get("status"))
+
+    def _get_tags(form_field_name):
+        if form.get(form_field_name, "") == "":
+            return []
+        return [td["value"] for td in json.loads(form.get(form_field_name))]
+
+    bud.add_tags = _get_tags("add_tags")
+    bud.remove_tags = _get_tags("remove_tags")
+
+    return bud
+
+
+@bp.route("/bulk_edit_from_index", methods=["POST"])
+def bulk_edit_from_index():
+    "Edit from the term index listing."
+    bud = get_bulk_update_from_form(request.form)
+    svc = TermService(db.session)
+    try:
+        svc.apply_bulk_updates(bud)
+    except TermServiceException as ex:
+        flash(f"Error: {str(ex)}", "notice")
+    return redirect("/term/index", 302)
+
+
+@bp.route("/bulk_edit_from_reading_pane", methods=["POST"])
+def bulk_edit_from_reading_pane():
+    "Reading pane updates require a special redirect."
+    bud = get_bulk_update_from_form(request.form)
+    svc = TermService(db.session)
+    try:
+        svc.apply_bulk_updates(bud)
+    except TermServiceException as ex:
+        flash(f"Error: {str(ex)}", "notice")
+        return redirect("/read/term_bulk_edit_form", 302)
+    return render_template("/read/updated.html", term_text=None)
+
+
+@bp.route("/ajax_edit_from_index", methods=["POST"])
+def ajax_edit_from_index():
+    """
+    Ajax edit from the term index listing.
+
+    If successful, returns the term's new status. Only the status is
+    returned, as that is the only thing that might change as a result
+    of an ajax update (e.g., if a parent is assigned).
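+
+    Illustrative request body; the update_type values accepted by
+    TermService.apply_ajax_update are not shown in this diff, so
+    "status" here is hypothetical:
+
+        { "term_id": 42, "update_type": "status", "values": 5 }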
+ """ + svc = TermService(db.session) + updated_term = None + try: + data = request.get_json() + term_id = int(data.get("term_id", 0)) + update_type = data.get("update_type", "") + values = data.get("values") + svc.apply_ajax_update(term_id, update_type, values) + repo = TermRepository(db.session) + updated_term = repo.find(term_id) + except TermServiceException as ex: + return jsonify({"error": str(ex)}), 400 + except ValueError as ex: + print(ex, flush=True) + return jsonify({"error": f"Invalid input ({ex})"}), 400 + except Exception as ex: # pylint: disable=broad-exception-caught + return jsonify({"error": f"An unexpected error occurred ({ex})"}), 500 + + return jsonify({"status": updated_term.status}) + + +@bp.route("/export_terms", methods=["POST"]) +def export_terms(): + "Generate export file of terms." + parameters = DataTablesFlaskParamParser.parse_params(request.form) + _load_term_custom_filters(request.form, parameters) + parameters["length"] = 1000000 + outfile = os.path.join(current_app.env_config.temppath, "export_terms.csv") + data = get_data_tables_list(parameters, db.session) + term_data = data["data"] + + # Term data is an array of dicts, with the sql field name as dict + # keys. These need to be mapped to headings. + heading_to_fieldname = { + "term": "WoText", + "parent": "ParentText", + "translation": "WoTranslation", + "language": "LgName", + "tags": "TagList", + "added": "WoCreated", + "status": "StID", + "link_status": "SyncStatus", + "pronunciation": "WoRomanization", + } + + headings = heading_to_fieldname.keys() + output_data = [ + [r[heading_to_fieldname[fieldname]] for fieldname in headings] + for r in term_data + ] + with open(outfile, "w", encoding="utf-8", newline="") as f: + csv_writer = csv.writer(f) + csv_writer.writerow(headings) + csv_writer.writerows(output_data) + + return send_file(outfile, as_attachment=True, download_name="Terms.csv") + + def handle_term_form( - term, repo, form_template_name, return_on_success, embedded_in_reading_frame=False -): + term, + repo, + session, + form_template_name, + return_on_success, + embedded_in_reading_frame=False, +): # pylint: disable=too-many-arguments,too-many-positional-arguments """ Handle a form post. @@ -54,13 +232,13 @@ def handle_term_form( lives in an iframe in the reading frames and returns a different template on success. """ - form = TermForm(obj=term) + form = TermForm(obj=term, session=session) # Flash messages get added on things like term imports. # The user opening the form is treated as an acknowledgement. term.flash_message = None - form.language_id.choices = lute.utils.formutils.language_choices() + form.language_id.choices = lute.utils.formutils.language_choices(session) if form.validate_on_submit(): form.populate_obj(term) @@ -68,23 +246,47 @@ def handle_term_form( repo.commit() return return_on_success + # Note: on validation, form.duplicated_term may be set. + # See DUPLICATE_TERM_CHECK comments in other files. + + hide_pronunciation = False + language_repo = LanguageRepository(session) + term_language = language_repo.find( + term.language_id or -1 + ) # -1 hack for no lang set. + if term_language is not None: + hide_pronunciation = not term_language.show_romanization + + # Set the language dropdown to the user's current_language_id IF APPLICABLE. + if embedded_in_reading_frame or term_language is not None: + # Do nothing. The language dropdown is not shown, or the term already + # has a language assigned, and we shouldn't change it. 
+ pass + else: + # The language select control is shown and this is a new term, + # so use the default value. + us_repo = UserSettingRepository(db.session) + current_language_id = int(us_repo.get_value("current_language_id")) + form.language_id.data = current_language_id + return render_template( form_template_name, form=form, term=term, - language_dicts=Language.all_dictionaries(), + duplicated_term=form.duplicated_term, + language_dicts=language_repo.all_dictionaries(), + hide_pronunciation=hide_pronunciation, tags=repo.get_term_tags(), embedded_in_reading_frame=embedded_in_reading_frame, ) -def _handle_form(term, repo): +def _handle_form(term, repo, redirect_to="/term/index"): """ - Handle the form post. Only show lang. selector - for new terms. + Handle the form post, redirecting to specified url. """ return handle_term_form( - term, repo, "/term/formframes.html", redirect("/term/index", 302) + term, repo, db.session, "/term/form.html", redirect(redirect_to, 302) ) @@ -93,8 +295,10 @@ def edit(termid): """ Edit a term. """ - repo = Repository(db) + repo = Repository(db.session) term = repo.load(termid) + if term.status == 0: + term.status = 1 return _handle_form(term, repo) @@ -103,8 +307,10 @@ def edit_by_text(langid, text): """ Edit a term. """ - repo = Repository(db) - term = repo.find(langid, text) + repo = Repository(db.session) + term = repo.find_or_new(langid, text) + if term.status == 0: + term.status = 1 return _handle_form(term, repo) @@ -113,44 +319,92 @@ def new(): """ Create a term. """ - repo = Repository(db) + repo = Repository(db.session) term = Term() - return _handle_form(term, repo) + return _handle_form(term, repo, "/term/new") @bp.route("/search//", methods=["GET"]) def search_by_text_in_language(text, langid): "JSON data for parent data." - repo = Repository(db) + if text.strip() == "" or langid == 0: + return [] + repo = Repository(db.session) matches = repo.find_matches(langid, text) - result = [] - for t in matches: - result.append({"id": t.id, "text": t.text, "translation": t.translation}) + + def _make_entry(t): + return { + "id": t.id, + "text": t.text, + "translation": t.translation, + "status": t.status, + } + + result = [_make_entry(t) for t in matches] return jsonify(result) @bp.route("/sentences//", methods=["GET"]) def sentences(langid, text): "Get sentences for terms." - repo = Repository(db) + repo = Repository(db.session) + # Use find_or_new(): if the user clicks on a parent tag + # in the term form, and the parent does not exist yet, then + # we're creating a new term. t = repo.find_or_new(langid, text) - references = repo.find_references(t) - return render_template("/term/sentences.html", references=references) + refsrepo = ReferencesRepository(db.session) + refs = refsrepo.find_references(t) + + # Transform data for output, to + # { "term": [refs], "children": [refs], "parent1": [refs], "parent2" ... } + refdata = [(f'"{text}"', refs["term"]), (f'"{text}" child terms', refs["children"])] + for p in refs["parents"]: + refdata.append((f"\"{p['term']}\"", p["refs"])) + + refcount = sum(len(ref[1]) for ref in refdata) + return render_template( + "/term/sentences.html", + text=text, + no_references=(refcount == 0), + references=refdata, + ) @bp.route("/bulk_update_status", methods=["POST"]) def bulk_update_status(): - "Update the statuses." + """ + Update the statuses. + + json: + { + updates: [ { new_status: 1, termids: [ 42, ] }, ... 
}, ] + } + """ + repo = Repository(db.session) + data = request.get_json() - terms = data.get("terms") - language_id = int(data.get("langid")) - new_status = int(data.get("new_status")) - - repo = Repository(db) - for t in terms: - term = repo.find_or_new(language_id, t) - term.status = new_status - repo.add(term) + updates = data.get("updates") + + for u in updates: + new_status = int(u.get("new_status")) + termids = u.get("termids") + for tidstring in termids: + term = repo.load(int(tidstring)) + term.status = new_status + repo.add(term) + repo.commit() + return jsonify("ok") + + +@bp.route("/bulk_delete", methods=["POST"]) +def bulk_delete(): + "Delete terms." + data = request.get_json() + termids = data.get("wordids") + repo = Repository(db.session) + for tid in termids: + term = repo.load(int(tid)) + repo.delete(term) repo.commit() return jsonify("ok") @@ -160,7 +414,7 @@ def delete(termid): """ Delete a term. """ - repo = Repository(db) + repo = Repository(db.session) term = repo.load(termid) repo.delete(term) repo.commit() diff --git a/lute/term/service.py b/lute/term/service.py new file mode 100644 index 000000000..d4b21f764 --- /dev/null +++ b/lute/term/service.py @@ -0,0 +1,134 @@ +""" +/term service for routes to use +""" + +from dataclasses import dataclass, field +from typing import List, Optional +from lute.models.term import Status +from lute.models.repositories import TermRepository, TermTagRepository +from lute.term.model import Repository + + +class TermServiceException(Exception): + """ + Raised if something bad: + + - missing parent + etc. + """ + + +# pylint: disable=too-many-instance-attributes +@dataclass +class BulkTermUpdateData: + "Bulk updates" + term_ids: List[int] = field(default_factory=list) + lowercase_terms: bool = False + remove_parents: bool = False + parent_id: Optional[int] = None + parent_text: Optional[str] = None + change_status: bool = False + status_value: Optional[int] = None + add_tags: List[str] = field(default_factory=list) + remove_tags: List[str] = field(default_factory=list) + + +class Service: + "Service." + + def __init__(self, session): + self.session = session + + def apply_bulk_updates(self, bulk_update_data): + "Apply all updates." + if len(bulk_update_data.term_ids) == 0: + return + + parent = None + repo = TermRepository(self.session) + terms = [repo.find(tid) for tid in bulk_update_data.term_ids] + + lang_ids = list({term.language.id for term in terms}) + if len(lang_ids) > 1: + raise TermServiceException("Terms not all the same language") + + # parent is found either by the ID, or if that returns None, by a text search. + if bulk_update_data.parent_id is not None: + parent = repo.find(bulk_update_data.parent_id) + if parent is None and bulk_update_data.parent_text is not None: + modelrepo = Repository(self.session) + pmodel = modelrepo.find_or_new(lang_ids[0], bulk_update_data.parent_text) + modelrepo.add(pmodel) + modelrepo.commit() + # Re-load it to get its id. ... wasteful, not concerned at the moment. 
+ pmodel = modelrepo.find(lang_ids[0], bulk_update_data.parent_text) + parent = repo.find(pmodel.id) + + ttrepo = TermTagRepository(self.session) + add_tags = [ttrepo.find_or_create_by_text(a) for a in bulk_update_data.add_tags] + remove_tags = [ + ttrepo.find_or_create_by_text(a) for a in bulk_update_data.remove_tags + ] + + for term in terms: + if bulk_update_data.lowercase_terms: + term.text = term.text_lc + if bulk_update_data.remove_parents: + term.remove_all_parents() + term.sync_status = False + if parent is not None: + term.remove_all_parents() + term.add_parent(parent) + if parent is not None and parent.status != Status.UNKNOWN: + term.sync_status = True + term.status = parent.status + + if ( + bulk_update_data.change_status is True + and bulk_update_data.status_value is not None + ): + term.status = bulk_update_data.status_value + + for tag in add_tags: + term.add_term_tag(tag) + for tag in remove_tags: + term.remove_term_tag(tag) + + self.session.add(term) + self.session.commit() + + def apply_ajax_update(self, term_id, update_type, value): + "Apply single update from datatables updatable cells interactions." + + repo = Repository(self.session) + term = None + try: + term = repo.load(term_id) + except ValueError as exc: + raise TermServiceException(f"No term with id {term_id}") from exc + + if update_type == "translation": + trans = (value or "").strip() + if trans == "": + trans = None + term.translation = trans + + elif update_type == "parents": + term.parents = value + if len(term.parents) == 1: + term.sync_status = True + + elif update_type == "term_tags": + term.term_tags = value + + elif update_type == "status": + sval = int(value) + if sval not in Status.ALLOWED: + raise TermServiceException("Bad status value") + term.status = sval + + else: + raise TermServiceException("Bad update type") + + repo.add(term) + repo.commit() diff --git a/lute/term_parent_map/routes.py b/lute/term_parent_map/routes.py index 4f8c9bdab..5a5a2d33c 100644 --- a/lute/term_parent_map/routes.py +++ b/lute/term_parent_map/routes.py @@ -2,66 +2,25 @@ Mapping parents. """ +# TODO issue_336_export_unknown_book_terms: this blueprint can be killed when 336 is done. + import os -import tempfile -from flask import Blueprint, render_template, flash, redirect, send_file -from wtforms import SelectField, ValidationError -from wtforms.validators import DataRequired -from flask_wtf import FlaskForm -from flask_wtf.file import FileField +from flask import Blueprint, current_app, render_template, send_file from sqlalchemy import asc from lute.db import db from lute.models.book import Book from lute.models.language import Language -import lute.utils.formutils -from lute.term_parent_map.service import ( - import_file, - BadImportFileError, - export_unknown_terms, - export_terms_without_parents, -) +from lute.term_parent_map.service import Service bp = Blueprint("term_parent_map", __name__, url_prefix="/term_parent_map") -class TermParentMapImportForm(FlaskForm): - "Form for imports." - language_id = SelectField("Language", coerce=int) - text_file = FileField("Text File", validators=[DataRequired()]) - - def validate_language_id(self, field): # pylint: disable=unused-argument - "Language must be set." - if self.language_id.data in (None, 0): - raise ValidationError("Please select a language") - - @bp.route("/index", methods=["GET", "POST"]) def index(): """ - Show books and languages, process import post. 
+ Show books and languages """ - form = TermParentMapImportForm() - form.language_id.choices = lute.utils.formutils.language_choices() - - if form.validate_on_submit(): - text_file = form.text_file.data - language = db.session.get(Language, form.language_id.data) - if text_file: - temp_file_name = tempfile.mkstemp()[1] - try: - text_file.save(temp_file_name) - stats = import_file(language, temp_file_name) - msg = ( - f"Imported {language.name} mappings: " - + f"created {stats['created']} terms, updated {stats['updated']}." - ) - flash(msg, "notice") - return redirect("/term_parent_map/index", 302) - except BadImportFileError as e: - flash(f"Error on import: {str(e)}", "notice") - finally: - os.unlink(temp_file_name) # sqlalchemy _requires_ "== False" for the comparison! # pylint: disable=singleton-comparison @@ -73,27 +32,16 @@ def index(): ) languages = db.session.query(Language).order_by(asc(Language.name)).all() return render_template( - "/term_parent_map/index.html", books=books, languages=languages, form=form + "/term_parent_map/index.html", books=books, languages=languages ) +# TODO issue_336_export_unknown_book_terms: move this route, or something like it, to book actions. @bp.route("/export_book/", methods=["GET"]) def export_book(bookid): "Generate a file and return it." - temp_file_name = tempfile.mkstemp()[1] + outfile = os.path.join(current_app.env_config.temppath, "export_book.txt") book = db.session.get(Book, bookid) - export_unknown_terms(book, temp_file_name) - return send_file( - temp_file_name, as_attachment=True, download_name="unknown_terms.txt" - ) - - -@bp.route("/export_language/", methods=["GET"]) -def export_language(languageid): - "Generate a file and return it." - temp_file_name = tempfile.mkstemp()[1] - lang = db.session.get(Language, languageid) - export_terms_without_parents(lang, temp_file_name) - return send_file( - temp_file_name, as_attachment=True, download_name="terms_without_parents.txt" - ) + service = Service(db.session) + service.export_unknown_terms(book, outfile) + return send_file(outfile, as_attachment=True, download_name="unknown_terms.txt") diff --git a/lute/term_parent_map/service.py b/lute/term_parent_map/service.py index 24caa7d22..25c822370 100644 --- a/lute/term_parent_map/service.py +++ b/lute/term_parent_map/service.py @@ -2,243 +2,38 @@ Term parent mapping. """ -import csv -from sqlalchemy import text -from lute.db import db -from lute.models.term import Term, Status -from lute.term.model import Repository +from lute.models.term import Term ## Exports -def export_terms_without_parents(language, outfile): - "Export terms without parents in the language to filename outfile." - # All existing terms that don't have parents. - sig = Status.IGNORED - sql = f""" - SELECT w.WoTextLC - FROM words w - LEFT JOIN wordparents ON WpWoID = w.WoID - WHERE w.WoLgID = {language.id} - AND WpWoID IS NULL - AND w.WoTokenCount = 1 - AND w.WoStatus != {sig} - """ - data = db.session.execute(text(sql)).fetchall() - terms = [term[0] for term in data] - with open(outfile, "w", encoding="utf-8") as f: - f.write("\n".join(terms)) - - -def export_unknown_terms(book, outfile): - "Export unknown terms in the book to outfile." 
- lang = book.language - unique_tokens = { - t for txt in book.texts for t in lang.get_parsed_tokens(txt.text) if t.is_word - } - unique_lcase_toks = {lang.get_lowercase(t.token) for t in unique_tokens} - - lgid = lang.id - known_terms_lc = ( - db.session.query(Term.text_lc) - .filter(Term.language_id == lgid, Term.token_count == 1) - .all() - ) - known_terms_lc = [word[0] for word in known_terms_lc] - - newtoks = [t for t in unique_lcase_toks if t not in known_terms_lc] - with open(outfile, "w", encoding="utf-8") as f: - f.write("\n".join(newtoks)) - - -## Imports - - -class BadImportFileError(Exception): - """ - Raised if the import file is bad. - """ - - -def import_file(language, filename): - """ - Validate and import file. - - Throws BadImportFileError if file contains invalid data. - """ - import_data = _load_import_file(filename) - _validate_data(import_data) - return _do_import(language, import_data) - - -def _load_import_file(filename, encoding="utf-8-sig"): - "Create array of hashes from file." - importdata = [] - with open(filename, "r", encoding=encoding) as f: - reader = csv.DictReader(f) - - fieldnames = reader.fieldnames - if fieldnames is None: - raise BadImportFileError("No mappings in file") - _validate_data_fields(fieldnames) - - for line in reader: - importdata.append(line) - - if len(importdata) == 0: - raise BadImportFileError("No mappings in file") - - return importdata - - -def _validate_data_fields(field_list): - "Check the keys in the file." - for k in ["parent", "term"]: - if k not in field_list: - msg = "File must contain headings 'parent' and 'term'" - raise BadImportFileError(msg) - - -def _validate_data(import_data): - "All records must have parent, term." - blanks = [ - hsh - for hsh in import_data - if hsh["term"].strip() == "" or hsh["parent"].strip() == "" - ] - if len(blanks) > 0: - raise BadImportFileError("Term is required") - - -class ImportRecord: - "Record in the import file." - - repo = None - language = None - - @classmethod - def set_context(cls, repo, language): - "ImportRecord needs context for lookups." - cls.repo = repo - cls.language = language - - def _find(self, t): - return ImportRecord.repo.find(ImportRecord.language.id, t) - - def __init__(self, hsh): - self.ptext = hsh["parent"] - self.parent = self._find(self.ptext) - self.ctext = hsh["term"] - self.child = self._find(self.ctext) - - @staticmethod - def records(import_data): - """ - Convert import data to records. - - This is called periodically during the import - as each step updates the database. - """ - return [ImportRecord(hsh) for hsh in import_data] - - -def _do_import(language, import_data): - """ - Import records. - """ - repo = Repository(db) - ImportRecord.set_context(repo, language) - - updated = 0 - created = 0 - - created, updated = _import_child_exists_parent_no( - import_data, language, repo, created, updated - ) - created, updated = _import_parent_exists_child_no( - import_data, language, repo, created, updated - ) - created, updated = _import_add_extra_parent_child_links( - import_data, repo, created, updated - ) - - stats = {"created": created, "updated": updated} - - return stats - - -def _import_child_exists_parent_no(import_data, language, repo, created, updated): - "Add parent and relationship." - records = [ - p - for p in ImportRecord.records(import_data) - if p.parent is None and p.child is not None - ] - - def _get_flash_msg(ptext): - "Build a flash message for a new parent." 
- all_children = [d.ctext for d in records if d.ptext == ptext] - msg = f'Auto-created parent for "{all_children[0]}"' - remaining = len(all_children) - 1 - if remaining > 0: - msg += f" + {remaining} more" - return msg - - # First add all the unique parents. - ptexts = list({p.ptext for p in records}) - for p in ptexts: - parent = repo.find_or_new(language.id, p) - parent.flash_message = _get_flash_msg(p) - repo.add(parent) - created += 1 - repo.commit() - - # Then add all the relationships. - for p in records: - p.child.parents.append(p.ptext) - repo.add(p.child) - updated += 1 - repo.commit() - - return created, updated - - -def _import_parent_exists_child_no(import_data, language, repo, created, updated): - "Add child and relationship." - records = [ - p - for p in ImportRecord.records(import_data) - if p.parent is not None and p.child is None - ] - # Add all the children and relationships. - for p in records: - child = repo.find_or_new(language.id, p.ctext) - if child.id is None: - created += 1 - flash_msg = f'Auto-created and mapped to parent "{p.ptext}"' - child.flash_message = flash_msg - child.parents.append(p.ptext) - repo.add(child) - repo.commit() - - return created, updated - - -def _import_add_extra_parent_child_links(import_data, repo, created, updated): - "Add parent to child if needed." - records = [ - p - for p in ImportRecord.records(import_data) - if p.parent is not None - and p.child is not None - and p.parent.id != p.child.id - and p.parent.text not in p.child.parents - ] - for p in records: - p.child.parents.append(p.parent.text) - repo.add(p.child) - updated += 1 - repo.commit() - - return created, updated +class Service: + "Service." + + def __init__(self, session): + self.session = session + + # TODO issue_336_export_unknown_book_terms: move this where needed. + def export_unknown_terms(self, book, outfile): + "Export unknown terms in the book to outfile." + lang = book.language + unique_tokens = { + t + for txt in book.texts + for t in lang.get_parsed_tokens(txt.text) + if t.is_word + } + unique_lcase_toks = {lang.get_lowercase(t.token) for t in unique_tokens} + + lgid = lang.id + known_terms_lc = ( + self.session.query(Term.text_lc) + .filter(Term.language_id == lgid, Term.token_count == 1) + .all() + ) + known_terms_lc = [word[0] for word in known_terms_lc] + + newtoks = [t for t in unique_lcase_toks if t not in known_terms_lc] + with open(outfile, "w", encoding="utf-8") as f: + f.write("\n".join(newtoks)) diff --git a/lute/termimport/routes.py b/lute/termimport/routes.py index 1b104605d..7cccd4fe9 100644 --- a/lute/termimport/routes.py +++ b/lute/termimport/routes.py @@ -3,12 +3,13 @@ """ import os -import tempfile -from flask import Blueprint, render_template, flash, redirect +from flask import Blueprint, current_app, render_template, flash, redirect +from wtforms import BooleanField +from wtforms.validators import DataRequired from flask_wtf import FlaskForm from flask_wtf.file import FileField -from wtforms.validators import DataRequired -from lute.termimport.service import import_file, BadImportFileError +from lute.termimport.service import Service, BadImportFileError +from lute.db import db bp = Blueprint("termimport", __name__, url_prefix="/termimport") @@ -17,28 +18,39 @@ class TermImportForm(FlaskForm): "Form for imports." 
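+    # The three checkboxes below map onto Service.import_file's flags
+    # (create_terms, update_terms, new_as_unknown); the route handler under
+    # this form passes them straight through.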
text_file = FileField("Text File", validators=[DataRequired()]) + create_terms = BooleanField("Create new terms") + new_as_unknown = BooleanField("Set new terms to Unknown") + update_terms = BooleanField("Update existing terms") @bp.route("/index", methods=["GET", "POST"]) def term_import_index(): "Import posted file." form = TermImportForm() - + service = Service(db.session) if form.validate_on_submit(): text_file = form.text_file.data if text_file: - temp_file_name = tempfile.mkstemp()[1] + temp_file_name = os.path.join( + current_app.env_config.temppath, "import_terms.txt" + ) + text_file.save(temp_file_name) try: - text_file.save(temp_file_name) - stats = import_file(temp_file_name) + stats = service.import_file( + temp_file_name, + form.create_terms.data, + form.update_terms.data, + form.new_as_unknown.data, + ) + c = stats["created"] + u = stats["updated"] + s = stats["skipped"] flash( - f"Imported {stats['created']} terms (skipped {stats['skipped']})", + f"Imported {c} terms, updated {u} (skipped {s})", "notice", ) return redirect("/term/index", 302) except BadImportFileError as e: flash(f"Error on import: {str(e)}", "notice") - finally: - os.unlink(temp_file_name) return render_template("termimport/index.html", form=form) diff --git a/lute/termimport/service.py b/lute/termimport/service.py index 57c53febd..8c576bdd9 100644 --- a/lute/termimport/service.py +++ b/lute/termimport/service.py @@ -4,9 +4,8 @@ import csv -from lute.db import db from lute.models.term import Status -from lute.models.language import Language +from lute.models.repositories import LanguageRepository from lute.term.model import Term, Repository @@ -20,203 +19,280 @@ class BadImportFileError(Exception): """ -def import_file(filename): - """ - Validate and import file. - - Throws BadImportFileError if file contains invalid data. - """ - import_data = _load_import_file(filename) - _validate_data(import_data) - return _do_import(import_data) - - -def _load_import_file(filename, encoding="utf-8-sig"): - "Create array of hashes from file." - importdata = [] - with open(filename, "r", encoding=encoding) as f: - reader = csv.DictReader(f) - - fieldnames = reader.fieldnames - if fieldnames is None: +class Service: + "Service." + + def __init__(self, session): + self.session = session + + def import_file( + self, filename, create_terms=True, update_terms=True, new_as_unknowns=False + ): + """ + Validate and import file. + + Throws BadImportFileError if file contains invalid data. + """ + import_data = self._load_import_file(filename) + self._validate_data(import_data) + return self._do_import(import_data, create_terms, update_terms, new_as_unknowns) + + def _load_import_file(self, filename, encoding="utf-8-sig"): + "Create array of hashes from file." 
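+        # Illustrative file contents this loader accepts (hypothetical rows,
+        # not from the repo's test data):
+        #
+        #   language,term,translation,parent,status,tags,pronunciation
+        #   German,Hund,dog,,1,animals,
+        #   German,Hunde,dogs,Hund,2,animals,
+        #
+        # Headers are lowercased and exact duplicate rows are dropped below.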
+ unique_rows = set() + importdata = [] + with open(filename, "r", encoding=encoding) as f: + reader = csv.DictReader(f) + if reader.fieldnames: # Avoid empty file error + reader.fieldnames = [name.lower() for name in reader.fieldnames] + + fieldnames = reader.fieldnames + if fieldnames is None: + raise BadImportFileError("No terms in file") + self._validate_data_fields(fieldnames) + + line_num = 0 + for line in reader: + line_num += 1 + if None in line.values(): + raise BadImportFileError(f"Missing values on line {line_num}") + if None in line.keys(): + raise BadImportFileError(f"Extra values on line {line_num}") + line_tuple = tuple(line.items()) + if line_tuple not in unique_rows: + unique_rows.add(line_tuple) + importdata.append(line) + + if len(importdata) == 0: raise BadImportFileError("No terms in file") - _validate_data_fields(fieldnames) - - for line in reader: - importdata.append(line) - - if len(importdata) == 0: - raise BadImportFileError("No terms in file") - - return importdata - - -def _validate_data_fields(field_list): - "Check the keys in the file." - required = ["language", "term"] - for k in required: - if k not in field_list: - raise BadImportFileError(f"Missing required field '{k}'") - - allowed = required + ["translation", "parent", "status", "tags", "pronunciation"] - for k in field_list: - if k not in allowed: - raise BadImportFileError(f"Unknown field '{k}'") - -def _validate_data(import_data): - """ - Check the data. - """ - _validate_languages(import_data) - _validate_terms_exist(import_data) - _validate_statuses(import_data) - _validate_no_duplicate_terms(import_data) - - -def _create_langs_dict(import_data): - "Create dictionary of language name to Language." - lang_dict = {} - langs = [hsh["language"].strip() for hsh in import_data] - for lang_name in list(set(langs)): - lang_dict[lang_name] = Language.find_by_name(lang_name) - return lang_dict - - -def _get_status(s): - "Convert status to db value." - status_map = { - "": 1, - "1": 1, - "2": 2, - "3": 3, - "4": 4, - "5": 5, - "W": Status.WELLKNOWN, - "I": Status.IGNORED, - } - return status_map.get(s) - - -def _validate_languages(import_data): - "Validate the languages." - lang_dict = _create_langs_dict(import_data) - for lang_name, lang in lang_dict.items(): - if lang is None: - raise BadImportFileError(f"Unknown language '{lang_name}'") - - -def _validate_statuses(import_data): - "All statuses must be valid." - statuses = [hsh["status"].strip() for hsh in import_data if "status" in hsh] - for s in set(statuses): - if _get_status(s) is None: + return importdata + + def _validate_data_fields(self, field_list): + "Check the keys in the file." + required = ["language", "term"] + for k in required: + if k not in field_list: + raise BadImportFileError(f"Missing required field '{k}'") + + allowed = required + [ + "translation", + "parent", + "status", + "tags", + "pronunciation", + "link_status", + ] + ignored = ["added"] + for k in field_list: + if k not in allowed and k not in ignored: + raise BadImportFileError(f"Unknown field '{k}'") + + def _validate_data(self, import_data): + """ + Check the data. + """ + self._validate_languages(import_data) + self._validate_terms_exist(import_data) + self._validate_statuses(import_data) + self._validate_no_duplicate_terms(import_data) + + def _create_langs_dict(self, import_data): + "Create dictionary of language name to Language." 
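+        # Sketch of the result (hypothetical names): {"German": <Language>,
+        # "Klingon": None}; a None entry later makes _validate_languages raise
+        # "Unknown language 'Klingon'".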
+ repo = LanguageRepository(self.session) + lang_dict = {} + langs = [hsh["language"].strip() for hsh in import_data] + for lang_name in list(set(langs)): + lang_dict[lang_name] = repo.find_by_name(lang_name) + return lang_dict + + def _get_status(self, s): + "Convert status to db value." + status_map = { + "": 1, + "1": 1, + "2": 2, + "3": 3, + "4": 4, + "5": 5, + "W": Status.WELLKNOWN, + "I": Status.IGNORED, + } + return status_map.get(s) + + def _validate_languages(self, import_data): + "Validate the languages." + lang_dict = self._create_langs_dict(import_data) + for lang_name, lang in lang_dict.items(): + if lang is None: + raise BadImportFileError(f"Unknown language '{lang_name}'") + + def _validate_statuses(self, import_data): + "All statuses must be valid." + statuses = [hsh["status"].strip() for hsh in import_data if "status" in hsh] + for s in set(statuses): + if self._get_status(s) is None: + raise BadImportFileError( + "Status must be one of 1, 2, 3, 4, 5, I, W, or blank" + ) + + def _validate_terms_exist(self, import_data): + "All records must have a term." + blanks = [hsh for hsh in import_data if hsh["term"].strip() == ""] + if len(blanks) > 0: + raise BadImportFileError("Term is required") + + def _validate_no_duplicate_terms(self, import_data): + """ + Duplicate terms aren't allowed. + + If file contained two duplicate terms, which is the "correct" one? + """ + + def make_lang_term_string(hsh): + t = hsh["term"].strip() + # Have to also clear unicode whitespace. + t = " ".join(t.split()) + return f"{hsh['language']}: {t.lower()}" + + lang_terms = [make_lang_term_string(hsh) for hsh in import_data] + term_counts = {} + for term in lang_terms: + term_counts[term] = term_counts.get(term, 0) + 1 + duplicates = [term for term, count in term_counts.items() if count > 1] + if len(duplicates) != 0: raise BadImportFileError( - "Status must be one of 1, 2, 3, 4, 5, I, W, or blank" + f"Duplicate terms in import: {', '.join(duplicates)}" ) - -def _validate_terms_exist(import_data): - "All records must have a term." - blanks = [hsh for hsh in import_data if hsh["term"].strip() == ""] - if len(blanks) > 0: - raise BadImportFileError("Term is required") - - -def _validate_no_duplicate_terms(import_data): - """ - Duplicate terms aren't allowed. - - If file contained two duplicate terms, which is the "correct" one? - """ - - def make_lang_term_string(hsh): - t = hsh["term"].strip() - # Have to also clear unicode whitespace. - t = " ".join(t.split()) - return f"{hsh['language']}: {t.lower()}" - - lang_terms = [make_lang_term_string(hsh) for hsh in import_data] - term_counts = {} - for term in lang_terms: - term_counts[term] = term_counts.get(term, 0) + 1 - duplicates = [term for term, count in term_counts.items() if count > 1] - if len(duplicates) != 0: - raise BadImportFileError(f"Duplicate terms in import: {', '.join(duplicates)}") - - -def _import_term_skip_parents(repo, rec, lang): - "Add a single record to the repo." - t = Term() - t.language = lang - t.language_id = lang.id - t.text = rec["term"] - if "translation" in rec: - t.translation = rec["translation"] - if "status" in rec: - status = _get_status(rec["status"]) - if status is not None: - t.status = int(status) - if "pronunciation" in rec: - t.romanization = rec["pronunciation"] - if "tags" in rec: - tags = list(map(str.strip, rec["tags"].split(","))) - t.term_tags = [t for t in tags if t != ""] - repo.add(t) - - -def _set_term_parents(repo, rec, lang): - "Set the term parents." 
- t = repo.find(lang.id, rec["term"]) - parents = list(map(str.strip, rec["parent"].split(","))) - t.parents = [p for p in parents if p != ""] - repo.add(t) - - -def _do_import(import_data): - """ - Import records. - - The import is done in two passes: - 1. import the basic terms, without setting their parents - 2. update the terms with parents - - The two passes are done because the import file may - contain a parent in its own row, and we want that to be - imported first to get its own specified data. - """ - repo = Repository(db) - - skipped = 0 - - # Keep track of the created terms: we only want to update - # these ones in pass #2. - created_terms = [] - - def term_string(lang, term): - return f"{lang.id}-{term}" - - for batch in [import_data[i : i + 100] for i in range(0, len(import_data), 100)]: - langs_dict = _create_langs_dict(batch) - for hsh in batch: - lang = langs_dict[hsh["language"]] - if repo.find(lang.id, hsh["term"]) is None: - _import_term_skip_parents(repo, hsh, lang) - created_terms.append(term_string(lang, hsh["term"])) - else: - skipped += 1 - repo.commit() - - pass_2 = [t for t in import_data if "parent" in t and t["parent"] != ""] - for batch in [pass_2[i : i + 100] for i in range(0, len(pass_2), 100)]: - langs_dict = _create_langs_dict(batch) - for hsh in batch: - lang_name = hsh["language"] - lang = langs_dict[lang_name] - if term_string(lang, hsh["term"]) in created_terms: - _set_term_parents(repo, hsh, lang) - repo.commit() - - stats = {"created": len(created_terms), "skipped": skipped} - - return stats + def _import_term_skip_parents(self, repo, rec, lang, set_to_unknown=False): + "Add a single record to the repo." + t = Term() + t.language = lang + t.language_id = lang.id + t.text = rec["term"] + if "translation" in rec: + t.translation = rec["translation"] + if "status" in rec: + status = self._get_status(rec["status"]) + if status is not None: + t.status = int(status) + if set_to_unknown: + t.status = 0 + if "pronunciation" in rec: + t.romanization = rec["pronunciation"] + if "tags" in rec: + tags = list(map(str.strip, rec["tags"].split(","))) + t.term_tags = [t for t in tags if t != ""] + repo.add(t) + + def _update_term_skip_parents(self, t, repo, rec): + "Update a term in the repo." + # Don't change the lang or text of the term + # t.language = lang + # t.language_id = lang.id + # t.text = rec["term"] + if "translation" in rec: + t.translation = rec["translation"] + if "status" in rec: + status = self._get_status(rec["status"]) + if status is not None: + t.status = int(status) + if "pronunciation" in rec: + t.romanization = rec["pronunciation"] + if "tags" in rec: + tags = list(map(str.strip, rec["tags"].split(","))) + t.term_tags = [t for t in tags if t != ""] + + repo.add(t) + + def _set_term_parents(self, repo, rec, lang): + "Set the term parents." + t = repo.find(lang.id, rec["term"]) + parents = list(map(str.strip, rec["parent"].split(","))) + t.parents = [p for p in parents if p != ""] + if "link_status" in rec: + sync_status = rec["link_status"] or "" + t.sync_status = sync_status.strip().lower() == "y" + + # Fallback: if the term status was explicitly set, always use it. + if "status" in rec: + status = self._get_status(rec["status"]) + if status is not None: + t.status = int(status) + + repo.add(t) + + def _do_import( + self, import_data, create_terms=True, update_terms=True, new_as_unknowns=False + ): + """ + Import records. + + If create_terms is True, create new terms. + If update_terms is True, update existing terms. 
+ If new_as_unknowns is True, new terms are given status 0. + + The import is done in two passes: + 1. import the basic terms, without setting their parents + 2. update the terms with parents + + The two passes are done because the import file may + contain a parent in its own row, and we want that to be + imported first to get its own specified data. + """ + # pylint: disable=too-many-locals + + repo = Repository(self.session) + + skipped = 0 + + # Keep track of the created and updated terms: we only want to + # update these ones in pass #2. + created_terms = [] + updated_terms = [] + + def term_string(lang, term): + return f"{lang.id}-{term}" + + for batch in [ + import_data[i : i + 100] for i in range(0, len(import_data), 100) + ]: + langs_dict = self._create_langs_dict(batch) + for hsh in batch: + lang = langs_dict[hsh["language"]] + t = repo.find(lang.id, hsh["term"]) + ts = term_string(lang, hsh["term"]) + + if create_terms and t is None: + # Create a brand-new term. + self._import_term_skip_parents(repo, hsh, lang, new_as_unknowns) + created_terms.append(ts) + + elif update_terms and t is not None: + # Can only update existing terms. + self._update_term_skip_parents(t, repo, hsh) + updated_terms.append(ts) + + else: + skipped += 1 + + repo.commit() + + pass_2 = [t for t in import_data if "parent" in t and t["parent"] != ""] + for batch in [pass_2[i : i + 100] for i in range(0, len(pass_2), 100)]: + langs_dict = self._create_langs_dict(batch) + for hsh in batch: + lang = langs_dict[hsh["language"]] + ts = term_string(lang, hsh["term"]) + if ts in created_terms or ts in updated_terms: + self._set_term_parents(repo, hsh, lang) + repo.commit() + + stats = { + "created": len(created_terms), + "updated": len(updated_terms), + "skipped": skipped, + } + + return stats diff --git a/lute/termtag/datatables.py b/lute/termtag/datatables.py index 9aba0b4f2..353f20cc2 100644 --- a/lute/termtag/datatables.py +++ b/lute/termtag/datatables.py @@ -2,11 +2,10 @@ Show terms in datatables. """ -from lute.db import db from lute.utils.data_tables import DataTablesSqliteQuery -def get_data_tables_list(parameters): +def get_data_tables_list(parameters, session): "json data for datatables." base_sql = """SELECT TgID, @@ -21,6 +20,5 @@ def get_data_tables_list(parameters): group by WtTgID ) src on src.WtTgID = TgID """ - session = db.session connection = session.connection() return DataTablesSqliteQuery.get_data(base_sql, parameters, connection) diff --git a/lute/termtag/routes.py b/lute/termtag/routes.py index 41aee11e5..6b8446d41 100644 --- a/lute/termtag/routes.py +++ b/lute/termtag/routes.py @@ -2,8 +2,10 @@ /termtag routes. """ +from sqlalchemy import text from flask import Blueprint, request, jsonify, render_template, redirect from lute.models.term import TermTag +from lute.models.repositories import TermTagRepository from lute.utils.data_tables import DataTablesFlaskParamParser from lute.termtag.datatables import get_data_tables_list from lute.db import db @@ -23,7 +25,7 @@ def index(search): def datatables_active_source(): "Datatables data for terms." 
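+    # Sketch of a standard DataTables POST this parses (illustrative values):
+    #   draw=1, start=0, length=25, search[value]=..., order[0][column]=1
+    # The parsed dict is handed to get_data_tables_list together with the
+    # session, which is now passed in explicitly rather than imported.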
parameters = DataTablesFlaskParamParser.parse_params(request.form) - data = get_data_tables_list(parameters) + data = get_data_tables_list(parameters, db.session) return jsonify(data) @@ -47,7 +49,8 @@ def edit(termtagid): """ Edit a termtag """ - termtag = TermTag.find(termtagid) + repo = TermTagRepository(db.session) + termtag = repo.find(termtagid) return _handle_form(termtag, "termtag/edit.html") @@ -65,7 +68,19 @@ def delete(termtagid): """ Delete a termtag. """ - termtag = TermTag.find(termtagid) + repo = TermTagRepository(db.session) + termtag = repo.find(termtagid) db.session.delete(termtag) + + # ANNOYING HACK. Per GitHub issue 455, the records + # in the wordtags table were not getting deleted when + # the tags were deleted with this method in the UI, + # even though they were getting deleted during unit + # testing. I spent a short time looking, but am adding + # this hack for now. + # TODO fix_relationships: have wordtags records get deleted. + sql = f"""delete from wordtags where WtTgID = {termtagid}""" + db.session.execute(text(sql)) + db.session.commit() return redirect("/termtag/index", 302) diff --git a/lute/themes/__init__.py b/lute/themes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lute/themes/css/Apple_Books.css b/lute/themes/css/Apple_Books.css new file mode 100644 index 000000000..126f76acb --- /dev/null +++ b/lute/themes/css/Apple_Books.css @@ -0,0 +1,36 @@ +body { + --status-1-color: #f2a2a9; + --status-2-color: #f2d35d; + --status-3-color: #a7d287; + --status-4-color: #a5bbe2; + --status-5-color: #c7b1dd; + + --lute-theme-font-stack: Georgia, "Times New Roman", serif; + + --background-color: #f8f1e2; + --font-color: #262523; + + background-color: var(--background-color); /* beige paper */ + color: var(--font-color); /* dark grey font */ +} + +input[type="text"], +textarea, +.ui-widget-content, +.sub-menu, +.tagify { + background-color: #fff; +} + +.tagify--focus { + --tags-border-color: var(--tagify__tag-bg); +} + +div#thetext p { line-height: 1.5; } +span.textitem { font-family: Georgia;} +/* span.status0 { background-color: transparent; } */ +span.status1 { background-color: var(--status-1-color); } /* pink highlight */ +span.status2 { background-color: var(--status-2-color); } /* yellow highlight */ +span.status3 { background-color: var(--status-3-color); } /* green highlight */ +span.status4 { background-color: var(--status-4-color); } /* blue highlight */ +span.status5 { background-color: var(--status-5-color); } /* purple highlight */ diff --git a/lute/themes/css/Black_and_White.css b/lute/themes/css/Black_and_White.css new file mode 100644 index 000000000..392a68937 --- /dev/null +++ b/lute/themes/css/Black_and_White.css @@ -0,0 +1,67 @@ +body { + --status-0-color: #bdbdbd; + --status-1-color: #bdbdbd; + --status-2-color: #e0e0e0; + --status-3-color: #f5f5f5; + --status-4-color: transparent; + --status-5-color: transparent; + + --status-98-color: transparent; + --status-99-color: transparent; + + --border-bottom-color: #ACB2B9; + + --lute-theme-font-stack: Rubik, BlinkMacSystemFont, -apple-system, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; + + font: 100%/1.25 Rubik, BlinkMacSystemFont, -apple-system, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; +} + +span.status1{ +text-decoration: underline; +} +span.status2{ +text-decoration: underline; +} +span.status3{ +text-decoration: 
underline; +} + +span.status4 { + border-bottom: 1px dashed var(--border-bottom-color); + text-decoration: underline; +} + +span.status5 { + border-bottom: 1px solid var(--border-bottom-color); + text-decoration: underline; +} + +span.status98 { + background-color: var(--status-98-color); +} + +span.status99 { + background-color: var(--status-99-color); +} + +span.textitem { + font-size: 18px; + margin-bottom: 5px; +} + +#status #status-5:checked+label::after, +#status #status-6:checked+label::after { + filter: invert(0); +} + +.status-bar4 { + background-color: var(--border-bottom-color); +} + +.status-bar5 { + background-color: var(--border-bottom-color); +} + +.status-bar99 { + background-color: #72DA88; +} diff --git a/lute/themes/css/Dark_slate.css b/lute/themes/css/Dark_slate.css new file mode 100644 index 000000000..d3eafd2fc --- /dev/null +++ b/lute/themes/css/Dark_slate.css @@ -0,0 +1,117 @@ +body { + --status-0-color: #D5FFFF; + --status-1-color: #b46b7a; + --status-2-color: #988542; + --status-3-color: #699859; + --status-4-color: #5692AE; + --status-5-color: #877aad; + + --status-99-color: #419252; + + --background-color: #48484a; + --light-grey: #c4c8ce; + --font-color: #c4c8ce; + --dark-grey: #333333; + + --form-border-color: var(--light-grey); + + --lute-theme-font-stack: "Lucida Grande", Arial, sans-serif, STHeiti, "Arial Unicode MS", MingLiu; + + background-color: var(--background-color); /* dark grey paper */ + color: var(--font-color); /* light grey font */ +} + +.flash-notice { + background-color: #b46b7a; +} + +.menu-item > span { + color: var(--font-color) !important; +} + +a.home-link { + color: var(--font-color) !important; +} + +a { + color: #ACACF9; +} + +a:link { + color: #ACACF9; +} + +a:visited { + color: #ACACF9; +} + +a:hover { + color: #FF5C5C; +} + +div#thetext p { line-height: 1.5; } +span.textitem { font-family: Georgia; } +span.status0 { background-color: transparent; color: var(--status-0-color); } +span.status1 { background-color: var(--status-1-color); color: #eff1f2; } /* pink highlight */ +span.status2 { background-color: var(--status-2-color); color: #eff1f2; } /* yellow highlight */ +span.status3 { background-color: var(--status-3-color); color: #eff1f2; } /* green highlight */ +span.status4 { background-color: var(--status-4-color); color: #eff1f2; } /* blue highlight */ +span.status5 { background-color: var(--status-5-color); color: #eff1f2; } /* purple highlight */ + +input[type="text"], +textarea, +.ui-widget-content, +.sub-menu { + border-color: var(--light-grey); /* light grey */ + background-color: var(--dark-grey); /* dark grey */ + color: var(--light-grey); /* light grey font */ +} + +.tagify { + --tags-border-color: var(--light-grey); + background-color: var(--dark-grey); + color: var(--light-grey); + --placeholder-color-focus: var(--light-grey); +} + +.tagify--focus { + --tags-border-color: var(--tagify__tag-bg); +} + +#status { + --status-select-color: var(--light-grey); + --status-hover-color: #a2a2a2; +} + +#status label[for="status-5"]::after, +#status label[for="status-6"]::after, +#status #status-5:checked + label::after, +#status #status-6:checked + label::after { + filter: invert(0.85); +} + +#status label[for="status-6"]:hover { + background-color: #bc6b60; +} + +.status-bar-container { + background-color: #fff; + border-color: var(--light-grey); +} + +.status-bar-container-empty { + background-color: var(--background-color); + border-color: var(--light-grey); +} + +.status-bar-container span { + color: var(--dark-grey); +} + 
+span.hamburger { + background-color: #F3F4F4; +} + +#reading_menu .reading-menu-item:hover { + color: var(--background-color); +} diff --git a/lute/themes/css/LWT.css b/lute/themes/css/LWT.css new file mode 100644 index 000000000..1262b2710 --- /dev/null +++ b/lute/themes/css/LWT.css @@ -0,0 +1,52 @@ +body { + --status-0-color: #ADDFFF; + --status-1-color: #F5B8A9; + --status-2-color: #F5CCA9; + --status-3-color: #F5E1A9; + --status-4-color: #F5F3A9; + --status-5-color: #DDFFDD; + + --status-98-color: #F8F8F8; + --status-99-color: #F8F8F8; + + --lute-theme-font-stack: "Lucida Grande", Arial, sans-serif, STHeiti, "Arial Unicode MS", MingLiu; + + font: 100%/1.25 "Lucida Grande", Arial, sans-serif, STHeiti, "Arial Unicode MS", MingLiu; +} + +span.textitem { + font-size: 24px; + margin-bottom: 5px; + border-bottom: 2px solid transparent; +} + +span.status98 { + background-color: var(--status-98-color); + border-bottom: 2px dashed #000000; +} + +span.status99 { + background-color: var(--status-99-color); + border-bottom: 2px solid #CCFFCC; +} + +.wordhover { + border-bottom: 2px solid blue !important; +} + +.kwordmarked { + border-bottom: 2px solid red !important; +} + +div.ui-tooltip { + font-size: 16px; +} + +#status #status-5:checked+label::after, +#status #status-6:checked+label::after { + filter: invert(0); +} + +.status-bar99 { + background-color: #72DA88; +} diff --git a/lute/themes/css/LingQ.css b/lute/themes/css/LingQ.css new file mode 100644 index 000000000..1e1e280e0 --- /dev/null +++ b/lute/themes/css/LingQ.css @@ -0,0 +1,55 @@ +body { + --status-0-color: #C6DFFF; + --status-1-color: #FFE895; + --status-2-color: #FFF2C5; + --status-3-color: #FFF7DB; + --status-4-color: transparent; + --status-5-color: transparent; + + --status-98-color: transparent; + --status-99-color: transparent; + + --border-bottom-color: #ACB2B9; + + --lute-theme-font-stack: Rubik, BlinkMacSystemFont, -apple-system, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; + + font: 100%/1.25 Rubik, BlinkMacSystemFont, -apple-system, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; +} + +span.status4 { + border-bottom: 1px dashed var(--border-bottom-color); +} + +span.status5 { + border-bottom: 1px solid var(--border-bottom-color); +} + +span.status98 { + background-color: var(--status-98-color); +} + +span.status99 { + background-color: var(--status-99-color); +} + +span.textitem { + font-size: 18px; + margin-bottom: 5px; +} + +#status #status-5:checked+label::after, +#status #status-6:checked+label::after { + filter: invert(0); +} + +.status-bar4 { + background-color: var(--border-bottom-color); +} + +.status-bar5 { + background-color: var(--border-bottom-color); +} + +.status-bar99 { + background-color: #72DA88; +} diff --git a/lute/themes/css/Night.css b/lute/themes/css/Night.css new file mode 100644 index 000000000..ae4e74aaa --- /dev/null +++ b/lute/themes/css/Night.css @@ -0,0 +1,121 @@ +body { + --status-0-color: #59c7fa; + --status-1-color: #7a3d58; + --status-2-color: #82621a; + --status-3-color: #3c6621; + --status-4-color: #325192; + --status-5-color: #5e4583; + + --status-99-color: #38924c; + + --background-color: #121212; /* black paper */ + --font-color: #F3F4F4; + --light-grey: #c4c8ce; + + --lute-theme-font-stack: "Lucida Grande", Arial, sans-serif, STHeiti, "Arial Unicode MS", MingLiu; + + background-color: var(--background-color); + color: 
var(--font-color); /* grey font */ +} + +.flash-notice { + background-color: #7a3d58; /* pink highlight */ +} + +.menu-item > span { + color: var(--light-grey) !important; /* light grey font */ +} + +a.home-link { + color: var(--light-grey) !important; /* light grey font */ +} + +a { + color: var(--status-0-color); /* ice blue */ +} + +a:link { + color: var(--status-0-color); /* ice blue */ +} + +a:visited { + color: var(--status-0-color); /* ice blue */ +} + +a:hover { + color: #FF5C5C; /* red */ +} + +input[type="text"], +textarea, +.ui-widget-content, +.sub-menu { + border-color: var(--light-grey); /* light grey */ + background-color: var(--background-color); /* black */ + color: var(--light-grey); /* light grey font */ +} + +.tagify { + --tags-border-color: var(--light-grey); + background-color: var(--background-color); + color: var(--light-grey); + --placeholder-color-focus: var(--light-grey); +} + +.tagify--focus { + --tags-border-color: var(--tagify__tag-bg); +} + +div#thetext p { line-height: 1.5; } +span.textitem { font-family: Georgia;} +span.status0 { background-color: transparent; color: var(--status-0-color); } /* ice blue */ +span.status1 { background-color: var(--status-1-color); } /* pink highlight */ +span.status2 { background-color: var(--status-2-color); } /* yellow highlight */ +span.status3 { background-color: var(--status-3-color); } /* green highlight */ +span.status4 { background-color: var(--status-4-color); } /* blue highlight */ +span.status5 { background-color: var(--status-5-color); } /* purple highlight */ +span.status98 { background-color: transparent; } /* transparent */ +span.status99 { background-color: transparent; } /* transparent */ + +#status { + --status-select-color: #d7dce2; + --status-hover-color: var(--light-grey); +} + +#status label[for="status-5"]::after, +#status label[for="status-6"]::after, +#status #status-5:checked + label::after, +#status #status-6:checked + label::after { + filter: invert(1); +} + +#status label[for="status-6"]:hover { + background-color: #bc6b60; +} + +.status-bar-container { + background-color: #fff; + border-color: var(--light-grey); +} + +.status-bar-container-empty { + background-color: var(--background-color); + border-color: #686868; +} + +.status-bar-container span { + color: var(--background-color); +} + +#read-slider { + -webkit-appearence: none; + background-color: #5e5e5e; +} + +span.hamburger { + background-color: #F3F4F4; +} + +#reading_menu .reading-menu-item:hover { + color: var(--background-color); +} diff --git a/lute/themes/css/README.md b/lute/themes/css/README.md new file mode 100644 index 000000000..fb8965d95 --- /dev/null +++ b/lute/themes/css/README.md @@ -0,0 +1,3 @@ +Simple themes for users. + +Each file here is loaded into the settings "theme" dropdown. The filename is used as the drop down display value, with underscore and .css removed. \ No newline at end of file diff --git a/lute/themes/routes.py b/lute/themes/routes.py new file mode 100644 index 000000000..4cd86e13c --- /dev/null +++ b/lute/themes/routes.py @@ -0,0 +1,50 @@ +"Theming routes." + +from flask import Blueprint, Response, jsonify + +from lute.themes.service import Service +from lute.models.repositories import UserSettingRepository +from lute.settings.current import current_settings +from lute.db import db + +bp = Blueprint("themes", __name__, url_prefix="/theme") + + +@bp.route("/current", methods=["GET"]) +def current_theme(): + "Return current css." 
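+    # Sketch of the contract (matching the code below): GET /theme/current
+    # responds with the active theme's css as "text/css; charset=utf-8";
+    # the default theme yields an empty body.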
+ service = Service(db.session) + response = Response(service.get_current_css(), 200) + response.content_type = "text/css; charset=utf-8" + return response + + +@bp.route("/custom_styles", methods=["GET"]) +def custom_styles(): + """ + Return the custom settings for inclusion in the base.html. + """ + repo = UserSettingRepository(db.session) + css = repo.get_value("custom_styles") + response = Response(css, 200) + response.content_type = "text/css; charset=utf-8" + return response + + +@bp.route("/next", methods=["POST"]) +def set_next_theme(): + "Go to next theme." + service = Service(db.session) + service.next_theme() + return jsonify("ok") + + +@bp.route("/toggle_highlight", methods=["POST"]) +def toggle_highlight(): + "Fix the highlight." + new_setting = not current_settings["show_highlights"] + repo = UserSettingRepository(db.session) + repo.set_value("show_highlights", new_setting) + db.session.commit() + current_settings["show_highlights"] = new_setting + return jsonify("ok") diff --git a/lute/themes/service.py b/lute/themes/service.py new file mode 100644 index 000000000..47e4a2b86 --- /dev/null +++ b/lute/themes/service.py @@ -0,0 +1,90 @@ +""" +Theming service. + +Themes are stored in the css folder, current theme in UserSetting. +""" + +import os +from glob import glob +from flask import current_app +from lute.models.repositories import UserSettingRepository + +default_entry = ("-", "(default)") + + +class Service: + "Service." + + def __init__(self, session): + self.session = session + + def _css_path(self): + """ + Path to css in this folder. + """ + thisdir = os.path.dirname(__file__) + theme_dir = os.path.join(thisdir, "css") + return os.path.abspath(theme_dir) + + def list_themes(self): + """ + List of theme file names and user-readable name. + """ + + def _make_display_name(s): + ret = os.path.basename(s) + ret = ret.replace(".css", "").replace("_", " ") + return ret + + g = glob(os.path.join(self._css_path(), "*.css")) + themes = [(os.path.basename(f), _make_display_name(f)) for f in g] + theme_basenames = [t[0] for t in themes] + + g = glob(os.path.join(current_app.env_config.userthemespath, "*.css")) + additional_user_themes = [ + (os.path.basename(f), _make_display_name(f)) + for f in g + if os.path.basename(f) not in theme_basenames + ] + + themes += additional_user_themes + sorted_themes = sorted(themes, key=lambda x: x[1]) + return [default_entry] + sorted_themes + + def get_current_css(self): + """ + Return the current css pointed at by the current_theme user setting. + """ + repo = UserSettingRepository(self.session) + current_theme = repo.get_value("current_theme") + if current_theme == default_entry[0]: + return "" + + def _get_theme_css_in_dir(d): + "Get css, or '' if no file." + fname = os.path.join(d, current_theme) + if not os.path.exists(fname): + return "" + with open(fname, "r", encoding="utf-8") as f: + return f.read() + + ret = _get_theme_css_in_dir(self._css_path()) + add = _get_theme_css_in_dir(current_app.env_config.userthemespath) + if add != "": + ret += f"\n\n/* Additional user css */\n\n{add}" + return ret + + def next_theme(self): + """ + Move to the next theme in the list of themes. 
+ """ + repo = UserSettingRepository(self.session) + current_theme = repo.get_value("current_theme") + themes = [t[0] for t in self.list_themes()] + themes.append(default_entry[0]) + for i in range(0, len(themes)): # pylint: disable=consider-using-enumerate + if themes[i] == current_theme: + new_index = i + 1 + break + repo.set_value("current_theme", themes[new_index]) + self.session.commit() diff --git a/lute/useraudio/__init__.py b/lute/useraudio/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lute/useraudio/routes.py b/lute/useraudio/routes.py new file mode 100644 index 000000000..79b39e5ad --- /dev/null +++ b/lute/useraudio/routes.py @@ -0,0 +1,22 @@ +""" +User audio routes. + +User audio files are stored in the database in books table. +""" + +import os +from flask import Blueprint, send_file, current_app +from lute.db import db +from lute.models.repositories import BookRepository + +bp = Blueprint("useraudio", __name__, url_prefix="/useraudio") + + +@bp.route("/stream/", methods=["GET"]) +def stream(bookid): + "Serve the audio, no caching." + dirname = current_app.env_config.useraudiopath + br = BookRepository(db.session) + book = br.find(bookid) + fname = os.path.join(dirname, book.audio_filename) + return send_file(fname, as_attachment=True, max_age=0) diff --git a/lute/userimage/routes.py b/lute/userimage/routes.py index 9e060c41f..cf816a549 100644 --- a/lute/userimage/routes.py +++ b/lute/userimage/routes.py @@ -1,9 +1,5 @@ """ User images routes. - -User images are stored in the database as /userimages/langid/term, but -with no jpeg extension. Reason: the old symfony code couldn't manage -urls with periods. """ import os @@ -12,10 +8,11 @@ bp = Blueprint("userimages", __name__, url_prefix="/userimages") -@bp.route("//", methods=["GET"]) -def get_image(lgid, term): +@bp.route("//", methods=["GET"]) +def get_image(lgid, f): "Serve the image from the data/userimages directory." datapath = current_app.config["DATAPATH"] directory = os.path.join(datapath, "userimages", str(lgid)) - filename = term + ".jpeg" - return send_from_directory(directory, filename) + if not os.path.exists(os.path.join(directory, f)): + return "" + return send_from_directory(directory, f) diff --git a/lute/utils/data_tables.py b/lute/utils/data_tables.py index ad0b2e537..5f96c1025 100644 --- a/lute/utils/data_tables.py +++ b/lute/utils/data_tables.py @@ -105,6 +105,37 @@ def parse_params(requestform) -> dict: "order": DataTablesFlaskParamParser._parse_order(request_params), } + @staticmethod + def parse_params_2(requestform) -> dict: + """Parse the request (query) parameters.""" + + # This method is currently unused, but may be used in the + # future. The idea is to extract all params from the + # datatables request, and then have a separate method that + # takes these params explicitly as arguments, i.e. something + # like book_routes.get_data(start, length, search, order). + # This would allow for other front-end clients using something + # other than datatables. + request_params = requestform.to_dict(flat=True) + + # Need the columns to get the name of the "order" field, as + # datatables only deals with the column indexes, not names. 
+ columns = DataTablesFlaskParamParser._parse_columns(request_params) + order = DataTablesFlaskParamParser._parse_order(request_params) + real_order = [] + for o in order: + col = [c["name"] for c in columns if c["index"] == o["column"]] + if len(col) == 1: + real_order.append({"column": col[0], "dir": o["dir"]}) + + return { + "draw": int(request_params.get("draw", 1)), + "start": int(request_params.get("start", 0)), + "length": int(request_params.get("length", -1)), + "search": request_params.get("search[value]"), + "order": real_order, + } + class DataTablesSqliteQuery: "Get data for datatables rendering." @@ -143,27 +174,32 @@ def where_and_params(searchable_cols, parameters): @staticmethod def get_sql(base_sql, parameters): "Build sql used for datatables queries." - start = parameters["start"] - length = parameters["length"] columns = parameters["columns"] def cols_with(attr): - cols = [c["name"] for c in columns if c[attr] is True] - return cols + return [c["name"] for c in columns if c[attr] is True] + # Default sorting order is all cols marked orderable. orderby = ", ".join(cols_with("orderable")) + # Prepend indicated sorting. for order in parameters["order"]: - sort_field = columns[int(order["column"])]["name"] - orderby = f"ORDER BY {sort_field} {order['dir']}, {orderby}" - - searchable = [c["name"] for c in columns if c["searchable"] is True] - [where, params] = DataTablesSqliteQuery.where_and_params(searchable, parameters) - - realbase = f"({base_sql}) realbase" - select_field_list = ", ".join([c["name"] for c in columns if c["name"] != ""]) + col_index = int(order["column"]) + col = columns[col_index] + sort_field = col["name"] or "" + if col["orderable"] is True and sort_field != "": + orderby = f"{sort_field} {order['dir']}, {orderby}" + orderby = f"ORDER BY {orderby}" + + [where, params] = DataTablesSqliteQuery.where_and_params( + cols_with("searchable"), parameters + ) + + realbase = f"({base_sql}) realbase".replace("\n", " ") + start = parameters["start"] + length = parameters["length"] # pylint: disable=line-too-long - data_sql = f"SELECT {select_field_list} FROM (select * from {realbase} {where} {orderby} LIMIT {start}, {length}) src {orderby}" + data_sql = f"SELECT * FROM (select * from {realbase} {where} {orderby} LIMIT {start}, {length}) src {orderby}" return { "recordsTotal": f"select count(*) from {realbase}", @@ -180,20 +216,22 @@ def get_data(base_sql, parameters, conn): recordsFiltered = None try: - sqla = DataTablesSqliteQuery.get_sql(base_sql, parameters) + sql_dict = DataTablesSqliteQuery.get_sql(base_sql, parameters) def runqry(name, use_params=True): "Run the given query from the datatables list of queries." prms = None if use_params: - prms = sqla["params"] - sql = sqla[name].replace("\n", " ") - return conn.execute(text(sql), prms) + prms = sql_dict["params"] + return conn.execute(text(sql_dict[name]), prms) recordsTotal = runqry("recordsTotal", False).fetchone()[0] recordsFiltered = runqry("recordsFiltered").fetchone()[0] res = runqry("data") - ret = [list(row) for row in res.fetchall()] + column_names = res.keys() + # Convert each row into a dict, { fieldname: value ... } + ret = [dict(zip(column_names, row)) for row in res.fetchall()] + except Exception as e: raise e diff --git a/lute/utils/debug_helpers.py b/lute/utils/debug_helpers.py new file mode 100644 index 000000000..fc2afc95a --- /dev/null +++ b/lute/utils/debug_helpers.py @@ -0,0 +1,71 @@ +"Debug helpers." + +import time + + +class DebugTimer: + """ + Helper to log time. 
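A usage sketch for the timer (step names hypothetical). Note it measures `time.process_time()`, so only CPU work registers, not sleeps or I/O waits:

```python
from lute.utils.debug_helpers import DebugTimer

timer = DebugTimer("demo", display=False)
for _ in range(3):
    sum(i * i for i in range(200_000))      # stand-in CPU work
    timer.step("squares")
    sorted(range(200_000), reverse=True)    # more stand-in work
    timer.step("sort")
timer.summary()             # per-step totals for this timer
DebugTimer.total_summary()  # totals accumulated across all DebugTimers
```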
+ """ + + global_step_map = {} + + def __init__(self, name, display=True): + self.start = time.process_time() + self.curr_start = self.start + self.name = name + self.step_map = {} + self.display = display + if display: + print(f"{name} timer started") + + def step(self, s): + "Dump time spent in step, total time since start." + n = time.process_time() + step_elapsed = n - self.curr_start + total_step_elapsed = self.step_map.get(s, 0) + total_step_elapsed += step_elapsed + self.step_map[s] = total_step_elapsed + + if s != "": + full_step_map_string = f"{self.name} {s}" + global_step_elapsed = DebugTimer.global_step_map.get( + full_step_map_string, 0 + ) + global_step_elapsed += step_elapsed + DebugTimer.global_step_map[full_step_map_string] = global_step_elapsed + + total_elapsed = n - self.start + self.curr_start = n + + if not self.display: + return + + msg = " ".join( + [ + f"{self.name} {s}:", + f"step_elapsed: {step_elapsed:.6f},", + f"total step_elapsed: {total_step_elapsed:.6f},", + f"total_elapsed: {total_elapsed:.6f}", + ] + ) + print(msg, flush=True) + + def summary(self): + "Print final step summary." + print(f"{self.name} summary ------------------", flush=True) + for k, v in self.step_map.items(): + print(f" {k}: {v:.6f}", flush=True) + print(f"end {self.name} summary --------------", flush=True) + + @classmethod + def clear_total_summary(cls): + cls.global_step_map = {} + + @classmethod + def total_summary(cls): + "Print final step summary." + print("global summary ------------------", flush=True) + for k, v in cls.global_step_map.items(): + print(f" {k}: {v:.6f}", flush=True) + print("end global summary --------------", flush=True) diff --git a/lute/utils/formutils.py b/lute/utils/formutils.py index 367272fdf..92e1dcea0 100644 --- a/lute/utils/formutils.py +++ b/lute/utils/formutils.py @@ -3,19 +3,39 @@ """ from lute.models.language import Language -from lute.db import db +from lute.models.repositories import UserSettingRepository -def language_choices(): +def language_choices(session, dummy_entry_placeholder="-"): """ Return the list of languages for select boxes. If only one lang exists, only return that, otherwise add a '-' dummy entry at the top. """ - langs = db.session.query(Language).order_by(Language.name).all() + langs = session.query(Language).order_by(Language.name).all() supported = [lang for lang in langs if lang.is_supported] lang_choices = [(s.id, s.name) for s in supported] - if len(lang_choices) > 1: - lang_choices = [(0, "-")] + lang_choices + # Add a dummy placeholder even if there are no languages. + if len(lang_choices) != 1: + lang_choices = [(0, dummy_entry_placeholder)] + lang_choices return lang_choices + + +def valid_current_language_id(session): + """ + Get the current language id from UserSetting, ensuring + it's still valid. If not, change it. 
+ """ + repo = UserSettingRepository(session) + current_language_id = repo.get_value("current_language_id") + current_language_id = int(current_language_id) + + valid_language_ids = [int(p[0]) for p in language_choices(session)] + if current_language_id in valid_language_ids: + return current_language_id + + current_language_id = valid_language_ids[0] + repo.set_value("current_language_id", current_language_id) + session.commit() + return current_language_id diff --git a/plugins/_template_/.pytest.ini b/plugins/_template_/.pytest.ini new file mode 100644 index 000000000..8b098e6fe --- /dev/null +++ b/plugins/_template_/.pytest.ini @@ -0,0 +1,10 @@ +[pytest] +testpaths = + tests + +# Acceptance tests were raising FutureWarning: +# FutureWarning: Deleting all cookies via CookieManager.delete() +# with no arguments has been deprecated. use CookieManager.delete_all(). +# This is internal to the package, so stopping that. +filterwarnings = + ignore::FutureWarning diff --git a/plugins/_template_/README.md b/plugins/_template_/README.md new file mode 100644 index 000000000..26d16a378 --- /dev/null +++ b/plugins/_template_/README.md @@ -0,0 +1,20 @@ +# TEMPLATE DIRECTORY for new parser plugins. + +How to use this: + +* copy this entire directory to a new folder called `lute-` +* find langname or LangName in all files and filenames, change to your language's name. e.g., for Thai, `langname` = `thai`, and `LangName` = `Thai` +* implement your class and tests +* fix any remaining TODO items in the files +* delete this section from the README and only include the relevant content below :-) + + +# README + +The Lute LangName parser. + +TODO fix file + +See [the wiki](https://github.com/LuteOrg/lute-v3/wiki/Developing-language-parser-plugins) for development notes. + +See the [Pypi readme](./README_PyPi.md) for extra config notes. \ No newline at end of file diff --git a/plugins/_template_/README_PyPi.md b/plugins/_template_/README_PyPi.md new file mode 100644 index 000000000..9536b725a --- /dev/null +++ b/plugins/_template_/README_PyPi.md @@ -0,0 +1,14 @@ +TODO fix file + +# `lute3-langname` + +A LangName parser for Lute (`lute3`) ... TODO notes here. + +## Installation + +See the [Lute manual](https://luteorg.github.io/lute-manual/install/plugins.html). + +## Usage + +When this parser is installed, you can add "LangName" as a +language to Lute, which comes with a simple story. diff --git a/plugins/_template_/definition.yaml b/plugins/_template_/definition.yaml new file mode 100644 index 000000000..2bfc86ea4 --- /dev/null +++ b/plugins/_template_/definition.yaml @@ -0,0 +1,17 @@ +# TODO fix names +name: LangName +dictionaries: + - for: terms + type: embedded + url: TODO + - for: sentences + type: popup + url: TODO +show_romanization: true +# right_to_left: + +parser_type: lute_langname +# character_substitutions: +# split_sentences: .!?。!? 
+# split_sentence_exceptions: +word_chars: TODO character range diff --git a/plugins/_template_/lute_langname_parser/__init__.py b/plugins/_template_/lute_langname_parser/__init__.py new file mode 100644 index 000000000..f3a55f4df --- /dev/null +++ b/plugins/_template_/lute_langname_parser/__init__.py @@ -0,0 +1,5 @@ +""" +Lute LANGNAME Parser +""" + +__version__ = "0.0.3" diff --git a/plugins/_template_/lute_langname_parser/parser.py b/plugins/_template_/lute_langname_parser/parser.py new file mode 100644 index 000000000..f4ea00596 --- /dev/null +++ b/plugins/_template_/lute_langname_parser/parser.py @@ -0,0 +1,64 @@ +""" +Parsing using TODO + +Includes classes: + +- LangNameParser + +""" + +import re +from typing import List +from lute.parse.base import ParsedToken, AbstractParser + + +# TODO fix names +class LangNameParser(AbstractParser): + """ + A parser for LANGNAME + """ + + @classmethod + def name(cls): + return "Lute LangName" + + @classmethod + def uses_data_directory(cls): + "Uses the data_directory (defined in the AbstractParser)." + return False # or True + + # @classmethod + # def init_data_directory(cls): + # "Set up necessary files." + # pass + + def get_parsed_tokens(self, text: str, language) -> List[ParsedToken]: + """ + Returns ParsedToken array for given language. + """ + + # Ensure standard carriage returns so that paragraph + # markers are used correctly. Lute uses paragraph markers + # for rendering. + text = text.replace("\r\n", "\n") + + words = [] # ... get words using parser. + tokens = [] + pattern = f"[{language.word_characters}]" + for word in words: + is_word_char = re.match(pattern, word) is not None + is_end_of_sentence = word in language.regexp_split_sentences + if word == "\n": + word = "¶" + if word == "¶": + is_word_char = False + is_end_of_sentence = True + t = ParsedToken(word, is_word_char, is_end_of_sentence) + tokens.append(t) + return tokens + + def get_reading(self, text: str): + """ + Get reading -- some parsers can return readings. + """ + return None diff --git a/plugins/_template_/pyproject.toml b/plugins/_template_/pyproject.toml new file mode 100644 index 000000000..9c7763f0a --- /dev/null +++ b/plugins/_template_/pyproject.toml @@ -0,0 +1,26 @@ +# TODO fix names +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[tool.flit.module] +name = "lute_langname_parser" + +[project] +name = "lute3-langname" +dynamic = ['version'] +description = "Learning Using Texts - LangName Parser" +requires-python = ">=3.8" +authors = [ +    {name = "TODO your_name"} +] +readme = "README_PyPi.md" + +dependencies = [ + "lute3>=3.4.2" + # TODO others +] + + +[project.entry-points."lute.plugin.parse"] +lute_langname = "lute_langname_parser.parser:LangNameParser" diff --git a/plugins/_template_/requirements.txt b/plugins/_template_/requirements.txt new file mode 100644 index 000000000..b16c6b984 --- /dev/null +++ b/plugins/_template_/requirements.txt @@ -0,0 +1,4 @@ +# Required dependency for base classes. +lute3>=3.4.2 + +# TODO -- extra requirements here. diff --git a/plugins/_template_/tests/__init__.py b/plugins/_template_/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/plugins/_template_/tests/conftest.py b/plugins/_template_/tests/conftest.py new file mode 100644 index 000000000..e75fab216 --- /dev/null +++ b/plugins/_template_/tests/conftest.py @@ -0,0 +1,37 @@ +""" +Common fixtures used by many tests.
+""" + +import os +import yaml +import pytest + + +from lute.parse.registry import init_parser_plugins + +from lute.models.language import Language + + +def pytest_sessionstart(session): # pylint: disable=unused-argument + """ + Initialize parser list + """ + init_parser_plugins() + + +def _get_test_language(): + """ + Retrieve the language definition file for testing ths plugin from definition.yaml + """ + thisdir = os.path.dirname(os.path.realpath(__file__)) + definition_file = os.path.join(thisdir, "..", "definition.yaml") + with open(definition_file, "r", encoding="utf-8") as df: + d = yaml.safe_load(df) + lang = Language.from_dict(d) + return lang + + +# TODO fix name +@pytest.fixture(name="langname") +def fixture_langname(): + return _get_test_language() diff --git a/plugins/_template_/tests/test_LangNameParser.py b/plugins/_template_/tests/test_LangNameParser.py new file mode 100644 index 000000000..420d4a0f5 --- /dev/null +++ b/plugins/_template_/tests/test_LangNameParser.py @@ -0,0 +1,106 @@ +""" +LangNameParser tests. +""" + +# TODO fix names, activate tests. + +import pytest + +# pylint: disable=wrong-import-order +from lute.models.term import Term +from lute.parse.base import ParsedToken + +# TODO fix name +from lute_langname_parser.parser import LangNameParser + + +def test_dummy_test(): + "A dummy test so that pytest doesn't complain in github ci." + s = "Hello" + assert s == "Hello", "TODO - fix these tests for your parser :-)" + + +# TODO activate tests. +def todo_test_token_count(langname): + """ + token_count checks. + """ + cases = [ + ("a", 1), + ("ab", 1), + ("ac", 2), + ("ade", 3), + ("a_longer_check.", 21), + ] + for text, expected_count in cases: + t = Term(langname, text) + assert t.token_count == expected_count, text + assert t.text_lc == t.text, "case" + + +def assert_tokens_equals(text, lang, expected): + """ + Parsing a text using a language should give the expected parsed tokens. + + expected is given as array of: + [ original_text, is_word, is_end_of_sentence ] + """ + p = LangNameParser() + actual = p.get_parsed_tokens(text, lang) + expected = [ParsedToken(*a) for a in expected] + assert [str(a) for a in actual] == [str(e) for e in expected] + + +def todo_test_end_of_sentence_stored_in_parsed_tokens(langname): + """ + ParsedToken is marked as EOS=True at ends of sentences. + """ + s = "some text。More text? Yep." + + expected = [ + ("你好", True), + ("。", False, True), + ("吃饭", True), + ("了", True), + ("吗", True), + ("?", False, True), + ("现在", True), + ("是", True), + ("2024", False, False), + ("年", True), + ("。", False, True), + ] + assert_tokens_equals(s, langname, expected) + + +def todo_test_carriage_returns_treated_as_reverse_p_character(langname): + """ + Returns need to be marked with the backwards P for rendering etc. + """ + s = "some。\ntext。" + + expected = [ + ("你好", True), + ("。", False, True), + ("¶", False, True), + ("现在", True), + ("。", False, True), + ] + assert_tokens_equals(s, mandarin_chinese, expected) + + +def todo_test_readings(): + """ + Parser returns readings if they add value. 
+ """ + p = LangNameParser() + + no_reading = ["Hello"] + + for c in no_reading: + assert p.get_reading(c) is None, c + + cases = [("你好", "nǐ hǎo"), ("欢迎", "huān yíng"), ("中国", "zhōng guó")] + + for c in cases: + assert p.get_reading(c[0]) == c[1], c[0] diff --git a/plugins/lute-khmer/.pytest.ini b/plugins/lute-khmer/.pytest.ini new file mode 100644 index 000000000..8b098e6fe --- /dev/null +++ b/plugins/lute-khmer/.pytest.ini @@ -0,0 +1,10 @@ +[pytest] +testpaths = + tests + +# Acceptance tests were raising FutureWarning: +# FutureWarning: Deleting all cookies via CookieManager.delete() +# with no arguments has been deprecated. use CookieManager.delete_all(). +# This is internal to the package, so stopping that. +filterwarnings = + ignore::FutureWarning diff --git a/plugins/lute-khmer/README.md b/plugins/lute-khmer/README.md new file mode 100644 index 000000000..c76877412 --- /dev/null +++ b/plugins/lute-khmer/README.md @@ -0,0 +1,7 @@ +# README + +The Lute Khmer parser. + +See [the wiki](https://github.com/LuteOrg/lute-v3/wiki/Developing-language-parser-plugins) for development notes. + +See the [Pypi readme](./README_PyPi.md) for extra config notes. diff --git a/plugins/lute-khmer/README_PyPi.md b/plugins/lute-khmer/README_PyPi.md new file mode 100644 index 000000000..590eb9bdc --- /dev/null +++ b/plugins/lute-khmer/README_PyPi.md @@ -0,0 +1,12 @@ +# `lute3-khmer` + +A Khmer parser for Lute (`lute3`). + +## Installation + +See the [Lute manual](https://luteorg.github.io/lute-manual/install/plugins.html). + +## Usage + +When this parser is installed, you can add "Khmer" as a +language to Lute, which comes with a simple story. diff --git a/plugins/lute-khmer/definition.yaml b/plugins/lute-khmer/definition.yaml new file mode 100644 index 000000000..e3f2c0bde --- /dev/null +++ b/plugins/lute-khmer/definition.yaml @@ -0,0 +1,22 @@ +name: Khmer +dictionaries: + - for: terms + type: embedded + url: https://en.wiktionary.org/wiki/### + - for: terms + type: popup + url: https://glosbe.com/km/en/### + - for: terms + type: popup + url: https://www.kheng.info/search/?query=### + - for: sentences + type: embedded + url: https://www.bing.com/translator/?from=kh&to=en&text=### +show_romanization: true +# right_to_left: + +parser_type: lute_khmer +# character_substitutions: +split_sentences: ។?៕ +# split_sentence_exceptions: +word_chars: ក-៹ diff --git a/plugins/lute-khmer/lute_khmer_parser/__init__.py b/plugins/lute-khmer/lute_khmer_parser/__init__.py new file mode 100644 index 000000000..cf5eb49ad --- /dev/null +++ b/plugins/lute-khmer/lute_khmer_parser/__init__.py @@ -0,0 +1,5 @@ +""" +Lute Khmer Parser +""" + +__version__ = "0.0.3" diff --git a/plugins/lute-khmer/lute_khmer_parser/parser.py b/plugins/lute-khmer/lute_khmer_parser/parser.py new file mode 100644 index 000000000..035cba5ae --- /dev/null +++ b/plugins/lute-khmer/lute_khmer_parser/parser.py @@ -0,0 +1,118 @@ +""" +Parsing using khmer-nltk + +Includes classes: + +- KhmerParser + +""" + +import re + +from typing import List + +import khmernltk + +from lute.parse.base import ParsedToken, AbstractParser + + +class KhmerParser(AbstractParser): + """ + A parser for KHMER + """ + + @classmethod + def name(cls): + return "Lute Khmer" + + @classmethod + def uses_data_directory(cls): + "Uses the data_directory (defined in the AbstractParser)." + return False # or True + + # @classmethod + # def init_data_directory(cls): + # "Set up necessary files." 
+ # pass + + def _handle_special_token(self, token: str, special_char: str) -> List[str]: + """ + Handle special token scenarios by replacing all special tokens with newline characters. + + Example: + If \ is the special token then + "\hey\man\\\" will evaluate as + ["\n", "hey", "\n", "man", "\n", "\n", "\n"] + """ + if token == special_char: + return ["\n"] + + num_leading_slashes = len(token) - len(token.lstrip(special_char)) + num_trailing_slashes = len(token) - len(token.rstrip(special_char)) + output = [] + + output.extend("\n" * num_leading_slashes) + + tokens = token.strip(special_char).split(special_char) + + if len(tokens) == 1: + output.append(tokens[0]) + else: + for token in tokens[:-1]: + output.append(token) + output.append("\n") + output.append(tokens[-1]) + + output.extend("\n" * num_trailing_slashes) + + return output + + def word_tokenize(self, text: str) -> List[str]: + """ + Tokenize a text using khmernltk and handle the fact that khmernltk + completely omits newline characters by replacing all newline chars with + something that khmernltk won't omit. + """ + special_char = "\\" + text = text.replace("\n", special_char) + output = [] + + for token in khmernltk.word_tokenize(text): + if special_char in token: + output.extend(self._handle_special_token(token, special_char)) + continue + output.append(token) + return output + + def get_parsed_tokens(self, text: str, language) -> List[ParsedToken]: + """ + Returns ParsedToken array for given language. + """ + + # Ensure standard carriage returns so that paragraph + # markers are used correctly. Lute uses paragraph markers + # for rendering. + text = text.replace("\r\n", "\n") + words = self.word_tokenize(text) # ... get words using parser. + pattern = f"[{language.word_characters}]" + tokens = [] + for word in words: + is_end_of_sentence = word in language.regexp_split_sentences + is_word_char = (not is_end_of_sentence) and ( + re.match(pattern, word) is not None + ) + if word == "\n": + word = "¶" + if word == "¶": + is_word_char = False + is_end_of_sentence = True + + t = ParsedToken(word, is_word_char, is_end_of_sentence) + tokens.append(t) + return tokens + + def get_reading(self, text: str): + """ + Get reading -- some parsers can return readings. + """ + return None diff --git a/plugins/lute-khmer/pyproject.toml b/plugins/lute-khmer/pyproject.toml new file mode 100644 index 000000000..783bcea64 --- /dev/null +++ b/plugins/lute-khmer/pyproject.toml @@ -0,0 +1,26 @@ +# TODO fix names +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[tool.flit.module] +name = "lute_khmer_parser" + +[project] +name = "lute3-khmer" +dynamic = ['version'] +description = "Learning Using Texts - Khmer Parser" +requires-python = ">=3.8" +authors = [ + {name = "Justin Dom"} +] +readme = "README_PyPi.md" + +dependencies = [ + "lute3>=3.4.2", + "khmer-nltk==1.6" +] + + +[project.entry-points."lute.plugin.parse"] +lute_khmer = "lute_khmer_parser.parser:KhmerParser" diff --git a/plugins/lute-khmer/requirements.txt b/plugins/lute-khmer/requirements.txt new file mode 100644 index 000000000..cfb04a4a1 --- /dev/null +++ b/plugins/lute-khmer/requirements.txt @@ -0,0 +1,5 @@ +# Required dependency for base classes. +lute3>=3.4.2 + +# extra requirements here. 
+khmer-nltk==1.6 diff --git a/plugins/lute-khmer/tests/__init__.py b/plugins/lute-khmer/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/plugins/lute-khmer/tests/conftest.py b/plugins/lute-khmer/tests/conftest.py new file mode 100644 index 000000000..1dcbd220b --- /dev/null +++ b/plugins/lute-khmer/tests/conftest.py @@ -0,0 +1,36 @@ +""" +Common fixtures used by many tests. +""" + +import os +import yaml +import pytest + + +from lute.parse.registry import init_parser_plugins + +from lute.models.language import Language + + +def pytest_sessionstart(session): # pylint: disable=unused-argument + """ + Initialize parser list + """ + init_parser_plugins() + + +def _get_test_language(): + """ + Retrieve the language definition file for testing ths plugin from definition.yaml + """ + thisdir = os.path.dirname(os.path.realpath(__file__)) + definition_file = os.path.join(thisdir, "..", "definition.yaml") + with open(definition_file, "r", encoding="utf-8") as df: + d = yaml.safe_load(df) + lang = Language.from_dict(d) + return lang + + +@pytest.fixture(name="khmer") +def fixture_khmer(): + return _get_test_language() diff --git a/plugins/lute-khmer/tests/test_KhmerParser.py b/plugins/lute-khmer/tests/test_KhmerParser.py new file mode 100644 index 000000000..d885193fc --- /dev/null +++ b/plugins/lute-khmer/tests/test_KhmerParser.py @@ -0,0 +1,101 @@ +""" +KhmerParser tests. +""" + + +import pytest + +# pylint: disable=wrong-import-order +from lute.models.term import Term +from lute.parse.base import ParsedToken + +from lute_khmer_parser.parser import KhmerParser + + +def test_dummy_test(): + "A dummy test so that pytest doesn't complain in github ci." + s = "Hello" + assert s == "Hello", "TODO - fix these tests for your parser :-)" + + +def test_token_count(khmer): + """ + token_count checks. + """ + cases = [ + ("ជំរាបសួរ", 2), + ("ខ្ញុំ", 1), + ("ខ្ញុំស្រលាញ់អ្នក។", 4), + ("ខ្ញុំរៀនភាសាខ្មែរ", 4), + ("ខ្ញុំចូលចិត្តរៀនភាសាខ្មែរជាមួយមិត្តរបស់ខ្ញុំ", 9), + ] + + for text, expected_count in cases: + t = Term(khmer, text) + assert t.token_count == expected_count, text + assert t.text_lc == t.text, "case" + + +def assert_tokens_equals(text, lang, expected): + """ + Parsing a text using a language should give the expected parsed tokens. + + expected is given as array of: + [ original_text, is_word, is_end_of_sentence ] + """ + p = KhmerParser() + actual = p.get_parsed_tokens(text, lang) + expected = [ParsedToken(*a) for a in expected] + assert [str(a) for a in actual] == [str(e) for e in expected] + + +def test_end_of_sentence_stored_in_parsed_tokens(khmer): + """ + ParsedToken is marked as EOS=True at ends of sentences. + """ + s = "ខ្ញុំចូលចិត្តរៀនភាសាខ្មែរជាមួយមិត្តរបស់ខ្ញុំ។ ចុះអ្នកវិញ?" + + expected = [ + ("ខ្ញុំ", True), + ("ចូលចិត្ត", True), + ("រៀន", True), + ("ភាសា", True), + ("ខ្មែរ", True), + ("ជាមួយ", True), + ("មិត្ត", True), + ("របស់", True), + ("ខ្ញុំ", True), + ("។", False, True), + (" ", False), + ("ចុះ", True), + ("អ្នក", True), + ("វិញ", True), + ("?", False, True), + ] + assert_tokens_equals(s, khmer, expected) + + +def test_carriage_returns_treated_as_reverse_p_character(khmer): + """ + Returns need to be marked with the backwards P for rendering etc. + """ + s = "ខ្ញុំចូលចិត្តរៀនភាសាខ្មែរជាមួយមិត្តរបស់ខ្ញុំ។\nចុះអ្នកវិញ?" 
+ + expected = [ + ("ខ្ញុំ", True), + ("ចូលចិត្ត", True), + ("រៀន", True), + ("ភាសា", True), + ("ខ្មែរ", True), + ("ជាមួយ", True), + ("មិត្ត", True), + ("របស់", True), + ("ខ្ញុំ", True), + ("។", False, True), + ("¶", False, True), + ("ចុះ", True), + ("អ្នក", True), + ("វិញ", True), + ("?", False, True), + ] + assert_tokens_equals(s, khmer, expected) diff --git a/plugins/lute-mandarin/.pytest.ini b/plugins/lute-mandarin/.pytest.ini new file mode 100644 index 000000000..8b098e6fe --- /dev/null +++ b/plugins/lute-mandarin/.pytest.ini @@ -0,0 +1,10 @@ +[pytest] +testpaths = + tests + +# Acceptance tests were raising FutureWarning: +# FutureWarning: Deleting all cookies via CookieManager.delete() +# with no arguments has been deprecated. use CookieManager.delete_all(). +# This is internal to the package, so stopping that. +filterwarnings = + ignore::FutureWarning diff --git a/plugins/lute-mandarin/README.md b/plugins/lute-mandarin/README.md new file mode 100644 index 000000000..4e08e4c22 --- /dev/null +++ b/plugins/lute-mandarin/README.md @@ -0,0 +1,5 @@ +The Lute Mandarin parser. + +See [the wiki](https://github.com/LuteOrg/lute-v3/wiki/Developing-language-parser-plugins) for development notes. + +See the [Pypi readme](./README_PyPi.md) for extra config notes. \ No newline at end of file diff --git a/plugins/lute-mandarin/README_PyPi.md b/plugins/lute-mandarin/README_PyPi.md new file mode 100644 index 000000000..b8fab8359 --- /dev/null +++ b/plugins/lute-mandarin/README_PyPi.md @@ -0,0 +1,35 @@ +# `lute3-mandarin` + +A Mandarin parser for Lute (`lute3`) using the `jieba` library, and +`pypinyin` for readings. + +## Installation + +See the [Lute manual](https://luteorg.github.io/lute-manual/install/plugins.html). + +## Usage + +When this parser is installed, you can add "Mandarin Chinese" as a +language to Lute, which comes with a simple story. + +## Parsing exceptions + +Sometimes `jieba` groups too many characters together when parsing. +For example, it returns "清华大学" as a single word of four +characters, which might not be correct. + +You can specify how Lute should correct these cases by adding some +simple "rules" to the file +`plugins/lute_mandarin/parser_exceptions.txt` found in your Lute +`data` directory. This file is automatically created when Lute +starts. Each rule contains the characters of the word as parsed by +`jieba`, with regular commas added where the word should be split. + +Some examples: + +| File content | Results when parsing "清华大学" | +| --- | --- | +| (empty file) | "清华大学" | +| <pre>清华,大学</pre> | Two tokens, "清华" and "大学" (the single token is split in two) | +| <pre>清,华,大,学</pre> | Four tokens, "清", "华", "大", "学" | +| <pre>清华,大学<br>大,学</pre> | Three tokens, "清华", "大", "学" (results are recursively broken down if rules are found) | diff --git a/plugins/lute-mandarin/definition.yaml b/plugins/lute-mandarin/definition.yaml new file mode 100644 index 000000000..4130e052a --- /dev/null +++ b/plugins/lute-mandarin/definition.yaml @@ -0,0 +1,16 @@ +name: Mandarin Chinese +dictionaries: + - for: terms + type: embedded + url: https://chinese.yabla.com/chinese-english-pinyin-dictionary.php?define=[LUTE] + - for: sentences + type: popup + url: https://www.deepl.com/translator#zh/en/[LUTE] +show_romanization: true +# right_to_left: + +parser_type: lute_mandarin +# character_substitutions: +split_sentences: .!?。!? +# split_sentence_exceptions: +word_chars: 一-龥 \ No newline at end of file diff --git a/plugins/lute-mandarin/lute_mandarin_parser/__init__.py b/plugins/lute-mandarin/lute_mandarin_parser/__init__.py new file mode 100644 index 000000000..5b8145001 --- /dev/null +++ b/plugins/lute-mandarin/lute_mandarin_parser/__init__.py @@ -0,0 +1,5 @@ +""" +Lute Mandarin Parser +""" + +__version__ = "0.0.3" diff --git a/plugins/lute-mandarin/lute_mandarin_parser/parser.py b/plugins/lute-mandarin/lute_mandarin_parser/parser.py new file mode 100644 index 000000000..5b4cd218d --- /dev/null +++ b/plugins/lute-mandarin/lute_mandarin_parser/parser.py @@ -0,0 +1,144 @@ +""" +Parsing using Jieba + +The parser uses jieba to do parsing and pypinyin for character readings + +Includes classes: + +- MandarinParser + +""" + +import re +import os +from typing import List +import jieba +from pypinyin import pinyin +from lute.parse.base import ParsedToken, AbstractParser + + +class MandarinParser(AbstractParser): + """ + A parser for Mandarin Chinese, + using the jieba library for text segmentation. + + The user can add some exceptions to the "parsing_exceptions.txt" + data file. + """ + + @classmethod + def name(cls): + return "Lute Mandarin Chinese" + + @classmethod + def uses_data_directory(cls): + "Uses the data_directory (defined in the AbstractParser)." + return True + + @classmethod + def parser_exceptions_file(cls): + """Full path to an exceptions file (in the parser's + data_directory) to indicate which terms should be broken up + differently than what jieba suggests. For example, jieba + parses "清华大学" as a single token; however the user can + specify different parsing for this group: + + "清华,大学" says "parse 清华大学 into two tokens, 清华/大学." + "清,华,大学" says "parse 清华大学 into three tokens, 清/华/大学." + + Each rule is placed on a separate line in the + parser_exceptions file, e.g., the following file content + defines two rules: + + 清华,大学 + 学,华,大 + """ + return os.path.join(cls.data_directory, "parser_exceptions.txt") + + @classmethod + def init_data_directory(cls): + "Set up necessary files." + fp = cls.parser_exceptions_file() + if not os.path.exists(fp): + with open(fp, "w", encoding="utf8") as f: + f.write("# Parsing exceptions.\n") + f.write("# Place each rule on a separate line. e.g.:\n") + f.write("# 清华,大学\n") + f.write("# Lines preceded with # are ignored.\n") + + @classmethod + def _build_parser_exceptions_map(cls): + "Convert exceptions file to map."
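To make the data flow concrete, here is a sketch (illustration only) of what `_build_parser_exceptions_map` produces for a two-rule file, and how `_reparse_with_exceptions_map` then applies it:

```python
# exceptions file content (comment lines ignored):
#   清华,大学
#   大,学
expected_map = {
    "清华大学": ["清华", "大学"],  # the key is "".join(parts)
    "大学": ["大", "学"],
}
# _reparse_with_exceptions_map("清华大学", expected_map)
#   -> ["清华", "大", "学"]   (expansion recurses through the map)
```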
+ if cls.data_directory is None: + return {} + + ret = {} + with open(cls.parser_exceptions_file(), "r", encoding="utf8") as f: + for line in f: + stripped_line = line.strip() + if stripped_line.startswith("#"): + continue + parts = [p.strip() for p in stripped_line.split(",")] + orig_token = "".join(parts) + ret[orig_token] = parts + return ret + + def _reparse_with_exceptions_map(self, original_token, exceptions_map): + "Check the token against the map, breaking it down further if needed." + + # pylint: disable=dangerous-default-value + def _get_mapped(tok, accum=[]): + parts = exceptions_map.get(tok) + if parts is None or len(parts) == 1: + accum.append(tok) + else: + for p in parts: + _get_mapped(p, accum) + return accum + + return _get_mapped(original_token) + + def get_parsed_tokens(self, text: str, language) -> List[ParsedToken]: + """ + Returns ParsedToken array for given language. + """ + + exceptions_map = self._build_parser_exceptions_map() + + # Ensure standard carriage returns so that paragraph + # markers are used correctly. Lute uses paragraph markers + # for rendering. + text = text.replace("\r\n", "\n") + + words = list(jieba.cut(text)) + tokens = [] + pattern = f"[{language.word_characters}]" + for word in words: + is_word_char = re.match(pattern, word) is not None + is_end_of_sentence = word in language.regexp_split_sentences + if word == "\n": + word = "¶" + if word == "¶": + is_word_char = False + is_end_of_sentence = True + parts = self._reparse_with_exceptions_map(word, exceptions_map) + for p in parts: + t = ParsedToken(p, is_word_char, is_end_of_sentence) + tokens.append(t) + return tokens + + def get_reading(self, text: str): + """ + Get the pinyin for the given text. + Returns None if the pinyin doesn't add value, i.e. it is the + same as the input text (e.g. text with no Chinese characters).
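The reading logic relies on `pypinyin` echoing non-Chinese input back unchanged; a quick sketch of that behavior (readings shown are pypinyin's default tone style):

```python
from pypinyin import pinyin

pinyin("你好")   # [['nǐ'], ['hǎo']] -> joined to "nǐ hǎo"
pinyin("Hello")  # [['Hello']] -> joins back to the input, so no reading
```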
+ """ + # Use pypinyin to get the pinyin of the text + pinyin_list = pinyin(text) + # Flatten the list of lists to a single list + pinyin_list = (item for sublist in pinyin_list for item in sublist) + # Join the pinyin into a single string + ret = " ".join(pinyin_list) + if ret in ("", text): + return None + return ret diff --git a/plugins/lute-mandarin/pyproject.toml b/plugins/lute-mandarin/pyproject.toml new file mode 100644 index 000000000..ac4f5ba73 --- /dev/null +++ b/plugins/lute-mandarin/pyproject.toml @@ -0,0 +1,26 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[tool.flit.module] +name = "lute_mandarin_parser" + +[project] +name = "lute3-mandarin" +dynamic = ['version'] +description = "Learning Using Texts - Chinese Parser" +requires-python = ">=3.8" +authors = [ + {name = "Chris Ghyzel"} +] +readme = "README_PyPi.md" + +dependencies = [ + "lute3>=3.4.2", + "jieba>=0.42.1", + "pypinyin>=0.51.0" +] + + +[project.entry-points."lute.plugin.parse"] +lute_mandarin = "lute_mandarin_parser.parser:MandarinParser" diff --git a/requirements.txt b/plugins/lute-mandarin/requirements.txt similarity index 82% rename from requirements.txt rename to plugins/lute-mandarin/requirements.txt index c2ad728ce..c6123e984 100644 --- a/requirements.txt +++ b/plugins/lute-mandarin/requirements.txt @@ -8,6 +8,7 @@ cffi==1.16.0 cfgv==3.4.0 charset-normalizer==3.3.1 click==8.1.7 +colorama==0.4.6 coverage==7.3.1 dill==0.3.7 distlib==0.3.7 @@ -19,6 +20,7 @@ Flask-SQLAlchemy==3.1.1 Flask-WTF==1.2.1 flit==3.9.0 flit_core==3.9.0 +greenlet==3.0.0 h11==0.14.0 identify==2.5.31 idna==3.4 @@ -27,6 +29,8 @@ iniconfig==2.0.0 invoke==2.2.0 isort==5.12.0 itsdangerous==2.1.2 +jaconv==0.3.4 +jieba==0.42.1 Jinja2==3.1.2 lazy-object-proxy==1.9.0 Mako==1.2.4 @@ -35,6 +39,7 @@ mccabe==0.7.0 mypy-extensions==1.0.0 natto-py==1.0.1 nodeenv==1.8.0 +openepub==0.0.8 outcome==1.3.0.post0 packaging==23.1 parse==1.19.1 @@ -42,14 +47,21 @@ parse-type==0.6.2 pathspec==0.11.2 pipdeptree==2.13.0 platformdirs==3.10.0 +playwright==1.39.0 pluggy==1.3.0 pre-commit==3.5.0 pycparser==2.21 +pyee==11.0.1 pylint==2.17.5 +pypdf==3.17.4 +pypinyin==0.51.0 PySocks==1.7.1 pytest==7.4.2 +pytest-base-url==2.0.0 pytest-bdd==7.0.0 +pytest-playwright==0.4.3 pytest-splinter==3.3.2 +python-slugify==8.0.1 PyYAML==6.0.1 requests==2.31.0 selenium==4.14.0 @@ -59,6 +71,8 @@ sortedcontainers==2.4.0 soupsieve==2.5 splinter==0.19.0 SQLAlchemy==2.0.21 +subtitle-parser==1.3.0 +text-unidecode==1.3 toml==0.10.2 tomli==2.0.1 tomli_w==1.0.0 @@ -73,4 +87,5 @@ Werkzeug==2.3.7 wrapt==1.15.0 wsproto==1.2.0 WTForms==3.0.1 +xmltodict==0.13.0 zipp==3.17.0 diff --git a/plugins/lute-mandarin/tests/__init__.py b/plugins/lute-mandarin/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/plugins/lute-mandarin/tests/conftest.py b/plugins/lute-mandarin/tests/conftest.py new file mode 100644 index 000000000..f13815eea --- /dev/null +++ b/plugins/lute-mandarin/tests/conftest.py @@ -0,0 +1,36 @@ +""" +Common fixtures used by many tests. 
+""" + +import os +import yaml +import pytest + + +from lute.parse.registry import init_parser_plugins + +from lute.models.language import Language + + +def pytest_sessionstart(session): # pylint: disable=unused-argument + """ + Initialize parser list + """ + init_parser_plugins() + + +def _get_test_language(): + """ + Retrieve the language definition file for testing ths plugin from definition.yaml + """ + thisdir = os.path.dirname(os.path.realpath(__file__)) + definition_file = os.path.join(thisdir, "..", "definition.yaml") + with open(definition_file, "r", encoding="utf-8") as df: + d = yaml.safe_load(df) + lang = Language.from_dict(d) + return lang + + +@pytest.fixture(name="mandarin_chinese") +def fixture_mandarin_chinese(): + return _get_test_language() diff --git a/plugins/lute-mandarin/tests/test_MandarinParser.py b/plugins/lute-mandarin/tests/test_MandarinParser.py new file mode 100644 index 000000000..58abd611b --- /dev/null +++ b/plugins/lute-mandarin/tests/test_MandarinParser.py @@ -0,0 +1,153 @@ +""" +MandarinParser tests. +""" + +import tempfile +import os +import pytest + +# pylint: disable=wrong-import-order +from lute.models.term import Term +from lute.parse.base import ParsedToken + +from lute_mandarin_parser.parser import MandarinParser + + +@pytest.mark.parametrize( + "text,expected_count", + [ + ("我", 1), + ("运气", 1), + ("你说", 2), + ("我不相信", 3), + ("我冒了严寒 ,回到相隔二千馀里,别了二十馀年的故乡去。", 21), + ], +) +def test_token_count(text, expected_count, mandarin_chinese): + """ + token_count checks. + """ + t = Term(mandarin_chinese, text) + assert t.token_count == expected_count, text + assert t.text_lc == t.text + + +def assert_tokens_equals(text, lang, expected): + """ + Parsing a text using a language should give the expected parsed tokens. + + expected is given as array of: + [ original_text, is_word, is_end_of_sentence ] + """ + p = MandarinParser() + actual = p.get_parsed_tokens(text, lang) + expected = [ParsedToken(*a) for a in expected] + assert [str(a) for a in actual] == [str(e) for e in expected] + + +def test_end_of_sentence_stored_in_parsed_tokens(mandarin_chinese): + """ + ParsedToken is marked as EOS=True at ends of sentences. + """ + s = "你好。吃饭了吗?现在是2024年。" + + expected = [ + ("你好", True), + ("。", False, True), + ("吃饭", True), + ("了", True), + ("吗", True), + ("?", False, True), + ("现在", True), + ("是", True), + ("2024", False, False), + ("年", True), + ("。", False, True), + ] + assert_tokens_equals(s, mandarin_chinese, expected) + + +def test_carriage_returns_treated_as_reverse_p_character(mandarin_chinese): + """ + Returns need to be marked with the backwards P for rendering etc. + """ + s = "你好。\n现在。" + + expected = [ + ("你好", True), + ("。", False, True), + ("¶", False, True), + ("现在", True), + ("。", False, True), + ] + assert_tokens_equals(s, mandarin_chinese, expected) + + +def test_readings(): + """ + Parser returns readings if they add value. + """ + p = MandarinParser() + + no_reading = ["Hello"] # roman # only katakana # only hiragana + + for c in no_reading: + assert p.get_reading(c) is None, c + + cases = [("你好", "nǐ hǎo"), ("欢迎", "huān yíng"), ("中国", "zhōng guó")] + + for c in cases: + assert p.get_reading(c[0]) == c[1], c[0] + + +@pytest.fixture(name="_datadir") +def datadir(): + "The data_directory for the plugin." 
+ with tempfile.TemporaryDirectory() as temp_dir: + MandarinParser.data_directory = temp_dir + MandarinParser.init_data_directory() + exceptions_file = os.path.join(temp_dir, "parser_exceptions.txt") + assert os.path.exists(exceptions_file), "File should exist after init" + yield + + +def test_term_found_in_exceptions_file_is_split(mandarin_chinese, _datadir): + "User can specify parsing exceptions in file." + s = "清华大学" + + def parsed_tokens(): + p = MandarinParser() + return [t.token for t in p.get_parsed_tokens(s, mandarin_chinese)] + + assert ["清华大学"] == parsed_tokens(), "No exceptions" + + exceptions_file = MandarinParser.parser_exceptions_file() + assert os.path.exists(exceptions_file), "Sanity check only." + + def set_parse_exceptions(array_of_exceptions): + with open(exceptions_file, "w", encoding="utf8") as ef: + ef.write("\n".join(array_of_exceptions)) + + set_parse_exceptions(["清华大学"]) + assert ["清华大学"] == parsed_tokens(), "mapped to self" + + set_parse_exceptions(["清华,大学"]) + assert ["清华", "大学"] == parsed_tokens(), "Exceptions consulted during parse" + + set_parse_exceptions(["清华,大,学"]) + assert ["清华", "大", "学"] == parsed_tokens(), "multiple splits" + + set_parse_exceptions(["清华,大,学", "清华,大学"]) + assert ["清华", "大学"] == parsed_tokens(), "Last rule takes precedence" + + set_parse_exceptions(["清学"]) + assert ["清华大学"] == parsed_tokens(), "no match = parsed as-is" + + set_parse_exceptions(["清华,大学", "大,学"]) + assert ["清华", "大", "学"] == parsed_tokens(), "Recursive splitting" + + set_parse_exceptions(["大,学", "清华,大学"]) + assert ["清华", "大", "学"] == parsed_tokens(), "Order doesn't matter" + + set_parse_exceptions(["清华, 大学", " 大 , 学 "]) + assert ["清华", "大", "学"] == parsed_tokens(), "Spaces are ignored" diff --git a/plugins/lute-thai/README.md b/plugins/lute-thai/README.md new file mode 100644 index 000000000..ec45fe3b9 --- /dev/null +++ b/plugins/lute-thai/README.md @@ -0,0 +1,5 @@ +The Lute Thai parser. + +See [the wiki](https://github.com/LuteOrg/lute-v3/wiki/Developing-language-parser-plugins) for development notes. + +See the [Pypi readme](./README_PyPi.md) for extra notes. diff --git a/plugins/lute-thai/README_PyPi.md b/plugins/lute-thai/README_PyPi.md new file mode 100644 index 000000000..121110cbc --- /dev/null +++ b/plugins/lute-thai/README_PyPi.md @@ -0,0 +1,32 @@ +# `lute3-thai` + +A Thai parser for Lute (`lute3`) using the `pythainlp` library. + +## Installation + +See the [Lute manual](https://luteorg.github.io/lute-manual/install/plugins.html). + +## Usage + +When this parser is installed, you can add "Thai" as a +language to Lute, which comes with a simple story. + +## Notes + +Thai is tough to parse! In particular, it is sometimes hard to know where sentences are split. + +Some sentence splitting characters are specified in the Thai language +definition, which you can edit. + +**This parser also assumes that spaces are used as sentence delimiters**. + +In many cases, this is a reasonable assumption (e.g. see the stories +at [Thai +Reader](https://seasite.niu.edu/thai/thaireader/frameset.htm)), but +sometimes this can be incorrect. For example, numbers and English +words are often written with spaces surrounding them, as in this +single sentence from a news story: + +ออกคำสั่งในวันเสาร์ที่ 2 พ.ย. 2567 ให้ทหาร 5,000 นาย กับ ตำรวจและเจ้าหน้าที่กองกำลังป้องกันพลเรือนอีก 5,000 นาย ไปเสริมกำลังเจ้าหน้าที่ในแคว้นบาเลนเซีย . + +Hopefully in the future some smarter code will be able to improve the parsing to handle such situations ...
but for now, Lute can give you some support for reading in Thai. \ No newline at end of file diff --git a/plugins/lute-thai/definition.yaml b/plugins/lute-thai/definition.yaml new file mode 100644 index 000000000..a083f4258 --- /dev/null +++ b/plugins/lute-thai/definition.yaml @@ -0,0 +1,22 @@ +name: Thai +dictionaries: + - for: terms + type: embedded + url: https://dict.com/thai-english/[LUTE] + - for: terms + type: embedded + url: https://en.wiktionary.org/wiki/[LUTE] + - for: terms + type: popup + url: https://glosbe.com/th/en/[LUTE] + - for: sentences + type: popup + url: https://www.bing.com/translator/?from=th&to=en&text=[LUTE] +show_romanization: true +# right_to_left: + +parser_type: lute_thai +# character_substitutions: +split_sentences: ฯ!? +# split_sentence_exceptions: +word_chars: ก-๛ diff --git a/plugins/lute-thai/lute_thai_parser/__init__.py b/plugins/lute-thai/lute_thai_parser/__init__.py new file mode 100644 index 000000000..d7f4171d6 --- /dev/null +++ b/plugins/lute-thai/lute_thai_parser/__init__.py @@ -0,0 +1,5 @@ +""" +Lute Thai Parser +""" + +__version__ = "0.0.3" diff --git a/plugins/lute-thai/lute_thai_parser/parser.py b/plugins/lute-thai/lute_thai_parser/parser.py new file mode 100644 index 000000000..ed425f511 --- /dev/null +++ b/plugins/lute-thai/lute_thai_parser/parser.py @@ -0,0 +1,72 @@ +""" +Parsing using pythainlp + +Includes classes: + +- ThaiParser + +""" + +import re +import os +import pythainlp + +from typing import List + +from lute.parse.base import ParsedToken, AbstractParser + + +class ThaiParser(AbstractParser): + """ + A parser for Thai that uses the pythainlp library for text segmentation. + + The user can add some exceptions to the "parsing_exceptions.txt" + data file. + """ + + @classmethod + def name(cls): + return "Lute Thai" + + @classmethod + def uses_data_directory(cls): + "Uses the data_directory (defined in the AbstractParser)." + return False + + # @classmethod + # def init_data_directory(cls): + # "Set up necessary files." + # pass + + def get_parsed_tokens(self, text: str, language) -> List[ParsedToken]: + """ + Returns ParsedToken array for given language. + """ + text = text.replace("\r\n", "\n") + + words = pythainlp.word_tokenize(text) + tokens = [] + pattern = f"[{language.word_characters}]" + whitespace_regex = r"[ \t]+" + for word in words: + is_word_char = re.match(pattern, word) is not None + is_whitespace = re.match(whitespace_regex, word) is not None + is_split_sentence = word in language.regexp_split_sentences + is_end_of_sentence = is_split_sentence or is_whitespace + if is_end_of_sentence: + is_word_char = False + if word == "\n": + word = "¶" + if word == "¶": + is_word_char = False + is_end_of_sentence = True + t = ParsedToken(word, is_word_char, is_end_of_sentence) + tokens.append(t) + return tokens + + def get_reading(self, text: str): # pylint: disable=unused-argument + """ + Get the pronunciation for the given text. For most + languages, this can't be automated. 
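The token-flagging rules in `get_parsed_tokens` above can be checked in isolation; a small sketch using the split characters from the Thai `definition.yaml` (tokens illustrative):

```python
import re

split_chars = "ฯ!?"  # from the definition.yaml above
for tok in ["ภาษาไทย", " ", "!", "2"]:
    is_whitespace = re.match(r"[ \t]+", tok) is not None
    is_eos = (tok in split_chars) or is_whitespace
    print(repr(tok), "end-of-sentence" if is_eos else "regular token")
# " " and "!" are flagged EOS; "ภาษาไทย" and "2" are not.
```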
+ """ + return None diff --git a/plugins/lute-thai/pyproject.toml b/plugins/lute-thai/pyproject.toml new file mode 100644 index 000000000..ca68c3483 --- /dev/null +++ b/plugins/lute-thai/pyproject.toml @@ -0,0 +1,24 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[tool.flit.module] +name = "lute_thai_parser" + +[project] +name = "lute3-thai" +dynamic = ['version'] +description = "Learning Using Texts - Thai Parser" +requires-python = ">=3.8" +authors = [ + {name = "Justin Dom"} +] +readme = "README_PyPi.md" + +dependencies = [ + "lute3>=3.4.2", + "pythainlp==5.0.4" +] + +[project.entry-points."lute.plugin.parse"] +lute_thai = "lute_thai_parser.parser:ThaiParser" diff --git a/plugins/lute-thai/requirements.txt b/plugins/lute-thai/requirements.txt new file mode 100644 index 000000000..883c5110a --- /dev/null +++ b/plugins/lute-thai/requirements.txt @@ -0,0 +1,5 @@ +# Required dependency for base classes. +lute3>=3.4.2 + +# extra requirements here. +pythainlp==5.0.4 diff --git a/plugins/lute-thai/tests/__init__.py b/plugins/lute-thai/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/plugins/lute-thai/tests/conftest.py b/plugins/lute-thai/tests/conftest.py new file mode 100644 index 000000000..d40d2cf04 --- /dev/null +++ b/plugins/lute-thai/tests/conftest.py @@ -0,0 +1,36 @@ +""" +Common fixtures used by many tests. +""" + +import os +import yaml +import pytest + + +from lute.parse.registry import init_parser_plugins + +from lute.models.language import Language + + +def pytest_sessionstart(session): # pylint: disable=unused-argument + """ + Initialize parser list + """ + init_parser_plugins() + + +def _get_test_language(): + """ + Retrieve the language definition file for testing ths plugin from definition.yaml + """ + thisdir = os.path.dirname(os.path.realpath(__file__)) + definition_file = os.path.join(thisdir, "..", "definition.yaml") + with open(definition_file, "r", encoding="utf-8") as df: + d = yaml.safe_load(df) + lang = Language.from_dict(d) + return lang + + +@pytest.fixture(name="thai") +def fixture_thai(): + return _get_test_language() diff --git a/plugins/lute-thai/tests/test_ThaiParser.py b/plugins/lute-thai/tests/test_ThaiParser.py new file mode 100644 index 000000000..ae7860321 --- /dev/null +++ b/plugins/lute-thai/tests/test_ThaiParser.py @@ -0,0 +1,86 @@ +""" +ThaiParser tests. +""" + + +import pytest + +# pylint: disable=wrong-import-order +from lute.models.term import Term +from lute.parse.base import ParsedToken + +from lute_thai_parser.parser import ThaiParser + + +def test_token_count(thai): + """ + token_count checks. + """ + cases = [ + ("สวัสดี", 1), + ("ลาก่อน", 1), + ("ฉันรักคุณ", 3), + ("ฉันกำลังเรียนภาษาไทย", 4), + ] + for text, expected_count in cases: + t = Term(thai, text) + assert t.token_count == expected_count, text + assert t.text_lc == t.text, "case" + + +def assert_tokens_equals(text, lang, expected): + """ + Parsing a text using a language should give the expected parsed tokens. + + expected is given as array of: + [ original_text, is_word, is_end_of_sentence ] + """ + p = ThaiParser() + actual = p.get_parsed_tokens(text, lang) + expected = [ParsedToken(*a) for a in expected] + assert [str(a) for a in actual] == [str(e) for e in expected] + + +def test_end_of_sentence_stored_in_parsed_tokens(thai): + """ + ParsedToken is marked as EOS=True at ends of sentences. + """ + s = "สวัสดีทุกคน! 
ฉันเรียนภาษาไทยมา2เดือนแล้วฯ" + + expected = [ + ("สวัสดี", True), + ("ทุกคน", True), + ("!", False, True), + (" ", False, True), + ("ฉัน", True), + ("เรียน", True), + ("ภาษาไทย", True), + ("มา", True), + ("2", False), + ("เดือน", True), + ("แล้ว", True, False), + ("ฯ", False, True), + ] + assert_tokens_equals(s, thai, expected) + + +def test_carriage_returns_treated_as_reverse_p_character(thai): + """ + Returns need to be marked with the backwards P for rendering etc. + """ + s = "สวัสดีทุกคน!\nฉันเรียนภาษาไทยมา2เดือนแล้ว" + + expected = [ + ("สวัสดี", True), + ("ทุกคน", True), + ("!", False, True), + ("¶", False, True), + ("ฉัน", True), + ("เรียน", True), + ("ภาษาไทย", True), + ("มา", True), + ("2", False), + ("เดือน", True), + ("แล้ว", True, False), + ] + assert_tokens_equals(s, thai, expected) diff --git a/pyproject.toml b/pyproject.toml index f4fe65bbe..5515b668c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,16 +19,23 @@ authors = [ {name = "Jeff Zohrab", email = "jzohrab@gmail.com"} ] readme = "README_PyPi.md" + dependencies = [ "Flask-SQLAlchemy>=3.1.1,<4", "Flask-WTF>=1.2.1,<2", "natto-py>=1.0.1,<2", + "jaconv>=0.3.4,<1", "platformdirs>=3.10.0,<4", "requests>=2.31.0,<3", "beautifulsoup4>=4.12.2,<5", "PyYAML>=6.0.1,<7", "toml>=0.10.2,<1", - "waitress>=2.1.2,<3" + "waitress>=2.1.2,<3", + "openepub>=0.0.8,<1", + "pyparsing>=3.1.4", + "pypdf>=3.17.4", + "subtitle-parser>=1.3.0", + "ahocorapy>=1.6.2" ] [project.scripts] @@ -43,12 +50,13 @@ dev = [ "invoke>=2.2.0,<3", "pip>=23.0.1", "pipdeptree>=2.13.0,<3", - "pylint>=2.17.5,<3", + "pylint>=2.17.5,<4", "pytest-bdd>=7.0.0,<8", "pytest-splinter>=3.3.2,<4", + "playwright>=1.39.0,<2", "pre-commit>=3.5.0,<4", "black>=23.10.1,<24", ] [project.urls] -Home = "https://github.com/jzohrab/lute-v3" +Home = "https://github.com/luteorg/lute-v3" diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/scripts/dump_lang_data.py b/scripts/dump_lang_data.py new file mode 100644 index 000000000..9b4218a8f --- /dev/null +++ b/scripts/dump_lang_data.py @@ -0,0 +1,84 @@ +""" +Export language definition and first two pages of all books to lute/db/language_defs/ + +Run this as a module from the root directory, with languages to export as args: + +python -m scripts.dump_lang_data Catalan Bulgarian + +NOTE: this script assumes that the config is in the root folder, or at lute/config/config.yml. +""" + +import sys +import os +import yaml + +from lute.models.book import Book +from lute.models.language import Language +from lute.db import db +import lute.app_factory + + +def _language_defs_path(): + "Path to the definitions and stories." + thisdir = os.path.dirname(__file__) + d = os.path.join(thisdir, "..", "lute", "db", "language_defs") + return os.path.abspath(d) + + +def _partial_book_content(b): + "Get book content as string." + fulltext = [t.text for t in b.texts] + first_two_pages = fulltext[:2] + s = "\n".join(first_two_pages).replace("\r", "").strip() + return f"# title: {b.title}\n\n{s}" + + +def _write_langs(language_names, outdir): + "Write all langs and books." 
+ langs = db.session.query(Language).all() + langs = [lang for lang in langs if lang.name.lower() in language_names] + for lang in langs: + ld = lang.to_dict() + n = ld["name"].lower().replace(" ", "_") + langdir = os.path.join(outdir, n) + if not os.path.exists(langdir): + os.mkdir(langdir) + + file_path = os.path.join(langdir, "definition.yaml") + with open(file_path, "w", encoding="utf-8") as fp: + yaml.dump(ld, stream=fp, allow_unicode=True, sort_keys=False) + print(lang.name) + + books = db.session.query(Book).filter(Book.language == lang).all() + books = [b for b in books if not b.archived] + story_count = 1 + for b in books: + filename = f"story_{story_count}" + story_count += 1 + file_path = os.path.join(langdir, f"{filename}.txt") + with open(file_path, "w", encoding="utf-8") as f: + f.write(_partial_book_content(b)) + file_size_kb = os.path.getsize(file_path) / 1024 + print(f"- {filename} ({file_size_kb:.2f} KB)") + + +def main(langnames): + "Entry point." + outputdir = _language_defs_path() + print(f"Outputting to {outputdir}") + + langnames = [n.lower() for n in langnames] + langnames = list(set(langnames)) + app = lute.app_factory.create_app() + with app.app_context(): + _write_langs(langnames, outputdir) + + +##################### +# Entry point. + +if len(sys.argv) < 2: + print("Language names required.") + sys.exit(0) + +main(sys.argv[1:]) diff --git a/tasks.py b/tasks.py index 04d45e688..c75b7643f 100644 --- a/tasks.py +++ b/tasks.py @@ -15,18 +15,35 @@ import os import sys import subprocess +import threading +import time from datetime import datetime import requests from invoke import task, Collection from lute.config.app_config import AppConfig +# pylint: disable=unused-argument + @task def lint(c): "Run pylint on lute/ and tests/." + print("Starting lint") # Formats: https://pylint.pycqa.org/en/latest/user_guide/usage/output.html - msgfmt = "--msg-template='{path} ({line:03d}): {msg} ({msg_id} {symbol})'" - c.run(f"pylint {msgfmt} tasks.py lute/ tests/") + msgfmt = [ + "--ignore-patterns='zz_.*.py'", + "--msg-template='{path} ({line:03d}): {msg} ({msg_id} {symbol})'", + ] + c.run(f"pylint {' '.join(msgfmt)} tasks.py lute/ tests/") + + +@task +def lint_changed(c): + "Run pylint on changed files only. (*nix machines only)" + c.run("for p in `git diff --name-only | grep py`; do echo $p; pylint $p; done") + c.run( + "for p in `git diff --cached --name-only | grep py`; do echo $p; pylint $p; done" + ) @task @@ -37,8 +54,8 @@ def todos(c): c.run("python utils/todos.py") -@task(help={"port": "optional port to run on; default = 5000"}) -def start(c, port=5000): +@task(help={"port": "optional port to run on; default = 5001"}) +def start(c, port=5001): """ Start the dev server, using script dev.py. """ @@ -58,7 +75,7 @@ def search(c, search_for): @task def _ensure_test_db(c): # pylint: disable=unused-argument "Quits if not a test db. (Hidden task)" - ac = AppConfig.create_from_config() + ac = AppConfig(AppConfig.default_config_filename()) if ac.is_test_db is False: print( f""" @@ -79,44 +96,89 @@ def test(c): def _site_is_running(useport=None): - """ - Return true if site is running on port, or default 5000. - """ - if useport is None: - useport = 5000 - - url = f"http://localhost:{useport}" + "Return True if running on port." try: - print(f"checking for site at {url} ...") - resp = requests.get(url, timeout=5) + resp = requests.get(f"http://localhost:{useport}", timeout=5) if resp.status_code != 200: raise RuntimeError(f"Got code {resp.status_code} ... 
???") - print("Site running, using that for tests.") - print() return True except requests.exceptions.ConnectionError: - print(f"URL {url} not reachable, will start new server at that port.") - print() return False -@task( - pre=[_ensure_test_db], - help={ - "port": "optional port to run on; creates server if needed.", - "show": "print data", - "headless": "run as headless", - "kflag": "optional -k flag argument", - "exitfirst": "exit on first failure", - }, -) -def accept( # pylint: disable=too-many-arguments - c, port=5000, show=False, headless=False, kflag=None, exitfirst=False +def _wait_for_running_site(port): + "Wait until the site is running." + url = f"http://localhost:{port}" + is_running = False + attempt_count = 0 + print(f"Wait until site is running at {url} ...", flush=True) + while attempt_count < 10 and not is_running: + attempt_count += 1 + try: + # print(f" Attempt {attempt_count}", flush=True) + requests.get(url, timeout=5) + print(f"Site is running (succeeded on attempt {attempt_count})", flush=True) + is_running = True + except requests.exceptions.ConnectionError: + time.sleep(1) + if not is_running: + raise Exception("Site didn't start?") # pylint: disable=broad-exception-raised + + +def _run_browser_tests(port, run_test): + "Start server on port, and run tests." + tests_failed = False + if _site_is_running(port): + raise RuntimeError(f"Site already running on port {port}, quitting") + + def print_subproc_output(pipe, label): + """Prints output from a given pipe with a label.""" + for line in iter(pipe.readline, b""): + print(f"[{label}] {line.decode().strip()}", flush=True) + pipe.close() + + cmd = ["python", "-m", "tests.acceptance.start_acceptance_app", f"{port}"] + with subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) as app_process: + _wait_for_running_site(port) + stdout_thread = threading.Thread( + target=print_subproc_output, args=(app_process.stdout, "STDOUT") + ) + stderr_thread = threading.Thread( + target=print_subproc_output, args=(app_process.stderr, "STDERR") + ) + stdout_thread.start() + stderr_thread.start() + try: + subprocess.run(run_test, check=True) + except subprocess.CalledProcessError: + # This just means a test failed. We don't need to see + # a stack trace, the assert failures are already displayed. + tests_failed = True + finally: + app_process.terminate() + stdout_thread.join() + stderr_thread.join() + + if tests_failed: + raise RuntimeError("tests failed") + + +def _run_acceptance( # pylint: disable=too-many-arguments,too-many-positional-arguments + c, + port=5001, + show=False, + noheadless=False, + kflag=None, + mobile=False, + exitfirst=False, + verbose=False, ): """ Start lute, run tests/acceptance tests, screenshot fails. - If no port specified, use default 5000. + If no port specified, use default 5001. If Lute's not running on specified port, start a server. 
""" @@ -130,21 +192,95 @@ def accept( # pylint: disable=too-many-arguments if show: run_test.append("-s") - if headless: + if not noheadless: run_test.append("--headless") if kflag: run_test.append("-k") run_test.append(kflag) if exitfirst: run_test.append("--exitfirst") + if verbose: + run_test.append("-vv") + if mobile: + run_test.append("-m mobile") + run_test.append("--mobile") - if _site_is_running(port): - c.run(" ".join(run_test)) - else: - cmd = ["python", "-m", "tests.acceptance.start_acceptance_app", f"{port}"] - with subprocess.Popen(cmd) as app_process: - subprocess.run(run_test, check=True) - app_process.terminate() + _run_browser_tests(5001, run_test) + + +acceptance_help = { + "port": "optional port to run on; creates server if needed.", + "show": "print data", + "noheadless": "run as non-headless (default is headless, i.e. not shown)", + "kflag": "optional -k flag argument", + "exitfirst": "exit on first failure", + "verbose": "make verbose", +} + + +@task( + pre=[_ensure_test_db], + help=acceptance_help, +) +def accept( # pylint: disable=too-many-arguments,too-many-positional-arguments + c, + port=5001, + show=False, + noheadless=False, + kflag=None, + exitfirst=False, + verbose=False, +): + "Run acceptance tests, full browser." + _run_acceptance( # pylint: disable=too-many-arguments,too-many-positional-arguments + c, + port=port, + show=show, + noheadless=noheadless, + kflag=kflag, + mobile=False, + exitfirst=exitfirst, + verbose=verbose, + ) + + +@task( + pre=[_ensure_test_db], + help=acceptance_help, +) +def acceptmobile( # pylint: disable=too-many-arguments,too-many-positional-arguments + c, + port=5001, + show=False, + noheadless=False, + kflag=None, + exitfirst=False, + verbose=False, +): + "Run acceptance tests, mobile emulation, tests marked @mobile." + _run_acceptance( # pylint: disable=too-many-arguments,too-many-positional-arguments + c, + port=port, + show=show, + noheadless=noheadless, + kflag=kflag, + mobile=True, + exitfirst=exitfirst, + verbose=verbose, + ) + + +@task(pre=[_ensure_test_db]) +def playwright(c): + """ + Start lute, run playwright tests. export SHOW=true env var to run non-headless. + + Only uses port 5001. + + If Lute's not running on specified port, start a server. + """ + run_test = ["pytest", "tests/playwright/playwright.py", "-s"] + _run_browser_tests(5001, run_test) @task(pre=[_ensure_test_db], help={"html": "open html report"}) @@ -164,13 +300,21 @@ def coverage(c, html=False): c.run(cmd) -@task(post=[lint]) +@task def black(c): "black-format things." c.run("python -m black .") -@task(pre=[test, accept, black, lint]) +@task(pre=[test, accept, acceptmobile, playwright]) +def fulltest(c): # pylint: disable=unused-argument + """ + Run full tests check. + """ + print("Done.") + + +@task(pre=[fulltest, black, lint]) def full(c): # pylint: disable=unused-argument """ Run full check and lint. @@ -179,10 +323,14 @@ def full(c): # pylint: disable=unused-argument ns = Collection() +ns.add_task(fulltest) ns.add_task(full) ns.add_task(lint) +ns.add_task(lint_changed) ns.add_task(test) ns.add_task(accept) +ns.add_task(acceptmobile) +ns.add_task(playwright) ns.add_task(coverage) ns.add_task(todos) ns.add_task(start) @@ -194,28 +342,6 @@ def full(c): # pylint: disable=unused-argument # DB tasks -@task(pre=[_ensure_test_db]) -def db_wipe(c): - """ - Wipe the data from the testing db; factory reset settings. :-) - - Can only be run on a testing db. 
- """ - c.run("pytest -m dbwipe") - print("ok") - - -@task(pre=[_ensure_test_db]) -def db_reset(c): - """ - Reset the database to the demo data. - - Can only be run on a testing db. - """ - c.run("pytest -m dbdemoload") - print("ok") - - def _schema_dir(): "Return full path to schema dir." thisdir = os.path.dirname(os.path.realpath(__file__)) @@ -251,24 +377,19 @@ def _do_schema_export(c, destfile, header_notes, taskname): @task def db_export_baseline(c): """ - Reset the db, and create a new baseline db file from the current db. + Create a new baseline db file from the current db. """ - - # Running the delete task before this one as a pre- step was - # causing problems (sqlite file not in correct state), so this - # asks the user to verify. - text = input("Have you reset the db? (y/n): ") - if text != "y": - print("quitting.") - return _do_schema_export( - c, "baseline.sql", "Baseline db with demo data.", "db.export.baseline" + c, + "baseline.sql", + "Baseline db with flag to load demo data.", + "db.export.baseline", ) fname = os.path.join(_schema_dir(), "baseline.sql") print(f"Verifying {fname}") with open(fname, "r", encoding="utf-8") as f: - checkstring = "Tutorial follow-up" + checkstring = 'CREATE TABLE IF NOT EXISTS "languages"' if checkstring in f.read(): print(f'"{checkstring}" found, likely ok.') else: @@ -276,22 +397,15 @@ def db_export_baseline(c): raise RuntimeError(f'Missing "{checkstring}" in exported file.') -@task -def db_export_empty(c): +@task(pre=[_ensure_test_db]) +def db_reset(c): """ - Create a new empty db file from the current db. + Reset the database to baseline state for new installations, with LoadDemoData system flag set. - This assumes that the current db is in data/test_lute.db. + Can only be run on a testing db. """ - - # Running the delete task before this one as a pre- step was - # causing problems (sqlite file not in correct state), so this - # asks the user to verify. - text = input("Have you **WIPED** the db? (y/n): ") - if text != "y": - print("quitting.") - return - _do_schema_export(c, "empty.sql", "EMPTY DB.", "db.export.empty") + c.run("pytest -m dbreset") + print("\nok, export baseline.sql if needed.\n") @task(help={"suffix": "suffix to add to filename."}) @@ -300,7 +414,7 @@ def db_newscript(c, suffix): # pylint: disable=unused-argument Create a new migration, _suffix.sql """ now = datetime.now() - fnow = now.strftime("%Y%m%d_%H%M%S") + fnow = now.strftime("%Y%m%d") filename = f"{fnow}_{suffix}.sql" destfile = os.path.join(_schema_dir(), "migrations", filename) with open(destfile, "w", encoding="utf-8") as f: @@ -311,11 +425,9 @@ def db_newscript(c, suffix): # pylint: disable=unused-argument dbtasks = Collection("db") dbtasks.add_task(db_reset, "reset") -dbtasks.add_task(db_wipe, "wipe") dbtasks.add_task(db_newscript, "newscript") dbexport = Collection("export") dbexport.add_task(db_export_baseline, "baseline") -dbexport.add_task(db_export_empty, "empty") dbtasks.add_collection(dbexport) ns.add_collection(dbtasks) diff --git a/tests/acceptance/book.feature b/tests/acceptance/book.feature index 6c5ca1dfc..4cae9d8f9 100644 --- a/tests/acceptance/book.feature +++ b/tests/acceptance/book.feature @@ -4,17 +4,92 @@ Feature: Books and stats are available Given a running site And demo languages + ## Scenario: pretend failure + ## Given I visit "/" + ## Given a Spanish book "Hola" with content: + ## Hola. Tengo un gato. + ## Then the page title is Reading "Hola" + ## And the reading pane shows: + ## Hola/. /Tengo/ /un/ /perro/. 
+ + @mobile + Scenario: I can import text. + Given I visit "/" + Given a Spanish book "Hola" with content: + Hola. Tengo un gato. + Then the page title is Reading "Hola" + And the reading pane shows: + Hola/. /Tengo/ /un/ /gato/. + + Scenario: I can force page breaks where I want with "---". + Given I visit "/" + Given a Spanish book "Hola" with content: + Hola. Tengo un gato. + --- + Tienes un gato. + Then the page title is Reading "Hola" + And the reading pane shows: + Hola/. /Tengo/ /un/ /gato/. + + @mobile + Scenario: I can import a text file. + Given I visit "/" + Given a Spanish book "Hola" from file hola.txt + Then the page title is Reading "Hola" + And the reading pane shows: + Tengo/ /un/ /amigo/. + + + Scenario: I can import a url. + Given I visit "/" + Given a Spanish book from url http://localhost:5001/dev_api/fake_story.html + Then the page title is Reading "Mi perro." + And the reading pane shows: + Hola/. /Tengo/ /un/ /perro/. + + + Scenario Outline: I can import several text file types. + Given I visit "/" + Given a Spanish book "Hola" from file + Then the page title is Reading "Hola" + And the reading pane shows: + + + Examples: + | filename | content | + | Hola.srt | Tengo/ /un/ /amigo/. | + | hola.txt | Tengo/ /un/ /amigo/. | + | Hola.epub | Tengo/ /un/ /amigo/. | + | Hola.pdf | Tengo/ /un/ /amigo/. | + | Hola.vtt | Tengo/ /un/ /amigo/. | + + + Scenario Outline: Bad text files are rejected. + Given I visit "/" + Given a Spanish book "Hola" from file + Then the page contains "" + + Examples: + | filename | failure | + | non_utf_8.txt | non_utf_8.txt is not utf-8 encoding | + | invalid.epub | Could not parse invalid.epub | + | invalid_empty.epub | invalid_empty.epub is empty. | + | invalid.pdf | Could not parse invalid.pdf | + | invalid.srt | Could not parse invalid.srt | + | invalid.vtt | Could not parse invalid.vtt | + + Scenario: Books and stats are shown on the first page. Given I visit "/" Given a Spanish book "Hola" with content: Hola. Tengo un gato. - Then the page title is Reading "Hola (1/1)" + Then the page title is Reading "Hola" And the reading pane shows: Hola/. /Tengo/ /un/ /gato/. Given I visit "/" When I set the book table filter to "Hola" Then the book table contains: - Hola; Spanish; ; 4 (0%); + Hola; Spanish; ; 4; # Dealing with production bug. Scenario: Japanese book with multiple paragraphs works. @@ -23,4 +98,57 @@ Feature: Books and stats are available 多くなったのは初めてです。 韓国から来た人。 - Then the page title is Reading "Jp test (1/1)" + Then the page title is Reading "Jp test" + + # Dealing with production bug. + Scenario: Japanese book unique constraint failed bug. + Given I visit "/" + Given a Japanese book "Jp test" with content: + 情報さえ集めればどんどんお金も集まってくる。 + Then the page title is Reading "Jp test" + And the reading pane shows: + 情報/さえ/集めれ/ば/どんどん/お金/も/集まっ/て/くる/。 + + # Sanity check import same sequence of chars twice. + Scenario: Japanese import same text twice sanity check. 
+ Given I visit "/" + Given a Japanese book "Jp test1" with content: + 情報さえ集めればどんどんお金も集まってくる。 + Then the page title is Reading "Jp test1" + And the reading pane shows: + 情報/さえ/集めれ/ば/どんどん/お金/も/集まっ/て/くる/。 + + Given a Japanese book "Jp test2" with content: + 情報さえ集めればどんどんお金も集まってくる。 + Then the page title is Reading "Jp test2" + And the reading pane shows: + 情報/さえ/集めれ/ば/どんどん/お金/も/集まっ/て/くる/。 + + When I click "集めれ" and edit the form: + translation: hi + status: 2 + Then the reading pane shows: + 情報/さえ/集めれ (2)/ば/どんどん/お金/も/集まっ/て/くる/。 + + Given a Japanese book "Jp test3" with content: + 集めれ。 + Then the page title is Reading "Jp test3" + And the reading pane shows: + 集め/れ/。 + + + # Production bug https://github.com/jzohrab/lute-v3/issues/375 + Scenario: Japanese production bug 375. + Given I visit "/" + Given a new Japanese term: + text: だけど + translation: but + Given a Japanese book "Jp test" with content: + 最初はね難しい。 + + だけども、間違えますよね。 + Then the page title is Reading "Jp test" + And the reading pane shows: + 最初/はね/難しい/。/ + + だけど (1)/も/、/間違え/ます/よ/ね/。 diff --git a/tests/acceptance/conftest.py b/tests/acceptance/conftest.py index 34452855c..a83b3fa80 100644 --- a/tests/acceptance/conftest.py +++ b/tests/acceptance/conftest.py @@ -8,6 +8,7 @@ """ import os +import re import tempfile import time import yaml @@ -26,6 +27,7 @@ def pytest_addoption(parser): """ parser.addoption("--port", action="store", type=int, help="Specify the port number") parser.addoption("--headless", action="store_true", help="Run the test as headless") + parser.addoption("--mobile", action="store_true", help="Run tests tagged @mobile") @pytest.fixture(name="_environment_check", scope="session") @@ -40,14 +42,27 @@ def fixture_env_check(request): # Acceptance tests run using 'inv accept' sort this out automatically. pytest.exit("--port not set") + # Try connecting a few times. + success = False + max_attempts = 5 + curr_attempt = 0 url = f"http://localhost:{useport}/" - try: - requests.get(url, timeout=10) - except requests.exceptions.ConnectionError: - pytest.exit( - f"Unable to reach {url} ... is it running? Use inv accept to auto-start it" - ) + + while curr_attempt < max_attempts and not success: + curr_attempt += 1 + try: + requests.get(url, timeout=10) + success = True + except requests.exceptions.ConnectionError: + time.sleep(5) + + if not success: + msg = f"Unable to reach {url} after {curr_attempt} tries ... " + msg += "is it running? Use inv accept to auto-start it." 
+ pytest.exit(msg) print() + else: + print(f"Connected successfully after {curr_attempt} tries") @pytest.fixture(name="chromebrowser", scope="session") @@ -90,6 +105,21 @@ def session_chrome_browser(request, _environment_check): # how-can-i-set-the-browser-window-size-when-using-google-chrome-headless chrome_options.add_argument("window-size=1920,1080") + mobile = request.config.getoption("--mobile") + if mobile: + useragent = [ + "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D)", + "AppleWebKit/535.19 (KHTML, like Gecko)", + "Chrome/18.0.1025.166 Mobile Safari/535.19", + ] + mobile_emulation = { + "deviceMetrics": {"width": 375, "height": 812, "pixelRatio": 3.0}, + "userAgent": " ".join(useragent), + } + chrome_options.add_experimental_option("mobileEmulation", mobile_emulation) + + chrome_options.add_argument("--disable-blink-features=AutomationControlled") + # Initialize the browser with ChromeOptions browser = Browser("chrome", options=chrome_options) @@ -124,6 +154,11 @@ def fixture_restore_jp_parser(luteclient): ## STEP DEFS +@given("terminate the test") +def terminate_test(): + raise RuntimeError("Test terminated intentionally :wave:") + + # Setup @@ -139,6 +174,7 @@ def given_running_site(luteclient): resp = requests.get(luteclient.home, timeout=5) assert resp.status_code == 200, f"{luteclient.home} is up" luteclient.visit("/") + luteclient.clear_book_filter() assert luteclient.browser.is_text_present("Lute") @@ -152,6 +188,12 @@ def enable_jp_parser(luteclient): luteclient.change_parser_registry_key("disabled_japanese", "japanese") +@given("all page start dates are set to null") +def set_txstartdate_to_null(luteclient): + "Hack data." + luteclient.set_txstartdate_to_null() + + # Browsing @@ -188,7 +230,20 @@ def given_demo_langs_loaded(luteclient): @given("the demo stories are loaded") def given_demo_stories_loaded(luteclient): + "Load the demo stories." luteclient.load_demo_stories() + _sleep(1) # Hack! + luteclient.visit("/") + _sleep(1) # Hack! + luteclient.clear_book_filter() + _sleep(0.5) # Hack! + + +@given("I clear the book filter") +def given_clear_book_filter(luteclient): + "clear filter." + luteclient.visit("/") + luteclient.clear_book_filter() @given(parsers.parse("I update the {lang} language:\n{content}")) @@ -203,7 +258,25 @@ def given_update_language(luteclient, lang, content): @given(parsers.parse('a {lang} book "{title}" with content:\n{c}')) def given_book(luteclient, lang, title, c): + "Make a book." luteclient.make_book(title, c, lang) + _sleep(1) # Hack! + + +@given(parsers.parse('a {lang} book "{title}" from file {filename}')) +def given_book_from_file(luteclient, lang, title, filename): + "Book is made from file in sample_files dir." + thisdir = os.path.dirname(os.path.realpath(__file__)) + fullpath = os.path.join(thisdir, "sample_files", filename) + luteclient.make_book_from_file(title, fullpath, lang) + _sleep(1) # Hack! + + +@given(parsers.parse("a {lang} book from url {url}")) +def given_book_from_url(luteclient, lang, url): + "Book is made from url in dev_api." + luteclient.make_book_from_url(url, lang) + _sleep(1) # Hack! 
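
Both `_wait_for_running_site` in tasks.py and the `_environment_check` fixture above poll the app until it answers. A generic condensation of that retry pattern (illustrative only, not code from this PR):

```python
import time
import requests

def wait_for_http(url, attempts=10, delay=1.0):
    "Poll url until it responds or attempts run out; returns success."
    for n in range(1, attempts + 1):
        try:
            requests.get(url, timeout=5)
            print(f"{url} up after {n} attempt(s)")
            return True
        except requests.exceptions.ConnectionError:
            time.sleep(delay)
    return False

# e.g. wait_for_http("http://localhost:5001") before kicking off tests.
```
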
 @given(parsers.parse('the book table loads "{title}"'))
@@ -228,6 +301,16 @@ def check_book_table(luteclient, content):
     assert content == luteclient.get_book_table_content()
 
 
+@then(parsers.parse("book pages with start dates are:\n{content}"))
+def book_page_start_dates_are(luteclient, content):
+    assert content == luteclient.get_book_page_start_dates()
+
+
+@then(parsers.parse("book pages with read dates are:\n{content}"))
+def book_page_read_dates_are(luteclient, content):
+    assert content == luteclient.get_book_page_read_dates()
+
+
 # Terms
 
 
@@ -249,6 +332,8 @@ def import_term_file(luteclient, content):
         # do stuff with temp file
         tmp.write(content)
     luteclient.browser.attach_file("text_file", path)
+    luteclient.browser.find_by_id("create_terms").click()
+    luteclient.browser.find_by_id("update_terms").click()
     luteclient.browser.find_by_id("btnSubmit").click()
 
 
@@ -264,14 +349,38 @@ def check_term_table(luteclient, content):
     assert content == luteclient.get_term_table_content()
 
 
+@when("click Export CSV")
+def click_export_csv(luteclient):
+    "Export the term CSV."
+    luteclient.browser.find_by_css("#term_actions").mouse_over()
+    luteclient.click_link("Export CSV")
+
+
+@then(parsers.parse("exported CSV file contains:\n{content}"))
+def check_exported_file(luteclient, content):
+    "Check the exported file, replacing all dates with a placeholder."
+    actual = luteclient.get_temp_file_content("export_terms.csv").strip()
+    actual = re.sub(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", "DATE_HERE", actual)
+    assert content == actual
+
+
 # Reading
 
 
 @then(parsers.parse("the reading pane shows:\n{content}"))
 def then_read_content(luteclient, content):
     "Check rendered content."
+    c = content.replace("\n", "/")
+    timeout = 3  # seconds
+    poll_frequency = 0.25
+    start_time = time.time()
     displayed = luteclient.displayed_text()
-    assert content == displayed
+    while time.time() - start_time < timeout:
+        if c == displayed:
+            break
+        time.sleep(poll_frequency)
+        # Re-read the pane; ajax updates may still be rendering.
+        displayed = luteclient.displayed_text()
+    else:
+        assert c == displayed
 
 
 @when(parsers.parse("I change the current text content to:\n{content}"))
@@ -279,11 +388,68 @@ def when_change_content(luteclient, content):
     "Change the content."
     assert "Reading" in luteclient.browser.title, "sanity check"
     b = luteclient.browser
+    b.find_by_css("div.hamburger-btn").first.click()
+    b.find_by_id("page-operations-title").click()
     b.find_by_id("editText").click()
     b.find_by_id("text").fill(content)
     b.find_by_id("submit").click()
 
 
+@when(parsers.parse("I add a page {position} current with content:\n{content}"))
+def when_add_page(luteclient, position, content):
+    "Add a page before or after the current one."
+    assert "Reading" in luteclient.browser.title, "sanity check"
+    b = luteclient.browser
+    b.find_by_css("div.hamburger-btn").first.click()
+    b.find_by_id("page-operations-title").click()
+
+    assert position in ["before", "after"], "sanity check"
+    linkid = "readmenu_add_page_before"
+    if position == "after":
+        linkid = "readmenu_add_page_after"
+    b.find_by_id(linkid).click()
+    b.find_by_id("text").fill(content)
+    b.find_by_id("submit").click()
+    b.reload()
+
+
+@when(parsers.parse("I go to the {position} page"))
+def when_go_to_page(luteclient, position):
+    "Go to page."
+    assert "Reading" in luteclient.browser.title, "sanity check"
+    assert position in ["previous", "next"], "sanity check"
+
+    linkid = "navNext"
+    if position == "previous":
+        linkid = "navPrev"
+    b = luteclient.browser
+    b.find_by_id(linkid).first.click()
+    time.sleep(0.5)  # Assume this is necessary for ajax reload.
+    # Don't reload, as it seems to nullify the nav click.
+    # b.reload()
+
+
+@given(parsers.parse("I peek at page {pagenum}"))
+def given_peek_at_page(luteclient, pagenum):
+    "Peek at a page of the current book."
+    currurl = luteclient.browser.url
+    peekurl = re.sub(r"/page/.*", f"/peek/{pagenum}", currurl)
+    luteclient.browser.visit(peekurl)
+
+
+@when(parsers.parse("I delete the current page"))
+def when_delete_current_page(luteclient):
+    "Delete the current page."
+    assert "Reading" in luteclient.browser.title, "sanity check"
+    b = luteclient.browser
+    b.find_by_css("div.hamburger-btn").first.click()
+    b.find_by_id("page-operations-title").click()
+    b.find_by_id("readmenu_delete_page").first.click()
+    alert = b.get_alert()
+    alert.accept()
+    b.reload()
+
+
 # Reading, terms
 
 
@@ -294,6 +460,13 @@ def when_click_word_edit_form(luteclient, word, content):
     luteclient.click_word_fill_form(word, updates)
 
 
+@when(parsers.parse("I edit the bulk edit form:\n{content}"))
+def when_post_bulk_edits_while_reading(luteclient, content):
+    "The content is assumed to be yaml."
+    updates = yaml.safe_load(content)
+    luteclient.fill_reading_bulk_edit_form(updates)
+
+
 @then(parsers.parse('the reading page term form frame contains "{text}"'))
 def then_reading_page_term_form_iframe_contains(luteclient, text):
     "Have to get and read the iframe content, it's not in the main browser page."
@@ -310,6 +483,32 @@ def when_click_word(luteclient, word):
     luteclient.click_word(word)
 
 
+@then(parsers.parse('the reading page term form shows term "{text}"'))
+def then_reading_page_term_form_iframe_shows_term(luteclient, text):
+    "Have to get and read the iframe content, it's not in the main browser page."
+    with luteclient.browser.get_iframe("wordframe") as iframe:
+        time.sleep(0.4)  # Hack, test failing.
+        term_field = iframe.find_by_css("#text").first
+        zws = "\u200B"
+        val = term_field.value.replace(zws, "")
+        assert val == text, "check field value"
+
+
+@then("the bulk edit term form is shown")
+def then_reading_page_bulk_edit_term_form_is_shown(luteclient):
+    "Check content."
+    then_reading_page_term_form_iframe_contains(luteclient, "Updating")
+
+
+@then("the term form is hidden")
+def then_reading_page_term_form_is_hidden(luteclient):
+    "The form iframe src is set to blank when the form is hidden."
+    iframe_element = luteclient.browser.find_by_id("wordframeid").first
+    iframe_src = iframe_element["src"]
+    blanks = ["about:blank", "http://localhost:5001/read/empty"]
+    assert iframe_src in blanks, "Is blank"
+
+
 @when(parsers.parse("I shift click:\n{words}"))
 def shift_click_terms(luteclient, words):
     "Shift-click"
@@ -317,6 +516,18 @@ def shift_click_terms(luteclient, words):
     luteclient.shift_click_words(words)
 
 
+@when(parsers.parse('I shift-drag from "{wstart}" to "{wend}"'))
+def shift_drag(luteclient, wstart, wend):
+    "Shift-drag highlights multiple words and copies them to the clipboard."
+    luteclient.shift_drag(wstart, wend)
+
+
+@when(parsers.parse('I drag from "{wstart}" to "{wend}"'))
+def drag(luteclient, wstart, wend):
+    "Drag selects multiple words to create a multi-word term."
+    luteclient.drag(wstart, wend)
+
+
 @when(parsers.parse('I click "{word}" and press hotkey "{hotkey}"'))
 def when_click_word_press_hotkey(luteclient, word, hotkey):
     "Click word and press hotkey."
@@ -334,18 +545,28 @@ def when_hover(luteclient, word):
 
 @when(parsers.parse('I press hotkey "{hotkey}"'))
 def when_press_hotkey(luteclient, hotkey):
-    "Click word and press hotkey."
+    "Press hotkey."
luteclient.press_hotkey(hotkey) +@given(parsers.parse('I set hotkey "{hotkey}" to "{value}"')) +def given_set_hotkey(luteclient, hotkey, value): + "Set a hotkey to be X." + luteclient.hack_set_hotkey(hotkey, value) + + # Reading, paging @when(parsers.parse("I click the footer green check")) def when_click_footer_check(luteclient): - luteclient.browser.find_by_id("footerMarkRestAsKnown").click() + "Click footer." + luteclient.browser.find_by_id("footerMarkRestAsKnownNextPage").click() + time.sleep(0.1) # Leave this, remove and test fails. @when(parsers.parse("I click the footer next page")) def when_click_footer_next_page(luteclient): + "Click footer." luteclient.browser.find_by_id("footerNextPage").click() + time.sleep(0.1) # Leave this, remove and test fails. diff --git a/tests/acceptance/lute_test_client.py b/tests/acceptance/lute_test_client.py index e595a0951..c8464a4d4 100644 --- a/tests/acceptance/lute_test_client.py +++ b/tests/acceptance/lute_test_client.py @@ -15,12 +15,13 @@ """ import time +import json import requests from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.action_chains import ActionChains -class LuteTestClient: +class LuteTestClient: # pylint: disable=too-many-public-methods """ The client! """ @@ -44,6 +45,8 @@ def __init__(self, b, home): def load_demo_stories(self): "Load the demo stories." self.visit("dev_api/load_demo_stories") + self.visit("/") + self.clear_book_filter() def change_parser_registry_key(self, key, replacement): """ @@ -67,6 +70,11 @@ def index(self): "Go to home page." self.browser.visit("") + def clear_book_filter(self): + "Clear all state. Normally state is saved." + self.browser.execute_script("clear_datatable_state()") + time.sleep(0.1) + def click_link(self, linktext): self.browser.links.find_by_text(linktext).click() @@ -102,21 +110,102 @@ def make_book(self, title, text, langname): self.browser.select("language_id", int(self.language_ids[langname])) self.browser.find_by_css("#save").first.click() + def make_book_from_file(self, title, filename, langname): + "Create a book with title, content from filename, and languagename." + self.visit("book/new") + self.browser.attach_file("textfile", filename) + self.browser.find_by_css("#title").fill(title) + self.browser.select("language_id", int(self.language_ids[langname])) + self.browser.find_by_css("#save").first.click() + + def make_book_from_url(self, url, langname): + "Create a book with title, content from url, and languagename." + self.visit("book/import_webpage") + self.browser.find_by_css("#importurl").fill(url) + self.browser.find_by_css("#import").first.click() + time.sleep(0.1) # hack + self.browser.select("language_id", int(self.language_ids[langname])) + self.browser.find_by_css("#save").first.click() + def get_book_table_content(self): "Get book table content." css = "#booktable tbody tr" + # Skip the last two columns: + # - "last opened" date is a hassle to check + # - "actions" is just "..." 
def _to_string(row): tds = row.find_by_css("td") rowtext = [td.text.strip() for td in tds] - return "; ".join(rowtext).strip() + ret = "; ".join(rowtext[:-2]).strip() + # print(ret, flush=True) + return ret rows = list(self.browser.find_by_css(css)) return "\n".join([_to_string(row) for row in rows]) + def get_book_page_start_dates(self): + "get content from sql check" + sql = """select bktitle, txorder + from books + inner join texts on txbkid = bkid + where txstartdate is not null + order by bktitle, txorder""" + response = requests.get(f"{self.home}/dev_api/sqlresult/{sql}", timeout=1) + ret = "\n".join(json.loads(response.text)) + if ret == "": + ret = "-" + return ret + + def get_book_page_read_dates(self): + "get content from sql check" + sql = """select bktitle, txorder + from books + inner join texts on txbkid = bkid + where txreaddate is not null + order by bktitle, txorder""" + response = requests.get(f"{self.home}/dev_api/sqlresult/{sql}", timeout=1) + ret = "\n".join(json.loads(response.text)) + if ret == "": + ret = "-" + return ret + + def set_txstartdate_to_null(self): + "hack back end to keep test data sane." + sql = "update texts set txstartdate = null" + response = requests.get(f"{self.home}/dev_api/execsql/{sql}", timeout=1) + return response.text + ################################ # Terms + def _fill_tagify_field(self, b, fieldid, text): + "Fill in field in browser b with text." + xpath = [ + # input w/ id + f'//input[@id="{fieldid}"]', + # before it. + "/preceding-sibling::tags", + # within the with class. + '/span[@class="tagify__input"]', + ] + xpath = "".join(xpath) + + # Sometimes test runs couldn't find the parent + # tagify input, so hacky loop to get it and retry. + span = None + attempts = 0 + while span is None and attempts < 10: + time.sleep(0.2) # seconds + attempts += 1 + span = b.find_by_xpath(xpath) + if span is None: + raise RuntimeError(f"unable to find {fieldid}") + + span.type(text, slowly=False) + span.type(Keys.RETURN) + time.sleep(0.3) # seconds + def _fill_term_form(self, b, updates): "Fill in the term form." for k, v in updates.items(): @@ -126,52 +215,132 @@ def _fill_term_form(self, b, updates): # This line didn't work: # iframe.choose('status', updates['status']) s = updates["status"] - xp = f"//input[@type='radio'][@name='status'][@value='{s}']" - radios = b.find_by_xpath(xp) - assert len(radios) == 1, "have matching radio button" - radio = radios[0] - radio.click() - elif k in ("translation", "text"): + xp = "".join( + [ + "//input[@type='radio'][@name='status']", + f"[@value='{s}']", + "/following-sibling::label", + ] + ) + labels = b.find_by_xpath(xp) + assert len(labels) == 1, "have matching radio button" + label = labels[0] + label.click() + elif k in ("translation", "text", "romanization"): b.find_by_css(f"#{k}").fill(v) + elif k in ("pronunciation"): + b.find_by_css("#romanization").fill(v) elif k == "parents": for p in updates["parents"]: - xp = "ul#parentslist li.tagit-new > input.ui-autocomplete-input" - tagitbox = b.find_by_css(xp) - assert len(tagitbox) == 1, "have parent input" - box = tagitbox.first - box.type(p, slowly=False) - box.type(Keys.RETURN) - time.sleep(0.1) # seconds + self._fill_tagify_field(b, "parentslist", p) + elif k == "sync_status": + if v: + b.check("sync_status") + else: + b.uncheck("sync_status") + else: + raise RuntimeError(f"unhandled key {k}") + + def _fill_bulk_term_edit_form(self, b, updates): + "Fill in the term bulk edit form." 
+ for k, v in updates.items(): + if k == "remove parents": + if v: + b.check("remove_parents") + else: + b.uncheck("remove_parents") + elif k == "parent": + self._fill_tagify_field(b, "txtSetParent", v) + elif k == "change status": + if v: + b.check("change_status") + else: + b.uncheck("change_status") + elif k == "status": + # This line didn't work: + # iframe.choose('status', updates['status']) + s = updates["status"] + xp = "".join( + [ + "//input[@type='radio'][@name='status']", + f"[@value='{s}']", + "/following-sibling::label", + ] + ) + labels = b.find_by_xpath(xp) + assert len(labels) == 1, "have matching radio button" + label = labels[0] + label.click() + elif k in ("add tags", "remove tags"): + fields = {"add tags": "txtAddTags", "remove tags": "txtRemoveTags"} + for tag in updates[k].split(", "): + self._fill_tagify_field(b, fields[k], tag) else: raise RuntimeError(f"unhandled key {k}") def make_term(self, lang, updates): "Create a new term." - self.visit("/") - self.browser.find_by_css("#menu_terms").mouse_over() - self.browser.find_by_id("term_index").first.click() - self.click_link("Create new") + # Sometimes this failed during the unsupported_parser.feature, not sure why. + # Likely something silly, don't care, so will bypass the screen controls. + # I am sure this will bite me later. + # TODO fix_nav: figure out why occasionally got ElementNotInteractableException + # self.visit("/") + # self.browser.find_by_css("#menu_terms").mouse_over() + # self.browser.find_by_id("term_index").first.click() + # self.browser.find_by_css("#term_actions").mouse_over() + # self.click_link("Create new") + self.visit("/term/new") assert "New Term" in self.browser.html updates["language_id"] = self.language_ids[lang] b = self.browser self._fill_term_form(b, updates) - b.find_by_css("#submit").first.click() + b.find_by_css("#btnsubmit").first.click() def get_term_table_content(self): "Get term table content." self.visit("/") self.browser.find_by_css("#menu_terms").mouse_over() self.browser.find_by_id("term_index").first.click() - css = "#termtable tbody tr" + # clear any filters! + self.browser.find_by_id("showHideFilters").first.click() + + # The last column of the table is the "date added", but that's + # a hassle to check, so ignore it. def _to_string(row): tds = row.find_by_css("td") rowtext = [td.text.strip() for td in tds] + check = "; ".join(rowtext).strip() + if check == "No data available in table": + return check + + rowtext = [""] # first field is empty checkbox + rowtext.append(tds[1].text.strip()) # term + + parenttags = tds[2].text.strip() + parenttags = ", ".join(parenttags.split("\n")) + rowtext.append(parenttags) + + rowtext.append(tds[3].text.strip()) # translation + rowtext.append(tds[6].text.strip()) # language + + termtags = tds[4].text.strip() + termtags = ", ".join(termtags.split("\n")) + rowtext.append(termtags) + + select_element = row.find_by_css("select") + selected_value = select_element.value + selected_option = select_element.find_by_css( + f'option[value="{selected_value}"]' + ) + rowtext.append(selected_option.text) # status select return "; ".join(rowtext).strip() + css = "#termtable tbody tr" rows = list(self.browser.find_by_css(css)) - return "\n".join([_to_string(row) for row in rows]) + rowstring = [_to_string(row) for row in rows] + return "\n".join(rowstring) ################################3 # Reading/rendering @@ -182,19 +351,30 @@ def displayed_text(self): def _to_string(t): "Create string for token, eg 'cat (2)'." 
+ + # Note that selenium's t.text accessor strips leading/trailing whitespace, + # so if a span contains " ", t.text returns "". We need the actual + # inner html. + # pylint: disable=protected-access + inner_html = t._element.get_attribute("innerHTML") + zws = "\u200B" + inner_html = inner_html.replace(zws, "") + status = [ c.replace("status", "") for c in t["class"].split(" ") if c.startswith("status") and c != "status0" ] if len(status) == 0: - return t.text - assert len(status) == 1, f"should only have 1 status on {t.text}" - status = status[0] - return f"{t.text} ({status})" + return inner_html + assert len(status) == 1, f"should only have 1 status on {inner_html}" + return f"{inner_html} ({status[0]})" etext = [_to_string(e) for e in elements] - return "/".join(etext) + ret = "/".join(etext) + if ret.endswith("/"): + ret = ret[:-1] + return ret ################################3 # Reading, term actions @@ -223,36 +403,135 @@ def shift_click_words(self, words): ac = ac.key_up(Keys.SHIFT) ac.perform() + def shift_drag(self, fromword, toword): + "Shift-drag over words." + # https://stackoverflow.com/questions/27775759/ + # send-keys-control-click-in-selenium-with-python-bindings + # pylint: disable=protected-access + [fromel, toel] = [ + self._get_element_for_word(w)._element for w in [fromword, toword] + ] + actions = ActionChains(self.browser.driver) + actions.key_down(Keys.SHIFT) + actions.click_and_hold(fromel) + actions.move_to_element(toel) + actions.key_up(Keys.SHIFT) + actions.release() + actions.perform() + + def drag(self, fromword, toword): + "drag over words." + # https://stackoverflow.com/questions/27775759/ + # send-keys-control-click-in-selenium-with-python-bindings + # pylint: disable=protected-access + [fromel, toel] = [ + self._get_element_for_word(w)._element for w in [fromword, toword] + ] + actions = ActionChains(self.browser.driver) + actions.click_and_hold(fromel) + actions.move_to_element(toel) + actions.release() + actions.perform() + + def _refresh_browser(self): + """ + Term actions (edits, hotkeys) cause updated content to be ajaxed in. + For the splinter browser to be aware of it, the browser has to be + reloaded, but calling a self.browser.reload() has other side effects + (sets the page start date, etc). + + The below weird js hack causes the browser to be updated, + and then the js events have to be reattached too. + """ + # self.browser.reload() + # ??? ChatGPT suggested: + time.sleep(0.5) # Hack for ci. + self.browser.execute_script( + """ + // Trigger re-render of the entire body + var body = document.querySelector('body'); + var content = body.innerHTML; + body.innerHTML = ''; + body.innerHTML = content; + + // Re-attach text interactions. + window.prepareTextInteractions(); + """ + ) + time.sleep(0.5) # Hack for ci. + + def fill_reading_bulk_edit_form(self, updates=None): + """ + Click a word in the reading frame, fill in the term form iframe. + """ + updates = updates or {} + should_refresh = False + with self.browser.get_iframe("wordframe") as iframe: + time.sleep(0.4) # Hack, test failing. + self._fill_bulk_term_edit_form(iframe, updates) + time.sleep(0.4) # Hack, test failing. + iframe.find_by_css("#btnsubmit").first.click() + time.sleep(0.4) # Hack, test failing. + + # Only refresh the reading frame if everything was ok. + # Some submits will fail due to validation errors, + # and we want to look at them. + if "updated" in iframe.html: + should_refresh = True + + # Have to refresh the content to query the dom. 
+ if should_refresh: + self._refresh_browser() + + def hack_set_hotkey(self, hotkey, value): + "Hack set hotkey directly through dev api. Trashy." + sql = f"""update settings + set StValue='{value}' where StKey='{hotkey}'""" + requests.get(f"{self.home}/dev_api/execsql/{sql}", timeout=1) + # NOTE! Hacking is dumb, it bypassing the global state which is rendered in JS. + # Have to visit and save settings to re-set the JS values that will be rendered. + # Big time waste finding this out. + self.visit("settings/shortcuts") + self.browser.find_by_css("#btnSubmit").first.click() + def press_hotkey(self, hotkey): "Send a hotkey." - el = self.browser.find_by_tag("body") - map_to_js_keycode = { - "1": 49, - "2": 50, - "3": 51, - "4": 52, - "5": 53, - "i": 73, - "w": 87, - "c": 67, - "t": 84, + key_to_code_map = { + "escape": "Escape", + "1": "Digit1", + "2": "Digit2", + "3": "Digit3", + "4": "Digit4", + "5": "Digit5", + "arrowdown": "ArrowDown", + "arrowup": "ArrowUp", + "h": "KeyH", + "i": "KeyI", + "m": "KeyM", + "w": "KeyW", + # Manually added. + "8": "Digit8", + "9": "Digit9", } - jscode = map_to_js_keycode[hotkey.lower()] - shift_pressed = "true" if hotkey in ["C", "T"] else "false" - - # This was the only way I could get this to work: + if hotkey.lower() not in key_to_code_map: + raise RuntimeError(f"Missing {hotkey} in acceptance test map") + event_parts = [ + "type: 'keydown'", + f"code: '{key_to_code_map[hotkey.lower()]}'", + ] + if hotkey != hotkey.lower(): + event_parts.append("shiftKey: true") script = f"""jQuery.event.trigger({{ - type: 'keydown', - which: {jscode}, - shiftKey: '{shift_pressed}' + {', '.join(event_parts)} }});""" + # print(script, flush=True) # pylint: disable=protected-access + el = self.browser.find_by_id("thetext") self.browser.execute_script(script, el._element) time.sleep(0.2) # Or it's too fast. # print(script) - # Have to refresh the content to query the dom ... - # Unfortunately, I can't see how to refresh without reloading - self.browser.reload() + # Have to refresh the content to query the dom. + self._refresh_browser() def click_word_fill_form(self, word, updates=None): """ @@ -263,8 +542,11 @@ def click_word_fill_form(self, word, updates=None): should_refresh = False with self.browser.get_iframe("wordframe") as iframe: + time.sleep(0.4) # Hack, test failing. self._fill_term_form(iframe, updates) - iframe.find_by_css("#submit").first.click() + time.sleep(0.4) # Hack, test failing. + iframe.find_by_css("#btnsubmit").first.click() + time.sleep(0.4) # Hack, test failing. # Only refresh the reading frame if everything was ok. # Some submits will fail due to validation errors, @@ -272,10 +554,9 @@ def click_word_fill_form(self, word, updates=None): if "updated" in iframe.html: should_refresh = True - # Have to refresh the content to query the dom ... - # Unfortunately, I can't see how to refresh without reloading + # Have to refresh the content to query the dom. if should_refresh: - self.browser.reload() + self._refresh_browser() ################################3 # Misc. @@ -289,7 +570,7 @@ def elapsed(self, step): To see this data, run the acc. tests with '-s', eg: - pytest tests/acceptance/test_smoke.py --port=5000 -s + pytest tests/acceptance/test_smoke.py --port=5001 -s """ now = time.perf_counter() since_start = now - self.start @@ -301,3 +582,10 @@ def elapsed(self, step): def sleep(self, seconds): "Nap." time.sleep(seconds) + + def get_temp_file_content(self, filename): + "Get book table content." 
+ response = requests.get( + f"{self.home}/dev_api/temp_file_content/{filename}", timeout=1 + ) + return response.text diff --git a/tests/acceptance/reading.feature b/tests/acceptance/reading.feature index 4a0f96ebd..242f161d9 100644 --- a/tests/acceptance/reading.feature +++ b/tests/acceptance/reading.feature @@ -7,18 +7,20 @@ Feature: User can actually read and stuff. And demo languages + @mobile Scenario: Book elements are rendered correctly Given a Spanish book "Hola" with content: Hola. Adios amigo. - Then the page title is Reading "Hola (1/1)" + Then the page title is Reading "Hola" And the reading pane shows: Hola/. /Adios/ /amigo/. + @mobile Scenario: Updating term status updates the reading frame Given a Spanish book "Hola" with content: Hola. Adios amigo. - Then the page title is Reading "Hola (1/1)" + Then the page title is Reading "Hola" And the reading pane shows: Hola/. /Adios/ /amigo/. When I click "Hola" and edit the form: @@ -28,6 +30,19 @@ Feature: User can actually read and stuff. Hola (2)/. /Adios/ /amigo/. + Scenario: Reading a Japanese book + Given a Japanese book "Genki" with content: + 私は元気です. + Then the page title is Reading "Genki" + And the reading pane shows: + 私/は/元気/です/. + When I click "元気" and edit the form: + translation: genki + status: 2 + Then the reading pane shows: + 私/は/元気 (2)/です/. + + Scenario: Changing term case in form is allowed Given a Spanish book "Hola" with content: Hola. Adios amigo. @@ -51,50 +66,58 @@ Feature: User can actually read and stuff. When I click "Hola" and press hotkey "1" Then the reading pane shows: Hola (1)/. /Adios/ /amigo/. - When I click "Hola" and press hotkey "5" + + When I press hotkey "escape" + And I click "Hola" and press hotkey "5" Then the reading pane shows: Hola (5)/. /Adios/ /amigo/. + + When I press hotkey "escape" When I click "Hola" and press hotkey "i" Then the reading pane shows: Hola (98)/. /Adios/ /amigo/. + + When I press hotkey "escape" When I click "Hola" and press hotkey "w" Then the reading pane shows: Hola (99)/. /Adios/ /amigo/. - Scenario: Click footer green checkmark ("mark rest as known") sets rest to 99. - Given a Spanish book "Hola" with content: - Hola. Adios amigo. - When I click "Hola" and press hotkey "1" - Then the reading pane shows: - Hola (1)/. /Adios/ /amigo/. - When I click the footer green check - Then the reading pane shows: - Hola (1)/. /Adios (99)/ /amigo (99)/. +# TODO restore test: was getting "Message: stale element reference: stale element not found" +# error on trying to click the green check, couldn't solve this quickly. +### Scenario: Click footer green checkmark ("mark rest as known") sets rest to 99. +### Given a Spanish book "Hola" with content: +### Hola. Adios amigo. +### When I click "Hola" and press hotkey "1" +### Then the reading pane shows: +### Hola (1)/. /Adios/ /amigo/. +### When I click the footer green check +### Then the reading pane shows: +### Hola (1)/. /Adios (99)/ /amigo (99)/. Scenario: Learned terms are applied to new texts. Given a Spanish book "Hola" with content: Hola. Adios amigo. - When I click "Hola" and press hotkey "1" - And I click the footer green check + When I click "amigo" and press hotkey "1" Then the reading pane shows: - Hola (1)/. /Adios (99)/ /amigo (99)/. + Hola/. /Adios/ /amigo (1)/. Given a Spanish book "Otro" with content: Tengo otro amigo. Then the reading pane shows: - Tengo/ /otro/ /amigo (99)/. + Tengo/ /otro/ /amigo (1)/. 
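
The `reading pane shows` expectations use the encoding produced by `LuteTestClient.displayed_text` (see the client changes above): each rendered span becomes `text` or `text (status)`, and tokens are joined with `/`. A simplified model of that encoding (the real method also strips zero-width spaces and a trailing slash):

```python
# Simplified token encoding behind the "reading pane shows" assertions.
def token_string(text, status=None):
    return f"{text} ({status})" if status else text

tokens = [("Hola", 1), (". ", None), ("Adios", None), (" ", None),
          ("amigo", None), (".", None)]
assert "/".join(token_string(t, s) for t, s in tokens) == "Hola (1)/. /Adios/ /amigo/."
```
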
- Scenario: Clicking next w/ checkmark or next in footer sets bookmark - Given the demo stories are loaded - When I click the "Tutorial" link - # The green check doesn't advance the page - And I click the footer green check - And I click the footer next page - Given I visit "/" - And the book table loads "Tutorial" - Then the page contains "Tutorial (2/" +# TODO fix broken test: this test failed frequently in github ci, but never locally. +### Scenario: Clicking next w/ checkmark or next in footer sets bookmark +### Given the demo stories are loaded +### When I click the "Tutorial" link +### And I click the footer next page +### And sleep for 2 +### Given I visit "/" +### And I clear the book filter +### And the book table loads "Tutorial (2/6)" +### # .... nothing more to check ... Scenario: Language split sentence exceptions are respected @@ -108,10 +131,17 @@ Feature: User can actually read and stuff. status: 2 Then the reading pane shows: He/ /escrito/ /cap. (2)/ /uno/. - When I click "He" and edit the form: - parents: [ 'cap.' ] - Then the reading pane shows: - He (1)/ /escrito/ /cap. (2)/ /uno/. + + # TODO fix_flaky_test: this would periodically fail the + # assertion, "cap." still had status 2. + ### When I click "He" and edit the form: + ### parents: [ 'cap.' ] + ### And sleep for 1 + ### And I hover over "He" + ### And I press hotkey "3" + ### And sleep for 1 + ### Then the reading pane shows: + ### He (3)/ /escrito/ /cap. (3)/ /uno/. Scenario: User can update the text while reading. @@ -125,11 +155,43 @@ Feature: User can actually read and stuff. Tengo/ /otro/ /amigo/. + Scenario: User can add and remove pages. + Given a Spanish book "Hola" with content: + Hola. Adios amigo. + Then the reading pane shows: + Hola/. /Adios/ /amigo/. + + When I add a page after current with content: + Nuevo. + Then the reading pane shows: + Nuevo/. + When I go to the previous page + Then the reading pane shows: + Hola/. /Adios/ /amigo/. + + When I add a page before current with content: + Viejo. + Then the reading pane shows: + Viejo/. + When I go to the next page + Then the reading pane shows: + Hola/. /Adios/ /amigo/. + When I go to the next page + Then the reading pane shows: + Nuevo/. + + When I delete the current page + Then the reading pane shows: + Hola/. /Adios/ /amigo/. + + Scenario: Hotkey affects hovered element Given a Spanish book "Hola" with content: Tengo otro amigo. When I hover over "otro" + And sleep for 1 And I press hotkey "1" + And sleep for 1 Then the reading pane shows: Tengo/ /otro (1)/ /amigo/. @@ -164,3 +226,380 @@ Feature: User can actually read and stuff. And I press hotkey "1" Then the reading pane shows: Tengo (1)/ /otro/ /amigo (1)/. + + + Scenario: Edit forms are shown at appropriate times + Given a Spanish book "Hola" with content: + Tengo un amigo y una bebida. 
+ + When I click "amigo" + Then the reading page term form shows term "amigo" + When I shift-drag from "Tengo" to "un" + Then the reading page term form shows term "amigo" + + When I shift click: + una + bebida + Then the bulk edit term form is shown + When I press hotkey "1" + Then the term form is hidden + + When I drag from "una" to "bebida" + Then the reading page term form shows term "una bebida" + + When I press hotkey "escape" + Then the term form is hidden + + When I drag from "una" to "bebida" + Then the reading page term form shows term "una bebida" + When I shift-drag from "Tengo" to "un" + Then the reading page term form shows term "una bebida" + + + Scenario: Bulk editing terms while reading + Given a Spanish book "Hola" with content: + Tengo otro amigo. + When I shift click: + amigo + Tengo + And I edit the bulk edit form: + parent: gato + change status: true + status: 4 + add tags: hello, hi + Then the reading pane shows: + Tengo (4)/ /otro/ /amigo (4)/. + Then the term table contains: + ; Tengo; gato; ; Spanish; hello, hi; Learning (4) + ; amigo; gato; ; Spanish; hello, hi; Learning (4) + ; gato; ; ; Spanish; ; Learning (4) + + Given a Spanish book "Nuevo" with content: + Tengo otro amigo. + When I shift click: + amigo + otro + And I edit the bulk edit form: + remove parents: true + change status: true + status: 3 + add tags: newtag + remove tags: hello, badtag + Then the reading pane shows: + Tengo (4)/ /otro (3)/ /amigo (3)/. + Then the term table contains: + ; Tengo; gato; ; Spanish; hello, hi; Learning (4) + ; amigo; ; ; Spanish; hi, newtag; Learning (3) + ; gato; ; ; Spanish; ; Learning (4) + ; otro; ; ; Spanish; newtag; Learning (3) + + + Scenario: Up and down arrow sets status + Given a Spanish book "Hola" with content: + Tengo un amigo. + When I click "Tengo" and press hotkey "1" + When I click "un" and press hotkey "2" + When I click "amigo" and press hotkey "3" + Then the reading pane shows: + Tengo (1)/ /un (2)/ /amigo (3)/. + + When I press hotkey "escape" + And I shift click: + Tengo + un + amigo + And I press hotkey "arrowup" + Then the reading pane shows: + Tengo (2)/ /un (3)/ /amigo (4)/. + + When I press hotkey "escape" + And I shift click: + Tengo + un + amigo + When I press hotkey "arrowup" + Then the reading pane shows: + Tengo (3)/ /un (4)/ /amigo (5)/. + + When I press hotkey "escape" + And I shift click: + Tengo + un + amigo + When I press hotkey "arrowup" + Then the reading pane shows: + Tengo (4)/ /un (5)/ /amigo (99)/. + + When I press hotkey "escape" + And I shift click: + Tengo + un + amigo + When I press hotkey "arrowup" + Then the reading pane shows: + Tengo (5)/ /un (99)/ /amigo (99)/. + + When I press hotkey "escape" + And I shift click: + Tengo + un + amigo + When I press hotkey "arrowdown" + Then the reading pane shows: + Tengo (4)/ /un (5)/ /amigo (5)/. + + When I press hotkey "escape" + And I click "Tengo" and press hotkey "arrowdown" + And I press hotkey "arrowdown" + And I press hotkey "arrowdown" + And I press hotkey "arrowdown" + And I press hotkey "arrowdown" + And I press hotkey "arrowdown" + And I press hotkey "arrowdown" + And I press hotkey "arrowdown" + Then the reading pane shows: + Tengo (1)/ /un (5)/ /amigo (5)/. + + + Scenario: Toggling highlighting only shows highlights on hovered terms + Given a Spanish book "Hola" with content: + Tengo un amigo y otro. + When I click "Tengo" and press hotkey "1" + Then the reading pane shows: + Tengo (1)/ /un/ /amigo/ /y/ /otro/. 
+ When I click "un" and press hotkey "2" + Then the reading pane shows: + Tengo (1)/ /un (2)/ /amigo/ /y/ /otro/. + When I click "amigo" and press hotkey "3" + Then the reading pane shows: + Tengo (1)/ /un (2)/ /amigo (3)/ /y/ /otro/. + When I press hotkey "h" + And I hover over "Tengo" + Then the reading pane shows: + Tengo (1)/ /un/ /amigo/ /y/ /otro/. + When I press hotkey "h" + And I press hotkey "m" + And I press hotkey "m" + Then the reading pane shows: + Tengo (1)/ /un (2)/ /amigo (3)/ /y/ /otro/. + + + # DISABLING TEST, can't figure out what is wrong. + # When a book has multiple pages, the hotkey actions during + # acceptance testing somehow seem "stuck" on page 1. + # + # i.e. if I'm on page 1, and hit the "hotkey_MarkRead", + # the page does go to page 2, and the hidden control + # "#page_num" is updated to 2; however, subsequent calls + # to handle_page_done() (in read/index.html) **always** post + # the pagenum as 1, even though it should read from the field + # parseInt($('#page_num').val()). + # + # The hotkeys work correctly when run from the browser, + # so either: + # * something is wrong how selenium runs the browser/js + # * I'm not simulating the keypress correctly + # * there's _somehow_ some weird state being held on to. + # + ### Scenario: Can use hotkeys to move to next pages + ### Given I set hotkey "hotkey_MarkRead" to "Digit8" + ### And I set hotkey "hotkey_MarkReadWellKnown" to "Digit9" + ### And a Spanish book "Hola" with content: + ### Tengo una GATITA. + ### --- + ### Tengo una bebida. + ### --- + ### Tengo LAAAAAAA BEBIDA. + ### Then the reading pane shows: + ### Tengo/ /una/ /GATITA/. + + ### When I press hotkey "8" + ### And sleep for 2 + ### Then the reading pane shows: + ### Tengo/ /una/ /bebida/. + ### And book pages with read dates are: + ### Hola; 1 + + ### When I press hotkey "9" + ### Then the reading pane shows: + ### Tengo (99)/ /LAAAAAAA/ /BEBIDA (99)/. + ### And book pages with read dates are: + ### Hola; 1 + ### Hola; 2 + + + Scenario: Can use hotkeys to go to previous and next pages + Given I set hotkey "hotkey_PreviousPage" to "Digit8" + And I set hotkey "hotkey_NextPage" to "Digit9" + And a Spanish book "Hola" with content: + one. + --- + two. + Then the reading pane shows: + one/. + + When I press hotkey "9" + Then the reading pane shows: + two/. + And book pages with read dates are: + - + + When I press hotkey "8" + Then the reading pane shows: + one/. + And book pages with read dates are: + - + + + Scenario: Can use hotkeys to mark the page as read + Given I set hotkey "hotkey_MarkRead" to "Digit8" + And a Spanish book "Hola" with content: + Tengo una GATITA. + --- + Tengo una bebida. + Then the reading pane shows: + Tengo/ /una/ /GATITA/. + + When I press hotkey "8" + Then the reading pane shows: + Tengo/ /una/ /bebida/. + And book pages with read dates are: + Hola; 1 + + Scenario: Can use hotkeys to mark unknown terms as known and the page as read + Given I set hotkey "hotkey_MarkReadWellKnown" to "Digit9" + And a Spanish book "Hola" with content: + Tengo una GATITA. + --- + Tengo una bebida. + Then the reading pane shows: + Tengo/ /una/ /GATITA/. + + When I press hotkey "9" + Then the reading pane shows: + Tengo (99)/ /una (99)/ /bebida/. + And book pages with read dates are: + Hola; 1 + + + Scenario: Page start date is set correctly during reading + Given a Spanish book "Hola" with content: + page one here. + --- + two. + --- + three. 
+ Then book pages with start dates are: + Hola; 1 + And the reading pane shows: + page/ /one/ /here/. + + Given all page start dates are set to null + Then book pages with start dates are: + - + + # Single form post: + When I click "page" and edit the form: + text: page + Then the reading pane shows: + page (1)/ /one/ /here/. + And book pages with start dates are: + - + + # Bulk edit: + When I press hotkey "escape" + And I shift click: + one + here + And I edit the bulk edit form: + change status: true + status: 3 + Then the reading pane shows: + page (1)/ /one (3)/ /here (3)/. + And book pages with start dates are: + - + + # Hotkey: + When I press hotkey "escape" + And I click "here" and press hotkey "2" + Then the reading pane shows: + page (1)/ /one (3)/ /here (2)/. + And book pages with start dates are: + - + + # Bulk update with hotkey: + When I press hotkey "escape" + And I shift click: + page + one + When I press hotkey "arrowup" + Then the reading pane shows: + page (2)/ /one (4)/ /here (2)/. + And book pages with start dates are: + - + + When I press hotkey "escape" + And I shift click: + one + here + When I press hotkey "1" + Then the reading pane shows: + page (2)/ /one (1)/ /here (1)/. + And book pages with start dates are: + - + + When I go to the next page + Then the reading pane shows: + two/. + And book pages with start dates are: + Hola; 2 + + When I go to the previous page + Then the reading pane shows: + page (2)/ /one (1)/ /here (1)/. + And book pages with start dates are: + Hola; 1 + Hola; 2 + + Given all page start dates are set to null + Then book pages with start dates are: + - + + When I click the footer next page + Then the reading pane shows: + two/. + And book pages with start dates are: + Hola; 2 + + Given all page start dates are set to null + Then book pages with start dates are: + - + + When I click the footer green check + Then the reading pane shows: + three/. + And book pages with start dates are: + Hola; 3 + + + # Issue 530: "peeking" at page doesn't set page data. + Scenario: Peeking at page does not set current page or start date. + Given a Spanish book "Hola" with content: + Uno. + --- + Dos. + Then the reading pane shows: + Uno/. + And book pages with start dates are: + Hola; 1 + + Given I peek at page 2 + Then the reading pane shows: + Dos/. + And book pages with start dates are: + Hola; 1 + + Given I visit "/" + When I set the book table filter to "Hola" + Then the book table contains: + Hola; Spanish; ; 2; diff --git a/tests/acceptance/sample_files/Hola.epub b/tests/acceptance/sample_files/Hola.epub new file mode 100644 index 000000000..90a225064 Binary files /dev/null and b/tests/acceptance/sample_files/Hola.epub differ diff --git a/tests/acceptance/sample_files/Hola.pdf b/tests/acceptance/sample_files/Hola.pdf new file mode 100644 index 000000000..ada400c6d Binary files /dev/null and b/tests/acceptance/sample_files/Hola.pdf differ diff --git a/tests/acceptance/sample_files/Hola.srt b/tests/acceptance/sample_files/Hola.srt new file mode 100644 index 000000000..cc06a1887 --- /dev/null +++ b/tests/acceptance/sample_files/Hola.srt @@ -0,0 +1,3 @@ +1 +00:00:00,000 --> 00:00:01,000 +Tengo un amigo. diff --git a/tests/acceptance/sample_files/Hola.vtt b/tests/acceptance/sample_files/Hola.vtt new file mode 100644 index 000000000..f26da373d --- /dev/null +++ b/tests/acceptance/sample_files/Hola.vtt @@ -0,0 +1,5 @@ +WEBVTT + +1 +00:00:00.000 --> 00:00:01.000 +Tengo un amigo. 
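
The .srt and .vtt fixtures above exercise the new subtitle import path (`subtitle-parser` was added to pyproject.toml for this). Conceptually the import reduces subtitles to their dialogue lines; a stdlib-only sketch of that reduction, not Lute's actual implementation:

```python
import re

def srt_to_text(srt):
    "Drop cue numbers and timing lines, keep the dialogue."
    keep = []
    for line in srt.splitlines():
        line = line.strip()
        if not line or re.fullmatch(r"\d+", line):
            continue  # blank line or cue number, e.g. "1"
        if "-->" in line:
            continue  # timing line, e.g. "00:00:00,000 --> 00:00:01,000"
        keep.append(line)
    return "\n".join(keep)

assert srt_to_text("1\n00:00:00,000 --> 00:00:01,000\nTengo un amigo.\n") == "Tengo un amigo."
```
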
diff --git a/tests/acceptance/sample_files/fake.mp3 b/tests/acceptance/sample_files/fake.mp3 new file mode 100644 index 000000000..d4d702456 --- /dev/null +++ b/tests/acceptance/sample_files/fake.mp3 @@ -0,0 +1 @@ +fake mp3 file \ No newline at end of file diff --git a/tests/acceptance/sample_files/hola.txt b/tests/acceptance/sample_files/hola.txt new file mode 100644 index 000000000..b733da8e1 --- /dev/null +++ b/tests/acceptance/sample_files/hola.txt @@ -0,0 +1 @@ +Tengo un amigo. diff --git a/tests/acceptance/sample_files/invalid.epub b/tests/acceptance/sample_files/invalid.epub new file mode 100644 index 000000000..b733da8e1 --- /dev/null +++ b/tests/acceptance/sample_files/invalid.epub @@ -0,0 +1 @@ +Tengo un amigo. diff --git a/tests/acceptance/sample_files/invalid.pdf b/tests/acceptance/sample_files/invalid.pdf new file mode 100644 index 000000000..b733da8e1 Binary files /dev/null and b/tests/acceptance/sample_files/invalid.pdf differ diff --git a/tests/acceptance/sample_files/invalid.srt b/tests/acceptance/sample_files/invalid.srt new file mode 100644 index 000000000..b733da8e1 --- /dev/null +++ b/tests/acceptance/sample_files/invalid.srt @@ -0,0 +1 @@ +Tengo un amigo. diff --git a/tests/acceptance/sample_files/invalid.vtt b/tests/acceptance/sample_files/invalid.vtt new file mode 100644 index 000000000..b733da8e1 --- /dev/null +++ b/tests/acceptance/sample_files/invalid.vtt @@ -0,0 +1 @@ +Tengo un amigo. diff --git a/tests/acceptance/sample_files/invalid_empty.epub b/tests/acceptance/sample_files/invalid_empty.epub new file mode 100644 index 000000000..caf523000 Binary files /dev/null and b/tests/acceptance/sample_files/invalid_empty.epub differ diff --git a/tests/acceptance/sample_files/non_utf_8.txt b/tests/acceptance/sample_files/non_utf_8.txt new file mode 100644 index 000000000..3b42761b3 --- /dev/null +++ b/tests/acceptance/sample_files/non_utf_8.txt @@ -0,0 +1,2 @@ +THE GOSPEL ACCORDING TO SAINT MATTHEW +CHAPTER 1 Hello. 18 Now. diff --git a/tests/acceptance/smoke.feature b/tests/acceptance/smoke.feature new file mode 100644 index 000000000..18fdf6f6e --- /dev/null +++ b/tests/acceptance/smoke.feature @@ -0,0 +1,70 @@ +Feature: Smoke test. + Create a book, read it and create terms, view term list, export CSV. + + Background: + Given a running site + And demo languages + + + Scenario: Smoke test + # Book created and loaded. + Given a Spanish book "Hola" with content: + Hola, adios amigo. + + # No terms listed yet. + Given I visit "/" + Then the term table contains: + - + + # On read, still no terms shown in listing. + Given I visit "/" + When I click the "Hola" link + Then the page title is Reading "Hola" + And the reading pane shows: + Hola/, /adios/ /amigo/. + + # Still no terms listed. + Given I visit "/" + Then the term table contains: + - + + Given I visit "/" + When I click the "Hola" link + And I click "Hola" and edit the form: + translation: Hello + status: 2 + Then the reading pane shows: + Hola (2)/, /adios/ /amigo/. + + When I click "adios" and press hotkey "1" + Then the reading pane shows: + Hola (2)/, /adios (1)/ /amigo/. + + # Now terms exist. + Then the term table contains: + ; Hola; ; Hello; Spanish; ; New (2) + ; adios; ; ; Spanish; ; New (1) + + # Only listed terms included. 
+ When click Export CSV
+ And sleep for 1
+ Then exported CSV file contains:
+ term,parent,translation,language,tags,added,status,link_status,pronunciation
+ Hola,,Hello,Spanish,,DATE_HERE,2,,
+ adios,,,Spanish,,DATE_HERE,1,,
+
+ # DISABLING this for now: when the page is rendered,
+ # unknown terms are created with status = 0. Creating
+ # the same term from the term form causes an integrity error.
+ ### Given a new Spanish term:
+ ### text: amigo
+ ### translation: friend
+ ### status: 4
+ ### When I click the "amigo" link
+
+ ### # Term has been updated in reading screen.
+ ### Given I visit "/"
+ ### When I click the "Hola" link
+ ### Then the page title is Reading "Hola"
+ ### And the reading pane shows:
+ ### Hola (2)/. /Adios (1)/ /amigo (4)/, /adios (1)/.
diff --git a/tests/acceptance/start_acceptance_app.py b/tests/acceptance/start_acceptance_app.py
index 2645bed0c..51aa1734e 100644
--- a/tests/acceptance/start_acceptance_app.py
+++ b/tests/acceptance/start_acceptance_app.py
@@ -1,5 +1,5 @@
 """
-Copy of main.py, running the server on port 9876.
+Copy of main elements of main.py.
 This still connects to the test database etc.
 """
@@ -17,13 +17,11 @@
 # pylint: disable=wrong-import-position
 sys.path.append("..")
 from lute.app_factory import create_app
-from lute.config.app_config import AppConfig
 logging.getLogger("waitress.queue").setLevel(logging.ERROR)
-app_config = AppConfig.create_from_config()
-app = create_app(app_config)
+app = create_app()
 port = int(sys.argv[1])
-print(f"running at localhost:{port}")
+print(f"running at localhost:{port}", flush=True)
 serve(app, host="0.0.0.0", port=port)
diff --git a/tests/acceptance/sync_status.feature b/tests/acceptance/sync_status.feature
new file mode 100644
index 000000000..2c95d0b69
--- /dev/null
+++ b/tests/acceptance/sync_status.feature
@@ -0,0 +1,65 @@
+## TODO disabled_flaky_tests: these failed way too often.
+# Trying to re-enable
+
+Feature: User can link child and parent term statuses.
+
+ Background:
+ Given a running site
+ And demo languages
+ Given a Spanish book "Hola" with content:
+ Gato gatos gatito perro.
+
+
+ Scenario: Can link child and single parent term.
+ When I click "Gato" and edit the form:
+ translation: cat
+ status: 1
+ Then the reading pane shows:
+ Gato (1)/ /gatos/ /gatito/ /perro/.
+
+ When I click "gatos" and edit the form:
+ parents: [ 'Gato' ]
+ sync_status: true
+ status: 4
+ Then the reading pane shows:
+ Gato (4)/ /gatos (4)/ /gatito/ /perro/.
+
+ When I click "Gato" and press hotkey "2"
+ Then the reading pane shows:
+ Gato (2)/ /gatos (2)/ /gatito/ /perro/.
+
+ When I click "gatito" and edit the form:
+ parents: [ 'Gato' ]
+ And sleep for 1
+ And I click "Gato" and press hotkey "2"
+ Then the reading pane shows:
+ Gato (2)/ /gatos (2)/ /gatito (2)/ /perro/.
+
+ When I click "gatos" and press hotkey "5"
+ Then the reading pane shows:
+ Gato (5)/ /gatos (5)/ /gatito (5)/ /perro/.
+
+
+ Scenario: Linking multiple parents breaks status updating.
+ When I click "Gato" and edit the form:
+ translation: cat
+ status: 3
+ Then the reading pane shows:
+ Gato (3)/ /gatos/ /gatito/ /perro/.
+
+ When I click "gatos" and edit the form:
+ parents: [ 'Gato' ]
+ sync_status: true
+ status: 4
+ Then the reading pane shows:
+ Gato (4)/ /gatos (4)/ /gatito/ /perro/.
+
+ When I click "gatos" and edit the form:
+ parents: [ 'Gato', 'perro' ]
+ status: 2
+ Then the reading pane shows:
+ Gato (4)/ /gatos (2)/ /gatito/ /perro (2)/.
+
+ When I click "gatos" and press hotkey "3"
+ Then the reading pane shows:
+ Gato (4)/ /gatos (3)/ /gatito/ /perro (2)/.
diff --git a/tests/acceptance/term.feature b/tests/acceptance/term.feature
index 47323c617..001a032a0 100644
--- a/tests/acceptance/term.feature
+++ b/tests/acceptance/term.feature
@@ -25,6 +25,20 @@ Feature: Creating and managing terms
 ; bb; ; thing; Spanish; ; New (1)
+ Scenario: Can Export CSV file
+ Given a new Spanish term:
+ text: gato
+ pronunciation: GAH-to
+ translation: cat
+ Then the term table contains:
+ ; gato; ; cat; Spanish; ; New (1)
+ When click Export CSV
+ And sleep for 1
+ Then exported CSV file contains:
+ term,parent,translation,language,tags,added,status,link_status,pronunciation
+ gato,,cat,Spanish,,DATE_HERE,1,,GAH-to
+
+
 Scenario: Import a valid term file
 Given import term file:
 language,term,translation,parent,status,tags,pronunciation
@@ -44,4 +58,5 @@
 ; 爱好; ; hobby; Classical Chinese; HSK1; New (1)
-# TODO zzfuture fix: testing scenarios: term filters. \ No newline at end of file
+# TODO term testing scenarios: term filters.
+# TODO term testing scenarios: bulk delete. \ No newline at end of file
diff --git a/tests/acceptance/test_smoke.py b/tests/acceptance/test_smoke.py
index 78666e862..459b23858 100644
--- a/tests/acceptance/test_smoke.py
+++ b/tests/acceptance/test_smoke.py
@@ -2,31 +2,6 @@
 Smoke tests.
 """
+from pytest_bdd import scenarios
-
-def test_smoke_test(chromebrowser, luteclient):
- "Hit the main page, create a book, update a term."
- luteclient.visit("/")
- assert chromebrowser.is_text_present("Lute"), "have main page."
- luteclient.make_book("Hola", "Hola. Adios amigo.", "Spanish")
- assert chromebrowser.title == 'Reading "Hola (1/1)"', "title"
- assert chromebrowser.is_text_present("Hola")
- assert "Hola/. /Adios/ /amigo/." == luteclient.displayed_text()
-
- updates = {"translation": "hello", "parents": ["adios", "amigo"]}
- luteclient.click_word_fill_form("Hola", updates)
- luteclient.click_word_fill_form("Adios", {"status": "2", "translation": "goodbye"})
-
- displayed = luteclient.displayed_text()
- assert "Hola (1)/. /Adios (2)/ /amigo (1)/." == displayed
-
-
-def test_unsupported_language_not_shown(luteclient, _restore_jp_parser):
- "Missing mecab means no Japanese."
- luteclient.load_demo_stories()
-
- luteclient.change_parser_registry_key("japanese", "disabled_japanese")
- luteclient.visit("/")
- assert not luteclient.browser.is_text_present("Japanese"), "no Japanese demo book."
- assert luteclient.browser.is_text_present(
- "Tutorial"
- ), "Tutorial is available though."
+scenarios("smoke.feature")
diff --git a/tests/acceptance/test_sync_status.py b/tests/acceptance/test_sync_status.py
new file mode 100644
index 000000000..54381bb47
--- /dev/null
+++ b/tests/acceptance/test_sync_status.py
@@ -0,0 +1,7 @@
+"""
+Sync status acceptance tests.
+"""
+
+from pytest_bdd import scenarios
+
+scenarios("sync_status.feature")
diff --git a/tests/acceptance/unsupported_parser.feature b/tests/acceptance/unsupported_parser.feature
index 21d922131..c59cc0061 100644
--- a/tests/acceptance/unsupported_parser.feature
+++ b/tests/acceptance/unsupported_parser.feature
@@ -5,11 +5,11 @@ Feature: Unsupported language data is hidden
 And demo languages
 And the demo stories are loaded
- Given a Spanish book "Hola" with content:
- Hola. Tengo un gato.
- And a new Spanish term:
+ Given a new Spanish term:
 text: gato
 translation: cat
+ And a Spanish book "Hola" with content:
+ Hola. Tengo un gato.
And a Japanese book "Hola" with content: こんにちは And a new Japanese term: @@ -23,7 +23,7 @@ Feature: Unsupported language data is hidden When I set the book table filter to "Hola" Then the book table contains: - Hola; Spanish; ; 4 (0%); + Hola; Spanish; ; 4; Then the term table contains: ; gato; ; cat; Spanish; ; New (1) @@ -36,8 +36,8 @@ Feature: Unsupported language data is hidden When I set the book table filter to "Hola" Then the book table contains: - Hola; Japanese; ; 1 (0%); - Hola; Spanish; ; 4 (0%); + Hola; Japanese; ; 1; + Hola; Spanish; ; 4; Then the term table contains: ; gato; ; cat; Japanese; ; New (1) ; gato; ; cat; Spanish; ; New (1) diff --git a/tests/conftest.py b/tests/conftest.py index b07b537a8..ef29cc2a0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,7 +8,8 @@ from lute.config.app_config import AppConfig from lute.db import db -import lute.db.demo +import lute.db.management +from lute.language.service import Service from lute.app_factory import create_app from lute.models.language import Language @@ -49,14 +50,12 @@ def pytest_sessionstart(session): # pylint: disable=unused-argument @pytest.fixture(name="testconfig") def fixture_config(): "Config using the app config." - thisdir = os.path.dirname(os.path.realpath(__file__)) - configfile = os.path.join(thisdir, "..", "lute", "config", "config.yml") - ac = AppConfig(configfile) + ac = AppConfig(AppConfig.default_config_filename()) yield ac @pytest.fixture(name="app") -def fixture_app(testconfig): +def fixture_app(): """ A clean instance of the demo database. @@ -66,10 +65,12 @@ def fixture_app(testconfig): that said, it's much faster to do this than to do a "wipe and reload database" on every test run. """ - if os.path.exists(testconfig.dbfilename): - os.unlink(testconfig.dbfilename) + config_file = AppConfig.default_config_filename() + c = AppConfig(config_file) + if os.path.exists(c.dbfilename): + os.unlink(c.dbfilename) extra_config = {"WTF_CSRF_ENABLED": False, "TESTING": True} - app = create_app(testconfig, extra_config=extra_config) + app = create_app(config_file, extra_config=extra_config) yield app @@ -78,8 +79,8 @@ def fixture_app_context(app): """ Yields the app context so that tests using the db will work. """ - with app.app_context(): - yield + with app.app_context() as c: + yield c @pytest.fixture(name="empty_db") @@ -87,7 +88,7 @@ def fixture_empty_db(app_context): """ Wipe the db. """ - lute.db.management.delete_all_data() + lute.db.management.delete_all_data(db.session) @pytest.fixture(name="client") @@ -98,56 +99,56 @@ def fixture_demo_client(app): return app.test_client() -@pytest.fixture(name="demo_yaml_folder") -def fixture_yaml_folder(): - "Path to the demo files." - return os.path.join(lute.db.demo.demo_data_path(), "languages") - - -def _get_language(f): +def _get_test_language(lang_name): """ Return language from the db if it already exists, or create it from the file. """ - lang = lute.db.demo.get_demo_language(f) - db_language = db.session.query(Language).filter(Language.name == lang.name).first() - if db_language is None: + lang = db.session.query(Language).filter(Language.name == lang_name).first() + if lang is not None: return lang - return db_language - - -@pytest.fixture(name="test_languages") -def fixture_test_languages(app_context, demo_yaml_folder): - "Dict of available languages for tests." - # Hardcoded = good enough. 
- langs = ["spanish", "english", "japanese", "turkish", "classical_chinese"] - ret = {} - for lang in langs: - f = os.path.join(demo_yaml_folder, f"{lang}.yaml") - ret[lang] = _get_language(f) - yield ret + service = Service(db.session) + lang = service.get_language_def(lang_name).language + db.session.add(lang) + db.session.commit() + return lang @pytest.fixture(name="spanish") -def fixture_spanish(test_languages): - return test_languages["spanish"] +def fixture_spanish(app_context): + return _get_test_language("Spanish") @pytest.fixture(name="english") -def fixture_english(test_languages): - return test_languages["english"] +def fixture_english(app_context): + return _get_test_language("English") @pytest.fixture(name="japanese") -def fixture_japanese(test_languages): - return test_languages["japanese"] +def fixture_japanese(app_context): + return _get_test_language("Japanese") @pytest.fixture(name="turkish") -def fixture_turkish(test_languages): - return test_languages["turkish"] +def fixture_turkish(app_context): + return _get_test_language("Turkish") @pytest.fixture(name="classical_chinese") -def fixture_cl_chinese(test_languages): - return test_languages["classical_chinese"] +def fixture_cl_chinese(app_context): + return _get_test_language("Classical Chinese") + + +@pytest.fixture(name="german") +def fixture_german(app_context): + return _get_test_language("German") + + +@pytest.fixture(name="hindi") +def fixture_hindi(app_context): + return _get_test_language("Hindi") + + +@pytest.fixture(name="generic") +def fixture_generic(app_context): + return _get_test_language("Generic") diff --git a/tests/features/rendering.feature b/tests/features/rendering.feature index ae5bd9564..1232ddbde 100644 --- a/tests/features/rendering.feature +++ b/tests/features/rendering.feature @@ -5,7 +5,7 @@ Feature: Rendering Given demo data - Scenario: Smoke test + Scenario: Rendering smoke test Given language English And terms: lines @@ -18,6 +18,7 @@ Feature: Rendering Then rendered should be: Several/ /lines(1)/ /of/ /text/, and(1)/ /also/ /a/ /blank/ /line/. + And(1)/ /some/ /more/. Scenario: No terms @@ -49,6 +50,42 @@ Feature: Rendering Tengo un(1)/ gato(1)/. + Scenario: Overlapping terms starting at same position, longer wins + Given language Spanish + And terms: + tengo un + tengo un gato + And text: + Tengo un gato. + Then rendered should be: + Tengo un gato(1)/. + + + # Checking the scenario given in the code comments. + # Adding "t" to the fake terms since "I" is ignored + # during parsing (roman numeral!). + Scenario: Documentation example + Given language Spanish + And terms: + At + Bt + Ct + Dt + Et + Ft + Gt + Ht + It + Bt Ct + Et Ft Gt Ht It + Ft Gt + Ct Dt Et + And text: + At Bt Ct Dt Et Ft Gt Ht It. + Then rendered should be: + At(1)/ /Bt Ct(1)/ Dt Et(1)/ Ft Gt Ht It(1)/. 
+ + Scenario: Non overlapping terms Given language Spanish And terms: diff --git a/tests/features/term_import.feature b/tests/features/term_import.feature index 37f76eeb4..451983bdc 100644 --- a/tests/features/term_import.feature +++ b/tests/features/term_import.feature @@ -4,11 +4,23 @@ Feature: Term import Background: Given demo data - Scenario: Smoke test + + Scenario: Smoke test with no create or update Given import file: language,term,translation,parent,status,tags,pronunciation Spanish,gato,cat,,1,"animal, noun",GA-toh - Then import should succeed with 1 created, 0 skipped + When import with create false, update false + Then import should succeed with 0 created, 0 updated, 1 skipped + And words table should contain: + - + + + Scenario: Smoke test with create only + Given import file: + language,term,translation,parent,status,tags,pronunciation + Spanish,gato,cat,,1,"animal, noun",GA-toh + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped And words table should contain: gato And Spanish term "gato" should be: @@ -19,12 +31,106 @@ Feature: Term import tags: animal, noun + Scenario: Import new term as unknown + Given import file: + language,term,translation,parent,status,tags,pronunciation + Spanish,gato,cat,,1,"animal, noun",GA-toh + When import with create true, update false, new as unknown true + Then import should succeed with 1 created, 0 updated, 0 skipped + And sql "select WoText, WoStatus from words order by WoText" should return: + gato; 0 + + + Scenario: Smoke test updates ignored if not updating + Given import file: + language,term,translation,parent,status,tags,pronunciation + Spanish,gato,cat,,1,"animal, noun",GA-toh + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped + And words table should contain: + gato + And Spanish term "gato" should be: + translation: cat + pronunciation: GA-toh + status: 1 + parents: - + tags: animal, noun + + Given import file: + language,term,translation,parent,status,tags,pronunciation + Spanish,gato,NEW,,1,NEW,NEW + When import with create true, update false + Then import should succeed with 0 created, 0 updated, 1 skipped + And words table should contain: + gato + And Spanish term "gato" should be: + translation: cat + pronunciation: GA-toh + status: 1 + parents: - + tags: animal, noun + + + Scenario: Smoke test with update only + Given import file: + language,term,translation,status,tags,pronunciation + Spanish,gato,cat,1,"animal, noun",GA-toh + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped + + Given import file: + language,term,translation,status,tags + Spanish,gato,cat,3,fuzzy + Spanish,perro,dog,3,fuzzy + When import with create false, update true + Then import should succeed with 0 created, 1 updated, 1 skipped + And words table should contain: + gato + And Spanish term "gato" should be: + translation: cat + pronunciation: GA-toh + status: 3 + parents: - + tags: fuzzy + + + Scenario: Smoke test with both create and update + Given import file: + language,term,translation,status,tags,pronunciation + Spanish,gato,cat,1,"animal, noun",GA-toh + When import with create true, update true + Then import should succeed with 1 created, 0 updated, 0 skipped + + Given import file: + language,term,translation,status,tags + Spanish,gato,cat,3,fuzzy + Spanish,perro,dog,2,fuzzy + When import with create true, update true + Then import should succeed with 1 created, 1 updated, 0 
skipped + And words table should contain: + gato + perro + And Spanish term "gato" should be: + translation: cat + pronunciation: GA-toh + status: 3 + parents: - + tags: fuzzy + And Spanish term "perro" should be: + translation: dog + pronunciation: - + status: 2 + parents: - + tags: fuzzy + + Scenario: Translation field can contain a return Given import file: language,term,translation,parent,status,tags,pronunciation Spanish,gato,"A cat. A house cat.",,1,"animal, noun",GA-toh - Then import should succeed with 1 created, 0 skipped + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped And words table should contain: gato And Spanish term "gato" should be: @@ -47,7 +153,8 @@ Feature: Term import 1. blah 2. ""you know""",,3,,TEE-2 Spanish,third,,,W,?, - Then import should succeed with 3 created, 0 skipped + When import with create true, update false + Then import should succeed with 3 created, 0 updated, 0 skipped And words table should contain: other term @@ -59,7 +166,8 @@ Feature: Term import language,term,translation,parent,status,tags,pronunciation Spanish,gato,cat,,1,"animal, noun",GA-toh Spanish,perro,dog,,1,"animal, noun",PERR-oh - Then import should succeed with 2 created, 0 skipped + When import with create true, update false + Then import should succeed with 2 created, 0 updated, 0 skipped And words table should contain: gato perro @@ -69,7 +177,8 @@ Feature: Term import Given import file: language,term,translation,parent,status,tags,pronunciation Spanish,gato,cat,,1,"animal, noun",GA-toh - Then import should succeed with 1 created, 0 skipped + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped And words table should contain: gato And Spanish term "gato" should be: @@ -81,7 +190,8 @@ Feature: Term import Given import file: language,term,translation,parent,status,tags,pronunciation Spanish,gato,UPDATED,,1,"animal, noun",GA-toh - Then import should succeed with 0 created, 1 skipped + When import with create true, update false + Then import should succeed with 0 created, 0 updated, 1 skipped And Spanish term "gato" should be: translation: cat pronunciation: GA-toh @@ -94,7 +204,8 @@ Feature: Term import Given import file: language,term,translation,parent,status,tags,pronunciation Spanish,gato,cat,,1,"animal, noun",GA-toh - Then import should succeed with 1 created, 0 skipped + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped And words table should contain: gato And Spanish term "gato" should be: @@ -106,10 +217,11 @@ Feature: Term import Given import file: language,term,translation,parent,status,tags,pronunciation Spanish,GATO,UPDATED,,1,"animal, noun",GA-toh - Then import should succeed with 0 created, 1 skipped + When import with create true, update false + Then import should succeed with 0 created, 0 updated, 1 skipped - Scenario: Import is case-insensitive + Scenario: Import statuses are mapped to status IDs Given import file: language,term,status Spanish,a,1 @@ -119,7 +231,8 @@ Feature: Term import Spanish,e,5 Spanish,f,W Spanish,g,I - Then import should succeed with 7 created, 0 skipped + When import with create true, update false + Then import should succeed with 7 created, 0 updated, 0 skipped And words table should contain: a b @@ -138,11 +251,60 @@ Feature: Term import g; 98 + Scenario: Import should not update status if status not included + Given import file: + language,term,translation + Spanish,a,aa 
+ When import with create true, update false, new as unknown true + Then import should succeed with 1 created, 0 updated, 0 skipped + + Given import file: + language,term,translation,status + Spanish,b,bb,1 + Spanish,c,cc,2 + When import with create true, update false + Then import should succeed with 2 created, 0 updated, 0 skipped + + Then sql "select WoText, WoTranslation, WoStatus from words order by WoText" should return: + a; aa; 0 + b; bb; 1 + c; cc; 2 + + Given import file: + language,term,translation + Spanish,a,aaNEW + Spanish,b,bbNEW + Spanish,c,ccNEW + When import with create false, update true, new as unknown true + Then import should succeed with 0 created, 3 updated, 0 skipped + And sql "select WoText, WoTranslation, WoStatus from words order by WoText" should return: + a; aaNEW; 0 + b; bbNEW; 1 + c; ccNEW; 2 + + + Scenario: Import field names are case-insensitive + Given import file: + LANGUAGE,Term,TRANSLATION,paRENT,statUS,TAGS,Pronunciation + Spanish,gato,cat,,1,"animal, noun",GA-toh + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped + And words table should contain: + gato + And Spanish term "gato" should be: + translation: cat + pronunciation: GA-toh + status: 1 + parents: - + tags: animal, noun + + Scenario: Parent created on import Given import file: language,term,translation,parent,status,tags,pronunciation Spanish,gatos,cat,gato,1,"animal, noun",GA-toh - Then import should succeed with 1 created, 0 skipped + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped And words table should contain: gato gatos @@ -159,7 +321,8 @@ Feature: Term import language,term,parent Spanish,gatos,gato English,gato, - Then import should succeed with 2 created, 0 skipped + When import with create true, update false + Then import should succeed with 2 created, 0 updated, 0 skipped And words table should contain: gato gato @@ -177,7 +340,8 @@ Feature: Term import language,term,translation,parent,status,tags,pronunciation Spanish,gatos,,gato,1,, Spanish,gato,CAT,,1,animal,GAH-toh - Then import should succeed with 2 created, 0 skipped + When import with create true, update false + Then import should succeed with 2 created, 0 updated, 0 skipped And words table should contain: gato gatos @@ -195,22 +359,118 @@ Feature: Term import tags: animal + Scenario: Import can sync parent and children statuses + Given import file: + language,term,parent,status + Spanish,a,,1 + Spanish,b,a,2 + Spanish,c,a,3 + Spanish,d,a,4 + When import with create true, update false + Then import should succeed with 4 created, 0 updated, 0 skipped + And sql "select WoText, WoStatus, WoSyncStatus from words order by WoText" should return: + a; 1; 0 + b; 2; 0 + c; 3; 0 + d; 4; 0 + + Given import file: + language,term,parent,status,link_status + Spanish,a,,1, + Spanish,b,a,2,y + Spanish,c,a,3,y + Spanish,d,a,4, + When import with create false, update true + Then import should succeed with 0 created, 4 updated, 0 skipped + And sql "select WoText, WoStatus, WoSyncStatus from words order by WoText" should return: + a; 3; 0 + b; 3; 1 + c; 3; 1 + d; 4; 0 + + + Scenario: Issue 387 child without status inherits parent status if linked + Given import file: + language,term,status + Spanish,a,3 + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped + And sql "select WoText, WoStatus, WoSyncStatus from words order by WoText" should return: + a; 3; 0 + + Given 
import file: + language,term,parent,link_status + Spanish,achild,a,y + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped + And sql "select WoText, WoStatus, WoSyncStatus from words order by WoText" should return: + a; 3; 0 + achild; 3; 1 + + + Scenario: Issue 387 child with status overrides parent status if linked + Given import file: + language,term,status + Spanish,a,3 + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped + And sql "select WoText, WoStatus, WoSyncStatus from words order by WoText" should return: + a; 3; 0 + + Given import file: + language,term,parent,status,link_status + Spanish,achild,a,2,y + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped + And sql "select WoText, WoStatus, WoSyncStatus from words order by WoText" should return: + a; 2; 0 + achild; 2; 1 + + + Scenario: Issue 387 importing an unknown child of a parent sets its status to parent + Given import file: + language,term,status + Spanish,a,3 + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped + And sql "select WoText, WoStatus, WoSyncStatus from words order by WoText" should return: + a; 3; 0 + + # Importing a term as "unknown" really imports it as "known" (with non-0 status) + # if it's associated with a known parent! + # This may seem counter-intuitive, but it's really the only thing that makes sense. + # The parent is "known" (non-0 status), so if the child is associated with that + # parent then really it's known too. Having such a child be an exception to the + # parent-status following rules is so hairy that I'm not going to bother doing it. 
+ Given import file: + language,term,parent,link_status + Spanish,achild,a,y + When import with create true, update false, new as unknown true + Then import should succeed with 1 created, 0 updated, 0 skipped + And sql "select WoText, WoStatus, WoSyncStatus from words order by WoText" should return: + a; 3; 0 + achild; 3; 1 + + Scenario: Import file fields can be in any order Given import file: language,translation,term,parent,status,tags,pronunciation Spanish,,gatos,gato,1,, Spanish,CAT,gato,,1,animal,GAH-toh - Then import should succeed with 2 created, 0 skipped + When import with create true, update false + Then import should succeed with 2 created, 0 updated, 0 skipped And words table should contain: gato gatos + Scenario: Term can have multiple parents Given import file: language,translation,term,parent,status,tags,pronunciation Spanish,,gatos,"gato, cat",1,, - Then import should succeed with 1 created, 0 skipped + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped And words table should contain: cat gato @@ -222,7 +482,20 @@ Feature: Term import language,term Spanish,gato spanish,gatos - Then import should succeed with 2 created, 0 skipped + When import with create true, update false + Then import should succeed with 2 created, 0 updated, 0 skipped + And words table should contain: + gato + gatos + + + Scenario: Field named added is ignored + Given import file: + language,term,added + Spanish,gato,27-July-2020 + spanish,gatos,27-July-2020 + When import with create true, update false + Then import should succeed with 2 created, 0 updated, 0 skipped And words table should contain: gato gatos @@ -254,13 +527,28 @@ Feature: Term import Scenario: Duplicate term throws Given import file: - language,term - Spanish,gato - Spanish,gato + language,term,translation + Spanish,gato,cat + Spanish,gato,kitty Then import should fail with message: Duplicate terms in import: Spanish: gato + Scenario: Duplicate term but identical line imports ok + Given import file: + language,term,translation + Spanish,gato,cat + Spanish,gato,cat + Spanish,perro,dog + Spanish,gato,cat + Spanish,perro,dog + When import with create true, update false + Then import should succeed with 2 created, 0 updated, 0 skipped + And sql "select WoText, WoStatus from words order by WoText" should return: + gato; 1 + perro; 1 + + Scenario: Fix issue 51: mandarin duplicate term throws Given import file: language,term,translation,pronunciation,tags @@ -270,6 +558,16 @@ Feature: Term import Duplicate terms in import: Classical chinese: 啊 + Scenario: Fix issue 454 duplicate tags ok + Given import file: + language,translation,term,parent,status,tags,pronunciation + Spanish,cat,gato,,1,"animal,animal",GAH-toh + When import with create true, update false + Then import should succeed with 1 created, 0 updated, 0 skipped + And words table should contain: + gato + + Scenario: Bad status throws Given import file: language,term,status @@ -297,3 +595,19 @@ Feature: Term import language,term,status Then import should fail with message: No terms in file + + + Scenario: Fix issue 454 too many fields in data line fails + Given import file: + language,translation,term,parent,status,tags,pronunciation + Spanish,cat,gato,,1,animal,GAH-toh,EXTRA_STUFF + Then import should fail with message: + Extra values on line 1 + + + Scenario: Fix issue 454 too few fields in data line fails + Given import file: + language,translation,term,parent,status,tags,pronunciation + Spanish,cat,gato,,1,animal + Then 
import should fail with message:
+ Missing values on line 1
diff --git a/tests/features/term_import_status_0.feature b/tests/features/term_import_status_0.feature
new file mode 100644
index 000000000..12e8815ad
--- /dev/null
+++ b/tests/features/term_import_status_0.feature
@@ -0,0 +1,62 @@
+Feature: Term import updating Status 0 terms
+ If a term is in the database but its status is 0, it is still "unknown" --
+ it's a placeholder record only. It's as if the term doesn't exist in the db.
+ For that reason, it should only be "created" (set to a different status)
+ if the "create" flag is set to true.
+
+ Background:
+ Given demo data
+
+ Scenario: Status 0 terms are updated if update true
+ Given import file:
+ language,term,translation,parent,status,tags,pronunciation
+ Spanish,gato,cat,,1,"animal, noun",GA-toh
+ When import with create true, update false, new as unknown true
+ Then import should succeed with 1 created, 0 updated, 0 skipped
+ And sql "select WoText, WoTranslation, WoStatus from words order by WoText" should return:
+ gato; cat; 0
+
+ Given import file:
+ language,term,translation,status
+ Spanish,gato,kitty,1
+
+ When import with create true, update true, new as unknown false
+ Then import should succeed with 0 created, 1 updated, 0 skipped
+ And sql "select WoText, WoTranslation, WoStatus from words order by WoText" should return:
+ gato; kitty; 1
+
+
+ Scenario: Status 0 terms are left as status 0 if updated but status not set.
+ Given import file:
+ language,term,translation,parent,status,tags,pronunciation
+ Spanish,gato,cat,,1,"animal, noun",GA-toh
+ When import with create true, update false, new as unknown true
+ Then import should succeed with 1 created, 0 updated, 0 skipped
+ And sql "select WoText, WoTranslation, WoStatus from words order by WoText" should return:
+ gato; cat; 0
+
+ Given import file:
+ language,term,translation
+ Spanish,gato,kitty
+ When import with create false, update true, new as unknown false
+ Then import should succeed with 0 created, 1 updated, 0 skipped
+ And sql "select WoText, WoTranslation, WoStatus from words order by WoText" should return:
+ gato; kitty; 0
+
+
+ Scenario: Status 0 terms not updated if create true and update false
+ Given import file:
+ language,term,translation,parent,status,tags,pronunciation
+ Spanish,gato,cat,,1,"animal, noun",GA-toh
+ When import with create true, update false, new as unknown true
+ Then import should succeed with 1 created, 0 updated, 0 skipped
+ And sql "select WoText, WoTranslation, WoStatus from words order by WoText" should return:
+ gato; cat; 0
+
+ Given import file:
+ language,term,translation
+ Spanish,gato,kitty
+ When import with create true, update false, new as unknown false
+ Then import should succeed with 0 created, 0 updated, 1 skipped
+ And sql "select WoText, WoTranslation, WoStatus from words order by WoText" should return:
+ gato; cat; 0
diff --git a/tests/features/term_parent_mapping.feature b/tests/features/term_parent_mapping.feature
deleted file mode 100644
index a2788da02..000000000
--- a/tests/features/term_parent_mapping.feature
+++ /dev/null
@@ -1,300 +0,0 @@
-Feature: Term import
- Valid files are imported
-
- Background:
- Given demo data
- And language Spanish
-
- # The scenarios here use parent term 'dog' and child term 'dogs',
- # as documented in the wiki.
- # https://github.com/jzohrab/lute/wiki/Bulk-Mapping-Parent-Terms.
- # The wiki is actually slightly out of date, as Lute now supports
- # multiple parents.
- - - Scenario: "dog" and "dogs" exist, "dogs" doesn't have a parent. - Given terms: - dog - dogs - Then parents should be: - - - Given import file: - parent,term - dog,dogs - Then import should succeed with 0 created, 1 updated - And parents should be: - dog; dogs - - - Scenario: "dog" and "dogs" exist, "dog" is already parent of "dogs" - Given terms: - dog - dogs - And "dog" is parent of "dogs" - Then parents should be: - dog; dogs - Given import file: - parent,term - dog,dogs - Then import should succeed with 0 created, 0 updated - # Unchanged parents - And parents should be: - dog; dogs - And words table should contain: - dog - dogs - - - Scenario: "dog" and "dogs" exist, "dogs" already has a parent, so a new parent is added - Given terms: - dog - dogs - hound - And "hound" is parent of "dogs" - Then parents should be: - hound; dogs - Given import file: - parent,term - dog,dogs - Then import should succeed with 0 created, 1 updated - # Unchanged parents - And parents should be: - dog; dogs - hound; dogs - And words table should contain: - dog - dogs - hound - - - Scenario: Mapping a term as its own parent does nothing - Given terms: - dog - dogs - Given import file: - parent,term - dogs,dogs - Then import should succeed with 0 created, 0 updated - And parents should be: - - - - - Scenario: "dogs" exist but "dog" does not - Given terms: - dogs - And import file: - parent,term - dog,dogs - Then import should succeed with 1 created, 1 updated - And parents should be: - dog; dogs - And words table should contain: - dog - dogs - And "dog" flash message should be: - Auto-created parent for "dogs" - - - Scenario: "doggies" and "dogs" exist but "dog" does not - Given terms: - dogs - doggies - And import file: - parent,term - dog,dogs - dog,doggies - Then import should succeed with 1 created, 2 updated - And parents should be: - dog; doggies - dog; dogs - And words table should contain: - dog - doggies - dogs - And "dog" flash message should be: - Auto-created parent for "dogs" + 1 more - And "doggies" flash message should be: - - - - - Scenario: Import does not create new terms if missing child and parent - Given import file: - parent,term - dog,dogs - Then import should succeed with 0 created, 0 updated - And words table should contain: - - - - - Scenario: "dogs" exist but "dog" does not, no "cat" terms exist - Given terms: - dogs - And import file: - parent,term - dog,dogs - cat,cats - Then import should succeed with 1 created, 1 updated - And parents should be: - dog; dogs - And words table should contain: - dog - dogs - And "dog" flash message should be: - Auto-created parent for "dogs" - - - Scenario: "dog" exists but "dogs" child term does not - Given terms: - dog - And import file: - parent,term - dog,dogs - Then import should succeed with 1 created, 0 updated - And words table should contain: - dog - dogs - And parents should be: - dog; dogs - And "dogs" flash message should be: - Auto-created and mapped to parent "dog" - - - Scenario: Duplicate mappings are ok - Given terms: - dog - And import file: - parent,term - dog,dogs - dog,dogs - Then import should succeed with 1 created, 0 updated - And words table should contain: - dog - dogs - And parents should be: - dog; dogs - And "dogs" flash message should be: - Auto-created and mapped to parent "dog" - - - Scenario: Child term can be mapped to multiple parents. 
- Given terms: - dogs - And import file: - parent,term - dog,dogs - pup,dogs - Then import should succeed with 2 created, 2 updated - And words table should contain: - dog - dogs - pup - And parents should be: - dog; dogs - pup; dogs - - - Scenario: Case ignored for mapping, and accented caps is OK. - Given terms: - pA - Á - Then words table should contain: - pa - á - Given import file: - parent,term - pA,á - Then import should succeed with 0 created, 1 updated - And parents should be: - pa; á - - - Scenario: Existing term and parent, new link created - Given terms: - dog - dogs - And import file: - parent,term - dog,dogs - Then import should succeed with 0 created, 1 updated - And parents should be: - dog; dogs - And "dogs" flash message should be: - - - - - Scenario: Child term creates parent term which creates other child term - "propagation" of new terms - Given terms: - gatito - # Note for the file below, "gatito" creates "gato", but then "gato" creates "gatos"! - And import file: - parent,term - gato,gatos - gato,gatito - Then import should succeed with 2 created, 1 updated - And words table should contain: - gatito - gato - gatos - And parents should be: - gato; gatito - gato; gatos - - - # Tricky case where a new term will get created, but is also - # needed as a parent for an existing term ... this showed up on - # tests on my machine. - Scenario: new term is used as parent for other term - Given terms: - aladas - alado - # x is a (new) parent of aladas, _and_ a (new) - # child of alado. - And import file: - parent,term - x,aladas - alado,x - Then import should succeed with 1 created, 2 updated - And words table should contain: - aladas - alado - x - And parents should be: - alado; x - x; aladas - - - Scenario: Bad heading throws - Given import file: - blah,blah2 - blah,gato,x - Then import should fail with message: - File must contain headings 'parent' and 'term' - - - Scenario: Term is required - Given import file: - parent,term - something, - Then import should fail with message: - Term is required - - - Scenario: Parent is required - Given import file: - parent,term - ,something - Then import should fail with message: - Term is required - - - Scenario: Empty file throws - Given empty import file - Then import should fail with message: - No mappings in file - - - Scenario: File with only headings throws - Given import file: - parent,term - Then import should fail with message: - No mappings in file \ No newline at end of file diff --git a/tests/features/test_rendering.py b/tests/features/test_rendering.py index f9a9713b2..df8cf87a7 100644 --- a/tests/features/test_rendering.py +++ b/tests/features/test_rendering.py @@ -7,8 +7,10 @@ from lute.db import db from lute.models.language import Language +from lute.language.service import Service as LanguageService from lute.term.model import Repository -from lute.read.service import get_paragraphs, set_unknowns_to_known, bulk_status_update +from lute.read.render.service import Service as RenderService +from lute.read.service import Service from tests.utils import add_terms, make_text from tests.dbasserts import assert_sql_result @@ -26,10 +28,13 @@ @given("demo data") def given_demo_data(app_context): "Calling app_context loads the demo data." 
+ # TODO remove this @given(parsers.parse("language {langname}")) def given_lang(langname): + svc = LanguageService(db.session) + svc.load_language_def(langname) global language # pylint: disable=global-statement lang = db.session.query(Language).filter(Language.name == langname).first() assert lang.name == langname, "sanity check" @@ -44,7 +49,7 @@ def given_terms(content): @given(parsers.parse('term "{content}" with status {status} and parent "{parenttext}"')) def given_term_with_status_and_parent(content, status, parenttext): - r = Repository(db) + r = Repository(db.session) t = r.find_or_new(language.id, content) t.status = int(status) t.parents.append(parenttext) @@ -54,7 +59,7 @@ def given_term_with_status_and_parent(content, status, parenttext): @given(parsers.parse('term "{content}" with status {status}')) def given_term_with_status(content, status): - r = Repository(db) + r = Repository(db.session) t = r.find_or_new(language.id, content) t.status = int(status) r.add(t) @@ -71,12 +76,14 @@ def given_text(content): @given("all unknowns are set to known") def set_to_known(): - set_unknowns_to_known(text) + service = Service(db.session) + service.set_unknowns_to_known(text) @given(parsers.parse("bulk status {newstatus} update for terms:\n{terms}")) def update_status(newstatus, terms): - bulk_status_update(text, terms.split("\n"), int(newstatus)) + service = Service(db.session) + service.bulk_status_update(text, terms.split("\n"), int(newstatus)) def _assert_stringized_equals(stringizer, joiner, expected): @@ -84,16 +91,21 @@ def _assert_stringized_equals(stringizer, joiner, expected): Get paragraphs and stringize all textitems, join and assert equals expected. """ - paras = get_paragraphs(text) + rs = RenderService(db.session) + paras = rs.get_paragraphs(text.text, text.book.language) + # print("TOKENS", flush=True) + # print(paras, flush=True) ret = [] for p in paras: - tis = [t for s in p for t in s.textitems] + tis = [t for s in p for t in s] ss = [stringizer(ti) for ti in tis] ret.append(joiner.join(ss)) actual = "//".join(ret) - expected = expected.split("\n") - assert actual == "//".join(expected) + # print("", flush=True) + # print(expected, flush=True) + expected = "//".join(expected.split("\n")) + assert actual == expected @then(parsers.parse("rendered should be:\n{expected}")) diff --git a/tests/features/test_term_import.py b/tests/features/test_term_import.py index ba6a83d4c..b12f0f14e 100644 --- a/tests/features/test_term_import.py +++ b/tests/features/test_term_import.py @@ -7,12 +7,14 @@ import tempfile import pytest -from pytest_bdd import given, then, scenarios, parsers +from pytest_bdd import given, when, then, scenarios, parsers +from lute.db import db from lute.models.language import Language +from lute.language.service import Service as LanguageService from lute.models.term import Term - -from lute.termimport.service import import_file, BadImportFileError +from lute.models.repositories import LanguageRepository, TermRepository +from lute.termimport.service import Service, BadImportFileError from tests.dbasserts import assert_sql_result @@ -20,12 +22,19 @@ # The content of the file for the current test. content = None +# The results of the import +stats = None + scenarios("term_import.feature") +scenarios("term_import_status_0.feature") @given("demo data") def given_demo_data(app_context): - "Calling app_context loads the demo data." + "Load languages necessary for imports." 
+ svc = LanguageService(db.session) + for lang in ["Spanish", "English", "Classical Chinese"]: + svc.load_language_def(lang) @given(parsers.parse("import file:\n{newcontent}")) @@ -40,17 +49,51 @@ def given_empty_file(): content = "" -@then(parsers.parse("import should succeed with {created} created, {skipped} skipped")) -def succeed_with_status(created, skipped): +@when(parsers.parse("import with create {create}, update {update}")) +def import_with_settings(create, update): fd, path = tempfile.mkstemp() with os.fdopen(fd, "w") as tmp: # do stuff with temp file tmp.write(content) - stats = import_file(path) + global stats # pylint: disable=global-statement + service = Service(db.session) + stats = service.import_file( + path, create.lower() == "true", update.lower() == "true" + ) os.remove(path) + +@when( + parsers.parse( + "import with create {create}, update {update}, new as unknown {newunknowns}" + ) +) +def import_with_settings_and_newunks(create, update, newunknowns): + fd, path = tempfile.mkstemp() + with os.fdopen(fd, "w") as tmp: + # do stuff with temp file + tmp.write(content) + + global stats # pylint: disable=global-statement + service = Service(db.session) + stats = service.import_file( + path, + create.lower() == "true", + update.lower() == "true", + newunknowns.lower() == "true", + ) + os.remove(path) + + +@then( + parsers.parse( + "import should succeed with {created} created, {updated} updated, {skipped} skipped" + ) +) +def succeed_with_status(created, updated, skipped): assert stats["created"] == int(created), "created" + assert stats["updated"] == int(updated), "updated" assert stats["skipped"] == int(skipped), "skipped" @@ -61,26 +104,31 @@ def fail_with_message(message): # do stuff with temp file tmp.write(content) with pytest.raises(BadImportFileError, match=message): - import_file(path) + service = Service(db.session) + service.import_file(path) os.remove(path) @then(parsers.parse("words table should contain:\n{text_lc_content}")) def then_words_table_contains_WoTextLC(text_lc_content): - expected = text_lc_content.split("\n") + expected = [] + if text_lc_content != "-": + expected = text_lc_content.split("\n") sql = "select WoTextLC from words order by WoTextLC" assert_sql_result(sql, expected) @then(parsers.parse('{language} term "{term}" should be:\n{expected}')) def then_term_tags(language, term, expected): - lang = Language.find_by_name(language) + repo = LanguageRepository(db.session) + lang = repo.find_by_name(language) spec = Term(lang, term) - t = Term.find_by_spec(spec) + term_repo = TermRepository(db.session) + t = term_repo.find_by_spec(spec) pstring = ", ".join([p.text for p in t.parents]) if pstring == "": pstring = "-" - tstring = ", ".join([p.text for p in t.term_tags]) + tstring = ", ".join(sorted([p.text for p in t.term_tags])) if tstring == "": tstring = "-" actual = [ diff --git a/tests/features/test_term_parent_mapping.py b/tests/features/test_term_parent_mapping.py deleted file mode 100644 index aa3be601c..000000000 --- a/tests/features/test_term_parent_mapping.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -Step defs for term_parent_mapping.feature. 
-""" -# pylint: disable=missing-function-docstring - -import os -import tempfile -import pytest - -from pytest_bdd import given, then, scenarios, parsers - -from lute.db import db -from lute.models.term import Term - -from lute.term_parent_map.service import import_file, BadImportFileError - -from tests.utils import add_terms -from tests.dbasserts import assert_sql_result - - -# The content of the file for the current test. -content = None - -# The current language. -language = None - -scenarios("term_parent_mapping.feature") - - -@given("demo data") -def given_demo_data(app_context): - "Calling app_context loads the demo data." - - -@given("language Spanish") -def lang_spanish(spanish): - "Do-nothing step, just for clarity." - global language # pylint: disable=global-statement - language = spanish - - -@given(parsers.parse("terms:\n{terms}")) -def given_terms(terms): - terms = terms.split("\n") - add_terms(language, terms) - - -@given(parsers.parse("import file:\n{newcontent}")) -def given_file(newcontent): - global content # pylint: disable=global-statement - content = newcontent - - -@given("empty import file") -def given_empty_file(): - global content # pylint: disable=global-statement - content = "" - - -@given(parsers.parse('"{parent}" is parent of "{child}"')) -def given_parent(parent, child): - spec = Term(language, parent) - p = Term.find_by_spec(spec) - spec = Term(language, child) - c = Term.find_by_spec(spec) - c.parents.append(p) - db.session.add(p) - db.session.add(c) - db.session.commit() - - -@then(parsers.parse("import should succeed with {created} created, {updated} updated")) -def succeed_with_status(created, updated): - fd, path = tempfile.mkstemp() - with os.fdopen(fd, "w") as tmp: - # do stuff with temp file - tmp.write(content) - - stats = import_file(language, path) - os.remove(path) - - assert stats["created"] == int(created), "created" - assert stats["updated"] == int(updated), "updated" - - -@then(parsers.parse("import should fail with message:\n{message}")) -def fail_with_message(message): - fd, path = tempfile.mkstemp() - with os.fdopen(fd, "w") as tmp: - # do stuff with temp file - tmp.write(content) - with pytest.raises(BadImportFileError, match=message): - import_file(language, path) - os.remove(path) - - -@then(parsers.parse("words table should contain:\n{text_lc_content}")) -def then_words_table_contains_WoTextLC(text_lc_content): - expected = [] - if text_lc_content != "-": - expected = text_lc_content.split("\n") - sql = "select WoTextLC from words order by WoTextLC" - assert_sql_result(sql, expected) - - -@then(parsers.parse("parents should be:\n{expected}")) -def then_parents(expected): - sql = """ - select p.WoTextLC, c.WoTextLC - from words p - inner join wordparents on WpParentWoID=p.WoID - inner join words c on c.WoID = WpWoID - order by p.WoTextLC, c.WoTextLC - """ - if expected == "-": - expected = [] - else: - expected = expected.split("\n") - assert_sql_result(sql, expected) - - -@then(parsers.parse('sql "{sql}" should return:\n{expected}')) -def then_sql_returns(sql, expected): - expected = expected.split("\n") - assert_sql_result(sql, expected) - - -@then(parsers.parse('"{txt}" flash message should be:\n{msg}')) -def then_flash(txt, msg): - expected = None - if msg != "-": - expected = msg - spec = Term(language, txt) - t = Term.find_by_spec(spec) - assert t.get_flash_message() == expected, "flash" diff --git a/tests/integration/ankiexport/__init__.py b/tests/integration/ankiexport/__init__.py new file mode 100644 index 000000000..e69de29bb 
diff --git a/tests/integration/ankiexport/test_smoke_anki_export.py b/tests/integration/ankiexport/test_smoke_anki_export.py
new file mode 100644
index 000000000..310bc08af
--- /dev/null
+++ b/tests/integration/ankiexport/test_smoke_anki_export.py
@@ -0,0 +1,100 @@
+"""
+Smoke test for get_ankiconnect_post_data.
+"""
+
+import json
+from lute.models.term import Term, TermTag
+from lute.db import db
+from lute.models.srsexport import SrsExportSpec
+from lute.ankiexport.service import Service
+
+
+def test_smoke_get_post_data(empty_db, spanish):
+ "Misc data check - parent and tags are saved."
+ term = Term(spanish, "un gatito")
+ term.translation = "t_trans\nt_extra"
+ term.romanization = "t_rom"
+ term.set_flash_message("hello")
+ term.add_term_tag(TermTag("t_tag"))
+ term.set_current_image("blah.jpg")
+
+ parent = Term(spanish, "un gato")
+ parent.translation = "p_trans\np_extra"
+ parent.romanization = "p_rom"
+ parent.add_term_tag(TermTag("p_tag"))
+
+ component = Term(spanish, "gatito")
+ component.translation = "c_trans\nc_extra"
+ component.romanization = "c_rom"
+ component.add_term_tag(TermTag("c_tag"))
+
+ term.add_parent(parent)
+ db.session.add(term)
+ db.session.add(component)
+ db.session.commit()
+
+ term.set_flash_message("hello")
+
+ db.session.add(term)
+ db.session.add(parent)
+ db.session.add(component)
+ db.session.commit()
+
+ spec = SrsExportSpec()
+ spec.id = 1
+ spec.export_name = "export_name"
+ spec.criteria = ""
+ spec.deck_name = "good_deck"
+ spec.note_type = "good_note"
+ spec.field_mapping = json.dumps({"a": "{ language }"})
+ spec.active = True
+ spec.field_mapping = json.dumps(
+ {
+ "a": "{ language }",
+ "b": "{ image }",
+ "c": "{ term }",
+ "d": "{ sentence }",
+ }
+ )
+
+ anki_decks = ["good_deck"]
+ anki_notes = {"good_note": ["a", "b", "c", "d"]}
+ svc = Service(anki_decks, anki_notes, [spec])
+ post_data = svc.get_ankiconnect_post_data([term.id], {}, "dummyurl", db.session)
+ # print(post_data, flush=True)
+
+ expected = {
+ 1: {
+ "export_name": {
+ "action": "multi",
+ "params": {
+ "actions": [
+ {
+ "action": "storeMediaFile",
+ "params": {
+ "filename": "LUTE_TERM_1.jpg",
+ "url": "dummyurl/userimages/1/blah.jpg",
+ },
+ },
+ {
+ "action": "addNote",
+ "params": {
+ "note": {
+ "deckName": "good_deck",
+ "modelName": "good_note",
+ "fields": {
+ "a": "Spanish",
+ "b": '<img src="LUTE_TERM_1.jpg">',
+ "c": "un gatito",
+ },
+ "tags": ["lute", "p_tag", "t_tag"],
+ }
+ },
+ },
+ ]
+ },
+ }
+ }
+ }
+
+ assert post_data == expected, "Got post data"
diff --git a/tests/integration/test_main.py b/tests/integration/test_main.py
index 840511a93..fee031cb0 100644
--- a/tests/integration/test_main.py
+++ b/tests/integration/test_main.py
@@ -6,6 +6,7 @@
 import sqlite3
 from contextlib import closing
+from lute.config.app_config import AppConfig
 from lute.app_factory import create_app
@@ -18,7 +19,8 @@ def test_init_no_existing_database(testconfig):
 if os.path.exists(testconfig.dbfilename):
 os.unlink(testconfig.dbfilename)
- app = create_app(testconfig)
+ config_file = AppConfig.default_config_filename()
+ app = create_app(config_file)
 assert os.path.exists(testconfig.dbfilename) is True, "db exists"
 assert testconfig.dbname.startswith("test_")
diff --git a/tests/integration/test_term_popup.py b/tests/integration/test_term_popup.py
new file mode 100644
index 000000000..3f81144f5
--- /dev/null
+++ b/tests/integration/test_term_popup.py
@@ -0,0 +1,67 @@
+"""
+Term popup tests.
+""" + +import html +from bs4 import BeautifulSoup +from lute.models.term import Term, TermTag +from lute.models.repositories import UserSettingRepository +from lute.db import db + + +def test_smoke_popup_response(client, empty_db, spanish): + "Misc data check - parent and tags are saved." + term = Term(spanish, "un gatito") + term.translation = "t_trans\nt_extra" + term.romanization = "t_rom" + term.set_flash_message("hello") + term.add_term_tag(TermTag("t_tag")) + term.set_current_image("blah.jpg") + + parent = Term(spanish, "un gato") + parent.translation = "p_trans\np_extra" + parent.romanization = "p_rom" + parent.add_term_tag(TermTag("p_tag")) + parent.set_current_image("parentblah.jpg") + + component = Term(spanish, "gatito") + component.translation = "c_trans\nc_extra" + component.romanization = "c_rom" + component.add_term_tag(TermTag("c_tag")) + + term.add_parent(parent) + db.session.add(term) + db.session.add(component) + db.session.commit() + + term.set_flash_message("hello") + + db.session.add(term) + db.session.add(parent) + db.session.add(component) + db.session.commit() + + def _get_pretty_response(term_id): + "Response for term popup." + response = client.get(f"/read/termpopup/{term_id}") + decoded_response = response.data.decode("utf-8") + unescaped_response = html.unescape(decoded_response) + soup = BeautifulSoup(unescaped_response, "html.parser") + pretty_response = soup.prettify() + return pretty_response + + pretty_response = _get_pretty_response(term.id) + print(pretty_response, flush=True) + for t in ["t", "p", "c"]: + for part in ["trans", "rom", "tag"]: + s = f"{t}_{part}" + assert s in pretty_response, s + + us_repo = UserSettingRepository(db.session) + us_repo.set_value("term_popup_show_components", False) + db.session.commit() + pretty_response = _get_pretty_response(term.id) + print(pretty_response, flush=True) + for part in ["trans", "rom", "tag"]: + s = f"c_{part}" + assert s not in pretty_response, s diff --git a/tests/orm/test_Book.py b/tests/orm/test_Book.py index 6be4fc434..c27c5826b 100644 --- a/tests/orm/test_Book.py +++ b/tests/orm/test_Book.py @@ -4,16 +4,18 @@ from datetime import datetime import pytest -from lute.models.book import Book, BookTag -from lute.book.stats import BookStats +from lute.models.book import Book, BookTag, TextBookmark, BookStats +from lute.read.service import Service +from lute.book.stats import Service as BookStatsService from lute.db import db +from tests.utils import make_book from tests.dbasserts import assert_sql_result, assert_record_count_equals @pytest.fixture(name="simple_book") def fixture_simple_book(english): "Single page book with some associated objects." 
- b = Book.create_book("hi", english, "some text") + b = make_book("hi", "SOME TEXT", english) b.texts[0].read_date = datetime.now() bt = BookTag.make_book_tag("hola") b.book_tags.append(bt) @@ -28,14 +30,14 @@ def test_save_book(empty_db, simple_book): db.session.add(b) db.session.commit() - sql = "select BkID, BkTitle, BkLgID, BkWordCount from books" - assert_sql_result(sql, ["1; hi; 1; 2"], "book") + sql = "select BkID, BkTitle, BkLgID from books" + assert_sql_result(sql, ["1; hi; 1"], "book") sql = "select TxID, TxBkID, TxText from texts" - assert_sql_result(sql, ["1; 1; some text"], "texts") + assert_sql_result(sql, ["1; 1; SOME TEXT"], "texts") sql = "select * from sentences" - assert_sql_result(sql, ["1; 1; 1; /some/ /text/"], "sentences") + assert_sql_result(sql, ["1; 1; 1; /SOME/ /TEXT/; *"], "sentences") sql = "select * from booktags" assert_sql_result(sql, ["1; 1"], "booktags") @@ -52,22 +54,36 @@ def test_delete_book(empty_db, simple_book): db.session.add(b) db.session.commit() + service = Service(db.session) + service.mark_page_read(b.id, 1, False) + service.mark_page_read(b.id, 1, True) + + bss = BookStatsService(db.session) + bss.refresh_stats() + + check_tables = ["books", "bookstats", "texts", "sentences", "booktags"] + for t in check_tables: + assert_record_count_equals(t, 1, f"{t} created") + db.session.delete(b) db.session.commit() - for t in ["books", "texts", "sentences", "booktags"]: + for t in check_tables: assert_record_count_equals(t, 0, f"{t} deleted") sql = "select * from tags2" assert_sql_result(sql, ["1; hola; "], "tags2 remain") + sql = "select WrTxID, WrWordCount from wordsread" + assert_sql_result(sql, ["None; 2", "None; 2"], "words read remains.") + def test_save_and_delete_created_book(english): """ Verify book orm mappings. """ - content = "Some text here. Some more text" - b = Book.create_book("test", english, content, 3) + content = ["Some text here.", "Some more text"] + b = make_book("test", content, english) db.session.add(b) db.session.commit() sql = f"select TxOrder, TxText from texts where TxBkID = {b.id}" @@ -101,3 +117,28 @@ def test_load_book_loads_lang(empty_db, simple_book): ) for b in books_to_update: assert b.language is not None, "have lang object" + + +def test_delete_book_cascade_deletes_bookmarks(empty_db, simple_book): + """ + All associated TextBookmark(s) should be deleted when their + associated Book entry is deleted. + """ + b = simple_book + + TextBookmark(title="hello", text=b.texts[0]) + db.session.add(b) + db.session.commit() + + sql = "select BkID, BkTitle, BkLgID from books" + assert_sql_result(sql, ["1; hi; 1"], "book") + sql = "select TxID, TxBkID, TxText from texts" + assert_sql_result(sql, ["1; 1; SOME TEXT"], "texts") + sql = "select TbID, TbTxID, TbTitle from textbookmarks" + assert_sql_result(sql, ["1; 1; hello"], "bookmarks") + + db.session.delete(b) + db.session.commit() + + for t in ["books", "texts", "textbookmarks"]: + assert_record_count_equals(t, 0, f"{t} deleted") diff --git a/tests/orm/test_Language.py b/tests/orm/test_Language.py index 5bce309e8..1b6fcae79 100644 --- a/tests/orm/test_Language.py +++ b/tests/orm/test_Language.py @@ -2,13 +2,16 @@ Language mapping tests. 
""" -from lute.models.language import Language +import json +from lute.models.language import Language, LanguageDictionary +from lute.models.repositories import LanguageRepository +from lute.read.service import Service as ReadService from lute.db import db -from tests.dbasserts import assert_sql_result +from tests.dbasserts import assert_sql_result, assert_record_count_equals from tests.utils import make_text, add_terms -def test_save_new(empty_db): +def test_save_new_language(empty_db): """ Check language mappings and defaults. """ @@ -19,14 +22,12 @@ def test_save_new(empty_db): lang = Language() lang.name = "abc" - lang.dict_1_uri = "something" db.session.add(lang) db.session.commit() assert_sql_result(sql, ["abc; 0; 0; .!?"], "have language, defaults as expected") lang.right_to_left = True - db.session.add(lang) db.session.commit() assert_sql_result(sql, ["abc; 1; 0; .!?"], "rtl is True") @@ -38,21 +39,93 @@ def test_save_new(empty_db): assert retrieved.show_romanization is False, "retrieved no roman" +def test_language_dictionaries_smoke_test(empty_db): + "Smoke test for new dictionary structure." + lang = Language() + lang.name = "abc" + + ld = LanguageDictionary() + ld.usefor = "terms" + ld.dicttype = "embeddedhtml" + ld.dicturi = "1?[LUTE]" + ld.sort_order = 1 + lang.dictionaries.append(ld) + ld2 = LanguageDictionary() + ld2.usefor = "terms" + ld2.dicttype = "popuphtml" + ld2.dicturi = "2?[LUTE]" + ld2.sort_order = 2 + lang.dictionaries.append(ld2) + + ld3 = LanguageDictionary() + ld3.usefor = "sentences" + ld3.dicttype = "popuphtml" + ld3.dicturi = "3?[LUTE]" + ld3.sort_order = 3 + lang.dictionaries.append(ld3) + + db.session.add(lang) + db.session.commit() + + sqldicts = """select LgName, LdUseFor, LdType, LdDictURI + from languages + inner join languagedicts on LdLgID = LgID + order by LdSortOrder""" + assert_sql_result( + sqldicts, + [ + "abc; terms; embeddedhtml; 1?[LUTE]", + "abc; terms; popuphtml; 2?[LUTE]", + "abc; sentences; popuphtml; 3?[LUTE]", + ], + "dict saved", + ) + + retrieved = db.session.query(Language).filter(Language.name == "abc").first() + assert len(retrieved.dictionaries) == 3, "have dicts" + ld = retrieved.dictionaries[0] + assert ld.dicttype == "embeddedhtml", "type" + assert ld.dicturi == "1?[LUTE]", "uri" + + exp = """{"1": {"term": ["1?[LUTE]", "*2?[LUTE]"], "sentence": ["*3?[LUTE]"]}}""" + repo = LanguageRepository(db.session) + dicts = repo.all_dictionaries() + assert json.dumps(dicts) == exp + + def test_delete_language_removes_book_and_terms(app_context, spanish): """ - Test HACKY Language.delete() method to ensure deletes cascade. + Test HACKY LanguageRepository.delete() method to ensure deletes cascade. 
""" add_terms(spanish, ["gato", "perro"]) t = make_text("hola", "Hola amigo", spanish) db.session.add(t) + + ld = LanguageDictionary() + ld.usefor = "terms" + ld.dicttype = "embeddedhtml" + ld.dicturi = "something?[LUTE]" + ld.sort_order = 1 + spanish.dictionaries.append(ld) + db.session.add(spanish) + db.session.commit() + svc = ReadService(db.session) + svc.mark_page_read(t.book.id, 1, False) + sqlterms = "select WoText from words order by WoText" sqlbook = "select BkTitle from books where BkTitle = 'hola'" + sqldict = "select LdDictURI from languagedicts where LdDictURI = 'something?[LUTE]'" assert_sql_result(sqlterms, ["gato", "perro"], "initial terms") assert_sql_result(sqlbook, ["hola"], "initial book") + assert_sql_result(sqldict, ["something?[LUTE]"], "dict") + assert_record_count_equals("select * from wordsread", 1, "saved") - Language.delete(spanish) + db.session.delete(spanish) + db.session.commit() assert_sql_result(sqlbook, [], "book deleted") assert_sql_result(sqlterms, [], "terms deleted") + assert_sql_result(sqldict, [], "dicts deleted") + assert_record_count_equals("select * from wordsread", 0, "deleted") diff --git a/tests/orm/test_Term.py b/tests/orm/test_Term.py index 167fe3169..29a00858b 100644 --- a/tests/orm/test_Term.py +++ b/tests/orm/test_Term.py @@ -65,7 +65,7 @@ def test_term_parent_with_two_children(spanish): db.session.add(gato) db.session.commit() - parent_get = Term.find(parent.id) + parent_get = db.session.get(Term, parent.id) assert parent_get.text == "PARENT" assert len(parent_get.children) == 2 @@ -244,6 +244,52 @@ def test_changing_text_of_saved_Term_throws(english): term.text = "DEF" +@pytest.mark.term_case +def test_changing_text_to_same_thing_does_not_throw(japanese): + """ + New terms get created on page open, and when parsed + in full context of the text, the term may be parsed differently + than when the form is opened. + + e.g. for Japanese, the term "集めれ" may get created on + page load due to the term getting parsed in the full page context, + but when parsed individually it's parsed as "集め/れ". + Setting a new term with original text "集めれ" to that _same_ text + means that the text is really _unchanged_. + """ + term = Term.create_term_no_parsing(japanese, "集めれ") + db.session.add(term) + db.session.commit() + + sql = f"select wotext, wotextlc from words where woid = {term.id}" + assert_sql_result(sql, ["集めれ; 集めれ"], "have term") + + t = db.session.get(Term, term.id) + t.text = "集めれ" + db.session.add(t) + db.session.commit() + assert_sql_result(sql, ["集めれ; 集めれ"], "have term") + + +@pytest.mark.term_case +def test_changing_multiword_text_case_does_not_throw(english): + """ + Sanity check. + """ + term = Term(english, "a CAT") + db.session.add(term) + db.session.commit() + + sql = f"select wotext, wotextlc from words where woid = {term.id}" + assert_sql_result(sql, ["a/ /CAT; a/ /cat"], "have term") + + t = db.session.get(Term, term.id) + t.text = term.text.lower() + db.session.add(t) + db.session.commit() + assert_sql_result(sql, ["a/ /cat; a/ /cat"], "have term") + + @pytest.mark.term_case def test_changing_case_only_of_text_of_saved_Term_is_ok(english): "Changing text should throw." @@ -256,6 +302,6 @@ def test_changing_case_only_of_text_of_saved_Term_is_ok(english): @pytest.mark.term_case def test_changing_text_of_non_saved_Term_is_ok(english): - "Changing text should throw." + "Changing text should not throw if not saved." 
     term = Term(english, "ABC")
     term.text = "DEF"
diff --git a/tests/orm/test_TermTag.py b/tests/orm/test_TermTag.py
new file mode 100644
index 000000000..82c527e42
--- /dev/null
+++ b/tests/orm/test_TermTag.py
@@ -0,0 +1,72 @@
+"""
+TermTag mapping tests.
+"""
+
+from sqlalchemy import text
+from lute.models.term import Term, TermTag
+from lute.db import db
+from tests.dbasserts import assert_record_count_equals
+
+# from lute.termtag.routes import delete as route_delete
+
+
+def test_deleting_termtag_removes_wordtags_table_record(empty_db, spanish):
+    """
+    Association record should be deleted if tag is deleted.
+
+    Annoying test ... during unit testing, deleting a TermTag entity
+    causes the wordtags association table records to be deleted
+    correctly, but during actual operation -- i.e., deletion of a
+    TermTag through the UI -- the records aren't being deleted.
+    Can't explain why, and I don't want to waste more time trying
+    to figure it out.
+    """
+
+    tg = TermTag("tag")
+    db.session.add(tg)
+    db.session.commit()
+
+    term = Term(spanish, "HOLA")
+    term.add_term_tag(tg)
+    db.session.add(term)
+    db.session.commit()
+
+    # The tag association for HOLA is getting deleted correctly
+    # when the "tag" tag is deleted, which is odd because it's
+    # not getting deleted when the action is called from the UI
+    # (ref https://github.com/LuteOrg/lute-v3/issues/455).
+    #
+    # Trying to add another term, inserting the data directly
+    # into the table, to see if that is deleted correctly ...
+    # ... and it is.
+    perro = Term(spanish, "perro")
+    db.session.add(perro)
+    db.session.commit()
+
+    sql = f"insert into wordtags (WtWoID, WtTgID) values ({perro.id}, {tg.id})"
+    db.session.execute(text(sql))
+    db.session.commit()
+
+    # Trying to load data directly into the DB, so that the db session orm
+    # isn't aware of a Term. This too is deleted correctly.
+    sql = f"""insert into words (WoLgID, WoText, WoTextLC, WoStatus)
+    values ({spanish.id}, 'gato', 'gato', 1)"""
+    db.session.execute(text(sql))
+    db.session.commit()
+
+    sql = f"insert into wordtags (WtWoID, WtTgID) values ({perro.id + 1}, {tg.id})"
+    db.session.execute(text(sql))
+    db.session.commit()
+
+    sqltags = "select * from tags"
+    assert_record_count_equals(sqltags, 1, "tag sanity check on save")
+
+    sqlassoc = "select * from wordtags"
+    assert_record_count_equals(sqlassoc, 3, "word tag associations exist")
+
+    termtag = db.session.get(TermTag, tg.id)
+    db.session.delete(termtag)
+    db.session.commit()
+
+    assert_record_count_equals(sqltags, 0, "tag removed")
+    assert_record_count_equals(sqlassoc, 0, "associations removed")
diff --git a/tests/orm/test_Text.py b/tests/orm/test_Text.py
index b65d4f055..ee0efeb5f 100644
--- a/tests/orm/test_Text.py
+++ b/tests/orm/test_Text.py
@@ -3,7 +3,7 @@
 """
 
 from datetime import datetime
-from lute.models.book import Book, Text
+from lute.models.book import Book, Text, TextBookmark, WordsRead
 from lute.db import db
 from tests.dbasserts import assert_record_count_equals
@@ -29,3 +29,35 @@ def test_save_text_sentences_replaced_in_db(empty_db, english):
     db.session.add(t)
     db.session.commit()
     assert_record_count_equals("sentences", 1, "back to 1 sentences")
+
+
+def test_delete_text_cascade_deletes_bookmarks_leaves_wordsread(empty_db, english):
+    """
+    Texts should be able to be deleted even if they have bookmarks.
+    In addition, all associated TextBookmark(s) should be deleted when their
+    associated Text entry is deleted.
+    """
+    b = Book("hola", english)
+    t = Text(b, "Tienes un perro.
Un gato.") + tb = TextBookmark(text=t, title="Marcador") + db.session.add(t) + db.session.add(tb) + db.session.commit() + + wr = WordsRead(t, datetime.now(), 42) + db.session.add(wr) + db.session.commit() + + assert_record_count_equals("texts", 1, "1 text") + assert_record_count_equals("textbookmarks", 1, "1 bookmark") + assert_record_count_equals("wordsread", 1, "1 read") + + db.session.delete(t) + db.session.commit() + + assert_record_count_equals("texts", 0, "0 texts") + assert_record_count_equals("textbookmarks", 0, "0 bookmarks") + assert_record_count_equals("wordsread", 1, "still 1 read") + assert_record_count_equals( + "select * from wordsread where wrtxid is null", 1, "nulled" + ) diff --git a/tests/orm/test_TextBookmark.py b/tests/orm/test_TextBookmark.py new file mode 100644 index 000000000..dae03b43f --- /dev/null +++ b/tests/orm/test_TextBookmark.py @@ -0,0 +1,77 @@ +""" +TextBookmark mapping checks. +""" +from datetime import datetime +import pytest +from lute.models.book import Book, Text, TextBookmark +from lute.db import db +from tests.utils import make_book +from tests.dbasserts import assert_record_count_equals, assert_sql_result + + +@pytest.fixture(name="sample_book") +def fixture_sample_book(english): + "Sample Book" + b = make_book("Book Title", "some text", english) + b.texts[0].read_date = datetime.now() + return b + + +@pytest.fixture(name="sample_text") +def fixture_sample_text(sample_book: Book): + "Sample Text" + t = Text(sample_book, "test text", 1) + return t + + +@pytest.fixture(name="sample_textbookmark") +def fixture_sample_bookmark(sample_text: Text): + "Sample TextBookmark" + tb = TextBookmark(text=sample_text, title="Test Title") + return tb + + +def test_save_textbookmark( + empty_db, sample_textbookmark: TextBookmark, sample_text: Text +): + """Check TextBookmark mappings""" + db.session.add(sample_text) + db.session.add(sample_textbookmark) + db.session.commit() + sql = "SELECT TbTxId, TbTitle FROM textbookmarks" + assert_sql_result( + sql, [f"{sample_text.id}; {sample_textbookmark.title}"], "textbookmark" + ) + + +def test_edit_textbookmark( + empty_db, sample_textbookmark: TextBookmark, sample_text: Text +): + """Edit TextBookmark""" + db.session.add(sample_text) + db.session.add(sample_textbookmark) + db.session.commit() + + db.session.query(TextBookmark).filter( + TextBookmark.text.has(id=sample_text.id) + ).update({"title": "New Title"}) + db.session.commit() + + sql = "SELECT TbTxId, TbTitle FROM textbookmarks" + assert_sql_result(sql, [f"{sample_text.id}; New Title"], "textbookmark") + + +def test_delete_textbookmark( + empty_db, sample_textbookmark: TextBookmark, sample_text: Text +): + """Delete TextBookmark""" + db.session.add(sample_text) + db.session.add(sample_textbookmark) + db.session.commit() + + db.session.query(TextBookmark).filter( + TextBookmark.text.has(id=sample_text.id) + ).delete() + db.session.commit() + + assert_record_count_equals("textbookmarks", 0, "0 bookmarks") diff --git a/tests/playwright/__init__.py b/tests/playwright/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/playwright/playwright.py b/tests/playwright/playwright.py new file mode 100644 index 000000000..e5492bd6c --- /dev/null +++ b/tests/playwright/playwright.py @@ -0,0 +1,271 @@ +""" +Run pre-recorded smoke tests using playwright. 
+ +Notes: + +- the db must be reset to the baseline with demo stories +- site must be running (currently hardcoded to port 5001) +- start code gen in another window with `python codegen`, + then go to http://localhost:5001/ in the new window +- click through etc etc, then stop the code gen, copy-paste + code here, fix as needed, _then_ shut down + +Debugging: + +- to debug, can use "page.pause()" to pause the runner. + +More notes: + +This is _just a smoke test_, it doesn't do any assertions. +The actions were _recorded_ using playwright's supertastic +code generation. https://playwright.dev/python/docs/codegen + +Then I added some tweaks: + +Menu sub-items are only visible after hovering over the menu, e.g.: + page.locator("#menu_books").hover() + page.locator("#book_new").click() +""" + +import os +import time +from playwright.sync_api import Playwright, sync_playwright, expect + + +def run(p: Playwright) -> None: # pylint: disable=too-many-statements + "Run the smoke test." + + # Run headless, can add an env var later for headless or not. + showbrowser = os.environ.get("SHOW", "") == "true" + + # print(os.environ.get("SHOW"), flush=True) + # print("-" * 50) + def _print(s): + print(s) + + _print("Opening browser.") + browser = p.chromium.launch(headless=not showbrowser) + context = browser.new_context() + context.set_default_timeout(30000) + page = context.new_page() + + # Hardcoded port will cause problems ... + _print("Initial load sanity check") + page.goto("http://localhost:5001/") + + _print("Reset db.") + page.goto("http://localhost:5001/dev_api/load_demo") + + # Hardcoded port will cause problems ... + page.goto("http://localhost:5001/") + + # Open Tutorial + _print("Tutorial check.") + page.goto("http://localhost:5001") + page.get_by_role("link", name="Tutorial", exact=True).click() + page.locator("#ID-14-172").click() + page.frame_locator('iframe[name="wordframe"]').get_by_placeholder( + "Translation" + ).click() + page.frame_locator('iframe[name="wordframe"]').get_by_placeholder( + "Translation" + ).fill("big grey thing") + page.frame_locator('iframe[name="wordframe"]').get_by_role( + "button", name="Save" + ).click() + page.get_by_title( + "Mark rest as known, mark page as read, then go to next page" + ).click() + page.get_by_title("Mark page as read, then go to next page", exact=True).click() + page.get_by_title("Home").click() + + # Bookmarks + _print("Bookmarks.") + page.goto("http://localhost:5001") + page.get_by_role("link", name="Tutorial follow-up", exact=True).click() + page.locator(".hamburger-btn").click() + page.once("dialog", lambda dialog: dialog.accept(prompt_text="Page 1")) + page.get_by_text("Bookmarks", exact=True).hover() + page.get_by_role("link", name="Add bookmark").hover() + page.get_by_role("link", name="Add bookmark").click() + + page.get_by_text("▶").click() + + page.locator(".hamburger-btn").click() + page.once("dialog", lambda dialog: dialog.accept(prompt_text="Page 2")) + page.get_by_text("Bookmarks", exact=True).hover() + page.get_by_role("link", name="Add bookmark").hover() + page.get_by_role("link", name="Add bookmark").click() + + page.get_by_role("link", name="List bookmarks").click() + page.get_by_text("…").first.hover() + page.once("dialog", lambda dialog: dialog.accept()) + page.get_by_role("link", name="Delete").click() + + page.get_by_text("…").last.hover() + page.once("dialog", lambda dialog: dialog.accept(prompt_text="Page 2 - edit")) + page.get_by_role("link", name="Edit").click() + expect(page.get_by_role("link", name="Page 2 - 
edit")).to_be_visible() + expect(page.get_by_role("link", name="Page 1")).not_to_be_visible() + + # Open and archive book. + _print("Archive.") + page.goto("http://localhost:5001") + page.get_by_role("link", name="Büyük ağaç").click() + page.once("dialog", lambda dialog: dialog.accept()) + page.get_by_role("link", name="Archive book").click() + + # Make a new book + _print("New book.") + page.locator("#menu_books").hover() + page.locator("#book_new").click() + page.locator("#language_id").select_option("4") + page.get_by_label("Title").click() + page.get_by_label("Title").fill("Hello") + page.get_by_label("Text", exact=True).click() + page.get_by_label("Text", exact=True).fill("Hello there.") + page.get_by_role("button", name="Save").click() + + # Edit a term. + _print("Edit term.") + page.locator("#ID-0-0").click() + page.frame_locator('iframe[name="wordframe"]').get_by_placeholder( + "Translation" + ).click() + page.frame_locator('iframe[name="wordframe"]').get_by_placeholder( + "Translation" + ).fill("Hi.") + page.frame_locator('iframe[name="wordframe"]').get_by_role( + "button", name="Save" + ).click() + + # Archive current book "Hello", check archive. + _print("Archive.") + page.get_by_role("link", name="Archive book").click() + page.locator("#menu_books").hover() + page.get_by_role("link", name="Book archive").click() + expect(page.get_by_role("link", name="Hello")).to_be_visible() + + # Open term listing. + _print("Term listing.") + page.goto("http://localhost:5001/") + page.locator("#menu_terms").hover() + page.get_by_role("link", name="Terms", exact=True).click() + page.get_by_role("link", name="Hello").click() + # TODO testing: restore Sentences smoke test check. + # page.get_by_role("link", name="Sentences").click() + page.get_by_role("link", name="Back to list").click() + # page.pause() + + # TODO issue_336_export_unknown_book_terms: restore this test. + # _print("Export parent term mapping files.") + # page.locator("#menu_terms").hover() + # page.get_by_role("link", name="Parent Term mapping").click() + # with page.expect_download(timeout=30000) as _: + # page.get_by_role("link", name="Tutorial", exact=True).click() + + # Edit language. + _print("Edit language.") + page.goto("http://localhost:5001/") + page.locator("#menu_settings").hover() + page.get_by_role("link", name="Languages").click() + page.get_by_role("link", name="English").click() + page.get_by_role("button", name="Save").click() + + # Wipe the db. + _print("Reset db.") + page.get_by_role("link", name="click here").click() + + # Create a new language. + _print("New language.") + page.get_by_role("link", name="create your language.").click() + page.locator("#predefined").select_option("Spanish") + page.get_by_role("button", name="go").click() + page.get_by_role("button", name="Save").click() + + # Create a new book for the new lang. + _print("New book.") + page.get_by_role("link", name="Create one?").click() + page.get_by_label("Title").click() + page.get_by_label("Title").fill("Hola.") + page.get_by_label("Text", exact=True).fill("Tengo un perro.") + page.get_by_role("button", name="Save").click() + + # Interact with text. + page.get_by_text("perro").click() + page.frame_locator('iframe[name="wordframe"]').get_by_placeholder( + "Translation" + ).click() + page.frame_locator('iframe[name="wordframe"]').get_by_placeholder( + "Translation" + ).fill("dog.") + page.frame_locator('iframe[name="wordframe"]').get_by_role( + "button", name="Save" + ).click() + + # Go home, backup is kicked off. 
+ _print("Disabled: Verify backup started.") + page.locator("#reading-footer").get_by_role("link", name="Home").click() + # TODO disabled_backup_check: backup now runs and redirects to home. + # Not sure how to check it easily ... wait for it to complete. + time.sleep(4) + # page.get_by_role("link", name="Back to home.").click() + + # Archive and unarchive. + # Disabled, the links are now hidden inside a small hover-over dropdown. + # TODO reactivate_disabled_tests: book links are in a small hover-over list. + _print("Disabled: Archive and unarchive.") + ### expect(page.get_by_role("link", name="Hola.")).to_be_visible() + ### page.get_by_title("Archive", exact=True).click() + ### expect(page.get_by_role("link", name="Create one?")).to_be_visible() + ### page.locator("#menu_books").hover() + ### page.get_by_role("link", name="Book archive").click() + ### expect(page.get_by_role("link", name="Hola.")).to_be_visible() + ### page.get_by_title("Unarchive", exact=True).click() + ### expect(page.get_by_role("link", name="Hola.")).to_be_visible() + + # Import web page. + _print("Import web page.") + page.locator("#menu_books").hover() + page.get_by_role("link", name="Import web page").click() + page.get_by_label("Import URL").fill( + "http://localhost:5001/dev_api/fake_story.html" + ) + page.get_by_role("button", name="Import").click() + time.sleep(2) + # Page is imported, form shown, so save it. + page.get_by_role("button", name="Save").click() + page.get_by_text("Tengo").click() # Quick hacky check if exists. + page.get_by_title("Home").click() + expect(page.get_by_role("link", name="Mi perro.")).to_be_visible() + + # Check version. + _print("Version.") + page.locator("#menu_about").hover() + page.get_by_role("link", name="Version and software info").click() + + # Custom style. + _print("Custom style.") + page.locator("#menu_settings").hover() + page.get_by_role("link", name="Settings").click() + page.get_by_label("Custom styles").click() + page.get_by_label("Custom styles").fill( + "span.status0 { background-color: yellow; }" + ) + page.get_by_role("button", name="Save").click() + + # Custom style. + _print("Keyboard shortcuts.") + page.locator("#menu_settings").hover() + page.get_by_role("link", name="Keyboard shortcuts").click() + page.get_by_role("button", name="Save").click() + + # --------------------- + context.close() + browser.close() + + +def test_playwright(): + "Run playwright with tests." + with sync_playwright() as sp: + run(sp) diff --git a/tests/playwright/test_tibetan_font.py b/tests/playwright/test_tibetan_font.py new file mode 100644 index 000000000..72dc12545 --- /dev/null +++ b/tests/playwright/test_tibetan_font.py @@ -0,0 +1,79 @@ +""" +Playwright check: Tibetan text uses Tibetan-capable fonts. + +Requires the app running locally (same assumption as other Playwright tests). +""" + +import re + +from playwright.sync_api import sync_playwright + + +BASE_URL = "http://localhost:5001" + +# Anything containing these indicates a Tibetan-capable font was picked up. +TIBETAN_FONT_CUES = [ + "Lute Tibetan Fallback", + "Noto Sans Tibetan", + "Noto Serif Tibetan", + "Kailasa", + "Microsoft Himalaya", + "Jomolhari", + "Tibetan Machine Uni", +] + +# Cues that the theme stack is still present for non-Tibetan text. 
+THEME_FONT_CUES = [ + "Rubik", + "Lucida Grande", + "Georgia", + "Times New Roman", + "Arial", + "Segoe UI", + "-apple-system", + "BlinkMacSystemFont", +] + + +def _load_tibetan_book(page): + """ + Reset demo data, load predefined Tibetan language (with sample book), + and navigate to its first book page. + """ + page.goto(f"{BASE_URL}/dev_api/load_demo") + page.goto(f"{BASE_URL}/language/load_predefined/Tibetan") + page.goto(BASE_URL + "/") + + # Click the Tibetan book link (title starts with Tibetan characters). + tibetan_title_pattern = re.compile(r"[\u0f00-\u0fff]+") + page.get_by_role("link", name=tibetan_title_pattern).click() + + # Wait for reading text to be present. + page.wait_for_selector("#thetext span.textitem") + + +def test_tibetan_font_fallback(): + """ + Tibetan characters should render using a Tibetan-capable font + (from the fallback stack), not the default theme font. + """ + with sync_playwright() as sp: + browser = sp.chromium.launch() + page = browser.new_page() + _load_tibetan_book(page) + + font = page.locator("#thetext span.textitem").first.evaluate( + "el => getComputedStyle(el).fontFamily" + ) + assert any(cue in font for cue in TIBETAN_FONT_CUES), ( + "Expected a Tibetan-capable font in computed font-family; " f"got: {font}" + ) + + # Sanity: non-Tibetan UI text (body) should still include a theme/system font. + ui_font = page.locator("body").evaluate("el => getComputedStyle(el).fontFamily") + assert any(cue in ui_font for cue in THEME_FONT_CUES), ( + "Expected theme/system font cues in UI computed font-family; " + f"got: {ui_font}" + ) + + browser.close() diff --git a/tests/unit/ankiexport/__init__.py b/tests/unit/ankiexport/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/ankiexport/test_criteria.py b/tests/unit/ankiexport/test_criteria.py new file mode 100644 index 000000000..62aa47d72 --- /dev/null +++ b/tests/unit/ankiexport/test_criteria.py @@ -0,0 +1,92 @@ +""" +Selector tests. +""" + +from unittest.mock import Mock +import pytest +from lute.ankiexport.criteria import evaluate_criteria, validate_criteria +from lute.ankiexport.exceptions import AnkiExportConfigurationError + + +@pytest.fixture(name="term") +def fixture_term(): + "Fake term." + parent = Mock(text="PARENT") + parent.term_tags = [Mock(text="parenttag")] + + term = Mock() + term.id = 1 + term.text = "HOLA" + term.status = 3 + term.language.name = "Spanish" + term.language.id = 42 + term.get_current_image.return_value = "image.jpg" + term.parents = [parent] + term.term_tags = [Mock(text="masc"), Mock(text="xxx")] + term.translation = "example translation" + return term + + +@pytest.mark.parametrize( + "criteria,expected", + [ + ('language:"Spanish"', True), + ('language:"xxx"', False), + ("parents.count=1", True), + ("parents.count==1", True), + ("parents.count>=0", True), + ("parents.count>1", False), + ('tags:"masc"', True), + ('tags:"fem"', False), + ('tags:["fem", "masc"]', True), + ("status<=3", True), + ("status==1", False), + ('tags:["fem", "other"]', False), + ('tags:["parenttag"]', False), + ('parents.tags:["parenttag"]', True), + ('all.tags:["parenttag"]', True), + ('parents.count=1 and tags:["fem", "other"] and status<=3', False), + ], +) +def test_criteria(criteria, expected, term): + "Check criteria vs test term." 
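+    # The parametrized cases above exercise the criteria mini-language:
+    # quoted matches (language:"Spanish"), tag containment against a single
+    # value (tags:"masc") or a list (tags:["fem", "masc"]), numeric
+    # comparisons on status and parents.count (=, ==, >, >=, <=), the
+    # "parents." and "all." scope prefixes, and "and" to join clauses.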
+ assert evaluate_criteria(criteria, term) == expected, criteria + + +def test_blank_criteria_is_always_true(term): + assert evaluate_criteria("", term) is True, "blank" + assert evaluate_criteria(None, term) is True, "None" + + +@pytest.mark.parametrize( + "criteria", + [ + ('lanxguage:"Spanish"'), + ('language="xxx"'), + ("parents=1"), + ('tags="masc"'), + ('tags["fem", "masc"]'), + ('parents.count=1 and tags["fem", "other"] and status<=3'), + ], +) +def test_bad_criteria_throws(criteria, term): + "Check criteria vs test term." + with pytest.raises(AnkiExportConfigurationError): + evaluate_criteria(criteria, term) + + +@pytest.mark.parametrize( + "criteria", + [ + ('lanxguage:"Spanish"'), + ('language="xxx"'), + ("parents=1"), + ('tags="masc"'), + ('tags["fem", "masc"]'), + ('parents.count=1 and tags["fem", "other"] and status<=3'), + ], +) +def test_validate_criteria_throws_if_bad(criteria): + "Check criteria vs test term." + with pytest.raises(AnkiExportConfigurationError): + validate_criteria(criteria) diff --git a/tests/unit/ankiexport/test_field_mapping.py b/tests/unit/ankiexport/test_field_mapping.py new file mode 100644 index 000000000..705813469 --- /dev/null +++ b/tests/unit/ankiexport/test_field_mapping.py @@ -0,0 +1,199 @@ +""" +Field-to-value tests. +""" + +from unittest.mock import Mock +import pytest + +from lute.ankiexport.exceptions import AnkiExportConfigurationError +from lute.ankiexport.field_mapping import ( + get_values_and_media_mapping, + validate_mapping, + get_fields_and_final_values, + SentenceLookup, +) + +# pylint: disable=missing-function-docstring + + +@pytest.mark.parametrize( + "mapping,msg", + [ + ({"a": "{ x }"}, 'Invalid field mapping "x"'), + ({"a": "{ id }", "b": "{ x }"}, 'Invalid field mapping "x"'), + ], +) +def test_validate_mapping_throws_if_bad_mapping_string(mapping, msg): + with pytest.raises(AnkiExportConfigurationError, match=msg): + validate_mapping(mapping) + + +@pytest.mark.parametrize( + "mapping,msg", + [ + ({"a": "{ id }"}, "ok"), + ({"a": "{ id }", "b": "{ id }"}, "same value twice ok"), + ({"a": "{ id }", "b": "{ term }"}, "different fields"), + ], +) +def test_validate_mapping_does_not_throw_if_ok(mapping, msg): + validate_mapping(mapping) + assert True, msg + + +@pytest.fixture(name="term") +def fixture_term(): + zws = "\u200B" + term = Mock() + term.id = 1 + term.text = f"test{zws} {zws}term" + term.romanization = "blah-blah" + term.language.name = "English" + term.language.id = 42 + term.get_current_image.return_value = "image.jpg" + term.term_tags = [Mock(text="noun"), Mock(text="verb")] + term.translation = f"example{zws} {zws}translation" + + parent = Mock() + parent.text = "parent-text" + parent.translation = "parent-transl" + parent.romanization = "parent-blah" + parent.get_current_image.return_value = None + parent.term_tags = [Mock(text="parenttag"), Mock(text="xyz")] + term.parents = [parent] + + return term + + +def test_basic_replacements(term): + sentence_lookup = Mock() + mapping = { + "id": "{ id }", + "term": "{ term }", + "language": "{ language }", + "translation": "{ translation }", + "pron": "{ pronunciation }", + } + values, media = get_values_and_media_mapping(term, sentence_lookup, mapping) + + expected = { + "id": 1, + "term": "test term", + "parents": "parent-text", + "tags": "noun, verb", + "language": "English", + "translation": "example translation
parent-transl", + "pronunciation": "blah-blah", + "parents.pronunciation": "parent-blah", + } + assert values == expected, "mappings" + assert len(media) == 0 + + term.parents[0].romanization = None + values, media = get_values_and_media_mapping(term, sentence_lookup, mapping) + expected["parents.pronunciation"] = "" + assert values == expected, "new mappings with no parent pron" + + +def test_basic_replacements_no_parents(term): + sentence_lookup = Mock() + mapping = { + "id": "{ id }", + "term": "{ term }", + "language": "{ language }", + "translation": "{ translation }", + "pron": "{ pronunciation }", + } + term.parents = [] + values, media = get_values_and_media_mapping(term, sentence_lookup, mapping) + + expected = { + "id": 1, + "term": "test term", + "parents": "", + "tags": "noun, verb", + "language": "English", + "translation": "example translation", + "pronunciation": "blah-blah", + "parents.pronunciation": "", + } + assert values == expected, "mappings" + assert len(media) == 0 + + +def test_tag_replacements(term): + sentence_lookup = Mock() + mapping = {"tags": "{ tags }"} + values, media = get_values_and_media_mapping(term, sentence_lookup, mapping) + + assert set(values["tags"].split(", ")) == {"noun", "verb"} + assert len(media) == 0 + + +def test_filtered_tag_replacements(term): + sentence_lookup = Mock() + mapping = {"mytags": '{ tags:["noun"] }'} + values, media = get_values_and_media_mapping(term, sentence_lookup, mapping) + assert set(values['tags:["noun"]'].split(", ")) == {"noun"} + assert len(media) == 0 + + +def test_filtered_parents_tag_replacements(term): + sentence_lookup = Mock() + mapping = {"mytags": '{ parents.tags:["parenttag"] }'} + values, media = get_values_and_media_mapping(term, sentence_lookup, mapping) + assert set(values['parents.tags:["parenttag"]'].split(", ")) == {"parenttag"} + assert len(media) == 0 + + +def test_image_handling(term): + sentence_lookup = Mock() + mapping = {"image": "{ image }"} + + values, media = get_values_and_media_mapping(term, sentence_lookup, mapping) + + assert media == {"LUTE_TERM_1.jpg": "/userimages/42/image.jpg"}, "one image" + assert '' in values["image"] + + +def test_sentence_handling(term): + zws = "\u200B" + sentence_lookup = Mock() + sentence_lookup.get_sentence_for_term.return_value = f"Example{zws} {zws}sentence." + mapping = {"sentence": "{ sentence }"} + + values, media = get_values_and_media_mapping(term, sentence_lookup, mapping) + + assert values["sentence"] == "Example sentence." 
+ assert len(media) == 0 + + +def test_get_fields_and_final_values_smoke_test(): + mapping = { + "a": "{ id }", + "b": "{ term }", + } + replacements = {"id": 42, "term": "rabbit"} + actual = get_fields_and_final_values(mapping, replacements) + assert actual == {"a": "42", "b": "rabbit"} + + +def test_empty_fields_not_posted(): + mapping = { + "a": "{ id }", + "b": "{ term }", + } + replacements = {"id": 42, "term": ""} + actual = get_fields_and_final_values(mapping, replacements) + assert actual == {"a": "42"} + + +def test_sentence_lookup_finds_sentence_in_supplied_dict_or_does_db_call(): + refsrepo = Mock() + refsrepo.find_references_by_id.return_value = {"term": [Mock(sentence="Db lookup")]} + fixed_sentences = {"42": "Hello"} + lookup = SentenceLookup(fixed_sentences, refsrepo) + assert lookup.get_sentence_for_term("42") == "Hello", "looks up" + assert lookup.get_sentence_for_term(42) == "Hello", "int ok, still finds" + assert lookup.get_sentence_for_term(99) == "Db lookup", "falls back to db lookup" + assert lookup.get_sentence_for_term("99") == "Db lookup", "falls back to db lookup" diff --git a/tests/unit/ankiexport/test_service.py b/tests/unit/ankiexport/test_service.py new file mode 100644 index 000000000..a3fca43da --- /dev/null +++ b/tests/unit/ankiexport/test_service.py @@ -0,0 +1,251 @@ +""" +Service tests. +""" + +import json +from unittest.mock import Mock +import pytest +from lute.models.srsexport import SrsExportSpec +from lute.ankiexport.service import Service + +# pylint: disable=missing-function-docstring + + +@pytest.fixture(name="export_spec") +def fixture_spec(): + spec = SrsExportSpec() + spec.id = 1 + spec.export_name = "export_name" + spec.criteria = 'language:"German"' + spec.deck_name = "good_deck" + spec.note_type = "good_note" + spec.field_mapping = json.dumps({"a": "{ language }"}) + spec.active = True + return spec + + +def test_validate_returns_empty_hash_if_all_ok(export_spec): + anki_decks = ["good_deck"] + anki_notes = {"good_note": ["a", "b"]} + svc = Service(anki_decks, anki_notes, [export_spec]) + result = svc.validate_specs() + assert len(result) == 0, "No problems" + msg = svc.validate_specs_failure_message() + assert len(msg) == 0, "failure msg" + + +@pytest.mark.parametrize( + "prop_name,prop_value,expected_error", + [ + ( + "criteria", + 'lanxxguage:"German"', + 'Criteria syntax error at position 0 or later: lanxxguage:"German"', + ), + ("deck_name", "missing_deck", 'No deck name "missing_deck"'), + ("note_type", "missing_note", 'No note type "missing_note"'), + ( + "field_mapping", + json.dumps({"xx": "{ language }"}), + "Note type good_note does not have field(s): xx", + ), + ( + "field_mapping", + json.dumps({"a": "{ bad_value }"}), + 'Invalid field mapping "bad_value"', + ), + ( + "field_mapping", + "this_is_not_valid_json", + "Mapping is not valid json", + ), + ], +) +def test_validate_spec_returns_array_of_errors( + prop_name, prop_value, expected_error, export_spec +): + anki_decks = ["good_deck"] + anki_notes = {"good_note": ["a", "b"]} + setattr(export_spec, prop_name, prop_value) + svc = Service(anki_decks, anki_notes, [export_spec]) + result = svc.validate_spec(export_spec) + assert result == [expected_error] + + export_spec.active = False + assert len(svc.validate_spec(export_spec)) == 0, "no errors for inactive spec" + + +def test_validate_specs_returns_dict_of_export_ids_and_errors(export_spec): + anki_decks = ["good_deck"] + anki_notes = {"good_note": ["a", "b"]} + export_spec.deck_name = "missing_deck" + svc = Service(anki_decks, 
anki_notes, [export_spec]) + result = svc.validate_specs() + assert result == {export_spec.id: 'No deck name "missing_deck"'} + + msg = svc.validate_specs_failure_message() + assert msg == ['export_name: No deck name "missing_deck"'], "failure msg" + + +def test_validate_only_checks_active_specs(export_spec): + anki_decks = ["good_deck"] + anki_notes = {"good_note": ["a", "b"]} + export_spec.criteria = "xxx={yyy}" + svc = Service(anki_decks, anki_notes, [export_spec]) + result = svc.validate_specs() + assert export_spec.id in result, "should have a problem, sanity check" + + export_spec.active = False + result = svc.validate_specs() + assert len(result) == 0, "No problems" + msg = svc.validate_specs_failure_message() + assert len(msg) == 0, "failure msg" + + +@pytest.fixture(name="term") +def fixture_term(): + zws = "\u200B" + term = Mock() + term.id = 1 + term.text = f"test{zws} {zws}term" + term.romanization = "blah-blah" + term.language.name = "German" + term.language.id = 42 + term.get_current_image.return_value = "image.jpg" + term.term_tags = [Mock(text="noun"), Mock(text="verb")] + term.translation = f"example{zws} {zws}translation" + + parent = Mock() + parent.text = "parent-text" + parent.translation = "parent-transl" + parent.romanization = "parent-blah" + parent.get_current_image.return_value = None + parent.term_tags = [Mock(text="parenttag"), Mock(text="xyz")] + term.parents = [parent] + + return term + + +def test_smoke_ankiconnect_post_data_for_term(term, export_spec): + anki_decks = ["good_deck"] + anki_notes = {"good_note": ["a", "b", "c", "d", "e", "f", "g", "h"]} + export_spec.field_mapping = json.dumps( + { + "a": "{ language }", + "b": "{ image }", + "c": "{ term }", + "d": "{ sentence }", + "e": "{ pronunciation }", + "f": '{ tags:["noun"] }', + "g": '{ parents.tags:["parenttag"] }', + "h": "{ parents.pronunciation }", + } + ) + svc = Service(anki_decks, anki_notes, [export_spec]) + result = svc.validate_specs() + assert len(result) == 0, "No problems, sanity check" + + sentence_lookup = Mock() + sentence_lookup.get_sentence_for_term.return_value = "Example sentence." + + pd = svc.get_ankiconnect_post_data_for_term(term, "http://x:42", sentence_lookup) + assert len(pd) != 0, "Got some post data" + + expected = { + "export_name": { + "action": "multi", + "params": { + "actions": [ + { + "action": "storeMediaFile", + "params": { + "filename": "LUTE_TERM_1.jpg", + "url": "http://x:42/userimages/42/image.jpg", + }, + }, + { + "action": "addNote", + "params": { + "note": { + "deckName": "good_deck", + "modelName": "good_note", + "fields": { + "a": "German", + "b": '', + "c": "test term", + "d": "Example sentence.", + "e": "blah-blah", + "f": "noun", + "g": "parenttag", + "h": "parent-blah", + }, + "tags": ["lute", "noun", "parenttag", "verb", "xyz"], + } + }, + }, + ] + }, + } + } + + # print("actual") + # print(pd) + # print("expected") + # print(expected) + assert pd == expected, "PHEW!" 
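+
+# The expected payload above mirrors AnkiConnect's "multi" action, which
+# wraps sub-actions like "storeMediaFile" and "addNote". As a hedged sketch
+# of how such post data might be sent to a running AnkiConnect instance
+# (not code from this repo; the {"action", "version", "params"} envelope,
+# version 6, and default port 8765 are AnkiConnect conventions, and the
+# "requests" dependency is an assumption):
+#
+#   import requests
+#
+#   def post_to_ankiconnect(post_data, url="http://127.0.0.1:8765"):
+#       for _export_name, multi_action in post_data.items():
+#           payload = {
+#               "action": multi_action["action"],  # "multi"
+#               "version": 6,
+#               "params": multi_action["params"],  # {"actions": [...]}
+#           }
+#           requests.post(url, json=payload, timeout=30).raise_for_status()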
+ + +def test_smoke_ankiconnect_post_data_for_term_without_image(term, export_spec): + term.get_current_image.return_value = None + + anki_decks = ["good_deck"] + anki_notes = {"good_note": ["a", "b", "c", "d"]} + export_spec.field_mapping = json.dumps( + { + "a": "{ language }", + "b": "{ image }", + "c": "{ term }", + "d": "{ sentence }", + } + ) + svc = Service(anki_decks, anki_notes, [export_spec]) + result = svc.validate_specs() + assert len(result) == 0, "No problems, sanity check" + + sentence_lookup = Mock() + sentence_lookup.get_sentence_for_term.return_value = "Example sentence." + + pd = svc.get_ankiconnect_post_data_for_term(term, "http://x:42", sentence_lookup) + assert len(pd) != 0, "Got some post data" + + expected = { + "export_name": { + "action": "multi", + "params": { + "actions": [ + { + "action": "addNote", + "params": { + "note": { + "deckName": "good_deck", + "modelName": "good_note", + "fields": { + "a": "German", + # "b": "", image not posted at all. + "c": "test term", + "d": "Example sentence.", + }, + "tags": ["lute", "noun", "parenttag", "verb", "xyz"], + } + }, + }, + ] + }, + } + } + + # print("actual") + # print(pd) + # print("expected") + # print(expected) + assert pd == expected, "PHEW!" diff --git a/tests/unit/backup/test_backup.py b/tests/unit/backup/test_backup.py index 6421c3eb1..7a0d033e2 100644 --- a/tests/unit/backup/test_backup.py +++ b/tests/unit/backup/test_backup.py @@ -3,20 +3,22 @@ """ import os -from datetime import datetime +from datetime import datetime, timezone +from unittest.mock import Mock, patch import pytest -from lute.backup.service import ( - create_backup, - BackupException, - should_run_auto_backup, - backup_warning, -) -from lute.models.setting import BackupSettings +from lute.backup.service import Service, BackupException, DatabaseBackupFile +from lute.models.repositories import UserSettingRepository +from lute.db import db +from lute.language.service import Service as LanguageService + +from tests.dbasserts import assert_record_count_equals # pylint: disable=missing-function-docstring # Test method names are pretty descriptive already. +utc = timezone.utc + @pytest.fixture(name="bkp_dir") def fixture_backup_directory(testconfig): @@ -48,12 +50,45 @@ def cleanup_directory(directory): @pytest.fixture(name="backup_settings") def fixture_backup_settings(app_context, bkp_dir): # app_context is passed so that the db session is available. 
- ret = BackupSettings.get_backup_settings() + repo = UserSettingRepository(db.session) + ret = repo.get_backup_settings() ret.backup_dir = bkp_dir ret.backup_enabled = True yield ret +def _mock_backup_file(directory, name, time, size): + file_mock = Mock() + file_mock.name = name + file_mock.path = os.path.join(directory, name) + file_mock.size = size + with open(file_mock.path, "wb") as f: + f.seek(file_mock.size - 1) + f.write(b"\0") + os.utime(file_mock.path, (time.timestamp(), time.timestamp())) + return file_mock + + +@pytest.fixture(name="auto_backup_file") +def fixture_auto_backup_file(bkp_dir): + yield _mock_backup_file( + bkp_dir, + "lute_backup_2024-01-01-000000.db.gz", + datetime(2024, 1, 1, 0, 0, 0, tzinfo=utc), + 567890, + ) + + +@pytest.fixture(name="manual_backup_file") +def fixture_manual_backup_file(bkp_dir): + yield _mock_backup_file( + bkp_dir, + "manual_lute_backup_2024-02-01-000000.db.gz", + datetime(2024, 2, 1, 0, 0, 0, tzinfo=utc), + 123450, + ) + + def test_backup_writes_file_to_output_dir(testconfig, bkp_dir, backup_settings): file_content = b"imagefile" img1 = os.path.join(testconfig.userimagespath, "1") @@ -62,7 +97,8 @@ def test_backup_writes_file_to_output_dir(testconfig, bkp_dir, backup_settings): with open(os.path.join(img1, "file.txt"), "wb") as f: f.write(file_content) - create_backup(testconfig, backup_settings) + service = Service(db.session) + service.create_backup(testconfig, backup_settings) assert len(os.listdir(bkp_dir)) == 2 # db and directory assert len(os.listdir(os.path.join(bkp_dir, "userimages_backup", "1"))) == 1 assert os.path.exists(os.path.join(bkp_dir, "userimages_backup", "1", "file.txt")) @@ -70,21 +106,24 @@ def test_backup_writes_file_to_output_dir(testconfig, bkp_dir, backup_settings): def test_timestamp_added_to_db_name(testconfig, bkp_dir, backup_settings): assert os.listdir(bkp_dir) == [], "empty dirs at start" - create_backup(testconfig, backup_settings) + service = Service(db.session) + service.create_backup(testconfig, backup_settings) dbfile = [f for f in os.listdir(bkp_dir) if f.startswith("lute_backup_2")] assert len(dbfile) == 1, "db found" def test_backup_fails_if_missing_output_dir(testconfig, backup_settings): backup_settings.backup_dir = "some_missing_dir" + service = Service(db.session) with pytest.raises(BackupException, match="Missing directory some_missing_dir"): - create_backup(testconfig, backup_settings) + service.create_backup(testconfig, backup_settings) def test_user_can_configure_rolling_backup_count(testconfig, bkp_dir, backup_settings): backup_settings.backup_count = 2 + service = Service(db.session) for i in range(1, 10): - create_backup(testconfig, backup_settings, suffix=f"0{i}") + service.create_backup(testconfig, backup_settings, suffix=f"0{i}") expected_files = [f"lute_backup_0{i}.db.gz" for i in range(8, 10)] db_files = [f for f in os.listdir(bkp_dir) if f.endswith(".gz")] assert db_files.sort() == expected_files.sort() @@ -92,8 +131,11 @@ def test_user_can_configure_rolling_backup_count(testconfig, bkp_dir, backup_set def test_all_manual_backups_are_kept(testconfig, bkp_dir, backup_settings): backup_settings.backup_count = 2 + service = Service(db.session) for i in range(1, 10): - create_backup(testconfig, backup_settings, suffix=f"0{i}", is_manual=True) + service.create_backup( + testconfig, backup_settings, suffix=f"0{i}", is_manual=True + ) expected_files = [f"lute_backup_0{i}.db.gz" for i in range(1, 10)] db_files = [f for f in os.listdir(bkp_dir) if f.endswith(".gz")] assert db_files.sort() == 
expected_files.sort() @@ -103,22 +145,27 @@ def test_last_import_setting_is_updated_on_successful_backup( testconfig, backup_settings ): assert backup_settings.last_backup_datetime is None, "no backup" - create_backup(testconfig, backup_settings) - updated = BackupSettings.get_backup_settings() + service = Service(db.session) + service.create_backup(testconfig, backup_settings) + + repo = UserSettingRepository(db.session) + updated = repo.get_backup_settings() assert updated.last_backup_datetime is not None, "set" def test_should_not_run_autobackup_if_auto_is_no_or_false(backup_settings): + service = Service(db.session) backup_settings.backup_enabled = True backup_settings.backup_auto = False - assert should_run_auto_backup(backup_settings) is False + assert service.should_run_auto_backup(backup_settings) is False def test_autobackup_returns_true_if_never_backed_up(backup_settings): backup_settings.backup_enabled = True backup_settings.backup_auto = True backup_settings.last_backup_datetime = None - assert should_run_auto_backup(backup_settings) is True + service = Service(db.session) + assert service.should_run_auto_backup(backup_settings) is True def test_autobackup_returns_true_if_last_backed_up_over_one_day_ago(backup_settings): @@ -127,11 +174,12 @@ def test_autobackup_returns_true_if_last_backed_up_over_one_day_ago(backup_setti curr_datetime = datetime.now() one_day_ago = curr_datetime.timestamp() - 24 * 60 * 60 + service = Service(db.session) backup_settings.last_backup_datetime = one_day_ago - 10 - assert should_run_auto_backup(backup_settings) is True + assert service.should_run_auto_backup(backup_settings) is True backup_settings.last_backup_datetime = one_day_ago + 10 - assert should_run_auto_backup(backup_settings) is False + assert service.should_run_auto_backup(backup_settings) is False def test_warn_if_last_backup_never_happened_or_is_old(backup_settings): @@ -140,13 +188,106 @@ def test_warn_if_last_backup_never_happened_or_is_old(backup_settings): backup_settings.backup_warn = True backup_settings.last_backup_datetime = None - assert backup_warning(backup_settings) == "Never backed up." + service = Service(db.session) + + assert_record_count_equals("select * from books", 0, "sanity check, no books") + assert service.backup_warning(backup_settings) == "", "no warning if db empty" + + langsvc = LanguageService(db.session) + langsvc.load_language_def("English") + assert_record_count_equals("select * from books", 2, "sanity check, have books") + assert service.backup_warning(backup_settings) == "Never backed up." backup_settings.last_backup_datetime = one_week_ago + 10 - assert backup_warning(backup_settings) == "" + assert service.backup_warning(backup_settings) == "" backup_settings.last_backup_datetime = one_week_ago - 10 - assert backup_warning(backup_settings) == "Last backup was more than 1 week ago." + assert ( + service.backup_warning(backup_settings) + == "Last backup was more than 1 week ago." + ) backup_settings.backup_warn = False - assert backup_warning(backup_settings) == "" + assert service.backup_warning(backup_settings) == "" + + +def test_database_backup_file_on_nonexistent_path(bkp_dir): + file = os.path.join(bkp_dir, "lute_backup_nonexistent.db.gz") + with pytest.raises(BackupException) as excinfo: + DatabaseBackupFile(file) + assert str(excinfo.value) == f"No backup file at {file}." 
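+
+# The size-formatting assertions in test_database_backup_file_size_formatting
+# below pin down DatabaseBackupFile.size's display format: decimal
+# (1000-based) units rounded to the nearest whole number, with plain "bytes"
+# under 1 KB. A minimal sketch consistent with those asserted values -- an
+# illustration only, not the lute.backup.service implementation -- would be:
+#
+#   def human_size(num_bytes):
+#       value = float(num_bytes)
+#       for unit in ("bytes", "KB", "MB", "GB"):
+#           if value < 1000 or unit == "GB":
+#               return f"{round(value)} {unit}"
+#           value /= 1000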
+
+
+def test_database_backup_file_with_non_lute_backup(bkp_dir):
+    file = os.path.join(bkp_dir, "some_other_file.db.gz")
+    with open(file, "wb") as f:
+        f.write(b"\0")
+    with pytest.raises(BackupException) as excinfo:
+        DatabaseBackupFile(file)
+    assert str(excinfo.value) == f"Not a valid lute database backup at {file}."
+
+
+def test_database_backup_file_with_auto_backup_returns_success(auto_backup_file):
+    dbf = DatabaseBackupFile(auto_backup_file.path)
+
+    assert auto_backup_file.name == dbf.name
+    assert auto_backup_file.path == dbf.filepath
+    assert auto_backup_file.size == dbf.size_bytes
+    assert dbf.size == "568 KB"
+    assert dbf.last_modified == datetime(2024, 1, 1, 0, 0, 0, tzinfo=utc)
+
+
+def test_database_backup_file_for_auto_is_not_manual(auto_backup_file):
+    dbf = DatabaseBackupFile(auto_backup_file.path)
+    assert not dbf.is_manual
+
+
+def test_database_backup_file_for_manual_is_manual(manual_backup_file):
+    dbf = DatabaseBackupFile(manual_backup_file.path)
+    assert dbf.is_manual
+
+
+def test_database_backup_file_size_formatting(auto_backup_file):
+    dbf = DatabaseBackupFile(auto_backup_file.path)
+
+    with patch("lute.backup.service.DatabaseBackupFile.size_bytes", 11234567890):
+        assert dbf.size == "11 GB"
+    with patch("lute.backup.service.DatabaseBackupFile.size_bytes", 1123456789):
+        assert dbf.size == "1 GB"
+    with patch("lute.backup.service.DatabaseBackupFile.size_bytes", 19876543):
+        assert dbf.size == "20 MB"
+    with patch("lute.backup.service.DatabaseBackupFile.size_bytes", 7654321):
+        assert dbf.size == "8 MB"
+    with patch("lute.backup.service.DatabaseBackupFile.size_bytes", 1024):
+        assert dbf.size == "1 KB"
+    with patch("lute.backup.service.DatabaseBackupFile.size_bytes", 1001):
+        assert dbf.size == "1 KB"
+    with patch("lute.backup.service.DatabaseBackupFile.size_bytes", 944):
+        assert dbf.size == "944 bytes"
+
+
+def test_list_backups_returns_both(
+    bkp_dir, auto_backup_file, manual_backup_file
+):  # pylint: disable=unused-argument
+    service = Service(db.session)
+    backups = service.list_backups(bkp_dir)
+    assert len(backups) == 2
+    assert all(isinstance(backup, DatabaseBackupFile) for backup in backups)
+    assert any(not backup.is_manual for backup in backups)
+    assert any(backup.is_manual for backup in backups)
+
+
+def test_backup_listing_sorts_by_modified(
+    bkp_dir, auto_backup_file, manual_backup_file
+):  # pylint: disable=unused-argument
+    service = Service(db.session)
+    backups = service.list_backups(bkp_dir)
+    assert len(backups) == 2
+
+    backups.sort()
+    assert backups[0].last_modified == datetime(2024, 1, 1, 0, 0, 0, tzinfo=utc)
+    assert backups[1].last_modified == datetime(2024, 2, 1, 0, 0, 0, tzinfo=utc)
+
+    backups.sort(reverse=True)
+    assert backups[0].last_modified == datetime(2024, 2, 1, 0, 0, 0, tzinfo=utc)
+    assert backups[1].last_modified == datetime(2024, 1, 1, 0, 0, 0, tzinfo=utc)
diff --git a/tests/unit/book/test_Repository.py b/tests/unit/book/test_Repository.py
index 8d4e76d7c..44dc199bc 100644
--- a/tests/unit/book/test_Repository.py
+++ b/tests/unit/book/test_Repository.py
@@ -14,7 +14,7 @@
 @pytest.fixture(name="repo")
 def fixture_repo():
-    return Repository(db)
+    return Repository(db.session)
 
 
 @pytest.fixture(name="new_book")
@@ -23,6 +23,10 @@ def fixture_book(english):
     """
     Term business object with some defaults, no tags or parents.
""" + if english.id is None: + db.session.add(english) + db.session.commit() + assert english.id is not None, "have english lang sanity check" b = Book() b.language_id = english.id b.title = "HELLO" @@ -32,7 +36,7 @@ def fixture_book(english): return b -def test_save_new(app_context, new_book, repo): +def test_save_new_book(app_context, new_book, repo): """ Saving a simple Book object loads the database. """ @@ -49,6 +53,114 @@ def test_save_new(app_context, new_book, repo): assert book.book_tags == ["tag1", "tag2"], "tags filled" +def test_can_save_new_book_by_language_name(app_context, new_book, repo): + """ + Can save a book with language name, useful for api access. + """ + sql = "select BkTitle from books where BkTitle = 'HELLO'" + assert_sql_result(sql, [], "empty table") + + new_book.language_id = None + new_book.language_name = "English" + b = repo.add(new_book) + repo.commit() + assert_sql_result(sql, ["HELLO"], "Saved") + assert b.texts[0].text == "greeting" + + book = repo.load(b.id) + assert book.title == new_book.title, "found book" + assert book.book_tags == ["tag1", "tag2"], "tags filled" + + +def test_save_new_respects_book_words_per_page_count(app_context, new_book, repo): + """ + Saving a simple Book object loads the database. + """ + sql = "select BkTitle from books where BkTitle = 'HELLO'" + assert_sql_result(sql, [], "empty table") + + new_book.threshold_page_tokens = 3 + new_book.split_by = "sentences" + new_book.text = ( + "One two three four. One two three four five six seven eight nine ten eleven." + ) + b = repo.add(new_book) + repo.commit() + assert_sql_result(sql, ["HELLO"], "Saved") + assert b.texts[0].text == "One two three four.", "page 1" + assert ( + b.texts[1].text == "One two three four five six seven eight nine ten eleven." + ), "page 2" + + book = repo.load(b.id) + assert book.title == new_book.title, "found book" + assert book.book_tags == ["tag1", "tag2"], "tags filled" + + +@pytest.mark.parametrize( + "fulltext,threshold,expected", + [ + ("Test.", 200, ["Test."]), + ("Here is a dog. And a cat.", 3, ["Here is a dog.", "And a cat."]), + ("Here is a dog. And a cat.", 500, ["Here is a dog. And a cat."]), + ("Here is a dog.\nAnd a cat.", 500, ["Here is a dog.\nAnd a cat."]), + ("\nHere is a dog.\n\nAnd a cat.\n", 500, ["Here is a dog.\n\nAnd a cat."]), + ("Here is a dog.\n---\nAnd a cat.", 200, ["Here is a dog.", "And a cat."]), + ("Here is a dog. A cat. A thing.", 5, ["Here is a dog. A cat.", "A thing."]), + ("Dog.\n---\n---\nCat.\n---\n", 5, ["Dog.", "Cat."]), + ], +) +def test_split_sentences_scenario( + fulltext, threshold, expected, app_context, repo, english +): + "Check scenarios." + b = Book() + b.title = "Hola" + b.language_id = english.id + b.text = fulltext + b.threshold_page_tokens = threshold + b.split_by = "sentences" + dbbook = repo.add(b) + actuals = [t.text for t in dbbook.texts] + assert "/".join(actuals) == "/".join(expected), f"scen {threshold}, {fulltext}" + + +@pytest.mark.parametrize( + "fulltext,threshold,expected", + [ + ("Test.", 200, ["Test."]), + ( + "Here is a dog. And a cat.\nNew paragraph.", + 5, + ["Here is a dog. And a cat.", "New paragraph."], + ), + ( + "Here is a dog. And a cat.\nNew paragraph.", + 500, + ["Here is a dog. And a cat.\nNew paragraph."], + ), + ("Here is a dog.\nAnd a cat.", 500, ["Here is a dog.\nAnd a cat."]), + ("\nHere is a dog.\n\nAnd a cat.\n", 500, ["Here is a dog.\n\nAnd a cat."]), + ("Here is a dog.\n---\nAnd a cat.", 200, ["Here is a dog.", "And a cat."]), + ("Here is a dog. A cat. 
A thing.", 7, ["Here is a dog. A cat. A thing."]), + ("Dog.\n---\n---\nCat.\n---\n", 5, ["Dog.", "Cat."]), + ], +) +def test_split_by_paragraphs_scenario( + fulltext, threshold, expected, app_context, repo, english +): + "Check scenarios." + b = Book() + b.title = "Hola" + b.language_id = english.id + b.text = fulltext + b.threshold_page_tokens = threshold + b.split_by = "paragraphs" + dbbook = repo.add(b) + actuals = [t.text for t in dbbook.texts] + assert "/".join(actuals) == "/".join(expected), f"scen {threshold}, {fulltext}" + + def test_get_tags(app_context, new_book, repo): "Helper method test." assert repo.get_book_tags() == [], "no tags yet" diff --git a/tests/unit/book/test_datatables.py b/tests/unit/book/test_datatables.py index 845969203..20abe58ae 100644 --- a/tests/unit/book/test_datatables.py +++ b/tests/unit/book/test_datatables.py @@ -2,11 +2,13 @@ Book tests. """ +from datetime import datetime import pytest from lute.models.language import Language from lute.book.datatables import get_data_tables_list from lute.db import db -from lute.db.demo import load_demo_stories +from lute.db.demo import Service as DemoService +from tests.utils import make_book @pytest.fixture(name="_dt_params") @@ -15,14 +17,16 @@ def fixture_dt_params(): columns = [ {"data": "0", "name": "BkID", "searchable": False, "orderable": False}, {"data": "1", "name": "BkTitle", "searchable": True, "orderable": True}, + {"data": "2", "name": "IsCompleted", "searchable": False, "orderable": False}, ] params = { "draw": "1", "columns": columns, "order": [{"column": "1", "dir": "asc"}], - "start": "1", + "start": "0", # Start from page 0 "length": "10", "search": {"value": "", "regex": False}, + "filtLanguage": "0", # Ha! } return params @@ -31,8 +35,9 @@ def test_smoke_book_datatables_query_runs(app_context, _dt_params): """ Smoke test only, ensure query runs. """ - load_demo_stories() - get_data_tables_list(_dt_params, False) + demosvc = DemoService(db.session) + demosvc.load_demo_data() + get_data_tables_list(_dt_params, False, db.session) # print(d['data']) a = 1 assert a == 1, "dummy check" @@ -42,10 +47,33 @@ def test_book_query_only_returns_supported_language_books(app_context, _dt_param """ Smoke test only, ensure query runs. """ - load_demo_stories() + demosvc = DemoService(db.session) + demosvc.load_demo_data() for lang in db.session.query(Language).all(): lang.parser_type = "unknown" db.session.add(lang) db.session.commit() - d = get_data_tables_list(_dt_params, False) + d = get_data_tables_list(_dt_params, False, db.session) assert len(d["data"]) == 0, "no books should be active" + + +def test_book_data_says_completed_if_last_page_has_been_read( + app_context, _dt_params, english +): + "Add a visual cue to completed books." 
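+    # Single-page book: the last (only) text's read_date drives IsCompleted,
+    # so setting it below should flip the flag from 0 to 1.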
+ b = make_book("title", "Hello.", english) + db.session.add(b) + db.session.commit() + _dt_params["search"] = {"value": "title", "regex": False} + d = get_data_tables_list(_dt_params, False, db.session) + actual = d["data"][0] + assert actual["BkID"] == b.id, "correct book" + assert actual["IsCompleted"] == 0, "not completed" + t = b.texts[0] + t.read_date = datetime.now() + db.session.add(t) + db.session.commit() + d = get_data_tables_list(_dt_params, False, db.session) + actual = d["data"][0] + assert actual["BkID"] == b.id, "correct book" + assert actual["IsCompleted"] == 1, "completed" diff --git a/tests/unit/book/test_service.py b/tests/unit/book/test_service.py new file mode 100644 index 000000000..ed53bed10 --- /dev/null +++ b/tests/unit/book/test_service.py @@ -0,0 +1,80 @@ +""" +Book service tests. +""" + +import os +from contextlib import ExitStack +from lute.db import db +from lute.models.repositories import BookRepository +from lute.book.model import Book +from lute.book.service import Service + + +def get_test_files(): + "Return test files pair." + thisdir = os.path.dirname(os.path.realpath(__file__)) + sample_files = os.path.join(thisdir, "..", "..", "acceptance", "sample_files") + text_path = os.path.join(sample_files, "hola.txt") + mp3_path = os.path.join(sample_files, "fake.mp3") + with open(text_path, "r", encoding="utf-8") as fp: + assert fp.read().strip() == "Tengo un amigo.", "Sanity check only." + with open(mp3_path, "r", encoding="utf-8") as fp: + assert fp.read().strip() == "fake mp3 file", "Sanity check only." + return (text_path, mp3_path) + + +def test_create_book_from_file_paths(app, app_context, spanish): + "Create a book using the DTO, to be populated by the form." + text_path, mp3_path = get_test_files() + + b = Book() + b.title = "Hola" + b.language_id = spanish.id + b.text_source_path = text_path + b.audio_source_path = mp3_path + + svc = Service() + svc.import_book(b, db.session) + + repo = BookRepository(db.session) + book = repo.find_by_title("Hola", spanish.id) + assert book.title == "Hola", "title" + assert book.texts[0].text == "Tengo un amigo.", "Got content" + + assert book.audio_filename is not None, "Have audio file" + assert book.audio_filename.endswith("mp3"), "still an mp3" + useraudiopath = app.env_config.useraudiopath + full_audio_path = os.path.join(useraudiopath, book.audio_filename) + assert os.path.exists(full_audio_path), "file saved" + + with open(full_audio_path, "r", encoding="utf-8") as fp: + assert fp.read().strip() == "fake mp3 file", "correct content copied." + + +def test_create_book_from_streams(app, app_context, spanish): + "Create a book using streams, as given by the form." 
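+    # The streams stand in for the file uploads the web form provides;
+    # ExitStack keeps both files open until import_book has consumed them.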
+ text_path, mp3_path = get_test_files() + + b = Book() + b.title = "Hola" + b.language_id = spanish.id + with ExitStack() as stack: + b.text_stream = stack.enter_context(open(text_path, mode="rb")) + b.text_stream_filename = "blah.txt" + b.audio_stream = stack.enter_context(open(mp3_path, mode="rb")) + b.audio_stream_filename = "blah.mp3" + svc = Service() + svc.import_book(b, db.session) + + repo = BookRepository(db.session) + book = repo.find_by_title("Hola", spanish.id) + assert book.title == "Hola", "title" + assert book.texts[0].text == "Tengo un amigo.", "Got content" + + assert book.audio_filename is not None, "Have audio file" + assert book.audio_filename.endswith("mp3"), "still an mp3" + useraudiopath = app.env_config.useraudiopath + full_audio_path = os.path.join(useraudiopath, book.audio_filename) + assert os.path.exists(full_audio_path), "file saved" + with open(full_audio_path, "r", encoding="utf-8") as fp: + assert fp.read().strip() == "fake mp3 file", "correct content copied." diff --git a/tests/unit/book/test_stats.py b/tests/unit/book/test_stats.py index f17122d99..fe01bd89a 100644 --- a/tests/unit/book/test_stats.py +++ b/tests/unit/book/test_stats.py @@ -3,10 +3,11 @@ """ import pytest +from sqlalchemy.sql import text from lute.db import db from lute.term.model import Term, Repository -from lute.book.stats import get_status_distribution, refresh_stats, mark_stale +from lute.book.stats import Service from tests.utils import make_text, make_book from tests.dbasserts import assert_record_count_equals, assert_sql_result @@ -19,7 +20,7 @@ def add_term(lang, s, status): term.language_id = lang.id term.text = s term.status = status - repo = Repository(db) + repo = Repository(db.session) repo.add(term) repo.commit() @@ -35,7 +36,8 @@ def scenario(language, fulltext, terms_and_statuses, expected): for ts in terms_and_statuses: add_term(language, ts[0], ts[1]) - stats = get_status_distribution(b) + svc = Service(db.session) + stats = svc.calc_status_distribution(b) assert stats == expected @@ -58,6 +60,18 @@ def test_single_word(spanish): ) +def test_new_terms_are_not_created(spanish): + "No new terms created accidentally on calc stats." + scenario( + spanish, + "Tengo un gato. Tengo un perro.", + [["gato", 3], ["un", 0]], + {0: 3, 1: 0, 2: 0, 3: 1, 4: 0, 5: 0, 98: 0, 99: 0}, + ) + sql = "select WoText from words order by WoText" + assert_sql_result(sql, ["gato", "un"], "no new terms.") + + def test_with_multiword(spanish): scenario( spanish, @@ -94,9 +108,15 @@ def fixture_make_book(empty_db, spanish): return b +@pytest.fixture(name="service") +def fixture_service(): + "svc." + return Service(db.session) + + def add_terms(lang, terms): "Create and add term." - repo = Repository(db) + repo = Repository(db.session) for s in terms: term = Term() term.language = lang @@ -108,41 +128,104 @@ def add_terms(lang, terms): def assert_stats(expected, msg=""): "helper." - sql = "select wordcount, distinctterms, distinctunknowns, unknownpercent from bookstats" + sql = """select distinctterms, distinctunknowns, + unknownpercent, replace(status_distribution, '"', "'") from bookstats""" assert_sql_result(sql, expected, msg) -def test_cache_loads_when_prompted(_test_book): +def test_cache_loads_when_prompted(service, _test_book): "Have to call refresh_stats() to load stats." 
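+    # bookstats rows are created lazily: nothing exists until refresh_stats().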
     assert_record_count_equals("bookstats", 0, "nothing loaded")
-    refresh_stats()
+    service.refresh_stats()
     assert_record_count_equals("bookstats", 1, "loaded")
 
 
-def test_stats_smoke_test(_test_book, spanish):
+def test_stats_smoke_test(service, _test_book, spanish):
     "Terms are rendered to count stats."
     add_terms(spanish, ["gato", "TENGO"])
-    refresh_stats()
-    assert_stats(["4; 4; 2; 50"])
+    service.refresh_stats()
+    assert_stats(
+        ["4; 2; 50; {'0': 2, '1': 2, '2': 0, '3': 0, '4': 0, '5': 0, '98': 0, '99': 0}"]
+    )
+
+
+def test_get_stats_calculates_and_caches_stats(service, _test_book, spanish):
+    "Calculating stats is expensive, so store them on get."
+    add_terms(spanish, ["gato", "TENGO"])
+    assert_record_count_equals("bookstats", 0, "cache not loaded")
+    assert_stats([], "No stats cached at start.")
+
+    stats = service.get_stats(_test_book)
+    assert stats.BkID == _test_book.id
+    assert stats.distinctterms == 4
+    assert stats.distinctunknowns == 2
+    assert stats.unknownpercent == 50
+    assert (
+        stats.status_distribution
+        == '{"0": 2, "1": 2, "2": 0, "3": 0, "4": 0, "5": 0, "98": 0, "99": 0}'
+    )
+
+    assert_record_count_equals("bookstats", 1, "cache loaded")
+    assert_stats(
+        ["4; 2; 50; {'0': 2, '1': 2, '2': 0, '3': 0, '4': 0, '5': 0, '98': 0, '99': 0}"]
+    )
+    stats = service.get_stats(_test_book)
+    assert stats.BkID == _test_book.id
+    assert (
+        stats.status_distribution
+        == '{"0": 2, "1": 2, "2": 0, "3": 0, "4": 0, "5": 0, "98": 0, "99": 0}'
+    )
 
 
-def test_stats_calculates_rendered_text(_test_book, spanish):
+def test_stats_calculates_rendered_text(service, _test_book, spanish):
     "Multiword term counted as one term."
     add_terms(spanish, ["tengo un"])
-    refresh_stats()
-    assert_stats(["4; 3; 2; 67"])
+    service.refresh_stats()
+    assert_stats(
+        ["3; 2; 67; {'0': 2, '1': 1, '2': 0, '3': 0, '4': 0, '5': 0, '98': 0, '99': 0}"]
+    )
 
 
-def test_stats_only_update_books_marked_stale(_test_book, spanish):
+def test_stats_only_update_books_marked_stale(service, _test_book, spanish):
     "Have to mark book as stale, too expensive otherwise."
     add_terms(spanish, ["gato", "TENGO"])
-    refresh_stats()
-    assert_stats(["4; 4; 2; 50"])
+    service.refresh_stats()
+    assert_stats(
+        ["4; 2; 50; {'0': 2, '1': 2, '2': 0, '3': 0, '4': 0, '5': 0, '98': 0, '99': 0}"]
+    )
 
     add_terms(spanish, ["hola"])
-    refresh_stats()
-    assert_stats(["4; 4; 2; 50"], "not updated")
+    service.refresh_stats()
+    assert_stats(
+        [
+            "4; 2; 50; {'0': 2, '1': 2, '2': 0, '3': 0, '4': 0, '5': 0, '98': 0, '99': 0}"
+        ],
+        "not updated",
+    )
+
+    service.mark_stale(_test_book)
+    service.refresh_stats()
+    assert_stats(
+        [
+            "4; 1; 25; {'0': 1, '1': 3, '2': 0, '3': 0, '4': 0, '5': 0, '98': 0, '99': 0}"
+        ],
+        "updated",
+    )
+
 
-    mark_stale(_test_book)
-    refresh_stats()
-    assert_stats(["4; 4; 1; 25"], "updated")
+def test_stats_updated_if_field_empty(service, _test_book, spanish):
+    "Stats are recalculated when the cached status_distribution field is emptied."
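+    # Nulling status_distribution below should force a recalc on refresh.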
+ add_terms(spanish, ["gato", "TENGO"]) + service.refresh_stats() + assert_stats( + ["4; 2; 50; {'0': 2, '1': 2, '2': 0, '3': 0, '4': 0, '5': 0, '98': 0, '99': 0}"] + ) + + db.session.execute(text("update bookstats set status_distribution = null")) + db.session.commit() + + assert_stats(["4; 2; 50; None"], "Set to none") + service.refresh_stats() + assert_stats( + ["4; 2; 50; {'0': 2, '1': 2, '2': 0, '3': 0, '4': 0, '5': 0, '98': 0, '99': 0}"] + ) diff --git a/tests/unit/book/test_token_group_generator.py b/tests/unit/book/test_token_group_generator.py new file mode 100644 index 000000000..b98097bba --- /dev/null +++ b/tests/unit/book/test_token_group_generator.py @@ -0,0 +1,94 @@ +""" +token_group_iterator tests. +""" + +from lute.book.model import token_group_generator +from lute.parse.space_delimited_parser import SpaceDelimitedParser + + +def toks_to_string(tokens): + ret = "".join([t.token for t in tokens]) + return ret.replace("¶", "\n").strip() + + +def test_paragraph_scenarios(english): + """ + Given a string and the max token count, + generator should return expected groups. + """ + + def scenario(s, threshold, expected_groups): + parser = SpaceDelimitedParser() + tokens = parser.get_parsed_tokens(s, english) + # pylint: disable=unnecessary-comprehension + groups = [g for g in token_group_generator(tokens, "paragraphs", threshold)] + print(groups, flush=True) + gs = [toks_to_string(g) for g in groups] + expected = "||".join(expected_groups) + assert "||".join(gs) == expected, f"groups for size {threshold}" + + scenario("", 100, [""]) + + scenario("Here is a dog. Here is a cat.", 100, ["Here is a dog. Here is a cat."]) + scenario("Here is a dog. Here is a cat.", 3, ["Here is a dog. Here is a cat."]) + scenario("Here is a dog. Here is a cat", 6, ["Here is a dog. Here is a cat"]) + scenario("Here is a dog Here is a cat", 6, ["Here is a dog Here is a cat"]) + + p1 = "Here is a dog." + p2 = "And a cat." + src = "\n".join([p1, p2]) + scenario(src, 100, [src]) + scenario(src, 5, [src]) + scenario(src, 3, [p1, p2]) + scenario(src.replace("\n", "\n\n\n"), 3, [p1, p2]) + + p1 = "Here is a dog." + p2 = "Here is a cat. Much more info here, long paragraph." + p3 = "Last stuff." + src = "\n".join([p1, p2, p3]) + scenario(src, 100, [src]) + scenario(src, 5, [f"{p1}\n{p2}", p3]) + scenario(src, 3, [p1, p2, p3]) + + src = "\n" + "\n\n\n".join([p1, p2, p3]) + "\n" + scenario(src, 5, [f"{p1}\n\n\n{p2}", p3]) + + +def test_sentence_scenarios(english): + """ + Given a string and the max token count, + generator should return expected groups. + """ + + def scenario(s, threshold, expected_groups): + parser = SpaceDelimitedParser() + tokens = parser.get_parsed_tokens(s, english) + # pylint: disable=unnecessary-comprehension + groups = [g for g in token_group_generator(tokens, "sentences", threshold)] + gs = [toks_to_string(g) for g in groups] + expected = "||".join(expected_groups) + assert "||".join(gs) == expected, f"groups for size {threshold}" + + scenario("", 100, [""]) + + text = "Here is a dog. Here is a cat." + + scenario(text, 100, ["Here is a dog. Here is a cat."]) + + scenario(text, 3, ["Here is a dog.", "Here is a cat."]) + + scenario(text, 6, ["Here is a dog. Here is a cat."]) + + # No period at the end. + scenario("Here is a dog. Here is a cat", 6, ["Here is a dog. Here is a cat"]) + + # No period at all. + scenario("Here is a dog Here is a cat", 6, ["Here is a dog Here is a cat"]) + + s1 = "Here is a dog." + s2 = "Here is a cat." + s3 = "Here is a thing." 
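+    # Three sentences: groups should break on sentence boundaries once
+    # the token threshold is exceeded.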
+ src = " ".join([s1, s2, s3]) + scenario(src, 10, [src]) + scenario(src, 7, [f"{s1} {s2}", s3]) + scenario(src, 3, [s1, s2, s3]) diff --git a/tests/unit/cli/__init__.py b/tests/unit/cli/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/cli/test_import_books.py b/tests/unit/cli/test_import_books.py new file mode 100644 index 000000000..0158f5cd5 --- /dev/null +++ b/tests/unit/cli/test_import_books.py @@ -0,0 +1,95 @@ +""" +Smoke test for bulk import of books. +""" + +from sqlalchemy import and_ +from lute.cli.import_books import import_books_from_csv +from lute.models.book import Book +from lute.models.repositories import BookRepository +from lute.db import db + + +def test_smoke_test(app_context, tmp_path, english, german): + """Test importing books from CSV file""" + csv_contents = """title,language,url,tags,audio,bookmarks,an extra column,text +A Book,English,http://www.example.com/book,"foo,bar,baz",book.mp3,1.00;3.14;42.00,extra information,"Lorem ipsum, dolor sit amet." +Another Book,,,,,,,The quick brown fox jumps over the lazy dog. +A Book,German,,,,,,Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich. +""" + csv_file = tmp_path / "books.csv" + with open(csv_file, "w", encoding="utf-8") as f: + f.write(csv_contents) + mp3_file = tmp_path / "book.mp3" + with open(mp3_file, "w", encoding="utf-8") as f: + pass + + common_tags = ["bar", "qux"] + + repo = BookRepository(db.session) + + # Check that no changes are made if not committing. + import_books_from_csv(csv_file, "English", common_tags, False) + assert repo.find_by_title("A Book", english.id) is None + assert repo.find_by_title("Another Book", english.id) is None + assert repo.find_by_title("A Book", german.id) is None + + # Check that new books are added. + import_books_from_csv(csv_file, "English", common_tags, True) + + book = repo.find_by_title("A Book", english.id) + assert book is not None + assert book.title == "A Book" + assert book.language_id == english.id + assert book.source_uri == "http://www.example.com/book" + assert book.audio_filename == str(mp3_file) + assert book.audio_bookmarks == "1.00;3.14;42.00" + assert len(book.texts) == 1 + assert book.texts[0].text == "Lorem ipsum, dolor sit amet." + assert sorted([tag.text for tag in book.book_tags]) == ["bar", "baz", "foo", "qux"] + + book = repo.find_by_title("Another Book", english.id) + assert book is not None + assert book.title == "Another Book" + assert book.language_id == english.id + assert book.source_uri is None + assert book.audio_filename is None + assert book.audio_bookmarks is None + assert len(book.texts) == 1 + assert book.texts[0].text == "The quick brown fox jumps over the lazy dog." + assert sorted([tag.text for tag in book.book_tags]) == ["bar", "qux"] + + book = repo.find_by_title("A Book", german.id) + assert book is not None + assert book.title == "A Book" + assert book.language_id == german.id + assert book.source_uri is None + assert book.audio_filename is None + assert book.audio_bookmarks is None + assert len(book.texts) == 1 + assert ( + book.texts[0].text + == "Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich." + ) + assert sorted([tag.text for tag in book.book_tags]) == ["bar", "qux"] + + # Check that duplicate books are not added. 
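+    # Re-running the same import should be idempotent: each count stays at 1.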
+ import_books_from_csv(csv_file, "English", common_tags, True) + + assert ( + db.session.query(Book) + .filter(and_(Book.title == "A Book", Book.language_id == english.id)) + .count() + == 1 + ) + assert ( + db.session.query(Book) + .filter(and_(Book.title == "Another Book", Book.language_id == english.id)) + .count() + == 1 + ) + assert ( + db.session.query(Book) + .filter(and_(Book.title == "A Book", Book.language_id == german.id)) + .count() + == 1 + ) diff --git a/tests/unit/cli/test_language_term_export.py b/tests/unit/cli/test_language_term_export.py new file mode 100644 index 000000000..ba8fd052b --- /dev/null +++ b/tests/unit/cli/test_language_term_export.py @@ -0,0 +1,126 @@ +"Smoke test only." + +from lute.cli.language_term_export import generate_language_file, generate_book_file + +from lute.models.term import Term, TermTag +from lute.models.repositories import TermRepository, LanguageRepository +from lute.db import db +from lute.db.demo import Service as DemoService +from tests.utils import make_book +from tests.dbasserts import assert_sql_result, assert_record_count_equals + + +def test_language_term_export_smoke_test(app_context, tmp_path): + "dump data." + demosvc = DemoService(db.session) + demosvc.load_demo_data() + sql = """select * from books + where BkLgID = (select LgID from languages where LgName='English') + """ + assert_record_count_equals(sql, 2, "have books") + langrepo = LanguageRepository(db.session) + eng = langrepo.find_by_name("English") + t = Term(eng, "the") + t.translation = "article" + t.add_term_tag(TermTag("a")) + t.add_term_tag(TermTag("b")) + db.session.add(t) + db.session.commit() + + outfile = tmp_path / "outfile.csv" + generate_language_file("English", outfile) + with open(outfile, "r", encoding="utf-8") as ofhandle: + text = ofhandle.read() + print(text) + lines = text.split("\n") + head = lines[0] + assert ( + head == "term,count,familycount,books,definition,status,parents,children,tags" + ), "headings" + firstline = lines[1] + assert firstline.startswith("the,"), "the is most common" + assert firstline.endswith('article,1,-,-,"a, b"'), "ending data" + + +def test_single_book_export(app_context, empty_db, tmp_path, english): + "dump data for english." + + assert_sql_result("select * from books", [], "no books") + assert_sql_result("select * from words", [], "no terms") + + fulltext = "a b c d e A B C\n---\nG H I c d e d" + b = make_book("hi", fulltext, english) + db.session.add(b) + db.session.commit() + + for c in ["a", "d", "c d"]: + t = Term(english, c) + t.status = 1 + db.session.add(t) + for c in ["e", "g", "h"]: + t = Term(english, c) + t.status = 0 + db.session.add(t) + db.session.commit() + + def _find(term_string): + "Find term with the text." + spec = Term(english, term_string) + repo = TermRepository(db.session) + ret = repo.find_by_spec(spec) + assert ret is not None, f"Have {term_string}" + return ret + + a = _find("a") + for c in ["e", "h"]: + t = _find(c) + t.add_parent(a) + db.session.add(t) + db.session.commit() + + expected = [ + # Headings + "term,count,familycount,books,definition,status,parents,children,tags", + # a has two children, e and h + "a,2,5,hi,-,1,-,e (2); h (1),-", + # b occurs twice. + "b,2,2,hi,-,0,-,-,-", + # 'c d' occurs twice + "c d,2,2,hi,-,1,-,-,-", + # e is a new term + "e,2,2,hi,-,0,a,-,-", + # c is a new term, status 0. + # Occurs once as c, once as C. 
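+        # Case in this list is not significant; the comparison below lowercases.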
+ "C,1,1,hi,-,0,-,-,-", + "I,1,1,hi,-,0,-,-,-", + "d,1,1,hi,-,1,-,-,-", + # g and h are new + "g,1,1,hi,-,0,-,-,-", + "h,1,1,hi,-,0,a,-,-", + "", + ] + + def _lowersort(arr): + "Lowercase and sort strings." + return sorted([a.lower() for a in arr]) + + def _assert_text_matches_expected(file_text, expected_array): + "Avoid sorting, case issues." + + # Converting the text to lower because sometimes the text file + # returned B, and sometimes b ... which is _very_ odd, but I + # don't really care. + assert _lowersort(file_text.split("\n")) == _lowersort(expected_array) + + # Generate for english. + outfile = tmp_path / "outfile.csv" + generate_language_file("English", outfile) + with open(outfile, "r", encoding="utf-8") as ofhandle: + text = ofhandle.read() + _assert_text_matches_expected(text, expected) + + # Generate for book. + generate_book_file(b.id, outfile) + with open(outfile, "r", encoding="utf-8") as ofhandle: + text = ofhandle.read() + _assert_text_matches_expected(text, expected) diff --git a/tests/unit/config/test_app_config.py b/tests/unit/config/test_app_config.py index f9d07746c..3e173b275 100644 --- a/tests/unit/config/test_app_config.py +++ b/tests/unit/config/test_app_config.py @@ -97,12 +97,3 @@ def test_nonexistent_config_file_throws(tmp_path): config_file = tmp_path / "nonexistent_config.yaml" with pytest.raises(FileNotFoundError, match="No such file"): AppConfig(config_file) - - -def test_create_from_config_file(): - """ - Has a method to create a config from the config/config.yml. - """ - ac = AppConfig.create_from_config() - assert ac is not None, "sanity check" - assert isinstance(ac, AppConfig), "sanity check" diff --git a/tests/unit/db/test_data_cleanup.py b/tests/unit/db/test_data_cleanup.py new file mode 100644 index 000000000..6bc6a542c --- /dev/null +++ b/tests/unit/db/test_data_cleanup.py @@ -0,0 +1,58 @@ +"Data cleanup tests." + +from datetime import datetime +from sqlalchemy import text as sqltext +from lute.db import db +from lute.db.data_cleanup import clean_data +from tests.utils import make_text +from tests.dbasserts import assert_sql_result + + +# Cleaning up missing sentence.SeTextLC records. + + +def test_cleanup_loads_missing_sentence_textlc(app_context, spanish): + """ + Load the sentence.SeTextLC. + + If the sqlite LOWER(SeText) would be the same as the parser-generated lowercase text, + store the special char '*' only, don't waste file space storing the parser-generated lc text. + """ + + t = make_text("test", "gato. Ábrelo. tengo. QUIERO. Ábrela. ábrela.", spanish) + t.read_date = datetime.now() + db.session.add(t) + db.session.commit() + + # Force re-calc. 
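+    # Null out SeTextLC for some rows so clean_data() has work to do.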
+ sqlhack = """ + update sentences set SeTextLC = null + where SeText not like '%gato%' and SeText not like '%brelo%' + """ + db.session.execute(sqltext(sqlhack)) + db.session.commit() + sql = "select SeText, SeTextLC from sentences order by SeID" + preclean = [ + "/gato/./; *", + "/Ábrelo/./; /ábrelo/./", + "/tengo/./; None", + "/QUIERO/./; None", + "/Ábrela/./; None", + "/ábrela/./; None", + ] + assert_sql_result(sql, preclean, "pre-clean") + + def _output(s): + print(s, flush=True) + + clean_data(db.session, _output) + + postclean = [ + "/gato/./; *", + "/Ábrelo/./; /ábrelo/./", + "/tengo/./; *", + "/QUIERO/./; *", + "/Ábrela/./; /ábrela/./", + "/ábrela/./; *", + ] + assert_sql_result(sql, postclean, "post-clean") diff --git a/tests/unit/db/test_demo.py b/tests/unit/db/test_demo.py index 24f6ce79b..c0f1743c8 100644 --- a/tests/unit/db/test_demo.py +++ b/tests/unit/db/test_demo.py @@ -1,128 +1,149 @@ """ Tests for managing the demo data. + +Prior to https://github.com/LuteOrg/lute-v3/issues/534, the baseline +db had languages and stories pre-loaded. This created a lot of db +file thrash whenever that data changed. + +In the new setup, the baseline db only contains a flag, "LoadDemoData" +(true/false), which is initially set to True. When the app first +starts up, if that flag is True, it loads the demo data, and sets +"LoadDemoData" to False, and "IsDemoData" to True. + +If the LoadDemoData flag is set, the demo data is loaded from the +startup scripts (devstart and lute.main) + """ -import os from sqlalchemy import text import pytest from lute.db import db -from lute.db.demo import ( - contains_demo_data, - remove_flag, - delete_demo_data, - tutorial_book_id, - demo_data_path, - load_demo_data, - predefined_languages, - get_demo_language, -) +from lute.db.demo import Service import lute.parse.registry from tests.dbasserts import assert_record_count_equals, assert_sql_result -def test_new_db_is_demo(app_context): +# ======================================== +# See notes at top of file re these tests. +# ======================================== + + +@pytest.fixture(name="service") +def _service(app_context): + return Service(db.session) + + +def test_new_db_doesnt_contain_anything(service): "New db created from the baseline has the demo flag set." - assert contains_demo_data() is True, "new db contains demo." + assert service.should_load_demo_data() is True, "has LoadDemoData flag." + assert service.contains_demo_data() is False, "no demo data." + + +def test_empty_db_not_loaded_if_load_flag_not_set(service): + "Even if it's empty, nothing happens." + service.remove_load_demo_flag() + assert service.contains_demo_data() is False, "no demo data." + assert_record_count_equals("select * from languages", 0, "empty") + service.load_demo_data() + assert service.contains_demo_data() is False, "no demo data." + assert service.should_load_demo_data() is False, "still no reload." + assert_record_count_equals("select * from languages", 0, "still empty") + + +def test_smoke_test_load_demo_works(service): + "Wipe everything, but set the flag and then start." + assert service.should_load_demo_data() is True, "should reload demo data." + service.load_demo_data() + assert service.contains_demo_data() is True, "demo loaded." + assert service.tutorial_book_id() > 0, "Have tutorial" + assert service.should_load_demo_data() is False, "loaded once, don't reload." + + +def test_load_not_run_if_data_exists_even_if_flag_is_set(service): + "Just in case." 
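+    # Even if the LoadDemoData flag is re-set, a second load must not
+    # overwrite the existing data.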
+ assert service.should_load_demo_data() is True, "should reload demo data." + service.load_demo_data() + assert service.tutorial_book_id() > 0, "Have tutorial" + assert service.should_load_demo_data() is False, "loaded once, don't reload." + service.remove_flag() + service.set_load_demo_flag() + assert service.should_load_demo_data() is True, "should re-reload demo data." + service.load_demo_data() # if this works, it didn't throw :-P + assert service.should_load_demo_data() is False, "already loaded once." -def test_removing_flag_means_not_demo(app_context): + +def test_removing_flag_means_not_demo(service): "Unsetting the flag means the db is not a demo." - remove_flag() - assert contains_demo_data() is False, "not a demo." + assert service.should_load_demo_data() is True, "should reload demo data." + service.load_demo_data() + assert service.contains_demo_data() is True, "demo loaded." + service.remove_flag() + assert service.contains_demo_data() is False, "not a demo now." -def test_wiping_db_clears_flag(app_context): +def test_wiping_db_clears_flag(service): "No longer a demo if the demo is wiped out!" - delete_demo_data() - assert contains_demo_data() is False, "not a demo." + assert service.should_load_demo_data() is True, "should reload demo data." + service.load_demo_data() + assert service.contains_demo_data() is True, "demo loaded." + service.delete_demo_data() + assert service.contains_demo_data() is False, "not a demo." -def test_wipe_db_only_works_if_flag_is_set(app_context): +def test_wipe_db_only_works_if_flag_is_set(service): "Can only wipe a demo db!" - remove_flag() + assert service.should_load_demo_data() is True, "should reload demo data." + service.load_demo_data() + assert service.contains_demo_data() is True, "demo loaded." + service.remove_flag() with pytest.raises(Exception): - delete_demo_data() + service.delete_demo_data() -def test_tutorial_id_returned_if_present(app_context): +def test_tutorial_id_returned_if_present(service): "Sanity check." - assert tutorial_book_id() > 0, "have tutorial" + assert service.should_load_demo_data() is True, "should reload demo data." + service.load_demo_data() + assert service.tutorial_book_id() > 0, "have tutorial" sql = 'update books set bktitle = "xxTutorial" where bktitle = "Tutorial"' db.session.execute(text(sql)) db.session.commit() - assert tutorial_book_id() is None, "no tutorial" + assert service.tutorial_book_id() is None, "no tutorial" sql = 'update books set bktitle = "Tutorial" where bktitle = "xxTutorial"' db.session.execute(text(sql)) db.session.commit() - assert tutorial_book_id() > 0, "have tutorial again" - - delete_demo_data() - assert tutorial_book_id() is None, "no tutorial" - - -# Getting languages from yaml files. + assert service.tutorial_book_id() > 0, "have tutorial again" - -def test_new_english_from_yaml_file(): - """ - Smoke test, can load a new language from yaml definition. - """ - f = os.path.join(demo_data_path(), "languages", "english.yaml") - lang = get_demo_language(f) - - # Replace the following assertions with your specific expectations - assert lang.name == "English" - assert lang.dict_1_uri == "https://en.thefreedictionary.com/###" - assert lang.sentence_translate_uri == "*https://www.deepl.com/translator#en/en/###" - assert lang.show_romanization is False, "uses default" - assert lang.right_to_left is False, "uses default" - - -def test_get_predefined(): - """ - Returns all the languages using the files in the demo folder. 
- """ - langs = predefined_languages() - langnames = [lang.name for lang in langs] - for expected in ["English", "French", "Turkish"]: - assert expected in langnames, expected + service.delete_demo_data() + assert service.tutorial_book_id() is None, "no tutorial" # Loading. -@pytest.mark.dbdemoload -def test_load_demo_loads_language_yaml_files(app_context): +@pytest.mark.dbreset +def test_rebaseline(service): """ - All data is loaded, spot check some. - This test is also used from "inv db.reset" in tasks.py (see .pytest.ini). """ - delete_demo_data() - assert contains_demo_data() is False, "not a demo." + assert service.contains_demo_data() is False, "not a demo." assert_record_count_equals("languages", 0, "wiped out") - # Wipe out all settings!!! - # When user installs, the settings need to be loaded - # with values from _their_ config and environment. - sql = "delete from settings" + # Wipe out all user settings!!! When user installs and first + # starts up, the user settings need to be loaded with values from + # _their_ config and environment. + sql = "delete from settings where StKeyType = 'user'" db.session.execute(text(sql)) db.session.commit() - load_demo_data() - assert contains_demo_data() is True, "demo loaded" - checks = [ - "select * from languages where LgName = 'English'", - "select * from books where BkTitle = 'Tutorial'", - ] - for c in checks: - assert_record_count_equals(c, 1, c + " returned 1") + service.set_load_demo_flag() - sql = "select distinct stkeytype from settings" - assert_sql_result(sql, ["system"], "only system settings remain") + sql = "select stkeytype, stkey, stvalue from settings" + assert_sql_result(sql, ["system; LoadDemoData; 1"], "only this key is set.") @pytest.fixture(name="_restore_japanese_parser") @@ -131,10 +152,10 @@ def fixture_restore_mecab_support(): "Teardown" method to restore jp parser if it was removed. """ k = "japanese" - assert k in lute.parse.registry.parsers, "have jp parser, sanity check" - old_val = lute.parse.registry.parsers[k] + assert k in lute.parse.registry.__LUTE_PARSERS__, "have jp parser, sanity check" + old_val = lute.parse.registry.__LUTE_PARSERS__[k] yield - if k not in lute.parse.registry.parsers: - lute.parse.registry.parsers[k] = old_val + if k not in lute.parse.registry.__LUTE_PARSERS__: + lute.parse.registry.__LUTE_PARSERS__[k] = old_val diff --git a/tests/unit/db/test_management.py b/tests/unit/db/test_management.py index d9966bb91..e04998cd3 100644 --- a/tests/unit/db/test_management.py +++ b/tests/unit/db/test_management.py @@ -5,22 +5,21 @@ """ import pytest +from sqlalchemy import text from lute.db import db -from lute.models.setting import UserSetting, BackupSettings -from lute.db.management import delete_all_data +from lute.models.setting import UserSetting +from lute.models.repositories import UserSettingRepository +from lute.db.management import delete_all_data, add_default_user_settings from tests.dbasserts import assert_record_count_equals -@pytest.mark.dbwipe def test_wiping_db_clears_out_all_tables(app_context): """ DB is wiped clean if requested ... settings are left! - - This test is also used from /tasks.py; see .pytest.ini. 
""" old_user_settings = db.session.query(UserSetting).all() - delete_all_data() + delete_all_data(db.session) tables = [ "books", "bookstats", @@ -34,6 +33,7 @@ def test_wiping_db_clears_out_all_tables(app_context): "wordimages", "wordparents", "words", + "wordsread", "wordtags", ] for t in tables: @@ -47,7 +47,42 @@ def test_wiping_db_clears_out_all_tables(app_context): def test_can_get_backup_settings_when_db_is_wiped(app_context): "The backupsettings struct assumes certain things about the data." - delete_all_data() - bs = BackupSettings.get_backup_settings() + delete_all_data(db.session) + repo = UserSettingRepository(db.session) + bs = repo.get_backup_settings() assert bs.backup_enabled, "backup is back to being enabled" assert bs.backup_dir is not None, "default restored" + + +@pytest.fixture(name="us_repo") +def fixture_usersetting_repo(app_context): + "Repo" + r = UserSettingRepository(db.session) + return r + + +def test_user_settings_loaded_with_defaults(us_repo): + "Called on load." + db.session.execute(text("delete from settings")) + db.session.commit() + assert us_repo.key_exists("backup_dir") is False, "key removed" + add_default_user_settings(db.session, "blah") + assert us_repo.key_exists("backup_dir") is True, "key created" + + # Check defaults + b = us_repo.get_backup_settings() + assert b.backup_enabled is True + assert b.backup_dir is not None + assert b.backup_auto is True + assert b.backup_warn is True + assert b.backup_count == 5 + + +def test_user_settings_load_leaves_existing_values(us_repo): + "Called on load." + us_repo.set_value("backup_count", 17) + db.session.commit() + assert us_repo.get_value("backup_count") == "17" + add_default_user_settings(db.session, "blah") + b = us_repo.get_backup_settings() + assert b.backup_count == 17, "still 17" diff --git a/tests/unit/language/__init__.py b/tests/unit/language/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/language/test_service.py b/tests/unit/language/test_service.py new file mode 100644 index 000000000..2f9268d43 --- /dev/null +++ b/tests/unit/language/test_service.py @@ -0,0 +1,92 @@ +""" +Language service tests. +""" + +from lute.language.service import Service +from lute.db import db +from lute.utils.debug_helpers import DebugTimer +from tests.dbasserts import assert_sql_result + + +def test_get_all_lang_defs(app_context): + "Can get all predefined languages." + service = Service(db.session) + defs = service.get_supported_defs() + engs = [d for d in defs if d.language.name == "English"] + assert len(engs) == 1, "have english" + eng = engs[0] + assert len(eng.books) == 2, "tutorial and follow-up" + titles = [b.title for b in eng.books] + titles.sort() + assert titles == ["Tutorial", "Tutorial follow-up"], "book titles" + + +def test_supported_predefined_languages(app_context): + "Get supported lang names" + service = Service(db.session) + predefs = service.supported_predefined_languages() + assert len(predefs) > 1, "Have predefined" + langnames = [lang.name for lang in predefs] + assert "English" in langnames, "Have English" + assert "French" in langnames, "Have French" + + +def test_get_language_def(app_context): + """ + Smoke test, can load a new language from yaml definition. 
+ """ + service = Service(db.session) + lang = service.get_language_def("English").language + + assert lang.name == "English" + assert lang.show_romanization is False, "uses default" + assert lang.right_to_left is False, "uses default" + + # pylint: disable=line-too-long + expected = [ + "terms; embeddedhtml; https://simple.wiktionary.org/wiki/[LUTE]; True; 1", + "terms; popuphtml; https://www.collinsdictionary.com/dictionary/english/[LUTE]; True; 2", + "sentences; popuphtml; https://www.deepl.com/translator#en/en/[LUTE]; True; 3", + "terms; popuphtml; https://conjugator.reverso.net/conjugation-english-verb-[LUTE].html; True; 4", + ] + actual = [ + f"{ld.usefor}; {ld.dicttype}; {ld.dicturi}; {ld.is_active}; {ld.sort_order}" + for ld in lang.dictionaries + ] + assert actual == expected, "dictionaries" + + +def test_load_def_loads_lang_and_stories(app_context): + "Can load a language." + story_sql = "select bktitle from books order by BkTitle" + lang_sql = "select LgName from languages" + assert_sql_result(lang_sql, [], "no langs") + assert_sql_result(story_sql, [], "nothing loaded") + + dt = DebugTimer("Loading", False) + dt.step("start") + service = Service(db.session) + dt.step("Service()") + lang_id = service.load_language_def("English") + dt.step("load_language_def") + dt.summary() + + assert lang_id > 0, "ID returned, used for filtering" + assert_sql_result(lang_sql, ["English"], "eng loaded") + assert_sql_result(story_sql, ["Tutorial", "Tutorial follow-up"], "stories loaded") + + +def test_load_all_defs_loads_lang_and_stories(app_context): + "Smoke test, load everything." + story_sql = "select bktitle from books" + lang_sql = "select LgName from languages" + assert_sql_result(lang_sql, [], "no langs") + assert_sql_result(story_sql, [], "nothing loaded") + + db.session.flush() + service = Service(db.session) + defs = service.get_supported_defs() + langnames = [d.language.name for d in defs] + for n in langnames: + lang_id = service.load_language_def(n) + assert lang_id > 0, "Loaded" diff --git a/tests/unit/models/test_Book.py b/tests/unit/models/test_Book.py deleted file mode 100644 index 405efc8da..000000000 --- a/tests/unit/models/test_Book.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Book tests. -""" - -from lute.models.book import Book - - -def test_create_book(english): - """ - When a book is created with given content, the content - is split into separate Text objects. - """ - fulltext = "Here is a dog. And a cat." - scenario(english, fulltext, 5, ["Here is a dog.", "And a cat."], 7) - scenario(english, fulltext, 500, ["Here is a dog. And a cat."], 7) - scenario( - english, - fulltext + " And a thing.", - 8, - ["Here is a dog. And a cat.", "And a thing."], - 10, - ) - scenario( - english, "Here is a dog.\nAnd a cat.", 500, ["Here is a dog.\nAnd a cat."], 7 - ) - - -def scenario(english, fulltext, maxwords, expected, expected_word_count): - """ - Check scenarios. - """ - b = Book.create_book("hi", english, fulltext, maxwords) - - actuals = [t.text for t in b.texts] - print(actuals) - assert "/".join(actuals) == "/".join(expected), f"scen {maxwords}" - assert b.word_count == expected_word_count, "word count" diff --git a/tests/unit/models/test_Book_add_remove_pages.py b/tests/unit/models/test_Book_add_remove_pages.py new file mode 100644 index 000000000..9169c7442 --- /dev/null +++ b/tests/unit/models/test_Book_add_remove_pages.py @@ -0,0 +1,93 @@ +""" +Book tests. 
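+Adding and removing pages must keep TxOrder contiguous.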
+""" + +from lute.db import db +from tests.utils import make_book +from tests.dbasserts import assert_sql_result + + +# pylint: disable=too-many-arguments,too-many-positional-arguments +def assert_add(book, pagenum, before, text, expected, msg=""): + "Assert that adding results in expected table content." + sql = f""" + select TxText, TxOrder from Texts + where TxBkID = {book.id} + order by TxOrder""" + + t = None + if before: + t = book.add_page_before(pagenum) + else: + t = book.add_page_after(pagenum) + t.text = text + db.session.add(book) + db.session.commit() + assert_sql_result(sql, expected, msg) + + +def assert_remove(book, pagenum, expected, msg): + "assert removed" + sql = f""" + select TxText, TxOrder from Texts + where TxBkID = {book.id} + order by TxOrder""" + + book.remove_page(pagenum) + + db.session.add(book) + db.session.commit() + assert_sql_result(sql, expected, msg) + + +def test_can_add_page(app_context, english): + "Add page before and after." + b = make_book("hi", ["1", "2", "3"], english) + db.session.add(b) + db.session.commit() + + sql = f""" + select TxText, TxOrder from Texts + where TxBkID = {b.id} + order by TxOrder""" + assert_sql_result(sql, ["1; 1", "2; 2", "3; 3"], "initial book") + + assert_add(b, 2, True, "B2", ["1; 1", "B2; 2", "2; 3", "3; 4"], "new") + assert_add(b, 3, False, "A4", ["1; 1", "B2; 2", "2; 3", "A4; 4", "3; 5"], "new") + + +def test_add_page_before_first(app_context, english): + "Add page before and after." + b = make_book("hi", "hi", english) + db.session.add(b) + db.session.commit() + + assert_add(b, 1, True, "B1", ["B1; 1", "hi; 2"], "new") + assert_add(b, 0, True, "B0", ["B0; 1", "B1; 2", "hi; 3"], "0") + assert_add(b, -100, True, "B-", ["B-; 1", "B0; 2", "B1; 3", "hi; 4"], "neg") + assert_add(b, 100, True, "B+", ["B-; 1", "B0; 2", "B1; 3", "B+; 4", "hi; 5"], "big") + + +def test_add_page_after_last(app_context, english): + "Add page after last." + b = make_book("hi", "hi", english) + db.session.add(b) + db.session.commit() + + assert_add(b, 1, False, "B1", ["hi; 1", "B1; 2"], "new") + assert_add(b, 100, False, "B100", ["hi; 1", "B1; 2", "B100; 3"], "100") + assert_add(b, -100, False, "Bneg", ["hi; 1", "Bneg; 2", "B1; 3", "B100; 4"], "neg") + + +def test_can_remove_page(app_context, english): + "Remove pages before and after." + b = make_book("hi", ["1", "2", "3"], english) + db.session.add(b) + db.session.commit() + + assert_remove(b, 2, ["1; 1", "3; 2"], "2nd page removed") + assert_remove(b, 12, ["1; 1", "3; 2"], "bad page removal ignored") + assert_remove(b, -12, ["1; 1", "3; 2"], "bad page removal ignored") + assert_remove(b, 0, ["1; 1", "3; 2"], "bad page removal ignored") + assert_remove(b, 1, ["3; 1"], "1st removed") + assert_remove(b, 1, ["3; 1"], "can't remove sole page") diff --git a/tests/unit/models/test_Language.py b/tests/unit/models/test_Language.py index a57a59c3f..c1ea1272f 100644 --- a/tests/unit/models/test_Language.py +++ b/tests/unit/models/test_Language.py @@ -4,7 +4,10 @@ Low value but ensure that the db mapping is correct. """ +from lute.db import db +from lute.db.demo import Service as DemoService from lute.models.language import Language +from lute.models.repositories import LanguageRepository from tests.dbasserts import assert_sql_result @@ -13,6 +16,8 @@ def test_demo_has_preloaded_languages(app_context): When users get the initial demo, it has English, French, etc, pre-defined. 
""" + demosvc = DemoService(db.session) + demosvc.load_demo_data() sql = """ select LgName from languages @@ -31,8 +36,6 @@ def test_new_language_has_sane_defaults(): assert lang.regexp_split_sentences == ".!?" assert lang.exceptions_split_sentences == "Mr.|Mrs.|Dr.|[A-Z].|Vd.|Vds." assert lang.word_characters == "a-zA-ZÀ-ÖØ-öø-ȳáéíóúÁÉÍÓÚñÑ" - assert lang.remove_spaces is False - assert lang.split_each_char is False assert lang.right_to_left is False assert lang.show_romanization is False assert lang.parser_type == "spacedel" @@ -42,13 +45,18 @@ def test_can_find_lang_by_name(app_context): """ Returns lang if found, or None """ - e = Language.find_by_name("English") + lang = Language() + lang.name = "English" + db.session.add(lang) + db.session.commit() + repo = LanguageRepository(db.session) + e = repo.find_by_name("English") assert e.name == "English", "case match" - e_lc = Language.find_by_name("english") + e_lc = repo.find_by_name("english") assert e_lc.name == "English", "case-insensitive" - nf = Language.find_by_name("notfound") + nf = repo.find_by_name("notfound") assert nf is None, "not found" @@ -63,5 +71,28 @@ def test_language_word_char_regex_returns_python_compatible_regex(app_context): u0600-u06FFuFE70-uFEFC (where u = backslash-u) """ - a = Language.find_by_name("Arabic") - assert a.word_characters == r"\u0600-\u06FF\uFE70-\uFEFC" + demosvc = DemoService(db.session) + demosvc.load_demo_data() + repo = LanguageRepository(db.session) + a = repo.find_by_name("Arabic") + # pylint: disable=line-too-long + assert ( + a.word_characters + == r"\u0600-\u0608\u060B\u060E-\u061A\u061C\u0620-\u0669\u066E-\u06D3\u06D5-\u06FF\uFE70-\uFEFC" + ) + + +def test_lang_to_dict_from_dict_returns_same_thing(app_context): + """ + Lang can be exported to yaml and imported from yaml. + A dictionary is used as the intermediary form, so the + same language should return the same data. + """ + demosvc = DemoService(db.session) + demosvc.load_demo_data() + repo = LanguageRepository(db.session) + e = repo.find_by_name("English") + e_dict = e.to_dict() + e_from_dict = Language.from_dict(e_dict) + e_back_to_dict = e_from_dict.to_dict() + assert e_dict == e_back_to_dict, "Same thing returned" diff --git a/tests/unit/models/test_Setting.py b/tests/unit/models/test_Setting.py index 9df640fa7..dbea294e2 100644 --- a/tests/unit/models/test_Setting.py +++ b/tests/unit/models/test_Setting.py @@ -2,64 +2,77 @@ Settings test. """ +from unittest.mock import patch import pytest -from sqlalchemy import text from lute.db import db -from lute.models.setting import ( - UserSetting, +from lute.models.repositories import ( + UserSettingRepository, + SystemSettingRepository, MissingUserSettingKeyException, - SystemSetting, - BackupSettings, ) from tests.dbasserts import assert_sql_result -def test_user_and_system_settings_do_not_intersect(app_context): +@pytest.fixture(name="us_repo") +def fixture_usersetting_repo(app_context): + "Repo" + r = UserSettingRepository(db.session) + return r + + +@pytest.fixture(name="ss_repo") +def fixture_systemsetting_repo(app_context): + "Repo" + r = SystemSettingRepository(db.session) + return r + + +def test_user_and_system_settings_do_not_intersect(us_repo, ss_repo): "A UserSetting is not available as a system setting." 
- UserSetting.set_value("backup_count", 42) + us_repo.set_value("backup_count", 42) db.session.commit() sql = "select StValue, StKeyType from settings where StKey = 'backup_count'" assert_sql_result(sql, ["42; user"], "loaded") - u = UserSetting.get_value("backup_count") + u = us_repo.get_value("backup_count") assert u == "42", "found user setting" - assert SystemSetting.get_value("backup_count") is None, "not in system settings" + assert ss_repo.get_value("backup_count") is None, "not in system settings" -def test_save_and_retrieve_user_setting(app_context): +def test_save_and_retrieve_user_setting(us_repo): "Smoke tests." - UserSetting.set_value("backup_count", 42) + us_repo.set_value("backup_count", 42) sql = "select StValue from settings where StKey = 'backup_count'" assert_sql_result(sql, ["5"], "still default") db.session.commit() assert_sql_result(sql, ["42"], "now set") - v = UserSetting.get_value("backup_count") + v = us_repo.get_value("backup_count") assert v == "42", "is string" -def test_missing_value_value_is_nullapp_context(app_context): +def test_missing_value_value_is_null(ss_repo): "Missing key = None." - assert SystemSetting.get_value("missing") is None, "missing key" + assert ss_repo.get_value("missing") is None, "missing key" -def test_smoke_last_backup(app_context): +def test_smoke_last_backup(us_repo): "Check syntax only." - v = SystemSetting.get_last_backup_datetime() + v = us_repo.get_last_backup_datetime() assert v is None, "not set" - SystemSetting.set_last_backup_datetime(42) - v = SystemSetting.get_last_backup_datetime() + us_repo.set_last_backup_datetime(42) + v = us_repo.get_last_backup_datetime() assert v == 42, "set _and_ saved" -def test_get_backup_settings(app_context): +def test_get_backup_settings(us_repo): "Smoke test." - UserSetting.set_value("backup_dir", "blah") - UserSetting.set_value("backup_count", 12) - UserSetting.set_value("backup_warn", 0) + us_repo.set_value("backup_dir", "blah") + us_repo.set_value("backup_count", 12) + us_repo.set_value("backup_warn", 0) db.session.commit() - b = BackupSettings.get_backup_settings() + b = us_repo.get_backup_settings() assert b.backup_dir == "blah" assert b.backup_auto is True # initial defaults assert b.backup_warn is False # set to 0 above @@ -67,36 +80,76 @@ def test_get_backup_settings(app_context): assert b.last_backup_datetime is None -def test_user_settings_loaded_with_defaults(app_context): - "Called on load." - db.session.execute(text("delete from settings")) - db.session.commit() - assert UserSetting.key_exists("backup_dir") is False, "key removed" - UserSetting.load() - assert UserSetting.key_exists("backup_dir") is True, "key created" - - # Check defaults - b = BackupSettings.get_backup_settings() - assert b.backup_enabled is True - assert b.backup_dir is not None - assert b.backup_auto is True - assert b.backup_warn is True - assert b.backup_count == 5 - - -def test_user_settings_load_leaves_existing_values(app_context): - "Called on load." - UserSetting.set_value("backup_count", 17) - db.session.commit() - assert UserSetting.get_value("backup_count") == "17" - UserSetting.load() - b = BackupSettings.get_backup_settings() - assert b.backup_count == 17, "still 17" - - -def test_get_or_set_user_setting_unknown_key_throws(app_context): +def test_time_since_last_backup_future(us_repo): + """ + Check formatting when last backup is reported to be in the future. 
+ + current time = 600, backup time = 900 + """ + b = us_repo.get_backup_settings() + with patch("time.time", return_value=600): + b.last_backup_datetime = 900 + assert b.time_since_last_backup is None + + +def test_time_since_last_backup_none(us_repo): + """ + Check formatting when last backup is reported to be None. + + current time = 600, backup time = None + """ + b = us_repo.get_backup_settings() + with patch("time.time", return_value=600): + b.last_backup_datetime = None + assert b.time_since_last_backup is None + + +def test_time_since_last_backup_right_now(us_repo): + """ + Check formatting when last backup is reported to be the same as current time. + + current time = 600, backup time = 600 + """ + b = us_repo.get_backup_settings() + with patch("time.time", return_value=600): + b.last_backup_datetime = 600 + assert b.time_since_last_backup == "0 seconds ago" + + +def test_time_since_last_backup_in_past(us_repo): + """ + Check formatting when last backup is reported to be in the past. + + current time = 62899200, backup time = various + """ + b = us_repo.get_backup_settings() + now = 62899200 + with patch("time.time", return_value=now): + b.last_backup_datetime = now - 45 + assert b.time_since_last_backup == "45 seconds ago" + b.last_backup_datetime = now - 75 + assert b.time_since_last_backup == "1 minute ago" + b.last_backup_datetime = now - 135 + assert b.time_since_last_backup == "2 minutes ago" + b.last_backup_datetime = now - 3601 + assert b.time_since_last_backup == "1 hour ago" + b.last_backup_datetime = now - 7201 + assert b.time_since_last_backup == "2 hours ago" + b.last_backup_datetime = now - 86401 + assert b.time_since_last_backup == "1 day ago" + b.last_backup_datetime = now - 172801 + assert b.time_since_last_backup == "2 days ago" + b.last_backup_datetime = now - 604801 + assert b.time_since_last_backup == "1 week ago" + b.last_backup_datetime = now - 15724801 + assert b.time_since_last_backup == "26 weeks ago" + b.last_backup_datetime = now - 45360001 + assert b.time_since_last_backup == "75 weeks ago" + + +def test_get_or_set_user_setting_unknown_key_throws(us_repo): "Safety, ensure no typo for user settings." with pytest.raises(MissingUserSettingKeyException): - UserSetting.get_value("bad_key") + us_repo.get_value("bad_key") with pytest.raises(MissingUserSettingKeyException): - UserSetting.set_value("bad_key", 17) + us_repo.set_value("bad_key", 17) diff --git a/tests/unit/models/test_Term.py b/tests/unit/models/test_Term.py index 02b0c32bf..48a3a62f1 100644 --- a/tests/unit/models/test_Term.py +++ b/tests/unit/models/test_Term.py @@ -2,11 +2,13 @@ Term tests. 
""" +import datetime import pytest from sqlalchemy import text -from lute.models.term import Term +from lute.models.term import Term, TermImage +from lute.models.repositories import TermRepository from lute.db import db -from tests.dbasserts import assert_record_count_equals +from tests.dbasserts import assert_record_count_equals, assert_sql_result def test_cruft_stripped_on_set_word(spanish): @@ -78,15 +80,16 @@ def test_find_by_spec(app_context, spanish, english): db.session.commit() spec = Term(spanish, "GATO") - found = Term.find_by_spec(spec) + repo = TermRepository(db.session) + found = repo.find_by_spec(spec) assert found.id == t.id, "term found by matching spec" spec = Term(english, "GATO") - found = Term.find_by_spec(spec) + found = repo.find_by_spec(spec) assert found is None, "not found in different language" spec = Term(spanish, "gatito") - found = Term.find_by_spec(spec) + found = repo.find_by_spec(spec) assert found is None, "not found with different text" @@ -180,3 +183,81 @@ def test_update_status_via_sql_updates_date(app_context, _saved_term): db.session.execute(text("update words set WoStatus = 2")) db.session.commit() assert_record_count_equals(sql, 0, "updated WoStatusChanged") + + +def test_save_new_image_all_existing_images_replaced(app_context, spanish): + "All existing terms removed." + t = Term(spanish, "hola") + ti1 = TermImage() + ti1.term = t + ti1.source = "1.png" + t.images.append(ti1) + + ti2 = TermImage() + ti2.term = t + ti2.source = "2.png" + t.images.append(ti2) + db.session.add(t) + db.session.commit() + + assert_record_count_equals("select * from wordimages", 2, "image count") + + t.set_current_image("3.png") + db.session.add(t) + db.session.commit() + assert_record_count_equals("select * from wordimages", 1, "new image count") + assert_record_count_equals( + "select * from wordimages where wisource='3.png'", 1, "new image count" + ) + + +def test_delete_empty_image_records(app_context, spanish): + "Check cleanup." + t = Term(spanish, "hola") + for s in ["", " ", "3.png"]: + ti = TermImage() + ti.term = t + ti.source = s + t.images.append(ti) + db.session.add(t) + db.session.commit() + + assert_sql_result("select wisource from wordimages", ["", " ", "3.png"], "images") + + repo = TermRepository(db.session) + repo.delete_empty_images() + assert_sql_result("select wisource from wordimages", ["3.png"], "cleaned images") + + +def test_changing_status_of_status_0_term_resets_WoCreated(app_context, spanish): + """ + New unknown Terms get created with Status = 0 when a page is + rendered for reading, but that's not _really_ the date that the + term was created. 
+ """ + t = Term(spanish, "hola") + t.translation = "hi" + t.status = 0 + db.session.add(t) + db.session.commit() + + db.session.execute(text("update words set WoCreated = 'a'")) + db.session.commit() + sql = "select WoTranslation, WoCreated from words where WoCreated = 'a'" + assert_sql_result(sql, ["hi; a"], "created date") + + t.status = 0 + t.translation = "hello" + db.session.add(t) + db.session.commit() + assert_sql_result(sql, ["hello; a"], "created date still old value") + + t.status = 1 + t.translation = "howdy" + db.session.add(t) + db.session.commit() + + assert_sql_result(sql, [], "updated") + current_year = str(datetime.datetime.now().year) + sql_updated = "select strftime('%Y', WoCreated) from words" + assert_sql_result(sql_updated, [f"{current_year}"], "final") diff --git a/tests/unit/models/test_TermTag.py b/tests/unit/models/test_TermTag.py index f187e0092..5899f5f4e 100644 --- a/tests/unit/models/test_TermTag.py +++ b/tests/unit/models/test_TermTag.py @@ -5,6 +5,7 @@ import pytest from lute.models.term import TermTag +from lute.models.repositories import TermTagRepository from lute.db import db from tests.dbasserts import assert_sql_result @@ -34,19 +35,22 @@ def test_new_dup_tag_text_fails(_hola_tag, app_context): def test_find_by_text(_hola_tag, app_context): "Find by text returns match." - retrieved = TermTag.find_by_text("Hola") + repo = TermTagRepository(db.session) + retrieved = repo.find_by_text("Hola") assert retrieved is not None assert retrieved.text == "Hola" def test_find_by_text_returns_null_if_not_exact_match(_hola_tag, app_context): "Find returns null if no match." - assert TermTag.find_by_text("unknown") is None - assert TermTag.find_by_text("hola") is None + repo = TermTagRepository(db.session) + assert repo.find_by_text("unknown") is None + assert repo.find_by_text("hola") is None def test_find_or_create_by_text_returns_new_if_no_match(_hola_tag, app_context): "Return new." - assert TermTag.find_by_text("unknown") is None - t = TermTag.find_or_create_by_text("unknown") + repo = TermTagRepository(db.session) + assert repo.find_by_text("unknown") is None + t = repo.find_or_create_by_text("unknown") assert t.text == "unknown", "new tag created" diff --git a/tests/unit/models/test_Text.py b/tests/unit/models/test_Text.py index ee78ed33e..02c9acd12 100644 --- a/tests/unit/models/test_Text.py +++ b/tests/unit/models/test_Text.py @@ -13,7 +13,7 @@ def transform_sentence(s): def test_sentence_lifecycle(english): """ - Sentences should only be generated when a Text is saved with the ReadDate saved. + Sentences must be generated when a Text is saved with the ReadDate saved. Sentences are only used for reference lookups. 
""" b = Book("hola", english) @@ -22,7 +22,7 @@ def test_sentence_lifecycle(english): assert len(t.sentences) == 0, "no sentences" t.read_date = datetime.now() - assert len(t.sentences) == 2, "have on read" + assert len(t.sentences) == 2, "sentences are created when read_date is set" assert ( transform_sentence(t.sentences[0]) == "/Tienes/ /un/ /perro/./" diff --git a/tests/unit/parse/test_JapaneseParser.py b/tests/unit/parse/test_JapaneseParser.py index d6220019c..fd61854c1 100644 --- a/tests/unit/parse/test_JapaneseParser.py +++ b/tests/unit/parse/test_JapaneseParser.py @@ -4,6 +4,7 @@ from lute.parse.mecab_parser import JapaneseParser from lute.models.term import Term +from lute.settings.current import current_settings from lute.parse.base import ParsedToken @@ -52,7 +53,25 @@ def test_end_of_sentence_stored_in_parsed_tokens(japanese): assert_tokens_equals(s, japanese, expected) -def test_readings(): +def test_issue_488_repeat_character_handled(japanese): + "Repeat sometimes needs explicit check, can be returned as own word." + s = "聞こえる行く先々。少々お待ちください。" + + expected = [ + ("聞こえる", True), + ("行く先", True), + ("々", True), + ("。", False, True), + ("少々", True), + ("お待ち", True), + ("ください", True), + ("。", False, True), + ("¶", False, True), + ] + assert_tokens_equals(s, japanese, expected) + + +def test_readings(app_context): """ Parser returns readings if they add value. """ @@ -66,11 +85,24 @@ def test_readings(): zws = "\u200B" cases = [ - ("強い", "ツヨイ"), - ("二人", "ニニン"), # ah well, not perfect :-) - ("強いか", "ツヨイカ"), - (f"強い{zws}か", f"ツヨイ{zws}カ"), # zero-width-space ignored + ("強い", "つよい"), + ("二人", "ににん"), # ah well, not perfect :-) + ("強いか", "つよいか"), + (f"強い{zws}か", f"つよい{zws}か"), # zero-width-space ignored ] for c in cases: assert p.get_reading(c[0]) == c[1], c[0] + + +def test_reading_setting(app_context): + "Return reading matching user setting." + cases = { + "katakana": "ツヨイ", + "hiragana": "つよい", + "alphabet": "tsuyoi", + } + p = JapaneseParser() + for k, v in cases.items(): + current_settings["japanese_reading"] = k + assert p.get_reading("強い") == v, k diff --git a/tests/unit/parse/test_SentenceGroupIterator.py b/tests/unit/parse/test_SentenceGroupIterator.py deleted file mode 100644 index f488f6783..000000000 --- a/tests/unit/parse/test_SentenceGroupIterator.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -SentenceGroupIterator tests. -""" - -from lute.parse.base import SentenceGroupIterator -from lute.parse.space_delimited_parser import SpaceDelimitedParser - - -def toks_to_string(tokens): - return "".join([t.token for t in tokens]) - - -def test_sgi_scenarios(english): - """ - Given a string and the max token count, - SentenceGroupIterator should return expected groups. - """ - - def scenario(s, maxcount, expected_groups): - parser = SpaceDelimitedParser() - tokens = parser.get_parsed_tokens(s, english) - - it = SentenceGroupIterator(tokens, maxcount) - groups = [] - while g := it.next(): - groups.append(g) - gs = [toks_to_string(g) for g in groups] - assert "||".join(gs) == "||".join( - expected_groups - ), f"groups for size {maxcount}" - - scenario("", 100, [""]) - - text = "Here is a dog. Here is a cat." - - scenario(text, 100, ["Here is a dog. Here is a cat."]) - - scenario(text, 3, ["Here is a dog. ", "Here is a cat."]) - - scenario(text, 6, ["Here is a dog. ", "Here is a cat."]) - - # No period at the end. - scenario("Here is a dog. Here is a cat", 6, ["Here is a dog. ", "Here is a cat"]) - - # No period at all. 
-    scenario("Here is a dog Here is a cat", 6, ["Here is a dog Here is a cat"])
-
-    scenario(
-        "Here is a dog. Here is a cat. Here is a thing.",
-        10,
-        ["Here is a dog. Here is a cat. ", "Here is a thing."],
-    )
diff --git a/tests/unit/parse/test_SpaceDelimitedParser.py b/tests/unit/parse/test_SpaceDelimitedParser.py
index b9546bd76..e5f3d7a65 100644
--- a/tests/unit/parse/test_SpaceDelimitedParser.py
+++ b/tests/unit/parse/test_SpaceDelimitedParser.py
@@ -2,6 +2,9 @@
 SpaceDelimitedParser tests.
 """

+import sys
+import re
+import unicodedata
 from lute.parse.space_delimited_parser import SpaceDelimitedParser
 from lute.parse.base import ParsedToken

@@ -145,3 +148,136 @@ def test_quick_checks(english):
     assert_string_equals("1234", english, "1234")
     assert_string_equals("1234.", english, "1234.")
     assert_string_equals("1234.Hello", english, "1234.[Hello]")
+
+
+def test_zero_width_non_joiner_retained(german):
+    """
+    Verify zero-width non-joiner characters are retained in languages
+    that include them as word characters.
+
+    Test case from https://en.wikipedia.org/wiki/Zero-width_non-joiner.
+    """
+    assert_string_equals("Brotzeit", german, "[Brotzeit]")
+    assert_string_equals("Brot\u200czeit", german, "[Brot\u200czeit]")
+
+
+def test_zero_width_joiner_retained(hindi):
+    """
+    Verify zero-width joiner characters are retained in languages that
+    include them as word characters.
+
+    We see them used to replace Hindi conjunct characters with individual consonants.
+    """
+    assert_string_equals("namaste", hindi, "[namaste]")
+    assert_string_equals("नमस्ते", hindi, "[नमस्ते]")
+    assert_string_equals("नमस\u200dते", hindi, "[नमस\u200dते]")
+
+
+def test_default_word_pattern_latin(generic):
+    """
+    Verify the default word pattern handles Latin alphabets (0000..00FF).
+    """
+
+    # Source: https://www.folger.edu/explore/shakespeares-works/henry-v/read/5/2/
+    assert_string_equals(
+        "Saint Denis be my speed!—donc vôtre est France, et vous êtes mienne.",
+        generic,
+        " ".join(
+            [
+                "[Saint]",
+                "[Denis]",
+                "[be]",
+                "[my]",
+                "[speed]!—[donc]",
+                "[vôtre]",
+                "[est]",
+                "[France],",
+                "[et]",
+                "[vous]",
+                "[êtes]",
+                "[mienne].",
+            ]
+        ),
+    )
+
+
+def test_default_word_pattern_devanagari(generic):
+    """
+    Verify the default word pattern handles the Devanagari Unicode block (0900..097F).
+    """
+
+    # Source: https://en.wikipedia.org/wiki/Hindi#Sample_text
+    assert_string_equals(
+        "अनुच्छेद १(एक): सभी मनुष्य जन्म से स्वतन्त्र तथा मर्यादा और अधिकारों में समान होते हैं।",
+        generic,
+        " ".join(
+            [
+                "[अनुच्छेद]",
+                "१([एक]):",
+                "[सभी]",
+                "[मनुष्य]",
+                "[जन्म]",
+                "[से]",
+                "[स्वतन्त्र]",
+                "[तथा]",
+                "[मर्यादा]",
+                "[और]",
+                "[अधिकारों]",
+                "[में]",
+                "[समान]",
+                "[होते]",
+                "[हैं]।",
+            ]
+        ),
+    )
+
+
+def test_default_word_pattern_georgian(generic):
+    """
+    Verify the default word pattern handles the Georgian Unicode block (10A0..10FF).
+    """
+
+    # Source: https://en.wikipedia.org/wiki/Georgian_language#Examples
+    assert_string_equals(
+        "ყველა ადამიანი იბადება თავისუფალი და თანასწორი თავისი ღირსებითა და უფლებებით.",
+        generic,
+        " ".join(
+            [
+                "[ყველა]",
+                "[ადამიანი]",
+                "[იბადება]",
+                "[თავისუფალი]",
+                "[და]",
+                "[თანასწორი]",
+                "[თავისი]",
+                "[ღირსებითა]",
+                "[და]",
+                "[უფლებებით].",
+            ]
+        ),
+    )
+
+
+def test_default_word_pattern_gothic(generic):
+    """
+    Verify the default word pattern handles the Gothic Unicode block (10330..1034F).
+    This is an important test case because it tests larger Unicode values.
+ """ + + # Source: https://en.wikipedia.org/wiki/Gothic_language#Examples + assert_string_equals( + "𐌰𐍄𐍄𐌰 𐌿𐌽𐍃𐌰𐍂 𐌸𐌿 𐌹𐌽 𐌷𐌹𐌼𐌹𐌽𐌰𐌼", generic, "[𐌰𐍄𐍄𐌰] [𐌿𐌽𐍃𐌰𐍂] [𐌸𐌿] [𐌹𐌽] [𐌷𐌹𐌼𐌹𐌽𐌰𐌼]" + ) + + +def test_all_chars_in_categories_match_default_word_chars(): + "Default_word_chars builds a range of characters ... ensure chars in categories are all found." + categories = set(["Cf", "Ll", "Lm", "Lo", "Lt", "Lu", "Mc", "Mn", "Sk"]) + + word_chars = SpaceDelimitedParser.get_default_word_characters() + pattern = rf"[{word_chars}]" + regex = re.compile(pattern, flags=re.IGNORECASE) + for i in range(1, sys.maxunicode): + c = chr(i) + if unicodedata.category(c) in categories: + assert regex.match(c), f"Match for {c}" diff --git a/tests/unit/parse/test_registry.py b/tests/unit/parse/test_registry.py index 556c6ff70..d1f332e81 100644 --- a/tests/unit/parse/test_registry.py +++ b/tests/unit/parse/test_registry.py @@ -4,7 +4,13 @@ import pytest -from lute.parse.registry import parsers, get_parser, supported_parsers, is_supported +from lute.parse.registry import ( + __LUTE_PARSERS__, + get_parser, + supported_parsers, + supported_parser_types, + is_supported, +) from lute.parse.space_delimited_parser import SpaceDelimitedParser @@ -14,8 +20,14 @@ def test_get_parser_by_name(): def test_get_parser_throws_if_not_found(): - with pytest.raises(ValueError): - get_parser("trash") + "Check error message thrown." + e = None + try: + _ = get_parser("trash") + except ValueError as ex: + e = ex + assert e is not None, "Have ValueError" + assert str(e) == "Unknown parser type 'trash'", "message" def test_supported_parsers(): @@ -24,10 +36,17 @@ def test_supported_parsers(): assert isinstance(d, list), "returns a list" p = [n for n in d if n[0] == "spacedel"][0] - assert p == ["spacedel", "Space Delimited"], "sanity check" + assert [p[0], p[1].name()] == ["spacedel", "Space Delimited"], "sanity check" + +def test_supported_parser_types(): + "Sanity check only." + d = supported_parser_types() + assert isinstance(d, list), "returns a list" + assert "spacedel" in d, "sanity check" -class DummyParser(SpaceDelimitedParser): + +class DummyParser: "Dummy unsupported parser." @classmethod @@ -42,9 +61,9 @@ def name(cls): @pytest.fixture(name="_load_dummy") def fixture_load_dummy(): "Add the dummy parser for the test." - parsers["dummy"] = DummyParser + __LUTE_PARSERS__["dummy"] = DummyParser yield - del parsers["dummy"] + del __LUTE_PARSERS__["dummy"] def test_unavailable_parser_not_included_in_lists(_load_dummy): @@ -54,3 +73,14 @@ def test_unavailable_parser_not_included_in_lists(_load_dummy): assert is_supported("dummy") is False, "no" with pytest.raises(ValueError): get_parser("dummy") + + +def test_get_parser_throws_if_parser_not_supported(_load_dummy): + "Check throw." + e = None + try: + _ = get_parser("dummy") + except ValueError as ex: + e = ex + assert e is not None, "Have ValueError" + assert str(e) == "Unsupported parser type 'dummy'", "message" diff --git a/tests/unit/read/render/test_RenderableCalculator.py b/tests/unit/read/render/test_calculate_textitems.py similarity index 67% rename from tests/unit/read/render/test_RenderableCalculator.py rename to tests/unit/read/render/test_calculate_textitems.py index 7c92e0304..b596138fb 100644 --- a/tests/unit/read/render/test_RenderableCalculator.py +++ b/tests/unit/read/render/test_calculate_textitems.py @@ -1,11 +1,10 @@ """ -RenderableCalculator tests. +Tests for getting TextItems. 
""" -import pytest from lute.models.term import Term from lute.parse.base import ParsedToken -from lute.read.render.renderable_calculator import RenderableCalculator +from lute.read.render.calculate_textitems import get_textitems def make_tokens(token_data): @@ -26,12 +25,10 @@ def assert_renderable_equals( tokens = make_tokens(token_data) terms = [Term(language, t) for t in term_data] - rc = RenderableCalculator() - rcs = rc.main(language, terms, tokens) + tis = get_textitems(tokens, terms, language) res = "" - for rc in rcs: - if rc.render: - res += f"[{rc.text}-{rc.length}]" + for ti in tis: + res += f"[{ti.text}-{ti.token_count}]" zws = chr(0x200B) res = res.replace(zws, "") @@ -40,9 +37,8 @@ def assert_renderable_equals( if expected_displayed is not None: res = "" - for rc in rcs: - if rc.render: - res += f"[{rc.display_text}-{rc.length}]" + for ti in tis: + res += f"[{ti.display_text}-{ti.token_count}]" res = res.replace(zws, "") assert res == expected_displayed @@ -55,16 +51,18 @@ def test_simple_render(english): assert_renderable_equals(english, data, [], expected) -def test_tokens_must_be_contiguous(english): - """ - If tokens aren't contiguous, the algorithm gets confused. - """ +def test_non_matching_terms_are_ignored(english): + "Non-match ignored." + data = ["some", " ", "data", " ", "here", "."] + expected = "[some-1][ -1][data-1][ -1][here-1][.-1]" + assert_renderable_equals(english, data, ["ignoreme"], expected) + + +def test_partial_matching_terms_are_ignored(english): + "Partial match is not the same as a match." data = ["some", " ", "data", " ", "here", "."] - tokens = make_tokens(data) - tokens[1].order = 99 - rc = RenderableCalculator() - with pytest.raises(Exception): - rc.main(english, [], tokens) + expected = "[some-1][ -1][data-1][ -1][here-1][.-1]" + assert_renderable_equals(english, data, ["data he"], expected) def test_multiword_items_cover_other_items(english): @@ -79,6 +77,33 @@ def test_multiword_items_cover_other_items(english): assert_renderable_equals(english, data, words, expected) +def test_case_not_considered_for_matches(english): + "Case doesnt matter." + data = ["some", " ", "data", " ", "here", "."] + expected = "[some-1][ -1][data here-3][.-1]" + assert_renderable_equals(english, data, ["DATA HERE"], expected) + + +def test_term_found_in_multiple_places(english): + "Term can be in a few places." + data = [ + "some", + " ", + "data", + " ", + "here", + " ", + "more", + " ", + "data", + " ", + "here", + ".", + ] + expected = "[some-1][ -1][data here-3][ -1][more-1][ -1][data here-3][.-1]" + assert_renderable_equals(english, data, ["DATA HERE"], expected) + + def test_overlapping_multiwords(english): """ Given two overlapping terms, they're both displayed, diff --git a/tests/unit/read/render/test_TokenLocator.py b/tests/unit/read/render/test_get_string_indexes.py similarity index 56% rename from tests/unit/read/render/test_TokenLocator.py rename to tests/unit/read/render/test_get_string_indexes.py index 997c72717..6d5ec5400 100644 --- a/tests/unit/read/render/test_TokenLocator.py +++ b/tests/unit/read/render/test_get_string_indexes.py @@ -1,11 +1,11 @@ """ -TokenLocator tests. +get_string_indexes tests. """ -from lute.read.render.renderable_calculator import TokenLocator +from lute.read.render.calculate_textitems import get_string_indexes -def test_token_locator_scenario(english): +def test_get_string_indexes_scenario(english): """ Run test scenarios. 
""" @@ -17,34 +17,34 @@ def test_token_locator_scenario(english): # word sought # matches [ [ as_found_in_text, position ], ... ]) # Finds b - (["a", "b", "c", "d"], "b", [{"text": "b", "index": 1}]), + (["a", "b", "c", "d"], "b", [("b", 1)]), # Case doesn't matter # The original case is returned - (["A", "B", "C", "D"], "b", [{"text": "B", "index": 1}]), + (["A", "B", "C", "D"], "b", [("b", 1)]), # Original case returned - (["a", "b", "c", "d"], "B", [{"text": "b", "index": 1}]), + (["a", "b", "c", "d"], "B", [("b", 1)]), # No match (["a", "bb", "c", "d"], "B", []), # Found in multiple places ( ["b", "b", "c", "d"], "b", - [{"text": "b", "index": 0}, {"text": "b", "index": 1}], + [("b", 0), ("b", 1)], ), # Multiword, found in multiple ( ["b", "B", "b", "d"], f"b{zws}b", - [{"text": f"b{zws}B", "index": 0}, {"text": f"B{zws}b", "index": 1}], + [(f"b{zws}b", 0), (f"b{zws}b", 1)], ), # Multiword, found in multiple ( ["b", "B", "c", "b", "b", "x", "b"], f"b{zws}b", - [{"text": f"b{zws}B", "index": 0}, {"text": f"b{zws}b", "index": 3}], + [(f"b{zws}b", 0), (f"b{zws}b", 3)], ), - (["a", " ", "cat", " ", "here"], "cat", [{"text": "cat", "index": 2}]), - (["a", " ", "CAT", " ", "here"], "cat", [{"text": "CAT", "index": 2}]), + (["a", " ", "cat", " ", "here"], "cat", [("cat", 2)]), + (["a", " ", "CAT", " ", "here"], "cat", [("cat", 2)]), # No match (["a", " ", "CAT", " ", "here"], "ca", []), # No match @@ -54,8 +54,8 @@ def test_token_locator_scenario(english): casenum = 0 for tokens, word, expected in cases: casenum += 1 - sentence = TokenLocator.make_string(tokens) - tocloc = TokenLocator(english, sentence) - actual = tocloc.locate_string(word) + p = english.parser + sentence = p.get_lowercase(zws.join(tokens)) + actual = get_string_indexes([p.get_lowercase(word)], sentence) msg = f"case {casenum} - tokens: {', '.join(tokens)}; word: {word}" assert actual == expected, msg diff --git a/tests/unit/read/render/test_multiword_indexer.py b/tests/unit/read/render/test_multiword_indexer.py new file mode 100644 index 000000000..fefd8935c --- /dev/null +++ b/tests/unit/read/render/test_multiword_indexer.py @@ -0,0 +1,57 @@ +""" +get_string_indexes tests. +""" + +import pytest +from lute.read.render.multiword_indexer import MultiwordTermIndexer + +zws = "\u200B" # zero-width space + + +@pytest.mark.parametrize( + "name,terms,tokens,expected", + [ + ("empty", [], ["a"], []), + ("no terms", [], ["a"], []), + ("no tokens", ["a"], [], []), + ("no match", ["x"], ["a"], []), + ("single match", ["a"], ["a"], [("a", 0)]), + ("single match 2", ["a"], ["b", "a"], [("a", 1)]), + ("same term twice", ["a"], ["b", "a", "c", "a"], [("a", 1), ("a", 3)]), + ( + "multiple terms", + ["a", "b"], + ["b", "a", "c", "a"], + [("b", 0), ("a", 1), ("a", 3)], + ), + ("multi-word term", [f"a{zws}b"], ["b", "a", "b", "a"], [(f"a{zws}b", 1)]), + ( + "repeated m-word term", + [f"a{zws}a"], + ["a", "a", "a", "b"], + [(f"a{zws}a", 0), (f"a{zws}a", 1)], + ), + ("bound check term at end", ["a"], ["b", "c", "a"], [("a", 2)]), + ], +) +def test_scenario(name, terms, tokens, expected): + "Test scenario." + mw = MultiwordTermIndexer() + for t in terms: + mw.add(t) + results = list(mw.search_all(tokens)) + assert len(results) == len(expected), name + assert results == expected, name + + +def test_can_search_multiple_times_with_different_tokens(): + "Single match, returns token index." 
+    mw = MultiwordTermIndexer()
+    mw.add("a")
+    results = list(mw.search_all(["a", "b"]))
+    assert len(results) == 1, "one match"
+    assert results[0] == ("a", 0)
+
+    results = list(mw.search_all(["b", "a"]))
+    assert len(results) == 1, "one match"
+    assert results[0] == ("a", 1)
diff --git a/tests/unit/read/render/test_service.py b/tests/unit/read/render/test_service.py
new file mode 100644
index 000000000..4682500ae
--- /dev/null
+++ b/tests/unit/read/render/test_service.py
@@ -0,0 +1,162 @@
+"""
+Render service tests.
+"""
+
+from lute.parse.base import ParsedToken
+from lute.read.render.service import Service
+from lute.db import db
+from lute.models.term import Term
+
+from tests.utils import add_terms, make_text, assert_rendered_text_equals
+from tests.dbasserts import assert_sql_result
+
+
+def _run_scenario(language, content, expected_found, msg=""):
+    """
+    Given some pre-saved terms in language,
+    find_all method returns the expected_found terms that
+    exist in the content string.
+    """
+    service = Service(db.session)
+    found_terms = service.find_all_Terms_in_string(content, language)
+    assert len(found_terms) == len(expected_found), "found count, " + msg
+    zws = "\u200B"  # zero-width space
+    found_terms = [t.text.replace(zws, "") for t in found_terms]
+    assert found_terms is not None, msg
+    assert expected_found is not None, msg
+    found_terms.sort()
+    expected_found.sort()
+    assert found_terms == expected_found, msg
+
+
+def test_smoke_tests(english, app_context):
+    "Check bounds, ensure no false matches, etc."
+    add_terms(english, ["a", "at", "xyz"])
+
+    _run_scenario(english, "attack cat", [], "no matches, not standalone")
+    _run_scenario(english, "at", ["at"], "a doesn't match, not standalone")
+    _run_scenario(english, "A", ["a"], "case ignored")
+    _run_scenario(english, "AT A", ["a", "at"], "case, order ignored")
+    _run_scenario(english, "aatt", [], "no match")
+    _run_scenario(english, "Xyz", ["xyz"], "case ignored 2")
+    _run_scenario(english, "XyZ", ["xyz"], "case ignored 3")
+    _run_scenario(english, " A at x", ["a", "at"], "spaces ignored")
+
+    _run_scenario(english, "a dog here", ["a"], "bounds check, found at start")
+    _run_scenario(english, "dog a here", ["a"], "bounds check, found in middle")
+    _run_scenario(english, "dog here a", ["a"], "bounds check, found at end")
+    _run_scenario(english, "a a a a a a a", ["a"], "return once only")
+
+    add_terms(english, ["ab xy"])
+    _run_scenario(english, "ab xy", ["ab xy"], "with space")
+    _run_scenario(english, "cab xy", [], "extra at start")
+    _run_scenario(english, "cab xyq", [], "no match, not the same")
+    _run_scenario(english, "ab xyq", [], "extra stuff at end")
+
+
+def test_spanish_find_all_in_string(spanish, app_context):
+    "Given various pre-saved terms, find_all returns those in the string."
+    add_terms(spanish, ["perro", "gato", "un gato"])
+
+    _run_scenario(spanish, "Hola tengo un gato", ["gato", "un gato"])
+    _run_scenario(spanish, "gato gato gato", ["gato"])
+    _run_scenario(spanish, "No tengo UN PERRO", ["perro"])
+    _run_scenario(spanish, "Hola tengo un gato", ["gato", "un gato"])
+    _run_scenario(spanish, "No tengo nada", [])
+
+    add_terms(spanish, ["échalo", "ábrela"])
+
+    _run_scenario(spanish, '"Échalo", me dijo.', ["échalo"])
+    _run_scenario(spanish, "gato ábrela Ábrela", ["gato", "ábrela"])
+
+
+def test_english_find_all_in_string(english, app_context):
+    "Can find a term with an apostrophe in string."
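+    # This relies on the apostrophe being part of the English test
+    # language's word characters (an assumption about the fixture);
+    # otherwise "cat's" would be split into separate tokens.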
+    add_terms(english, ["the cat's pyjamas"])
+
+    _run_scenario(english, "This is the cat's pyjamas.", ["the cat's pyjamas"])
+
+
+def test_turkish_find_all_in_string(turkish, app_context):
+    "Finds terms, handling case conversion."
+    add_terms(turkish, ["ışık", "için"])
+
+    _run_scenario(turkish, "Işık İçin.", ["ışık", "için"])
+
+
+def test_smoke_get_paragraphs(spanish, app_context):
+    """
+    Smoke test to get paragraph information.
+    """
+    add_terms(spanish, ["tengo un", "un gato"])
+    perro = Term(spanish, "perro")
+    perro.status = 0
+    db.session.add(perro)
+
+    content = "Tengo un gato. Hay un perro.\nTengo un perro."
+    t = make_text("Hola", content, spanish)
+    db.session.add(t)
+    db.session.commit()
+
+    sql = "select WoText from words order by WoText"
+    assert_sql_result(sql, ["perro", "tengo/ /un", "un/ /gato"], "initial")
+
+    ParsedToken.reset_counters()
+    service = Service(db.session)
+    paras = service.get_paragraphs(t.text, t.book.language)
+    assert len(paras) == 2
+
+    def stringize(t):
+        zws = chr(0x200B)
+        parts = [
+            f"[{t.display_text.replace(zws, '/')}(",
+            f"{t.paragraph_number}.{t.sentence_number}",
+            ")]",
+        ]
+        return "".join(parts)
+
+    sentences = [item for sublist in paras for item in sublist]
+    actual = []
+    for sentence in sentences:
+        actual.append("".join(map(stringize, sentence)))
+
+    expected = [
+        "[Tengo/ /un(0.0)][ /gato(0.0)][. (0.0)]",
+        "[Hay(0.1)][ (0.1)][un(0.1)][ (0.1)][perro(0.1)][.(0.1)]",
+        "[Tengo/ /un(1.3)][ (1.3)][perro(1.3)][.(1.3)]",
+    ]
+    assert actual == expected
+
+    assert_sql_result(sql, ["perro", "tengo/ /un", "un/ /gato"], "No new terms")
+
+
+def test_smoke_rendered(spanish, app_context):
+    """
+    Smoke test of the rendered text.
+    """
+    add_terms(spanish, ["tengo un", "un gato"])
+    content = ["Tengo un gato. Hay un perro.", "Tengo un perro."]
+    text = make_text("Hola", "\n".join(content), spanish)
+    db.session.add(text)
+    db.session.commit()
+
+    expected = ["Tengo un(1)/ gato(1)/. /Hay/ /un/ /perro/.", "Tengo un(1)/ /perro/."]
+    assert_rendered_text_equals(text, expected)
+
+
+def test_rendered_leaves_blank_lines(spanish, app_context):
+    """
+    Blank lines in the text are kept in the rendered output.
+    """
+    add_terms(spanish, ["tengo un", "un gato"])
+    content = ["Tengo un gato. Hay un perro.", "", "Tengo un perro."]
+    text = make_text("Hola", "\n".join(content), spanish)
+    db.session.add(text)
+    db.session.commit()
+
+    expected = [
+        "Tengo un(1)/ gato(1)/. /Hay/ /un/ /perro/.",
+        "",
+        "Tengo un(1)/ /perro/.",
+    ]
+    assert_rendered_text_equals(text, expected)
diff --git a/tests/unit/read/test_service.py b/tests/unit/read/test_service.py
index e0655b755..fa84c1cdc 100644
--- a/tests/unit/read/test_service.py
+++ b/tests/unit/read/test_service.py
@@ -3,123 +3,119 @@
 """

 from lute.models.term import Term
-from lute.parse.base import ParsedToken
-from lute.read.service import find_all_Terms_in_string, get_paragraphs
+from lute.book.model import Book, Repository
+from lute.read.service import Service
 from lute.db import db

-from tests.utils import add_terms, make_text, assert_rendered_text_equals
-
-
-def _run_scenario(language, content, expected_found):
-    """
-    Given some pre-saved terms in language,
-    find_all method returns the expected_found terms that
-    exist in the content string.
- """ - found_terms = find_all_Terms_in_string(content, language) - assert len(found_terms) == len(expected_found), "found count" - zws = "\u200B" # zero-width space - found_terms = [t.text.replace(zws, "") for t in found_terms] - assert found_terms is not None - assert expected_found is not None - found_terms.sort() - expected_found.sort() - assert found_terms == expected_found - - -def test_spanish_find_all_in_string(spanish, app_context): - "Given various pre-saved terms, find_all returns those in the string." - terms = ["perro", "gato", "un gato"] - for term in terms: - t = Term(spanish, term) - db.session.add(t) - db.session.commit() - - _run_scenario(spanish, "Hola tengo un gato", ["gato", "un gato"]) - _run_scenario(spanish, "gato gato gato", ["gato"]) - _run_scenario(spanish, "No tengo UN PERRO", ["perro"]) - _run_scenario(spanish, "Hola tengo un gato", ["gato", "un gato"]) - _run_scenario(spanish, "No tengo nada", []) +from tests.dbasserts import assert_record_count_equals, assert_sql_result - terms = ["échalo", "ábrela"] - for term in terms: - t = Term(spanish, term) - db.session.add(t) - db.session.commit() - _run_scenario(spanish, '"Échalo", me dijo.', ["échalo"]) - _run_scenario(spanish, "gato ábrela Ábrela", ["gato", "ábrela"]) +def test_mark_page_read(english, app_context): + "Sanity check, field set and stat added." + b = Book() + b.title = "blah" + b.language_id = english.id + b.text = "Dog CAT dog cat." + r = Repository(db.session) + dbbook = r.add(b) + r.commit() + sql_text_started = "select * from texts where TxStartDate is not null" + sql_text_read = "select * from texts where TxReadDate is not null" + sql_wordsread = "select * from wordsread" + assert_record_count_equals(sql_text_started, 0, "not started, sanity check") + assert_record_count_equals(sql_text_read, 0, "not read") + assert_record_count_equals(sql_wordsread, 0, "not read") -def test_english_find_all_in_string(english, app_context): - "Can find a term with an apostrophe in string." - terms = ["the cat's pyjamas"] - for term in terms: - t = Term(english, term) - db.session.add(t) - db.session.commit() + svc = Service(db.session) + svc.mark_page_read(dbbook.id, 1, True) + assert_record_count_equals(sql_text_started, 0, "still not started!") + assert_record_count_equals(sql_text_read, 1, "read, text") + assert_record_count_equals(sql_wordsread, 1, "read, wordsread") - _run_scenario(english, "This is the cat's pyjamas.", ["the cat's pyjamas"]) + svc.mark_page_read(dbbook.id, 1, True) + assert_record_count_equals(sql_text_read, 1, "still read") + assert_record_count_equals(sql_wordsread, 2, "extra record added") -def test_turkish_find_all_in_string(turkish, app_context): - "Finds terms, handling case conversion." - terms = ["ışık", "için"] - for term in terms: - t = Term(turkish, term) - db.session.add(t) +def test_set_unknowns_to_known(english, app_context): + "Unknowns (status 0) or new are set to well known." + t = Term(english, "dog") + db.session.add(t) db.session.commit() - content = "Işık İçin." - _run_scenario(turkish, content, ["ışık", "için"]) + b = Book() + b.title = "blah" + b.language_id = english.id + b.text = "Dog CAT dog cat." + r = Repository(db.session) + dbbook = r.add(b) + r.commit() + sql = "select WoTextLC, WoStatus from words order by WoText" + assert_sql_result(sql, ["dog; 1"], "before start") -def test_smoke_get_paragraphs(spanish, app_context): - """ - Smoke test to get paragraph information. 
- """ - add_terms(spanish, ["tengo un", "un gato"]) + service = Service(db.session) + service.start_reading(dbbook, 1) + assert_sql_result(sql, ["cat; 0", "dog; 1"], "after start") - content = "Tengo un gato. Hay un perro.\nTengo un perro." - t = make_text("Hola", content, spanish) - db.session.add(t) + tx = dbbook.texts[0] + tx.text = "Dog CAT dog cat extra." + db.session.add(tx) db.session.commit() - ParsedToken.reset_counters() - paras = get_paragraphs(t) - assert len(paras) == 2 - - def stringize(t): - zws = chr(0x200B) - parts = [ - f"[{t.display_text.replace(zws, '/')}(", - f"{t.para_id}.{t.se_id}", - ")]", - ] - return "".join(parts) - - sentences = [item for sublist in paras for item in sublist] - actual = [] - for sent in sentences: - actual.append("".join(map(stringize, sent.textitems))) - - expected = [ - "[Tengo/ /un(0.0)][ /gato(0.0)][. (0.0)]", - "[Hay(0.1)][ (0.1)][un(0.1)][ (0.1)][perro(0.1)][.(0.1)]", - "[Tengo/ /un(1.3)][ (1.3)][perro(1.3)][.(1.3)]", - ] - assert actual == expected - - -def test_smoke_rendered(spanish, app_context): - """ - Smoke test to get paragraph information. - """ - add_terms(spanish, ["tengo un", "un gato"]) - content = ["Tengo un gato. Hay un perro.", "Tengo un perro."] - text = make_text("Hola", "\n".join(content), spanish) - db.session.add(text) + service = Service(db.session) + service.set_unknowns_to_known(tx) + assert_sql_result(sql, ["cat; 99", "dog; 1", "extra; 99"], "after set") + + +def test_smoke_start_reading(english, app_context): + "Smoke test book." + b = Book() + b.title = "blah" + b.language_id = english.id + b.text = "Here is some content. Here is more." + r = Repository(db.session) + dbbook = r.add(b) + r.commit() + + sql_sentence = "select * from sentences" + sql_text_started = "select * from texts where TxStartDate is not null" + assert_record_count_equals(sql_sentence, 0, "before start") + assert_record_count_equals(sql_text_started, 0, "before start") + service = Service(db.session) + service.start_reading(dbbook, 1) + assert_record_count_equals(sql_sentence, 2, "after start") + assert_record_count_equals(sql_text_started, 1, "text after start") + + +def test_start_reading_creates_Terms_for_unknown_words(english, app_context): + "Unknown (status 0) terms are created for all new words." + t = Term(english, "dog") + db.session.add(t) db.session.commit() - expected = ["Tengo un(1)/ gato(1)/. /Hay/ /un/ /perro/.", "Tengo un(1)/ /perro/."] - assert_rendered_text_equals(text, expected) + b = Book() + b.title = "blah" + b.language_id = english.id + b.text = "Dog CAT dog cat." + r = Repository(db.session) + dbbook = r.add(b) + r.commit() + + sql = "select WoTextLC from words order by WoText" + assert_sql_result(sql, ["dog"], "before start") + + service = Service(db.session) + paragraphs = service.start_reading(dbbook, 1) + textitems = [ + ti + for para in paragraphs + for sentence in para + for ti in sentence + if ti.is_word and ti.wo_id is None + ] + assert ( + len(textitems) == 0 + ), f"All text items should have a term, but got {textitems}" + assert_sql_result(sql, ["cat", "dog"], "after start") diff --git a/tests/unit/read/test_service_popup_data.py b/tests/unit/read/test_service_popup_data.py new file mode 100644 index 000000000..42ab87983 --- /dev/null +++ b/tests/unit/read/test_service_popup_data.py @@ -0,0 +1,363 @@ +""" +Term popup data tests. 
+"""
+
+import pytest
+from lute.models.term import Term, TermTag, Status
+from lute.models.repositories import UserSettingRepository
+from lute.read.service import Service
+from lute.db import db
+
+
+@pytest.fixture(name="service")
+def fixture_svc(app_context):
+    "Service"
+    return Service(db.session)
+
+
+def test_popup_data_is_none_if_no_data(spanish, app_context, service):
+    "Return None if no popup."
+    t = Term(spanish, "gato")
+    db.session.add(t)
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert d is None, "No data, no popup"
+
+    t.translation = "hello"
+    d = service.get_popup_data(t.id)
+    assert d is not None, "Have data, popup"
+
+
+def test_popup_data_shown_if_have_tag(spanish, app_context, service):
+    "Popup is shown if the term has a tag."
+    t = Term(spanish, "gato")
+    t.add_term_tag(TermTag("animal"))
+    db.session.add(t)
+    db.session.commit()
+    d = service.get_popup_data(t.id)
+    assert d is not None, "Have tag, show popup"
+    assert d.tags == ["animal"]
+
+
+def test_popup_shown_if_parent_exists_even_if_no_other_data(
+    spanish, app_context, service
+):
+    "I always want to know if a parent has been set."
+    t = Term(spanish, "gato")
+    db.session.add(t)
+    db.session.commit()
+    d = service.get_popup_data(t.id)
+    assert d is None, "No data, no popup"
+
+    p = Term(spanish, "perro")
+    t.parents.append(p)
+    db.session.add(t)
+    db.session.commit()
+    d = service.get_popup_data(t.id)
+    assert d is not None, "Has parent, popup, even if no other data."
+
+
+def test_popup_data_is_shown_if_have_data_regardless_of_status(
+    spanish, app_context, service
+):
+    "Popup is shown if the term has data, regardless of its status."
+    t = Term(spanish, "gato")
+    t.translation = "hello"
+    for s in [1, Status.UNKNOWN, Status.IGNORED]:
+        t.status = s
+        db.session.add(t)
+        db.session.commit()
+        d = service.get_popup_data(t.id)
+        assert d is not None, f"Have data for status {s}"
+
+
+def test_term_with_no_parents(spanish, app_context, service):
+    "Keep the lights on test, smoke only."
+    t = Term(spanish, "gato")
+    t.translation = "cat"
+    db.session.add(t)
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert d.parents == [], "no parents"
+    assert d.parents_text == "", "no parents"
+
+
+def test_parent_not_shown_if_has_no_useful_data(spanish, app_context, service):
+    "No need for dup data"
+    t = Term(spanish, "gato")
+    p = Term(spanish, "perro")
+    p.translation = "cat"
+    t.parents.append(p)
+    db.session.add(t)
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert d.translation == "cat", "trans promoted"
+    assert len(d.parents) == 0, "no data"
+
+    p.romanization = "hello"
+    db.session.add(p)
+    db.session.commit()
+    d = service.get_popup_data(t.id)
+    assert len(d.parents) == 1, "some data"
+
+
+def test_images_combined_in_popup(spanish, app_context, service):
+    "Combine images as needed."
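+    # popup_image_data maps image URL => comma-joined texts of the terms
+    # using that image, so an image shared by a term and its parent
+    # collapses to one entry, e.g. {"/userimages/<lang-id>/gato.jpg": "gato, perro"}
+    # (the <lang-id> segment here is illustrative).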
+ t = Term(spanish, "gato") + t.set_current_image("gato.jpg") + p = Term(spanish, "perro") + t.parents.append(p) + db.session.add(t) + db.session.commit() + + d = service.get_popup_data(t.id) + img_url_start = f"/userimages/{spanish.id}/" + assert d.popup_image_data == {img_url_start + "gato.jpg": "gato"} + + p.set_current_image("perro.jpg") + db.session.add(p) + db.session.commit() + d = service.get_popup_data(t.id) + assert d.popup_image_data == { + img_url_start + "gato.jpg": "gato", + img_url_start + "perro.jpg": "perro", + } + + p.set_current_image("gato.jpg") + db.session.add(p) + db.session.commit() + d = service.get_popup_data(t.id) + assert d.popup_image_data == {img_url_start + "gato.jpg": "gato, perro"} + + +def test_single_parent_translation_can_be_promoted_to_term_if_term_translation_blank( + spanish, app_context, service +): + "No need for dup data" + t = Term(spanish, "gato") + p = Term(spanish, "perro") + p.translation = "cat" + p.romanization = "hello" # need extra data + t.parents.append(p) + db.session.add(t) + db.session.commit() + + d = service.get_popup_data(t.id) + assert d.translation == "cat", "trans promoted" + assert d.parents[0].translation == "", "moved up" + + repo = UserSettingRepository(db.session) + repo.set_value("term_popup_promote_parent_translation", False) + db.session.commit() + + d = service.get_popup_data(t.id) + assert d.translation == "", "trans not promoted" + assert d.parents[0].translation == "cat", "translation left with parent" + + +def test_single_parent_translation_may_be_removed_if_same_as_child( + spanish, app_context, service +): + "No need for dup data" + t = Term(spanish, "gato") + t.translation = "cat" + p = Term(spanish, "perro") + p.translation = "cat" + p.romanization = "hello" # need extra data + t.parents.append(p) + db.session.add(t) + db.session.commit() + + d = service.get_popup_data(t.id) + assert d.translation == "cat", "trans promoted" + assert d.parents[0].translation == "", "moved up" + + repo = UserSettingRepository(db.session) + repo.set_value("term_popup_promote_parent_translation", False) + db.session.commit() + + d = service.get_popup_data(t.id) + assert d.translation == "cat", "trans left" + assert d.parents[0].translation == "cat", "translation also left with parent" + + +def test_multiple_parents_translations_left_alone_even_if_blank( + spanish, app_context, service +): + "No need for dup data" + t = Term(spanish, "gato") + p = Term(spanish, "perro") + p.translation = "cat" + p2 = Term(spanish, "otro") + p2.translation = "cat" + t.parents.append(p) + t.parents.append(p2) + db.session.add(t) + db.session.commit() + + d = service.get_popup_data(t.id) + assert d.translation == "", "still blank" + assert d.parents[0].translation == "cat", "not moved up" + + +def test_parent_data_returned(spanish, app_context, service): + "Extra data added for display." + t = Term(spanish, "gato") + t.translation = "cat" + p = Term(spanish, "perro") + p.translation = "kitty" + t.parents.append(p) + db.session.add(t) + db.session.commit() + + d = service.get_popup_data(t.id) + p = d.parents[0] + assert p.term_text == "perro" + assert p.romanization == "" + assert p.translation == "kitty" + assert p.tags == [] + assert d.parents_text == "perro", "one parent" + + +def test_parent_data_always_added_if_multiple_parents(spanish, app_context, service): + "Extra data added for display." 
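+    # With more than one parent, no promotion happens: each parent keeps
+    # its own data, and parents_text joins the parent texts in order.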
+    t = Term(spanish, "gato")
+    t.translation = "cat"
+    p = Term(spanish, "perro")
+    p.translation = "kitty"
+    p2 = Term(spanish, "hombre")
+    p2.translation = "kitteh"
+    t.parents.append(p)
+    t.parents.append(p2)
+    db.session.add(t)
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert len(d.parents) == 2, "extra parent data added"
+    assert d.parents_text == "perro, hombre", "parents"
+
+
+## Component term checks.
+
+
+def make_terms(term_trans_pairs, spanish):
+    "Make test data."
+    for c in term_trans_pairs:
+        ct = Term(spanish, c[0])
+        ct.translation = c[1]
+        db.session.add(ct)
+
+
+def assert_components(d, expected, msg=""):
+    "Check components."
+
+    def _c_to_string(c):
+        s = "; ".join([c.term_text, c.translation])
+        zws = chr(0x200B)
+        return s.replace(zws, "")
+
+    actual = [_c_to_string(c) for c in d.components]
+    assert actual == expected, msg
+
+
+def test_single_term_not_included_in_own_components(spanish, app_context, service):
+    "Keep the lights on test, smoke only."
+    t = Term(spanish, "gato")
+    t.translation = "cat"
+    db.session.add(t)
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert d.components == [], "no components"
+
+
+def test_component_without_useful_data_not_returned(spanish, app_context, service):
+    "Component without useful data is not returned."
+    t = Term(spanish, "un gato")
+    t.translation = "a cat"
+    g = Term(spanish, "gato")
+    db.session.add(t)
+    db.session.add(g)
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert len(d.components) == 0, "no component data"
+
+    g.translation = "something"
+    db.session.add(g)
+    db.session.commit()
+    d = service.get_popup_data(t.id)
+    assert len(d.components) == 1, "have data"
+    assert d.components[0].translation == "something", "check"
+
+
+def test_component_word_with_translation_returned(spanish, app_context, service):
+    "Component word is returned."
+    t = Term(spanish, "un gato")
+    t.translation = "a cat"
+    db.session.add(t)
+    make_terms([("gato", "cat")], spanish)
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert_components(d, ["gato; cat"], "one component")
+
+
+def test_nested_multiword_components_returned_depending_on_setting(
+    spanish, app_context, service
+):
+    "Components are returned or hidden depending on the user setting."
+    t = Term(spanish, "un gato gordo")
+    t.translation = "a fat cat"
+    db.session.add(t)
+    make_terms([("gato", "cat"), ("gat", "x"), ("un gato", "a cat")], spanish)
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert_components(d, ["un gato; a cat", "gato; cat"], "components")
+
+    repo = UserSettingRepository(db.session)
+    repo.set_value("term_popup_show_components", False)
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert_components(d, [], "no components shown")
+
+
+def test_multiword_components_returned_in_order_of_appearance(
+    spanish, app_context, service
+):
+    "Components are returned in order of appearance."
+    t = Term(spanish, "un gato gordo")
+    t.translation = "a fat cat"
+    db.session.add(t)
+    make_terms(
+        [
+            ("gato", "cat"),
+            ("gat", "x"),
+            ("gato gordo", "fat"),
+            ("un gato", "a cat"),
+        ],
+        spanish,
+    )
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert_components(
+        d, ["un gato; a cat", "gato gordo; fat", "gato; cat"], "components"
+    )
+
+
+def test_components_only_returned_once(spanish, app_context, service):
+    "Component not returned multiple times if present multiple times."
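+    # "gato" occurs twice in the multi-word term below, but should
+    # appear only once in the popup's component list.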
+    t = Term(spanish, "un gato gordo gato")
+    t.translation = "a cat fat cat"
+    db.session.add(t)
+    make_terms([("gato", "cat")], spanish)
+    db.session.commit()
+
+    d = service.get_popup_data(t.id)
+    assert_components(d, ["gato; cat"], "components")
diff --git a/tests/unit/settings/__init__.py b/tests/unit/settings/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/settings/test_current.py b/tests/unit/settings/test_current.py
new file mode 100644
index 000000000..5a6ffae09
--- /dev/null
+++ b/tests/unit/settings/test_current.py
@@ -0,0 +1,27 @@
+"""
+Current settings tests.
+"""
+
+from lute.db import db
+from lute.settings.current import (
+    refresh_global_settings,
+    current_settings,
+    current_hotkeys,
+)
+
+
+def test_refresh_refreshes_current_settings(app_context):
+    "Current settings are loaded."
+    if "backup_dir" in current_settings:
+        del current_settings["backup_dir"]
+    refresh_global_settings(db.session)
+    assert "backup_dir" in current_settings, "loaded"
+
+
+def test_hotkey_strings_mapped_to_name(app_context):
+    "Hotkey key combo to name."
+    refresh_global_settings(db.session)
+    hotkey_names = current_hotkeys.values()
+    assert "hotkey_Status5" in hotkey_names, "this is set by default"
+    assert current_hotkeys["Digit5"] == "hotkey_Status5", "mapped"
+    assert "" not in current_hotkeys, "No blank keyboard shortcuts"
diff --git a/tests/unit/stats/__init__.py b/tests/unit/stats/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/stats/test_service.py b/tests/unit/stats/test_service.py
new file mode 100644
index 000000000..d0404a4da
--- /dev/null
+++ b/tests/unit/stats/test_service.py
@@ -0,0 +1,89 @@
+"Stats service test."
+
+from datetime import datetime, timedelta
+from lute.models.book import WordsRead
+from lute.db import db
+from lute.stats.service import get_chart_data, get_table_data
+from tests.utils import make_text
+
+
+def make_read_text(lang, content, readdate):
+    "Make and save a text."
+    t = make_text(content, content, lang)
+    # t.read_date = readdate
+    db.session.add(t)
+    db.session.commit()
+
+    if readdate is None:
+        return
+
+    wr = WordsRead(t, readdate, t.word_count)
+    db.session.add(wr)
+    db.session.commit()
+
+
+def test_get_chart_data(spanish, english, app_context):
+    "Smoke test."
+    today = datetime.now()
+    yesterday = today - timedelta(days=1)
+    daybefore = today - timedelta(days=2)
+
+    make_read_text(spanish, "Yo tengo un gato.", today)
+    make_read_text(spanish, "Ella esta aqui.", yesterday)
+    make_read_text(spanish, "Nuevo text no leido.", None)
+    make_read_text(english, "Yo yo.", today)
+
+    expected = {
+        "Spanish": [
+            {
+                "readdate": daybefore.strftime("%Y-%m-%d"),
+                "wordcount": 0,
+                "runningTotal": 0,
+            },
+            {
+                "readdate": yesterday.strftime("%Y-%m-%d"),
+                "wordcount": 3,
+                "runningTotal": 3,
+            },
+            {"readdate": today.strftime("%Y-%m-%d"), "wordcount": 4, "runningTotal": 7},
+        ],
+        "English": [
+            {
+                "readdate": yesterday.strftime("%Y-%m-%d"),
+                "wordcount": 0,
+                "runningTotal": 0,
+            },
+            {"readdate": today.strftime("%Y-%m-%d"), "wordcount": 2, "runningTotal": 2},
+        ],
+    }
+    assert get_chart_data(db.session) == expected
+
+
+def test_get_table_data(spanish, english, app_context):
+    "Smoke test."
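+    # Expected shape (per the asserts below): one row per language, with
+    # word counts bucketed into day/week/month/year/total windows.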
+    today = datetime.now()
+    yesterday = today - timedelta(days=1)
+
+    make_read_text(spanish, "Yo tengo un gato.", today)
+    make_read_text(spanish, "Ella esta aqui.", yesterday)
+    make_read_text(spanish, "Nuevo text no leido.", None)
+    make_read_text(english, "Yo yo.", today)
+
+    expected = [
+        {
+            "name": "English",
+            "counts": {"day": 2, "week": 2, "month": 2, "year": 2, "total": 2},
+        },
+        {
+            "name": "Spanish",
+            "counts": {"day": 4, "week": 7, "month": 7, "year": 7, "total": 7},
+        },
+    ]
+    actual = get_table_data(db.session)
+    assert actual == expected
+
+
+def test_get_data_works_when_nothing_read(app_context):
+    "Nothing read should still be ok, empty chart."
+    assert not get_chart_data(db.session), "nothing present"
+    assert not get_table_data(db.session), "nothing"
diff --git a/tests/unit/term/test_ReferencesRepository.py b/tests/unit/term/test_ReferencesRepository.py
new file mode 100644
index 000000000..0aef63366
--- /dev/null
+++ b/tests/unit/term/test_ReferencesRepository.py
@@ -0,0 +1,269 @@
+"""
+ReferencesRepository tests.
+
+Tests finding sentence references for lute.term.model.Term
+*domain* objects.
+"""
+
+from datetime import datetime
+import pytest
+
+from lute.db import db
+from lute.term.model import Term, Repository, ReferencesRepository
+from tests.dbasserts import assert_record_count_equals
+from tests.utils import add_terms, make_text
+
+
+@pytest.fixture(name="repo")
+def fixture_repo():
+    return Repository(db.session)
+
+
+@pytest.fixture(name="refsrepo")
+def fixture_refs_repo():
+    return ReferencesRepository(db.session)
+
+
+## Sentences
+
+
+def full_refs_to_string(refs):
+    "Convert refs to strings for simpler testing."
+
+    def to_string(r):
+        return f"{r.title}, {r.sentence}"
+
+    def refs_to_string(refs_array):
+        ret = [to_string(r) for r in refs_array]
+        ret.sort()
+        return ret
+
+    def parent_refs_to_string(prefs):
+        ret = []
+        for p in prefs:
+            ret.append({"term": p["term"], "refs": refs_to_string(p["refs"])})
+        return ret
+
+    return {
+        "term": refs_to_string(refs["term"]),
+        "children": refs_to_string(refs["children"]),
+        "parents": parent_refs_to_string(refs["parents"]),
+    }
+
+
+@pytest.mark.sentences
+def test_get_all_references(spanish, repo, refsrepo):
+    "Check references with parents and children."
+    text = make_text(
+        "hola", "Tengo un gato. Ella tiene un perro. 
No quiero tener nada.", spanish + ) + archtext = make_text("luego", "Tengo un coche.", spanish) + archtext.book.archived = True + db.session.add(archtext.book) + + for t in [text, archtext]: + t.read_date = datetime.now() + db.session.add(t) + + db.session.commit() + + tengo = Term() + tengo.language_id = spanish.id + tengo.text = "tengo" + tengo.parents = ["tener"] + repo.add(tengo) + + tiene = Term() + tiene.language_id = spanish.id + tiene.text = "tiene" + tiene.parents = ["tener"] + repo.add(tiene) + + repo.commit() + + refs = refsrepo.find_references(tengo) + assert full_refs_to_string(refs) == { + "term": [ + "hola (1/1), Tengo un gato.", + "luego (1/1), Tengo un coche.", + ], + "children": [], + "parents": [ + { + "term": "tener", + "refs": [ + "hola (1/1), Ella tiene un perro.", + "hola (1/1), No quiero tener nada.", + ], + } + ], + }, "term tengo" + + tener = repo.find(spanish.id, "tener") + refs = refsrepo.find_references(tener) + assert full_refs_to_string(refs) == { + "term": ["hola (1/1), No quiero tener nada."], + "children": [ + "hola (1/1), Tengo un gato.", + "hola (1/1), Ella tiene un perro.", + "luego (1/1), Tengo un coche.", + ], + "parents": [], + }, "term tener" + + +@pytest.mark.sentences +def test_multiword_reference(spanish, repo, refsrepo): + "Ensure zws-delimiters are respected." + text = make_text("hola", "Yo tengo un gato.", spanish) + text.read_date = datetime.now() + db.session.add(text) + db.session.commit() + + t = Term() + t.language_id = spanish.id + t.text = "TENGO UN" + repo.add(t) + repo.commit() + + refs = refsrepo.find_references(t) + assert full_refs_to_string(refs) == { + "term": ["hola (1/1), Yo tengo un gato."], + "children": [], + "parents": [], + }, "term tengo" + + +@pytest.mark.sentences +def test_get_references_only_includes_read_texts(spanish, repo, refsrepo): + "Like it says above." + text = make_text("hola", "Tengo un gato. No tengo un perro.", spanish) + + tengo = Term() + tengo.language_id = spanish.id + tengo.text = "tengo" + repo.add(tengo) + repo.commit() + + refs = refsrepo.find_references(tengo) + keys = refs.keys() + for k in keys: + assert len(refs[k]) == 0, f"{k}, no matches for unread texts" + + text.read_date = datetime.now() + db.session.add(text) + db.session.commit() + + refs = refsrepo.find_references(tengo) + assert len(refs["term"]) == 2, "have refs once text is read" + + +def _make_read_text(title, body, lang): + "Make a text, mark it read." + text = make_text(title, body, lang) + text.read_date = datetime.now() + db.session.add(text) + db.session.commit() + return text + + +@pytest.mark.sentences +def test_issue_531_spanish_ref_search_case_insens_normal(spanish, refsrepo): + "Spanish was finding 'normal' upper/lower chars that sqlite could handle." + _make_read_text("hola", "TENGO. tengo.", spanish) + t = add_terms(spanish, ["tengo"])[0] + + refs = refsrepo.find_references(t) + assert len(refs["term"]) == 2, "both found" + + +@pytest.mark.sentences +def test_issue_531_spanish_ref_search_case_insens_accented(spanish, refsrepo): + "Spanish wasn't finding different case of accented chars." + _make_read_text("hola", "Ábrelo. ábrelo.", spanish) + t = add_terms(spanish, ["ábrelo"])[0] + refs = refsrepo.find_references(t) + assert len(refs["term"]) == 2, "both found" + + +@pytest.mark.sentences +def test_issue_531_turkish_ref_search_is_case_insensitive(turkish, refsrepo): + "Turkish upper/lower case letters are quite different!." + _make_read_text("Test", "ışık. 
Işık", turkish) + t = add_terms(turkish, ["ışık"])[0] + refs = refsrepo.find_references(t) + assert len(refs["term"]) == 2, "both found" + + +@pytest.mark.sentences +def test_get_references_only_includes_refs_in_same_language( + spanish, english, repo, refsrepo +): + "Like it says above." + text1 = make_text("hola", "Tengo un gato. No tengo un perro.", spanish) + text2 = make_text("hola", "Tengo in english.", english) + + tengo = Term() + tengo.language_id = spanish.id + tengo.text = "tengo" + repo.add(tengo) + repo.commit() + + text1.read_date = datetime.now() + text2.read_date = datetime.now() + db.session.add(text1) + db.session.add(text2) + db.session.commit() + + refs = refsrepo.find_references(tengo) + assert len(refs["term"]) == 2, "only have 2 refs (spanish)" + sentences = [r.sentence for r in refs["term"]] + expected = [ + "Tengo un gato.", + "No tengo un perro.", + ] + assert sentences == expected + + +@pytest.mark.sentences +def test_get_references_new_term(spanish, refsrepo): + "Check references with parents and children." + text = make_text("hola", "Tengo un gato.", spanish) + text.read_date = datetime.now() + db.session.add(text) + + tengo = Term() + tengo.language_id = spanish.id + tengo.text = "tengo" + + refs = refsrepo.find_references(tengo) + assert full_refs_to_string(refs) == { + "term": ["hola (1/1), Tengo un gato."], + "children": [], + "parents": [], + }, "term tengo" + + +@pytest.mark.sentences +def test_can_get_references_by_term_id_including_unread(spanish): + "Like it says above." + text = make_text("hola", "Tengo un gato. No tengo un perro.", spanish) + text.load_sentences() + db.session.add(text) + db.session.commit() + # pylint: disable=unbalanced-tuple-unpacking + [tengo] = add_terms(spanish, ["tengo"]) + + assert_record_count_equals("select * from sentences", 2, "sanity check") + + refsrepo = ReferencesRepository(db.session, limit=1, include_unread=False) + refs = refsrepo.find_references_by_id(tengo.id) + sentences = [r.sentence for r in refs["term"]] + expected = [] + assert sentences == expected, "not including unread texts" + + refsrepo = ReferencesRepository(db.session, limit=1, include_unread=True) + refs = refsrepo.find_references_by_id(tengo.id) + sentences = [r.sentence for r in refs["term"]] + expected = ["Tengo un gato."] + assert sentences == expected, "including unread" diff --git a/tests/unit/term/test_Repository.py b/tests/unit/term/test_Repository.py index 74cac32c9..26e734e12 100644 --- a/tests/unit/term/test_Repository.py +++ b/tests/unit/term/test_Repository.py @@ -5,19 +5,18 @@ and retrieved from DB. """ -from datetime import datetime import pytest from lute.models.term import Term as DBTerm, TermTag from lute.db import db from lute.term.model import Term, Repository from tests.dbasserts import assert_sql_result, assert_record_count_equals -from tests.utils import add_terms, make_text +from tests.utils import add_terms @pytest.fixture(name="repo") def fixture_repo(): - return Repository(db) + return Repository(db.session) @pytest.fixture(name="hello_term") @@ -139,6 +138,24 @@ def test_save_uses_existing_TermTags(app_context, repo, hello_term): assert_sql_result(sql, ["1; a; HELLO", "2; b; HELLO"], "a used, b created") +def test_fix_issue_454_handle_duplicate_tags(app_context, repo, hello_term): + "Same new tag added twice should be handled ok." 
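+    # The duplicate "b" in term_tags below should collapse to a single
+    # new tag record instead of failing the save.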
+ db.session.add(TermTag("a")) + db.session.commit() + + sql = """select TgID, TgText, WoText + from tags + left join wordtags on WtTgID = TgID + left join words on WoID = WtWoID + order by TgText""" + assert_sql_result(sql, ["1; a; None"], "a tag exists") + + hello_term.term_tags = ["a", "b", "b"] + repo.add(hello_term) + repo.commit() + assert_sql_result(sql, ["1; a; HELLO", "2; b; HELLO"], "a used, b created") + + def test_save_with_no_flash_message(app_context, repo, hello_term): "Saving with flash = None removes the flash record." hello_term.flash_message = "hi there" @@ -181,28 +198,221 @@ def test_delete_leaves_parent(app_context, repo, hello_term): assert_sql_result(sql, ["parent"], "parent stays") +## Saving and images. + + +def test_save_term_image_saved_and_replaced(app_context, repo, hello_term): + "Saving puts record in image table." + sql = "select WiSource from wordimages" + assert_sql_result(sql, [], "nothing present") + repo.add(hello_term) + repo.commit() + assert_sql_result(sql, ["hello.png"], "image saved") + + +def test_save_term_image_set_to_blank_removes_record(app_context, repo, hello_term): + "Saving puts record in image table." + sql = "select WiSource from wordimages" + repo.add(hello_term) + repo.commit() + assert_sql_result(sql, ["hello.png"], "image saved") + + hello_term.current_image = "" + repo.add(hello_term) + repo.commit() + assert_sql_result(sql, [], "record removed") + + hello_term.current_image = "new.png" + repo.add(hello_term) + repo.commit() + assert_sql_result(sql, ["new.png"], "image saved") + + hello_term.current_image = None + repo.add(hello_term) + repo.commit() + assert_sql_result(sql, [], "record removed again") + + ## Saving and parents. -def test_save_with_new_parent(app_context, repo, hello_term): +def create_parent(lang, status=0, translation=None, image=None, tags=None): + "Create test parent." + p = DBTerm(lang, "parent") + p.status = status + p.translation = translation + p.set_current_image(image) + for t in tags or []: + p.add_term_tag(TermTag(t)) # Ensure no double-tag added. + db.session.add(p) + db.session.commit() + return p + + +def assert_parent_has(status, translation, img, term_tags): + "Assert parent data matches expected." + # For some weird reason, query with filter was not returning + # a match, so doing a loop to get the parent. Possibly due to aliasing + # of lute.models.term.Term to DBTerm. + # p = db.session.query(DBTerm).filter(DBTerm.text == "parent").first() + parents = [t for t in db.session.query(DBTerm).all() if t.text == "parent"] + assert len(parents) == 1, "Sanity check" + p = parents[0] + assert p.status == status, "status" + assert (p.translation or "-") == (translation or "-"), "txn" + assert (p.get_current_image() or "-") == (img or "-"), "img" + actual_tags = sorted([t.text for t in p.term_tags]) + assert actual_tags == term_tags, "tags" + + +def test_save_new_child_creates_new_populated_parent(app_context, repo, hello_term): """ Given a Term with parents = [ 'newparent' ], new parent DBTerm is created, and is assigned translation and image and tag. + + The child's data is propagated up if needed, to 'fill in' missing parent data. """ + t = hello_term + t.parents = ["parent"] + t.term_tags = ["a", "b"] + repo.add(t) + repo.commit() + + assert_parent_has(t.status, t.translation, t.current_image, ["a", "b"]) + + +def test_save_new_child_populates_existing_unknown_parent( + app_context, repo, english, hello_term +): + "Existing parent with status 0 is bumped to status 1." 
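+    # A status-0 parent is treated as a placeholder: saving the child
+    # fills in the parent's status, translation, image, and tags from
+    # the child (see assert_parent_has below).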
+    create_parent(english, status=0)
+
+    hello_term.parents = ["parent"]
+    hello_term.term_tags = ["a", "b"]
+    repo.add(hello_term)
+    repo.commit()
+
+    t = hello_term
+    assert_parent_has(t.status, t.translation, t.current_image, ["a", "b"])
+
+
+def test_save_new_child_sets_existing_parent_translation_and_image_if_missing(
+    app_context, repo, english, hello_term
+):
+    "New child data is propagated up if needed, to 'fill in' missing parent data."
+    create_parent(english, status=3, translation="something")
+
+    hello_term.parents = ["parent"]
+    hello_term.term_tags = ["a", "b"]
+    repo.add(hello_term)
+    repo.commit()
+
+    assert_parent_has(3, "something", hello_term.current_image, [])
+
+
+def test_save_existing_child_creates_new_populated_parent(
+    app_context, repo, hello_term
+):
+    """
+    Given a Term with parents = [ 'newparent' ],
+    new parent DBTerm is created, and is assigned translation and image and tag.
+
+    The child's data is propagated up if needed, to 'fill in' missing parent data.
+    """
+    repo.add(hello_term)
+    repo.commit()
+    assert_sql_result("select WoText from words", ["HELLO"], "no parent yet")
+
+    hello_term.parents = ["parent"]
+    hello_term.term_tags = ["a", "b"]
+    repo.add(hello_term)
+    repo.commit()
+
+    t = hello_term
+    assert_parent_has(t.status, t.translation, t.current_image, ["a", "b"])
+
+
+def test_save_existing_child_populates_existing_unknown_parent(
+    app_context, repo, english, hello_term
+):
+    "Existing parent with status 0 is bumped to status 1."
+    create_parent(english, status=0, tags=["a"])
+
+    repo.add(hello_term)
+    repo.commit()
+    parent = repo.find(hello_term.language_id, "parent")
+    assert parent.status == 0, "parent still unknown"
+
+    h2 = repo.find(hello_term.language_id, hello_term.text)
+    h2.parents = ["parent"]
+    h2.term_tags = ["a", "b"]
+    repo.add(h2)
+    repo.commit()
+
+    t = hello_term
+    assert_parent_has(t.status, t.translation, t.current_image, ["a", "b"])
+
+
+def test_update_child_add_existing_parent_does_not_change_parent_data_even_if_missing(
+    app_context, repo, english, hello_term
+):
+    """
+    If a parent existed before, and the child existed before, then
+    editing the child shouldn't affect the parent, even if the
+    parent's translation and image are empty -- reason: they have been
+    created and are specifically empty.
+    """
+    create_parent(english, translation=None, image=None, status=3)
+
+    repo.add(hello_term)
+    repo.commit()
+
+    hello_term = repo.find(hello_term.language_id, hello_term.text)
+    assert hello_term is not None, "Have hello_term"
+    hello_term.parents = ["parent"]
+    hello_term.term_tags = ["a", "b"]
+    repo.add(hello_term)
+    repo.commit()
+
+    assert_parent_has(3, None, None, [])
+
+
+def test_update_child_with_parent_does_not_change_parent_data_even_if_missing(
+    app_context, repo, english, hello_term
+):
+    """
+    If a parent existed before, and the child existed before, then
+    editing the child shouldn't affect the parent, even if the
+    parent's translation and image are empty -- reason: they have been
+    created and are specifically empty.
+ """ + p = create_parent(english, translation=None, image=None, status=3) + + hello_term.parents = ["parent"] + repo.add(hello_term) + repo.commit() + + # Parent updated on initial save. + assert_parent_has(3, hello_term.translation, hello_term.current_image, []) + + # Re-set parent. + p.translation = None + p.set_current_image(None) + db.session.add(p) + db.session.commit() + assert_parent_has(3, None, None, []) + + # Re-update existing child term. + hello_term = repo.find(hello_term.language_id, hello_term.text) + assert hello_term.parents == ["parent"], "parent still set" + hello_term.translation = "UPDATED" + hello_term.current_image = "UPDATED.PNG" + repo.add(hello_term) + repo.commit() + + # Parent not changed. + assert_parent_has(3, None, None, []) def test_save_remove_parent_breaks_link(app_context, repo, hello_term): @@ -245,6 +455,25 @@ def test_save_change_parent_breaks_old_link(app_context, repo, hello_term): assert_sql_result(sqlparent, ["hello; new"], "changed") +def test_save_term_with_multiple_parents_unsets_sync_status( + app_context, repo, hello_term +): + "Ensure DB data is good on save." + hello_term.parents.append("parent") + hello_term.sync_status = True + repo.add(hello_term) + repo.commit() + + sql = "select WoTextLC, WoSyncStatus from words where WoTextLC = 'hello'" + assert_sql_result(sql, ["hello; 1"], "link set") + + hello_term.parents.append("parent2") + hello_term.sync_status = True + repo.add(hello_term) + repo.commit() + assert_sql_result(sql, ["hello; 0"], "link UN-set") + + def test_cant_set_term_as_its_own_parent(app_context, repo, hello_term): """ Would create obvious circular ref @@ -320,7 +549,7 @@ def test_load(empty_db, english, repo): term = repo.load(t.id) assert term.id == t.id - assert term.language.id == t.language.id, "lang object set" + assert term.language_id == t.language.id assert term.language_id == english.id assert term.text == "Hello" assert term.original_text == "Hello" @@ -359,16 +588,24 @@ def test_find_only_looks_in_specified_language(spanish, english, repo): def test_find_existing_multi_word(spanish, repo): - "Domain objects don't have zero-width strings in them." + """ + Domain objects DO have zero-width strings in them. + + This is necessary to disambiguate terms such as + "集めれ" and "集め/れ", both of which are valid + parser results (though perhaps not *useful* parser + results, but that's a different question). + """ add_terms(spanish, ["una bebida"]) zws = "\u200B" - t = repo.find(spanish.id, f"una{zws} {zws}bebida") + term_with_zws = f"una{zws} {zws}bebida" + t = repo.find(spanish.id, term_with_zws) assert t.id > 0 - assert t.text == "una bebida" + assert t.text == term_with_zws t = repo.find(spanish.id, "una bebida") assert t.id > 0 - assert t.text == "una bebida" + assert t.text == term_with_zws ## Find or new tests. @@ -380,7 +617,7 @@ def test_find_or_new_existing_word(spanish, repo): t = repo.find_or_new(spanish.id, "bebida") assert t.id > 0, "exists" assert t.text == "BEBIDA" - assert t.language.id == spanish.id, "lang object set" + assert t.language_id == spanish.id, "lang id set" def test_find_or_new_non_existing(spanish, repo): @@ -388,20 +625,21 @@ def test_find_or_new_non_existing(spanish, repo): t = repo.find_or_new(spanish.id, "TENGO") assert t.id is None assert t.text == "TENGO" - assert t.language.id == spanish.id, "lang object set" + assert t.language_id == spanish.id, "lang id set" def test_find_or_new_existing_multi_word(spanish, repo): "Spaces etc handled correctly." 
add_terms(spanish, ["una bebida"]) zws = "\u200B" + term_with_zws = f"una{zws} {zws}bebida" t = repo.find_or_new(spanish.id, f"una{zws} {zws}bebida") assert t.id > 0 - assert t.text == "una bebida" + assert t.text == term_with_zws t = repo.find_or_new(spanish.id, "una bebida") assert t.id > 0 - assert t.text == "una bebida" + assert t.text == term_with_zws def test_find_or_new_new_multi_word(spanish, repo): @@ -413,7 +651,25 @@ def test_find_or_new_new_multi_word(spanish, repo): t = repo.find_or_new(spanish.id, "una bebida") assert t.id is None - assert t.text == "una bebida" + assert t.text == f"una{zws} {zws}bebida" + + +def test_find_or_new_ambiguous_japanese_terms(japanese, repo): + """ + Characterization test only: behaviour of find_or_new for + ambiguously parsable 集めれ terms + + See comments in find_or_new for notes. + """ + s = "集めれ" + term = DBTerm.create_term_no_parsing(japanese, s) + db.session.add(term) + db.session.commit() + + t = repo.find_or_new(japanese.id, s) + assert t.id is None, "do _not_ have term, searching for string without context" + zws = "\u200B" + assert t.text == f"集め{zws}れ", "returns a new term" ## Matches tests. @@ -433,7 +689,7 @@ def test_find_matches_only_returns_language_matches(spanish, repo, _multiple_ter matches = repo.find_matches(spanish.id, c) assert len(matches) == 2, c assert matches[0].text == "pare" - assert matches[0].language.id == spanish.id, "lang object set" + assert matches[0].language_id == spanish.id, "language included" @pytest.mark.find_match @@ -448,35 +704,44 @@ def test_find_matches_returns_empty_if_no_match_or_empty_string( @pytest.mark.find_match def test_find_matches_multiword_respects_zws(spanish, repo, _multiple_terms): - "zws are removed from the business objects." + "zws handled correctly in search." zws = "\u200B" for c in [f"tengo{zws} {zws}uno", "tengo uno", "tengo"]: matches = repo.find_matches(spanish.id, c) assert len(matches) == 1, f'have match for case "{c}"' - assert matches[0].text == "tengo uno" + assert matches[0].text == f"tengo{zws} {zws}uno" def assert_find_matches_returns(term_repo, spanish, s, expected): "Helper, assert returns expected content." matches = term_repo.find_matches(spanish.id, s) - assert len(matches) == len(expected) actual = ", ".join([t.text for t in matches]) assert ", ".join(expected) == actual +def test_find_matches_starting_matches_sort_to_top(spanish, repo): + "All matches return, but words that start with the same thing sort to top." + add_terms(spanish, ["tener", "contener", "sostener", "ten", "contengo", "xxx"]) + assert_find_matches_returns( + repo, spanish, "ten", ["ten", "tener", "contener", "contengo", "sostener"] + ) + assert_find_matches_returns(repo, spanish, "ene", ["contener", "sostener", "tener"]) + assert_find_matches_returns(repo, spanish, "x", ["xxx"]) + + @pytest.mark.find_match -def test_findLikeSpecification_initial_check(spanish, repo): - "Searches match the start of string." +def test_find_matches_initial_check(spanish, repo): + "Searches match any part of string." 
add_terms(spanish, ["abc", "abcd", "bcd"]) assert_find_matches_returns(repo, spanish, "ab", ["abc", "abcd"]) assert_find_matches_returns(repo, spanish, "abcd", ["abcd"]) - assert_find_matches_returns(repo, spanish, "bc", ["bcd"]) + assert_find_matches_returns(repo, spanish, "bcd", ["bcd", "abcd"]) assert_find_matches_returns(repo, spanish, "yy", []) @pytest.mark.find_match -def test_find_like_specification_terms_with_children_go_to_top(spanish, repo): +def test_find_matches_terms_with_children_go_to_top(spanish, repo): """ Parents go to the top, then the rest ... but exact matches trumps parent. """ @@ -495,137 +760,3 @@ def test_find_like_specification_terms_with_children_go_to_top(spanish, repo): ) assert_find_matches_returns(repo, spanish, "ab", ["abcParent", "abc"]) assert_find_matches_returns(repo, spanish, "abc", ["abc", "abcParent"]) - - -## Sentences - - -def full_refs_to_string(refs): - "Convert refs to strings for simpler testing." - - def to_string(r): - return f"{r.title}, {r.sentence}" - - def refs_to_string(refs_array): - ret = [to_string(r) for r in refs_array] - ret.sort() - return ret - - def parent_refs_to_string(prefs): - ret = [] - for p in prefs: - ret.append({"term": p["term"], "refs": refs_to_string(p["refs"])}) - return ret - - return { - "term": refs_to_string(refs["term"]), - "children": refs_to_string(refs["children"]), - "parents": parent_refs_to_string(refs["parents"]), - } - - -@pytest.mark.sentences -def test_get_all_references(spanish, repo): - "Check references with parents and children." - text = make_text( - "hola", "Tengo un gato. Ella tiene un perro. No quiero tener nada.", spanish - ) - archtext = make_text("luego", "Tengo un coche.", spanish) - archtext.book.archived = True - db.session.add(archtext.book) - - for t in [text, archtext]: - t.read_date = datetime.now() - db.session.add(t) - - db.session.commit() - - tengo = Term() - tengo.language_id = spanish.id - tengo.text = "tengo" - tengo.parents = ["tener"] - repo.add(tengo) - - tiene = Term() - tiene.language_id = spanish.id - tiene.text = "tiene" - tiene.parents = ["tener"] - repo.add(tiene) - - repo.commit() - - refs = repo.find_references(tengo) - assert full_refs_to_string(refs) == { - "term": [ - "hola (1/1), Tengo un gato.", - "luego (1/1), Tengo un coche.", - ], - "children": [], - "parents": [ - { - "term": "tener", - "refs": [ - "hola (1/1), Ella tiene un perro.", - "hola (1/1), No quiero tener nada.", - ], - } - ], - }, "term tengo" - - tener = repo.find(spanish.id, "tener") - refs = repo.find_references(tener) - assert full_refs_to_string(refs) == { - "term": ["hola (1/1), No quiero tener nada."], - "children": [ - "hola (1/1), Tengo un gato.", - "hola (1/1), Ella tiene un perro.", - "luego (1/1), Tengo un coche.", - ], - "parents": [], - }, "term tener" - - -@pytest.mark.sentences -def test_multiword_reference(spanish, repo): - "Ensure zws-delimiters are respected." - text = make_text("hola", "Yo tengo un gato.", spanish) - text.read_date = datetime.now() - db.session.add(text) - db.session.commit() - - t = Term() - t.language_id = spanish.id - t.text = "TENGO UN" - repo.add(t) - repo.commit() - - refs = repo.find_references(t) - assert full_refs_to_string(refs) == { - "term": ["hola (1/1), Yo tengo un gato."], - "children": [], - "parents": [], - }, "term tengo" - - -@pytest.mark.sentences -def test_get_references_only_includes_read_texts(spanish, repo): - "Like it says above." - text = make_text("hola", "Tengo un gato. 
No tengo un perro.", spanish) - - tengo = Term() - tengo.language_id = spanish.id - tengo.text = "tengo" - repo.add(tengo) - repo.commit() - - refs = repo.find_references(tengo) - keys = refs.keys() - for k in keys: - assert len(refs[k]) == 0, f"{k}, no matches for unread texts" - - text.read_date = datetime.now() - db.session.add(text) - db.session.commit() - - refs = repo.find_references(tengo) - assert len(refs["term"]) == 2, "have refs once text is read" diff --git a/tests/unit/term/test_Repository_sync_status.py b/tests/unit/term/test_Repository_sync_status.py new file mode 100644 index 000000000..87228067e --- /dev/null +++ b/tests/unit/term/test_Repository_sync_status.py @@ -0,0 +1,97 @@ +""" +Term Repository tests, with syncing status to parent. +""" + +import pytest +from lute.term.model import Repository +from lute.db import db +from tests.dbasserts import assert_sql_result +from tests.utils import add_terms + +# pylint: disable=unbalanced-tuple-unpacking, missing-function-docstring,unused-argument + + +@pytest.fixture(name="repo") +def fixture_repo(app_context): + return Repository(db.session) + + +@pytest.fixture(name="t") +def fixture_term(repo, spanish): + [dt] = add_terms(spanish, ["T"]) + dt.status = 1 + db.session.add(dt) + db.session.commit() + yield repo.load(dt.id) + + +@pytest.fixture(name="p") +def fixture_parent(repo, spanish): + [dp] = add_terms(spanish, ["P"]) + dp.status = 4 + db.session.add(dp) + db.session.commit() + yield repo.load(dp.id) + + +def assert_statuses(term, expected, msg): + repo = Repository(db.session) + repo.add(term) + repo.commit() + sql = "select WoText, WoStatus from words order by WoText" + assert_sql_result(sql, expected, msg) + + +def test_save_synced_child_with_set_status_updates_parent_status(t, p): + t.parents = ["P"] + t.sync_status = True + t.status = 3 + assert_statuses(t, ["P; 3", "T; 3"], "both updated") + + t.sync_status = False + t.status = 1 + assert_statuses(t, ["P; 3", "T; 1"], "P not updated") + + +def test_save_child_with_no_set_status_inherits_parent_status_if_sync(t, p): + assert_statuses(t, ["P; 4", "T; 1"], "initial state") + t.parents = ["P"] + t.sync_status = True + assert_statuses(t, ["P; 4", "T; 4"], "initial save") + + +def test_new_parent_gets_child_status_if_sync(t): + t.parents = ["NEW"] + t.sync_status = True + assert_statuses(t, ["NEW; 1", "T; 1"], "initial save") + + +def test_child_status_changes_dont_affect_others_if_no_sync(t, p): + t.status = 3 + assert_statuses(t, ["P; 4", "T; 3"], "no change to other") + + +def test_remove_parent_breaks_sync(t, p): + t.parents = ["P"] + t.sync_status = True + t.status = 3 + assert_statuses(t, ["P; 3", "T; 3"], "both updated") + + t.parents = [] + t.status = 1 + assert_statuses(t, ["P; 3", "T; 1"], "P not updated, no sync") + + +def test_cant_sync_multiple_parents(t, p): + t.parents = ["P", "X"] + t.sync_status = True + t.status = 2 + assert_statuses( + t, ["P; 4", "T; 2", "X; 2"], "P no change, X.status defaults to T's" + ) + + t.status = 3 + assert_statuses(t, ["P; 4", "T; 3", "X; 2"], "X not sync'd, multiple parents") + + t.parents = ["X"] + assert_statuses(t, ["P; 4", "T; 3", "X; 3"], "X sync'd, single parent") diff --git a/tests/unit/term/test_TermForm.py b/tests/unit/term/test_TermForm.py index 937cc0b3a..b07a5c1d4 100644 --- a/tests/unit/term/test_TermForm.py +++ b/tests/unit/term/test_TermForm.py @@ -11,34 +11,39 @@ from lute.term.forms import TermForm -def test_validate(app_context, english): +def _make_form(t, app, english): + "Make form with args." 
+ with app.test_request_context(): + f = TermForm(obj=t, session=db.session) + f.language_id.choices = [(english.id, "english")] + return f + + +def test_validate(app, app_context, english): "A new term is valid." - repo = Repository(db) + repo = Repository(db.session) t = repo.find_or_new(english.id, "CAT") - f = TermForm(obj=t) - f.language_id.choices = [(english.id, "english")] - + f = _make_form(t, app, english) assert f.validate() is True, "no change = valid" -def test_text_change_not_valid(app_context, english): +def test_text_change_not_valid(app, app_context, english): "Text change raises error." dbt = DBTerm(english, "CAT") db.session.add(dbt) db.session.commit() - repo = Repository(db) + repo = Repository(db.session) t = repo.find_or_new(english.id, "CAT") t.text = "dog" - f = TermForm(obj=t) - f.language_id.choices = [(english.id, "english")] + f = _make_form(t, app, english) is_valid = f.validate() assert is_valid is False, "text change = not valid" assert f.errors == {"text": ["Can only change term case"]} -def test_duplicate_text_not_valid(app_context, english): +def test_duplicate_text_not_valid(app, app_context, english): "Duplicate term throws." dbt = DBTerm(english, "CAT") db.session.add(dbt) @@ -47,25 +52,23 @@ def test_duplicate_text_not_valid(app_context, english): t = Term() t.language_id = english.id t.text = "cat" - f = TermForm(obj=t) - f.language_id.choices = [(english.id, "english")] + f = _make_form(t, app, english) is_valid = f.validate() assert is_valid is False, "dup term not valid" assert f.errors == {"text": ["Term already exists"]} -def test_update_existing_term_is_valid(app_context, english): +def test_update_existing_term_is_valid(app, app_context, english): "Can update an existing term." dbt = DBTerm(english, "CAT") db.session.add(dbt) db.session.commit() - repo = Repository(db) + repo = Repository(db.session) t = repo.find_or_new(english.id, "cat") t.text = "cat" - f = TermForm(obj=t) - f.language_id.choices = [(english.id, "english")] + f = _make_form(t, app, english) is_valid = f.validate() assert is_valid is True, "updating existing term is ok" diff --git a/tests/unit/term/test_Term_status_follow.py b/tests/unit/term/test_Term_status_follow.py new file mode 100644 index 000000000..cdf95e339 --- /dev/null +++ b/tests/unit/term/test_Term_status_follow.py @@ -0,0 +1,417 @@ +""" +Term status following. +""" + +import pytest + +from lute.models.term import Term as DBTerm +from lute.db import db +from tests.dbasserts import assert_sql_result + + +@pytest.fixture(name="term_family") +def fixture_term_family(app_context, english): + """ + Term family. + + A + B - follows + b1 - follows + b2 - no + C - does not follow + c1 - follows + c2 - no + """ + + class family: + "Family of terms." + + def __init__(self): + "Set up terms." 
+            # pylint: disable=invalid-name
+            A = DBTerm(english, "A")
+            B = DBTerm(english, "Byes")
+            B.add_parent(A)
+            B.sync_status = True
+            b1 = DBTerm(english, "b1yes")
+            b1.add_parent(B)
+            b1.sync_status = True
+            b2 = DBTerm(english, "b2no")
+            b2.add_parent(B)
+
+            C = DBTerm(english, "Cno")
+            C.add_parent(A)
+            c1 = DBTerm(english, "c1yes")
+            c1.add_parent(C)
+            c1.sync_status = True
+            c2 = DBTerm(english, "c2no")
+            c2.add_parent(C)
+
+            db.session.add(A)
+            db.session.add(B)
+            db.session.add(b1)
+            db.session.add(b2)
+            db.session.add(C)
+            db.session.add(c1)
+            db.session.add(c2)
+            db.session.commit()
+
+            self.A = A
+            self.B = B
+            self.b1 = b1
+            self.b2 = b2
+            self.C = C
+            self.c1 = c1
+            self.c2 = c2
+
+    f = family()
+
+    expected_initial_state = """
+    A: 1
+    Byes: 1
+    b/1/yes: 1
+    b/2/no: 1
+    Cno: 1
+    c/1/yes: 1
+    c/2/no: 1
+    """
+    assert_statuses(expected_initial_state, "initial state")
+
+    return f
+
+
+def assert_statuses(expected, msg):
+    "Check the statuses of terms."
+    lines = [
+        s.strip().replace(":", ";") for s in expected.split("\n") if s.strip() != ""
+    ]
+    sql = "select WoText, WoStatus from words order by WoTextLC"
+    assert_sql_result(sql, lines, msg)
+
+
+def test_parent_status_propagates_down(term_family, app_context):
+    "Changing status should propagate down the tree."
+    f = term_family
+    f.A.status = 4
+    db.session.add(f.A)
+    db.session.commit()
+
+    expected = """
+    A: 4
+    Byes: 4
+    b/1/yes: 4
+    b/2/no: 1
+    Cno: 1
+    c/1/yes: 1
+    c/2/no: 1
+    """
+    assert_statuses(expected, "updated")
+
+
+def test_status_propagates_up_and_down(term_family, app_context):
+    "Parent and child also updated."
+    f = term_family
+    f.B.status = 4
+    db.session.add(f.B)
+    db.session.commit()
+
+    expected = """
+    A: 4
+    Byes: 4
+    b/1/yes: 4
+    b/2/no: 1
+    Cno: 1
+    c/1/yes: 1
+    c/2/no: 1
+    """
+    assert_statuses(expected, "updated")
+
+
+def test_status_propagates_even_if_circular_reference(term_family, app_context):
+    "B-b1-B circular reference, is all ok."
+    f = term_family
+    # f.b1.parents.append(f.B)
+    f.B.parents.append(f.b1)
+    db.session.add(f.B)
+    # db.session.add(f.b1)
+    db.session.commit()
+
+    zws = "\u200B"
+    sql = f"""
+    select p.WoText, c.WoText
+    from words p
+    inner join wordparents on p.WoID = WpParentWoID
+    inner join words c on c.WoID = WpWoID
+    where p.WoText in ('Byes', 'b{zws}1{zws}yes')
+    """
+    assert_sql_result(
+        sql, ["Byes; b/1/yes", "Byes; b/2/no", "b/1/yes; Byes"], "parent set"
+    )
+
+    f.B.status = 4
+    db.session.add(f.B)
+    db.session.commit()
+    expected = """
+    A: 1
+    Byes: 4
+    b/1/yes: 4
+    b/2/no: 1
+    Cno: 1
+    c/1/yes: 1
+    c/2/no: 1
+    """
+    assert_statuses(expected, "updated")
+
+
+def test_status_stops_propagating_to_top(term_family, app_context):
+    "Status goes up only through syncing links: c1 updates C, but C doesn't follow A."
+    f = term_family
+    f.c1.status = 4
+    db.session.add(f.c1)
+    db.session.commit()
+
+    expected = """
+    A: 1
+    Byes: 1
+    b/1/yes: 1
+    b/2/no: 1
+    Cno: 4
+    c/1/yes: 4
+    c/2/no: 1
+    """
+    assert_statuses(expected, "updated")
+
+
+def test_term_not_following_parent(term_family, app_context):
+    "Doesn't update parent."
+    f = term_family
+    f.b2.status = 4
+    db.session.add(f.b2)
+    db.session.commit()
+
+    expected = """
+    A: 1
+    Byes: 1
+    b/1/yes: 1
+    b/2/no: 4
+    Cno: 1
+    c/1/yes: 1
+    c/2/no: 1
+    """
+    assert_statuses(expected, "updated")
+
+
+def test_changing_sync_status_propagates_status(term_family, app_context):
+    "Turning sync_status on propagates the current status through the family."
+    f = term_family
+    f.b2.status = 4
+    db.session.add(f.b2)
+    db.session.commit()
+
+    expected = """
+    A: 1
+    Byes: 1
+    b/1/yes: 1
+    b/2/no: 4
+    Cno: 1
+    c/1/yes: 1
+    c/2/no: 1
+    """
+    assert_statuses(expected, "not following, not updated")
+
+    f.b2.sync_status = True
+    db.session.add(f.b2)
+    db.session.commit()
+
+    expected = """
+    A: 4
+    Byes: 4
+    b/1/yes: 4
+    b/2/no: 4
+    Cno: 1
+    c/1/yes: 1
+    c/2/no: 1
+    """
+    assert_statuses(expected, "updated")
+
+
+def test_parent_not_updated_if_term_has_multiple_parents(term_family, app_context):
+    "Doesn't update parent."
+    f = term_family
+    f.c1.add_parent(f.B)
+    db.session.add(f.c1)
+    db.session.commit()
+
+    f.c1.status = 4
+    db.session.add(f.c1)
+    db.session.commit()
+
+    expected = """
+    A: 1
+    Byes: 1
+    b/1/yes: 1
+    b/2/no: 1
+    Cno: 1
+    c/1/yes: 4
+    c/2/no: 1
+    """
+    assert_statuses(expected, "updated")
+
+
+def test_adding_new_term_changes_family_if_added(english, term_family, app_context):
+    "A new synced child propagates its status through the family."
+    f = term_family
+
+    b3 = DBTerm(english, "b3yes")
+    b3.add_parent(f.B)
+    b3.status = 3
+    b3.sync_status = True
+    db.session.add(b3)
+    db.session.commit()
+
+    expected = """
+    A: 3
+    Byes: 3
+    b/1/yes: 3
+    b/2/no: 1
+    b/3/yes: 3
+    Cno: 1
+    c/1/yes: 1
+    c/2/no: 1
+    """
+    assert_statuses(expected, "updated")
+
+
+def test_adding_new_term_does_not_change_family_if_multiple_parents(
+    english, term_family, app_context
+):
+    "A new child with multiple parents doesn't propagate its status."
+    f = term_family
+
+    b3 = DBTerm(english, "b3yes")
+    b3.parents.append(f.B)
+    b3.parents.append(f.C)
+    b3.status = 3
+    # NOTE: setting "follow parent" causes weird propagation ...
+    db.session.add(b3)
+    db.session.commit()
+
+    expected = """
+    A: 1
+    Byes: 1
+    b/1/yes: 1
+    b/2/no: 1
+    b/3/yes: 3
+    Cno: 1
+    c/1/yes: 1
+    c/2/no: 1
+    """
+    assert_statuses(expected, "updated")
+
+
+def assert_sync_statuses(expected, msg):
+    "Check the sync statuses of terms."
+    lines = [
+        s.strip().replace(":", ";") for s in expected.split("\n") if s.strip() != ""
+    ]
+    sql = "select WoText from words where WoSyncStatus = 1 order by WoText"
+    assert_sql_result(sql, lines, msg)
+
+
+def test_deleting_parent_deactivates_sync_status(term_family, app_context):
+    "No more parent = no more follow."
+
+    expected = """
+    Byes
+    b/1/yes
+    c/1/yes
+    """
+    assert_sync_statuses(expected, "before delete")
+
+    f = term_family
+    db.session.delete(f.A)
+    db.session.commit()
+
+    expected = """
+    b/1/yes
+    c/1/yes
+    """
+    assert_sync_statuses(expected, "after delete")
+
+
+def test_adding_extra_parents_unsets_sync_status(term_family, app_context):
+    "Can't follow multiple people."
+    c1 = db.session.get(DBTerm, term_family.c1.id)
+    assert c1.sync_status is True, "following C"
+
+    c1.add_parent(term_family.B)
+    assert c1.sync_status is False, "Can't follow 2 parents"
+
+
+def test_changing_parent_keeps_sync_status(term_family, app_context):
+    "Swapping to a different single parent keeps the sync status."
+    c1 = db.session.get(DBTerm, term_family.c1.id)
+    assert c1.sync_status is True, "following C"
+
+    c1.remove_all_parents()
+    c1.add_parent(term_family.B)
+    assert c1.sync_status is True, "Still following"
+
+    db.session.add(c1)
+    db.session.commit()
+
+    expected = """
+    Byes
+    b/1/yes
+    c/1/yes
+    """
+    assert_sync_statuses(expected, "after change")
+
+
+def test_remove_parent_doesnt_affect_other_linked_terms(term_family, app_context):
+    "Issue 417: unlinking one child leaves the other synced children alone."
+
+    b3 = DBTerm(term_family.B.language, "b3yes")
+    b3.add_parent(term_family.B)
+    b3.sync_status = True
+    db.session.add(b3)
+    db.session.commit()
+
+    expected = """
+    Byes
+    b/1/yes
+    b/3/yes
+    c/1/yes
+    """
+    assert_sync_statuses(expected, "set up")
+
+    b1 = term_family.b1
+    b1.remove_all_parents()
+    b1.sync_status = False
+    db.session.add(b1)
+    db.session.commit()
+
+    expected = """
+    Byes
+    b/3/yes
+    c/1/yes
+    """
+    assert_sync_statuses(expected, "after b1 unlinked")
+
+
+def test_remove_non_linked_parent_leaves_other_linked_terms(term_family, app_context):
+    "Issue 417."
+
+    # term b2 is not linked to its parent B
+    b2 = term_family.b2
+    b2.remove_all_parents()
+    b2.sync_status = False
+    db.session.add(b2)
+    db.session.commit()
+
+    expected = """
+    Byes
+    b/1/yes
+    c/1/yes
+    """
+    assert_sync_statuses(expected, "b1 still linked")
diff --git a/tests/unit/term/test_datatables.py b/tests/unit/term/test_datatables.py
index a3d3feb9e..d9e433ddf 100644
--- a/tests/unit/term/test_datatables.py
+++ b/tests/unit/term/test_datatables.py
@@ -4,6 +4,8 @@
 import pytest
 
 from lute.term.datatables import get_data_tables_list
+from lute.db import db
+from tests.utils import add_terms
 
 
 @pytest.fixture(name="_dt_params")
@@ -17,18 +19,20 @@ def fixture_dt_params():
         "draw": "1",
         "columns": columns,
         "order": [{"column": "1", "dir": "asc"}],
-        "start": "1",
+        "start": "0",
         "length": "10",
         "search": {"value": "", "regex": False},
         # Filters - set "manually" in the route.
         # Cheating here ... had to look at the request payload
         # in devtools to see what was being sent.
+        "filtLanguage": "null",  # Ha!
         "filtParentsOnly": "false",
         "filtAgeMin": "",
         "filtAgeMax": "",
         "filtStatusMin": "0",
-        "filtStatusMax": "0",
+        "filtStatusMax": "99",
         "filtIncludeIgnored": "false",
+        "filtTermIDs": "",
     }
     return params
 
@@ -37,7 +41,7 @@ def test_smoke_term_datatables_query_runs(app_context, _dt_params):
     """
     Smoke test only, ensure query runs.
    """
-    get_data_tables_list(_dt_params)
+    get_data_tables_list(_dt_params, db.session)
     # print(d['data'])
     a = 1
     assert a == 1, "dummy check"
@@ -45,10 +49,29 @@ def test_smoke_term_datatables_query_runs(app_context, _dt_params):
 
 def test_smoke_query_with_filter_params_runs(app_context, _dt_params):
     "Smoke test with filters set."
+    _dt_params["filtLanguage"] = "44"
     _dt_params["filtParentsOnly"] = "true"
     _dt_params["filtAgeMin"] = "1"
     _dt_params["filtAgeMax"] = "10"
     _dt_params["filtStatusMin"] = "2"
     _dt_params["filtStatusMax"] = "4"
     _dt_params["filtIncludeIgnored"] = "true"
-    get_data_tables_list(_dt_params)
+    _dt_params["filtTermIDs"] = "42,43"
+    get_data_tables_list(_dt_params, db.session)
+
+
+def test_parents_included_in_termids_query(app_context, _dt_params, spanish):
+    "When viewing the term list for a page, it's useful to see parents as well."
+    # pylint: disable=unbalanced-tuple-unpacking
+    [t, p, g] = add_terms(spanish, ["T", "P", "G"])
+    t.add_parent(p)
+    p.add_parent(g)
+    db.session.add(t)
+    db.session.add(p)
+    db.session.commit()
+
+    _dt_params["filtTermIDs"] = f"{t.id}"
+    d = get_data_tables_list(_dt_params, db.session)
+    terms = [t["WoText"] for t in d["data"]]
+    terms = sorted(terms)
+    assert terms == ["P", "T"]
diff --git a/tests/unit/term/test_service_apply_ajax_update.py b/tests/unit/term/test_service_apply_ajax_update.py
new file mode 100644
index 000000000..9e2d000ab
--- /dev/null
+++ b/tests/unit/term/test_service_apply_ajax_update.py
@@ -0,0 +1,154 @@
+"""
+Term service apply_ajax_update tests.
+
+Generally smoke tests.
+""" + +import pytest +from lute.models.repositories import TermRepository +from lute.db import db +from lute.term.service import Service, TermServiceException +from tests.utils import add_terms +from tests.dbasserts import assert_sql_result + +# pylint: disable=unbalanced-tuple-unpacking,missing-function-docstring + + +def assert_updated(termid, expected, msg=""): + "Return a term as a string for fast asserts." + t = TermRepository(db.session).find(termid) + bad_keys = [ + k + for k in expected.keys() + if k not in ["parents", "status", "tags", "translation"] + ] + assert len(bad_keys) == 0, "no bad keys " + ", ".join(bad_keys) + if "translation" in expected: + assert expected["translation"] == t.translation, msg + if "parents" in expected: + parents = expected["parents"] + assert parents == sorted([p.text for p in t.parents]), msg + should_by_sync = len(parents) == 1 + assert t.sync_status == should_by_sync, f"sync status with parents = {parents}" + if "status" in expected: + assert expected["status"] == t.status, msg + if "tags" in expected: + assert expected["tags"] == sorted([tag.text for tag in t.term_tags]), msg + + +def _apply_updates(term_id, update_type, values): + "Apply updates." + svc = Service(db.session) + svc.apply_ajax_update(term_id, update_type, values) + + +def test_smoke_test(app_context, spanish): + "Smoke test." + [t, _] = add_terms(spanish, ["T", "P"]) + _apply_updates(t.id, "parents", ["P", "X"]) + _apply_updates(t.id, "status", 4) + _apply_updates(t.id, "term_tags", ["tag1", "tag2"]) + _apply_updates(t.id, "translation", "new_translation") + expected = { + "translation": "new_translation", + "parents": ["P", "X"], + "status": 4, + "tags": ["tag1", "tag2"], + } + assert_updated(t.id, expected, "smoke all items") + + +def test_term_gets_parent_status_if_parent_status_set(app_context, spanish): + "Smoke test." + [t, p] = add_terms(spanish, ["T", "P"]) + t.status = 1 + p.status = 4 + db.session.add(t) + db.session.add(p) + db.session.commit() + + _apply_updates(t.id, "parents", ["P"]) + expected = {"status": 4} + assert_updated(t.id, expected, "gets parent's status") + assert_updated(p.id, expected, "parent keeps status") + + +def test_term_keeps_status_if_parent_status_unknown(app_context, spanish): + "Smoke test." + [t, p] = add_terms(spanish, ["T", "P"]) + t.status = 1 + p.status = 0 + db.session.add(t) + db.session.add(p) + db.session.commit() + + _apply_updates(t.id, "parents", ["P"]) + expected = {"status": 1} + assert_updated(t.id, expected, "keeps own status") + expected = {"status": 1} + assert_updated(p.id, expected, "parent gets child's status!") + + +def test_bad_term_id_throws(app_context): + svc = Service(db.session) + with pytest.raises(TermServiceException, match="No term with id -99"): + svc.apply_ajax_update(-99, "status", 99) + + +def test_bad_update_type_throws(app_context, spanish): + svc = Service(db.session) + [t] = add_terms(spanish, ["T"]) + with pytest.raises(TermServiceException, match="Bad update type"): + svc.apply_ajax_update(t.id, "trash", 99) + + +def test_bad_status_throws(app_context, spanish): + svc = Service(db.session) + [t] = add_terms(spanish, ["T"]) + with pytest.raises(TermServiceException, match="Bad status value"): + svc.apply_ajax_update(t.id, "status", 42) + + +def test_can_remove_values(app_context, spanish): + "Smoke test." 
+    [t, _] = add_terms(spanish, ["T", "P"])
+    _apply_updates(t.id, "parents", ["P", "X"])
+    _apply_updates(t.id, "term_tags", ["tag1", "tag2"])
+    _apply_updates(t.id, "translation", "new_translation")
+    expected = {
+        "translation": "new_translation",
+        "parents": ["P", "X"],
+        "tags": ["tag1", "tag2"],
+    }
+    assert_updated(t.id, expected, "smoke all items")
+
+    _apply_updates(t.id, "parents", [])
+    _apply_updates(t.id, "term_tags", [])
+    _apply_updates(t.id, "translation", "")
+    expected = {
+        "translation": None,
+        "parents": [],
+        "tags": [],
+    }
+    assert_updated(t.id, expected, "smoke all items updated back to nothing")
+
+
+def test_parent_created_if_needed(app_context, spanish):
+    [t, _] = add_terms(spanish, ["T", "P"])
+    sql = "select WoText from words order by WoText"
+    assert_sql_result(sql, ["P", "T"], "initial state")
+    _apply_updates(t.id, "parents", ["P", "X"])
+    assert_sql_result(sql, ["P", "T", "X"], "X created")
+
+
+def test_correct_parent_found_if_has_zero_width_spaces(app_context, spanish):
+    [t, _] = add_terms(spanish, ["T", "P Q"])
+    sql = "select WoText from words order by WoText"
+    assert_sql_result(sql, ["P/ /Q", "T"], "initial state")
+
+    zws = "\u200B"
+    parent = f"P{zws} {zws}Q"
+    _apply_updates(t.id, "parents", [parent])
+    expected = {"parents": [parent]}
+    assert_updated(t.id, expected, "correct zws-included parent found")
+    assert_sql_result(sql, ["P/ /Q", "T"], "nothing new created")
diff --git a/tests/unit/term/test_service_bulk_updates.py b/tests/unit/term/test_service_bulk_updates.py
new file mode 100644
index 000000000..7a0d28d91
--- /dev/null
+++ b/tests/unit/term/test_service_bulk_updates.py
@@ -0,0 +1,152 @@
+"""
+Term service apply_bulk_updates tests.
+"""
+
+import pytest
+from lute.models.repositories import TermRepository
+from lute.models.term import TermTag
+from lute.db import db
+from lute.term.service import Service, TermServiceException, BulkTermUpdateData
+from tests.utils import add_terms
+from tests.dbasserts import assert_sql_result
+
+# pylint: disable=unbalanced-tuple-unpacking
+
+
+def assert_updated(termid, expected, msg=""):
+    "Check the expected fields of the saved term."
+    t = TermRepository(db.session).find(termid)
+    bad_keys = [
+        k for k in expected.keys() if k not in ["parents", "status", "tags", "text"]
+    ]
+    assert len(bad_keys) == 0, "bad keys in expected: " + ", ".join(bad_keys)
+    if "text" in expected:
+        assert expected["text"] == t.text, msg
+    if "parents" in expected:
+        assert expected["parents"] == sorted([p.text for p in t.parents]), msg
+    if "status" in expected:
+        assert expected["status"] == t.status, msg
+    if "tags" in expected:
+        assert expected["tags"] == sorted([tag.text for tag in t.term_tags]), msg
+
+
+def _apply_updates(bud):
+    "Apply BulkTermUpdateData bud."
+    svc = Service(db.session)
+    svc.apply_bulk_updates(bud)
+
+
+def test_downcasing(app_context, spanish):
+    "lowercase_terms option downcases the term text."
+    [t, p] = add_terms(spanish, ["T", "P"])
+    bud = BulkTermUpdateData(term_ids=[t.id], lowercase_terms=False, parent_id=p.id)
+    _apply_updates(bud)
+    expected = {"text": "T", "parents": ["P"], "status": 1, "tags": []}
+    assert_updated(t.id, expected, "no downcasing")
+
+    bud.lowercase_terms = True
+    _apply_updates(bud)
+    expected = {"text": "t", "parents": ["P"], "status": 1, "tags": []}
+    assert_updated(t.id, expected, "t downcased")
+
+
+def test_bulk_updates_all_terms_must_be_same_lang(app_context, spanish, english):
+    "All bulk-updated terms must be in the same language."
+    t, p = add_terms(spanish, ["t", "p"])
+    [e] = add_terms(english, ["e"])
+    bud = BulkTermUpdateData(term_ids=[t.id, e.id], parent_id=p.id)
+    svc = Service(db.session)
+    with pytest.raises(TermServiceException, match="Terms not all the same language"):
+        svc.apply_bulk_updates(bud)
+
+
+def test_add_parent_by_id(app_context, spanish):
+    "Add parent."
+    [t, p] = add_terms(spanish, ["T", "p"])
+    bud = BulkTermUpdateData(term_ids=[t.id], parent_id=p.id)
+    _apply_updates(bud)
+    expected = {"text": "T", "parents": ["p"], "status": 1, "tags": []}
+    assert_updated(t.id, expected, "parent added")
+
+
+def test_remove_all_parents(app_context, spanish):
+    "Checkbox to remove all parents."
+    [t, p] = add_terms(spanish, ["t", "p"])
+    bud = BulkTermUpdateData(term_ids=[t.id], parent_id=p.id)
+    _apply_updates(bud)
+    expected = {"parents": ["p"], "status": 1, "tags": []}
+    assert_updated(t.id, expected, "parent added")
+
+    bud = BulkTermUpdateData(term_ids=[t.id], remove_parents=True)
+    _apply_updates(bud)
+    expected = {"parents": [], "status": 1, "tags": []}
+    assert_updated(t.id, expected, "parent removed")
+
+
+def test_add_parent_by_text_existing_parent(app_context, spanish):
+    "Finds the existing parent by text."
+    [t, _] = add_terms(spanish, ["t", "p"])
+    bud = BulkTermUpdateData(term_ids=[t.id], parent_text="p")
+    _apply_updates(bud)
+    expected = {"parents": ["p"], "status": 1, "tags": []}
+    assert_updated(t.id, expected, "parent added")
+
+
+def test_add_parent_by_text_new_parent(app_context, spanish):
+    "User can create a new parent in the tagify parent field."
+    [t] = add_terms(spanish, ["t"])
+    bud = BulkTermUpdateData(term_ids=[t.id], parent_text="newparent")
+    _apply_updates(bud)
+    expected = {"parents": ["newparent"], "status": 1, "tags": []}
+    assert_updated(t.id, expected, "parent added")
+
+
+def test_add_parent_text_ignored_if_id_present(app_context, spanish):
+    "Just in case parent_id and parent_text are both sent."
+    [t, p] = add_terms(spanish, ["t", "p"])
+    bud = BulkTermUpdateData(term_ids=[t.id], parent_id=p.id, parent_text="ignored")
+    _apply_updates(bud)
+    expected = {"parents": ["p"], "status": 1, "tags": []}
+    assert_updated(t.id, expected, "parent added")
+
+
+def test_set_status(app_context, spanish):
+    "Sanity check."
+    [t] = add_terms(spanish, ["t"])
+    bud = BulkTermUpdateData(term_ids=[t.id], change_status=True, status_value=4)
+    _apply_updates(bud)
+    expected = {"parents": [], "status": 4, "tags": []}
+    assert_updated(t.id, expected, "status")
+
+
+def test_set_status_skipped_if_change_status_false(app_context, spanish):
+    "Sanity check."
+    [t] = add_terms(spanish, ["t"])
+    bud = BulkTermUpdateData(term_ids=[t.id], change_status=False, status_value=4)
+    _apply_updates(bud)
+    expected = {"parents": [], "status": 1, "tags": []}
+    assert_updated(t.id, expected, "status unchanged")
+
+
+def test_bulk_update_tag_add_and_remove_smoke_test(app_context, spanish):
+    "Tags can be added and removed in a single bulk update."
+ [t] = add_terms(spanish, ["t"]) + t.add_term_tag(TermTag("hello")) + t.add_term_tag(TermTag("there")) + db.session.add(t) + db.session.commit() + + expected = {"parents": [], "status": 1, "tags": ["hello", "there"]} + assert_updated(t.id, expected, "initial tags") + + bud = BulkTermUpdateData( + term_ids=[t.id], add_tags=["hello", "cat"], remove_tags=["there", "dog"] + ) + _apply_updates(bud) + + expected = {"parents": [], "status": 1, "tags": ["cat", "hello"]} + assert_updated(t.id, expected, "tags added,removed") + + tagssql = "select TgText from tags order by TgText" + expected_tags = ["cat", "hello", "there"] + assert_sql_result(tagssql, expected_tags, "tag created and added if needed") diff --git a/tests/unit/term_parent_map/test_service.py b/tests/unit/term_parent_map/test_service.py index d50add881..0c55ab1ee 100644 --- a/tests/unit/term_parent_map/test_service.py +++ b/tests/unit/term_parent_map/test_service.py @@ -5,10 +5,8 @@ import os import tempfile import pytest -from lute.term_parent_map.service import ( - export_terms_without_parents, - export_unknown_terms, -) +from lute.db import db +from lute.term_parent_map.service import Service from tests.utils import add_terms, make_book @@ -24,8 +22,6 @@ def fixture_temp_file(): @pytest.fixture(name="_book") def fixture_terms_and_book(spanish): "Create a book." - spanish_terms = ["gato", "lista", "tiene una", "listo"] - add_terms(spanish, spanish_terms) content = "Hola tengo un gato. No tengo una lista.\nElla tiene una bebida." book = make_book("Hola", content, spanish) yield book @@ -40,15 +36,36 @@ def assert_file_content(fname, expected): assert expected == actual, "contents" -def test_smoke_language_file_created(app_context, spanish, _book, output_tempfile): +def test_smoke_book_file_created(app_context, _book, output_tempfile): "Smoke test only." - export_terms_without_parents(spanish, output_tempfile) - expected = ["gato", "lista", "listo"] + service = Service(db.session) + service.export_unknown_terms(_book, output_tempfile) + expected = [ + "hola", + "gato", + "lista", + "tengo", + "un", + "no", + "una", + "ella", + "tiene", + "bebida", + ] assert_file_content(output_tempfile, expected) -def test_smoke_book_file_created(app_context, _book, output_tempfile): - "Smoke test only." - export_unknown_terms(_book, output_tempfile) - expected = ["hola", "tengo", "un", "no", "una", "ella", "tiene", "bebida"] +def test_known_multiword_terms_can_hide_unknown_terms( + spanish, app_context, _book, output_tempfile +): + "See what terms would show up as blue 'unknown' for a book." + spanish_terms = ["gato", "lista", "tiene una", "listo"] + add_terms(spanish, spanish_terms) + service = Service(db.session) + service.export_unknown_terms(_book, output_tempfile) + expected = ["hola", "tengo", "un", "no", "ella", "tiene", "una", "bebida"] assert_file_content(output_tempfile, expected) + + +# def test_book_file_contains_status_0_words(app_context, _book, output_tempfile): +# "Status 0 words are created as 'placeholders', and should be included." 
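The commented-out test that closes the file above marks known missing coverage. A minimal sketch of how it might be completed, assuming placeholder terms are created with status 0 and that `export_unknown_terms` is meant to include them (that behaviour is unverified, which is presumably why the test is still commented out):

```python
# Hypothetical completion of the commented-out test above.
# The status-0 handling and the expected word list are assumptions.
def test_book_file_contains_status_0_words(spanish, app_context, _book, output_tempfile):
    "Status 0 words are created as 'placeholders', and should be included."
    [t] = add_terms(spanish, ["gato"])
    t.status = 0  # placeholder status
    db.session.add(t)
    db.session.commit()

    service = Service(db.session)
    service.export_unknown_terms(_book, output_tempfile)
    # "gato" exists, but only as a status-0 placeholder, so it should
    # still be treated as unknown and exported with the other words.
    expected = ["hola", "gato", "lista", "tengo", "un", "no", "una", "ella", "tiene", "bebida"]
    assert_file_content(output_tempfile, expected)
```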
diff --git a/tests/unit/termtag/__init__.py b/tests/unit/termtag/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/termtag/test_datatables.py b/tests/unit/termtag/test_datatables.py
index 521e1f03f..03ec9d6f8 100644
--- a/tests/unit/termtag/test_datatables.py
+++ b/tests/unit/termtag/test_datatables.py
@@ -3,6 +3,7 @@
 """
 
 from lute.termtag.datatables import get_data_tables_list
+from lute.db import db
 
 
 def test_smoke_datatables_query_runs(app_context):
@@ -22,7 +23,7 @@ def test_smoke_datatables_query_runs(app_context):
         "search": {"value": "", "regex": False},
     }
 
-    d = get_data_tables_list(params)
+    d = get_data_tables_list(params, db.session)
     print(d)
     a = 1
     assert a == 1, "dummy check"
diff --git a/tests/unit/termtag/test_routes.py b/tests/unit/termtag/test_routes.py
new file mode 100644
index 000000000..f8f579c9f
--- /dev/null
+++ b/tests/unit/termtag/test_routes.py
@@ -0,0 +1,36 @@
+"""
+TermTag mapping tests.
+"""
+
+from lute.models.term import Term, TermTag
+from lute.db import db
+from lute.termtag.routes import delete as route_delete
+from tests.dbasserts import assert_record_count_equals
+
+
+def test_deleting_termtag_removes_wordtags_table_record(empty_db, spanish):
+    "Association record should be deleted if tag is deleted."
+
+    tg = TermTag("tag")
+    db.session.add(tg)
+    db.session.commit()
+
+    term = Term(spanish, "HOLA")
+    term.add_term_tag(tg)
+    db.session.add(term)
+    db.session.commit()
+
+    sqlterms = "select * from words"
+    assert_record_count_equals(sqlterms, 1, "term sanity check on save")
+
+    sqltags = "select * from tags"
+    assert_record_count_equals(sqltags, 1, "tag sanity check on save")
+
+    sqlassoc = "select * from wordtags"
+    assert_record_count_equals(sqlassoc, 1, "word tag associations exist")
+
+    route_delete(tg.id)
+
+    assert_record_count_equals(sqlterms, 1, "term stays")
+    assert_record_count_equals(sqltags, 0, "tag removed")
+    assert_record_count_equals(sqlassoc, 0, "associations removed")
diff --git a/tests/unit/textbookmark/test_datatables.py b/tests/unit/textbookmark/test_datatables.py
new file mode 100644
index 000000000..c4d9650b3
--- /dev/null
+++ b/tests/unit/textbookmark/test_datatables.py
@@ -0,0 +1,33 @@
+"""
+TextBookmark DataTable tests.
+"""
+
+import pytest
+from lute.bookmarks.datatables import get_data_tables_list
+from lute.db import db
+
+
+@pytest.fixture(name="_dt_params")
+def fixture_dt_params():
+    "Sample query params."
+    columns = [
+        {"data": "0", "name": "TxOrder", "searchable": True, "orderable": True},
+        {"data": "1", "name": "TbTitle", "searchable": True, "orderable": True},
+    ]
+    params = {
+        "draw": "1",
+        "columns": columns,
+        "order": [{"column": "1", "dir": "asc"}],
+        "start": "1",
+        "length": "10",
+        "search": {"value": "", "regex": False},
+    }
+    return params
+
+
+def test_smoke_term_datatables_query_runs(app_context, _dt_params):
+    """
+    Smoke test only, ensure query runs.
+    """
+    data = get_data_tables_list(_dt_params, 1, db.session)
+    assert data is not None
diff --git a/tests/unit/themes/__init__.py b/tests/unit/themes/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/themes/test_service.py b/tests/unit/themes/test_service.py
new file mode 100644
index 000000000..f1a83a80f
--- /dev/null
+++ b/tests/unit/themes/test_service.py
@@ -0,0 +1,119 @@
+"""
+Theme service tests.
+""" + +import os +from lute.themes.service import Service +from lute.db import db +from lute.models.repositories import UserSettingRepository + + +def test_list_themes(app_context): + "Smoke test only." + svc = Service(db.session) + lst = svc.list_themes() + assert len(lst) > 0, "have themes" + assert lst[0][0] == "-", "No theme" + assert lst[0][1] == "(default)" + + assert ("Apple_Books.css", "Apple Books") in lst + + +def test_default_theme_is_blank_css(app_context): + "UserSetting starts off with blank css." + repo = UserSettingRepository(db.session) + assert repo.get_value("current_theme") == "-" + svc = Service(db.session) + assert svc.get_current_css() == "", "Default = empty string." + + +def test_bad_setting_returns_blank_css(app_context): + "Just in case." + repo = UserSettingRepository(db.session) + repo.set_value("current_theme", "_missing_file.css") + db.session.commit() + svc = Service(db.session) + assert svc.get_current_css() == "", "Missing = empty string." + + +def test_setting_a_theme_returns_its_css(app_context): + "User choice is used." + repo = UserSettingRepository(db.session) + repo.set_value("current_theme", "Apple_Books.css") + db.session.commit() + svc = Service(db.session) + assert "Georgia" in svc.get_current_css(), "font specified" + + +def test_next_theme_cycles_themes(app_context): + """ + Users should be able to move the 'next' theme quickly + while reading, via a hotkey. + """ + svc = Service(db.session) + lst = svc.list_themes() + repo = UserSettingRepository(db.session) + assert repo.get_value("current_theme") == lst[0][0] + svc.next_theme() + assert repo.get_value("current_theme") == lst[1][0] + for _ in range(0, len(lst) + 10): # pylint: disable=consider-using-enumerate + svc.next_theme() + svc.next_theme() + # OK + + +def _delete_custom_theme_files(theme_dir): + "Delete custom file." + for filename in os.listdir(theme_dir): + filepath = os.path.join(theme_dir, filename) + if os.path.isfile(filepath): + os.remove(filepath) + + +def test_custom_theme_in_theme_dir_is_available(app, app_context): + "Can use .css file in theme dir." + theme_dir = app.env_config.userthemespath + _delete_custom_theme_files(theme_dir) + + mytheme_content = "p { font-size: 30pt; }" + themefile = os.path.join(theme_dir, "my_theme.css") + with open(themefile, "w", encoding="utf-8") as f: + f.write(mytheme_content) + + svc = Service(db.session) + lst = svc.list_themes() + assert ("my_theme.css", "my theme") in lst, "Have my theme" + + repo = UserSettingRepository(db.session) + repo.set_value("current_theme", "my_theme.css") + db.session.commit() + assert mytheme_content in svc.get_current_css(), "my theme used" + + +def test_custom_theme_in_theme_dir_appends_to_existing_theme(app, app_context): + "Can use .css file in theme dir." 
+ theme_dir = app.env_config.userthemespath + _delete_custom_theme_files(theme_dir) + + svc = Service(db.session) + lst = svc.list_themes() + assert ("Apple_Books.css", "Apple Books") in lst + repo = UserSettingRepository(db.session) + repo.set_value("current_theme", "Apple_Books.css") + db.session.commit() + old_content = svc.get_current_css() + + mytheme_content = "p { font-size: 30pt; }" + themefile = os.path.join(theme_dir, "Apple_Books.css") + with open(themefile, "w", encoding="utf-8") as f: + f.write(mytheme_content) + + lst = svc.list_themes() + assert ("Apple_Books.css", "Apple Books") in lst, "Have my theme" + + repo.set_value("current_theme", "Apple_Books.css") + db.session.commit() + + new_css = old_content + "\n\n/* Additional user css */\n\n" + mytheme_content + new_content = svc.get_current_css() + assert new_css in new_content, "my theme used in addition to built-in" diff --git a/tests/unit/utils/test_DataTablesSqliteQuery.py b/tests/unit/utils/test_DataTablesSqliteQuery.py index bbd446e4b..8210f2abd 100644 --- a/tests/unit/utils/test_DataTablesSqliteQuery.py +++ b/tests/unit/utils/test_DataTablesSqliteQuery.py @@ -45,7 +45,7 @@ def test_smoke_test(basesql, parameters): expected = { "recordsTotal": "select count(*) from (select CatID, Color, Food from Cats) realbase", "recordsFiltered": "select count(*) from (select CatID, Color, Food from Cats) realbase ", - "data": "SELECT CatID, Color, Food FROM (select * from (select CatID, Color, Food from Cats) realbase ORDER BY Color asc, Color, Food LIMIT 10, 50) src ORDER BY Color asc, Color, Food", + "data": "SELECT * FROM (select * from (select CatID, Color, Food from Cats) realbase ORDER BY Color asc, Color, Food LIMIT 10, 50) src ORDER BY Color asc, Color, Food", "params": {}, "draw": 1, } @@ -61,7 +61,7 @@ def test_sorting(basesql, parameters): expected = { "recordsTotal": "select count(*) from (select CatID, Color, Food from Cats) realbase", "recordsFiltered": "select count(*) from (select CatID, Color, Food from Cats) realbase ", - "data": "SELECT CatID, Color, Food FROM (select * from (select CatID, Color, Food from Cats) realbase ORDER BY Food desc, Color, Food LIMIT 10, 50) src ORDER BY Food desc, Color, Food", + "data": "SELECT * FROM (select * from (select CatID, Color, Food from Cats) realbase ORDER BY Food desc, Color, Food LIMIT 10, 50) src ORDER BY Food desc, Color, Food", "params": {}, "draw": 1, } @@ -77,7 +77,7 @@ def test_single_search(basesql, parameters): expected = { "recordsTotal": "select count(*) from (select CatID, Color, Food from Cats) realbase", "recordsFiltered": "select count(*) from (select CatID, Color, Food from Cats) realbase WHERE (Color LIKE '%' || :s0 || '%' OR Food LIKE '%' || :s0 || '%')", - "data": "SELECT CatID, Color, Food FROM (select * from (select CatID, Color, Food from Cats) realbase WHERE (Color LIKE '%' || :s0 || '%' OR Food LIKE '%' || :s0 || '%') ORDER BY Color asc, Color, Food LIMIT 10, 50) src ORDER BY Color asc, Color, Food", + "data": "SELECT * FROM (select * from (select CatID, Color, Food from Cats) realbase WHERE (Color LIKE '%' || :s0 || '%' OR Food LIKE '%' || :s0 || '%') ORDER BY Color asc, Color, Food LIMIT 10, 50) src ORDER BY Color asc, Color, Food", "params": {"s0": "XXX"}, "draw": 1, } @@ -93,7 +93,7 @@ def test_multiple_search_terms(basesql, parameters): expected = { "recordsTotal": "select count(*) from (select CatID, Color, Food from Cats) realbase", "recordsFiltered": "select count(*) from (select CatID, Color, Food from Cats) realbase WHERE (Color LIKE '%' || 
:s0 || '%' OR Food LIKE '%' || :s0 || '%') AND (Color LIKE '%' || :s1 || '%' OR Food LIKE '%' || :s1 || '%')",
-        "data": "SELECT CatID, Color, Food FROM (select * from (select CatID, Color, Food from Cats) realbase WHERE (Color LIKE '%' || :s0 || '%' OR Food LIKE '%' || :s0 || '%') AND (Color LIKE '%' || :s1 || '%' OR Food LIKE '%' || :s1 || '%') ORDER BY Color asc, Color, Food LIMIT 10, 50) src ORDER BY Color asc, Color, Food",
+        "data": "SELECT * FROM (select * from (select CatID, Color, Food from Cats) realbase WHERE (Color LIKE '%' || :s0 || '%' OR Food LIKE '%' || :s0 || '%') AND (Color LIKE '%' || :s1 || '%' OR Food LIKE '%' || :s1 || '%') ORDER BY Color asc, Color, Food LIMIT 10, 50) src ORDER BY Color asc, Color, Food",
         "params": {"s0": "XXX", "s1": "YYY"},
         "draw": 1,
     }
diff --git a/tests/unit/utils/test_formutils.py b/tests/unit/utils/test_formutils.py
index 783756401..f79069096 100644
--- a/tests/unit/utils/test_formutils.py
+++ b/tests/unit/utils/test_formutils.py
@@ -2,12 +2,32 @@
 DataTables sqlite tests.
 """
 
-from lute.utils.formutils import language_choices
+from lute.db import db
+from lute.utils.formutils import language_choices, valid_current_language_id
+from lute.models.repositories import UserSettingRepository
 
+# pylint: disable=unused-argument
 
-def test_language_choices(app_context):
+
+def test_language_choices(app_context, spanish, english):
     "Gets all languages."
-    choices = language_choices()
+    choices = language_choices(db.session)
     assert choices[0][1] == "-", "- at the top"
     langnames = [c[1] for c in choices]
     assert "Spanish" in langnames, "have Spanish"
+
+
+def test_language_choices_if_only_single_language_exists(app_context, spanish):
+    "The sole language is the only choice; no '-' placeholder at the top."
+    choices = language_choices(db.session)
+    assert choices[0][1] == "Spanish", "sole choice possible"
+
+
+def test_valid_current_language_id(app_context, spanish, english):
+    "Sanity check only."
+    repo = UserSettingRepository(db.session)
+    repo.set_value("current_language_id", 9999)
+    assert int(repo.get_value("current_language_id")) == 9999, "pre-check"
+    curr_lang_id = int(valid_current_language_id(db.session))
+    assert curr_lang_id == 0, "set back to 0"
+    assert int(repo.get_value("current_language_id")) == 0, "re-set to 0"
diff --git a/tests/utils.py b/tests/utils.py
index a4aa75ad8..f3ea4e778 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -3,8 +3,8 @@
 """
 
 from lute.models.term import Term
-from lute.models.book import Book
-from lute.read.service import get_paragraphs
+from lute.models.book import Book, Text
+from lute.read.render.service import Service
 from lute.db import db
 
 
@@ -12,17 +12,28 @@ def add_terms(language, term_array):
     """
     Make and save terms.
     """
+    ret = []
     for term in term_array:
         t = Term(language, term)
         db.session.add(t)
+        ret.append(t)
     db.session.commit()
+    return ret
 
 
 def make_book(title, content, language):
     """
     Make a book.
""" - b = Book.create_book(title, language, content) + b = Book() + b.title = title + b.language = language + if isinstance(content, str): + content = [content] + n = 0 + for c in content: + n += 1 + _ = Text(b, c, n) return b @@ -46,9 +57,10 @@ def stringize(ti): usestringize = overridestringize or stringize ret = [] - paras = get_paragraphs(text) + service = Service(db.session) + paras = service.get_paragraphs(text.text, text.book.language) for p in paras: - tis = [t for s in p for t in s.textitems] + tis = [t for s in p for t in s] ss = [usestringize(ti) for ti in tis] ret.append(imploder.join(ss)) return "//".join(ret) diff --git a/utils/dump_changelog.sh b/utils/dump_changelog.sh index 90103c795..fcc792ef4 100755 --- a/utils/dump_changelog.sh +++ b/utils/dump_changelog.sh @@ -68,7 +68,7 @@ _(raw info to process)_ " > docs/tmp_CHANGELOG.tmp # Add raw log info. -git log ${FROMTAG}..${TOCOMMIT} --pretty="* %s" >> docs/tmp_CHANGELOG.tmp +git log ${FROMTAG}..${TOCOMMIT} --oneline --graph >> docs/tmp_CHANGELOG.tmp # Finish changelog entry. echo " diff --git a/utils/findstring.sh b/utils/findstring.sh index 2a2e9a0a7..b1754f394 100755 --- a/utils/findstring.sh +++ b/utils/findstring.sh @@ -12,7 +12,7 @@ SEARCHFOR="$@" function runsearch() { echo "# $1 ---------------" - find $1 -name "*.*" -maxdepth $2 -print0 | xargs -0 grep "$SEARCHFOR" 2>/dev/null | grep -v .min.js | grep -v phpunit.result.cache | grep -v findstring.sh | grep -v Binary | grep -v js/jquery | grep -v docs/archive | grep -v css/jplayer.css | grep -v css/jquery | grep -v iui/iuix.css | grep -v css/datatables | grep -v iui/iui.css + find $1 -name "*" -maxdepth $2 -print0 | xargs -0 grep "$SEARCHFOR" 2>/dev/null | grep -v findstring.sh | grep -v Binary | grep -v js/jquery | grep -v docs/archive | grep -v lute/static/vendor } runsearch . 1 @@ -20,6 +20,8 @@ runsearch lute 8 runsearch tests 8 runsearch utils 8 runsearch .github 8 +runsearch docker 8 +runsearch plugins 8 # Script sometimes returned w/ non-zero exit code, # breaking testing.