diff --git a/.forgejo/workflows/deploy-to-wiki.yml b/.forgejo/workflows/deploy-to-wiki.yml new file mode 100644 index 00000000..eda21ce6 --- /dev/null +++ b/.forgejo/workflows/deploy-to-wiki.yml @@ -0,0 +1,56 @@ +# Copyright (c) 2026 Morwenn +# SPDX-License-Identifier: MIT + +name: Wiki Deployment + +on: + push: + branches: + - trunk + paths: + - 'docs/**' + workflow_dispatch: + +jobs: + sync-wiki-files: + name: Sync Wiki Files + + runs-on: codeberg-tiny-lazy + + steps: + - name: Checkout /docs + uses: actions/checkout@v6 + with: + repository: ${{forgejo.repository}} + path: main + + - name: Checkout wiki + uses: actions/checkout@v4 + with: + repository: ${{forgejo.repository}}.wiki + path: wiki + + - name: Sync wiki files + run: | + apt-get update + apt-get install -y rsync + for docname in main/docs/*.md; do + old=$(basename "$docname"); + new=${old%.*}; + find main/docs -name "*.md" -exec sed -i "s/$old/$new/g" {} \; + done + rsync -avzr --delete --exclude='.git/' "main/docs/" "wiki/" + + - name: Commit changes + working-directory: wiki + run: | + git config --local user.email "action@forgejo.org" + git config --local user.name "Forgejo Action" + git add . 
+ # Nothing staged means the wiki already matches docs/: skip the commit instead of failing the job + git diff --cached --quiet || git commit -m "Synchronize wiki with docs/" + + - name: Push changes to wiki + working-directory: wiki + run: | + git push diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml index db432bae..d53467d8 100644 --- a/.github/workflows/build-macos.yml +++ b/.github/workflows/build-macos.yml @@ -23,14 +23,14 @@ on: jobs: build: - runs-on: macos-13 + runs-on: macos-14 strategy: fail-fast: false matrix: config: # Release build - - cxx: g++-12 + - cxx: g++-13 build_type: Release - cxx: clang++ build_type: Release diff --git a/.github/workflows/deploy-to-wiki.yml b/.github/workflows/deploy-to-wiki.yml index b37eaba7..f2b1b964 100644 --- a/.github/workflows/deploy-to-wiki.yml +++ b/.github/workflows/deploy-to-wiki.yml @@ -9,6 +9,7 @@ on: - 2.x.y-stable paths: - 'docs/**' + workflow_dispatch: jobs: sync-wiki-files: diff --git a/.github/workflows/mirror-to-codeberg.yml b/.github/workflows/mirror-to-codeberg.yml new file mode 100644 index 00000000..d13a05b6 --- /dev/null +++ b/.github/workflows/mirror-to-codeberg.yml @@ -0,0 +1,25 @@ +# Copyright (c) 2025 Morwenn +# SPDX-License-Identifier: MIT + +name: Mirror Commits to Codeberg + +on: [push, workflow_dispatch] + +jobs: + mirror-to-codeberg: + name: Mirror to Codeberg + + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Mirror + uses: yesolutions/mirror-action@v0.7.0 + with: + REMOTE: 'https://codeberg.org/Morwenn/cpp-sort.git' + GIT_USERNAME: Morwenn + GIT_PASSWORD: ${{ secrets.GIT_PASSWORD }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 55b3897d..a760d814 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,13 +1,14 @@ -# Copyright (c) 2015-2025 Morwenn +# Copyright (c) 2015-2026 Morwenn # SPDX-License-Identifier: MIT cmake_minimum_required(VERSION 3.11.0) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) -project(cpp-sort VERSION 2.0.0 LANGUAGES CXX) +project(cpp-sort VERSION 2.1.0 LANGUAGES CXX) 
include(CMakePackageConfigHelpers) +include(cpp-sort-utils) include(GNUInstallDirs) # Project options @@ -30,6 +31,7 @@ if (CPPSORT_USE_LIBASSERT) if (NOT libassert_POPULATED) FetchContent_Populate(libassert) add_subdirectory(${libassert_SOURCE_DIR} ${libassert_BINARY_DIR}) + mark_system_library(libassert-lib) endif() endif() diff --git a/README.md b/README.md index 59890131..55617ae9 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ![cpp-sort logo](docs/images/cpp-sort-logo.svg) -[![Latest Release](https://img.shields.io/badge/release-2.0.0-blue.svg)](https://github.com/Morwenn/cpp-sort/releases/tag/v2.0.0) -[![Conan Package](https://img.shields.io/badge/conan-cpp--sort%2F2.0.0-blue.svg)](https://conan.io/center/recipes/cpp-sort?version=2.0.0) +[![Latest Release](https://img.shields.io/badge/release-2.1.0-blue.svg)](https://codeberg.org/Morwenn/cpp-sort/releases/tag/v2.1.0) +[![Conan Package](https://img.shields.io/badge/conan-cpp--sort%2F2.1.0-blue.svg)](https://conan.io/center/recipes/cpp-sort?version=2.1.0) [![Code Coverage](https://codecov.io/gh/Morwenn/cpp-sort/branch/2.x.y-develop/graph/badge.svg)](https://codecov.io/gh/Morwenn/cpp-sort) [![Pitchfork Layout](https://img.shields.io/badge/standard-PFL-orange.svg)](https://github.com/vector-of-bool/pitchfork) @@ -41,11 +41,12 @@ anything to be backported._ **cpp-sort** provides a full set of sorting-related features. 
Here are the main building blocks of the library: -* Every sorting algorithm exists as a function object called a [sorter](https://github.com/Morwenn/cpp-sort/wiki/Sorters) -* Sorters can be wrapped in [sorter adapters](https://github.com/Morwenn/cpp-sort/wiki/Sorter-adapters) to augment their behaviour -* The library provides a [sorter facade](https://github.com/Morwenn/cpp-sort/wiki/Sorter-facade) to easily build sorters -* [Fixed-size sorters](https://github.com/Morwenn/cpp-sort/wiki/Fixed-size-sorters) can be used to efficiently sort tiny fixed-size collections -* [Measures of disorder](https://github.com/Morwenn/cpp-sort/wiki/Measures-of-disorder) can be used to evaluate the disorder in a collection +* Every sorting algorithm exists as a function object called a [sorter][sorters] +* Sorters can be wrapped in [sorter adapters][sorter-adapters] to augment their behaviour +* The library provides a [sorter facade][sorter-facade] to easily build sorters +* [Fixed-size sorters][fixed-size-sorters] can be used to efficiently sort tiny fixed-size collections +* [Metrics][metrics] can be used to gather information about the sorting operation +* [Measures of disorder][measures-of-disorder] can be used to evaluate the disorder in a collection Here is a more complete example of what can be done with the library: @@ -97,7 +98,7 @@ some interesting guarantees (ideas often taken from the Ranges TS): * Sorters are function objects: they can directly be passed as "overload sets" to other functions You can read more about all the available tools and find some tutorials about using -and extending **cpp-sort** in [the wiki](https://github.com/Morwenn/cpp-sort/wiki). +and extending **cpp-sort** in [the wiki][cpp-sort-wiki]. # Benchmarks @@ -150,7 +151,7 @@ You can read more about those [in the wiki][tooling]. 
> piece.* > — Jarod Kintz, $3.33 -Even though some parts of the library are [original research](https://github.com/Morwenn/cpp-sort/wiki/Original-research) +Even though some parts of the library are [original research][original-research] and some others correspond to custom and rather naive implementations of standard sorting algorithms, **cpp-sort** also reuses a great deal of code and ideas from open-source projects, often altered to integrate seamlessly into the library. Here @@ -175,12 +176,11 @@ module](https://www.boost.org/doc/libs/1_80_0/libs/sort/doc/html/index.html). in [Boost.Sort](https://www.boost.org/doc/libs/1_80_0/libs/sort/doc/html/index.html). by Francisco Jose Tapia. -* [`utility::as_function`](https://github.com/Morwenn/cpp-sort/wiki/Miscellaneous-utilities#as_function), -and several projection-enhanced helper algorithms come from Eric Niebler's [Range -v3](https://github.com/ericniebler/range-v3) library. Several ideas such as proxy -iterators, customization points and projections, as well as a few other utility -functions also come from that library or from the related articles and standard -C++ proposals. +* [`utility::as_function`][utility-as-function], and several projection-enhanced helper +algorithms come from Eric Niebler's [Range v3](https://github.com/ericniebler/range-v3) +library. Several ideas such as proxy iterators, customization points and projections, +as well as a few other utility functions also come from that library or from the related +articles and standard C++ proposals. * The algorithm used by `ska_sorter` comes from Malte Skarupke's [implementation](https://github.com/skarupke/ska_sort) of his own [ska_sort](https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/) algorithm. @@ -229,6 +229,11 @@ discussion](https://stackoverflow.com/q/2786899/1364752) on StackOverflow and ar backed by the article [*Applying Sorting Networks to Synthesize Optimized Sorting Libraries*](https://arxiv.org/abs/1505.01962). 
+* The algorithm behind `utility::quicksort_adversary` is a fairly straightforward adaptation of the +one provided by M. D. McIlroy in [*A Killer Adversary for Quicksort*](https://www.cs.dartmouth.edu/~doug/mdmspe.pdf). + +* The algorithm used by [`utility::check_strict_weak_ordering`][utility-check-strict-weak-ordering] is a reimplementation of the one described in the README file of Danila Kutenin's [quadratic_strict_weak_ordering project](https://github.com/danlark1/quadratic_strict_weak_ordering). + * The test suite reimplements random number algorithms originally found in the following places: - [xoshiro256\*\*](https://prng.di.unimi.it/) - [*Optimal Discrete Uniform Generation from Coin Flips, and Applications*](https://arxiv.org/abs/1304.1916) @@ -245,9 +250,19 @@ developed by Thøger Rivera-Thorsen. [adaptive-sort]: https://en.wikipedia.org/wiki/Adaptive_sort - [benchmarks]: https://github.com/Morwenn/cpp-sort/wiki/Benchmarks - [changelog]: https://github.com/Morwenn/cpp-sort/wiki/Changelog - [drop-merge-adapter]: https://github.com/Morwenn/cpp-sort/wiki/Sorter-adapters#drop_merge_adapter - [heap-sorter]: https://github.com/Morwenn/cpp-sort/wiki/Sorters#heap_sorter - [split-adapter]: https://github.com/Morwenn/cpp-sort/wiki/Sorter-adapters#split_adapter - [tooling]: https://github.com/Morwenn/cpp-sort/wiki/Tooling + [benchmarks]: https://codeberg.org/Morwenn/cpp-sort/wiki/Benchmarks + [changelog]: https://codeberg.org/Morwenn/cpp-sort/wiki/Changelog + [cpp-sort-wiki]: https://codeberg.org/Morwenn/cpp-sort/wiki + [drop-merge-adapter]: https://codeberg.org/Morwenn/cpp-sort/wiki/Sorter-adapters#drop_merge_adapter + [fixed-size-sorters]: https://codeberg.org/Morwenn/cpp-sort/wiki/Fixed-size-sorters + [heap-sorter]: https://codeberg.org/Morwenn/cpp-sort/wiki/Sorters#heap_sorter + [measures-of-disorder]: https://codeberg.org/Morwenn/cpp-sort/wiki/Measures-of-disorder + [metrics]: https://codeberg.org/Morwenn/cpp-sort/wiki/Metrics + [original-research]: 
https://codeberg.org/Morwenn/cpp-sort/wiki/Original-research + [sorter-adapters]: https://codeberg.org/Morwenn/cpp-sort/wiki/Sorter-adapters + [sorter-facade]: https://codeberg.org/Morwenn/cpp-sort/wiki/Sorter-facade + [sorters]: https://codeberg.org/Morwenn/cpp-sort/wiki/Sorters + [split-adapter]: https://codeberg.org/Morwenn/cpp-sort/wiki/Sorter-adapters#split_adapter + [tooling]: https://codeberg.org/Morwenn/cpp-sort/wiki/Tooling + [utility-as-function]: https://codeberg.org/Morwenn/cpp-sort/wiki/Miscellaneous-utilities#as_function + [utility-check-strict-weak-ordering]: https://codeberg.org/Morwenn/cpp-sort/wiki/Miscellaneous-utilities#strict-weak-ordering-checker diff --git a/benchmarks/errorbar-plot/plot.py b/benchmarks/errorbar-plot/plot.py index 46b55152..1c02ff44 100644 --- a/benchmarks/errorbar-plot/plot.py +++ b/benchmarks/errorbar-plot/plot.py @@ -15,13 +15,14 @@ def main(): parser = argparse.ArgumentParser(description="Plot the results of the errorbar-plot benchmark.") parser.add_argument('root', help="directory with the result files to plot") - parser.add_argument('--alternative-palette', dest='use_alt_palette', - action='store_true', default=False, + parser.add_argument('--alternative-palette', + dest='use_alt_palette', + action='store_true', help="Use another color palette") args = parser.parse_args() root = pathlib.Path(args.root) - result_files = list(root.glob('*.csv')) + result_files = sorted(root.glob('*.csv')) if len(result_files) == 0: print(f"There are no files to plot in {root}") sys.exit(1) diff --git a/cmake/cpp-sort-utils.cmake b/cmake/cpp-sort-utils.cmake index fb946ef1..f7150ae6 100644 --- a/cmake/cpp-sort-utils.cmake +++ b/cmake/cpp-sort-utils.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023 Morwenn +# Copyright (c) 2019-2025 Morwenn # SPDX-License-Identifier: MIT # Add a selection of warnings to a target @@ -15,3 +15,17 @@ macro(cppsort_add_warnings target) ) endif() endmacro() + +# Mark a target as a SYSTEM library 
+function(mark_system_library target) + get_target_property(TARGET_INCLUDE_DIR ${target} INTERFACE_INCLUDE_DIRECTORIES) + # get_target_property() yields TARGET_INCLUDE_DIR-NOTFOUND when the property is unset: + # only forward actual include directories to the SYSTEM include property + if (TARGET_INCLUDE_DIR) + set_target_properties( + ${target} + PROPERTIES + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${TARGET_INCLUDE_DIR}" + ) + endif() +endfunction() diff --git a/conanfile.py b/conanfile.py index 49cfb2f6..37f81c03 100644 --- a/conanfile.py +++ b/conanfile.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright (c) 2018-2025 Morwenn +# Copyright (c) 2018-2026 Morwenn # SPDX-License-Identifier: MIT import os.path @@ -16,10 +16,10 @@ class CppSortConan(ConanFile): name = "cpp-sort" - version = "2.0.0" + version = "2.1.0" description = "Sorting algorithms & related tools" license = "MIT" - url = "https://github.com/Morwenn/cpp-sort" + url = "https://codeberg.org/Morwenn/cpp-sort" homepage = url topics = "cpp-sort", "sorting", "algorithms" author = "Morwenn " @@ -29,7 +29,8 @@ class CppSortConan(ConanFile): exports_sources = [ "include/*", "CMakeLists.txt", - "cmake/cpp-sort-config.cmake.in" + "cmake/cpp-sort-config.cmake.in", + "cmake/cpp-sort-utils.cmake", ] settings = "os", "compiler", "build_type", "arch" package_type = "header-library" diff --git a/docs/.redirects.gollum b/docs/.redirects.gollum new file mode 100644 index 00000000..632c00ba --- /dev/null +++ b/docs/.redirects.gollum @@ -0,0 +1,2 @@ +--- +Measures-of-presortedness.md: Measures-of-disorder.md diff --git a/docs/Benchmarks.md b/docs/Benchmarks.md index e43ebd2c..1ca87fa7 100644 --- a/docs/Benchmarks.md +++ b/docs/Benchmarks.md @@ -1,7 +1,7 @@ *Note: this page only benchmarks sorting algorithms under specific conditions. 
It can be used as a quick guide but if you really need a fast algorithm for a specific use case, you better run your own benchmarks.* *Last meaningful updates:* -* *2.0.0 for measures of disorder* +* *2.1.0 for measures of disorder* * *1.16.0 for slow O(n log n) sorts* * *1.14.0 for small array sorts* * *1.13.1 for unstable random-access sorts, forward sorts, and the expensive move/cheap comparison benchmark* @@ -13,7 +13,9 @@ It is worth noting that most benchmarks on this page use collections of `double` All of the graphs on this page have been generated with slightly modified versions of the scripts found in the project's benchmarks folder. There are just too many things to check; if you ever want a specific benchmark, don't hesitate to ask for it. -*The latest benchmarks were run on Windows 10 with 64-bit MinGW-w64 g++12.0, with the flags -O3 -march=native -std=c++20.* +*The benchmarks were run on:* +* *For version 2.0.0 and newer: EndeavourOS with g++ 15.2, with the flags -O3 -march=native -std=c++26.* +* *For older versions: Windows 10 with 64-bit MinGW-w64 g++ 12.0, with the flags -O3 -march=native -std=c++20.* # Random-access collections @@ -23,8 +25,8 @@ Most sorting algorithms are designed to work with random-access iterators, so th Sorting a random-access collection with an unstable sort is probably one of the most common things to want, and not only are those sorts among the fastest comparison sorts, but type-specific sorters can also be used to sort a variety of types. If you don't know what algorithm you want and don't have specific needs, then you probably want one of these. 
-![Benchmark speed of unstable sorts with increasing size for std::vector](https://i.imgur.com/Q3IEeci.png) -![Benchmark speed of unstable sorts with increasing size for std::deque](https://i.imgur.com/oRW5kFr.png) +![Benchmark speed of unstable sorts with increasing size for std::vector<double>](https://i.imgur.com/Q3IEeci.png) +![Benchmark speed of unstable sorts with increasing size for std::deque<double>](https://i.imgur.com/oRW5kFr.png) The plots above show a few general tendencies: * `selection_sort` is O(n²) and doesn't scale. @@ -33,8 +35,8 @@ The plots above show a few general tendencies: The quicksort derivatives and the hybrid radix sorts are generally the fastest of the lot, yet `drop_merge_sort` seems to offer interesting speedups for `std::deque` despite not being designed to be the fastest on truly shuffled data. Part of the explanation is that it uses `pdq_sort` in a contiguous memory buffer underneath, which might be faster for `std::deque` than sorting completely in-place. -![Benchmark unstable sorts over different patterns for std::vector](https://i.imgur.com/WZ4s6Xt.png) -![Benchmark unstable sorts over different patterns for std::deque](https://i.imgur.com/UAaObUW.png) +![Benchmark unstable sorts over different patterns for std::vector<double>](https://i.imgur.com/WZ4s6Xt.png) +![Benchmark unstable sorts over different patterns for std::deque<double>](https://i.imgur.com/UAaObUW.png) A few random takeways: * All the algorithms are more or less adaptive, not always for the same patterns. @@ -49,13 +51,13 @@ A few random takeways: Pretty much all stable sorts in the library are different flavours of merge sort with sligthly different properties. Most of them allocate additional merge memory, and a good number of those also have a fallback algorithm that makes them run in O(n log²n) instead of O(n log n) when no extra heap memory is available. 
-![Benchmark speed of stable sorts with increasing size for std::vector](https://i.imgur.com/vRW1zcs.png) -![Benchmark speed of stable sorts with increasing size for std::deque](https://i.imgur.com/CQePcBh.png) +![Benchmark speed of stable sorts with increasing size for std::vector<double>](https://i.imgur.com/vRW1zcs.png) +![Benchmark speed of stable sorts with increasing size for std::deque<double>](https://i.imgur.com/CQePcBh.png) `insertion_sort` being O(n²) it's not surprising that it doesn't perform well in such a benchmark. All the other sorting algorithms display roughly equivalent and rather tight curves. -![Benchmark stable sorts over different patterns for std::vector](https://i.imgur.com/bRQ5cu5.png) -![Benchmark stable sorts over different patterns for std::deque](https://i.imgur.com/fHIZB5L.png) +![Benchmark stable sorts over different patterns for std::vector<double>](https://i.imgur.com/bRQ5cu5.png) +![Benchmark stable sorts over different patterns for std::deque<double>](https://i.imgur.com/fHIZB5L.png) These plots highlight a few important things: * `spin_sort` consistently beats pretty much anything else. @@ -66,9 +68,9 @@ These plots highlight a few important things: I decided to include a dedicated category for slow O(n log n) sorts, because I find this class of algorithms interesting. This category contains experimental algorithms, often taken from rather old research papers. `heap_sort` is used as the "fast" algorithm in this category, despite it being consistently the slowest in the previous category. 
-![Benchmark speed of slow O(n log n) sorts with increasing size for std::vector](https://i.imgur.com/SUbyqKV.png) -![Benchmark slow O(n log n) sorts over different patterns for std::vector](https://i.imgur.com/Dli1xrp.png) -![Benchmark slow O(n log n) sorts over different patterns for std::deque](https://i.imgur.com/WxBmipj.png) +![Benchmark speed of slow O(n log n) sorts with increasing size for std::vector<double>](https://i.imgur.com/SUbyqKV.png) +![Benchmark slow O(n log n) sorts over different patterns for std::vector<double>](https://i.imgur.com/Dli1xrp.png) +![Benchmark slow O(n log n) sorts over different patterns for std::deque<double>](https://i.imgur.com/WxBmipj.png) The analysis is pretty simple here: * Most of the algorithms in this category are slow, but exhibit a good adaptiveness with most kinds of patterns. It isn't all that surprising since I specifically found them in literature about adaptive sorting. @@ -80,11 +82,11 @@ The analysis is pretty simple here: Sorting algorithms that handle non-random-access iterators are often second class citizens, but **cpp-sort** still provides a few ones. The most interesting part is that we can see how generic sorting algorithms perform compared to algorithms such as [`std::list::sort`][std-list-sort] which are aware of the data structure they are sorting. -![Benchmark speed of sorts with increasing size for std::list](https://i.imgur.com/yNQG8kk.png) +![Benchmark speed of sorts with increasing size for std::list<double>](https://i.imgur.com/yNQG8kk.png) For elements as small as `double`, there are two clear winners here: `drop_merge_sort` and `out_of_place_adapter(pdq_sort)`. Both have in common the fact that they move a part of the collection (or the whole collection) to a contiguous memory buffer and sort it there using `pdq_sort`. 
The only difference is that `drop_merge_sort` does that "accidentally" while `out_of_place_adapter` was specifically introduced to sort into a contiguous memory buffer and move back for speed. -![Benchmark sorts over different patterns for std::list](https://i.imgur.com/zlHzRLd.png) +![Benchmark sorts over different patterns for std::list<double>](https://i.imgur.com/zlHzRLd.png) `out_of_place_adapter(pdq_sort)` was not included in this benchmark, because it adapts to patterns the same way `pdq_sort` does. Comments can be added for these results: * `std::list::sort` would require more expensive to move elements for node relinking to be faster than move-based algorithms. @@ -96,8 +98,8 @@ For elements as small as `double`, there are two clear winners here: `drop_merge Even fewer sorters can handle forward iterators. `out_of_place_adapter(pdq_sort)` was not included in the patterns benchmark, because it adapts to patterns the same way `pdq_sort` does. -![Benchmark speed of sorts with increasing size for std::forward_list](https://i.imgur.com/if15kX1.png) -![Benchmark sorts over different patterns for std::forward_list](https://i.imgur.com/uF0UzLm.png) +![Benchmark speed of sorts with increasing size for std::forward_list<double>](https://i.imgur.com/if15kX1.png) +![Benchmark sorts over different patterns for std::forward_list<double>](https://i.imgur.com/uF0UzLm.png) The results are roughly the same than with bidirectional collections: * Sorting out-of-place is faster than anything else. @@ -113,8 +115,8 @@ This category will highlight the advantages of some sorters in sorting scenarios Integer sorting is a rather specific scenario for which many solutions exist: counting sorts, radix sorts, algorithms optimized to take advantage of branchless comparisons, etc. 
-![Benchmark speed of integer sorts with increasing size for std::vector](https://i.imgur.com/zuCAkIf.png) -![Benchmark integer sorts over different patterns for std::vector](https://i.imgur.com/20uDwTM.png) +![Benchmark speed of integer sorts with increasing size for std::vector<int>](https://i.imgur.com/zuCAkIf.png) +![Benchmark integer sorts over different patterns for std::vector<int>](https://i.imgur.com/20uDwTM.png) `counting_sort` appears as a clear winner here but with a catch: its speed depends on the difference between the smaller and the greater integers in the collection to sort. In the benchmarks above the integer values scale with the size of the collection, but if a collection contains just a few elements with a big difference of the minimum and maximum values, `counting_sort` won't be a good solution. @@ -126,7 +128,7 @@ Some sorting algorithms are specifically designed to be fast when there are only The following plot shows how fast those algorithms are depending on the percentage of inversions in the collection to sort. They are benchmarked against `pdq_sort` because it is the algorithm they use internally to sort the remaining unsorted elements prior to the merge, which makes it easy to compare the gains and overheads of those algorithms compared to a raw `pdq_sort`. -![Benchmark speed of Inv-adaptive sorts with an increasing percentage of inversions for std::vector](https://i.imgur.com/MYRdAKc.png) +![Benchmark speed of Inv-adaptive sorts with an increasing percentage of inversions for std::vector<int>](https://i.imgur.com/MYRdAKc.png) As long as there are up to 30~40% of inversions in the collection, `drop_merge_sort` and `split_sort` offer an advantage over a raw `pdq_sort`. Interestingly `drop_merge_sort` is the best when there are few inversions but `split_sort` is more robust: it can handle more inversions than `drop_merge_sort` before being slower than `pdq_sort`, and has a lower overhead when the number of inversions is high. 
@@ -138,7 +140,7 @@ Sometimes one has to sort a collection whose elements are expensive to move arou The following example uses a collection of `std::array` whose first element is the only one compared during the sort. Albeit a bit artificial, it illustrates the point well enough. -![Benchmark heap_sort vs. indirect_adapter(heap_sort) for a collection of std::array](https://i.imgur.com/Okkahwf.png) +![Benchmark heap_sort vs. indirect_adapter(heap_sort) for a collection of std::array<double, 100>](https://i.imgur.com/Okkahwf.png) The improvements are not always as clear as in this benchmark, but it shows that `indirect_adapter` might be an interesting tool to have in your sorting toolbox in such a scenario. @@ -146,14 +148,14 @@ The improvements are not always as clear as in this benchmark, but it shows that Only a few algorithms allow to sort a collection stably without using extra heap memory: `grail_sort` and `wiki_sort` can accept a fixed-size buffer (possibly of size 0) while `merge_sort` has a fallback algorithm when no heap memory is available. -![Benchmark speed of stable sorts with no heap memory with increasing size for std::vector](https://i.imgur.com/1a64irX.png) -![Benchmark speed of stable sorts with no heap memory with increasing size for std::deque](https://i.imgur.com/U5uD8Er.png) +![Benchmark speed of stable sorts with no heap memory with increasing size for std::vector<double>](https://i.imgur.com/1a64irX.png) +![Benchmark speed of stable sorts with no heap memory with increasing size for std::deque<double>](https://i.imgur.com/U5uD8Er.png) ![Detail of the previous benchmark](https://i.imgur.com/owUictQ.png) `merge_sort` is definitely losing this benchmark. Interestingly enough `wiki_sort` is way better with a fixed buffer of 512 elements while it hardly affects `grail_sort` at all. For `std::deque`, `grail_sort` is almost always the fastest no matter what. 
-![Benchmark stable sorts with no heap memory over different patterns for std::vector](https://i.imgur.com/74YxCLI.png) -![Benchmark stable sorts with no heap memory over different patterns for std::deque](https://i.imgur.com/jqek5Ii.png) +![Benchmark stable sorts with no heap memory over different patterns for std::vector<double>](https://i.imgur.com/74YxCLI.png) +![Benchmark stable sorts with no heap memory over different patterns for std::deque<double>](https://i.imgur.com/jqek5Ii.png) Here `merge_sort` still loses the battle, but it also displays an impressive enough adaptiveness to presortedness and patterns. @@ -161,8 +163,8 @@ Here `merge_sort` still loses the battle, but it also displays an impressive eno Some sorting algorithms are particularly suited to sort very small collections: [*fixed-size sorters*][fixed-size-sorters] of course, but also very simple regular sorters such as [`insertion_sorter`][insertion-sorter] or [`selection_sorter`][selection-sorter]. Most other sorting algorithms fallback to one of these when sorting a small collection. -![Benchmark speed of small sorts with increasing size for std::array](https://i.imgur.com/ABfEmJe.png) -![Benchmark speed of small sorts with increasing size for std::array](https://i.imgur.com/wqz1q3R.png) +![Benchmark speed of small sorts with increasing size for std::array<int>](https://i.imgur.com/ABfEmJe.png) +![Benchmark speed of small sorts with increasing size for std::array<long double>](https://i.imgur.com/wqz1q3R.png) We can see several trends in these benchmarks, rather consistant across `int` and `long double`: * As far as only speed matters, the size-optimal hand-unrolled sorting networks of [`sorting_network_sorter`][sorting-network-sorter] tend to win in these artificial microbenchmarks, but in a real world scenario the cost of loading the network code for a specific size again and again tends to make them slower. A sorting network can be fast when it is used over and over again. 
@@ -174,10 +176,10 @@ We can see several trends in these benchmarks, rather consistant across `int` an This benchmark for [measures of disorder][Measures-of-disorder] is small and only intends to show the cost that these tools might incur. It is not meant to be exhaustive in any way. -![Benchmark speed of measures of disorder for increasing size for std::vector](https://i.imgur.com/7QZqe0m.png) +![Benchmark speed of measures of disorder for increasing size for std::vector<int>](https://i.imgur.com/uGoVIcE.png) It makes rather easy to see the different groups of complexities: -* *Runs(X)* and *Mono(X)* are obvious O(n) algorithms. +* *Amp(X)*, *Runs(X)* and *Mono(X)* are obvious O(n) algorithms. * *Dis(X)* is a more involved O(n) algorithm. * All of the other measures of disorder run in O(n log n) time. diff --git a/docs/Changelog.md b/docs/Changelog.md index 882f1b5e..f52a0847 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -40,7 +40,7 @@ When compiled with C++20, **cpp-sort** might gain a few additional features depe [branchless-traits]: Miscellaneous-utilities.md#branchless-traits [counting-sorter]: Sorters.md#counting_sorter [cpp-sort-function-objects]: Miscellaneous-utilities.md#miscellaneous-function-objects - [cpp-sort-releases]: https://github.com/Morwenn/cpp-sort/releases + [cpp-sort-releases]: https://codeberg.org/Morwenn/cpp-sort/releases [feature-test-macros]: https://wg21.link/SD6 [pdq-sorter]: Sorters.md#pdq_sorter [ska-sorter]: Sorters.md#ska_sorter diff --git a/docs/Comparator-adapters.md b/docs/Comparator-adapters.md index 411ebc9d..3ef2b661 100644 --- a/docs/Comparator-adapters.md +++ b/docs/Comparator-adapters.md @@ -4,7 +4,7 @@ All adapters below are composed of two elements: * A class template that wraps a comparator and is itself a comparator (ex: `not_fn_t`, `flip_t`). * A function template that simplifies the construction and sometimes implements optimizations (ex: `not_fn`, `flip`). 
-The optimizations performed by the function templates are of the "unwrapping" kind, with a goal to reduce the nesting of templates in the library and to eventually reduce the overall number of instantiated templates. +The optimizations performed by the function templates are of the "unwrapping" kind, with a goal to reduce the nesting of templates in the library and to eventually reduce the overall number of template instantiations. ```cpp auto cmp = std::less{}; @@ -20,7 +20,7 @@ Those unwrappings are meant to be simple and only intended to work with "well-fo #include ``` -The class template `flip_t` is a function object which, when called, passes the arguments in reversed order to the *Callable* it holds with and returns the result. It is named after the [`flip`][prelude-flip] function from Haskell's Prelude module. +The class template `flip_t` is a function object which, when called, passes the arguments in reversed order to the *Callable* it holds and returns the result. It is named after the [`flip`][prelude-flip] function from Haskell's Prelude module. You can find more trivia about this function object, as well as examples of use [in an article][blog-std-flip] on my blog. 
`flip_t` has the following member functions: @@ -137,6 +137,7 @@ constexpr auto projection() const [binary-predicate]: https://en.cppreference.com/w/cpp/concept/BinaryPredicate + [blog-std-flip]: https://morwenn.github.io/c++/2025/09/25/TSB004-std-flip.html [branchless-traits]: Miscellaneous-utilities.md#branchless-traits [callable]: https://en.cppreference.com/w/cpp/named_req/Callable [prelude-flip]: https://hackage.haskell.org/package/base-4.16.0.0/docs/Prelude.html#v:flip diff --git a/docs/Comparators.md b/docs/Comparators.md index 28bdd16d..0fb46650 100644 --- a/docs/Comparators.md +++ b/docs/Comparators.md @@ -97,11 +97,11 @@ The two-parameter version of the customization point calls the three-parameter o *This comparator can be [refined][refining] for a specific type to provide better performance.* - [binary-predicate]: https://en.cppreference.com/w/cpp/concept/BinaryPredicate + [binary-predicate]: https://en.cppreference.com/w/cpp/named_req/BinaryPredicate [branchless-traits]: Miscellaneous-utilities.md#branchless-traits [callable]: https://en.cppreference.com/w/cpp/named_req/Callable [case-sensitivity]: https://en.wikipedia.org/wiki/Case_sensitivity - [cppcon2015-compare]: https://github.com/CppCon/CppCon2015/tree/master/Presentations/Comparison%20is%20not%20simple%2C%20but%20it%20can%20be%20simpler%20-%20Lawrence%20Crowl%20-%20CppCon%202015 + [cppcon2015-compare]: https://github.com/CppCon/CppCon2015/tree/master/Presentations/Comparison%20is%20not%20simple%2C%20but%20it%20can%20be%20simpler [custom-point]: https://ericniebler.com/2014/10/21/customization-point-design-in-c11-and-beyond/ [natural-sort]: https://en.wikipedia.org/wiki/Natural_sort_order [P0100]: http://open-std.org/JTC1/SC22/WG21/docs/papers/2015/p0100r1.html diff --git a/docs/Home.md b/docs/Home.md index 79b16d03..68437350 100644 --- a/docs/Home.md +++ b/docs/Home.md @@ -1,6 +1,6 @@ ![cpp-sort logo](images/cpp-sort-logo.svg) -Welcome to the **cpp-sort 2.0.0** documentation! 
+Welcome to the **cpp-sort 2.1.0** documentation! This wiki contains documentation about the library: basic documentation about the many sorting tools and how to use them, documentation about the additional utilities provided by the library, as well as a few tutorials about writing your own sorters or sorter adapters. This main page explains a few general things that didn't quite fit in other parts of the documentation. diff --git a/docs/Library-nomenclature.md b/docs/Library-nomenclature.md index 4d8af1c8..188c1203 100644 --- a/docs/Library-nomenclature.md +++ b/docs/Library-nomenclature.md @@ -1,12 +1,12 @@ **cpp-sort** deals with many concepts related to sorting and algorithms in general. This section tries to briefly explain the many things that you may encounter while using it. When a term or an expression appears in *italics* in the rest of the documentation, it is generally a reference to one of the following entries: -* *Buffered sorter*: some sorting algorithms optionally use a buffer where they store elements to improve the performance of the sort. Some of them, such as block sort, will manage to sort the collection regardless of the actual size of the buffer, which will only have on influence on the performance of the sort. A buffered sorter is a sorter that takes a *buffer provider* template parameter that tells how the temporary buffer should be allocated, and uses this provider to create the buffer. A *buffer provider* is a class that has a nested `buffer` class which implements a set of basic operations (construction with a size, `begin`, `end` and `size`). Implementing a buffer provider is a bit tricky, but using them should be easy enough: +* **Buffered sorter**: some sorting algorithms optionally use a buffer where they store elements to improve the performance of the sort. 
Some of them, such as block sort, will manage to sort the collection regardless of the actual size of the buffer, which will only have an influence on the performance of the sort. A buffered sorter is a sorter that takes a *buffer provider* template parameter that tells how the temporary buffer should be allocated, and uses this provider to create the buffer. A *buffer provider* is a class that has a nested `buffer` class which implements a set of basic operations (construction with a size, `begin`, `end` and `size`). Implementing a buffer provider is a bit tricky, but using them should be easy enough: using sorter = cppsort::grail_sorter< cppsort::utility::fixed_buffer<512> >; -* *Comparison function*: most of the sorting algorithms in the library are comparison sorts. It means that the algorithm uses a comparison function to know the order of the elements and sort them accordingly; such a comparison function shall take two values and have a return type convertible to `bool`. The available sorting algorithms transform comparison functions on the fly so that some pointers to member functions can also be used as comparison functions, as if called with [`std::invoke`][std-invoke]. The default comparison function used by the sorting algorithms is [`std::less<>`][std-less-void]. Many sorters can take a comparison function as an additional parameter. For example, using `std::greater<>` instead of the default comparison function would sort a collection in descending order. +* **Comparison function**: most of the sorting algorithms in the library are comparison sorts. It means that the algorithm uses a comparison function to know the order of the elements and sort them accordingly; such a comparison function shall take two values and have a return type convertible to `bool`. 
The available sorting algorithms transform comparison functions on the fly so that some pointers to member functions can also be used as comparison functions, as if called with [`std::invoke`][std-invoke]. The default comparison function used by the sorting algorithms is [`std::less<>`][std-less-void]. Many sorters can take a comparison function as an additional parameter. For example, using `std::greater<>` instead of the default comparison function would sort a collection in descending order. cppsort::heap_sort(collection, std::greater{}); @@ -14,25 +14,25 @@ The library provides a set of additional [comparators][comparators] generally corresponding to common ways to compare common types. -* *Equivalent elements*: this notion appears in the context of comparing elements with a predicate. Two elements `a` and `b` are equivalent with regard to a predicate `comp` when `not comp(a, b) && not comp(b, a)`. Predicates in comparison sorts only require to model a [weak order][weak-order], so elements satifying the previous expressions do not have to be strictly equal - we call them *equivalent elements* in the rest of the documentation. +* **Equivalent elements**: this notion appears in the context of comparing elements with a predicate. Two elements `a` and `b` are equivalent with regard to a predicate `comp` when `not comp(a, b) && not comp(b, a)`. Predicates in comparison sorts only require to model a [weak order][weak-order], so elements satifying the previous expressions do not have to be strictly equal - we call them *equivalent elements* in the rest of the documentation. -* *Fixed-size sorter*: [fixed-size sorters][fixed-size-sorters] are a special breed of sorters designed to sort a fixed number of values. While they try their best to be full-fledge sorters, they are definitely not full-fledge sorters and probably don't blend as well as one would like into the library. 
Their main advantage is that they can be more performant than regular sorters in some specific scenarios. +* **Fixed-size sorter**: [fixed-size sorters][fixed-size-sorters] are a special breed of sorters designed to sort a fixed number of values. While they try their best to be full-fledge sorters, they are definitely not full-fledge sorters and probably don't blend as well as one would like into the library. Their main advantage is that they can be more performant than regular sorters in some specific scenarios. -* *Iterator category*: the C++ standard defines [several categories of iterators][iterator-categories] such as forward iterators, bidirectional iterators or random-access iterators. The standard library uses [iterator tags][iterator-tags] to document the category of an iterator. These categories are important since algorithms are designed to work with some categories of iterators and not with other categories, and those in this library are not different: in-place sorting needs at least forward iterators. You can use the [`iterator_category`][iterator-category] sorter trait to get the least constrained iterator category associated with a sorter. +* **Iterator category**: the C++ standard defines [several categories of iterators][iterator-categories] such as forward iterators, bidirectional iterators or random-access iterators. The standard library uses [iterator tags][iterator-tags] to document the category of an iterator. These categories are important since algorithms are designed to work with some categories of iterators and not with other categories, and those in this library are not different: in-place sorting needs at least forward iterators. You can use the [`iterator_category`][iterator-category] sorter trait to get the least constrained iterator category associated with a sorter. 
using category = cppsort::iterator_category; Note that the *sorters* (and virtually bery algorithm) in **cpp-sort** accept iterators that do not implement post-increment and post-decrement operations. The iterator categories accepted by the library are thus less restrictive than the ones mandated for the standard library. -* *Measure of disorder*: a function used to estimate the amount of disorder in a sequence. There are many different to do that, such as counting the number of inversions in the sequence, or the number of elements to remove to get a sorted subsequence. **cpp-sort** provides a number of [measures of disorder][Measures-of-disorder] in the namespace `cppsort::probe`. +* **Measure of disorder**: a function used to estimate the amount of disorder in a sequence. There are many different ways to do that, such as counting the number of inversions in the sequence, or the number of elements to remove to get a sorted subsequence. **cpp-sort** provides a number of [measures of disorder][Measures-of-disorder] in the namespace `cppsort::probe`. auto max_inversions = cppsort::probe::dis(collection); -* *Measure of presortedness*: a special kind of *measure of disorder* that satisfies a specific set of additional properties (see the page on *measures of disorder*). The overarching goal of those measures is to be able to estimate and reason about the number of steps required to sort a sequence of elements. Most notably, they allow to formally reason about *adaptive sorting algorithms*: given a measure of presortedness $M$, an $M$-adaptive (or $M$-optimal) sorting algorithm is an algorithm that can sort a sequence with a number of steps that is without a constant bound of the estimated minimal number of steps for the estimated disorder. +* **Measure of presortedness**: a special kind of *measure of disorder* that satisfies a specific set of additional properties (see the page on *measures of disorder*). 
The overarching goal of those measures is to be able to estimate and reason about the number of steps required to sort a sequence of elements. Most notably, they allow to formally reason about *adaptive sorting algorithms*: given a measure of presortedness $M$, an $M$-adaptive (or $M$-optimal) sorting algorithm is an algorithm that can sort a sequence with a number of steps that is within a constant factor of the estimated minimal number of steps for the estimated disorder. -* *Metric*: as special kind of *sorter adapter* that returns information about sorted collections. See [the corresponding page][metrics] for additional information. +* **Metric**: a special kind of *sorter adapter* that returns information about sorted collections. See [the corresponding page][metrics] for additional information. -* *Projection*: some sorters accept a projection as an additional parameter. A projection is a unary function that allows to "view" the values of a collection differently. For example it may allow to sort a collection of values on a specific field. The available sorting algorithms transform projections on the fly so that pointers to member data can also be used as projections. Projections were pioneered by the [Adobe Source Libraries][stlab] and appear in the C++20 [constrained algorithms][std-ranges]. +* **Projection**: some sorters accept a projection as an additional parameter. A projection is a unary function that allows to "view" the values of a collection differently. For example it may allow to sort a collection of values on a specific field. The available sorting algorithms transform projections on the fly so that pointers to member data can also be used as projections. Projections were pioneered by the [Adobe Source Libraries][stlab] and appear in the C++20 [constrained algorithms][std-ranges]. struct wrapper { int value; }; std::vector collection = { /* ... 
*/ }; @@ -40,16 +40,16 @@ Every *comparison sorter* is also a *projection sorter*, but there are also projection-only sorters, such as [`spread_sorter`][spread-sorter]. -* *Proxy iterator*: sometimes `std::move` and `std::swap` are not enough to correctly move values around, and we need to know more about the iterators in order to perform the appropriate operation. It's typically the case with proxy iterators: iterators whose `reference` type is not actually a reference type (*e.g.* `std::vector::reference`). Traditional algorithms don't play well with these types, however there are [standard proposals][p0022] to solve the problem by introducing a function named `iter_move` and making it as well as `iter_swap` customization points. No proposal has been accepted yet, so standard libraries don't handle proxy iterators; however every sorter in **cpp-sort** can actually handle such iterators (except `std_sorter` and `std_stable_sorter`). The library exposes the functions [`utility::iter_move` and `utility::iter_swap`][utility-iter-move] in case you also need to make your own algorithms handle proxy iterators. +* **Proxy iterator**: sometimes `std::move` and `std::swap` are not enough to correctly move values around, and we need to know more about the iterators in order to perform the appropriate operation. It's typically the case with proxy iterators: iterators whose `reference` type is not actually a reference type (*e.g.* `std::vector::reference`). Traditional algorithms don't play well with these types, however there are [standard proposals][p0022] to solve the problem by introducing a function named `iter_move` and making it as well as `iter_swap` customization points. No proposal has been accepted yet, so standard libraries don't handle proxy iterators; however every sorter in **cpp-sort** can actually handle such iterators (except `std_sorter` and `std_stable_sorter`). 
The library exposes the functions [`utility::iter_move` and `utility::iter_swap`][utility-iter-move] in case you also need to make your own algorithms handle proxy iterators. -* *Sorter*: [sorters][sorters] are the protagonists in this library. They are function objects implementing specific sorting algorithms. Their `operator()` is overloaded so that it can handle ranges or pairs of iterators, and conditionally overloaded so that it can handle user-provided comparison and/or projection functions (see *unified sorting interface*). +* **Sorter**: [sorters][sorters] are the protagonists in this library. They are function objects implementing specific sorting algorithms. Their `operator()` is overloaded so that it can handle ranges or pairs of iterators, and conditionally overloaded so that it can handle user-provided comparison and/or projection functions (see *unified sorting interface*). cppsort::pdq_sorter{}(std::begin(collection), std::end(collection), std::greater{}, &wrapper::value); -* *Sorter adapter*: [sorter adapters][sorter-adapters] are class templates that take one or several sorters and produce a new sorter from the parameters. What a sorter adapter can do is not constrained, but they are generally expected to behave like sorters themselves. For example, **cpp-sort** contains adapters to count the number of comparisons performed by a sorting algorithms or to aggregate several sorters together. The best way to learn more about them is still to read the dedicated section of the documentation. +* **Sorter adapter**: [sorter adapters][sorter-adapters] are class templates that take one or several sorters and produce a new sorter from the parameters. What a sorter adapter can do is not constrained, but they are generally expected to behave like sorters themselves. For example, **cpp-sort** contains adapters to count the number of comparisons performed by a sorting algorithms or to aggregate several sorters together. 
The best way to learn more about them is still to read the dedicated section of the documentation. -* *Stability*: a sorting algorithm is *stable* if it preserves the relative order of *equivalent elements*. While it does not matter when the equivalence relationship also happens to be an equality relationship, it may have its importance in other situations. It is possible to query whether a sorter is guaranteed to always use a stable sorting algorithm with the [`is_always_stable`][is-always-stable] sorter trait. +* **Stability**: a sorting algorithm is *stable* if it preserves the relative order of *equivalent elements*. While it does not matter when the equivalence relationship also happens to be an equality relationship, it may have its importance in other situations. It is possible to query whether a sorter is guaranteed to always use a stable sorting algorithm with the [`is_always_stable`][is-always-stable] sorter trait. using stability = cppsort::is_stable; @@ -61,13 +61,13 @@ The library also provides the adapter [`stable_adapter`][stable-adapter] to obtain a stable sorter corresponding to the passed sorter. When calling the adapted sorter yields an unstable sorting algorithm, the utility adapter `make_stable` is used to transform it into a stable sorting algorithm, providing the underlying algorithm handles proxy iterators. -* *Stateful/Stateless sorter*: a sorter either carries a state or not; when it does so it is called a *stateful* sorter, otherwise it is called a *stateless* sorter. Most of the *sorters* in the library are stateless sorters. +* **Stateful/Stateless sorter**: a sorter either carries a state or not; when it does so it is called a *stateful* sorter, otherwise it is called a *stateless* sorter. Most of the *sorters* in the library are stateless sorters. Stateless sorters are generally empty default-constructible types. 
If they indeed satisfy these guarantees, then several components of the library will provide space optimizations, and some wrapping components will also provide overloaded operators to be turned into several kinds of function pointers. Therefore, authors of *stateless sorters* are encouraged to also make them empty and default-constructible to benefit from the full powers of the library. -* *Type-specific sorter*: some non-comparison sorters such as the [`spread_sorter`][spread-sorter] implement specific sorting algorithms which only work with some specific types (for example integers or strings). +* **Type-specific sorter**: some non-comparison sorters such as the [`spread_sorter`][spread-sorter] implement specific sorting algorithms which only work with some specific types (for example integers or strings). -* *Unified sorting interface*: *sorters*, *sorter adapters*, *measures of disorder* and a few other components of the library accept a range or a pair of iterators, and optionally a comparison function and/or a comparison function. Those components typically rely on the library's [`sorter_facade`][sorter-facade] which handles the dispatching to the component's implementation and to handle a number of special cases. For simplicity, what is accepted by the `operator()` of such components is referred to as the *unified sorting interface* in the rest of the library. +* **Unified sorting interface**: *sorters*, *sorter adapters*, *measures of disorder* and a few other components of the library accept a range or a pair of iterators, and optionally a comparison function and/or a projection function. Those components typically rely on the library's [`sorter_facade`][sorter-facade] which handles the dispatching to the component's implementation and a number of special cases. For simplicity, what is accepted by the `operator()` of such components is referred to as the *unified sorting interface* in the rest of the library. 
[comparators]: Comparators.md diff --git a/docs/Measures-of-disorder.md b/docs/Measures-of-disorder.md index b9b9cbe5..0398a0da 100644 --- a/docs/Measures-of-disorder.md +++ b/docs/Measures-of-disorder.md @@ -1,10 +1,10 @@ *Measures of disorder* are functions used to measure how much a sequence differs from its sorted permutation. Several loose definitions of measures of disorder exist in the literature; in this documentation, we use the formal definition provided by Vladimir Estivill-Castro in *Sorting and Measures of Disorder*. That is, a *measure of disorder* $M$ is a non-negative real function that accepts a sequence $X$ and satifies the following properties: -1. When $X$ is sorted, $M(X) = \min_{|Y|=|X|}\{M(Y)\}$. In other words, a measure of disorder *grows* with the amount of disorder in $X$, and reaches its minimum when $X$ is sorted. +1. When $X$ is sorted, $M(X) = \min_{\lvert Y \rvert=\lvert X \rvert}\{M(Y)\}$. In other words, a measure of disorder *grows* with the amount of disorder in $X$, and reaches its minimum when $X$ is sorted. 2. Order isomorphism: if the relative order of elements in two sequences $X$ and $Y$ is the same, then $M(X) = M(Y)$. In the rest of this document, we also use the following notation: * Sequences are ordered, and use angle brackets as delimiter, ex: $\langle 1, 3, 2, 4, 10 \rangle$. -* $|X|$ corresponds to the number of elements in the sequence $X$ (its size). +* $\lvert X \rvert$ corresponds to the number of elements in the sequence $X$ (its size). * Given two sequences $X$ and $Y$, $X \lt Y$ means that every of $X$ compares less than every element in $Y$ (assume similar meaning for other ordering operators). * Given two sequences $X$ and $Y$, $XY$ corresponds to their concatenation. Similarly $\langle e \rangle X$ is the concatenation of the sequence made of the single element $e$ and of the sequence $X$. 
* The expression "subsequence of $X$" refers to a sequence obtained by removing any number of possibly non-adjacent elements from $X$, unless specified otherwise. @@ -19,7 +19,7 @@ In the rest of this document, we also use the following notation: > 2. If $X$ and $Y$ are order isomorphic, then $M(X) = M(Y)$ > 3. If $X$ is a subsequence of $Y$, then $M(X) ≤ M(Y)$ > 4. If $X \le Y$, then $M(XY) ≤ M(X) + M(Y)$ -> 5. $M(⟨e⟩X) ≤ |X| + M(X)$ for every element $e$ of the domain +> 5. $M(⟨e⟩X) ≤ \lvert X \rvert + M(X)$ for every element $e$ of the domain Mannila's goal was to define strong properties allowing to reason about the minimum amount of work required for an adaptive sorting algorithm to sort a sequence with little disorder. Namely: * Criterion 1 above tries to formally represent the intuitive notion that no work is needed to sort a sequence that is already sorted. @@ -34,7 +34,7 @@ Some authors found that definition to be overly strict for their application, an > 2. If $X$ and $Y$ are order isomorphic, then $M(X) = M(Y)$ > 3. If $X$ is a subsequence of $Y$, then $M(X) ≤ M(Y)$ > 4. If $X \le Y$, then $M(XY) ≤ M(X) + M(Y) + b$ -> 5. $M(⟨e⟩X) ≤ |X| + M(X) + c$ for every element $e$ of the domain +> 5. $M(⟨e⟩X) ≤ \lvert X \rvert + M(X) + c$ for every element $e$ of the domain That loosened definition however is arguably less suited to estimate the amount of work need to order a sequence. We include it here for the sake of exposition and to highlight that vocabulary in the domain has historically been debated, but in the rest of this document the expression *measure of presortedness* refers to any measure of disorder that satisfies Mannila's five criteria. @@ -50,13 +50,13 @@ The *monotonicity* property implies the *prefix monotonicity* one. 
A measure of Let $X$ be a sequence of elements, and let $S_X$ be set of all permutations of that sequence: -$$below_M(X) = \{ \pi | \pi \in S_X \text{ and } M(\pi) \le M(X) \}$$ +$$\mathit{below}_M(X) = \{ \pi \vert \pi \in S_X \text{ and } M(\pi) \le M(X) \}$$ Let $T_S(X)$ be the number of steps needed for an algorithm $S$ to sort $X$. A sorting algorithm is said to be $M$-optimal if and only if, for some constant $c$, we have for all $X$: -$$T_S(X) \le c \cdot max\{|X|, \log{} |below_M(X)|\}$$ +$$T_S(X) \le c \cdot \max\{\lvert X \rvert, \log{} \lvert \mathit{below}_M(X) \rvert\}$$ -In other words, a sorting algorithm is considered $M$-optimal if it takes a number of steps that is within a constant factor of the lower bound of $M$ to sort a sequence. For example a $Rem$-optimal algorithm should be able to sort any sequence in $O(|X| \log{} Rem(X))$ steps. +In other words, a sorting algorithm is considered $M$-optimal if it takes a number of steps that is within a constant factor of the lower bound of $M$ to sort a sequence. For example a $\mathit{Rem}$-optimal algorithm should be able to sort any sequence in $O(\lvert X \rvert \log{} \mathit{Rem}(X))$ steps. ### Partial ordering of measures of disorder @@ -64,28 +64,28 @@ Early on, authors have been wanting to prove that some measures of disorder were > Let $M_1$ and $M_2$ be two measures of disorder: > -> * $M_1$ is algorithmically finer than $M_2$ (denoted $M_1 \le_{alg} M_2$) if and only if any $M_1$-optimal sorting algorithm is also $M_2$-optimal. -> * $M_1$ and $M_2$ are algorithmically equivalent (denoted $M_1 =_{alg} M_2$) if and only if $M_1 \le_{alg} M_2$ and $M_2 \le_{alg} M_1$. +> * $M_1$ is algorithmically finer than $M_2$ (denoted $M_1 \le_\mathit{alg} M_2$) if and only if any $M_1$-optimal sorting algorithm is also $M_2$-optimal. +> * $M_1$ and $M_2$ are algorithmically equivalent (denoted $M_1 =_\mathit{alg} M_2$) if and only if $M_1 \le_\mathit{alg} M_2$ and $M_2 \le_\mathit{alg} M_1$. 
While useful to understand what we want from a partial order on measures of disorder, the definition above does not help a lot when it comes to actually proving that a measure is algorithmically finer than another. To better compare two measures of disorder, Jingsen Chen introduces the following operator in *Computing and ranking measures of presortedness*: > Let $M_1$ and $M_2$ be two measures of disorder: > -> * $M_1$ is superior to $M_2$ (denoted $M_1 \preceq M_2$) if and only if there exists a constant $c$ such as $|below_{M_1}(X)| \le c \cdot |below_{M_2}(X)|$ for any sequence $X$. +> * $M_1$ is superior to $M_2$ (denoted $M_1 \preceq M_2$) if and only if there exists a constant $c$ such as $\lvert \mathit{below}_{M_1}(X) \rvert \le c \cdot \lvert \mathit{below}_{M_2}(X) \rvert$ for any sequence $X$. > * $M_1$ and $M_2$ are equivalent (denoted $M_1 \equiv M_2$) if and only if $M_1 \preceq M_2$ and $M_2 \preceq M_1$. That definition seems to match the one proposed much earlier by Alistair Moffat and Ola Petersson in *A Framework for Adaptive Sorting*, though the authors use the symbol $\supseteq$ instead of $\preceq$. -To prove that two measures of disorder were equivalent, authors have used the simpler method of showing that there exists some non-0 constants $c$ and $d$ such as $M_1(X) \le c \cdot M_2(X) \le d \cdot M_1(X)$. For example, the result $Max \equiv Dis$ below was originally obtained by proving that $Max(X) \le Dis(X) \le 2 Max(X)$ for any sequence $X$. +To prove that two measures of disorder were equivalent, authors have used the simpler method of showing that there exists some non-0 constants $c$ and $d$ such as $M_1(X) \le c \cdot M_2(X) \le d \cdot M_1(X)$. For example, the result $\mathit{Max} \equiv \mathit{Dis}$ below was originally obtained by proving that $\mathit{Max}(X) \le \mathit{Dis}(X) \le 2 \mathit{Max}(X)$ for any sequence $X$. 
The graph below shows the partial ordering of several measures of disorder: - *Reg* is a measure of presortedness superior to all other ones in the graph. - *m₀* is a measure of presortedness that always returns 0. - *m₀₁* is a measure of presortedness that returns 0 when $X$ is sorted and 1 otherwise. -![Partial ordering of measures of disorder](images/mops-partial-ordering.png) +![Partial ordering of measures of disorder](images/partial-ordering-measures-of-disorder.png) -This graph is a modified version of the one in *A framework for adaptive sorting*. The relations of *Mono* are empirically derived [original research][original-research] and incomplete (unknown relations with *Osc* and *Loc*). +This graph is a modified version of the one found in *A framework for adaptive sorting*. The relations of *Mono* and *Amp* with other measures of disorder are empirically derived [original research][original-research] and known to be incomplete (unknown relations with *Osc* and *Loc*). The measures of disorder in bold in the graph are available in **cpp-sort**, the others are not. @@ -133,7 +133,40 @@ It takes an integer `n` and returns the maximum value that the measure of disord ## Available measures of disorder -Measures of disorder are pretty formalized, so the names of the functions in the library are short and generally correspond to the ones used in the literature. +Measures of disorder are pretty formalized, so the names of the functions in the library are short and generally correspond to the ones used in the literature, with a few exceptions. A justification is given whenever a name does not exactly match the ones from the literature, or when the definition differs. 
+ +### *Amp* + +```cpp +#include +``` + +Let's consider the following function to compare two elements of a sequence: + +$$ +\mathit{comp}(x, y)= +\begin{cases} +1 & \text{ if } x \lt y\\ +-1 & \text{ if } x \gt y\\ +0 & \text{otherwise} +\end{cases} +$$ + +We define $\mathit{Amp}(X)$ as follows: + +$$\mathit{Amp}(X) = \lvert X \rvert - \mathit{PTP}(X) - N_{\mathit{eq}}(X) - 1$$ + +Where $N_{\mathit{eq}}(X)$ is the number of pairs of neighbors that compare equivalent in $X$, and $\mathit{PTP}(X)$ is the number of unique values in the prefix sum of the sequence obtained by applying $\mathit{comp}$ to every pair of adjacent elements in $X$. + +![Illustration of how comp is applied to pairs of neighbors up to the prefix sum](images/pairwise-order-shadow.png) + +| Complexity | Memory | Iterators | Monotonic | +| ----------- | ----------- | ------------- | --------- | +| n | 1 | Forward | No | + +`max_for_size`: $\lvert X \rvert - 2$ when the sign of $\mathit{comp}$ changes for every pair of neighbors. + +**Note:** *Amp* does not respect Mannila's criterion 4: $\mathit{Amp}(\langle 1, 2, 3 \rangle) = 0$ and $\mathit{Amp}(\langle 6, 5, 4 \rangle) = 0$, but $\mathit{Amp}(\langle 1, 2, 3, 6, 5, 4 \rangle) = 4$. ### *Block* @@ -141,7 +174,7 @@ Measures of disorder are pretty formalized, so the names of the functions in the #include ``` -Computes the number of elements in a sequence that aren't followed by the same element in the sorted sequence. +Computes the number of elements in $X$ that aren't followed by the same element in the sorted permutation. Our implementation is slightly different from the original description in *Sublinear merging and natural mergesort* by S. Carlsson, C. Levcopoulos and O. Petersson: * It doesn't add 1 to the general result, thus returning 0 when $X$ is sorted and respecting Mannila's first criterion for what makes a measure of presortedness (though this change might be responsible for the breakage of criterion 4). 
@@ -151,11 +184,11 @@ Our implementation is slightly different from the original description in *Subli | ----------- | ----------- | ------------- | --------- | | n log n | n | Forward | No | -`max_for_size`: $|X| - 1$ when $X$ is sorted in reverse order. +`max_for_size`: $\lvert X \rvert - 1$ when $X$ is sorted in reverse order. **Note:** *Block* does not seem to respect Mannila's criterion 3 in the presence of *equivalent elements*. -**Note²:** `probe::block` does not respect Mannila's criterion 4: $Block(\langle 1, 0 \rangle) = 1$ and $Block(\langle 2, 3 \rangle) = 0$, but $Block(\langle 1, 0, 2, 3 \rangle) = 2$. +**Note²:** `probe::block` does not respect Mannila's criterion 4: $\mathit{Block}(\langle 1, 0 \rangle) = 1$ and $\mathit{Block}(\langle 2, 3 \rangle) = 0$, but $\mathit{Block}(\langle 1, 0, 2, 3 \rangle) = 2$. ### *Dis* @@ -172,7 +205,7 @@ Computes the maximum distance determined by an inversion. When enough memory is available `probe::dis` runs in O(n) using an algorithm described by T. Altman and Y. Igarashi in *Roughly Sorting: Sequential and Parallel Approach*, otherwise it falls back to an O(n log n) algorithm that does not require extra memory. If forward iterators are passed, the O(n log n) algorithm is always used. -`max_for_size`: $|X| - 1$ when the last element of $X$ is smaller than the first one. +`max_for_size`: $\lvert X \rvert - 1$ when the last element of $X$ is smaller than the first one. ### *Enc* @@ -189,13 +222,13 @@ Computes an approximation of the number of encroaching lists that can be extract Those lists are called encroaching because the bounds of a given list "encroach" those of all lists on its right. -The number of encroaching lists does not satisfy the formal definition of a measure of presortedness because it returns $1$ for non-empty sorted sequences instead of $0$, which does not respect first Mannila's criterion. Using $Enc(X) - 1$ does not work either because it does not respect Mannila's fourth criterion. 
To circumvent these issues, `probe::enc` implements an equivalent measure of disorder $M_{Enc}$ proposed by V. Estivill-Castro in *Sorting and Measures of Disorder*, which satisfies all of Mannila's criteria for what makes a measure of presortedness: +The number of encroaching lists does not satisfy the formal definition of a measure of presortedness because it returns $1$ for non-empty sorted sequences instead of $0$, which does not respect first Mannila's criterion. Using $\mathit{Enc}(X) - 1$ does not work either because it does not respect Mannila's fourth criterion. To circumvent these issues, `probe::enc` implements an equivalent measure of disorder $M_\mathit{Enc}$ proposed by V. Estivill-Castro in *Sorting and Measures of Disorder*, which satisfies all of Mannila's criteria for what makes a measure of presortedness: $$ -M_{Enc}(X)= +M_\mathit{Enc}(X)= \begin{cases} 0 & \text{if } X \text{ is sorted,}\\ -Enc(X_{tail}) & \text{otherwise, where } X_{tail} \text{ is } X \text{ without its leading ascending run.} +\mathit{Enc}(X_\mathit{tail}) & \text{otherwise, where } X_\mathit{tail} \text{ is } X \text{ without its leading ascending run.} \end{cases} $$ @@ -203,7 +236,7 @@ $$ | ----------- | ----------- | ------------- | --------- | | n log n | n | Forward | No | -`max_for_size`: $\frac{|X|}{2}$ when all values extracted from $X$ are within the bounds of already extracted encroaching lists (for example the sequence $\langle 10, 0, 9, 1, 8, 2, 7, 3, 6, 4, 5 \rangle$ triggers the worst case). +`max_for_size`: $\frac{\lvert X \rvert}{2}$ when all values extracted from $X$ are within the bounds of already extracted encroaching lists (for example the sequence $\langle 10, 0, 9, 1, 8, 2, 7, 3, 6, 4, 5 \rangle$ triggers the worst case). ### *Exc* @@ -211,17 +244,19 @@ $$ #include ``` -Computes the minimum number of exchanges required to sort $X$, which corresponds to $|X|$ minus the number of cycles in the sequence. 
A cycle corresponds to a number of elements in a sequence that need to be rotated to be in their sorted position; for example, let $\langle 2, 4, 0, 6, 3, 1, 5 \rangle$ be a sequence, the cycles are $\langle 0, 2 \rangle$ and $\langle 1, 3, 4, 5, 6 \rangle$ so $Exc(X) = |X| - 2 = 5$. +Computes the minimum number of exchanges required to sort $X$, which corresponds to $\lvert X \rvert$ minus the number of cycles in the sequence. A cycle corresponds to a number of elements in a sequence that need to be rotated to be in their sorted position; for example, let $\langle 2, 4, 0, 6, 3, 1, 5 \rangle$ be a sequence, the cycles are $\langle 0, 2 \rangle$ and $\langle 1, 3, 4, 5, 6 \rangle$ so $\mathit{Exc}(X) = \lvert X \rvert - 2 = 5$. -**Warning:** `probe::exc` generally returns a result higher than the minimum number of exchanges required to sort $X$ when it contains *equivalent elements*. This is because extending $Exc$ to *equivalent elements* is a NP-hard problem (see *On the Cost of Interchange Rearrangement in Strings* by Amir et al). The function does handle such elements in some simple cases, but not in the general case. +![Visual representation of the two cycles of exchanges required to sort the aforementioned sequence](images/sorting-exchange-cycles.png) + +**Warning:** `probe::exc` generally returns a result greater than the minimum number of exchanges required to sort $X$ when it contains *equivalent elements*. This is because extending $\mathit{Exc}$ to *equivalent elements* is a NP-hard problem (see *On the Cost of Interchange Rearrangement in Strings* by Amir et al). The function does handle such elements in some simple cases, but not in the general case. | Complexity | Memory | Iterators | Monotonic | | ----------- | ----------- | ------------- | --------- | | n log n | n | Forward | Yes | -`max_for_size`: $|X| - 1$ when every element in $X$ is one element away from its sorted position. 
+`max_for_size`: $\lvert X \rvert - 1$ when every element in $X$ is one element away from its sorted position. -**Note:** *Exc* does not respect Mannila's criterion 3 (a subsequence contains no more disorder than the whole sequence): $Exc(\langle 3, 1, 2, 0 \rangle) = 1$, but $Exc(\langle 3, 1, 2 \rangle) = 2$. +**Note:** *Exc* does not respect Mannila's criterion 3 (a subsequence contains no more disorder than the whole sequence): $\mathit{Exc}(\langle 3, 1, 2, 0 \rangle) = 1$, but $\mathit{Exc}(\langle 3, 1, 2 \rangle) = 2$. *Warning: this algorithm might be noticeably slower when the passed range is not random-access.* @@ -237,11 +272,11 @@ Computes the number of elements in $X$ that are not in their sorted position, wh | ----------- | ----------- | ------------- | --------- | | n log n | n | Forward | Yes | -`max_for_size`: $|X|$ when every element in $X$ is one element away from its sorted position. +`max_for_size`: $\lvert X \rvert$ when every element in $X$ is one element away from its sorted position. -**Note:** *Ham* does not respect Mannila's criterion 3 (a subsequence contains no more disorder than the whole sequence): $Ham(\langle 3, 1, 2, 0 \rangle) = 2$, but $Ham(\langle 3, 1, 2 \rangle) = 3$. +**Note:** *Ham* does not respect Mannila's criterion 3 (a subsequence contains no more disorder than the whole sequence): $\mathit{Ham}(\langle 3, 1, 2, 0 \rangle) = 2$, but $\mathit{Ham}(\langle 3, 1, 2 \rangle) = 3$. -**Note²:** *Ham* does not respect Mannila's criterion 5: $Ham(\langle 4, 1, 2, 3 \rangle) \not \le |\langle 1, 2, 3 \rangle| + Ham(\langle 1, 2, 3 \rangle)$. +**Note²:** *Ham* does not respect Mannila's criterion 5: $\mathit{Ham}(\langle 4, 1, 2, 3 \rangle) \not \le \lvert \langle 1, 2, 3 \rangle \rvert + \mathit{Ham}(\langle 1, 2, 3 \rangle)$. 
### *Inv* @@ -255,7 +290,7 @@ Computes the number of inversions in $X$, where an inversion corresponds to a pa | ----------- | ----------- | ------------- | --------- | | n log n | n | Forward | Yes | -`max_for_size`: $\frac{|X|(|X| - 1)}{2}$ when $X$ is sorted in reverse order. +`max_for_size`: $\frac{\lvert X \rvert(\lvert X \rvert - 1)}{2}$ when $X$ is sorted in reverse order. ### *Max* @@ -269,7 +304,7 @@ Computes the maximum distance an element in $X$ must travel to find its sorted p | ----------- | ----------- | ------------- | --------- | | n log n | n | Forward | Yes | -`max_for_size`: $|X| - 1$ when $X$ is sorted in reverse order. +`max_for_size`: $\lvert X \rvert - 1$ when $X$ is sorted in reverse order. ### *Mono* @@ -277,7 +312,7 @@ Computes the maximum distance an element in $X$ must travel to find its sorted p #include ``` -Computes the number of non-increasing and non-decreasing consecutive runs of adjacent elements that need to be removed from $X$ to make it sorted +Computes the number of non-increasing and non-decreasing consecutive runs of adjacent elements that need to be removed from $X$ to make it sorted. The measure of disorder is slightly different from its original description in [*Sort Race*][sort-race] by H. Zhang, B. Meng and Y. Liang: * It subtracts 1 from the number of runs, thus returning 0 when $X$ is sorted. @@ -287,9 +322,9 @@ The measure of disorder is slightly different from its original description in [ | ----------- | ----------- | ------------- | --------- | | n | 1 | Forward | No | -`max_for_size`: $\frac{|X| + 1}{2} - 1$ when $X$ is a sequence of elements that are alternatively greater then lesser than their previous neighbour. +`max_for_size`: $\frac{\lvert X \rvert + 1}{2} - 1$ when $X$ is a sequence of elements that are alternatively greater then lesser than their previous neighbour. 
-**Note:** `probe::mono` does not respect Mannila's criterion 4: $Mono(\langle 1, 2, 3, 4, 5 \rangle) = 0$ and $Mono(\langle 10, 9, 8, 7, 6 \rangle) = 0$, but $Mono(\langle 1, 2, 3, 4, 5, 10, 9, 8, 7, 6 \rangle) = 1$. +**Note:** `probe::mono` does not respect Mannila's criterion 4: $\mathit{Mono}(\langle 1, 2, 3, 4, 5 \rangle) = 0$ and $\mathit{Mono}(\langle 10, 9, 8, 7, 6 \rangle) = 0$, but $\mathit{Mono}(\langle 1, 2, 3, 4, 5, 10, 9, 8, 7, 6 \rangle) = 1$. ### *Osc* @@ -299,17 +334,23 @@ The measure of disorder is slightly different from its original description in [ Computes the *Oscillation* measure described by C. Levcopoulos and O. Petersson in *Adaptive Heapsort*, using an algorithm devised by J. Nehring. +Let $\lvert \lvert \mathit{Cross}(x_i) \rvert \rvert$ be the number of links between adjacent pairs that "cross" the value $x_i$. We define the oscillation measure as $\mathit{Osc}(X) = \sum_{i=0}^{\lvert X \rvert - 1} \lvert \lvert \mathit{Cross}(x_i) \rvert \rvert$. + +![Plot of Cross(x_5) over the sequence 6, 3, 9, 8, 4, 7, 1, 10](images/measures-of-disorder-osc-cross.png) + +In the illustration above, we can see that a horizontal line drawn from $x_5$ crosses three pairs of adjacent elements in the geometrical representation of the sequence $X = \langle 6, 3, 9, 8, 4, 7, 1, 10 \rangle$. Thus $\lvert \lvert \mathit{Cross}(x_5) \rvert \rvert = 3$. In this example, we have $\mathit{Osc}(X) = 17$. + | Complexity | Memory | Iterators | Monotonic | | ----------- | ----------- | ------------- | --------- | | n log n | n | Forward | No | -`max_for_size`: it is reached when the values in $X$ are strongly oscillating, and equals $\frac{|X|(|X| - 2)}{2}$ when $|X|$ is even, and $\frac{|X|(|X| - 2) - 1}{2}$ when $|X|$ is odd.
+`max_for_size`: it is reached when the values in $X$ are strongly oscillating, and equals $\frac{\lvert X \rvert(\lvert X \rvert - 2)}{2}$ when $\lvert X \rvert$ is even, and $\frac{\lvert X \rvert(\lvert X \rvert - 2) - 1}{2}$ when $\lvert X \rvert$ is odd. **Note:** *Osc* does not seem to respect Mannila's criterion 3 in the presence of *equivalent elements*. -**Note²:** *Osc* does not respect Mannila's criterion 4: $Osc(\langle 0 \rangle) = 0$ and $Osc(\langle 3, 2, 1 \rangle) = 0$, but $Osc(\langle 0, 3, 2, 1 \rangle) = 2$. +**Note²:** *Osc* does not respect Mannila's criterion 4: $\mathit{Osc}(\langle 0 \rangle) = 0$ and $\mathit{Osc}(\langle 3, 2, 1 \rangle) = 0$, but $\mathit{Osc}(\langle 0, 3, 2, 1 \rangle) = 2$. -**Note³:** *Osc* does not respect Mannila's criterion 5: $Osc(\langle 3, 0, 4, 2, 5, 1 \rangle) \not \le |\langle 0, 4, 2, 5, 1 \rangle| + Osc(\langle 0, 4, 2, 5, 1 \rangle)$, simplified: $11 \not \le 5 + 5$. +**Note³:** *Osc* does not respect Mannila's criterion 5: $\mathit{Osc}(\langle 3, 0, 4, 2, 5, 1 \rangle) \not \le \lvert \langle 0, 4, 2, 5, 1 \rangle \rvert + \mathit{Osc}(\langle 0, 4, 2, 5, 1 \rangle)$, simplified: $11 \not \le 5 + 5$. ### *Rem* @@ -317,13 +358,13 @@ Computes the *Oscillation* measure described by C. Levcopoulos and O. Petersson #include ``` -Computes the minimum number of elements that must be removed from $X$ to obtain a sorted subsequence, which corresponds to $|X|$ minus the size of the [longest non-decreasing subsequence][longest-increasing-subsequence] of $X$. +Computes the minimum number of elements that must be removed from $X$ to obtain a sorted subsequence, which corresponds to $\lvert X \rvert$ minus the size of the [longest non-decreasing subsequence][longest-increasing-subsequence] of $X$. | Complexity | Memory | Iterators | Monotonic | | ----------- | ----------- | ------------- | --------- | | n log n | n | Forward | Yes | -`max_for_size`: $|X| - 1$ when $X$ is sorted in reverse order. 
+`max_for_size`: $\lvert X \rvert - 1$ when $X$ is sorted in reverse order. ### *Runs* @@ -337,7 +378,7 @@ Computes the number of non-decreasing runs in $X$ minus one. | ----------- | ----------- | ------------- | --------- | | n | 1 | Forward | Yes | -`max_for_size`: $|X| - 1$ when $X$ is sorted in reverse order. +`max_for_size`: $\lvert X \rvert - 1$ when $X$ is sorted in reverse order. ### *Spear* @@ -351,9 +392,9 @@ Spearman's footrule distance: sum of distances between the position of individua | ----------- | ----------- | ------------- | --------- | | n log n | n | Forward | Yes | -`max_for_size`: $\frac{|X|²}{2}$ when $X$ is sorted in reverse order. +`max_for_size`: $\frac{\lvert X \rvert²}{2}$ when $X$ is sorted in reverse order. -**Note:** *Spear* does not respect Mannila's criterion 5: $Spear(\langle 4, 1, 2, 3 \rangle) \not \le |\langle 1, 2, 3 \rangle| + Spear(\langle 1, 2, 3 \rangle)$. +**Note:** *Spear* does not respect Mannila's criterion 5: $\mathit{Spear}(\langle 4, 1, 2, 3 \rangle) \not \le \lvert \langle 1, 2, 3 \rangle \rvert + \mathit{Spear}(\langle 1, 2, 3 \rangle)$. ### *SUS* @@ -361,19 +402,19 @@ Spearman's footrule distance: sum of distances between the position of individua #include ``` -Computes the minimum number of non-decreasing subsequences (of possibly not adjacent elements) into which $X$ can be partitioned, minus 1. It happens to correspond to the size of the [longest decreasing subsequence][longest-increasing-subsequence] of $X$ minus 1. +Computes the minimum number of non-decreasing subsequences (of possibly non-adjacent elements) into which $X$ can be partitioned, minus 1. It happens to correspond to the size of the [longest decreasing subsequence][longest-increasing-subsequence] of $X$ minus 1. -*SUS* stands for *Shuffled Up-Sequences* and was introduced in *Sorting Shuffled Monotone Sequences* by C. Levcopoulos and O. Petersson. 
+*SUS* stands for *Shuffled UpSequences* and was introduced in *Sorting Shuffled Monotone Sequences* by C. Levcopoulos and O. Petersson. | Complexity | Memory | Iterators | Monotonic | | ----------- | ----------- | ------------- | --------- | | n log n | n | Forward | Yes | -`max_for_size`: $|X| - 1$ when $X$ is sorted in reverse order. +`max_for_size`: $\lvert X \rvert - 1$ when $X$ is sorted in reverse order. ## Other measures of disorder -Some additional measures of disorder have been described in the literature but do not appear in the partial ordering graph. This section describes some of them but is not an exhaustive list. +Some additional measures of disorder have been described in the literature but do not appear in the partial ordering graph, or are not provided in the library. This section describes some of them but is not an exhaustive list. ### *DS* @@ -383,19 +424,26 @@ A measure called *DS* appears in *Computing and ranking measures of presortednes In other domains, that value is called *F* (for *Footrule*). It is no more helpful a name than *D* or *DS*, so I decided to use *Spear* for this library's name (for *Spearman*) - following the same naming pattern that led to *Ham* -, despite there being no precedent in the literature. +### *Las*, *Lds* and *Lads* + +Those names tend to appear in papers authored by Jingsen Chen, such as *On Partitions and Presortedness of Sequences* and *Computing and Ranking Measures of Presortedness*: +* *Las(X)* is the length of the *longest ascending subsequence* of *X*. We do not provide it because it grows with _order_ in the sequence instead of growing with _disorder_. $\mathit{Rem}(X) = \lvert X \rvert - \mathit{Las}(X)$ is the closest measure provided by the library. +* *Lds(X)* is the length of the *longest descending subsequence* of *X*. It corresponds to [$\mathit{SUS}$][probe-sus] in the library, the minimum number of increasing subsequences into which *X* can be decomposed.
+* *Lads(X)* is an extension of both of the measures above, computing the minimum number of monotonic subsequences (ascending or descending) into which *X* can be decomposed. It is a different name for [*SMS*][probe-sms]. + ### *Par* *Par* is described by V. Estivill-Castro and D. Wood in *A New Measure of Presortedness* as follows: -> *Par(X)* = min { *p* | $X$ is *p*-sorted } +> *Par(X)* = min { *p* $\vert$ $X$ is *p*-sorted } The following definition is also given to determine whether a sequence is *p*-sorted: -> $X$ is *p*-sorted iff for all *i*, *j* ∈ {1, 2, ..., $|X|$}, *i* - *j* > *p* implies *Xj* ≤ *Xi*. +> $X$ is *p*-sorted iff for all *i*, *j* ∈ {1, 2, ..., $\lvert X \rvert$}, *i* - *j* > *p* implies *Xj* ≤ *Xi*. *Right invariant metrics and measures of presortedness* by V. Estivill-Castro, H. Mannila and D. Wood mentions that: -> In fact, *Par*($X$) = *Dis*($X$), for all $X$. +> In fact, $\mathit{Par}(X) = \mathit{Dis}(X)$, for all $X$. In their subsequent papers, those authors consistently use *Dis* instead of *Par*, often accompanied by a link to *A New Measure of Presortedness*. @@ -407,8 +455,21 @@ In their subsequent papers, those authors consistently use *Dis* instead of *Par T. Altman and Y. Igarashi mention the concept of *k*-sortedness and the measure *Radius*($X$) in *Roughly Sorting: Sequential and Parallel Approach*. However *k*-sortedness is the same as *p*-sortedness, and *Radius* is just another name for *Par* (and thus for *Dis*). +### *SMS* + +*SMS* stands for *Shuffled Monotone Sequences* and was introduced in *Sorting Shuffled Monotone Sequences* by C. Levcopoulos and O. Petersson. It computes the minimum number of increasing or decreasing subsequences (of possibly non-adjacent elements) into which a sequence $X$ can be partitioned, minus 1. + +The concept in itself is fairly straightforward, yet the problem of deciding whether $\mathit{SMS}(X) \le k$ is NP-complete, as shown by K. Wagner in *Monotonic coverings of finite sets*.
For this reason, the library does not provide an implementation. It is technically possible to compute an approximation of $\mathit{SMS}(X)$ by repeatedly removing the longest monotonic subsequence from $X$. + +Nevertheless we do know a few of the measure's properties: +* $1 \le \mathit{SMS}(X) \le \min \lbrace \mathit{SUS}(X), \mathit{SDS}(X) \rbrace$, where $\mathit{SUS}$ and $\mathit{SDS}$ respectively stand for [*Shuffled UpSequences*][probe-sus] and *Shuffled DownSequences*, the first being a measure of presortedness that we provide. +* $\mathit{SMS}(X) \le \lfloor \sqrt{2 \lvert X \rvert + \frac{1}{4}} - \frac{1}{2} \rfloor$, as proven by A. Brandstädt and D. Kratsch in *On partitions of permutations into increasing and decreasing subsequences*. + + [hamming-distance]: https://en.wikipedia.org/wiki/Hamming_distance [longest-increasing-subsequence]: https://en.wikipedia.org/wiki/Longest_increasing_subsequence [original-research]: Original-research.md#partial-ordering-of-mono + [probe-sms]: Measures-of-disorder.md#sms + [probe-sus]: Measures-of-disorder.md#sus [sort-race]: https://arxiv.org/ftp/arxiv/papers/1609/1609.04471.pdf diff --git a/docs/Miscellaneous-utilities.md b/docs/Miscellaneous-utilities.md index c59ea876..0b82ba7a 100644 --- a/docs/Miscellaneous-utilities.md +++ b/docs/Miscellaneous-utilities.md @@ -137,6 +137,17 @@ struct dynamic_buffer; This buffer provider allocates on the heap a number of elements depending on a given *size policy* (a class whose `operator()` takes the size of the collection and returns another size). You can use the function objects from `utility/functional.h` as basic size policies. The buffer construction may throw an instance of [`std::bad_alloc`][std-bad-alloc] if it fails to allocate the required memory.
+### `is_sorted` and `is_sorted_until` + +```cpp +#include +#include +``` + +Simple reimplementations of the standard library algorithms [`std::is_sorted`][std-is-sorted] and [`std::is_sorted_until`][std-is-sorted-until], provided as function objects that follow the library's *unified sorting interface*. + +*New in version 2.1.0* + ### Miscellaneous function objects ```cpp @@ -367,6 +378,31 @@ auto m = get(mm); `utility::metrics` is still mostly experimental and unused in the rest of the library. As such this documentation is voluntarily thin. +### `quicksort_adversary` + +```cpp +#include +``` + +`utility::quicksort_adversary` is a function template that implements an algorithm described by M. D. McIlroy in [*A Killer Adversary for Quicksort*][quicksort-adversary], which attempts to trigger the quadratic case of many quicksort implementations by trying to guess the pivot and forcing the tested algorithm to perform a certain set of comparisons. + +```cpp +template +auto quicksort_adversary(Sorter&& sorter, Integer size); +``` + +The function accepts a sorter to test, and a parameter corresponding to the size of the input for which we wish to test the sorter. It then instantiates a collection of `size` elements of `Integer` type that it passes to `sorter`, and returns the result of the operation. It additionally passes a custom comparison function to `sorter`, which means that it only works with *comparison sorters*.
+ +It can be used together with [`metrics::comparisons`][metrics-comparisons] or some other metrics to analyze the number of operations performed, and attempt to detect quadratic behavior in quicksort-like sorters: + +```cpp +auto sorter = cppsort::metrics::comparisons(cppsort::quick_sort); +auto comps = cppsort::utility::quicksort_adversary(sorter, 1000); +std::print("Comparisons: {}", comps.value()); +``` + +*New in version 2.1.0* + ### `size` ```cpp @@ -464,7 +500,32 @@ auto swap_index_pairs_force_unroll(RandomAccessIterator first, -> void; ``` -`swap_index_pairs` loops over the index pairs in the simplest fashion and calls the compare-exchange operations in the simplest possible way. `swap_index_pairs_force_unroll` is a best effort function trying to achieve the same job by unrolling the loop over indices the best it can - a perfect unrolling is thus attempted, but never guaranteed, which might or might result in faster runtime and/or increased binary size. +`swap_index_pairs` loops over the index pairs in the simplest fashion and calls the compare-exchange operations in the simplest possible way. `swap_index_pairs_force_unroll` is a best effort function trying to achieve the same job by unrolling the loop over indices the best it can - a perfect unrolling is thus attempted, but never guaranteed, which might or might not result in faster runtime and/or increased binary size. + +## Strict weak ordering checker + +```cpp +#include +``` + +Comparison sorting requires the comparison function to model a [strict weak ordering][strict-weak-ordering] over the values of the range to sort. Otherwise, the sorting algorithm might fail to sort the collection, or even fail in hard-to-predict ways, potentially invoking undefined behavior. + +Checking whether a comparison function models such an ordering for a given range is an expensive task, which means that it remains an unchecked precondition of algorithms in the library.
If you suspect that a bug with a comparison sort might be linked to a violation of the strict weak ordering by the comparison function, you can use `utility::check_strict_weak_ordering` to analyze it over a given range: + +```cpp +std::vector vec = { 1.0, 9.0, std::nan("1"), 11.5, 56.3, 2.8 }; +assert(not cppsort::utility::check_strict_weak_ordering(vec, std::less{})); +``` + +`check_strict_weak_ordering` is a function object that follows the *unified sorting interface*: it takes a range of elements and a comparison function (and optionally a projection function). When called, it returns `true` if the passed comparison function models a strict weak ordering over the values of the input range, and `false` otherwise. + +**WARNING: `check_strict_weak_ordering` alters the input range.** + +| Time | Memory | Iterators | +| ---- | ------ | ------------- | +| n² | 1 | Random-access | + +*New in version 2.1.0* [apply-permutation]: Miscellaneous-utilities.md#apply_permutation @@ -474,9 +535,11 @@ auto swap_index_pairs_force_unroll(RandomAccessIterator first, [fixed-size-sorters]: Fixed-size-sorters.md [is-stable]: Sorter-traits.md#is_stable [metrics]: Metrics.md + [metrics-comparisons]: Metrics.md#comparisons [numpy-argsort]: https://numpy.org/doc/stable/reference/generated/numpy.argsort.html [p0022]: https://wg21.link/P0022 [pdq-sorter]: Sorters.md#pdq_sorter + [quicksort-adversary]: https://www.cs.dartmouth.edu/~doug/mdmspe.pdf [range-v3]: https://github.com/ericniebler/range-v3 [sorter-adapters]: Sorter-adapters.md [sorters]: Sorters.md @@ -491,10 +554,13 @@ auto swap_index_pairs_force_unroll(RandomAccessIterator first, [std-invoke]: https://en.cppreference.com/w/cpp/utility/functional/invoke [std-is-arithmetic]: https://en.cppreference.com/w/cpp/types/is_arithmetic [std-is-member-function-pointer]: https://en.cppreference.com/w/cpp/types/is_member_function_pointer + [std-is-sorted]: https://en.cppreference.com/w/cpp/algorithm/is_sorted.html + [std-is-sorted-until]:
https://en.cppreference.com/w/cpp/algorithm/is_sorted_until.html [std-less]: https://en.cppreference.com/w/cpp/utility/functional/less [std-less-void]: https://en.cppreference.com/w/cpp/utility/functional/less_void [std-mem-fn]: https://en.cppreference.com/w/cpp/utility/functional/mem_fn [std-ranges-greater]: https://en.cppreference.com/w/cpp/utility/functional/ranges/greater [std-ranges-less]: https://en.cppreference.com/w/cpp/utility/functional/ranges/less [std-size]: https://en.cppreference.com/w/cpp/iterator/size + [strict-weak-ordering]: https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings [transparent-func]: Comparators-and-projections.md#Transparent-function-objects diff --git a/docs/Original-research.md b/docs/Original-research.md index bc1d1173..8416583d 100644 --- a/docs/Original-research.md +++ b/docs/Original-research.md @@ -109,7 +109,7 @@ I tried to apply the same technique to create a 40-sorter, but the resulting 20- ### Sorting network for 29 inputs -_Note: the following has since been improved upon: [SorterHunter][sorter-hunter] found a network that sorts 29 inputs with 164 *compare-exchange* operations._ +_Note: the following has since been improved upon: [SorterHunter][sorter-hunter] found a network that sorts 29 inputs with 164 compare-exchange operations._ The following sorting network for 29 inputs has 165 *compare-exchange* operations (CEs), which is one less that the most size-optimal 29-input sorting networks that I could find in the literature. Here is how I generated it: first it sorts the first 16 inputs and the last 13 inputs independently. Then it merges the two sorted subarrays using a size 32 Batcher odd-even merge network (the version that does not need the inputs to be interleaved), where all compare-exchange operations working on indexes greater than 28 have been dropped. 
Dropping comparators in such a way is ok: consider that the values at the indexes [29, 32) are greater than every other value in the array to sort, and it will become intuitive that dropping them generates a correct merging network of a smaller size. @@ -204,44 +204,44 @@ Somehow Edelkamp and Weiß eventually [published a paper][quick-merge-sort-arxiv The measure of disorder *Mono* is described in [*Sort Race*][sort-race] by H. Zhang, B. Meng and Y. Liang. They describe it as follows: -> Intuitively, if $Mono(X) = k$, then $X$ is the concatenation of $k$ monotonic lists (either sorted or reversely sorted). +> Intuitively, if $\mathit{Mono}(X) = k$, then $X$ is the concatenation of $k$ monotonic lists (either sorted or reversely sorted). It computes the number of ascending or descending runs in $X$. Technically the definition in the paper makes it return 1 when the $X$ is sorted, which goes against Mannila's original definition of a measure of presortedness, which starts with the following criterion: > If $X$ is sorted, then $M(X) = 0$ -As a result the library's [`probe::mono`][probe-mono] uses $Mono(X) - 1$ instead, which does satisfy this first criterion, albeit not the fourth one: +As a result the library's [`probe::mono`][probe-mono] uses $\mathit{Mono}(X) - 1$ instead, which does satisfy this first criterion, albeit not the fourth one: -> If $X \le Y$, then $M(XY) ≤ M(X) + M(Y)$ +> If $X \le Y$, then $M(XY) \le M(X) + M(Y)$ -Counterexample: $Mono(\langle 1, 2, 3, 4, 5 \rangle) = 0$ and $Mono(\langle 10, 9, 8, 7, 6 \rangle) = 0$, but $Mono(\langle 1, 2, 3, 4, 5, 10, 9, 8, 7, 6 \rangle) = 1$. As such, we still don't have a definition of $Mono$ that satisfies all the criterion for a measure of presortedness. +Counterexample: $\mathit{Mono}(\langle 1, 2, 3, 4, 5 \rangle) = 0$ and $\mathit{Mono}(\langle 10, 9, 8, 7, 6 \rangle) = 0$, but $\mathit{Mono}(\langle 1, 2, 3, 4, 5, 10, 9, 8, 7, 6 \rangle) = 1$. 
As such, we still don't have a definition of $\mathit{Mono}$ that satisfies all the criteria for a measure of presortedness. Regardless, it is interesting to find how it fits in the existing partial ordering of measures of disorder.: -- $Mono \preceq Runs$: this relation is already mentioned in *Sort Race* and rather intuitive: since $Mono$ detects both non-increasing and non-decreasing runs, it is as least as good as $Runs$ that only detects non-decreasing runs. -- $SMS \preceq Mono$: this one seems intuitive too: $SMS$ which detects the minimum number of subsequences of non-adjacent elements should be at least as good as $Mono$ which only detects subsequences of adjacent elements. -- $Enc \preceq Mono$: when making encroaching lists, $Enc$ is guaranteed to create no more than one such new list per non-increasing or non-decreasing run found in $X$, so the result will be at most as big as that of $Mono$. However $Enc$ can also find presortedness in patterns such as $\langle 5, 6, 4, 7, 3, 8, 2, 9, 1, 10 \rangle$ where $Mono$ finds maximum disorder. Therefore $Enc(X)$ should always be at most as big as $Mono(X)$. -- $Mono \not \equiv SUS$: - - There is no constant $c$ such as $c \cdot SUS(X) \le Mono(X)$: a sequence $X$ like $\langle n - 1, ..., 2, 1, 0 \rangle$ always has $Mono(X) = 1$ (a single decreasing run), but $SUS(X) = |X|$ (each element is decreasing, and as such constitues a new single-element ascending subsequence). - - There is no constant $c$ such as $c \cdot Mono(X) \le SUS(X)$: a sequence $X$ like $\langle 0, \frac{n}{2}, 1, \frac{n}{2} + 1, 2, \frac{n}{2} + 2, ..., \frac{n}{2} - 2, n - 1, \frac{n}{2} - 1, n \rangle$ always has $SUS(X) = 2$ (an ascending subsequence of even indices, another one of odd indices), but $Mono(X) = \frac{|X|}{2}$ (every pair of elements is a new descending run). 
-- $Mono \not \equiv Max$: - There is no constant $c$ such as $c \cdot Max(X) \le Mono(X)$: a sequence $X$ like $\langle n - 1, ..., 2, 1, 0 \rangle$ always has $Mono(X) = 1$ (a single decreasing run), but $Max(X) = |X| - 1$ (the distance between the smallest and greatest elements is maximal). - There is no constant $c$ such as $c \cdot Mono(X) \le Max(X)$: a sequence $X$ like $\langle 1, 0, 3, 2, ..., n , n - 1 \rangle$ always has $Max(X) = 1$ (all inversions are with a neighbour, hence they all equal $1$), but $Mono(X) = \frac{|X|}{2}$ (every pair of elements is a new descending run). +- $\mathit{Mono} \preceq \mathit{Runs}$: this relation is already mentioned in *Sort Race* and rather intuitive: since $\mathit{Mono}$ detects both non-increasing and non-decreasing runs, it is at least as good as $\mathit{Runs}$ that only detects non-decreasing runs. +- $\mathit{SMS} \preceq \mathit{Mono}$: this one seems intuitive too: $\mathit{SMS}$ which detects the minimum number of subsequences of non-adjacent elements should be at least as good as $\mathit{Mono}$ which only detects subsequences of adjacent elements. +- $\mathit{Enc} \preceq \mathit{Mono}$: when making encroaching lists, $\mathit{Enc}$ is guaranteed to create no more than one such new list per non-increasing or non-decreasing run found in $X$, so the result will be at most as big as that of $\mathit{Mono}$. However $\mathit{Enc}$ can also find presortedness in patterns such as $\langle 5, 6, 4, 7, 3, 8, 2, 9, 1, 10 \rangle$ where $\mathit{Mono}$ finds maximum disorder. Therefore $\mathit{Enc}(X)$ should always be at most as big as $\mathit{Mono}(X)$.
+- $\mathit{Mono} \not \equiv \mathit{SUS}$: + - There is no constant $c$ such as $c \cdot \mathit{SUS}(X) \le \mathit{Mono}(X)$: a sequence $X$ like $\langle n - 1, ..., 2, 1, 0 \rangle$ always has $\mathit{Mono}(X) = 1$ (a single decreasing run), but $\mathit{SUS}(X) = \lvert X \rvert$ (each element is decreasing, and as such constitutes a new single-element ascending subsequence). + - There is no constant $c$ such as $c \cdot \mathit{Mono}(X) \le \mathit{SUS}(X)$: a sequence $X$ like $\langle 0, \frac{n}{2}, 1, \frac{n}{2} + 1, 2, \frac{n}{2} + 2, ..., \frac{n}{2} - 2, n - 1, \frac{n}{2} - 1, n \rangle$ always has $\mathit{SUS}(X) = 2$ (an ascending subsequence of even indices, another one of odd indices), but $\mathit{Mono}(X) = \frac{\lvert X \rvert}{2}$ (every pair of elements is a new descending run). +- $\mathit{Mono} \not \equiv \mathit{Max}$: + - There is no constant $c$ such as $c \cdot \mathit{Max}(X) \le \mathit{Mono}(X)$: a sequence $X$ like $\langle n - 1, ..., 2, 1, 0 \rangle$ always has $\mathit{Mono}(X) = 1$ (a single decreasing run), but $\mathit{Max}(X) = \lvert X \rvert - 1$ (the distance between the smallest and greatest elements is maximal). + - There is no constant $c$ such as $c \cdot \mathit{Mono}(X) \le \mathit{Max}(X)$: a sequence $X$ like $\langle 1, 0, 3, 2, ..., n , n - 1 \rangle$ always has $\mathit{Max}(X) = 1$ (all inversions are with a neighbour, hence they all equal $1$), but $\mathit{Mono}(X) = \frac{\lvert X \rvert}{2}$ (every pair of elements is a new descending run). The following relations can be transitively deduced from the results presented in *A framework for adaptive sorting*: -- $Mono \not \preceq Exc$: we know that $SMS \preceq Mono$ and $SMS \not \preceq Exc$. -- $Mono \not \preceq Inv$: we know that $SMS \preceq Mono$ and $SMS \not \preceq Inv$. -- $Hist \not \preceq Mono$: we know that $Mono \preceq Runs$ and $Hist \not \preceq Runs$. 
+- $\mathit{Mono} \not \preceq \mathit{Exc}$: we know that $\mathit{SMS} \preceq \mathit{Mono}$ and $\mathit{SMS} \not \preceq \mathit{Exc}$. +- $\mathit{Mono} \not \preceq \mathit{Inv}$: we know that $\mathit{SMS} \preceq \mathit{Mono}$ and $\mathit{SMS} \not \preceq \mathit{Inv}$. +- $\mathit{Hist} \not \preceq \mathit{Mono}$: we know that $\mathit{Mono} \preceq \mathit{Runs}$ and $\mathit{Hist} \not \preceq \mathit{Runs}$. The following relations have yet to be analyzed: -- $Osc \preceq Mono$ -- $Loc \preceq Mono$ +- $\mathit{Osc} \preceq \mathit{Mono}$ +- $\mathit{Loc} \preceq \mathit{Mono}$ -Another interesting property of $Mono$ is that it returns the same amount of disorder for a sequence $X$ and for a sequence corresponding to $X$ with the order of all elements reversed, a property that is only shared with $Osc$ in the library. +Another interesting property of $\mathit{Mono}$ is that it returns the same amount of disorder for a sequence $X$ and for a sequence corresponding to $X$ with the order of all elements reversed, a property that is only shared with $\mathit{Osc}$ in the library. [better-sorting-networks]: https://etd.ohiolink.edu/!etd.send_file?accession=kent1239814529 [cycle-sort]: https://en.wikipedia.org/wiki/Cycle_sort - [divide-sort-merge-strategy]: http://www.dtic.mil/dtic/tr/fulltext/u2/737270.pdf + [divide-sort-merge-strategy]: https://apps.dtic.mil/sti/tr/pdf/AD0737270.pdf [exact-sort]: https://www.geocities.ws/p356spt/ [indirect-adapter]: Sorter-adapters.md#indirect_adapter [morwenn-gist]: https://gist.github.com/Morwenn diff --git a/docs/Quickstart.md b/docs/Quickstart.md index 6d332e3b..d4420377 100644 --- a/docs/Quickstart.md +++ b/docs/Quickstart.md @@ -164,6 +164,40 @@ Almost any sorter can be passed to any adapter, with a few exceptions: The specific restrictions are all documented in the adapters descriptions. 
+## Metrics + +[Metrics][metrics] are a special kind of sorter adapters that can be used to retrieve information about a sorting operation, such as the number of comparisons performed by a sorter, or the time it took to sort a collection. They are used together with the [metrics tools][metrics-tools] from the library utilities. + +* Count the number of comparisons performed by a comparison sort: + ```cpp + #include + #include + #include + + int main() + { + auto sorter = cppsort::metrics::comparisons{}; + std::vector collection = { /* ... */ }; + auto comps = sorter(collection); + std::print("slabsort perform {} comparisons", comps.value()); + } + ``` + +* Compute the time it takes to sort a collection: + ```cpp + #include + #include + #include + + int main() + { + auto sorter = cppsort::metrics::running_time{}; + std::vector collection = { /* ... */ }; + auto comps = sorter(collection); + std::print("melsort took {}", comps.value()); + } + ``` + ## Two-step sorting Sometimes the information is not represented as simple collection of class instances, but as [parallel arrays][parallel-arrays] (also known as structure of arrays). 
To sort those, **cpp-sort** provides components for two-step sorting of random-access collections: @@ -212,8 +246,10 @@ The previous sections describe some of the main tools provided by **cpp-sort** b [cmake]: https://cmake.org/ [conan]: https://conan.io/ - [merge-sorter]: Sorters.md#merge_sorter [measures-of-disorder]: Measures-of-disorder.md + [merge-sorter]: Sorters.md#merge_sorter + [metrics]: Metrics.md + [metrics-tools]: Miscellaneous-utilities.md#metrics-tools [numpy-argsort]: https://numpy.org/doc/stable/reference/generated/numpy.argsort.html [parallel-arrays]: https://en.wikipedia.org/wiki/Parallel_array [pdq-sorter]: Sorters.md#pdq_sorter @@ -228,5 +264,5 @@ The previous sections describe some of the main tools provided by **cpp-sort** b [std-searchers]: https://en.cppreference.com/w/cpp/utility/functional#Searchers [std-sort]: https://en.cppreference.com/w/cpp/algorithm/sort [tooling]: Tooling.md - [utility-apply-permutation]: https://github.com/Morwenn/cpp-sort/wiki/Miscellaneous-utilities#apply_permutation - [utility-sorted-indices]: https://github.com/Morwenn/cpp-sort/wiki/Miscellaneous-utilities#sorted_indices + [utility-apply-permutation]: Miscellaneous-utilities.md#apply_permutation + [utility-sorted-indices]: Miscellaneous-utilities.md#sorted_indices diff --git a/docs/Sorter-facade.md b/docs/Sorter-facade.md index 1ad9310d..77b1e7f4 100644 --- a/docs/Sorter-facade.md +++ b/docs/Sorter-facade.md @@ -45,7 +45,7 @@ The return type `Ret` can either match that of the sorter, or be `void`, in whic Note that the function pointer conversion syntax above is made up, but it allows to clearly highlight what it does while hiding the `typedef`s needed for the syntax to be valid. In these signatures, `Ret` is the [`std::result_of_t`][std-result-of] of the sorter called with the parameters. The actual implementation is more verbose and redundant, but it allows to transform a sorter into a function pointer corresponding to any valid overload of `operator()`. 
-***WARNING:** conversion to function pointers does not work with MSVC ([issue #185][issue-185]).* +***WARNING:** conversion to function pointers does not work with MSVC ([issue github#185][issue-185]).* ## `operator()` diff --git a/docs/Sorters.md b/docs/Sorters.md index 820726d2..ca82711c 100644 --- a/docs/Sorters.md +++ b/docs/Sorters.md @@ -465,7 +465,6 @@ struct spread_sorter: [heap-sorter]: Sorters.md#heap_sorter [insertion-sort]: https://en.wikipedia.org/wiki/Insertion_sort [introselect]: https://en.wikipedia.org/wiki/Introselect - [issue-168]: https://github.com/Morwenn/cpp-sort/issues/168 [measures-of-disorder]: Measures-of-disorder.md [median-of-medians]: https://en.wikipedia.org/wiki/Median_of_medians [merge-sort]: https://en.wikipedia.org/wiki/Merge_sort diff --git a/docs/Tooling.md b/docs/Tooling.md index ede97c1d..a56e4ed6 100644 --- a/docs/Tooling.md +++ b/docs/Tooling.md @@ -45,10 +45,10 @@ Note: when `CPPSORT_ENABLE_AUDITS` is `ON`, assertions in the library are enable conan search cpp-sort --remote=conancenter ``` -And then install any version to your local cache as follows (here with version 2.0.0): +And then install any version to your local cache as follows (here with version 2.1.0): ```sh -conan install --requires=cpp-sort/2.0.0 +conan install --requires=cpp-sort/2.1.0 ``` The packages downloaded from conan-center are minimal and only contain the files required to use **cpp-sort** as a library: the headers, CMake files and licensing information. If you need anything else you have to create your own package with the `conanfile.py` available in this repository. @@ -63,8 +63,6 @@ The packages downloaded from conan-center are minimal and only contain the files This can notably used to browse old versions of the documentation. It seems however that `--ref` doesn't understand Git tags, so you have to create a proper branch from the version tag you want to browse beforehand. 
-Due to slight markup differences, some pages might not fully render correctly but it should nonetheless be a better experience than navigaitng the Markdown files by hand. - [assertions-and-audits]: Home.md#assertions--audits [catch2]: https://github.com/catchorg/Catch2 diff --git a/docs/Writing-a-bubble_sorter.md b/docs/Writing-a-bubble_sorter.md index 98bc4e91..2ba74f75 100644 --- a/docs/Writing-a-bubble_sorter.md +++ b/docs/Writing-a-bubble_sorter.md @@ -286,7 +286,7 @@ Generic agorithms are good, more generic algorithms are sometimes better. The cu C++20 ranges introduce the notion of ["proxy iterators"][proxy-iterators], which are basically iterators that can't yield a proper reference to the object they point to, but instead yield a proxy object acting as a reference. In order to handle such iterators, C++20 introduces the *customization point objects* [`std::ranges::iter_move`][std-iter-move] and [`std::ranges::iter_swap`][std-iter-swap] which should be used instead of `std::move(*it)` and `std::iter_swap(it1, it2)` in generic algorithms that aim to support proxy iterators. -**cpp-sort** being a C++17 library, it can't rely on these CPOs and provides the utility functions [`utility::iter_move` and `utility::iter_swap`][utility-iter-move] to replace them. They are a bit cruder than their standard equivalents: you have to import them into the current namespace and perform an unqualified call, *à la* `std::swap`. Moreover, they are currently not compatible with their C++20 counterparts yet for legacy reasons (see [issue 223][issue-223]). +**cpp-sort** being a C++17 library, it can't rely on these CPOs and provides the utility functions [`utility::iter_move` and `utility::iter_swap`][utility-iter-move] to replace them. They are a bit cruder than their standard equivalents: you have to import them into the current namespace and perform an unqualified call, *à la* `std::swap`. 
Moreover, they are currently not compatible with their C++20 counterparts yet for legacy reasons (see [issue github#223][issue-223]). ```cpp template diff --git a/docs/Writing-a-randomizing_adapter.md b/docs/Writing-a-randomizing_adapter.md index 2e3437c0..a29c274a 100644 --- a/docs/Writing-a-randomizing_adapter.md +++ b/docs/Writing-a-randomizing_adapter.md @@ -72,7 +72,7 @@ When possible, a proper *sorter adapter* is expected to be callable with the sam ## Returned value -There is currently no strict rule about what a *sorter adapter* should return (this is actually a [open design issue][issue-134]), though the general wisdom is that an adapter should transparently provide as many features as the sorter it adapts when it reasonably can. The idea is that replacing the sorter by its wrapped counterpart should be easy. +There is no strict rule about what a *sorter adapter* should return. The general wisdom is that an adapter should transparently provide as many features as the *sorter* it adapts when it reasonably can unless it has good reasons to do otherwise. The idea is that replacing the sorter by its wrapped counterpart should be easy. We don't have a specific use for the return channel of `randomizing_adapter` and it is simple to make it transitively return whatever the wrapped sorter returns - and even convenient -, so I decided to do just that. @@ -142,7 +142,6 @@ The full implementation can be found in the `examples` folder. 
[ctad]: https://en.cppreference.com/w/cpp/language/class_template_argument_deduction [golden-tests]: https://en.wikipedia.org/wiki/Characterization_test [hyrums-law]: https://www.hyrumslaw.com/ - [issue-134]: https://github.com/Morwenn/cpp-sort/issues/134 [iterator-category]: https://en.cppreference.com/w/cpp/iterator [proxy-iterators]: https://wg21.link/P0022 [quick-sorter]: Sorters.md#quick_sorter diff --git a/docs/images/measures-of-disorder-osc-cross.png b/docs/images/measures-of-disorder-osc-cross.png new file mode 100644 index 00000000..77a9719a Binary files /dev/null and b/docs/images/measures-of-disorder-osc-cross.png differ diff --git a/docs/images/mops-partial-ordering.png b/docs/images/mops-partial-ordering.png deleted file mode 100644 index f2b3b183..00000000 Binary files a/docs/images/mops-partial-ordering.png and /dev/null differ diff --git a/docs/images/pairwise-order-shadow.png b/docs/images/pairwise-order-shadow.png new file mode 100644 index 00000000..bab57b54 Binary files /dev/null and b/docs/images/pairwise-order-shadow.png differ diff --git a/docs/images/partial-ordering-measures-of-disorder.png b/docs/images/partial-ordering-measures-of-disorder.png new file mode 100644 index 00000000..cebc34f6 Binary files /dev/null and b/docs/images/partial-ordering-measures-of-disorder.png differ diff --git a/docs/images/sorting-exchange-cycles.png b/docs/images/sorting-exchange-cycles.png new file mode 100644 index 00000000..f3b75ac4 Binary files /dev/null and b/docs/images/sorting-exchange-cycles.png differ diff --git a/include/cpp-sort/detail/associate_iterator.h b/include/cpp-sort/detail/associate_iterator.h index 176bad78..d6629366 100644 --- a/include/cpp-sort/detail/associate_iterator.h +++ b/include/cpp-sort/detail/associate_iterator.h @@ -11,6 +11,7 @@ #include #include #include +#include "config.h" #include "iterator_traits.h" namespace cppsort::detail @@ -62,6 +63,7 @@ namespace cppsort::detail {} auto operator=(association&& other) noexcept + 
CPPSORT_LIFETIME_BOUND -> association& { *it = std::move(*other.it); @@ -70,6 +72,7 @@ namespace cppsort::detail } auto operator=(associated_value, Data>&& other) + CPPSORT_LIFETIME_BOUND -> association& { *it = std::move(other.value); @@ -124,6 +127,7 @@ namespace cppsort::detail {} auto operator=(associated_value&& other) + CPPSORT_LIFETIME_BOUND -> associated_value& { value = std::move(other.value); @@ -133,6 +137,7 @@ namespace cppsort::detail [[nodiscard]] auto get() + CPPSORT_LIFETIME_BOUND -> Value& { return value; @@ -140,6 +145,7 @@ namespace cppsort::detail [[nodiscard]] auto get() const + CPPSORT_LIFETIME_BOUND -> const Value& { return value; @@ -207,6 +213,7 @@ namespace cppsort::detail // Increment/decrement operators auto operator++() + CPPSORT_LIFETIME_BOUND -> associate_iterator& { ++_it; @@ -214,6 +221,7 @@ namespace cppsort::detail } auto operator--() + CPPSORT_LIFETIME_BOUND -> associate_iterator& { --_it; @@ -221,6 +229,7 @@ namespace cppsort::detail } auto operator+=(difference_type increment) + CPPSORT_LIFETIME_BOUND -> associate_iterator& { _it += increment; @@ -228,6 +237,7 @@ namespace cppsort::detail } auto operator-=(difference_type increment) + CPPSORT_LIFETIME_BOUND -> associate_iterator& { _it -= increment; diff --git a/include/cpp-sort/detail/config.h b/include/cpp-sort/detail/config.h index 219f3d58..40675a2c 100644 --- a/include/cpp-sort/detail/config.h +++ b/include/cpp-sort/detail/config.h @@ -142,4 +142,19 @@ # endif #endif +//////////////////////////////////////////////////////////// +// CPPSORT_LIFETIME_BOUND + +#ifdef __has_cpp_attribute +# if __has_cpp_attribute(clang::lifetimebound) +# define CPPSORT_LIFETIME_BOUND [[clang::lifetimebound]] +# elif __has_cpp_attribute(msvc::lifetimebound) +# define CPPSORT_LIFETIME_BOUND [[msvc::lifetimebound]] +# else +# define CPPSORT_LIFETIME_BOUND +# endif +#else +# define CPPSORT_LIFETIME_BOUND +#endif + #endif // CPPSORT_DETAIL_CONFIG_H_ diff --git 
a/include/cpp-sort/detail/fake_category_iterator.h b/include/cpp-sort/detail/fake_category_iterator.h index 462405a2..e22bef39 100644 --- a/include/cpp-sort/detail/fake_category_iterator.h +++ b/include/cpp-sort/detail/fake_category_iterator.h @@ -12,6 +12,7 @@ #include #include #include +#include "config.h" #include "iterator_traits.h" #include "type_traits.h" @@ -84,6 +85,7 @@ namespace cppsort::detail // Increment/decrement operators auto operator++() + CPPSORT_LIFETIME_BOUND -> fake_category_iterator& { ++_it; @@ -99,6 +101,7 @@ namespace cppsort::detail } auto operator--() + CPPSORT_LIFETIME_BOUND -> fake_category_iterator& { --_it; @@ -114,6 +117,7 @@ namespace cppsort::detail } auto operator+=(difference_type increment) + CPPSORT_LIFETIME_BOUND -> fake_category_iterator& { _it += increment; @@ -121,6 +125,7 @@ namespace cppsort::detail } auto operator-=(difference_type increment) + CPPSORT_LIFETIME_BOUND -> fake_category_iterator& { _it -= increment; diff --git a/include/cpp-sort/detail/fixed_size_list.h b/include/cpp-sort/detail/fixed_size_list.h index e6a95087..a416fc41 100644 --- a/include/cpp-sort/detail/fixed_size_list.h +++ b/include/cpp-sort/detail/fixed_size_list.h @@ -175,6 +175,7 @@ namespace cppsort::detail [[nodiscard]] auto next_free_node() noexcept + CPPSORT_LIFETIME_BOUND -> node_type* { // Retrieve next free node @@ -307,6 +308,7 @@ namespace cppsort::detail // Increment/decrement operators auto operator++() noexcept + CPPSORT_LIFETIME_BOUND -> fixed_size_list_iterator& { ptr_ = ptr_->next; @@ -322,6 +324,7 @@ namespace cppsort::detail } auto operator--() noexcept + CPPSORT_LIFETIME_BOUND -> fixed_size_list_iterator& { ptr_ = ptr_->prev; @@ -487,6 +490,7 @@ namespace cppsort::detail [[nodiscard]] auto front() noexcept + CPPSORT_LIFETIME_BOUND -> reference { return static_cast(sentinel_node_.next)->value; @@ -494,6 +498,7 @@ namespace cppsort::detail [[nodiscard]] auto back() noexcept + CPPSORT_LIFETIME_BOUND -> reference { return 
static_cast(sentinel_node_.prev)->value; @@ -511,6 +516,7 @@ namespace cppsort::detail [[nodiscard]] auto begin() noexcept + CPPSORT_LIFETIME_BOUND -> iterator { return iterator(sentinel_node_.next); @@ -518,6 +524,7 @@ namespace cppsort::detail [[nodiscard]] auto end() noexcept + CPPSORT_LIFETIME_BOUND -> iterator { return iterator(&sentinel_node_); @@ -537,12 +544,14 @@ namespace cppsort::detail // Modifiers auto insert(iterator pos, const value_type& value) + CPPSORT_LIFETIME_BOUND -> iterator { return iterator(insert_node_(pos.base(), value)); } auto insert(iterator pos, value_type&& value) + CPPSORT_LIFETIME_BOUND -> iterator { return iterator(insert_node_(pos.base(), std::move(value))); @@ -781,6 +790,7 @@ namespace cppsort::detail // Helper functions auto insert_node_(list_node_base* pos, const value_type& value) + CPPSORT_LIFETIME_BOUND -> node_type* { node_type* new_node = node_pool_->next_free_node(); @@ -790,6 +800,7 @@ namespace cppsort::detail } auto insert_node_(list_node_base* pos, value_type&& value) + CPPSORT_LIFETIME_BOUND -> node_type* { node_type* new_node = node_pool_->next_free_node(); @@ -800,6 +811,7 @@ namespace cppsort::detail template auto insert_node_(list_node_base* pos, Callable setter) + CPPSORT_LIFETIME_BOUND -> node_type* { node_type* new_node = node_pool_->next_free_node(); diff --git a/include/cpp-sort/detail/grail_sort.h b/include/cpp-sort/detail/grail_sort.h index 0cf517c7..bf1e0639 100644 --- a/include/cpp-sort/detail/grail_sort.h +++ b/include/cpp-sort/detail/grail_sort.h @@ -467,7 +467,7 @@ namespace cppsort::detail::grail auto&& proj = utility::as_function(projection); auto size = last - first; - auto kbuf = std::min(K, LExtBuf); + auto kbuf = (std::min)(K, LExtBuf); while (kbuf & (kbuf - 1)) { kbuf &= kbuf - 1; // max power or 2 - just in case } diff --git a/include/cpp-sort/detail/immovable_vector.h b/include/cpp-sort/detail/immovable_vector.h index c3bebd2c..29614c3b 100644 --- a/include/cpp-sort/detail/immovable_vector.h 
+++ b/include/cpp-sort/detail/immovable_vector.h @@ -71,6 +71,7 @@ namespace cppsort::detail // Element access auto operator[](std::ptrdiff_t pos) noexcept + CPPSORT_LIFETIME_BOUND -> T& { CPPSORT_ASSERT(pos <= end_ - memory_); @@ -78,6 +79,7 @@ namespace cppsort::detail } auto front() noexcept + CPPSORT_LIFETIME_BOUND -> T& { CPPSORT_ASSERT(memory_ != end_); @@ -85,6 +87,7 @@ namespace cppsort::detail } auto back() noexcept + CPPSORT_LIFETIME_BOUND -> T& { CPPSORT_ASSERT(end_ - memory_ > 0); @@ -95,12 +98,14 @@ namespace cppsort::detail // Iterators auto begin() noexcept + CPPSORT_LIFETIME_BOUND -> T* { return memory_; } auto end() noexcept + CPPSORT_LIFETIME_BOUND -> T* { return end_; @@ -121,6 +126,7 @@ namespace cppsort::detail template auto emplace_back(Args&&... args) + CPPSORT_LIFETIME_BOUND -> T* { CPPSORT_ASSERT(end_ - memory_ < capacity_); diff --git a/include/cpp-sort/detail/longest_non_descending_subsequence.h b/include/cpp-sort/detail/longest_increasing_subsequence.h similarity index 71% rename from include/cpp-sort/detail/longest_non_descending_subsequence.h rename to include/cpp-sort/detail/longest_increasing_subsequence.h index 90155e52..dd51193b 100644 --- a/include/cpp-sort/detail/longest_non_descending_subsequence.h +++ b/include/cpp-sort/detail/longest_increasing_subsequence.h @@ -2,8 +2,8 @@ * Copyright (c) 2021-2025 Morwenn * SPDX-License-Identifier: MIT */ -#ifndef CPPSORT_DETAIL_LONGEST_NON_DESCENDING_SUBSEQUENCE_H_ -#define CPPSORT_DETAIL_LONGEST_NON_DESCENDING_SUBSEQUENCE_H_ +#ifndef CPPSORT_DETAIL_LONGEST_INCREASING_SUBSEQUENCE_H_ +#define CPPSORT_DETAIL_LONGEST_INCREASING_SUBSEQUENCE_H_ //////////////////////////////////////////////////////////// // Headers @@ -15,13 +15,13 @@ #include #include #include "iterator_traits.h" -#include "upper_bound.h" +#include "lower_bound.h" namespace cppsort::detail { - // Longest non-decreasing subsequence, computed with an altered + // Longest increasing subsequence, computed with an altered // 
patience sorting algorithm - returns a pair containing the - // size of the LNDS and the size of the collection + // size of the LIS and the size of the collection template< bool RecomputeSize, @@ -29,9 +29,9 @@ namespace cppsort::detail typename Compare, typename Projection > - auto longest_non_descending_subsequence(ForwardIterator first, ForwardIterator last, - difference_type_t size, - Compare compare, Projection projection) + auto longest_increasing_subsequence(ForwardIterator first, ForwardIterator last, + difference_type_t size, + Compare compare, Projection projection) -> std::pair, difference_type_t> { constexpr bool is_random_access = std::is_base_of_v< @@ -59,18 +59,18 @@ namespace cppsort::detail // Top (smaller) elements in patience sorting stacks std::vector stack_tops; - while (first != last) { - auto it = detail::upper_bound( + do { + auto it = detail::lower_bound( stack_tops.begin(), stack_tops.end(), proj(*first), compare, utility::indirect{} | projection); if (it == stack_tops.end()) { - // The element is bigger than everything else, + // The element is strictly bigger than everything else, // create a new "stack" to put it stack_tops.emplace_back(first); } else { - // The element is strictly smaller than the top - // of a given stack, replace the stack top + // The element is strictly smaller than or equal to + // the top of a given stack, replace the stack top *it = first; } ++first; @@ -79,10 +79,10 @@ namespace cppsort::detail // Compute the size as-we-go if iterators are not random-access ++size; } - } + } while (first != last); return { stack_tops.size(), size }; } } -#endif // CPPSORT_DETAIL_LONGEST_NON_DESCENDING_SUBSEQUENCE_H_ +#endif // CPPSORT_DETAIL_LONGEST_INCREASING_SUBSEQUENCE_H_ diff --git a/include/cpp-sort/detail/memory.h b/include/cpp-sort/detail/memory.h index 3e056d4b..159e764f 100644 --- a/include/cpp-sort/detail/memory.h +++ b/include/cpp-sort/detail/memory.h @@ -23,6 +23,7 @@ #include #include #include +#include "config.h" 
#include "type_traits.h" namespace cppsort::detail @@ -201,6 +202,7 @@ namespace cppsort::detail temporary_buffer& operator=(const temporary_buffer&) = delete; auto operator=(temporary_buffer&& other) noexcept + CPPSORT_LIFETIME_BOUND -> temporary_buffer& { using std::swap; diff --git a/include/cpp-sort/detail/merge_insertion_sort.h b/include/cpp-sort/detail/merge_insertion_sort.h index c6d1904a..fceb6d18 100644 --- a/include/cpp-sort/detail/merge_insertion_sort.h +++ b/include/cpp-sort/detail/merge_insertion_sort.h @@ -13,6 +13,7 @@ #include #include #include +#include "config.h" #include "fixed_size_list.h" #include "immovable_vector.h" #include "iterator_traits.h" @@ -90,6 +91,7 @@ namespace cppsort::detail // Increment/decrement operators auto operator++() + CPPSORT_LIFETIME_BOUND -> group_iterator& { std::advance(_it, _size); @@ -97,6 +99,7 @@ namespace cppsort::detail } auto operator--() + CPPSORT_LIFETIME_BOUND -> group_iterator& { std::advance(_it, -_size); @@ -104,6 +107,7 @@ namespace cppsort::detail } auto operator+=(difference_type increment) + CPPSORT_LIFETIME_BOUND -> group_iterator& { _it += _size * increment; @@ -111,6 +115,7 @@ namespace cppsort::detail } auto operator-=(difference_type increment) + CPPSORT_LIFETIME_BOUND -> group_iterator& { _it -= _size * increment; diff --git a/include/cpp-sort/detail/quick_merge_sort.h b/include/cpp-sort/detail/quick_merge_sort.h index 38a0ea1d..46071856 100644 --- a/include/cpp-sort/detail/quick_merge_sort.h +++ b/include/cpp-sort/detail/quick_merge_sort.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2025 Morwenn + * Copyright (c) 2018-2026 Morwenn * SPDX-License-Identifier: MIT */ #ifndef CPPSORT_DETAIL_QUICK_MERGE_SORT_H_ @@ -138,7 +138,7 @@ namespace cppsort::detail internal_mergesort(first, pivot, size_left, pivot, compare, projection); if (std::is_base_of_v>) { - // Avoid weird codegen bug with MinGW-w64 (see GitHub issue #151) + // Avoid weird codegen bug with MinGW-w64 (see issue github#151) 
std::advance(first, size_left); } else { first = pivot; diff --git a/include/cpp-sort/detail/ska_sort.h b/include/cpp-sort/detail/ska_sort.h index 6cc9fbe8..5b30bccd 100644 --- a/include/cpp-sort/detail/ska_sort.h +++ b/include/cpp-sort/detail/ska_sort.h @@ -23,6 +23,7 @@ #include #include #include +#include "config.h" #include "iterator_traits.h" // projected_t #include "memcpy_cast.h" #include "partition.h" @@ -460,7 +461,7 @@ namespace cppsort::detail using next = SubKey; using sub_key_type = T; - static auto sub_key(const T& value, void*) + static auto sub_key(const T& value CPPSORT_LIFETIME_BOUND, void*) -> const T& { return value; diff --git a/include/cpp-sort/detail/spreadsort/detail/float_sort.h b/include/cpp-sort/detail/spreadsort/detail/float_sort.h index aa6c4a48..2dd613c9 100644 --- a/include/cpp-sort/detail/spreadsort/detail/float_sort.h +++ b/include/cpp-sort/detail/spreadsort/detail/float_sort.h @@ -137,7 +137,7 @@ namespace cppsort::detail::spreadsort::detail auto&& proj = utility::as_function(projection); unsigned log_divisor = get_log_divisor( - last - first, rough_log_2_size(Size_type(max - min))); + last - first, rough_log_2_size(Size_type(max/2 - min/2)) + 1); Div_type div_min = min >> log_divisor; Div_type div_max = max >> log_divisor; unsigned bin_count = unsigned(div_max - div_min) + 1; @@ -203,7 +203,7 @@ namespace cppsort::detail::spreadsort::detail auto&& proj = utility::as_function(projection); unsigned log_divisor = get_log_divisor( - last - first, rough_log_2_size(Size_type(max - min))); + last - first, rough_log_2_size(Size_type(max/2 - min/2)) + 1); Div_type div_min = min >> log_divisor; Div_type div_max = max >> log_divisor; unsigned bin_count = unsigned(div_max - div_min) + 1; @@ -268,7 +268,7 @@ namespace cppsort::detail::spreadsort::detail auto&& proj = utility::as_function(projection); unsigned log_divisor = get_log_divisor( - last - first, rough_log_2_size(Size_type(max - min))); + last - first, rough_log_2_size(Size_type(max/2 
- min/2)) + 1); Div_type div_min = min >> log_divisor; Div_type div_max = max >> log_divisor; unsigned bin_count = unsigned(div_max - div_min) + 1; diff --git a/include/cpp-sort/metrics/moves.h b/include/cpp-sort/metrics/moves.h index eb179081..2d7e672e 100644 --- a/include/cpp-sort/metrics/moves.h +++ b/include/cpp-sort/metrics/moves.h @@ -18,6 +18,7 @@ #include #include #include "../detail/checkers.h" +#include "../detail/config.h" #include "../detail/fake_category_iterator.h" #include "../detail/immovable_vector.h" #include "../detail/iterator_traits.h" @@ -50,6 +51,7 @@ namespace cppsort::metrics } auto operator=(move_counting_wrapper&& other) + CPPSORT_LIFETIME_BOUND -> move_counting_wrapper& { value = std::move(other.value); diff --git a/include/cpp-sort/probes.h b/include/cpp-sort/probes.h index f13bb5d5..3b0699ea 100644 --- a/include/cpp-sort/probes.h +++ b/include/cpp-sort/probes.h @@ -8,6 +8,7 @@ //////////////////////////////////////////////////////////// // Headers //////////////////////////////////////////////////////////// +#include #include #include #include diff --git a/include/cpp-sort/probes/amp.h b/include/cpp-sort/probes/amp.h new file mode 100644 index 00000000..b2dee5e9 --- /dev/null +++ b/include/cpp-sort/probes/amp.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2025 Morwenn + * SPDX-License-Identifier: MIT + */ +#ifndef CPPSORT_PROBES_AMP_H_ +#define CPPSORT_PROBES_AMP_H_ + +//////////////////////////////////////////////////////////// +// Headers +//////////////////////////////////////////////////////////// +#include +#include +#include +#include +#include +#include +#include +#include "../detail/iterator_traits.h" +#include "../detail/type_traits.h" + +namespace cppsort +{ +namespace probe +{ + namespace detail + { + struct amp_impl + { + template< + typename ForwardIterator, + typename Compare = std::less<>, + typename Projection = utility::identity, + typename = cppsort::detail::enable_if_t< + is_projection_iterator_v + > + > + auto 
operator()(ForwardIterator first, ForwardIterator last, + Compare compare={}, Projection projection={}) const + -> cppsort::detail::difference_type_t + { + using difference_type = cppsort::detail::difference_type_t; + auto&& comp = utility::as_function(compare); + auto&& proj = utility::as_function(projection); + + if (first == last || std::next(first) == last) { + return 0; + } + + difference_type size = 0, + shadow = 0, + min = 0, + max = 0; + + auto current = first; + auto next = std::next(current); + do { + ++size; + + if (comp(proj(*current), proj(*next))) { + max = (std::max)(max, ++shadow); + } else if (comp(proj(*next), proj(*current))) { + min = (std::min)(min, --shadow); + } else { + // Neighbours that compare equivalent don't contribute to the amplitude + --size; + } + + ++current; + ++next; + } while (next != last); + + return size - (max - min); + } + + template + static constexpr auto max_for_size(Integer n) + -> Integer + { + return n <= 2 ? 0 : n - 2; + } + }; + } + + inline constexpr sorter_facade amp{}; +}} + +#endif // CPPSORT_PROBES_AMP_H_ diff --git a/include/cpp-sort/probes/dis.h b/include/cpp-sort/probes/dis.h index d5ca5528..a9d7a819 100644 --- a/include/cpp-sort/probes/dis.h +++ b/include/cpp-sort/probes/dis.h @@ -92,7 +92,7 @@ namespace cppsort::probe while (j <= i && not comp(proj(*lr_cummax[j - 1]), proj(*rl_min_it)) && (j == 1 || not comp(proj(*rl_min_it), proj(*lr_cummax[j - 2])))) { // Compute the next value of DM - res = std::max(res, i - j); + res = (std::max)(res, i - j); // Compute the next value of RL if (--i <= res) { return res; diff --git a/include/cpp-sort/probes/rem.h b/include/cpp-sort/probes/rem.h index dd2bc2aa..6763d42a 100644 --- a/include/cpp-sort/probes/rem.h +++ b/include/cpp-sort/probes/rem.h @@ -11,11 +11,13 @@ #include #include #include +#include +#include #include #include #include #include -#include "../detail/longest_non_descending_subsequence.h" +#include "../detail/longest_increasing_subsequence.h" #include 
"../detail/type_traits.h" namespace cppsort::probe @@ -49,10 +51,10 @@ namespace cppsort::probe // with the assumption that it's better than O(n) - which is at least // consistent as far as the standard library is concerned. We also // handle C arrays whose size is known and part of the type. - auto res = cppsort::detail::longest_non_descending_subsequence( + auto res = cppsort::detail::longest_increasing_subsequence( std::begin(range), std::end(range), utility::size(range), - std::move(compare), std::move(projection) + cppsort::not_fn(cppsort::flip(compare)), std::move(projection) ); auto lnds_size = res.second - res.first; return lnds_size >= 0 ? lnds_size : 0; @@ -73,8 +75,9 @@ namespace cppsort::probe // We give 0 as a "dummy" value since it will be recomputed, but it // is also used by the non-random-access iterators version as the // initial value used for the size count - auto res = cppsort::detail::longest_non_descending_subsequence( - first, last, 0, std::move(compare), std::move(projection) + auto res = cppsort::detail::longest_increasing_subsequence( + first, last, 0, + cppsort::not_fn(cppsort::flip(compare)), std::move(projection) ); auto lnds_size = res.second - res.first; return lnds_size >= 0 ? 
lnds_size : 0; diff --git a/include/cpp-sort/probes/sus.h b/include/cpp-sort/probes/sus.h index 633f0d96..2ce971c7 100644 --- a/include/cpp-sort/probes/sus.h +++ b/include/cpp-sort/probes/sus.h @@ -10,11 +10,11 @@ //////////////////////////////////////////////////////////// #include #include -#include +#include #include #include #include -#include "../detail/longest_non_descending_subsequence.h" +#include "../detail/longest_increasing_subsequence.h" #include "../detail/type_traits.h" namespace cppsort::probe @@ -37,10 +37,10 @@ namespace cppsort::probe { // We don't need the size information, so we can avoid // computing it altogether - auto res = cppsort::detail::longest_non_descending_subsequence( + auto res = cppsort::detail::longest_increasing_subsequence( first, last, 0, // Dummy value, not useful here - cppsort::not_fn(compare), std::move(projection) + cppsort::flip(compare), std::move(projection) ); return res.first > 0 ? res.first - 1 : 0; } diff --git a/include/cpp-sort/utility/adapter_storage.h b/include/cpp-sort/utility/adapter_storage.h index 3aa2f5ea..c65615da 100644 --- a/include/cpp-sort/utility/adapter_storage.h +++ b/include/cpp-sort/utility/adapter_storage.h @@ -10,6 +10,7 @@ //////////////////////////////////////////////////////////// #include #include +#include "../detail/config.h" namespace cppsort::utility { @@ -87,24 +88,28 @@ namespace cppsort::utility } constexpr auto get() & noexcept + CPPSORT_LIFETIME_BOUND -> Sorter& { return static_cast(sorter); } constexpr auto get() const& noexcept + CPPSORT_LIFETIME_BOUND -> const Sorter& { return static_cast(sorter); } constexpr auto get() && noexcept + CPPSORT_LIFETIME_BOUND -> Sorter&& { return static_cast(sorter); } constexpr auto get() const&& noexcept + CPPSORT_LIFETIME_BOUND -> const Sorter&& { return static_cast(sorter); diff --git a/include/cpp-sort/utility/buffer.h b/include/cpp-sort/utility/buffer.h index 5260752d..8cb811cf 100644 --- a/include/cpp-sort/utility/buffer.h +++ 
b/include/cpp-sort/utility/buffer.h @@ -11,6 +11,7 @@ #include #include #include +#include "../detail/config.h" namespace cppsort::utility { @@ -39,49 +40,57 @@ namespace cppsort::utility } constexpr auto operator[](std::size_t pos) - -> decltype(_memory[pos]) + CPPSORT_LIFETIME_BOUND + -> typename std::array::reference { return _memory[pos]; } constexpr auto operator[](std::size_t pos) const - -> decltype(_memory[pos]) + CPPSORT_LIFETIME_BOUND + -> typename std::array::const_reference { return _memory[pos]; } constexpr auto begin() - -> decltype(_memory.data()) + CPPSORT_LIFETIME_BOUND + -> T* { return _memory.data(); } constexpr auto begin() const - -> decltype(_memory.data()) + CPPSORT_LIFETIME_BOUND + -> const T* { return _memory.data(); } constexpr auto cbegin() const - -> decltype(_memory.data()) + CPPSORT_LIFETIME_BOUND + -> const T* { return _memory.data(); } constexpr auto end() - -> decltype(_memory.data() + _memory.size()) + CPPSORT_LIFETIME_BOUND + -> T* { return _memory.data() + _memory.size(); } constexpr auto end() const - -> decltype(_memory.data() + _memory.size()) + CPPSORT_LIFETIME_BOUND + -> const T* { return _memory.data() + _memory.size(); } constexpr auto cend() const - -> decltype(_memory.data() + _memory.size()) + CPPSORT_LIFETIME_BOUND + -> const T* { return _memory.data() + _memory.size(); } @@ -193,49 +202,57 @@ namespace cppsort::utility } auto operator[](std::size_t pos) - -> decltype(_memory[pos]) + CPPSORT_LIFETIME_BOUND + -> T& { return _memory[pos]; } auto operator[](std::size_t pos) const - -> decltype(_memory[pos]) + CPPSORT_LIFETIME_BOUND + -> const T& { return _memory[pos]; } auto begin() - -> decltype(_memory.get()) + CPPSORT_LIFETIME_BOUND + -> T* { return _memory.get(); } auto begin() const - -> decltype(_memory.get()) + CPPSORT_LIFETIME_BOUND + -> const T* { return _memory.get(); } auto cbegin() const - -> decltype(_memory.get()) + CPPSORT_LIFETIME_BOUND + -> const T* { return _memory.get(); } auto end() - -> 
decltype(_memory.get() + size()) + CPPSORT_LIFETIME_BOUND + -> T* { return _memory.get() + size(); } auto end() const - -> decltype(_memory.get() + size()) + CPPSORT_LIFETIME_BOUND + -> const T* { return _memory.get() + size(); } auto cend() const - -> decltype(_memory.get() + size()) + CPPSORT_LIFETIME_BOUND + -> const T* { return _memory.get() + size(); } diff --git a/include/cpp-sort/utility/check_strict_weak_ordering.h b/include/cpp-sort/utility/check_strict_weak_ordering.h new file mode 100644 index 00000000..05382e69 --- /dev/null +++ b/include/cpp-sort/utility/check_strict_weak_ordering.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2026 Morwenn + * SPDX-License-Identifier: MIT + */ +#ifndef CPPSORT_UTILITY_CHECK_STRICT_WEAK_ORDERING_H_ +#define CPPSORT_UTILITY_CHECK_STRICT_WEAK_ORDERING_H_ + +//////////////////////////////////////////////////////////// +// Headers +//////////////////////////////////////////////////////////// +#include +#include +#include +#include +#include +#include +#include "../detail/heapsort.h" +#include "../detail/is_sorted_until.h" +#include "../detail/type_traits.h" + +namespace cppsort::utility +{ + //////////////////////////////////////////////////////////// + // Check whether a comparison function implements a strict + // weak ordering over a range of data, following an + // algorithm described by danlark1 here: + // https://github.com/danlark1/quadratic_strict_weak_ordering + + namespace detail + { + template + constexpr auto compare_equivalent(ForwardIterator it1, ForwardIterator it2, + Compare compare, Projection projection) + -> bool + { + return not compare(projection(*it1), projection(*it2)) + && not compare(projection(*it2), projection(*it1)); + } + + struct strict_weak_ordering_checker_impl + { + template< + typename ForwardIterator, + typename Compare = std::less<>, + typename Projection = utility::identity, + typename = cppsort::detail::enable_if_t< + is_projection_iterator_v + > + > + constexpr auto 
operator()(ForwardIterator first, ForwardIterator last, + Compare compare={}, Projection projection={}) const + -> bool + { + // In the comments below, we use the following abbreviations: + // - R is the input range + // - C is the comparison function + // - P is the projection function + + auto&& comp = utility::as_function(compare); + auto&& proj = utility::as_function(projection); + + while (first != last) { + // 1. Sort R + // + // Note: standard library implementations of heapsort supposedly do not + // crash when passed a comparison function that does not model a strict + // weak ordering, and ours happens to be copy-pasted from libc++ + cppsort::detail::heapsort(first, last, compare, projection); + + // 2. If R is not sorted, then C does not model a strict weak ordering + if (not cppsort::detail::is_sorted(first, last, compare, projection)) { + return false; + } + + // 3. Find the first it such that *first < *it + auto it = std::next(first); + while (it != last && not comp(proj(*first), proj(*it))) { + ++it; + } + + // 4. Check that all elements before it compare equivalent + for (auto it1 = first; it1 != it; ++it1) { + for (auto it2 = it1; it2 != it; ++it2) { + if (not compare_equivalent(it1, it2, comp, proj)) { + return false; + } + } + } + + // 5. 
Check that all elements separated by it follow transitivity + for (auto it1 = first; it1 != it; ++it1) { + for (auto it2 = it; it2 != last; ++it2) { + if (comp(proj(*it2), proj(*it1))) { + return false; + } + if (not comp(proj(*it1), proj(*it2))) { + return false; + } + } + } + + // Exclude leading elements that compare equivalent, + // start all over again with the rest of the elements + first = it; + } + + // All checks passed, C models a strict weak ordering over R + return true; + } + + //////////////////////////////////////////////////////////// + // Sorter traits + + using iterator_category = std::random_access_iterator_tag; + }; + } + + struct strict_weak_ordering_checker: + sorter_facade + {}; + + inline constexpr strict_weak_ordering_checker check_strict_weak_ordering{}; +} + +#endif // CPPSORT_UTILITY_CHECK_STRICT_WEAK_ORDERING_H_ diff --git a/include/cpp-sort/utility/functional.h b/include/cpp-sort/utility/functional.h index 9bd04b33..f39f153a 100644 --- a/include/cpp-sort/utility/functional.h +++ b/include/cpp-sort/utility/functional.h @@ -137,7 +137,7 @@ namespace cppsort::utility projection_base { template - constexpr auto operator()(T&& value) const noexcept + constexpr auto operator()(T&& value CPPSORT_LIFETIME_BOUND) const noexcept -> T&& { return std::forward(value); diff --git a/include/cpp-sort/utility/is_sorted.h b/include/cpp-sort/utility/is_sorted.h new file mode 100644 index 00000000..7885fec9 --- /dev/null +++ b/include/cpp-sort/utility/is_sorted.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2026 Morwenn + * SPDX-License-Identifier: MIT + */ +#ifndef CPPSORT_UTILITY_CHECK_IS_SORTED_H_ +#define CPPSORT_UTILITY_CHECK_IS_SORTED_H_ + +//////////////////////////////////////////////////////////// +// Headers +//////////////////////////////////////////////////////////// +#include +#include +#include +#include +#include +#include +#include "../detail/is_sorted_until.h" +#include "../detail/type_traits.h" + +namespace cppsort::utility +{ + namespace 
detail + { + struct is_sorted_impl + { + template< + typename ForwardIterator, + typename Compare = std::less<>, + typename Projection = utility::identity, + typename = cppsort::detail::enable_if_t< + is_projection_iterator_v + > + > + constexpr auto operator()(ForwardIterator first, ForwardIterator last, + Compare compare={}, Projection projection={}) const + -> bool + { + return cppsort::detail::is_sorted( + first, last, + std::move(compare), std::move(projection) + ); + } + + //////////////////////////////////////////////////////////// + // Sorter traits + + using iterator_category = std::forward_iterator_tag; + }; + } + + struct is_sorted_t: + sorter_facade + {}; + + inline constexpr is_sorted_t is_sorted{}; +} + +#endif // CPPSORT_UTILITY_CHECK_IS_SORTED_H_ diff --git a/include/cpp-sort/utility/is_sorted_until.h b/include/cpp-sort/utility/is_sorted_until.h new file mode 100644 index 00000000..99170469 --- /dev/null +++ b/include/cpp-sort/utility/is_sorted_until.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2026 Morwenn + * SPDX-License-Identifier: MIT + */ +#ifndef CPPSORT_UTILITY_CHECK_IS_SORTED_UNTIL_H_ +#define CPPSORT_UTILITY_CHECK_IS_SORTED_UNTIL_H_ + +//////////////////////////////////////////////////////////// +// Headers +//////////////////////////////////////////////////////////// +#include +#include +#include +#include +#include +#include +#include "../detail/is_sorted_until.h" +#include "../detail/type_traits.h" + +namespace cppsort::utility +{ + namespace detail + { + struct is_sorted_until_impl + { + template< + typename ForwardIterator, + typename Compare = std::less<>, + typename Projection = utility::identity, + typename = cppsort::detail::enable_if_t< + is_projection_iterator_v + > + > + constexpr auto operator()(ForwardIterator first, ForwardIterator last, + Compare compare={}, Projection projection={}) const + -> ForwardIterator + { + return cppsort::detail::is_sorted_until( + first, last, + std::move(compare), std::move(projection) + ); + } + + 
//////////////////////////////////////////////////////////// + // Sorter traits + + using iterator_category = std::forward_iterator_tag; + }; + } + + struct is_sorted_until_t: + sorter_facade + {}; + + inline constexpr is_sorted_until_t is_sorted_until{}; +} + +#endif // CPPSORT_UTILITY_CHECK_IS_SORTED_UNTIL_H_ diff --git a/include/cpp-sort/utility/metrics_tools.h b/include/cpp-sort/utility/metrics_tools.h index 0cc7bbc5..d66b2148 100644 --- a/include/cpp-sort/utility/metrics_tools.h +++ b/include/cpp-sort/utility/metrics_tools.h @@ -12,6 +12,7 @@ #include #include #include +#include "../detail/config.h" #include "../detail/type_traits.h" namespace cppsort::utility @@ -70,6 +71,7 @@ namespace cppsort::utility constexpr auto operator=(const T& other) noexcept(std::is_nothrow_copy_assignable_v) + CPPSORT_LIFETIME_BOUND -> metric& { _value = other; @@ -78,6 +80,7 @@ namespace cppsort::utility constexpr auto operator=(T&& other) noexcept(std::is_nothrow_move_assignable_v) + CPPSORT_LIFETIME_BOUND -> metric& { _value = std::move(other); @@ -87,6 +90,7 @@ namespace cppsort::utility template constexpr auto operator=(const metric& other) noexcept(std::is_nothrow_assignable_v) + CPPSORT_LIFETIME_BOUND -> metric& { _value = other._value; @@ -96,6 +100,7 @@ namespace cppsort::utility template constexpr auto operator=(metric&& other) noexcept(std::is_nothrow_assignable_v) + CPPSORT_LIFETIME_BOUND -> metric& { _value = std::move(other._value); @@ -226,7 +231,7 @@ namespace cppsort::utility // Stream operators template - friend auto operator<<(std::ostream& stream, const metric& met) + friend auto operator<<(std::ostream& stream CPPSORT_LIFETIME_BOUND, const metric& met) -> decltype(stream << std::declval()) { stream << met.value(); @@ -271,28 +276,28 @@ namespace cppsort::utility // Index-based get() template - friend constexpr auto get(metrics& mm) + friend constexpr auto get(metrics& mm CPPSORT_LIFETIME_BOUND) -> std::tuple_element_t...>>& { return std::get(mm.metrics_); } 
template - friend constexpr auto get(const metrics& mm) + friend constexpr auto get(const metrics& mm CPPSORT_LIFETIME_BOUND) -> const std::tuple_element_t...>>& { return std::get(mm.metrics_); } template - friend constexpr auto get(metrics&& mm) + friend constexpr auto get(metrics&& mm CPPSORT_LIFETIME_BOUND) -> std::tuple_element_t...>>&& { return std::get(std::move(mm).metrics_); } template - friend constexpr auto get(const metrics&& mm) + friend constexpr auto get(const metrics&& mm CPPSORT_LIFETIME_BOUND) -> const std::tuple_element_t...>>&& { return std::get(std::move(mm).metrics_); @@ -302,28 +307,28 @@ namespace cppsort::utility // Tag-based get() template> - friend constexpr auto get(metrics& mm) + friend constexpr auto get(metrics& mm CPPSORT_LIFETIME_BOUND) -> std::tuple_element_t...>>& { return std::get(mm.metrics_); } template> - friend constexpr auto get(const metrics& mm) + friend constexpr auto get(const metrics& mm CPPSORT_LIFETIME_BOUND) -> const std::tuple_element_t...>>& { return std::get(mm.metrics_); } template> - friend constexpr auto get(metrics&& mm) + friend constexpr auto get(metrics&& mm CPPSORT_LIFETIME_BOUND) -> std::tuple_element_t...>>&& { return std::get(std::move(mm).metrics_); } template> - friend constexpr auto get(const metrics&& mm) + friend constexpr auto get(const metrics&& mm CPPSORT_LIFETIME_BOUND) -> const std::tuple_element_t...>>&& { return std::get(std::move(mm).metrics_); diff --git a/include/cpp-sort/utility/quicksort_adversary.h b/include/cpp-sort/utility/quicksort_adversary.h new file mode 100644 index 00000000..b4978a8a --- /dev/null +++ b/include/cpp-sort/utility/quicksort_adversary.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2025 Morwenn + * SPDX-License-Identifier: MIT + */ +#ifndef CPPSORT_QUICKSORT_ADVERSARY_H_ +#define CPPSORT_QUICKSORT_ADVERSARY_H_ + +//////////////////////////////////////////////////////////// +// Headers +//////////////////////////////////////////////////////////// +#include +#include + 
+namespace cppsort::utility +{ + // Implementation of a quicksort adversary as described by M. D. McIlroy + // in *A Killer Adversary for Quicksort* + + template + auto quicksort_adversary(Sorter&& sorter, Integer size) + { + Integer solid = 0; + auto gas = size - 1; + std::vector elements(size, gas); + + std::vector values(size, 0); + std::iota(values.begin(), values.end(), 0); + + int pivot_candidate = size; // Too big to match any + return sorter(values, [&, gas](Integer lhs_idx, Integer rhs_idx) { + int& lhs = elements[lhs_idx]; + int& rhs = elements[rhs_idx]; + if (lhs == gas && rhs == gas) { + if (lhs_idx == pivot_candidate) { + lhs = solid++; + } else { + rhs = solid++; + } + } + if (lhs == gas) { + pivot_candidate = lhs_idx; + } else if (rhs == gas) { + pivot_candidate = rhs_idx; + } + return lhs < rhs; + }); + } +} + +#endif // CPPSORT_QUICKSORT_ADVERSARY_H_ \ No newline at end of file diff --git a/include/cpp-sort/version.h b/include/cpp-sort/version.h index d2c34fea..0ea4eb92 100644 --- a/include/cpp-sort/version.h +++ b/include/cpp-sort/version.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2025 Morwenn + * Copyright (c) 2018-2026 Morwenn * SPDX-License-Identifier: MIT */ #ifndef CPPSORT_VERSION_H_ @@ -8,7 +8,7 @@ // Semantic versioning macros #define CPPSORT_VERSION_MAJOR 2 -#define CPPSORT_VERSION_MINOR 0 +#define CPPSORT_VERSION_MINOR 1 #define CPPSORT_VERSION_PATCH 0 #endif // CPPSORT_VERSION_H_ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e03906c8..f2f5fc25 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2025 Morwenn +# Copyright (c) 2015-2026 Morwenn # SPDX-License-Identifier: MIT include(cpp-sort-utils) @@ -25,16 +25,15 @@ else() FetchContent_Declare( Catch2 GIT_REPOSITORY https://github.com/catchorg/Catch2 - GIT_TAG 25319fd3047c6bdcf3c0170e76fa526c77f99ca9 # v3.10.0 + GIT_TAG 88abf9bf325c798c33f54f6b9220ef885b267f4f # v3.12.0 ) FetchContent_GetProperties(Catch2) if (NOT 
Catch2_POPULATED) FetchContent_Populate(Catch2) add_subdirectory(${catch2_SOURCE_DIR} ${catch2_BINARY_DIR}) list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras) - # Make Catch2::Catch2 a SYSTEM target - get_target_property(catch2_INCLUDE_DIRECTORY Catch2 INTERFACE_INCLUDE_DIRECTORIES) - set_target_properties(Catch2 PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${catch2_INCLUDE_DIRECTORY}") + mark_system_library(Catch2) + mark_system_library(Catch2WithMain) endif() endif() include(Catch) @@ -59,9 +58,8 @@ else() FetchContent_Populate(RapidCheck) set(RC_INSTALL_ALL_EXTRAS ON) add_subdirectory(${rapidcheck_SOURCE_DIR} ${rapidcheck_BINARY_DIR}) - # Make rapidcheck a SYSTEM target - get_target_property(rapidcheck_INCLUDE_DIRECTORY rapidcheck INTERFACE_INCLUDE_DIRECTORIES) - set_target_properties(rapidcheck PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${rapidcheck_INCLUDE_DIRECTORY}") + mark_system_library(rapidcheck) + mark_system_library(rapidcheck_catch) endif() endif() @@ -209,6 +207,7 @@ add_executable(main-tests distributions/shuffled_16_values.cpp # Probes tests + probes/amp.cpp probes/block.cpp probes/dis.cpp probes/enc.cpp @@ -264,8 +263,11 @@ add_executable(main-tests utility/branchless_traits.cpp utility/buffer.cpp utility/chainable_projections.cpp + utility/check_strict_weak_ordering.cpp + utility/is_sorted.cpp utility/iter_swap.cpp utility/metric_tools.cpp + utility/quicksort_adversary.cpp utility/sorted_indices.cpp utility/sorted_iterators.cpp utility/sorting_networks.cpp @@ -303,8 +305,7 @@ endif() include(CTest) -string(RANDOM LENGTH 6 ALPHABET 123456789 RNG_SEED) -catch_discover_tests(main-tests EXTRA_ARGS --rng-seed ${RNG_SEED}) +catch_discover_tests(main-tests) if (NOT "${CPPSORT_SANITIZE}" MATCHES "address|memory") - catch_discover_tests(heap-memory-exhaustion-tests EXTRA_ARGS --rng-seed ${RNG_SEED}) + catch_discover_tests(heap-memory-exhaustion-tests) endif() diff --git a/tests/probes/amp.cpp b/tests/probes/amp.cpp new file mode 100644 
index 00000000..4cebd6ca --- /dev/null +++ b/tests/probes/amp.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2025 Morwenn + * SPDX-License-Identifier: MIT + */ +#include +#include +#include +#include +#include +#include +#include +#include + +TEST_CASE( "measure of disorder: amp", "[probe][amp]" ) +{ + using cppsort::probe::amp; + + SECTION( "simple test" ) + { + std::forward_list li = { 4, 6, 5, 2, 9, 1, 3, 8, 0, 7 }; + CHECK( amp(li) == 7 ); + CHECK( amp(li.begin(), li.end()) == 7 ); + + std::vector> tricky(li.begin(), li.end()); + CHECK( amp(tricky, &internal_compare::compare_to) == 7 ); + } + + SECTION( "upper bound" ) + { + // The upper bound should correspond to a sequence that + // oscillates at every step + + std::forward_list li = { 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9 }; + auto max_n = amp.max_for_size(cppsort::utility::size(li)); + CHECK( max_n == 9 ); + CHECK( amp(li) == max_n ); + CHECK( amp(li.begin(), li.end()) == max_n ); + } + + // https://morwenn.github.io/presortedness/2025/10/18/TSB005-symmetry-of-amp.html + rc::prop("Amp(Reversed(X)) = Amp(X)", [](std::vector sequence) { + auto amp_x = amp(sequence); + std::reverse(sequence.begin(), sequence.end()); + return amp(sequence) == amp_x; + }); +} diff --git a/tests/probes/every_probe_common.cpp b/tests/probes/every_probe_common.cpp index c0a519a3..6c90ff5b 100644 --- a/tests/probes/every_probe_common.cpp +++ b/tests/probes/every_probe_common.cpp @@ -18,6 +18,7 @@ // TEMPLATE_TEST_CASE( "test every probe with all_equal distribution", "[probe]", + decltype(cppsort::probe::amp), decltype(cppsort::probe::block), decltype(cppsort::probe::dis), decltype(cppsort::probe::enc), @@ -43,6 +44,7 @@ TEMPLATE_TEST_CASE( "test every probe with all_equal distribution", "[probe]", } TEMPLATE_TEST_CASE( "test every probe with a sorted collection", "[probe]", + decltype(cppsort::probe::amp), decltype(cppsort::probe::block), decltype(cppsort::probe::dis), decltype(cppsort::probe::enc), @@ -69,6 +71,7 @@ TEMPLATE_TEST_CASE( 
"test every probe with a sorted collection", "[probe]", } TEMPLATE_TEST_CASE( "test every probe with a 0 or 1 element", "[probe]", + decltype(cppsort::probe::amp), decltype(cppsort::probe::block), decltype(cppsort::probe::dis), decltype(cppsort::probe::enc), @@ -107,6 +110,7 @@ TEMPLATE_TEST_CASE( "test every probe with a 0 or 1 element", "[probe]", } TEMPLATE_TEST_CASE( "test order isomorphism for every probe", "[probe]", + decltype(cppsort::probe::amp), decltype(cppsort::probe::block), decltype(cppsort::probe::dis), decltype(cppsort::probe::enc), @@ -166,6 +170,7 @@ namespace } TEMPLATE_TEST_CASE( "test M(subsequence(X)) <= M(X) for most probes M", "[probe]", + decltype(cppsort::probe::amp), decltype(cppsort::probe::dis), decltype(cppsort::probe::enc), decltype(cppsort::probe::inv), @@ -279,7 +284,7 @@ TEMPLATE_TEST_CASE( "test M(2, 1, 4, 3, 6, 5, ...) <= |X| * M(2, 1) / 2 for most { // From *Sorting and Measures of Disorder* by Estivill-Castro: // property derived from Mannila's criteria 2 & 4 - // The following probes don't satisfy it: Block, Mono, Osc + // The following probes don't satisfy it: Amp, Block, Mono, Osc int size = 1000; std::vector sequence(size, 0); @@ -295,6 +300,7 @@ TEMPLATE_TEST_CASE( "test M(2, 1, 4, 3, 6, 5, ...) 
<= |X| * M(2, 1) / 2 for most } TEMPLATE_TEST_CASE( "test M(aX) <= |X| + M(X) for most probes M", "[probe]", + decltype(cppsort::probe::amp), decltype(cppsort::probe::block), decltype(cppsort::probe::dis), decltype(cppsort::probe::enc), @@ -335,7 +341,7 @@ TEMPLATE_TEST_CASE( "test prefix monotonicity", "[probe]", decltype(cppsort::probe::sus) ) { // Property formalized by Estivill-Castro in *Sorting and Measures of Disorder* - // The following probes don't satisfy it: Block, Mono, Osc + // The following probes don't satisfy it: Amp, Block, Mono, Osc // Note: the original paper claims that Osc also satisfies this property, // but it fails for X=⟨3, 0⟩ Y=⟨⟩ Z=⟨4, 2⟩ @@ -383,7 +389,7 @@ TEMPLATE_TEST_CASE( "test monotonicity", "[probe]", decltype(cppsort::probe::sus) ) { // Property formalized by Estivill-Castro in *Sorting and Measures of Disorder* - // The following probes don't satisfy it: Block, Enc, Mono, Osc + // The following probes don't satisfy it: Amp, Block, Enc, Mono, Osc // Note: the original paper claims that MEnc[k,A,D] also satisfies this property, // but at the time of writing this comment I ahev no idea what that means @@ -424,6 +430,7 @@ TEMPLATE_TEST_CASE( "test monotonicity", "[probe]", } TEMPLATE_TEST_CASE( "test that probes never produce more disorder than their theoretical maximum", "[probe]", + decltype(cppsort::probe::amp), decltype(cppsort::probe::block), decltype(cppsort::probe::dis), decltype(cppsort::probe::enc), diff --git a/tests/probes/every_probe_heap_memory_exhaustion.cpp b/tests/probes/every_probe_heap_memory_exhaustion.cpp index 6d437a5c..436cff71 100644 --- a/tests/probes/every_probe_heap_memory_exhaustion.cpp +++ b/tests/probes/every_probe_heap_memory_exhaustion.cpp @@ -20,6 +20,7 @@ // TEMPLATE_TEST_CASE( "heap exhaustion for random-access probes", "[probe][heap_exhaustion]", + decltype(cppsort::probe::amp), decltype(cppsort::probe::dis), decltype(cppsort::probe::mono), decltype(cppsort::probe::runs) ) @@ -38,6 +39,7 @@ 
TEMPLATE_TEST_CASE( "heap exhaustion for random-access probes", "[probe][heap_ex } TEMPLATE_TEST_CASE( "heap exhaustion for bidirectional probes", "[probe][heap_exhaustion]", + decltype(cppsort::probe::amp), decltype(cppsort::probe::dis), decltype(cppsort::probe::mono), decltype(cppsort::probe::runs) ) @@ -56,6 +58,7 @@ TEMPLATE_TEST_CASE( "heap exhaustion for bidirectional probes", "[probe][heap_ex } TEMPLATE_TEST_CASE( "heap exhaustion for forward probes", "[probe][heap_exhaustion]", + decltype(cppsort::probe::amp), decltype(cppsort::probe::dis), decltype(cppsort::probe::mono), decltype(cppsort::probe::runs) ) diff --git a/tests/probes/relations.cpp b/tests/probes/relations.cpp index 4b0feb1e..f4a14288 100644 --- a/tests/probes/relations.cpp +++ b/tests/probes/relations.cpp @@ -183,4 +183,16 @@ TEST_CASE( "relations between measures of disorder", "[probe]" ) rc::prop("Enc(X) ≤ Mono(X) + 1", [](const std::vector& sequence) { return enc(sequence) <= mono(sequence) + 1; }); + + // Original research about Amp + + // https://morwenn.github.io/presortedness/2025/11/09/TSB007-relationship-between-amp-and-runs.html + rc::prop("Amp(X) ≤ 2 Runs(X)", [](const std::vector& sequence) { + return amp(sequence) <= 2 * runs(sequence); + }); + + // Conjecture + rc::prop("Mono(X) ≤ Amp(X)", [](const std::vector& sequence) { + return mono(sequence) <= amp(sequence); + }); } diff --git a/tests/utility/check_strict_weak_ordering.cpp b/tests/utility/check_strict_weak_ordering.cpp new file mode 100644 index 00000000..f9932628 --- /dev/null +++ b/tests/utility/check_strict_weak_ordering.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2026 Morwenn + * SPDX-License-Identifier: MIT + */ +#include +#include +#include +#include + +TEST_CASE( "check_strict_weak_ordering test", "[utility][check_strict_weak_ordering]" ) +{ + using cppsort::utility::check_strict_weak_ordering; + + SECTION( "empty collection" ) + { + std::vector vec = {}; + CHECK( check_strict_weak_ordering(vec) ); + } + + SECTION( 
"one element" ) + { + std::vector vec = { 0 }; + CHECK( check_strict_weak_ordering(vec) ); + } + + SECTION( "one element, NaN" ) + { + std::vector vec = { std::nan("1") }; + CHECK( check_strict_weak_ordering(vec) ); + } + + SECTION( "empty collection" ) + { + std::vector vec = {}; + CHECK( check_strict_weak_ordering(vec) ); + } + + SECTION( "two elements" ) + { + std::vector vec1 = { 0, 0 }; + CHECK( check_strict_weak_ordering(vec1) ); + + std::vector vec2 = { 0, 5 }; + CHECK( check_strict_weak_ordering(vec2) ); + + std::vector vec3 = { 5, 0 }; + CHECK( check_strict_weak_ordering(vec3) ); + } + + SECTION( "two elements, NaN" ) + { + std::vector vec1 = { std::nan("1"), std::nan("1") }; + CHECK( check_strict_weak_ordering(vec1) ); + + std::vector vec2 = { std::nan("1"), 5 }; + CHECK( check_strict_weak_ordering(vec2) ); + + std::vector vec3 = { 5, std::nan("1") }; + CHECK( check_strict_weak_ordering(vec3) ); + } + + SECTION( "small collection" ) + { + std::vector vec = { 1, 4, 32, 5, 89, 43, 56, 8, 7, 2, 44, 37, 73 }; + CHECK( check_strict_weak_ordering(vec) ); + } + + SECTION( "small collection with duplicates" ) + { + std::vector vec = { 1, 4, 32, 5, 1, 89, 43, 56, 8, 7, 2, 2, 4, 44, 37, 73 }; + CHECK( check_strict_weak_ordering(vec) ); + } + + SECTION( "small collection with NaN" ) + { + std::vector vec = { + 1.0, 4.0, 32.0, 5.0, 89.0, 43.0, 56.0, 345.0, + 8.0, 7.0, 2.0, std::nan("2"), 44.0, 37.0, 73.0, + }; + CHECK_FALSE( check_strict_weak_ordering(vec) ); + } + + SECTION( "small collection with std::less_equal" ) + { + std::vector vec = { 1, 4, 32, 5, 89, 43, 56, 8, 7, 2, 44, 37, 73 }; + CHECK_FALSE( check_strict_weak_ordering(vec, std::less_equal{}) ); + } + + SECTION( "small collection with duplicates with std::less_equal" ) + { + std::vector vec = { 1, 4, 32, 5, 1, 89, 43, 56, 8, 7, 2, 2, 4, 44, 37, 73 }; + CHECK_FALSE( check_strict_weak_ordering(vec, std::less_equal{}) ); + } +} diff --git a/tests/utility/is_sorted.cpp b/tests/utility/is_sorted.cpp new file 
mode 100644 index 00000000..2acaa138 --- /dev/null +++ b/tests/utility/is_sorted.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2026 Morwenn + * SPDX-License-Identifier: MIT + */ +#include +#include +#include +#include +#include +#include + +namespace utility = cppsort::utility; + +TEST_CASE( "is_sorted and is_sorted_until test", "[utility][is_sorted]" ) +{ + SECTION( "empty collection" ) + { + std::vector vec = {}; + CHECK( utility::is_sorted(vec) ); + CHECK( utility::is_sorted_until(vec) == vec.end() ); + } + + SECTION( "collection with one element" ) + { + std::vector vec = {5}; + CHECK( utility::is_sorted(vec) ); + CHECK( utility::is_sorted_until(vec) == vec.end() ); + } + + SECTION( "collection with two elements" ) + { + std::vector vec1 = {0, 0}; + CHECK( utility::is_sorted(vec1) ); + CHECK( utility::is_sorted_until(vec1) == vec1.end() ); + + std::vector vec2 = {0, 5}; + CHECK( utility::is_sorted(vec2) ); + CHECK( utility::is_sorted_until(vec2) == vec2.end() ); + + std::vector vec3 = {5, 0}; + CHECK_FALSE( utility::is_sorted(vec3) ); + CHECK( utility::is_sorted_until(vec3) == std::next(vec3.begin()) ); + } + + SECTION( "distribution: ascending" ) + { + std::vector vec; + vec.reserve(250); + auto distribution = dist::ascending{}; + distribution(std::back_inserter(vec), 250); + CHECK( utility::is_sorted(vec) ); + CHECK( utility::is_sorted_until(vec) == vec.end() ); + } + + SECTION( "distribution: ascending_duplicates" ) + { + std::vector vec; + vec.reserve(250); + auto distribution = dist::ascending_duplicates{}; + distribution(std::back_inserter(vec), 250); + CHECK( utility::is_sorted(vec) ); + CHECK( utility::is_sorted_until(vec) == vec.end() ); + } + + SECTION( "distribution: all_equal" ) + { + std::vector vec; + vec.reserve(250); + auto distribution = dist::all_equal{}; + distribution(std::back_inserter(vec), 250); + CHECK( utility::is_sorted(vec) ); + CHECK( utility::is_sorted_until(vec) == vec.end() ); + } + + SECTION( "distribution: descending" ) + { + 
std::vector vec; + vec.reserve(250); + auto distribution = dist::descending{}; + distribution(std::back_inserter(vec), 250); + CHECK_FALSE( utility::is_sorted(vec) ); + CHECK( utility::is_sorted_until(vec) == std::next(vec.begin()) ); + } +} diff --git a/tests/utility/quicksort_adversary.cpp b/tests/utility/quicksort_adversary.cpp new file mode 100644 index 00000000..912075f4 --- /dev/null +++ b/tests/utility/quicksort_adversary.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2025 Morwenn + * SPDX-License-Identifier: MIT + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +TEMPLATE_TEST_CASE( "test quicksort-based sorters with quicksort_adversary", "[utility][quicksort_adversary]", + cppsort::pdq_sorter, + cppsort::quick_merge_sorter, + cppsort::quick_sorter, + cppsort::std_sorter ) +{ + cppsort::metrics::comparisons sorter; + auto comps = cppsort::utility::quicksort_adversary(sorter, 1000); + CHECK( comps < 100'000 ); // Guesstimate between n and n² +} + +namespace +{ + // Median-of-three quicksort + template + auto median_of_3_quicksort(Iterator first, Iterator last, Compare comp) + -> void + { + auto size = last - first; + if (size < 2) return; + + auto middle = first + size / 2; + auto pivot_pos = cppsort::detail::iter_sort3( + first, middle, last - 1, + comp, cppsort::utility::identity{} + ); + + iter_swap(pivot_pos, last - 1); + auto middle1 = std::partition( + first, last - 1, + [&](int& value) { return comp(value, *(last - 1)); } + ); + + iter_swap(middle1, last - 1); + auto middle2 = std::partition( + std::next(middle1), last, + [&](int& value) { return not comp(*middle1, value); } + ); + + median_of_3_quicksort(first, middle1, comp); + median_of_3_quicksort(middle2, last, comp); + } +} + + +TEST_CASE( "quicksort adversary over a simple quicksort", + "[utility][quicksort_adversary]" ) +{ + auto do_sort = [](std::vector& vec, auto comp) { + return median_of_3_quicksort(vec.begin(), vec.end(), comp); + }; + auto sorter 
= cppsort::metrics::comparisons(do_sort); + auto comps = cppsort::utility::quicksort_adversary(sorter, 100); + CHECK( comps > 5000 ); // Guesstimate in the order of n²: 5000 is n²/2 for n = 100 +} diff --git a/tools/measures-of-disorder-osc-cross.tex b/tools/measures-of-disorder-osc-cross.tex new file mode 100755 index 00000000..baa544b7 --- /dev/null +++ b/tools/measures-of-disorder-osc-cross.tex @@ -0,0 +1,50 @@ +\documentclass[margin=0.5cm]{standalone} +\usepackage{tikz} +\pagestyle{empty} + +\begin{document} +\begin{tikzpicture} + +\draw[thin,->] (-0.25,0) -- (8,0) node[right] {$pos$}; +\draw[thin,->] (0,-0.25) -- (0,6) node[above] {$val$}; + +% ticks: one mark per unit on each axis, only every other mark is labelled; the val axis is drawn at half scale +\foreach \x [count=\xi starting from 0] in {1,2,3,4,5,6,7}{ + \draw (\x,2pt) -- (\x,-2pt); + \ifodd\xi + \node[anchor=north] at (\x,0) {$\x$}; + \fi +} +\foreach \x [count=\xi starting from 0] in {1,2,3,4,5,6,7,8,9,10,11}{ + \draw (2pt,\x/2) -- (-2pt,\x/2); + \ifodd\xi + \node[anchor=east] at (0,\x/2) {$\x$}; + \fi +} + +% points: the i-th value of the sequence is plotted at pos = i - 1 and at half its value in height +\foreach \y [count=\x] in { + 6, 3, 9, 8, 4, 7, 1, 10 +}{ + \fill (\x-1,\y/2) circle (2pt); +} + +\tikzset{every edge/.append style = {gray}}; + +\path[every node/.style={font=\sffamily\small}] + (0,6/2) edge (1,3/2) + (1,3/2) edge (2,9/2) + (2,9/2) edge (3,8/2) + (3,8/2) edge (4,4/2) + (4,4/2) edge (5,7/2) + (5,7/2) edge (6,1/2) + (6,1/2) edge (7,10/2); + + +\tikzset{every edge/.append style = {dashed,gray}}; + +\path[every node/.style={font=\sffamily\small}] + (0,7/2) edge (8,7/2); + +\end{tikzpicture} +\end{document} diff --git a/tools/mops-partial-ordering.tex b/tools/partial-ordering-measures-of-disorder.tex similarity index 58% rename from tools/mops-partial-ordering.tex rename to tools/partial-ordering-measures-of-disorder.tex index 2f2894a9..7d36686c 100644 --- a/tools/mops-partial-ordering.tex +++ b/tools/partial-ordering-measures-of-disorder.tex @@ -1,4 +1,4 @@ -% Copyright (c) 2021 Morwenn +% Copyright (c) 2021-2025 Morwenn % SPDX-License-Identifier: MIT \documentclass{standalone} @@ -10,7 +10,8 @@ background 
rectangle/.style={fill=white}, show background rectangle, auto, - node distance = 1.3cm, + on grid, + node distance = 1.3cm and 3cm, semithick ] @@ -26,21 +27,23 @@ % Max=Dis equivalence comes from: % NeatSort - A practical adaptive algorithm % by M. La Rocca and D. Cantone - \node[state] (reg) {$Reg$}; - \node[state] (loc) [below of=reg] {$Loc$}; - \node[state] (hist) [left=2.2cm of loc] {$Hist$}; - \node[state] (sms) [right=2.2cm of loc] {$SMS$}; - \node[state] (block) [below of=hist] {$\bm{Block}$}; - \node[state] (osc) [below of=loc] {$\bm{Osc}$}; - \node[state] (enc) [below of=sms] {$\bm{Enc}$}; - \node[state] (rem) [below of=block] {$\bm{Rem}$}; - \node[state] (inv) [below of=osc] {$\bm{Inv}~$$\equiv$$~\bm{Spear}$}; - \node[state] (sus) [below of=enc] {$\bm{SUS}$}; - \node[state] (exc) [below of=rem] {$\bm{Exc}~$$\equiv$$~\bm{Ham}$}; - \node[state] (max) [below of=inv] {$\bm{Max}~$$\equiv$$~\bm{Dis}~$}; - \node[state] (runs) [below of=sus] {$\bm{Runs}$}; + \node[state] (reg) {$\mathit{Reg}$}; + \node[state] (loc) [below of=reg] {$\mathit{Loc}$}; + \node[state] (sms) [right=of loc] {$\mathit{SMS}$}; + \node[state] (osc) [below of=loc] {$\bm{\mathit{Osc}}$}; + \node[state] (hist) [left=of loc] {$\mathit{Hist}$}; + \node[state] (blank) [below=of osc] {}; + \node[state] (enc) [below of=sms] {$\bm{\mathit{Enc}}$}; + \node[state] (inv) [below of=blank] {$\bm{\mathit{Inv}}~$$\equiv$$~\bm{\mathit{Spear}}$}; + \node[state] (rem) [left=of inv] {$\bm{\mathit{Rem}}$}; + \node[state] (sus) [below of=enc] {$\bm{\mathit{SUS}}$}; + \node[state] (exc) [below of=rem] {$\bm{\mathit{Exc}}~$$\equiv$$~\bm{\mathit{Ham}}$}; + \node[state] (block) [above=of rem] {$\bm{\mathit{Block}}$}; + \node[state] (max) [below of=inv] {$\bm{\mathit{Max}}~$$\equiv$$~\bm{\mathit{Dis}}~$}; + \node[state] (runs) [right=of max] {$\bm{\mathit{Runs}}$}; \node[state] (m01) [below of=max] {$m_{01}$}; \node[state] (m0) [below of=m01] {$m_{0}$}; + \path[-] (reg) edge node {} (hist); \path[-] (reg) edge node 
{} (loc); \path[-] (reg) edge node {} (sms); @@ -64,11 +67,15 @@ % Sort Race % by H. Zhang, B. Meng and Y. Liang - \node[state] (mono) [right=1.4cm of sus] {$\bm{Mono}$}; - \path[-] (mono) edge node {} (runs); - + \node[state] (mono) [right=of sus] {$\bm{\mathit{Mono}}$}; % See the Original Research page of the docs \path[-] (enc) edge node {} (mono); + + % Original research: Amp + \node[state] (amp) [below=of mono] {$\bm{\mathit{Amp}}$}; + \path[-] (amp) edge node {} (runs); + % Conjecture + \path[-] (amp) edge node {} (mono); \end{tikzpicture} \end{document} diff --git a/tools/release_template.md b/tools/release_template.md index d4da7daa..5988af65 100644 --- a/tools/release_template.md +++ b/tools/release_template.md @@ -14,6 +14,11 @@ TODO 1 TODO 2 +### Bug fixes + +* TODO: bug fix 1 +* TODO: bug fix 2 + ### Improvements Algorithmic & speed improvements: @@ -41,5 +46,5 @@ Miscellaneous: I didn't manage to fix every bug I could find since the previous release, so you might want to check the [list of known bugs][known-bugs]. 
- [deprecation-warnings]: https://github.com/Morwenn/cpp-sort/wiki#deprecation-warnings + [deprecation-warnings]: https://codeberg.org/Morwenn/cpp-sort/wiki#deprecation-warnings [known-bugs]: https://github.com/Morwenn/cpp-sort/issues?q=is%3Aissue+is%3Aopen+label%3Abug diff --git a/tools/sorting-exchange-cycles.tex b/tools/sorting-exchange-cycles.tex new file mode 100755 index 00000000..63125253 --- /dev/null +++ b/tools/sorting-exchange-cycles.tex @@ -0,0 +1,35 @@ +\documentclass{standalone} +\usepackage{tikz} +\usetikzlibrary {arrows.meta} +\pagestyle{empty} + +\begin{document} +\begin{tikzpicture} + +\tikzset{every node/.style={draw,shape=circle,minimum size=0.5cm,inner sep=0pt,anchor=center}}; + +% Draw the elements of the sequence: node k holds the value k, its x coordinate is its current position +\node (n0) at (2,0) {0}; +\node (n1) at (5,0) {1}; +\node (n2) at (0,0) {2}; +\node (n3) at (4,0) {3}; +\node (n4) at (1,0) {4}; +\node (n5) at (6,0) {5}; +\node (n6) at (3,0) {6}; + + +\tikzset{every edge/.append style = {-Stealth,dashed,gray}}; + +% Exchange cycles: each arrow goes from a value to the node sitting at the position matching that value, one color per cycle +\path[every node/.style={font=\sffamily\small}] + (n2) edge[out=290,in=240,orange] (n0) + (n0) edge[out=110,in=70,orange] (n2) + (n4) edge[out=290,in=240,teal] (n3) + (n3) edge[out=110,in=70,teal] (n6) + (n6) edge[out=290,in=240,teal] (n5) + (n5) edge[out=110,in=70,teal] (n1) + (n1) edge[out=110,in=70,teal] (n4); + + +\end{tikzpicture} +\end{document}