Skip to content

Commit 3af3502

Browse files
authored
Merge pull request #21 from numpy/strptrdtype
Initial skeleton of string dtype
2 parents 81372de + 51b69e3 commit 3af3502

File tree

22 files changed

+1050
-24
lines changed

22 files changed

+1050
-24
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,5 @@ dmypy.json
130130

131131
.mesonpy-native-file.ini
132132
compile_commands.json
133+
134+
.ruff-cache/

.pre-commit-config.yaml

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ repos:
3333
entry: |
3434
bash -c 'cd unytdtype && mkdir -p build && pip install build meson-python patchelf wheel && python -m build --wheel --no-isolation -Cbuilddir=build';
3535
fail_fast: false
36+
- id: generate-compilation-database-stringdtype
37+
name: Generate compilation database [stringdtype]
38+
files: stringdtype/(meson\.build$|.*\.(c|h)$)
39+
language: system
40+
require_serial: true
41+
entry: |
42+
bash -c 'cd stringdtype && mkdir -p build && pip install build meson-python patchelf wheel && python -m build --wheel --no-isolation -Cbuilddir=build';
43+
fail_fast: false
3644
- repo: https://github.com/pocc/pre-commit-hooks
3745
rev: v1.3.5
3846
hooks:
@@ -48,33 +56,30 @@ repos:
4856
name: clang-tidy [unytdtype]
4957
args: [-p=unytdtype/build]
5058
files: unytdtype/(.*\.(c|h)$)
59+
- id: clang-tidy
60+
name: clang-tidy [stringdtype]
61+
args: [-p=stringdtype/build]
62+
files: stringdtype/(.*\.(c|h)$)
5163
- id: clang-format
5264
args: ['--no-diff', -i]
5365
# - id: oclint
5466
# - id: cppcheck
5567
- repo: https://github.com/pre-commit/pre-commit-hooks
56-
rev: v4.3.0
68+
rev: v4.4.0
5769
hooks:
5870
- id: trailing-whitespace
5971
- id: end-of-file-fixer
6072
- id: check-yaml
6173
- id: check-added-large-files
6274
- id: check-ast
63-
- repo: https://github.com/pycqa/flake8
64-
rev: 3.9.2
75+
- repo: https://github.com/charliermarsh/ruff-pre-commit
76+
rev: v0.0.217
6577
hooks:
66-
- id: flake8
67-
additional_dependencies:
68-
[
69-
flake8-mutable,
70-
flake8-debugger,
71-
flake8-pytest-style,
72-
flake8-simplify,
73-
pep8-naming,
74-
darglint,
75-
]
78+
- id: ruff
79+
# Respect `exclude` and `extend-exclude` settings.
80+
args: ["--force-exclude"]
7681
- repo: https://github.com/pre-commit/mirrors-prettier
77-
rev: v3.0.0-alpha.0
82+
rev: v3.0.0-alpha.4
7883
hooks:
7984
- id: prettier
8085
types:
@@ -83,7 +88,7 @@ repos:
8388
yaml,
8489
]
8590
- repo: https://github.com/pycqa/isort
86-
rev: 5.10.1
91+
rev: 5.11.4
8792
hooks:
8893
- id: isort
8994
name: isort (python)

benchmarks/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
strings

benchmarks/__init__.py

Whitespace-only changes.

benchmarks/asv.conf.json

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
{
2+
// The version of the config file format. Do not change, unless
3+
// you know what you are doing.
4+
"version": 1,
5+
6+
// The name of the project being benchmarked
7+
"project": "numpy-user-dtypes",
8+
9+
// The project's homepage
10+
"project_url": "https://github.com/numpy/numpy-user-dtypes",
11+
12+
// The URL or local path of the source code repository for the
13+
// project being benchmarked
14+
"repo": "..",
15+
16+
// The Python project's subdirectory in your repo. If missing or
17+
// the empty string, the project is assumed to be located at the root
18+
// of the repository.
19+
"repo_subdir": "",
20+
21+
// Customizable commands for building, installing, and
22+
// uninstalling the project. See asv.conf.json documentation.
23+
//
24+
"install_command": [
25+
"pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy",
26+
"pip install meson-python patchelf wheel",
27+
"in-dir={conf_dir} pip install ./asciidtype/ --no-build-isolation",
28+
"in-dir={conf_dir} pip install ./strptrdtype/ --no-build-isolation"
29+
],
30+
"uninstall_command": ["return-code=any pip uninstall -y asciidtype strptrdtype"],
31+
"build_command": [
32+
"pip -V"
33+
/* "python setup.py build", */
34+
/* "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}" */
35+
],
36+
37+
// List of branches to benchmark. If not provided, defaults to "master"
38+
// (for git) or "default" (for mercurial).
39+
"branches": ["main"], // for git
40+
41+
// The DVCS being used. If not set, it will be automatically
42+
// determined from "repo" by looking at the protocol in the URL
43+
// (if remote), or by looking for special directories, such as
44+
// ".git" (if local).
45+
// "dvcs": "git",
46+
47+
// The tool to use to create environments. May be "conda",
48+
// "virtualenv" or other value depending on the plugins in use.
49+
// If missing or the empty string, the tool will be automatically
50+
// determined by looking for tools on the PATH environment
51+
// variable.
52+
"environment_type": "virtualenv",
53+
54+
// timeout in seconds for installing any dependencies in environment
55+
// defaults to 10 min
56+
//"install_timeout": 600,
57+
58+
// the base URL to show a commit for the project.
59+
// "show_commit_url": "http://github.com/owner/project/commit/",
60+
61+
// The Pythons you'd like to test against. If not provided, defaults
62+
// to the current version of Python used to run `asv`.
63+
// "pythons": ["2.7", "3.6"],
64+
65+
// The matrix of dependencies to test. Each key of the "req"
66+
// requirements dictionary is the name of a package (in PyPI) and
67+
// the values are version numbers. An empty list or empty string
68+
// indicates to just test against the default (latest)
69+
// version. null indicates that the package is to not be
70+
// installed. If the package to be tested is only available from
71+
// PyPi, and the 'environment_type' is conda, then you can preface
72+
// the package name by 'pip+', and the package will be installed
73+
// via pip (with all the conda available packages installed first,
74+
// followed by the pip installed packages).
75+
//
76+
// The ``@env`` and ``@env_nobuild`` keys contain the matrix of
77+
// environment variables to pass to build and benchmark commands.
78+
// An environment will be created for every combination of the
79+
// cartesian product of the "@env" variables in this matrix.
80+
// Variables in "@env_nobuild" will be passed to every environment
81+
// during the benchmark phase, but will not trigger creation of
82+
// new environments. A value of ``null`` means that the variable
83+
// will not be set for the current combination.
84+
//
85+
// "matrix": {
86+
// "req": {
87+
// "numpy": ["1.6", "1.7"],
88+
// "six": ["", null], // test with and without six installed
89+
// "pip+emcee": [""] // emcee is only available for install with pip.
90+
// },
91+
// "env": {"ENV_VAR_1": ["val1", "val2"]},
92+
// "env_nobuild": {"ENV_VAR_2": ["val3", null]},
93+
// },
94+
95+
// Combinations of libraries/python versions can be excluded/included
96+
// from the set to test. Each entry is a dictionary containing additional
97+
// key-value pairs to include/exclude.
98+
//
99+
// An exclude entry excludes entries where all values match. The
100+
// values are regexps that should match the whole string.
101+
//
102+
// An include entry adds an environment. Only the packages listed
103+
// are installed. The 'python' key is required. The exclude rules
104+
// do not apply to includes.
105+
//
106+
// In addition to package names, the following keys are available:
107+
//
108+
// - python
109+
// Python version, as in the *pythons* variable above.
110+
// - environment_type
111+
// Environment type, as above.
112+
// - sys_platform
113+
// Platform, as in sys.platform. Possible values for the common
114+
// cases: 'linux2', 'win32', 'cygwin', 'darwin'.
115+
// - req
116+
// Required packages
117+
// - env
118+
// Environment variables
119+
// - env_nobuild
120+
// Non-build environment variables
121+
//
122+
// "exclude": [
123+
// {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
124+
// {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
125+
// {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
126+
// ],
127+
//
128+
// "include": [
129+
// // additional env for python2.7
130+
// {"python": "2.7", "req": {"numpy": "1.8"}, "env_nobuild": {"FOO": "123"}},
131+
// // additional env if run on windows+conda
132+
// {"platform": "win32", "environment_type": "conda", "python": "2.7", "req": {"libpython": ""}},
133+
// ],
134+
135+
// The directory (relative to the current directory) that benchmarks are
136+
// stored in. If not provided, defaults to "benchmarks"
137+
// "benchmark_dir": "benchmarks",
138+
139+
// The directory (relative to the current directory) to cache the Python
140+
// environments in. If not provided, defaults to "env"
141+
"env_dir": ".asv/env",
142+
143+
// The directory (relative to the current directory) that raw benchmark
144+
// results are stored in. If not provided, defaults to "results".
145+
"results_dir": ".asv/results",
146+
147+
// The directory (relative to the current directory) that the html tree
148+
// should be written to. If not provided, defaults to "html".
149+
"html_dir": ".asv/html",
150+
151+
// The number of characters to retain in the commit hashes.
152+
// "hash_length": 8,
153+
154+
// `asv` will cache results of the recent builds in each
155+
// environment, making them faster to install next time. This is
156+
// the number of builds to keep, per environment.
157+
// "build_cache_size": 2,
158+
159+
// The commits after which the regression search in `asv publish`
160+
// should start looking for regressions. Dictionary whose keys are
161+
// regexps matching to benchmark names, and values corresponding to
162+
// the commit (exclusive) after which to start looking for
163+
// regressions. The default is to start from the first commit
164+
// with results. If the commit is `null`, regression detection is
165+
// skipped for the matching benchmark.
166+
//
167+
// "regressions_first_commits": {
168+
// "some_benchmark": "352cdf", // Consider regressions only after this commit
169+
// "another_benchmark": null, // Skip regression detection altogether
170+
// },
171+
172+
// The thresholds for relative change in results, after which `asv
173+
// publish` starts reporting regressions. Dictionary of the same
174+
// form as in ``regressions_first_commits``, with values
175+
// indicating the thresholds. If multiple entries match, the
176+
// maximum is taken. If no entry matches, the default is 5%.
177+
//
178+
// "regressions_thresholds": {
179+
// "some_benchmark": 0.01, // Threshold of 1%
180+
// "another_benchmark": 0.5, // Threshold of 50%
181+
// },
182+
}

benchmarks/benchmarks.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Write the benchmarking functions here.
2+
# See "Writing benchmarks" in the asv docs for more information.
3+
import uuid
4+
5+
import numpy as np
6+
7+
from asciidtype import ASCIIDType
8+
from strptrdtype import StrPtrDType
9+
10+
11+
def generate_data():
12+
n = 100000
13+
strings_list = [str(uuid.uuid4()) + '\n' for i in range(n)]
14+
15+
with open('strings', 'w') as f:
16+
f.writelines(strings_list)
17+
18+
19+
class TimeASCIIDType:
20+
def setup(self):
21+
self.ascii_dtype_object = ASCIIDType(36)
22+
with open('strings', 'r') as f:
23+
self.strings = f.readlines()
24+
25+
def time_allocate(self):
26+
_ = np.array(self.strings, dtype=self.ascii_dtype_object)
27+
28+
29+
class TimeStrPtrDType:
30+
def setup(self):
31+
self.strptr_dtype_object = StrPtrDType()
32+
with open('strings', 'rb') as f:
33+
self.bytestrings = f.readlines()
34+
35+
def time_allocate(self):
36+
_ = np.array(self.bytestrings, dtype=self.strptr_dtype_object)
37+
38+
39+
if __name__ == "__main__":
40+
strptr_instance = TimeStrPtrDType()
41+
strptr_instance.setup()
42+
43+
# ascii_instance = TimeASCIIDType()
44+
# ascii_instance.setup()
45+
46+
strptr_instance.time_allocate()
47+
# ascii_instance.time_allocate()

quaddtype/quaddtype/src/casts.c

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,3 @@
1-
#include <Python.h>
2-
3-
#define PY_ARRAY_UNIQUE_SYMBOL unitdtype_ARRAY_API
4-
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
5-
#define NO_IMPORT_ARRAY
6-
#include "numpy/arrayobject.h"
7-
#include "numpy/experimental_dtype_api.h"
8-
#include "numpy/ndarraytypes.h"
9-
101
#include "casts.h"
112
#include "dtype.h"
123

stringdtype/.clang-format

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# A clang-format style that approximates Python's PEP 7
2+
# Useful for IDE integration
3+
#
4+
# Based on Paul Ganssle's version at
5+
# https://gist.github.com/pganssle/0e3a5f828b4d07d79447f6ced8e7e4db
6+
# and modified for NumPy
7+
BasedOnStyle: Google
8+
AlignAfterOpenBracket: Align
9+
AllowShortEnumsOnASingleLine: false
10+
AllowShortIfStatementsOnASingleLine: false
11+
AlwaysBreakAfterReturnType: TopLevel
12+
BreakBeforeBraces: Stroustrup
13+
ColumnLimit: 79
14+
ContinuationIndentWidth: 8
15+
DerivePointerAlignment: false
16+
IndentWidth: 4
17+
IncludeBlocks: Regroup
18+
IncludeCategories:
19+
- Regex: '^[<"](Python|structmember|pymem)\.h'
20+
Priority: -3
21+
CaseSensitive: true
22+
- Regex: '^"numpy/'
23+
Priority: -2
24+
- Regex: '^"(npy_pycompat|npy_config)'
25+
Priority: -1
26+
- Regex: '^"[[:alnum:]_.]+"'
27+
Priority: 1
28+
- Regex: '^<[[:alnum:]_.]+>'
29+
Priority: 2
30+
Language: Cpp
31+
PointerAlignment: Right
32+
ReflowComments: true
33+
SpaceBeforeParens: ControlStatements
34+
SpacesInParentheses: false
35+
StatementMacros: [PyObject_HEAD, PyObject_VAR_HEAD, PyObject_HEAD_EXTRA]
36+
TabWidth: 4
37+
UseTab: Never

stringdtype/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
dist/
2+
.mesonpy*.ini
3+
__pycache__

stringdtype/README.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# A dtype that stores string data
2+
3+
This is a simple proof-of-concept dtype using the (as of late 2022) experimental
4+
[new dtype
5+
implementation](https://numpy.org/neps/nep-0041-improved-dtype-support.html) in
6+
NumPy.
7+
8+
## Building
9+
10+
Ensure Meson and NumPy are installed in the Python environment you would like to use:
11+
12+
```
13+
$ python3 -m pip install meson meson-python numpy build patchelf
14+
```
15+
16+
Build with Meson, create a wheel, and install it:
17+
18+
```
19+
$ rm -r dist/
20+
$ meson build
21+
$ python -m build --wheel -Cbuilddir=build
22+
$ python -m pip install dist/stringdtype*.whl
23+
```
24+
25+
The `mesonpy` build backend for pip [does not currently support editable
26+
installs](https://github.com/mesonbuild/meson-python/issues/47), so `pip install
27+
-e .` will not work.

0 commit comments

Comments
 (0)