- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 3k
Support writing/reading bytes in librt.internal #20069
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -346,6 +346,100 @@ write_str(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames | |
| return Py_None; | ||
| } | ||
|  | ||
| /* | ||
| bytes format: size followed by bytes | ||
| short bytes (len <= 127): single byte for size as `(uint8_t)size << 1` | ||
| long bytes: \x01 followed by size as Py_ssize_t | ||
| */ | ||
|  | ||
| static PyObject* | ||
| read_bytes_internal(PyObject *data) { | ||
| _CHECK_BUFFER(data, NULL) | ||
|  | ||
| // Read length. | ||
| Py_ssize_t size; | ||
| _CHECK_READ(data, 1, NULL) | ||
| uint8_t first = _READ(data, uint8_t) | ||
| if (likely(first != LONG_STR_TAG)) { | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Once we have 2-byte etc. integer formats, it could make sense to use the same integer format here, even if it's slightly non-ideal, so that we can share code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, as I mentioned in other issue I will try to simply code when adding tags for primitive types. | ||
| // Common case: short bytes (len <= 127). | ||
| size = (Py_ssize_t)(first >> 1); | ||
| } else { | ||
| _CHECK_READ(data, sizeof(CPyTagged), NULL) | ||
| size = _READ(data, Py_ssize_t) | ||
| } | ||
| // Read byes content. | ||
|         
                  ilevkivskyi marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||
| char *buf = ((BufferObject *)data)->buf; | ||
| _CHECK_READ(data, size, NULL) | ||
| PyObject *res = PyBytes_FromStringAndSize( | ||
| buf + ((BufferObject *)data)->pos, (Py_ssize_t)size | ||
| ); | ||
| if (unlikely(res == NULL)) | ||
| return NULL; | ||
| ((BufferObject *)data)->pos += size; | ||
| return res; | ||
| } | ||
|  | ||
| static PyObject* | ||
| read_bytes(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames) { | ||
| static const char * const kwlist[] = {"data", 0}; | ||
| static CPyArg_Parser parser = {"O:read_bytes", kwlist, 0}; | ||
| PyObject *data; | ||
| if (unlikely(!CPyArg_ParseStackAndKeywordsOneArg(args, nargs, kwnames, &parser, &data))) { | ||
| return NULL; | ||
| } | ||
| return read_bytes_internal(data); | ||
| } | ||
|  | ||
| static char | ||
| write_bytes_internal(PyObject *data, PyObject *value) { | ||
| _CHECK_BUFFER(data, CPY_NONE_ERROR) | ||
|  | ||
| Py_ssize_t size = PyBytes_GET_SIZE(value); | ||
| const char *chunk = PyBytes_AsString(value); | ||
|         
                  ilevkivskyi marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| if (unlikely(chunk == NULL)) | ||
| return CPY_NONE_ERROR; | ||
|  | ||
| Py_ssize_t need; | ||
| // Write length. | ||
| if (likely(size <= MAX_SHORT_LEN)) { | ||
| // Common case: short bytes (len <= 127) store as single byte. | ||
| need = size + 1; | ||
| _CHECK_SIZE(data, need) | ||
| _WRITE(data, uint8_t, (uint8_t)size << 1) | ||
| } else { | ||
| need = size + sizeof(Py_ssize_t) + 1; | ||
| _CHECK_SIZE(data, need) | ||
| _WRITE(data, uint8_t, LONG_STR_TAG) | ||
| _WRITE(data, Py_ssize_t, size) | ||
| } | ||
| // Write bytes content. | ||
| char *buf = ((BufferObject *)data)->buf; | ||
| memcpy(buf + ((BufferObject *)data)->pos, chunk, size); | ||
| ((BufferObject *)data)->pos += size; | ||
| ((BufferObject *)data)->end += need; | ||
| return CPY_NONE; | ||
| } | ||
|  | ||
| static PyObject* | ||
| write_bytes(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames) { | ||
| static const char * const kwlist[] = {"data", "value", 0}; | ||
| static CPyArg_Parser parser = {"OO:write_bytes", kwlist, 0}; | ||
| PyObject *data; | ||
| PyObject *value; | ||
| if (unlikely(!CPyArg_ParseStackAndKeywordsSimple(args, nargs, kwnames, &parser, &data, &value))) { | ||
| return NULL; | ||
| } | ||
| if (unlikely(!PyBytes_Check(value))) { | ||
| PyErr_SetString(PyExc_TypeError, "value must be a bytes object"); | ||
| return NULL; | ||
| } | ||
| if (unlikely(write_bytes_internal(data, value) == CPY_NONE_ERROR)) { | ||
| return NULL; | ||
| } | ||
| Py_INCREF(Py_None); | ||
| return Py_None; | ||
| } | ||
|  | ||
| /* | ||
| float format: | ||
| stored as a C double | ||
|  | @@ -565,6 +659,8 @@ static PyMethodDef librt_internal_module_methods[] = { | |
| {"read_bool", (PyCFunction)read_bool, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("read a bool")}, | ||
| {"write_str", (PyCFunction)write_str, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("write a string")}, | ||
| {"read_str", (PyCFunction)read_str, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("read a string")}, | ||
| {"write_bytes", (PyCFunction)write_bytes, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("write bytes")}, | ||
| {"read_bytes", (PyCFunction)read_bytes, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("read bytes")}, | ||
| {"write_float", (PyCFunction)write_float, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("write a float")}, | ||
| {"read_float", (PyCFunction)read_float, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("read a float")}, | ||
| {"write_int", (PyCFunction)write_int, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("write an int")}, | ||
|  | @@ -590,7 +686,7 @@ librt_internal_module_exec(PyObject *m) | |
| } | ||
|  | ||
| // Export mypy internal C API, be careful with the order! | ||
| static void *NativeInternal_API[14] = { | ||
| static void *NativeInternal_API[16] = { | ||
| (void *)Buffer_internal, | ||
| (void *)Buffer_internal_empty, | ||
| (void *)Buffer_getvalue_internal, | ||
|  | @@ -605,6 +701,8 @@ librt_internal_module_exec(PyObject *m) | |
| (void *)write_tag_internal, | ||
| (void *)read_tag_internal, | ||
| (void *)NativeInternal_ABI_Version, | ||
| (void *)write_bytes_internal, | ||
| (void *)read_bytes_internal, | ||
| }; | ||
| PyObject *c_api_object = PyCapsule_New((void *)NativeInternal_API, "librt.internal._C_API", NULL); | ||
| if (PyModule_Add(m, "_C_API", c_api_object) < 0) { | ||
|  | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Somewhat meta: what if we'd make all these accept positional arguments only? We could then generate slightly faster wrapper methods for interpreted use cases perhaps (this would affect other methods as well, so the change would be in a separate PR).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW I am not worried about interpreted performance that much, current code is already relatively efficient, but I am not against if someone else will do this.