From 5ec47799c12bee3fcc267981cdfcb3366b3c7246 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 22 Jan 2026 21:21:21 +0200 Subject: [PATCH] gh-135573: Make pickled lists, sets and dicts a tiny bit smaller Ensure that APPENDS and SETITEMS are never used for a batch of size 1. Ensure that ADDITEMS and SETITEMS are never used for a batch of size 0. This harmonizes the C implementation with the Python implementation which already guarantees this and makes a pickle a tiny bit smaller with a tiny chance (about 0.1%). Saves 1 byte for list and dict with size 1001, 2001, ... Saves 2 bytes for set and dict with size 1000, 2000, ... --- Modules/_pickle.c | 79 +++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 063547c9a4d020..5784bc7d5af0bc 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -3034,11 +3034,6 @@ batch_list(PickleState *state, PicklerObject *self, PyObject *iter, PyObject *or assert(iter != NULL); - /* XXX: I think this function could be made faster by avoiding the - iterator interface and fetching objects directly from list using - PyList_GET_ITEM. - */ - if (self->proto == 0) { /* APPENDS isn't available; do one at a time. */ for (;; total++) { @@ -3160,24 +3155,24 @@ batch_list_exact(PickleState *state, PicklerObject *self, PyObject *obj) assert(obj != NULL); assert(self->proto > 0); assert(PyList_CheckExact(obj)); - - if (PyList_GET_SIZE(obj) == 1) { - item = PyList_GET_ITEM(obj, 0); - Py_INCREF(item); - int err = save(state, self, item, 0); - Py_DECREF(item); - if (err < 0) { - _PyErr_FormatNote("when serializing %T item 0", obj); - return -1; - } - if (_Pickler_Write(self, &append_op, 1) < 0) - return -1; - return 0; - } + assert(PyList_GET_SIZE(obj)); /* Write in batches of BATCHSIZE. */ total = 0; do { + if (PyList_GET_SIZE(obj) - total == 1) { + item = PyList_GET_ITEM(obj, total); + Py_INCREF(item); + int err = save(state, self, item, 0); + Py_DECREF(item); + if (err < 0) { + _PyErr_FormatNote("when serializing %T item %zd", obj, total); + return -1; + } + if (_Pickler_Write(self, &append_op, 1) < 0) + return -1; + return 0; + } this_batch = 0; if (_Pickler_Write(self, &mark_op, 1) < 0) return -1; @@ -3438,28 +3433,29 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj) assert(self->proto > 0); dict_size = PyDict_GET_SIZE(obj); - - /* Special-case len(d) == 1 to save space. */ - if (dict_size == 1) { - PyDict_Next(obj, &ppos, &key, &value); - Py_INCREF(key); - Py_INCREF(value); - if (save(state, self, key, 0) < 0) { - goto error; - } - if (save(state, self, value, 0) < 0) { - _PyErr_FormatNote("when serializing %T item %R", obj, key); - goto error; - } - Py_CLEAR(key); - Py_CLEAR(value); - if (_Pickler_Write(self, &setitem_op, 1) < 0) - return -1; - return 0; - } + assert(dict_size); /* Write in batches of BATCHSIZE. */ + Py_ssize_t total = 0; do { + if (dict_size - total == 1) { + PyDict_Next(obj, &ppos, &key, &value); + Py_INCREF(key); + Py_INCREF(value); + if (save(state, self, key, 0) < 0) { + goto error; + } + if (save(state, self, value, 0) < 0) { + _PyErr_FormatNote("when serializing %T item %R", obj, key); + goto error; + } + Py_CLEAR(key); + Py_CLEAR(value); + if (_Pickler_Write(self, &setitem_op, 1) < 0) + return -1; + return 0; + } + i = 0; if (_Pickler_Write(self, &mark_op, 1) < 0) return -1; @@ -3475,6 +3471,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj) } Py_CLEAR(key); Py_CLEAR(value); + total++; if (++i == BATCHSIZE) break; } @@ -3487,7 +3484,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj) return -1; } - } while (i == BATCHSIZE); + } while (total < dict_size); return 0; error: Py_XDECREF(key); @@ -3605,6 +3602,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj) return 0; /* nothing to do */ /* Write in batches of BATCHSIZE. */ + Py_ssize_t total = 0; do { i = 0; if (_Pickler_Write(self, &mark_op, 1) < 0) @@ -3619,6 +3617,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj) _PyErr_FormatNote("when serializing %T element", obj); break; } + total++; if (++i == BATCHSIZE) break; } @@ -3634,7 +3633,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj) "set changed size during iteration"); return -1; } - } while (i == BATCHSIZE); + } while (total < set_size); return 0; }