diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index 52dca1362592d6..bfa10eadf73573 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -45,7 +45,7 @@ extern int _PyCodec_UnregisterError(const char *name); in Python 3.5+? */ -extern PyObject* _PyCodec_LookupTextEncoding( +PyAPI_FUNC(PyObject*) _PyCodec_LookupTextEncoding( const char *encoding, const char *alternate_command); diff --git a/Lib/codecs.py b/Lib/codecs.py index e4a8010aba90a5..411856b3738d61 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -93,7 +93,8 @@ class CodecInfo(tuple): def __new__(cls, encode, decode, streamreader=None, streamwriter=None, incrementalencoder=None, incrementaldecoder=None, name=None, - *, _is_text_encoding=None): + *, _is_text_encoding=None, + _is_multibyte=None): self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter)) self.name = name self.encode = encode @@ -104,6 +105,8 @@ def __new__(cls, encode, decode, streamreader=None, streamwriter=None, self.streamreader = streamreader if _is_text_encoding is not None: self._is_text_encoding = _is_text_encoding + if _is_multibyte is not None: + self._is_multibyte = _is_multibyte return self def __repr__(self): diff --git a/Lib/encodings/big5.py b/Lib/encodings/big5.py index 7adeb0e1605274..0ffbf78f8c5f4d 100644 --- a/Lib/encodings/big5.py +++ b/Lib/encodings/big5.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/big5hkscs.py b/Lib/encodings/big5hkscs.py index 350df37baaedaf..c0c8960516469e 100644 --- a/Lib/encodings/big5hkscs.py +++ b/Lib/encodings/big5hkscs.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/cp932.py b/Lib/encodings/cp932.py index e01f59b7190576..08213e7d8682ea 100644 --- 
a/Lib/encodings/cp932.py +++ b/Lib/encodings/cp932.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/cp949.py b/Lib/encodings/cp949.py index 627c87125e2aff..4a0fb42579c4e6 100644 --- a/Lib/encodings/cp949.py +++ b/Lib/encodings/cp949.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/cp950.py b/Lib/encodings/cp950.py index 39eec5ed0ddef9..a1e0196668a619 100644 --- a/Lib/encodings/cp950.py +++ b/Lib/encodings/cp950.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/euc_jis_2004.py b/Lib/encodings/euc_jis_2004.py index 72b87aea68862f..ede44475ae0891 100644 --- a/Lib/encodings/euc_jis_2004.py +++ b/Lib/encodings/euc_jis_2004.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/euc_jisx0213.py b/Lib/encodings/euc_jisx0213.py index cc47d04112a187..958240852519ce 100644 --- a/Lib/encodings/euc_jisx0213.py +++ b/Lib/encodings/euc_jisx0213.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/euc_jp.py b/Lib/encodings/euc_jp.py index 7bcbe4147f2ad4..e1d4d25d6b417d 100644 --- a/Lib/encodings/euc_jp.py +++ b/Lib/encodings/euc_jp.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/euc_kr.py b/Lib/encodings/euc_kr.py index c1fb1260e879f0..28f491e7367d6a 100644 --- a/Lib/encodings/euc_kr.py +++ 
b/Lib/encodings/euc_kr.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/gb18030.py b/Lib/encodings/gb18030.py index 34fb6c366a7614..db8368747bad42 100644 --- a/Lib/encodings/gb18030.py +++ b/Lib/encodings/gb18030.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/gb2312.py b/Lib/encodings/gb2312.py index 3c3b837d618ecd..cb230c22b948a7 100644 --- a/Lib/encodings/gb2312.py +++ b/Lib/encodings/gb2312.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/gbk.py b/Lib/encodings/gbk.py index 1b45db89859cdf..7a6402036d39c1 100644 --- a/Lib/encodings/gbk.py +++ b/Lib/encodings/gbk.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/hz.py b/Lib/encodings/hz.py index 383442a3c9ac9a..5d175cc18d80cd 100644 --- a/Lib/encodings/hz.py +++ b/Lib/encodings/hz.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index d31ee07ab45b76..a7934dd9880dd1 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -385,4 +385,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, + _is_multibyte=True, ) diff --git a/Lib/encodings/iso2022_jp.py b/Lib/encodings/iso2022_jp.py index ab0406069356e4..ab2361562b1099 100644 --- a/Lib/encodings/iso2022_jp.py +++ b/Lib/encodings/iso2022_jp.py @@ -36,4 +36,5 @@ def getregentry(): 
incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/iso2022_jp_1.py b/Lib/encodings/iso2022_jp_1.py index 997044dc378749..8066806b212e74 100644 --- a/Lib/encodings/iso2022_jp_1.py +++ b/Lib/encodings/iso2022_jp_1.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/iso2022_jp_2.py b/Lib/encodings/iso2022_jp_2.py index 9106bf762512fd..0804129a08b9db 100644 --- a/Lib/encodings/iso2022_jp_2.py +++ b/Lib/encodings/iso2022_jp_2.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/iso2022_jp_2004.py b/Lib/encodings/iso2022_jp_2004.py index 40198bf098570b..292e3a7b95c0cc 100644 --- a/Lib/encodings/iso2022_jp_2004.py +++ b/Lib/encodings/iso2022_jp_2004.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/iso2022_jp_3.py b/Lib/encodings/iso2022_jp_3.py index 346e08beccbbaf..036312d202374a 100644 --- a/Lib/encodings/iso2022_jp_3.py +++ b/Lib/encodings/iso2022_jp_3.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/iso2022_jp_ext.py b/Lib/encodings/iso2022_jp_ext.py index 752bab9813a094..e6a3f888f04516 100644 --- a/Lib/encodings/iso2022_jp_ext.py +++ b/Lib/encodings/iso2022_jp_ext.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/iso2022_kr.py b/Lib/encodings/iso2022_kr.py index bf7018763eae38..56a6e1d3115f1c 100644 --- 
a/Lib/encodings/iso2022_kr.py +++ b/Lib/encodings/iso2022_kr.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/johab.py b/Lib/encodings/johab.py index 512aeeb732b522..a835154b552117 100644 --- a/Lib/encodings/johab.py +++ b/Lib/encodings/johab.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py index 268fccbd53974e..d274d642d020cd 100644 --- a/Lib/encodings/punycode.py +++ b/Lib/encodings/punycode.py @@ -250,4 +250,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, + _is_multibyte=True, ) diff --git a/Lib/encodings/raw_unicode_escape.py b/Lib/encodings/raw_unicode_escape.py index 46c8e070dd192e..bb8bb15bd589be 100644 --- a/Lib/encodings/raw_unicode_escape.py +++ b/Lib/encodings/raw_unicode_escape.py @@ -43,4 +43,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, + _is_multibyte=True, ) diff --git a/Lib/encodings/shift_jis.py b/Lib/encodings/shift_jis.py index 83381172764dea..ee2300fb4ad001 100644 --- a/Lib/encodings/shift_jis.py +++ b/Lib/encodings/shift_jis.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/shift_jis_2004.py b/Lib/encodings/shift_jis_2004.py index 161b1e86f9918a..4d9c6fb8613cc7 100644 --- a/Lib/encodings/shift_jis_2004.py +++ b/Lib/encodings/shift_jis_2004.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/shift_jisx0213.py b/Lib/encodings/shift_jisx0213.py 
index cb653f53055e67..2b80a1f7b2c102 100644 --- a/Lib/encodings/shift_jisx0213.py +++ b/Lib/encodings/shift_jisx0213.py @@ -36,4 +36,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/unicode_escape.py b/Lib/encodings/unicode_escape.py index 9b1ce99b339ae0..65b10462228554 100644 --- a/Lib/encodings/unicode_escape.py +++ b/Lib/encodings/unicode_escape.py @@ -43,4 +43,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, + _is_multibyte=True, ) diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py index d3b9980026666f..41c4f610532927 100644 --- a/Lib/encodings/utf_16.py +++ b/Lib/encodings/utf_16.py @@ -152,4 +152,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/utf_16_be.py b/Lib/encodings/utf_16_be.py index 86b458eb9bcd96..9dbb25ff018262 100644 --- a/Lib/encodings/utf_16_be.py +++ b/Lib/encodings/utf_16_be.py @@ -39,4 +39,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/utf_16_le.py b/Lib/encodings/utf_16_le.py index ec454142eedf25..f9655609379e02 100644 --- a/Lib/encodings/utf_16_le.py +++ b/Lib/encodings/utf_16_le.py @@ -39,4 +39,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py index 1924bedbb74c68..e5fd8175809be0 100644 --- a/Lib/encodings/utf_32.py +++ b/Lib/encodings/utf_32.py @@ -147,4 +147,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/utf_32_be.py 
b/Lib/encodings/utf_32_be.py index fe272b5fafec69..100a167a064473 100644 --- a/Lib/encodings/utf_32_be.py +++ b/Lib/encodings/utf_32_be.py @@ -34,4 +34,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/utf_32_le.py b/Lib/encodings/utf_32_le.py index 9e48210928ee65..1395c51dcfeac7 100644 --- a/Lib/encodings/utf_32_le.py +++ b/Lib/encodings/utf_32_le.py @@ -34,4 +34,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/utf_7.py b/Lib/encodings/utf_7.py index 8e0567f2087d65..a273f0fa26c818 100644 --- a/Lib/encodings/utf_7.py +++ b/Lib/encodings/utf_7.py @@ -35,4 +35,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/utf_8.py b/Lib/encodings/utf_8.py index 1bf6336571547b..d5544140451a95 100644 --- a/Lib/encodings/utf_8.py +++ b/Lib/encodings/utf_8.py @@ -39,4 +39,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py index 1bb479203f365d..fab8aaf7ba2abb 100644 --- a/Lib/encodings/utf_8_sig.py +++ b/Lib/encodings/utf_8_sig.py @@ -127,4 +127,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 79c8a7ef886482..aada3752e318a0 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1892,6 +1892,7 @@ def test_copy(self): self.assertIsNot(dup, orig) self.assertEqual(dup, orig) self.assertTrue(orig._is_text_encoding) + self.assertTrue(orig._is_multibyte) self.assertEqual(dup.encode, orig.encode) 
self.assertEqual(dup.name, orig.name) self.assertEqual(dup.incrementalencoder, orig.incrementalencoder) @@ -1912,6 +1913,7 @@ def test_deepcopy(self): self.assertIsNot(dup, orig) self.assertEqual(dup, orig) self.assertTrue(orig._is_text_encoding) + self.assertTrue(orig._is_multibyte) self.assertEqual(dup.encode, orig.encode) self.assertEqual(dup.name, orig.name) self.assertEqual(dup.incrementalencoder, orig.incrementalencoder) @@ -1940,6 +1942,7 @@ def test_pickle(self): unpickled_codec_info.incrementalencoder ) self.assertTrue(unpickled_codec_info._is_text_encoding) + self.assertTrue(unpickled_codec_info._is_multibyte) # Test a CodecInfo with _is_text_encoding equal to false. codec_info = codecs.lookup('base64') diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 10dca684accee3..ccc2d833d2f2db 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -325,11 +325,15 @@ def test_supported_encodings2(self, encoding): ]) @support.subTests('encoding', [ - 'UTF-7', + 'UTF8', 'UTF-7', + "unicode-escape", "raw-unicode-escape", "Big5-HKSCS", "Big5", "cp932", "cp949", "cp950", "EUC_JIS-2004", "EUC_JISX0213", "EUC-JP", "EUC-KR", "GB18030", "GB2312", "GBK", + "HZ-GB-2312", + "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2004", + "ISO-2022-JP-2", "ISO-2022-JP-3", "ISO-2022-JP-EXT", "ISO-2022-KR", "johab", "Shift_JIS", "Shift_JIS-2004", "Shift_JISX0213", diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 3a41ea97a2e0a2..38fbe10db75c91 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1039,9 +1039,11 @@ def bxml(encoding, body=''): 'cp932', 'cp949', 'cp950', 'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr', 'gb2312', 'gbk', 'gb18030', - 'iso2022-kr', 'johab', + 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', + 'iso2022-jp-3', 'iso2022-jp-ext', + 'iso2022-kr', 'johab', 'hz', 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', - 'utf-7', + 'utf-7', 'utf-8-sig', 'utf8', ] for 
encoding in unsupported_ascii_compatible_encodings: with self.subTest(encoding=encoding): diff --git a/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst b/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst new file mode 100644 index 00000000000000..119a465fcb200a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst @@ -0,0 +1,4 @@ +The :mod:`XML parser <xml.parsers.expat>` now raises :exc:`ValueError` for known +unsupported multi-byte encodings such as "UTF8", "ISO-2022-JP" or +"raw-unicode-escape" instead of failing later, when encountering non-ASCII +data. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 64314e5dff93a1..b688cf6a16fc37 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -4,6 +4,7 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_EnterRecursiveCall() +#include "pycore_codecs.h" // _PyCodec_LookupTextEncoding() #include "pycore_import.h" // _PyImport_SetModule() #include "pycore_pyhash.h" // _Py_HashSecret #include "pycore_traceback.h" // _PyTraceback_Add() @@ -1465,6 +1466,32 @@ PyUnknownEncodingHandler(void *encodingHandlerData, if (PyErr_Occurred()) return XML_STATUS_ERROR; + PyObject *codec = _PyCodec_LookupTextEncoding(name, NULL); + if (codec == NULL) { + return XML_STATUS_ERROR; + } + if (!PyTuple_CheckExact(codec)) { + PyObject *attr; + if (PyObject_GetOptionalAttrString(codec, "_is_multibyte", &attr) < 0) { + Py_DECREF(codec); + return XML_STATUS_ERROR; + } + if (attr != NULL) { + int is_multibyte = PyObject_IsTrue(attr); + Py_DECREF(attr); + if (is_multibyte != 0) { // true or error + Py_DECREF(codec); + if (is_multibyte > 0) { // true + PyErr_Format(PyExc_ValueError, + "multi-byte encodings are not supported: '%s'", + name); + } + return XML_STATUS_ERROR; + } + } + } + Py_DECREF(codec); + u = PyUnicode_Decode((const char*) template_buffer, 256, name, "replace"); if (u == NULL) { Py_XDECREF(u); @@ -1473,8 +1500,9 @@ PyUnknownEncodingHandler(void 
*encodingHandlerData, if (PyUnicode_GET_LENGTH(u) != 256) { Py_DECREF(u); - PyErr_SetString(PyExc_ValueError, - "multi-byte encodings are not supported"); + PyErr_Format(PyExc_ValueError, + "multi-byte encodings are not supported: '%s'", + name); return XML_STATUS_ERROR; } diff --git a/Python/codecs.c b/Python/codecs.c index 0bde56c0ac662e..a522e6b88068b3 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -10,6 +10,7 @@ Copyright (c) Corporation for National Research Initiatives. #include "Python.h" #include "pycore_call.h" // _PyObject_CallNoArgs() +#include "pycore_codecs.h" // export _PyCodec_LookupTextEncoding() #include "pycore_interp.h" // PyInterpreterState.codec_search_path #include "pycore_pyerrors.h" // _PyErr_FormatNote() #include "pycore_pystate.h" // _PyInterpreterState_GET() diff --git a/Tools/unicode/gencjkcodecs.py b/Tools/unicode/gencjkcodecs.py index 45866bf2f61062..71d19693eb6f7b 100644 --- a/Tools/unicode/gencjkcodecs.py +++ b/Tools/unicode/gencjkcodecs.py @@ -51,6 +51,7 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_multibyte=True, ) """)