=== modified file 'cjson.c' --- a/cjson.c 2007-08-24 16:12:17 +0000 +++ b/cjson.c 2010-05-26 05:05:55 +0000 @@ -613,6 +613,25 @@ char *p; static const char *hexdigit = "0123456789abcdef"; +#ifdef Py_UNICODE_WIDE + const Py_ssize_t expandsize = 10; +#else + const Py_ssize_t expandsize = 6; +#endif + + /* Size the output buffer up front for the longest possible + escape of every source unichr. + + In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source + unichr, which makes it the longest escape there. In narrow + (UTF-16) builds the same escape covers a surrogate pair of two + unichrs, i.e. only five chars per source unichr, so it is not + the longest escape in that case. + + In wide or narrow builds '\uxxxx' is 6 chars per source unichr, + which makes it the longest escape in narrow (UTF-16) builds and + the value used for expandsize there. + */ s = PyUnicode_AS_UNICODE(unicode); size = PyUnicode_GET_SIZE(unicode); @@ -623,7 +642,7 @@ return NULL; } - repr = PyString_FromStringAndSize(NULL, 2 + 6*size + 1); + repr = PyString_FromStringAndSize(NULL, 2 + expandsize*size + 1); if (repr == NULL) return NULL; @@ -644,15 +663,6 @@ #ifdef Py_UNICODE_WIDE /* Map 21-bit characters to '\U00xxxxxx' */ else if (ch >= 0x10000) { - int offset = p - PyString_AS_STRING(repr); - - /* Resize the string if necessary */ - if (offset + 12 > PyString_GET_SIZE(repr)) { - if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100)) - return NULL; - p = PyString_AS_STRING(repr) + offset; - } - *p++ = '\\'; *p++ = 'U'; *p++ = hexdigit[(ch >> 28) & 0x0000000F]; === modified file 'jsontest.py' --- a/jsontest.py 2007-08-24 16:12:17 +0000 +++ b/jsontest.py 2010-05-26 05:05:55 +0000 @@ -316,6 +316,18 @@ def testWriteLong(self): self.assertEqual("12345678901234567890", cjson.encode(12345678901234567890)) + + def testWriteLongUnicode(self): + # This test causes a buffer overrun in cjson 1.0.5, on UCS4 builds. + # The string is only resized for wide unicode characters if + # there are fewer than 12 bytes of space left.
Padding with + # narrow-but-escaped characters prevents string resizing. + # Note that u'\U0001D11E\u1234' also breaks, but sometimes goes + # undetected. + s = cjson.encode(u'\U0001D11E\U0001D11E\U0001D11E\U0001D11E' + u'\u1234\u1234\u1234\u1234\u1234\u1234') + self.assertEqual(r'"\U0001d11e\U0001d11e\U0001d11e\U0001d11e' + r'\u1234\u1234\u1234\u1234\u1234\u1234"', s) def main(): unittest.main()