Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(272057)

Delta Between Two Patch Sets: Lib/test/test_unicode.py

Issue 28822: Fix indices handling in PyUnicode_FindChar
Left Patch Set: Created 2 years, 8 months ago
Right Patch Set: Created 2 years, 8 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | Modules/_testcapimodule.c » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 """ Test script for the Unicode implementation. 1 """ Test script for the Unicode implementation.
2 2
3 Written by Marc-Andre Lemburg (mal@lemburg.com). 3 Written by Marc-Andre Lemburg (mal@lemburg.com).
4 4
5 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 5 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
6 6
7 """ 7 """
8 import _string 8 import _string
9 import codecs 9 import codecs
10 import itertools 10 import itertools
(...skipping 2710 matching lines...) Expand 10 before | Expand all | Expand 10 after
2721 self.assertEqual(unicode_asucs4(s, l, 1), s+'\0') 2721 self.assertEqual(unicode_asucs4(s, l, 1), s+'\0')
2722 self.assertEqual(unicode_asucs4(s, l, 0), s+'\uffff') 2722 self.assertEqual(unicode_asucs4(s, l, 0), s+'\uffff')
2723 self.assertEqual(unicode_asucs4(s, l+1, 1), s+'\0\uffff') 2723 self.assertEqual(unicode_asucs4(s, l+1, 1), s+'\0\uffff')
2724 self.assertEqual(unicode_asucs4(s, l+1, 0), s+'\0\uffff') 2724 self.assertEqual(unicode_asucs4(s, l+1, 0), s+'\0\uffff')
2725 self.assertRaises(SystemError, unicode_asucs4, s, l-1, 1) 2725 self.assertRaises(SystemError, unicode_asucs4, s, l-1, 1)
2726 self.assertRaises(SystemError, unicode_asucs4, s, l-2, 0) 2726 self.assertRaises(SystemError, unicode_asucs4, s, l-2, 0)
2727 s = '\0'.join([s, s]) 2727 s = '\0'.join([s, s])
2728 self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0') 2728 self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
2729 self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff') 2729 self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
2730 2730
2731 # Test PyUnicode_FindChar() 2731 # Test PyUnicode_FindChar()
haypo 2016/11/29 18:13:50 Put the comment in the function body.
xiang.zhang 2016/11/29 18:27:46 Hmm it's here because most other cpython unicode t
haypo 2016/11/29 18:31:04 Oh, you're right. This file is not consistent :-)
2732 @support.cpython_only 2732 @support.cpython_only
2733 def test_findchar(self): 2733 def test_findchar(self):
2734 from _testcapi import unicode_findchar 2734 from _testcapi import unicode_findchar
haypo 2016/11/29 18:13:50 Put the comment here, before "from ...".
2735 2735
2736 str = "bye;\xe0 bient\xf4t;\u518d\u89c1;\U0001F44B" * 2 2736 for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1 ":
haypo 2016/11/29 09:00:22 I dislike the *2, it makes the unit test harder to
storchaka 2016/11/29 09:35:08 I think there should be separate string objects fo
2737 2737 for i, ch in enumerate(str):
2738 self.assertEqual(unicode_findchar(str, 'b', 0, len(str), 1), 0) 2738 self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i)
storchaka 2016/11/29 09:35:08 The second argument of PyUnicode_FindChar() is int
2739 self.assertEqual(unicode_findchar(str, 'b', 0, len(str), -1), 24) 2739 self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i)
2740 self.assertEqual(unicode_findchar(str, '\xe0', 0, len(str), 1), 4) 2740
2741 self.assertEqual(unicode_findchar(str, '\xe0', 0, len(str), -1), 22) 2741 str = "!>_<!"
2742 self.assertEqual(unicode_findchar(str, '\u518d', 0, len(str), 1), 14) 2742 self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1)
2743 self.assertEqual(unicode_findchar(str, '\u518d', 0, len(str), -1), 32) 2743 self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1)
2744 self.assertEqual(unicode_findchar(str, '\U0001F44B', 0, len(str), 1), 17)
2745 self.assertEqual(unicode_findchar(str, '\U0001F44B', 0, len(str), -1), 35)
haypo 2016/11/29 09:00:22 I don't see the need for so many unit tests. IMO t
2746
2747 # start < end 2744 # start < end
2748 self.assertEqual(unicode_findchar(str, 'b', 1, len(str), 1), 6) 2745 self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4)
2749 self.assertEqual(unicode_findchar(str, 'b', 1, len(str)+1, 1), 6) 2746 self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, -1), 4)
2750 self.assertEqual(unicode_findchar(str, 'b', 1, 5, 1), -1) 2747 # start >= end
haypo 2016/11/29 09:00:22 Hum, an interesting test is to try the reverse sear
2751 # start > end 2748 self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1)
2752 self.assertEqual(unicode_findchar(str, 'b', len(str), 0, 1), -1) 2749 self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1)
2753 # start = end
2754 self.assertEqual(unicode_findchar(str, 'b', 0, 0, 1), -1)
2755 # negative 2750 # negative
2756 self.assertEqual(unicode_findchar(str, 'b', -len(str), len(str), 1), 0) 2751 self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0)
2757 self.assertEqual(unicode_findchar(str, 'b', 0, -1, 1), 0) 2752 self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0)
2758 self.assertEqual(unicode_findchar(str, 'b', -len(str), -1, 1), 0)
2759 self.assertEqual(unicode_findchar(str, 'b', -1, -len(str), 1), -1)
2760 2753
2761 # Test PyUnicode_CopyCharacters() 2754 # Test PyUnicode_CopyCharacters()
2762 @support.cpython_only 2755 @support.cpython_only
2763 def test_copycharacters(self): 2756 def test_copycharacters(self):
2764 from _testcapi import unicode_copycharacters 2757 from _testcapi import unicode_copycharacters
2765 2758
2766 strings = [ 2759 strings = [
2767 'abcde', '\xa1\xa2\xa3\xa4\xa5', 2760 'abcde', '\xa1\xa2\xa3\xa4\xa5',
2768 '\u4f60\u597d\u4e16\u754c\uff01', 2761 '\u4f60\u597d\u4e16\u754c\uff01',
2769 '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604' 2762 '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
2892 "obj", 2885 "obj",
2893 [(True, 'arg'), 2886 [(True, 'arg'),
2894 (False, 'key1'), 2887 (False, 'key1'),
2895 (False, 'key2'), 2888 (False, 'key2'),
2896 ]]) 2889 ]])
2897 self.assertRaises(TypeError, _string.formatter_field_name_split, 1) 2890 self.assertRaises(TypeError, _string.formatter_field_name_split, 1)
2898 2891
2899 2892
2900 if __name__ == "__main__": 2893 if __name__ == "__main__":
2901 unittest.main() 2894 unittest.main()
LEFT | RIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+