Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(23106)

Delta Between Two Patch Sets: Lib/html/entities.py

Issue 11113: html.entities mapping dicts need updating?
Left Patch Set: Created 7 years, 2 months ago
Right Patch Set: Created 7 years, 2 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « Doc/library/html.entities.rst ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 """HTML character entity references.""" 1 """HTML character entity references."""
2 2
3 # maps the HTML entity name to the Unicode codepoint 3 # maps the HTML entity name to the Unicode codepoint
4 name2codepoint = { 4 name2codepoint = {
5 'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 5 'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
6 'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1 6 'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1
7 'Acirc': 0x00c2, # latin capital letter A with circumflex, U+00C2 ISOlat1 7 'Acirc': 0x00c2, # latin capital letter A with circumflex, U+00C2 ISOlat1
8 'Agrave': 0x00c0, # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1 8 'Agrave': 0x00c0, # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
9 'Alpha': 0x0391, # greek capital letter alpha, U+0391 9 'Alpha': 0x0391, # greek capital letter alpha, U+0391
10 'Aring': 0x00c5, # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1 10 'Aring': 0x00c5, # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after
251 'yacute': 0x00fd, # latin small letter y with acute, U+00FD ISOlat1 251 'yacute': 0x00fd, # latin small letter y with acute, U+00FD ISOlat1
252 'yen': 0x00a5, # yen sign = yuan sign, U+00A5 ISOnum 252 'yen': 0x00a5, # yen sign = yuan sign, U+00A5 ISOnum
253 'yuml': 0x00ff, # latin small letter y with diaeresis, U+00FF ISOlat1 253 'yuml': 0x00ff, # latin small letter y with diaeresis, U+00FF ISOlat1
254 'zeta': 0x03b6, # greek small letter zeta, U+03B6 ISOgrk3 254 'zeta': 0x03b6, # greek small letter zeta, U+03B6 ISOgrk3
255 'zwj': 0x200d, # zero width joiner, U+200D NEW RFC 2070 255 'zwj': 0x200d, # zero width joiner, U+200D NEW RFC 2070
256 'zwnj': 0x200c, # zero width non-joiner, U+200C NEW RFC 2070 256 'zwnj': 0x200c, # zero width non-joiner, U+200C NEW RFC 2070
257 } 257 }
258 258
259 259
260 # maps the HTML5 named character references to the equivalent Unicode character(s) 260 # maps the HTML5 named character references to the equivalent Unicode character(s)
261 html5namedcharref = { 261 html5 = {
262 'Aacute;': '\xc1', 262 'Aacute;': '\xc1',
263 'Aacute': '\xc1', 263 'Aacute': '\xc1',
264 'aacute;': '\xe1', 264 'aacute;': '\xe1',
265 'aacute': '\xe1', 265 'aacute': '\xe1',
266 'Abreve;': '\u0102', 266 'Abreve;': '\u0102',
267 'abreve;': '\u0103', 267 'abreve;': '\u0103',
268 'ac;': '\u223e', 268 'ac;': '\u223e',
269 'acd;': '\u223f', 269 'acd;': '\u223f',
270 'acE;': '\u223e\u0333', 270 'acE;': '\u223e\u0333',
271 'Acirc;': '\xc2', 271 'Acirc;': '\xc2',
(...skipping 1118 matching lines...) Expand 10 before | Expand all | Expand 10 after
1390 'nacute;': '\u0144', 1390 'nacute;': '\u0144',
1391 'nang;': '\u2220\u20d2', 1391 'nang;': '\u2220\u20d2',
1392 'nap;': '\u2249', 1392 'nap;': '\u2249',
1393 'napE;': '\u2a70\u0338', 1393 'napE;': '\u2a70\u0338',
1394 'napid;': '\u224b\u0338', 1394 'napid;': '\u224b\u0338',
1395 'napos;': '\u0149', 1395 'napos;': '\u0149',
1396 'napprox;': '\u2249', 1396 'napprox;': '\u2249',
1397 'natur;': '\u266e', 1397 'natur;': '\u266e',
1398 'natural;': '\u266e', 1398 'natural;': '\u266e',
1399 'naturals;': '\u2115', 1399 'naturals;': '\u2115',
1400 'nbsp;': '', 1400 'nbsp;': '\xa0',
1401 'nbsp': '', 1401 'nbsp': '\xa0',
1402 'nbump;': '\u224e\u0338', 1402 'nbump;': '\u224e\u0338',
1403 'nbumpe;': '\u224f\u0338', 1403 'nbumpe;': '\u224f\u0338',
1404 'ncap;': '\u2a43', 1404 'ncap;': '\u2a43',
1405 'Ncaron;': '\u0147', 1405 'Ncaron;': '\u0147',
1406 'ncaron;': '\u0148', 1406 'ncaron;': '\u0148',
1407 'Ncedil;': '\u0145', 1407 'Ncedil;': '\u0145',
1408 'ncedil;': '\u0146', 1408 'ncedil;': '\u0146',
1409 'ncong;': '\u2247', 1409 'ncong;': '\u2247',
1410 'ncongdot;': '\u2a6d\u0338', 1410 'ncongdot;': '\u2a6d\u0338',
1411 'ncup;': '\u2a42', 1411 'ncup;': '\u2a42',
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
1469 'nless;': '\u226e', 1469 'nless;': '\u226e',
1470 'nLl;': '\u22d8\u0338', 1470 'nLl;': '\u22d8\u0338',
1471 'nlsim;': '\u2274', 1471 'nlsim;': '\u2274',
1472 'nLt;': '\u226a\u20d2', 1472 'nLt;': '\u226a\u20d2',
1473 'nlt;': '\u226e', 1473 'nlt;': '\u226e',
1474 'nltri;': '\u22ea', 1474 'nltri;': '\u22ea',
1475 'nltrie;': '\u22ec', 1475 'nltrie;': '\u22ec',
1476 'nLtv;': '\u226a\u0338', 1476 'nLtv;': '\u226a\u0338',
1477 'nmid;': '\u2224', 1477 'nmid;': '\u2224',
1478 'NoBreak;': '\u2060', 1478 'NoBreak;': '\u2060',
1479 'NonBreakingSpace;': '', 1479 'NonBreakingSpace;': '\xa0',
1480 'Nopf;': '\u2115', 1480 'Nopf;': '\u2115',
1481 'nopf;': '\U0001d55f', 1481 'nopf;': '\U0001d55f',
1482 'Not;': '\u2aec', 1482 'Not;': '\u2aec',
1483 'not;': '\xac', 1483 'not;': '\xac',
1484 'not': '\xac', 1484 'not': '\xac',
1485 'NotCongruent;': '\u2262', 1485 'NotCongruent;': '\u2262',
1486 'NotCupCap;': '\u226d', 1486 'NotCupCap;': '\u226d',
1487 'NotDoubleVerticalBar;': '\u2226', 1487 'NotDoubleVerticalBar;': '\u2226',
1488 'NotElement;': '\u2209', 1488 'NotElement;': '\u2209',
1489 'NotEqual;': '\u2260', 1489 'NotEqual;': '\u2260',
(...skipping 1007 matching lines...) Expand 10 before | Expand all | Expand 10 after
2497 2497
2498 # maps the HTML entity name to the character 2498 # maps the HTML entity name to the character
2499 # (or a character reference if the character is outside the Latin-1 range) 2499 # (or a character reference if the character is outside the Latin-1 range)
2500 entitydefs = {} 2500 entitydefs = {}
2501 2501
2502 for (name, codepoint) in name2codepoint.items(): 2502 for (name, codepoint) in name2codepoint.items():
2503 codepoint2name[codepoint] = name 2503 codepoint2name[codepoint] = name
2504 entitydefs[name] = chr(codepoint) 2504 entitydefs[name] = chr(codepoint)
2505 2505
2506 del name, codepoint 2506 del name, codepoint
LEFT RIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+