Changeset 328 for pyyaml/trunk/lib3/yaml/scanner.py
- Timestamp: 12/29/08 12:24:05 (4 years ago)
- Location: pyyaml/trunk/lib3
- Files: 1 edited, 1 copied
  - . (copied) (copied from pyyaml/trunk/lib)
  - yaml/scanner.py (modified) (49 diffs)
Legend:
- Unmodified
- Added
- Removed
pyyaml/trunk/lib3/yaml/scanner.py
r222 r328 27 27 __all__ = ['Scanner', 'ScannerError'] 28 28 29 from error import MarkedYAMLError30 from tokens import *29 from .error import MarkedYAMLError 30 from .tokens import * 31 31 32 32 class ScannerError(MarkedYAMLError): 33 33 pass 34 34 35 class SimpleKey (object):35 class SimpleKey: 36 36 # See below simple keys treatment. 37 37 … … 44 44 self.mark = mark 45 45 46 class Scanner (object):46 class Scanner: 47 47 48 48 def __init__(self): … … 167 167 168 168 # Is it the end of stream? 169 if ch == u'\0':169 if ch == '\0': 170 170 return self.fetch_stream_end() 171 171 172 172 # Is it a directive? 173 if ch == u'%' and self.check_directive():173 if ch == '%' and self.check_directive(): 174 174 return self.fetch_directive() 175 175 176 176 # Is it the document start? 177 if ch == u'-' and self.check_document_start():177 if ch == '-' and self.check_document_start(): 178 178 return self.fetch_document_start() 179 179 180 180 # Is it the document end? 181 if ch == u'.' and self.check_document_end():181 if ch == '.' and self.check_document_end(): 182 182 return self.fetch_document_end() 183 183 184 184 # TODO: support for BOM within a stream. 185 #if ch == u'\uFEFF':185 #if ch == '\uFEFF': 186 186 # return self.fetch_bom() <-- issue BOMToken 187 187 … … 189 189 190 190 # Is it the flow sequence start indicator? 191 if ch == u'[':191 if ch == '[': 192 192 return self.fetch_flow_sequence_start() 193 193 194 194 # Is it the flow mapping start indicator? 195 if ch == u'{':195 if ch == '{': 196 196 return self.fetch_flow_mapping_start() 197 197 198 198 # Is it the flow sequence end indicator? 199 if ch == u']':199 if ch == ']': 200 200 return self.fetch_flow_sequence_end() 201 201 202 202 # Is it the flow mapping end indicator? 203 if ch == u'}':203 if ch == '}': 204 204 return self.fetch_flow_mapping_end() 205 205 206 206 # Is it the flow entry indicator? 
207 if ch == u',':207 if ch == ',': 208 208 return self.fetch_flow_entry() 209 209 210 210 # Is it the block entry indicator? 211 if ch == u'-' and self.check_block_entry():211 if ch == '-' and self.check_block_entry(): 212 212 return self.fetch_block_entry() 213 213 214 214 # Is it the key indicator? 215 if ch == u'?' and self.check_key():215 if ch == '?' and self.check_key(): 216 216 return self.fetch_key() 217 217 218 218 # Is it the value indicator? 219 if ch == u':' and self.check_value():219 if ch == ':' and self.check_value(): 220 220 return self.fetch_value() 221 221 222 222 # Is it an alias? 223 if ch == u'*':223 if ch == '*': 224 224 return self.fetch_alias() 225 225 226 226 # Is it an anchor? 227 if ch == u'&':227 if ch == '&': 228 228 return self.fetch_anchor() 229 229 230 230 # Is it a tag? 231 if ch == u'!':231 if ch == '!': 232 232 return self.fetch_tag() 233 233 234 234 # Is it a literal scalar? 235 if ch == u'|' and not self.flow_level:235 if ch == '|' and not self.flow_level: 236 236 return self.fetch_literal() 237 237 238 238 # Is it a folded scalar? 239 if ch == u'>' and not self.flow_level:239 if ch == '>' and not self.flow_level: 240 240 return self.fetch_folded() 241 241 242 242 # Is it a single quoted scalar? 243 if ch == u'\'':243 if ch == '\'': 244 244 return self.fetch_single() 245 245 246 246 # Is it a double quoted scalar? 247 if ch == u'\"':247 if ch == '\"': 248 248 return self.fetch_double() 249 249 … … 254 254 # No? It's an error. Let's produce a nice error message. 255 255 raise ScannerError("while scanning for the next token", None, 256 "found character %r that cannot start any token" 257 % ch.encode('utf-8'),self.get_mark())256 "found character %r that cannot start any token" % ch, 257 self.get_mark()) 258 258 259 259 # Simple keys treatment. … … 281 281 # Disabling this procedure will allow simple keys of any length and 282 282 # height (may cause problems if indentation is broken though). 
283 for level in self.possible_simple_keys.keys():283 for level in list(self.possible_simple_keys): 284 284 key = self.possible_simple_keys[level] 285 285 if key.line != self.line \ … … 692 692 # DOCUMENT-START: ^ '---' (' '|'\n') 693 693 if self.column == 0: 694 if self.prefix(3) == u'---' \695 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':694 if self.prefix(3) == '---' \ 695 and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': 696 696 return True 697 697 … … 700 700 # DOCUMENT-END: ^ '...' (' '|'\n') 701 701 if self.column == 0: 702 if self.prefix(3) == u'...' \703 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':702 if self.prefix(3) == '...' \ 703 and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': 704 704 return True 705 705 … … 707 707 708 708 # BLOCK-ENTRY: '-' (' '|'\n') 709 return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'709 return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' 710 710 711 711 def check_key(self): … … 717 717 # KEY(block context): '?' (' '|'\n') 718 718 else: 719 return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'719 return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' 720 720 721 721 def check_value(self): … … 727 727 # VALUE(block context): ':' (' '|'\n') 728 728 else: 729 return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'729 return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' 730 730 731 731 def check_plain(self): … … 744 744 # independent. 745 745 ch = self.peek() 746 return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \747 or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029'748 and (ch == u'-' or (not self.flow_level and ch in u'?:')))746 return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ 747 or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029' 748 and (ch == '-' or (not self.flow_level and ch in '?:'))) 749 749 750 750 # Scanners. … … 770 770 # Scanners for block, flow, and plain scalars need to be modified. 
771 771 772 if self.index == 0 and self.peek() == u'\uFEFF':772 if self.index == 0 and self.peek() == '\uFEFF': 773 773 self.forward() 774 774 found = False 775 775 while not found: 776 while self.peek() == u' ':776 while self.peek() == ' ': 777 777 self.forward() 778 if self.peek() == u'#':779 while self.peek() not in u'\0\r\n\x85\u2028\u2029':778 if self.peek() == '#': 779 while self.peek() not in '\0\r\n\x85\u2028\u2029': 780 780 self.forward() 781 781 if self.scan_line_break(): … … 791 791 name = self.scan_directive_name(start_mark) 792 792 value = None 793 if name == u'YAML':793 if name == 'YAML': 794 794 value = self.scan_yaml_directive_value(start_mark) 795 795 end_mark = self.get_mark() 796 elif name == u'TAG':796 elif name == 'TAG': 797 797 value = self.scan_tag_directive_value(start_mark) 798 798 end_mark = self.get_mark() 799 799 else: 800 800 end_mark = self.get_mark() 801 while self.peek() not in u'\0\r\n\x85\u2028\u2029':801 while self.peek() not in '\0\r\n\x85\u2028\u2029': 802 802 self.forward() 803 803 self.scan_directive_ignored_line(start_mark) … … 808 808 length = 0 809 809 ch = self.peek(length) 810 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \811 or ch in u'-_':810 while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ 811 or ch in '-_': 812 812 length += 1 813 813 ch = self.peek(length) … … 815 815 raise ScannerError("while scanning a directive", start_mark, 816 816 "expected alphabetic or numeric character, but found %r" 817 % ch .encode('utf-8'), self.get_mark())817 % ch, self.get_mark()) 818 818 value = self.prefix(length) 819 819 self.forward(length) 820 820 ch = self.peek() 821 if ch not in u'\0 \r\n\x85\u2028\u2029':821 if ch not in '\0 \r\n\x85\u2028\u2029': 822 822 raise ScannerError("while scanning a directive", start_mark, 823 823 "expected alphabetic or numeric character, but found %r" 824 % ch .encode('utf-8'), self.get_mark())824 % ch, self.get_mark()) 825 825 return value 826 826 827 827 def 
scan_yaml_directive_value(self, start_mark): 828 828 # See the specification for details. 829 while self.peek() == u' ':829 while self.peek() == ' ': 830 830 self.forward() 831 831 major = self.scan_yaml_directive_number(start_mark) 832 832 if self.peek() != '.': 833 833 raise ScannerError("while scanning a directive", start_mark, 834 "expected a digit or '.', but found %r" 835 % self.peek().encode('utf-8'), 834 "expected a digit or '.', but found %r" % self.peek(), 836 835 self.get_mark()) 837 836 self.forward() 838 837 minor = self.scan_yaml_directive_number(start_mark) 839 if self.peek() not in u'\0 \r\n\x85\u2028\u2029':838 if self.peek() not in '\0 \r\n\x85\u2028\u2029': 840 839 raise ScannerError("while scanning a directive", start_mark, 841 "expected a digit or ' ', but found %r" 842 % self.peek().encode('utf-8'), 840 "expected a digit or ' ', but found %r" % self.peek(), 843 841 self.get_mark()) 844 842 return (major, minor) … … 847 845 # See the specification for details. 848 846 ch = self.peek() 849 if not ( u'0' <= ch <= '9'):847 if not ('0' <= ch <= '9'): 850 848 raise ScannerError("while scanning a directive", start_mark, 851 "expected a digit, but found %r" % ch.encode('utf-8'), 852 self.get_mark()) 849 "expected a digit, but found %r" % ch, self.get_mark()) 853 850 length = 0 854 while u'0' <= self.peek(length) <= u'9':851 while '0' <= self.peek(length) <= '9': 855 852 length += 1 856 853 value = int(self.prefix(length)) … … 860 857 def scan_tag_directive_value(self, start_mark): 861 858 # See the specification for details. 
862 while self.peek() == u' ':859 while self.peek() == ' ': 863 860 self.forward() 864 861 handle = self.scan_tag_directive_handle(start_mark) 865 while self.peek() == u' ':862 while self.peek() == ' ': 866 863 self.forward() 867 864 prefix = self.scan_tag_directive_prefix(start_mark) … … 872 869 value = self.scan_tag_handle('directive', start_mark) 873 870 ch = self.peek() 874 if ch != u' ':871 if ch != ' ': 875 872 raise ScannerError("while scanning a directive", start_mark, 876 "expected ' ', but found %r" % ch.encode('utf-8'), 877 self.get_mark()) 873 "expected ' ', but found %r" % ch, self.get_mark()) 878 874 return value 879 875 … … 882 878 value = self.scan_tag_uri('directive', start_mark) 883 879 ch = self.peek() 884 if ch not in u'\0 \r\n\x85\u2028\u2029':880 if ch not in '\0 \r\n\x85\u2028\u2029': 885 881 raise ScannerError("while scanning a directive", start_mark, 886 "expected ' ', but found %r" % ch.encode('utf-8'), 887 self.get_mark()) 882 "expected ' ', but found %r" % ch, self.get_mark()) 888 883 return value 889 884 890 885 def scan_directive_ignored_line(self, start_mark): 891 886 # See the specification for details. 
892 while self.peek() == u' ':887 while self.peek() == ' ': 893 888 self.forward() 894 if self.peek() == u'#':895 while self.peek() not in u'\0\r\n\x85\u2028\u2029':889 if self.peek() == '#': 890 while self.peek() not in '\0\r\n\x85\u2028\u2029': 896 891 self.forward() 897 892 ch = self.peek() 898 if ch not in u'\0\r\n\x85\u2028\u2029':893 if ch not in '\0\r\n\x85\u2028\u2029': 899 894 raise ScannerError("while scanning a directive", start_mark, 900 895 "expected a comment or a line break, but found %r" 901 % ch .encode('utf-8'), self.get_mark())896 % ch, self.get_mark()) 902 897 self.scan_line_break() 903 898 … … 920 915 length = 0 921 916 ch = self.peek(length) 922 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \923 or ch in u'-_':917 while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ 918 or ch in '-_': 924 919 length += 1 925 920 ch = self.peek(length) … … 927 922 raise ScannerError("while scanning an %s" % name, start_mark, 928 923 "expected alphabetic or numeric character, but found %r" 929 % ch .encode('utf-8'), self.get_mark())924 % ch, self.get_mark()) 930 925 value = self.prefix(length) 931 926 self.forward(length) 932 927 ch = self.peek() 933 if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`':928 if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`': 934 929 raise ScannerError("while scanning an %s" % name, start_mark, 935 930 "expected alphabetic or numeric character, but found %r" 936 % ch .encode('utf-8'), self.get_mark())931 % ch, self.get_mark()) 937 932 end_mark = self.get_mark() 938 933 return TokenClass(value, start_mark, end_mark) … … 942 937 start_mark = self.get_mark() 943 938 ch = self.peek(1) 944 if ch == u'<':939 if ch == '<': 945 940 handle = None 946 941 self.forward(2) 947 942 suffix = self.scan_tag_uri('tag', start_mark) 948 if self.peek() != u'>':943 if self.peek() != '>': 949 944 raise ScannerError("while parsing a tag", start_mark, 950 "expected '>', but found %r" % self.peek() .encode('utf-8'),945 
"expected '>', but found %r" % self.peek(), 951 946 self.get_mark()) 952 947 self.forward() 953 elif ch in u'\0 \t\r\n\x85\u2028\u2029':948 elif ch in '\0 \t\r\n\x85\u2028\u2029': 954 949 handle = None 955 suffix = u'!'950 suffix = '!' 956 951 self.forward() 957 952 else: 958 953 length = 1 959 954 use_handle = False 960 while ch not in u'\0 \r\n\x85\u2028\u2029':961 if ch == u'!':955 while ch not in '\0 \r\n\x85\u2028\u2029': 956 if ch == '!': 962 957 use_handle = True 963 958 break 964 959 length += 1 965 960 ch = self.peek(length) 966 handle = u'!'961 handle = '!' 967 962 if use_handle: 968 963 handle = self.scan_tag_handle('tag', start_mark) 969 964 else: 970 handle = u'!'965 handle = '!' 971 966 self.forward() 972 967 suffix = self.scan_tag_uri('tag', start_mark) 973 968 ch = self.peek() 974 if ch not in u'\0 \r\n\x85\u2028\u2029':969 if ch not in '\0 \r\n\x85\u2028\u2029': 975 970 raise ScannerError("while scanning a tag", start_mark, 976 "expected ' ', but found %r" % ch.encode('utf-8'), 977 self.get_mark()) 971 "expected ' ', but found %r" % ch, self.get_mark()) 978 972 value = (handle, suffix) 979 973 end_mark = self.get_mark() … … 1006 1000 indent = min_indent+increment-1 1007 1001 breaks, end_mark = self.scan_block_scalar_breaks(indent) 1008 line_break = u''1002 line_break = '' 1009 1003 1010 1004 # Scan the inner part of the block scalar. 
1011 while self.column == indent and self.peek() != u'\0':1005 while self.column == indent and self.peek() != '\0': 1012 1006 chunks.extend(breaks) 1013 leading_non_space = self.peek() not in u' \t'1007 leading_non_space = self.peek() not in ' \t' 1014 1008 length = 0 1015 while self.peek(length) not in u'\0\r\n\x85\u2028\u2029':1009 while self.peek(length) not in '\0\r\n\x85\u2028\u2029': 1016 1010 length += 1 1017 1011 chunks.append(self.prefix(length)) … … 1019 1013 line_break = self.scan_line_break() 1020 1014 breaks, end_mark = self.scan_block_scalar_breaks(indent) 1021 if self.column == indent and self.peek() != u'\0':1015 if self.column == indent and self.peek() != '\0': 1022 1016 1023 1017 # Unfortunately, folding rules are ambiguous. … … 1025 1019 # This is the folding according to the specification: 1026 1020 1027 if folded and line_break == u'\n'\1028 and leading_non_space and self.peek() not in u' \t':1021 if folded and line_break == '\n' \ 1022 and leading_non_space and self.peek() not in ' \t': 1029 1023 if not breaks: 1030 chunks.append( u' ')1024 chunks.append(' ') 1031 1025 else: 1032 1026 chunks.append(line_break) … … 1035 1029 # examples): 1036 1030 # 1037 #if folded and line_break == u'\n':1031 #if folded and line_break == '\n': 1038 1032 # if not breaks: 1039 1033 # if self.peek() not in ' \t': 1040 # chunks.append( u' ')1034 # chunks.append(' ') 1041 1035 # else: 1042 1036 # chunks.append(line_break) … … 1053 1047 1054 1048 # We are done. 
1055 return ScalarToken( u''.join(chunks), False, start_mark, end_mark,1049 return ScalarToken(''.join(chunks), False, start_mark, end_mark, 1056 1050 style) 1057 1051 … … 1061 1055 increment = None 1062 1056 ch = self.peek() 1063 if ch in u'+-':1057 if ch in '+-': 1064 1058 if ch == '+': 1065 1059 chomping = True … … 1068 1062 self.forward() 1069 1063 ch = self.peek() 1070 if ch in u'0123456789':1064 if ch in '0123456789': 1071 1065 increment = int(ch) 1072 1066 if increment == 0: … … 1075 1069 self.get_mark()) 1076 1070 self.forward() 1077 elif ch in u'0123456789':1071 elif ch in '0123456789': 1078 1072 increment = int(ch) 1079 1073 if increment == 0: … … 1083 1077 self.forward() 1084 1078 ch = self.peek() 1085 if ch in u'+-':1079 if ch in '+-': 1086 1080 if ch == '+': 1087 1081 chomping = True … … 1090 1084 self.forward() 1091 1085 ch = self.peek() 1092 if ch not in u'\0 \r\n\x85\u2028\u2029':1086 if ch not in '\0 \r\n\x85\u2028\u2029': 1093 1087 raise ScannerError("while scanning a block scalar", start_mark, 1094 1088 "expected chomping or indentation indicators, but found %r" 1095 % ch.encode('utf-8'), self.get_mark())1089 % ch, self.get_mark()) 1096 1090 return chomping, increment 1097 1091 1098 1092 def scan_block_scalar_ignored_line(self, start_mark): 1099 1093 # See the specification for details. 
1100 while self.peek() == u' ':1094 while self.peek() == ' ': 1101 1095 self.forward() 1102 if self.peek() == u'#':1103 while self.peek() not in u'\0\r\n\x85\u2028\u2029':1096 if self.peek() == '#': 1097 while self.peek() not in '\0\r\n\x85\u2028\u2029': 1104 1098 self.forward() 1105 1099 ch = self.peek() 1106 if ch not in u'\0\r\n\x85\u2028\u2029':1100 if ch not in '\0\r\n\x85\u2028\u2029': 1107 1101 raise ScannerError("while scanning a block scalar", start_mark, 1108 "expected a comment or a line break, but found %r" 1109 % ch.encode('utf-8'),self.get_mark())1102 "expected a comment or a line break, but found %r" % ch, 1103 self.get_mark()) 1110 1104 self.scan_line_break() 1111 1105 … … 1115 1109 max_indent = 0 1116 1110 end_mark = self.get_mark() 1117 while self.peek() in u' \r\n\x85\u2028\u2029':1118 if self.peek() != u' ':1111 while self.peek() in ' \r\n\x85\u2028\u2029': 1112 if self.peek() != ' ': 1119 1113 chunks.append(self.scan_line_break()) 1120 1114 end_mark = self.get_mark() … … 1129 1123 chunks = [] 1130 1124 end_mark = self.get_mark() 1131 while self.column < indent and self.peek() == u' ':1125 while self.column < indent and self.peek() == ' ': 1132 1126 self.forward() 1133 while self.peek() in u'\r\n\x85\u2028\u2029':1127 while self.peek() in '\r\n\x85\u2028\u2029': 1134 1128 chunks.append(self.scan_line_break()) 1135 1129 end_mark = self.get_mark() 1136 while self.column < indent and self.peek() == u' ':1130 while self.column < indent and self.peek() == ' ': 1137 1131 self.forward() 1138 1132 return chunks, end_mark … … 1159 1153 self.forward() 1160 1154 end_mark = self.get_mark() 1161 return ScalarToken( u''.join(chunks), False, start_mark, end_mark,1155 return ScalarToken(''.join(chunks), False, start_mark, end_mark, 1162 1156 style) 1163 1157 1164 1158 ESCAPE_REPLACEMENTS = { 1165 u'0': u'\0',1166 u'a': u'\x07',1167 u'b': u'\x08',1168 u't': u'\x09',1169 u'\t': u'\x09',1170 u'n': u'\x0A',1171 u'v': u'\x0B',1172 u'f': u'\x0C',1173 u'r': 
u'\x0D',1174 u'e': u'\x1B',1175 u' ': u'\x20',1176 u'\"': u'\"',1177 u'\\': u'\\',1178 u'N': u'\x85',1179 u'_': u'\xA0',1180 u'L': u'\u2028',1181 u'P': u'\u2029',1159 '0': '\0', 1160 'a': '\x07', 1161 'b': '\x08', 1162 't': '\x09', 1163 '\t': '\x09', 1164 'n': '\x0A', 1165 'v': '\x0B', 1166 'f': '\x0C', 1167 'r': '\x0D', 1168 'e': '\x1B', 1169 ' ': '\x20', 1170 '\"': '\"', 1171 '\\': '\\', 1172 'N': '\x85', 1173 '_': '\xA0', 1174 'L': '\u2028', 1175 'P': '\u2029', 1182 1176 } 1183 1177 1184 1178 ESCAPE_CODES = { 1185 u'x':2,1186 u'u':4,1187 u'U':8,1179 'x': 2, 1180 'u': 4, 1181 'U': 8, 1188 1182 } 1189 1183 … … 1193 1187 while True: 1194 1188 length = 0 1195 while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':1189 while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029': 1196 1190 length += 1 1197 1191 if length: … … 1199 1193 self.forward(length) 1200 1194 ch = self.peek() 1201 if not double and ch == u'\'' and self.peek(1) == u'\'':1202 chunks.append( u'\'')1195 if not double and ch == '\'' and self.peek(1) == '\'': 1196 chunks.append('\'') 1203 1197 self.forward(2) 1204 elif (double and ch == u'\'') or (not double and ch in u'\"\\'):1198 elif (double and ch == '\'') or (not double and ch in '\"\\'): 1205 1199 chunks.append(ch) 1206 1200 self.forward() 1207 elif double and ch == u'\\':1201 elif double and ch == '\\': 1208 1202 self.forward() 1209 1203 ch = self.peek() … … 1215 1209 self.forward() 1216 1210 for k in range(length): 1217 if self.peek(k) not in u'0123456789ABCDEFabcdef':1211 if self.peek(k) not in '0123456789ABCDEFabcdef': 1218 1212 raise ScannerError("while scanning a double-quoted scalar", start_mark, 1219 1213 "expected escape sequence of %d hexdecimal numbers, but found %r" % 1220 (length, self.peek(k) .encode('utf-8')), self.get_mark())1214 (length, self.peek(k)), self.get_mark()) 1221 1215 code = int(self.prefix(length), 16) 1222 chunks.append( unichr(code))1216 chunks.append(chr(code)) 1223 1217 self.forward(length) 1224 
elif ch in u'\r\n\x85\u2028\u2029':1218 elif ch in '\r\n\x85\u2028\u2029': 1225 1219 self.scan_line_break() 1226 1220 chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) 1227 1221 else: 1228 1222 raise ScannerError("while scanning a double-quoted scalar", start_mark, 1229 "found unknown escape character %r" % ch .encode('utf-8'), self.get_mark())1223 "found unknown escape character %r" % ch, self.get_mark()) 1230 1224 else: 1231 1225 return chunks … … 1235 1229 chunks = [] 1236 1230 length = 0 1237 while self.peek(length) in u' \t':1231 while self.peek(length) in ' \t': 1238 1232 length += 1 1239 1233 whitespaces = self.prefix(length) 1240 1234 self.forward(length) 1241 1235 ch = self.peek() 1242 if ch == u'\0':1236 if ch == '\0': 1243 1237 raise ScannerError("while scanning a quoted scalar", start_mark, 1244 1238 "found unexpected end of stream", self.get_mark()) 1245 elif ch in u'\r\n\x85\u2028\u2029':1239 elif ch in '\r\n\x85\u2028\u2029': 1246 1240 line_break = self.scan_line_break() 1247 1241 breaks = self.scan_flow_scalar_breaks(double, start_mark) 1248 if line_break != u'\n':1242 if line_break != '\n': 1249 1243 chunks.append(line_break) 1250 1244 elif not breaks: 1251 chunks.append( u' ')1245 chunks.append(' ') 1252 1246 chunks.extend(breaks) 1253 1247 else: … … 1262 1256 # separators. 
1263 1257 prefix = self.prefix(3) 1264 if (prefix == u'---' or prefix == u'...') \1265 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':1258 if (prefix == '---' or prefix == '...') \ 1259 and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': 1266 1260 raise ScannerError("while scanning a quoted scalar", start_mark, 1267 1261 "found unexpected document separator", self.get_mark()) 1268 while self.peek() in u' \t':1262 while self.peek() in ' \t': 1269 1263 self.forward() 1270 if self.peek() in u'\r\n\x85\u2028\u2029':1264 if self.peek() in '\r\n\x85\u2028\u2029': 1271 1265 chunks.append(self.scan_line_break()) 1272 1266 else: … … 1290 1284 while True: 1291 1285 length = 0 1292 if self.peek() == u'#':1286 if self.peek() == '#': 1293 1287 break 1294 1288 while True: 1295 1289 ch = self.peek(length) 1296 if ch in u'\0 \t\r\n\x85\u2028\u2029'\1297 or (not self.flow_level and ch == u':' and1298 self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \1299 or (self.flow_level and ch in u',:?[]{}'):1290 if ch in '\0 \t\r\n\x85\u2028\u2029' \ 1291 or (not self.flow_level and ch == ':' and 1292 self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029') \ 1293 or (self.flow_level and ch in ',:?[]{}'): 1300 1294 break 1301 1295 length += 1 1302 1296 # It's not clear what we should do with ':' in the flow context. 
1303 if (self.flow_level and ch == u':'1304 and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'):1297 if (self.flow_level and ch == ':' 1298 and self.peek(length+1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}'): 1305 1299 self.forward(length) 1306 1300 raise ScannerError("while scanning a plain scalar", start_mark, … … 1315 1309 end_mark = self.get_mark() 1316 1310 spaces = self.scan_plain_spaces(indent, start_mark) 1317 if not spaces or self.peek() == u'#' \1311 if not spaces or self.peek() == '#' \ 1318 1312 or (not self.flow_level and self.column < indent): 1319 1313 break 1320 return ScalarToken( u''.join(chunks), True, start_mark, end_mark)1314 return ScalarToken(''.join(chunks), True, start_mark, end_mark) 1321 1315 1322 1316 def scan_plain_spaces(self, indent, start_mark): … … 1326 1320 chunks = [] 1327 1321 length = 0 1328 while self.peek(length) in u' ':1322 while self.peek(length) in ' ': 1329 1323 length += 1 1330 1324 whitespaces = self.prefix(length) 1331 1325 self.forward(length) 1332 1326 ch = self.peek() 1333 if ch in u'\r\n\x85\u2028\u2029':1327 if ch in '\r\n\x85\u2028\u2029': 1334 1328 line_break = self.scan_line_break() 1335 1329 self.allow_simple_key = True 1336 1330 prefix = self.prefix(3) 1337 if (prefix == u'---' or prefix == u'...') \1338 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':1331 if (prefix == '---' or prefix == '...') \ 1332 and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': 1339 1333 return 1340 1334 breaks = [] 1341 while self.peek() in u' \r\n\x85\u2028\u2029':1335 while self.peek() in ' \r\n\x85\u2028\u2029': 1342 1336 if self.peek() == ' ': 1343 1337 self.forward() … … 1345 1339 breaks.append(self.scan_line_break()) 1346 1340 prefix = self.prefix(3) 1347 if (prefix == u'---' or prefix == u'...') \1348 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':1341 if (prefix == '---' or prefix == '...') \ 1342 and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': 1349 1343 return 1350 if line_break != u'\n':1344 if line_break != 
'\n': 1351 1345 chunks.append(line_break) 1352 1346 elif not breaks: 1353 chunks.append( u' ')1347 chunks.append(' ') 1354 1348 chunks.extend(breaks) 1355 1349 elif whitespaces: … … 1362 1356 # tag handles. I have allowed it anyway. 1363 1357 ch = self.peek() 1364 if ch != u'!':1358 if ch != '!': 1365 1359 raise ScannerError("while scanning a %s" % name, start_mark, 1366 "expected '!', but found %r" % ch.encode('utf-8'), 1367 self.get_mark()) 1360 "expected '!', but found %r" % ch, self.get_mark()) 1368 1361 length = 1 1369 1362 ch = self.peek(length) 1370 if ch != u' ':1371 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \1372 or ch in u'-_':1363 if ch != ' ': 1364 while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ 1365 or ch in '-_': 1373 1366 length += 1 1374 1367 ch = self.peek(length) 1375 if ch != u'!':1368 if ch != '!': 1376 1369 self.forward(length) 1377 1370 raise ScannerError("while scanning a %s" % name, start_mark, 1378 "expected '!', but found %r" % ch.encode('utf-8'), 1379 self.get_mark()) 1371 "expected '!', but found %r" % ch, self.get_mark()) 1380 1372 length += 1 1381 1373 value = self.prefix(length) … … 1389 1381 length = 0 1390 1382 ch = self.peek(length) 1391 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \1392 or ch in u'-;/?:@&=+$,_.!~*\'()[]%':1393 if ch == u'%':1383 while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ 1384 or ch in '-;/?:@&=+$,_.!~*\'()[]%': 1385 if ch == '%': 1394 1386 chunks.append(self.prefix(length)) 1395 1387 self.forward(length) … … 1405 1397 if not chunks: 1406 1398 raise ScannerError("while parsing a %s" % name, start_mark, 1407 "expected URI, but found %r" % ch.encode('utf-8'), 1408 self.get_mark()) 1409 return u''.join(chunks) 1399 "expected URI, but found %r" % ch, self.get_mark()) 1400 return ''.join(chunks) 1410 1401 1411 1402 def scan_uri_escapes(self, name, start_mark): 1412 1403 # See the specification for details. 
1413 bytes = []1404 codes = [] 1414 1405 mark = self.get_mark() 1415 while self.peek() == u'%':1406 while self.peek() == '%': 1416 1407 self.forward() 1417 1408 for k in range(2): 1418 if self.peek(k) not in u'0123456789ABCDEFabcdef':1409 if self.peek(k) not in '0123456789ABCDEFabcdef': 1419 1410 raise ScannerError("while scanning a %s" % name, start_mark, 1420 "expected URI escape sequence of 2 hexdecimal numbers, but found %r" %1421 (self.peek(k).encode('utf-8')), self.get_mark())1422 bytes.append(chr(int(self.prefix(2), 16)))1411 "expected URI escape sequence of 2 hexdecimal numbers, but found %r" 1412 % self.peek(k), self.get_mark()) 1413 codes.append(int(self.prefix(2), 16)) 1423 1414 self.forward(2) 1424 1415 try: 1425 value = unicode(''.join(bytes),'utf-8')1426 except UnicodeDecodeError ,exc:1416 value = bytes(codes).decode('utf-8') 1417 except UnicodeDecodeError as exc: 1427 1418 raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) 1428 1419 return value … … 1438 1429 # default : '' 1439 1430 ch = self.peek() 1440 if ch in u'\r\n\x85':1441 if self.prefix(2) == u'\r\n':1431 if ch in '\r\n\x85': 1432 if self.prefix(2) == '\r\n': 1442 1433 self.forward(2) 1443 1434 else: 1444 1435 self.forward() 1445 return u'\n'1446 elif ch in u'\u2028\u2029':1436 return '\n' 1437 elif ch in '\u2028\u2029': 1447 1438 self.forward() 1448 1439 return ch 1449 return u''1440 return '' 1450 1441 1451 1442 #try:
Note: See TracChangeset for help on using the changeset viewer.
