Ticket #97: reader.patch
| File reader.patch, 47.4 KB (added by py4fun@…, 5 years ago) |
|---|
-
C:/projects/workspace_python/PyYAML_2/lib/yaml/scanner.py
45 45 46 46 class Scanner(object): 47 47 48 def __init__(self ):48 def __init__(self, reader): 49 49 """Initialize the scanner.""" 50 # It is assumed that Scanner and Reader will have a common descendant. 51 # Reader do the dirty work of checking for BOM and converting the 50 # Reader does the dirty work of checking for BOM and converting the 52 51 # input data to Unicode. It also adds NUL to the end. 53 52 # 54 53 # Reader supports the following methods 55 # self. peek(i=0) # peek the next i-th character56 # self. prefix(l=1) # peek the next l characters57 # self. forward(l=1) # read the next l characters and move the pointer.54 # self.reader.peek(i=0) # peek the next i-th character 55 # self.reader.prefix(l=1) # peek the next l characters 56 # self.reader.forward(l=1) # read the next l characters and move the pointer. 58 57 58 self.reader = reader 59 59 60 # Had we reached the end of the stream? 60 61 self.done = False 61 62 … … 160 161 161 162 # Compare the current indentation and column. It may add some tokens 162 163 # and decrease the current indentation level. 163 self.unwind_indent(self. column)164 self.unwind_indent(self.reader.column) 164 165 165 166 # Peek the next character. 166 ch = self. peek()167 ch = self.reader.peek() 167 168 168 169 # Is it the end of stream? 169 170 if ch == u'\0': … … 254 255 # No? It's an error. Let's produce a nice error message. 255 256 raise ScannerError("while scanning for the next token", None, 256 257 "found character %r that cannot start any token" 257 % ch.encode('utf-8'), self. get_mark())258 % ch.encode('utf-8'), self.reader.get_mark()) 258 259 259 260 # Simple keys treatment. 260 261 … … 282 283 # height (may cause problems if indentation is broken though). 283 284 for level in self.possible_simple_keys.keys(): 284 285 key = self.possible_simple_keys[level] 285 if key.line != self. line \286 or self. index-key.index > 1024:286 if key.line != self.reader.line \ 287 or self.reader.index-key.index > 1024: 287 288 if key.required: 288 289 raise ScannerError("while scanning a simple key", key.mark, 289 "could not found expected ':'", self. get_mark())290 "could not found expected ':'", self.reader.get_mark()) 290 291 del self.possible_simple_keys[level] 291 292 292 293 def save_possible_simple_key(self): … … 295 296 # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. 296 297 297 298 # Check if a simple key is required at the current position. 298 required = not self.flow_level and self.indent == self. column299 required = not self.flow_level and self.indent == self.reader.column 299 300 300 301 # A simple key is required only if it is the first token in the current 301 302 # line. Therefore it is always allowed. … … 307 308 self.remove_possible_simple_key() 308 309 token_number = self.tokens_taken+len(self.tokens) 309 310 key = SimpleKey(token_number, required, 310 self. index, self.line, self.column, self.get_mark())311 self.reader.index, self.reader.line, self.reader.column, self.reader.get_mark()) 311 312 self.possible_simple_keys[self.flow_level] = key 312 313 313 314 def remove_possible_simple_key(self): … … 317 318 318 319 if key.required: 319 320 raise ScannerError("while scanning a simple key", key.mark, 320 "could not found expected ':'", self. get_mark())321 "could not found expected ':'", self.reader.get_mark()) 321 322 322 323 del self.possible_simple_keys[self.flow_level] 323 324 … … 334 335 #if self.flow_level and self.indent > column: 335 336 # raise ScannerError(None, None, 336 337 # "invalid intendation or unclosed '[' or '{'", 337 # self. get_mark())338 # self.reader.get_mark()) 338 339 339 340 # In the flow context, indentation is ignored. We make the scanner less 340 341 # restrictive then specification requires. … … 343 344 344 345 # In block context, we may need to issue the BLOCK-END tokens. 345 346 while self.indent > column: 346 mark = self. get_mark()347 mark = self.reader.get_mark() 347 348 self.indent = self.indents.pop() 348 349 self.tokens.append(BlockEndToken(mark, mark)) 349 350 … … 362 363 # last token. 363 364 364 365 # Read the token. 365 mark = self. get_mark()366 mark = self.reader.get_mark() 366 367 367 368 # Add STREAM-START. 368 369 self.tokens.append(StreamStartToken(mark, mark, 369 encoding=self. encoding))370 encoding=self.reader.encoding)) 370 371 371 372 372 373 def fetch_stream_end(self): … … 379 380 self.possible_simple_keys = {} 380 381 381 382 # Read the token. 382 mark = self. get_mark()383 mark = self.reader.get_mark() 383 384 384 385 # Add STREAM-END. 385 386 self.tokens.append(StreamEndToken(mark, mark)) … … 416 417 self.allow_simple_key = False 417 418 418 419 # Add DOCUMENT-START or DOCUMENT-END. 419 start_mark = self. get_mark()420 self. forward(3)421 end_mark = self. get_mark()420 start_mark = self.reader.get_mark() 421 self.reader.forward(3) 422 end_mark = self.reader.get_mark() 422 423 self.tokens.append(TokenClass(start_mark, end_mark)) 423 424 424 425 def fetch_flow_sequence_start(self): … … 439 440 self.allow_simple_key = True 440 441 441 442 # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. 442 start_mark = self. get_mark()443 self. forward()444 end_mark = self. get_mark()443 start_mark = self.reader.get_mark() 444 self.reader.forward() 445 end_mark = self.reader.get_mark() 445 446 self.tokens.append(TokenClass(start_mark, end_mark)) 446 447 447 448 def fetch_flow_sequence_end(self): … … 462 463 self.allow_simple_key = False 463 464 464 465 # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. 465 start_mark = self. get_mark()466 self. forward()467 end_mark = self. get_mark()466 start_mark = self.reader.get_mark() 467 self.reader.forward() 468 end_mark = self.reader.get_mark() 468 469 self.tokens.append(TokenClass(start_mark, end_mark)) 469 470 470 471 def fetch_flow_entry(self): … … 476 477 self.remove_possible_simple_key() 477 478 478 479 # Add FLOW-ENTRY. 479 start_mark = self. get_mark()480 self. forward()481 end_mark = self. get_mark()480 start_mark = self.reader.get_mark() 481 self.reader.forward() 482 end_mark = self.reader.get_mark() 482 483 self.tokens.append(FlowEntryToken(start_mark, end_mark)) 483 484 484 485 def fetch_block_entry(self): … … 490 491 if not self.allow_simple_key: 491 492 raise ScannerError(None, None, 492 493 "sequence entries are not allowed here", 493 self. get_mark())494 self.reader.get_mark()) 494 495 495 496 # We may need to add BLOCK-SEQUENCE-START. 496 if self.add_indent(self. column):497 mark = self. get_mark()497 if self.add_indent(self.reader.column): 498 mark = self.reader.get_mark() 498 499 self.tokens.append(BlockSequenceStartToken(mark, mark)) 499 500 500 501 # It's an error for the block entry to occur in the flow context, … … 509 510 self.remove_possible_simple_key() 510 511 511 512 # Add BLOCK-ENTRY. 512 start_mark = self. get_mark()513 self. forward()514 end_mark = self. get_mark()513 start_mark = self.reader.get_mark() 514 self.reader.forward() 515 end_mark = self.reader.get_mark() 515 516 self.tokens.append(BlockEntryToken(start_mark, end_mark)) 516 517 517 518 def fetch_key(self): … … 523 524 if not self.allow_simple_key: 524 525 raise ScannerError(None, None, 525 526 "mapping keys are not allowed here", 526 self. get_mark())527 self.reader.get_mark()) 527 528 528 529 # We may need to add BLOCK-MAPPING-START. 529 if self.add_indent(self. column):530 mark = self. get_mark()530 if self.add_indent(self.reader.column): 531 mark = self.reader.get_mark() 531 532 self.tokens.append(BlockMappingStartToken(mark, mark)) 532 533 533 534 # Simple keys are allowed after '?' in the block context. … … 537 538 self.remove_possible_simple_key() 538 539 539 540 # Add KEY. 540 start_mark = self. get_mark()541 self. forward()542 end_mark = self. get_mark()541 start_mark = self.reader.get_mark() 542 self.reader.forward() 543 end_mark = self.reader.get_mark() 543 544 self.tokens.append(KeyToken(start_mark, end_mark)) 544 545 545 546 def fetch_value(self): … … 576 577 if not self.allow_simple_key: 577 578 raise ScannerError(None, None, 578 579 "mapping values are not allowed here", 579 self. get_mark())580 self.reader.get_mark()) 580 581 581 582 # If this value starts a new block mapping, we need to add 582 583 # BLOCK-MAPPING-START. It will be detected as an error later by 583 584 # the parser. 584 585 if not self.flow_level: 585 if self.add_indent(self. column):586 mark = self. get_mark()586 if self.add_indent(self.reader.column): 587 mark = self.reader.get_mark() 587 588 self.tokens.append(BlockMappingStartToken(mark, mark)) 588 589 589 590 # Simple keys are allowed after ':' in the block context. … … 593 594 self.remove_possible_simple_key() 594 595 595 596 # Add VALUE. 596 start_mark = self. get_mark()597 self. forward()598 end_mark = self. get_mark()597 start_mark = self.reader.get_mark() 598 self.reader.forward() 599 end_mark = self.reader.get_mark() 599 600 self.tokens.append(ValueToken(start_mark, end_mark)) 600 601 601 602 def fetch_alias(self): … … 684 685 685 686 # DIRECTIVE: ^ '%' ... 686 687 # The '%' indicator is already checked. 687 if self. column == 0:688 if self.reader.column == 0: 688 689 return True 689 690 690 691 def check_document_start(self): 691 692 692 693 # DOCUMENT-START: ^ '---' (' '|'\n') 693 if self. column == 0:694 if self. prefix(3) == u'---' \695 and self. peek(3) in u'\0 \t\r\n\x85\u2028\u2029':694 if self.reader.column == 0: 695 if self.reader.prefix(3) == u'---' \ 696 and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': 696 697 return True 697 698 698 699 def check_document_end(self): 699 700 700 701 # DOCUMENT-END: ^ '...' (' '|'\n') 701 if self. column == 0:702 if self. prefix(3) == u'...' \703 and self. peek(3) in u'\0 \t\r\n\x85\u2028\u2029':702 if self.reader.column == 0: 703 if self.reader.prefix(3) == u'...' \ 704 and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': 704 705 return True 705 706 706 707 def check_block_entry(self): 707 708 708 709 # BLOCK-ENTRY: '-' (' '|'\n') 709 return self. peek(1) in u'\0 \t\r\n\x85\u2028\u2029'710 return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' 710 711 711 712 def check_key(self): 712 713 … … 716 717 717 718 # KEY(block context): '?' (' '|'\n') 718 719 else: 719 return self. peek(1) in u'\0 \t\r\n\x85\u2028\u2029'720 return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' 720 721 721 722 def check_value(self): 722 723 … … 726 727 727 728 # VALUE(block context): ':' (' '|'\n') 728 729 else: 729 return self. peek(1) in u'\0 \t\r\n\x85\u2028\u2029'730 return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' 730 731 731 732 def check_plain(self): 732 733 … … 742 743 # Note that we limit the last rule to the block context (except the 743 744 # '-' character) because we want the flow context to be space 744 745 # independent. 745 ch = self. peek()746 ch = self.reader.peek() 746 747 return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ 747 or (self. peek(1) not in u'\0 \t\r\n\x85\u2028\u2029'748 or (self.reader.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' 748 749 and (ch == u'-' or (not self.flow_level and ch in u'?:'))) 749 750 750 751 # Scanners. … … 769 770 # `unwind_indent` before issuing BLOCK-END. 770 771 # Scanners for block, flow, and plain scalars need to be modified. 771 772 772 if self. index == 0 and self.peek() == u'\uFEFF':773 self. forward()773 if self.reader.index == 0 and self.reader.peek() == u'\uFEFF': 774 self.reader.forward() 774 775 found = False 775 776 while not found: 776 while self. peek() == u' ':777 self. forward()778 if self. peek() == u'#':779 while self. peek() not in u'\0\r\n\x85\u2028\u2029':780 self. forward()777 while self.reader.peek() == u' ': 778 self.reader.forward() 779 if self.reader.peek() == u'#': 780 while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': 781 self.reader.forward() 781 782 if self.scan_line_break(): 782 783 if not self.flow_level: 783 784 self.allow_simple_key = True … … 786 787 787 788 def scan_directive(self): 788 789 # See the specification for details. 789 start_mark = self. get_mark()790 self. forward()790 start_mark = self.reader.get_mark() 791 self.reader.forward() 791 792 name = self.scan_directive_name(start_mark) 792 793 value = None 793 794 if name == u'YAML': 794 795 value = self.scan_yaml_directive_value(start_mark) 795 end_mark = self. get_mark()796 end_mark = self.reader.get_mark() 796 797 elif name == u'TAG': 797 798 value = self.scan_tag_directive_value(start_mark) 798 end_mark = self. get_mark()799 end_mark = self.reader.get_mark() 799 800 else: 800 end_mark = self. get_mark()801 while self. peek() not in u'\0\r\n\x85\u2028\u2029':802 self. forward()801 end_mark = self.reader.get_mark() 802 while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': 803 self.reader.forward() 803 804 self.scan_directive_ignored_line(start_mark) 804 805 return DirectiveToken(name, value, start_mark, end_mark) 805 806 806 807 def scan_directive_name(self, start_mark): 807 808 # See the specification for details. 808 809 length = 0 809 ch = self. peek(length)810 ch = self.reader.peek(length) 810 811 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ 811 812 or ch in u'-_': 812 813 length += 1 813 ch = self. peek(length)814 ch = self.reader.peek(length) 814 815 if not length: 815 816 raise ScannerError("while scanning a directive", start_mark, 816 817 "expected alphabetic or numeric character, but found %r" 817 % ch.encode('utf-8'), self. get_mark())818 value = self. prefix(length)819 self. forward(length)820 ch = self. peek()818 % ch.encode('utf-8'), self.reader.get_mark()) 819 value = self.reader.prefix(length) 820 self.reader.forward(length) 821 ch = self.reader.peek() 821 822 if ch not in u'\0 \r\n\x85\u2028\u2029': 822 823 raise ScannerError("while scanning a directive", start_mark, 823 824 "expected alphabetic or numeric character, but found %r" 824 % ch.encode('utf-8'), self. get_mark())825 % ch.encode('utf-8'), self.reader.get_mark()) 825 826 return value 826 827 827 828 def scan_yaml_directive_value(self, start_mark): 828 829 # See the specification for details. 829 while self. peek() == u' ':830 self. forward()830 while self.reader.peek() == u' ': 831 self.reader.forward() 831 832 major = self.scan_yaml_directive_number(start_mark) 832 if self. peek() != '.':833 if self.reader.peek() != '.': 833 834 raise ScannerError("while scanning a directive", start_mark, 834 835 "expected a digit or '.', but found %r" 835 % self. peek().encode('utf-8'),836 self. get_mark())837 self. forward()836 % self.reader.peek().encode('utf-8'), 837 self.reader.get_mark()) 838 self.reader.forward() 838 839 minor = self.scan_yaml_directive_number(start_mark) 839 if self. peek() not in u'\0 \r\n\x85\u2028\u2029':840 if self.reader.peek() not in u'\0 \r\n\x85\u2028\u2029': 840 841 raise ScannerError("while scanning a directive", start_mark, 841 842 "expected a digit or ' ', but found %r" 842 % self. peek().encode('utf-8'),843 self. get_mark())843 % self.reader.peek().encode('utf-8'), 844 self.reader.get_mark()) 844 845 return (major, minor) 845 846 846 847 def scan_yaml_directive_number(self, start_mark): 847 848 # See the specification for details. 848 ch = self. peek()849 ch = self.reader.peek() 849 850 if not (u'0' <= ch <= '9'): 850 851 raise ScannerError("while scanning a directive", start_mark, 851 852 "expected a digit, but found %r" % ch.encode('utf-8'), 852 self. get_mark())853 self.reader.get_mark()) 853 854 length = 0 854 while u'0' <= self. peek(length) <= u'9':855 while u'0' <= self.reader.peek(length) <= u'9': 855 856 length += 1 856 value = int(self. prefix(length))857 self. forward(length)857 value = int(self.reader.prefix(length)) 858 self.reader.forward(length) 858 859 return value 859 860 860 861 def scan_tag_directive_value(self, start_mark): 861 862 # See the specification for details. 862 while self. peek() == u' ':863 self. forward()863 while self.reader.peek() == u' ': 864 self.reader.forward() 864 865 handle = self.scan_tag_directive_handle(start_mark) 865 while self. peek() == u' ':866 self. forward()866 while self.reader.peek() == u' ': 867 self.reader.forward() 867 868 prefix = self.scan_tag_directive_prefix(start_mark) 868 869 return (handle, prefix) 869 870 870 871 def scan_tag_directive_handle(self, start_mark): 871 872 # See the specification for details. 872 873 value = self.scan_tag_handle('directive', start_mark) 873 ch = self. peek()874 ch = self.reader.peek() 874 875 if ch != u' ': 875 876 raise ScannerError("while scanning a directive", start_mark, 876 877 "expected ' ', but found %r" % ch.encode('utf-8'), 877 self. get_mark())878 self.reader.get_mark()) 878 879 return value 879 880 880 881 def scan_tag_directive_prefix(self, start_mark): 881 882 # See the specification for details. 882 883 value = self.scan_tag_uri('directive', start_mark) 883 ch = self. peek()884 ch = self.reader.peek() 884 885 if ch not in u'\0 \r\n\x85\u2028\u2029': 885 886 raise ScannerError("while scanning a directive", start_mark, 886 887 "expected ' ', but found %r" % ch.encode('utf-8'), 887 self. get_mark())888 self.reader.get_mark()) 888 889 return value 889 890 890 891 def scan_directive_ignored_line(self, start_mark): 891 892 # See the specification for details. 892 while self. peek() == u' ':893 self. forward()894 if self. peek() == u'#':895 while self. peek() not in u'\0\r\n\x85\u2028\u2029':896 self. forward()897 ch = self. peek()893 while self.reader.peek() == u' ': 894 self.reader.forward() 895 if self.reader.peek() == u'#': 896 while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': 897 self.reader.forward() 898 ch = self.reader.peek() 898 899 if ch not in u'\0\r\n\x85\u2028\u2029': 899 900 raise ScannerError("while scanning a directive", start_mark, 900 901 "expected a comment or a line break, but found %r" 901 % ch.encode('utf-8'), self. get_mark())902 % ch.encode('utf-8'), self.reader.get_mark()) 902 903 self.scan_line_break() 903 904 904 905 def scan_anchor(self, TokenClass): … … 910 911 # and 911 912 # [ *alias , "value" ] 912 913 # Therefore we restrict aliases to numbers and ASCII letters. 913 start_mark = self. get_mark()914 indicator = self. peek()914 start_mark = self.reader.get_mark() 915 indicator = self.reader.peek() 915 916 if indicator == '*': 916 917 name = 'alias' 917 918 else: 918 919 name = 'anchor' 919 self. forward()920 self.reader.forward() 920 921 length = 0 921 ch = self. peek(length)922 ch = self.reader.peek(length) 922 923 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ 923 924 or ch in u'-_': 924 925 length += 1 925 ch = self. peek(length)926 ch = self.reader.peek(length) 926 927 if not length: 927 928 raise ScannerError("while scanning an %s" % name, start_mark, 928 929 "expected alphabetic or numeric character, but found %r" 929 % ch.encode('utf-8'), self. get_mark())930 value = self. prefix(length)931 self. forward(length)932 ch = self. peek()930 % ch.encode('utf-8'), self.reader.get_mark()) 931 value = self.reader.prefix(length) 932 self.reader.forward(length) 933 ch = self.reader.peek() 933 934 if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': 934 935 raise ScannerError("while scanning an %s" % name, start_mark, 935 936 "expected alphabetic or numeric character, but found %r" 936 % ch.encode('utf-8'), self. get_mark())937 end_mark = self. get_mark()937 % ch.encode('utf-8'), self.reader.get_mark()) 938 end_mark = self.reader.get_mark() 938 939 return TokenClass(value, start_mark, end_mark) 939 940 940 941 def scan_tag(self): 941 942 # See the specification for details. 942 start_mark = self. get_mark()943 ch = self. peek(1)943 start_mark = self.reader.get_mark() 944 ch = self.reader.peek(1) 944 945 if ch == u'<': 945 946 handle = None 946 self. forward(2)947 self.reader.forward(2) 947 948 suffix = self.scan_tag_uri('tag', start_mark) 948 if self. peek() != u'>':949 if self.reader.peek() != u'>': 949 950 raise ScannerError("while parsing a tag", start_mark, 950 "expected '>', but found %r" % self. peek().encode('utf-8'),951 self. get_mark())952 self. forward()951 "expected '>', but found %r" % self.reader.peek().encode('utf-8'), 952 self.reader.get_mark()) 953 self.reader.forward() 953 954 elif ch in u'\0 \t\r\n\x85\u2028\u2029': 954 955 handle = None 955 956 suffix = u'!' 956 self. forward()957 self.reader.forward() 957 958 else: 958 959 length = 1 959 960 use_handle = False … … 962 963 use_handle = True 963 964 break 964 965 length += 1 965 ch = self. peek(length)966 ch = self.reader.peek(length) 966 967 handle = u'!' 967 968 if use_handle: 968 969 handle = self.scan_tag_handle('tag', start_mark) 969 970 else: 970 971 handle = u'!' 971 self. forward()972 self.reader.forward() 972 973 suffix = self.scan_tag_uri('tag', start_mark) 973 ch = self. peek()974 ch = self.reader.peek() 974 975 if ch not in u'\0 \r\n\x85\u2028\u2029': 975 976 raise ScannerError("while scanning a tag", start_mark, 976 977 "expected ' ', but found %r" % ch.encode('utf-8'), 977 self. get_mark())978 self.reader.get_mark()) 978 979 value = (handle, suffix) 979 end_mark = self. get_mark()980 end_mark = self.reader.get_mark() 980 981 return TagToken(value, start_mark, end_mark) 981 982 982 983 def scan_block_scalar(self, style): … … 988 989 folded = False 989 990 990 991 chunks = [] 991 start_mark = self. get_mark()992 start_mark = self.reader.get_mark() 992 993 993 994 # Scan the header. 994 self. forward()995 self.reader.forward() 995 996 chomping, increment = self.scan_block_scalar_indicators(start_mark) 996 997 self.scan_block_scalar_ignored_line(start_mark) 997 998 … … 1008 1009 line_break = u'' 1009 1010 1010 1011 # Scan the inner part of the block scalar. 1011 while self. column == indent and self.peek() != u'\0':1012 while self.reader.column == indent and self.reader.peek() != u'\0': 1012 1013 chunks.extend(breaks) 1013 leading_non_space = self. peek() not in u' \t'1014 leading_non_space = self.reader.peek() not in u' \t' 1014 1015 length = 0 1015 while self. peek(length) not in u'\0\r\n\x85\u2028\u2029':1016 while self.reader.peek(length) not in u'\0\r\n\x85\u2028\u2029': 1016 1017 length += 1 1017 chunks.append(self. prefix(length))1018 self. forward(length)1018 chunks.append(self.reader.prefix(length)) 1019 self.reader.forward(length) 1019 1020 line_break = self.scan_line_break() 1020 1021 breaks, end_mark = self.scan_block_scalar_breaks(indent) 1021 if self. column == indent and self.peek() != u'\0':1022 if self.reader.column == indent and self.reader.peek() != u'\0': 1022 1023 1023 1024 # Unfortunately, folding rules are ambiguous. 1024 1025 # 1025 1026 # This is the folding according to the specification: 1026 1027 1027 1028 if folded and line_break == u'\n' \ 1028 and leading_non_space and self. peek() not in u' \t':1029 and leading_non_space and self.reader.peek() not in u' \t': 1029 1030 if not breaks: 1030 1031 chunks.append(u' ') 1031 1032 else: … … 1036 1037 # 1037 1038 #if folded and line_break == u'\n': 1038 1039 # if not breaks: 1039 # if self. peek() not in ' \t':1040 # if self.reader.peek() not in ' \t': 1040 1041 # chunks.append(u' ') 1041 1042 # else: 1042 1043 # chunks.append(line_break) … … 1059 1060 # See the specification for details. 1060 1061 chomping = None 1061 1062 increment = None 1062 ch = self. peek()1063 ch = self.reader.peek() 1063 1064 if ch in u'+-': 1064 1065 if ch == '+': 1065 1066 chomping = True 1066 1067 else: 1067 1068 chomping = False 1068 self. forward()1069 ch = self. peek()1069 self.reader.forward() 1070 ch = self.reader.peek() 1070 1071 if ch in u'0123456789': 1071 1072 increment = int(ch) 1072 1073 if increment == 0: 1073 1074 raise ScannerError("while scanning a block scalar", start_mark, 1074 1075 "expected indentation indicator in the range 1-9, but found 0", 1075 self. get_mark())1076 self. forward()1076 self.reader.get_mark()) 1077 self.reader.forward() 1077 1078 elif ch in u'0123456789': 1078 1079 increment = int(ch) 1079 1080 if increment == 0: 1080 1081 raise ScannerError("while scanning a block scalar", start_mark, 1081 1082 "expected indentation indicator in the range 1-9, but found 0", 1082 self. get_mark())1083 self. forward()1084 ch = self. peek()1083 self.reader.get_mark()) 1084 self.reader.forward() 1085 ch = self.reader.peek() 1085 1086 if ch in u'+-': 1086 1087 if ch == '+': 1087 1088 chomping = True 1088 1089 else: 1089 1090 chomping = False 1090 self. forward()1091 ch = self. peek()1091 self.reader.forward() 1092 ch = self.reader.peek() 1092 1093 if ch not in u'\0 \r\n\x85\u2028\u2029': 1093 1094 raise ScannerError("while scanning a block scalar", start_mark, 1094 1095 "expected chomping or indentation indicators, but found %r" 1095 % ch.encode('utf-8'), self. get_mark())1096 % ch.encode('utf-8'), self.reader.get_mark()) 1096 1097 return chomping, increment 1097 1098 1098 1099 def scan_block_scalar_ignored_line(self, start_mark): 1099 1100 # See the specification for details. 1100 while self. peek() == u' ':1101 self. forward()1102 if self. peek() == u'#':1103 while self. peek() not in u'\0\r\n\x85\u2028\u2029':1104 self. forward()1105 ch = self. peek()1101 while self.reader.peek() == u' ': 1102 self.reader.forward() 1103 if self.reader.peek() == u'#': 1104 while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': 1105 self.reader.forward() 1106 ch = self.reader.peek() 1106 1107 if ch not in u'\0\r\n\x85\u2028\u2029': 1107 1108 raise ScannerError("while scanning a block scalar", start_mark, 1108 1109 "expected a comment or a line break, but found %r" 1109 % ch.encode('utf-8'), self. get_mark())1110 % ch.encode('utf-8'), self.reader.get_mark()) 1110 1111 self.scan_line_break() 1111 1112 1112 1113 def scan_block_scalar_indentation(self): 1113 1114 # See the specification for details. 1114 1115 chunks = [] 1115 1116 max_indent = 0 1116 end_mark = self. get_mark()1117 while self. peek() in u' \r\n\x85\u2028\u2029':1118 if self. peek() != u' ':1117 end_mark = self.reader.get_mark() 1118 while self.reader.peek() in u' \r\n\x85\u2028\u2029': 1119 if self.reader.peek() != u' ': 1119 1120 chunks.append(self.scan_line_break()) 1120 end_mark = self. get_mark()1121 end_mark = self.reader.get_mark() 1121 1122 else: 1122 self. forward()1123 if self. column > max_indent:1124 max_indent = self. column1123 self.reader.forward() 1124 if self.reader.column > max_indent: 1125 max_indent = self.reader.column 1125 1126 return chunks, max_indent, end_mark 1126 1127 1127 1128 def scan_block_scalar_breaks(self, indent): 1128 1129 # See the specification for details. 1129 1130 chunks = [] 1130 end_mark = self. get_mark()1131 while self. column < indent and self.peek() == u' ':1132 self. forward()1133 while self. peek() in u'\r\n\x85\u2028\u2029':1131 end_mark = self.reader.get_mark() 1132 while self.reader.column < indent and self.reader.peek() == u' ': 1133 self.reader.forward() 1134 while self.reader.peek() in u'\r\n\x85\u2028\u2029': 1134 1135 chunks.append(self.scan_line_break()) 1135 end_mark = self. get_mark()1136 while self. column < indent and self.peek() == u' ':1137 self. forward()1136 end_mark = self.reader.get_mark() 1137 while self.reader.column < indent and self.reader.peek() == u' ': 1138 self.reader.forward() 1138 1139 return chunks, end_mark 1139 1140 1140 1141 def scan_flow_scalar(self, style): … … 1149 1150 else: 1150 1151 double = False 1151 1152 chunks = [] 1152 start_mark = self. get_mark()1153 quote = self. peek()1154 self. forward()1153 start_mark = self.reader.get_mark() 1154 quote = self.reader.peek() 1155 self.reader.forward() 1155 1156 chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) 1156 while self. peek() != quote:1157 while self.reader.peek() != quote: 1157 1158 chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) 1158 1159 chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) 1159 self. forward()1160 end_mark = self. get_mark()1160 self.reader.forward() 1161 end_mark = self.reader.get_mark() 1161 1162 return ScalarToken(u''.join(chunks), False, start_mark, end_mark, 1162 1163 style) 1163 1164 … … 1192 1193 chunks = [] 1193 1194 while True: 1194 1195 length = 0 1195 while self. peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':1196 while self.reader.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': 1196 1197 length += 1 1197 1198 if length: 1198 chunks.append(self. prefix(length))1199 self. forward(length)1200 ch = self. peek()1201 if not double and ch == u'\'' and self. peek(1) == u'\'':1199 chunks.append(self.reader.prefix(length)) 1200 self.reader.forward(length) 1201 ch = self.reader.peek() 1202 if not double and ch == u'\'' and self.reader.peek(1) == u'\'': 1202 1203 chunks.append(u'\'') 1203 self. forward(2)1204 self.reader.forward(2) 1204 1205 elif (double and ch == u'\'') or (not double and ch in u'\"\\'): 1205 1206 chunks.append(ch) 1206 self. forward()1207 self.reader.forward() 1207 1208 elif double and ch == u'\\': 1208 self. forward()1209 ch = self. peek()1209 self.reader.forward() 1210 ch = self.reader.peek() 1210 1211 if ch in self.ESCAPE_REPLACEMENTS: 1211 1212 chunks.append(self.ESCAPE_REPLACEMENTS[ch]) 1212 self. forward()1213 self.reader.forward() 1213 1214 elif ch in self.ESCAPE_CODES: 1214 1215 length = self.ESCAPE_CODES[ch] 1215 self. forward()1216 self.reader.forward() 1216 1217 for k in range(length): 1217 if self. peek(k) not in u'0123456789ABCDEFabcdef':1218 if self.reader.peek(k) not in u'0123456789ABCDEFabcdef': 1218 1219 raise ScannerError("while scanning a double-quoted scalar", start_mark, 1219 1220 "expected escape sequence of %d hexdecimal numbers, but found %r" % 1220 (length, self. peek(k).encode('utf-8')), self.get_mark())1221 code = int(self. prefix(length), 16)1221 (length, self.reader.peek(k).encode('utf-8')), self.reader.get_mark()) 1222 code = int(self.reader.prefix(length), 16) 1222 1223 chunks.append(unichr(code)) 1223 self. forward(length)1224 self.reader.forward(length) 1224 1225 elif ch in u'\r\n\x85\u2028\u2029': 1225 1226 self.scan_line_break() 1226 1227 chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) 1227 1228 else: 1228 1229 raise ScannerError("while scanning a double-quoted scalar", start_mark, 1229 "found unknown escape character %r" % ch.encode('utf-8'), self. get_mark())1230 "found unknown escape character %r" % ch.encode('utf-8'), self.reader.get_mark()) 1230 1231 else: 1231 1232 return chunks 1232 1233 … … 1234 1235 # See the specification for details. 1235 1236 chunks = [] 1236 1237 length = 0 1237 while self. peek(length) in u' \t':1238 while self.reader.peek(length) in u' \t': 1238 1239 length += 1 1239 whitespaces = self. prefix(length)1240 self. forward(length)1241 ch = self. peek()1240 whitespaces = self.reader.prefix(length) 1241 self.reader.forward(length) 1242 ch = self.reader.peek() 1242 1243 if ch == u'\0': 1243 1244 raise ScannerError("while scanning a quoted scalar", start_mark, 1244 "found unexpected end of stream", self. get_mark())1245 "found unexpected end of stream", self.reader.get_mark()) 1245 1246 elif ch in u'\r\n\x85\u2028\u2029': 1246 1247 line_break = self.scan_line_break() 1247 1248 breaks = self.scan_flow_scalar_breaks(double, start_mark) … … 1260 1261 while True: 1261 1262 # Instead of checking indentation, we check for document 1262 1263 # separators. 1263 prefix = self. prefix(3)1264 prefix = self.reader.prefix(3) 1264 1265 if (prefix == u'---' or prefix == u'...') \ 1265 and self. peek(3) in u'\0 \t\r\n\x85\u2028\u2029':1266 and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': 1266 1267 raise ScannerError("while scanning a quoted scalar", start_mark, 1267 "found unexpected document separator", self. get_mark())1268 while self. peek() in u' \t':1269 self. forward()1270 if self. peek() in u'\r\n\x85\u2028\u2029':1268 "found unexpected document separator", self.reader.get_mark()) 1269 while self.reader.peek() in u' \t': 1270 self.reader.forward() 1271 if self.reader.peek() in u'\r\n\x85\u2028\u2029': 1271 1272 chunks.append(self.scan_line_break()) 1272 1273 else: 1273 1274 return chunks … … 1279 1280 # We also keep track of the `allow_simple_key` flag here. 1280 1281 # Indentation rules are loosed for the flow context. 1281 1282 chunks = [] 1282 start_mark = self. get_mark()1283 start_mark = self.reader.get_mark() 1283 1284 end_mark = start_mark 1284 1285 indent = self.indent+1 1285 1286 # We allow zero indentation for scalars, but then we need to check for … … 1289 1290 spaces = [] 1290 1291 while True: 1291 1292 length = 0 1292 if self. peek() == u'#':1293 if self.reader.peek() == u'#': 1293 1294 break 1294 1295 while True: 1295 ch = self. peek(length)1296 ch = self.reader.peek(length) 1296 1297 if ch in u'\0 \t\r\n\x85\u2028\u2029' \ 1297 1298 or (not self.flow_level and ch == u':' and 1298 self. peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \1299 self.reader.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \ 1299 1300 or (self.flow_level and ch in u',:?[]{}'): 1300 1301 break 1301 1302 length += 1 1302 1303 # It's not clear what we should do with ':' in the flow context. 1303 1304 if (self.flow_level and ch == u':' 1304 and self. peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'):1305 self. forward(length)1305 and self.reader.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'): 1306 self.reader.forward(length) 1306 1307 raise ScannerError("while scanning a plain scalar", start_mark, 1307 "found unexpected ':'", self. get_mark(),1308 "found unexpected ':'", self.reader.get_mark(), 1308 1309 "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.") 1309 1310 if length == 0: 1310 1311 break 1311 1312 self.allow_simple_key = False 1312 1313 chunks.extend(spaces) 1313 chunks.append(self. prefix(length))1314 self. forward(length)1315 end_mark = self. get_mark()1314 chunks.append(self.reader.prefix(length)) 1315 self.reader.forward(length) 1316 end_mark = self.reader.get_mark() 1316 1317 spaces = self.scan_plain_spaces(indent, start_mark) 1317 if not spaces or self. peek() == u'#' \1318 or (not self.flow_level and self. column < indent):1318 if not spaces or self.reader.peek() == u'#' \ 1319 or (not self.flow_level and self.reader.column < indent): 1319 1320 break 1320 1321 return ScalarToken(u''.join(chunks), True, start_mark, end_mark) 1321 1322 … … 1325 1326 # We just forbid them completely. Do not use tabs in YAML! 1326 1327 chunks = [] 1327 1328 length = 0 1328 while self. peek(length) in u' ':1329 while self.reader.peek(length) in u' ': 1329 1330 length += 1 1330 whitespaces = self. prefix(length)1331 self. forward(length)1332 ch = self. peek()1331 whitespaces = self.reader.prefix(length) 1332 self.reader.forward(length) 1333 ch = self.reader.peek() 1333 1334 if ch in u'\r\n\x85\u2028\u2029': 1334 1335 line_break = self.scan_line_break() 1335 1336 self.allow_simple_key = True 1336 prefix = self. prefix(3)1337 prefix = self.reader.prefix(3) 1337 1338 if (prefix == u'---' or prefix == u'...') \ 1338 and self. peek(3) in u'\0 \t\r\n\x85\u2028\u2029':1339 and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': 1339 1340 return 1340 1341 breaks = [] 1341 while self. peek() in u' \r\n\x85\u2028\u2029':1342 if self. peek() == ' ':1343 self. forward()1342 while self.reader.peek() in u' \r\n\x85\u2028\u2029': 1343 if self.reader.peek() == ' ': 1344 self.reader.forward() 1344 1345 else: 1345 1346 breaks.append(self.scan_line_break()) 1346 prefix = self. prefix(3)1347 prefix = self.reader.prefix(3) 1347 1348 if (prefix == u'---' or prefix == u'...') \ 1348 and self. peek(3) in u'\0 \t\r\n\x85\u2028\u2029':1349 and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': 1349 1350 return 1350 1351 if line_break != u'\n': 1351 1352 chunks.append(line_break) … … 1360 1361 # See the specification for details. 1361 1362 # For some strange reasons, the specification does not allow '_' in 1362 1363 # tag handles. I have allowed it anyway. 1363 ch = self. peek()1364 ch = self.reader.peek() 1364 1365 if ch != u'!': 1365 1366 raise ScannerError("while scanning a %s" % name, start_mark, 1366 1367 "expected '!', but found %r" % ch.encode('utf-8'), 1367 self. get_mark())1368 self.reader.get_mark()) 1368 1369 length = 1 1369 ch = self. peek(length)1370 ch = self.reader.peek(length) 1370 1371 if ch != u' ': 1371 1372 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ 1372 1373 or ch in u'-_': 1373 1374 length += 1 1374 ch = self. peek(length)1375 ch = self.reader.peek(length) 1375 1376 if ch != u'!': 1376 self. forward(length)1377 self.reader.forward(length) 1377 1378 raise ScannerError("while scanning a %s" % name, start_mark, 1378 1379 "expected '!', but found %r" % ch.encode('utf-8'), 1379 self. get_mark())1380 self.reader.get_mark()) 1380 1381 length += 1 1381 value = self. prefix(length)1382 self. forward(length)1382 value = self.reader.prefix(length) 1383 self.reader.forward(length) 1383 1384 return value 1384 1385 1385 1386 def scan_tag_uri(self, name, start_mark): … … 1387 1388 # Note: we do not check if URI is well-formed. 1388 1389 chunks = [] 1389 1390 length = 0 1390 ch = self. peek(length)1391 ch = self.reader.peek(length) 1391 1392 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ 1392 1393 or ch in u'-;/?:@&=+$,_.!~*\'()[]%': 1393 1394 if ch == u'%': 1394 chunks.append(self. prefix(length))1395 self. forward(length)1395 chunks.append(self.reader.prefix(length)) 1396 self.reader.forward(length) 1396 1397 length = 0 1397 1398 chunks.append(self.scan_uri_escapes(name, start_mark)) 1398 1399 else: 1399 1400 length += 1 1400 ch = self. peek(length)1401 ch = self.reader.peek(length) 1401 1402 if length: 1402 chunks.append(self. prefix(length))1403 self. forward(length)1403 chunks.append(self.reader.prefix(length)) 1404 self.reader.forward(length) 1404 1405 length = 0 1405 1406 if not chunks: 1406 1407 raise ScannerError("while parsing a %s" % name, start_mark, 1407 1408 "expected URI, but found %r" % ch.encode('utf-8'), 1408 self. get_mark())1409 self.reader.get_mark()) 1409 1410 return u''.join(chunks) 1410 1411 1411 1412 def scan_uri_escapes(self, name, start_mark): 1412 1413 # See the specification for details. 1413 1414 bytes = [] 1414 mark = self. get_mark()1415 while self. peek() == u'%':1416 self. forward()1415 mark = self.reader.get_mark() 1416 while self.reader.peek() == u'%': 1417 self.reader.forward() 1417 1418 for k in range(2): 1418 if self. peek(k) not in u'0123456789ABCDEFabcdef':1419 if self.reader.peek(k) not in u'0123456789ABCDEFabcdef': 1419 1420 raise ScannerError("while scanning a %s" % name, start_mark, 1420 1421 "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % 1421 (self. peek(k).encode('utf-8')), self.get_mark())1422 bytes.append(chr(int(self. prefix(2), 16)))1423 self. forward(2)1422 (self.reader.peek(k).encode('utf-8')), self.reader.get_mark()) 1423 bytes.append(chr(int(self.reader.prefix(2), 16))) 1424 self.reader.forward(2) 1424 1425 try: 1425 1426 value = unicode(''.join(bytes), 'utf-8') 1426 1427 except UnicodeDecodeError, exc: … … 1436 1437 # '\u2028' : '\u2028' 1437 1438 # '\u2029 : '\u2029' 1438 1439 # default : '' 1439 ch = self. peek()1440 ch = self.reader.peek() 1440 1441 if ch in u'\r\n\x85': 1441 if self. prefix(2) == u'\r\n':1442 self. forward(2)1442 if self.reader.prefix(2) == u'\r\n': 1443 self.reader.forward(2) 1443 1444 else: 1444 self. forward()1445 self.reader.forward() 1445 1446 return u'\n' 1446 1447 elif ch in u'\u2028\u2029': 1447 self. forward()1448 self.reader.forward() 1448 1449 return ch 1449 1450 return u'' 1450 1451 -
C:/projects/workspace_python/PyYAML_2/lib/yaml/loader.py
8 8 from constructor import * 9 9 from resolver import * 10 10 11 class BaseLoader( Reader,Scanner, Parser, Composer, BaseConstructor, BaseResolver):11 class BaseLoader(Scanner, Parser, Composer, BaseConstructor, BaseResolver): 12 12 13 13 def __init__(self, stream): 14 Reader.__init__(self, stream) 15 Scanner.__init__(self) 14 Scanner.__init__(self, Reader(stream)) 16 15 Parser.__init__(self) 17 16 Composer.__init__(self) 18 17 BaseConstructor.__init__(self) 19 18 BaseResolver.__init__(self) 20 19 21 class SafeLoader( Reader,Scanner, Parser, Composer, SafeConstructor, Resolver):20 class SafeLoader(Scanner, Parser, Composer, SafeConstructor, Resolver): 22 21 23 22 def __init__(self, stream): 24 Reader.__init__(self, stream) 25 Scanner.__init__(self) 23 Scanner.__init__(self, Reader(stream)) 26 24 Parser.__init__(self) 27 25 Composer.__init__(self) 28 26 SafeConstructor.__init__(self) 29 27 Resolver.__init__(self) 30 28 31 class Loader( Reader,Scanner, Parser, Composer, Constructor, Resolver):29 class Loader(Scanner, Parser, Composer, Constructor, Resolver): 32 30 33 31 def __init__(self, stream): 34 Reader.__init__(self, stream) 35 Scanner.__init__(self) 32 Scanner.__init__(self, Reader(stream)) 36 33 Parser.__init__(self) 37 34 Composer.__init__(self) 38 35 Constructor.__init__(self)
