Changeset 43
- Timestamp:
- 02/14/06 19:31:51 (7 years ago)
- Location:
- branches/pyyaml3000
- Files:
-
- 3 added
- 5 edited
-
lib/yaml/marker.py (modified) (1 diff)
-
lib/yaml/parser.py (added)
-
lib/yaml/scanner.py (modified) (14 diffs)
-
lib/yaml/stream.py (added)
-
tests/test_appliance.py (modified) (1 diff)
-
tests/test_canonical.py (added)
-
tests/test_tokens.py (modified) (4 diffs)
-
tests/test_yaml.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
branches/pyyaml3000/lib/yaml/marker.py
r39 r43
 2  2    class Marker:
 3  3
 4     -     def __init__(self, source, data, index, row, column):
    4  +     def __init__(self, source, data, index, line, column):
 5  5            self.source = source
 6  6            self.data = data
 7  7            self.index = index
 8     -         self.row = row
    8  +         self.line = line
 9  9            self.column = column
10 10
-
branches/pyyaml3000/lib/yaml/scanner.py
r39 r43 1 2 # Tokens: 3 # YAML-DIRECTIVE(major_version, minor_version), TAG-DIRECTIVE(handle, prefix) 4 # RESERVED-DIRECTIVE(name) 5 # DOCUMENT-START, DOCUMENT-END 6 # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END 7 # FLOW-SEQUENCE-START, FLOW-MAPPING-START, FLOW-SEQUENCE-END, FLOW-MAPPING-END 8 # ENTRY, KEY, VALUE 9 # ALIAS(name), ANCHOR(name), TAG(value), SCALAR(value, plain) 10 1 11 2 12 from marker import Marker 3 from error import ParserError13 #from error import YAMLError 4 14 from stream import Stream 15 16 #class ScannerError(YAMLError): 17 class ScannerError(Exception): 18 pass 19 20 class Token: 21 def __init__(self, start_marker, end_marker): 22 self.start_marker = start_marker 23 self.end_marker = end_marker 24 25 class YAMLDirective(Token): 26 def __init__(self, major_version, minor_version, start_marker, end_marker): 27 self.major_version = major_version 28 self.minor_version = minor_version 29 self.start_marker = start_marker 30 self.end_marker = end_marker 31 32 class TagDirective(Token): 33 pass 34 35 class ReservedDirective(Token): 36 def __init__(self, name, start_marker, end_marker): 37 self.name = name 38 self.start_marker = start_marker 39 self.end_marker = end_marker 40 41 class DocumentStart(Token): 42 pass 43 44 class DocumentEnd(Token): 45 pass 46 47 class End(Token): 48 pass 49 50 class BlockSequenceStart(Token): 51 pass 52 53 class BlockMappingStart(Token): 54 pass 55 56 class BlockEnd(Token): 57 pass 58 59 class FlowSequenceStart(Token): 60 pass 61 62 class FlowMappingStart(Token): 63 pass 64 65 class FlowSequenceEnd(Token): 66 pass 67 68 class FlowMappingEnd(Token): 69 pass 70 71 class Key(Token): 72 pass 73 74 class Value(Token): 75 pass 76 77 class Entry(Token): 78 pass 79 80 class Alias(Token): 81 def __init__(self, value, start_marker, end_marker): 82 self.value = value 83 self.start_marker = start_marker 84 self.end_marker = end_marker 85 86 class Anchor(Token): 87 def __init__(self, value, start_marker, end_marker): 88 
self.value = value 89 self.start_marker = start_marker 90 self.end_marker = end_marker 91 92 class Tag(Token): 93 def __init__(self, value, start_marker, end_marker): 94 self.value = value 95 self.start_marker = start_marker 96 self.end_marker = end_marker 97 98 class Scalar(Token): 99 def __init__(self, value, plain, start_marker, end_marker): 100 self.value = value 101 self.plain = plain 102 self.start_marker = start_marker 103 self.end_marker = end_marker 104 105 class SimpleKey: 106 def __init__(self, token_number, required, index, line, column, marker): 107 self.token_number = token_number 108 self.required = required 109 self.index = index 110 self.line = line 111 self.column = column 112 self.marker = marker 5 113 6 114 class Scanner: … … 9 117 """Initialize the scanner.""" 10 118 # The input stream. The Stream class do the dirty work of checking for 11 # BOM and converting the input data to Unicode. It also adds LF to the12 # end if the data does not ends with an EOL character.119 # BOM and converting the input data to Unicode. It also adds NUL to 120 # the end. 13 121 # 14 122 # Stream supports the following methods … … 37 145 self.indents = [] 38 146 39 # Variables related to simple key treatment.147 # Variables related to simple keys treatment. 40 148 41 149 # A simple key is a key that is not denoted by the '?' indicator. … … 49 157 # Simple keys should be limited to a single line and 1024 characters. 50 158 51 # Can a block collection start at the current position? A block52 # collection maystart:53 # - at the beginning of the line (not counting spaces),54 # - after the block sequence indicator '-'.55 self.allow_block_collection = True56 57 # Can a simple key in flow context start at the current position? A58 # simple key may start after the '{', '[', and ',' indicators.59 self.allow_ flow_simple_keys = False159 # Can a simple key start at the current position? 
A simple key may 160 # start: 161 # - at the beginning of the line, not counting indentation spaces 162 # (in block context), 163 # - after '{', '[', ',' (in the flow context), 164 # - after '?', ':', '-' (in the block context). 165 # In the block context, this flag also signify if a block collection 166 # may start at the current position. 167 self.allow_simple_key = True 60 168 61 169 # Keep track of possible simple keys. This is a dictionary. The key 62 170 # is `flow_level`; there can be no more that one possible simple key 63 # for each level. The value is a record of 64 # (stream.index, stream.line, stream.column, token_number) 171 # for each level. The value is a SimpleKey record: 172 # (token_number, required, index, line, column, marker) 173 # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), 174 # '[', or '{' tokens. 65 175 self.possible_simple_keys = {} 66 176 67 # Public methods:177 # Two public methods. 68 178 69 179 def peek_token(self): 70 180 """Get the current token.""" 71 while self.need_more_tokens() 181 while self.need_more_tokens(): 72 182 self.fetch_more_tokens() 73 183 if self.tokens: … … 75 185 76 186 def get_token(self): 77 "Get the current token and remove it from the list ."""187 "Get the current token and remove it from the list of pending tokens.""" 78 188 while self.need_more_tokens(): 79 189 self.fetch_more_tokens() … … 82 192 return self.tokens.pop(0) 83 193 84 # Private methods :194 # Private methods. 85 195 86 196 def need_more_tokens(self): … … 91 201 # The current token may be a potential simple key, so we 92 202 # need to look further. 203 self.stale_possible_simple_keys() 93 204 if self.next_possible_simple_key() == self.tokens_taken: 94 205 return True … … 97 208 98 209 # Eat whitespaces and comments until we reach the next token. 99 self.find_next_token() 210 self.scan_to_next_token() 211 212 # Remove obsolete possible simple keys. 
213 self.stale_possible_simple_keys() 100 214 101 215 # Compare the current indentation and column. It may add some tokens 102 # and decrease the current indentation .216 # and decrease the current indentation level. 103 217 self.unwind_indent(self.stream.column) 218 219 #print 220 #print self.stream.get_marker().get_snippet() 104 221 105 222 # Peek the next character. … … 107 224 108 225 # Is it the end of stream? 109 if ch is None:226 if ch == u'\0': 110 227 return self.fetch_end() 111 228 … … 124 241 # Note: the order of the following checks is NOT significant. 125 242 126 # Is it the sequence indicator?127 if ch in u'-,' and self.check_entry():128 return self.fetch_entry()129 130 243 # Is it the flow sequence start indicator? 131 244 if ch == u'[': … … 144 257 return self.fetch_flow_mapping_end() 145 258 259 # Is it the entry indicator? 260 if ch in u'-,' and self.check_entry(): 261 return self.fetch_entry() 262 146 263 # Is it the key indicator? 147 264 if ch == u'?' and self.check_key(): … … 160 277 return self.fetch_anchor() 161 278 162 # Is i sa tag?279 # Is it a tag? 163 280 if ch == u'!': 164 281 return self.fetch_tag() 165 282 166 # Is i sa literal scalar?167 if ch == u'|' :283 # Is it a literal scalar? 284 if ch == u'|' and not self.flow_level: 168 285 return self.fetch_literal() 169 286 170 287 # Is it a folded scalar? 171 if ch == u'>' :288 if ch == u'>' and not self.flow_level: 172 289 return self.fetch_folded() 173 290 … … 180 297 return self.fetch_double() 181 298 182 # It must be a plain scalar .299 # It must be a plain scalar then. 183 300 if self.check_plain(): 184 301 return self.fetch_plain() 185 302 186 # No? It's an error then. Let's produce a nice error message.303 # No? It's an error. Let's produce a nice error message. 187 304 self.invalid_token() 188 305 306 # Simple keys treatment. 307 308 def next_possible_simple_key(self): 309 # Return the number of the nearest possible simple key. 
Actually we 310 # don't need to loop through the whole dictionary. We may replace it 311 # with the following code: 312 # if not self.possible_simple_keys: 313 # return None 314 # return self.possible_simple_keys[ 315 # min(self.possible_simple_keys.keys())].token_number 316 min_token_number = None 317 for level in self.possible_simple_keys: 318 key = self.possible_simple_keys[level] 319 if min_token_number is None or key.token_number < min_token_number: 320 min_token_number = key.token_number 321 return min_token_number 322 323 def stale_possible_simple_keys(self): 324 # Remove entries that are no longer possible simple keys. According to 325 # the YAML specification, simple keys 326 # - should be limited to a single line, 327 # - should be no longer than 1024 characters. 328 # Disabling this procedure will allow simple keys of any length and 329 # height (may cause problems if indentation is broken though). 330 for level in self.possible_simple_keys.keys(): 331 key = self.possible_simple_keys[level] 332 if key.line != self.stream.line \ 333 or self.stream.index-key.index > 1024: 334 if key.required: 335 self.fail("simple key is required") 336 del self.possible_simple_keys[level] 337 338 def save_possible_simple_key(self): 339 # The next token may start a simple key. We check if it's possible 340 # and save its position. This function is called for 341 # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. 342 343 # Check if a simple key is required at the current position. 344 required = not self.flow_level and self.indent == self.stream.column 345 346 # The next token might be a simple key. Let's save it's number and 347 # position. 
348 if self.allow_simple_key: 349 self.remove_possible_simple_key() 350 token_number = self.tokens_taken+len(self.tokens) 351 index = self.stream.index 352 line = self.stream.line 353 column = self.stream.column 354 marker = self.stream.get_marker() 355 key = SimpleKey(token_number, required, 356 index, line, column, marker) 357 self.possible_simple_keys[self.flow_level] = key 358 359 # A simple key is required at the current position. 360 elif required: 361 self.fail("simple key is required") 362 363 def remove_possible_simple_key(self): 364 # Remove the saved possible key position at the current flow level. 365 if self.flow_level in self.possible_simple_keys: 366 key = self.possible_simple_keys[self.flow_level] 367 if key.required: 368 self.fail("simple key is required") 369 370 # Indentation functions. 371 372 def unwind_indent(self, column): 373 374 # In flow context, tokens should respect indentation. 375 if self.flow_level and self.indent > column: 376 self.fail("invalid intendation in the flow context") 377 378 # In block context, we may need to issue the BLOCK-END tokens. 379 while self.indent > column: 380 marker = self.stream.get_marker() 381 self.indent = self.indents.pop() 382 self.tokens.append(BlockEnd(marker, marker)) 383 384 def add_indent(self, column): 385 # Check if we need to increase indentation. 386 if self.indent < column: 387 self.indents.append(self.indent) 388 self.indent = column 389 return True 390 return False 391 392 # Fetchers. 393 189 394 def fetch_end(self): 190 395 191 396 # Set the current intendation to -1. 192 self.unwind_indent s(-1)397 self.unwind_indent(-1) 193 398 194 399 # Reset everything (not really needed). 195 self.allow_block_collection = False 196 self.allow_flow_simple_keys = False 400 self.allow_simple_key = False 197 401 self.possible_simple_keys = {} 198 402 403 # Read the token. 404 marker = self.stream.get_marker() 405 199 406 # Add END. 
200 marker = self.stream.get_marker() 201 self.tokens.append(EndToken(marker)) 407 self.tokens.append(End(marker, marker)) 202 408 203 409 # The stream is ended. 204 410 self.done = True 205 411 412 def fetch_directive(self): 413 414 # Set the current intendation to -1. 415 self.unwind_indent(-1) 416 417 # Reset simple keys. 418 self.remove_possible_simple_key() 419 self.allow_simple_key = False 420 421 # Scan and add DIRECTIVE. 422 self.scan_directive() 423 424 def fetch_document_start(self): 425 self.fetch_document_indicator(DocumentStart) 426 427 def fetch_document_end(self): 428 self.fetch_document_indicator(DocumentEnd) 429 430 def fetch_document_indicator(self, TokenClass): 431 432 # Set the current intendation to -1. 433 self.unwind_indent(-1) 434 435 # Reset simple keys. Note that there could not be a block collection 436 # after '---'. 437 self.remove_possible_simple_key() 438 self.allow_simple_key = False 439 440 # Add DOCUMENT-START or DOCUMENT-END. 441 start_marker = self.stream.get_marker() 442 self.stream.read(3) 443 end_marker = self.stream.get_marker() 444 self.tokens.append(TokenClass(start_marker, end_marker)) 445 446 def fetch_flow_sequence_start(self): 447 self.fetch_flow_collection_start(FlowSequenceStart) 448 449 def fetch_flow_mapping_start(self): 450 self.fetch_flow_collection_start(FlowMappingStart) 451 452 def fetch_flow_collection_start(self, TokenClass): 453 454 # Increase the flow level. 455 self.flow_level += 1 456 457 # '[' and '{' may start a simple key. 458 self.save_possible_simple_key() 459 460 # Simple keys are allowed after '[' and '{'. 461 self.allow_simple_key = True 462 463 # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. 
464 start_marker = self.stream.get_marker() 465 self.stream.read() 466 end_marker = self.stream.get_marker() 467 self.tokens.append(TokenClass(start_marker, end_marker)) 468 469 def fetch_flow_sequence_end(self): 470 self.fetch_flow_collection_end(FlowSequenceEnd) 471 472 def fetch_flow_mapping_end(self): 473 self.fetch_flow_collection_end(FlowMappingEnd) 474 475 def fetch_flow_collection_end(self, TokenClass): 476 477 # Reset possible simple key on the current level. 478 self.remove_possible_simple_key() 479 480 # Decrease the flow level. 481 self.flow_level -= 1 482 483 # No simple keys after ']' or '}'. 484 self.allow_simple_key = False 485 486 # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. 487 start_marker = self.stream.get_marker() 488 self.stream.read() 489 end_marker = self.stream.get_marker() 490 self.tokens.append(TokenClass(start_marker, end_marker)) 491 492 def fetch_entry(self): 493 494 # Block context needs additional checks. 495 if not self.flow_level: 496 497 # Are we allowed to start a new entry? 498 if not self.allow_simple_key: 499 self.fail("Cannot start a new entry here") 500 501 # We may need to add BLOCK-SEQUENCE-START. 502 if self.add_indent(self.stream.column): 503 marker = self.stream.get_marker() 504 self.tokens.append(BlockSequenceStart(marker, marker)) 505 506 # Simple keys are allowed after '-' and ','. 507 self.allow_simple_key = True 508 509 # Reset possible simple key on the current level. 510 self.remove_possible_simple_key() 511 512 # Add ENTRY. 513 start_marker = self.stream.get_marker() 514 self.stream.read() 515 end_marker = self.stream.get_marker() 516 self.tokens.append(Entry(start_marker, end_marker)) 517 518 def fetch_key(self): 519 520 # Block context needs additional checks. 521 if not self.flow_level: 522 523 # Are we allowed to start a key (not nessesary a simple)? 524 if not self.allow_simple_key: 525 self.fail("Cannot start a new key here") 526 527 # We may need to add BLOCK-MAPPING-START. 
528 if self.add_indent(self.stream.column): 529 marker = self.stream.get_marker() 530 self.tokens.append(BlockMappingStart(marker, marker)) 531 532 # Simple keys are allowed after '?' in the block context. 533 self.allow_simple_key = not self.flow_level 534 535 # Reset possible simple key on the current level. 536 self.remove_possible_simple_key() 537 538 # Add KEY. 539 start_marker = self.stream.get_marker() 540 self.stream.read() 541 end_marker = self.stream.get_marker() 542 self.tokens.append(Key(start_marker, end_marker)) 543 544 def fetch_value(self): 545 546 # Do we determine a simple key? 547 if self.flow_level in self.possible_simple_keys: 548 549 # Add KEY. 550 key = self.possible_simple_keys[self.flow_level] 551 del self.possible_simple_keys[self.flow_level] 552 self.tokens.insert(key.token_number-self.tokens_taken, 553 Key(key.marker, key.marker)) 554 555 # If this key starts a new block mapping, we need to add 556 # BLOCK-MAPPING-START. 557 if not self.flow_level: 558 if self.add_indent(key.column): 559 self.tokens.insert(key.token_number-self.tokens_taken, 560 BlockMappingStart(key.marker, key.marker)) 561 562 # There cannot be two simple keys one after another. 563 self.allow_simple_key = False 564 565 # It must be a part of a complex key. 566 else: 567 568 # Simple keys are allowed after ':' in the block context. 569 self.allow_simple_key = not self.flow_level 570 571 # Reset possible simple key on the current level. 572 self.remove_possible_simple_key() 573 574 # Add VALUE. 575 start_marker = self.stream.get_marker() 576 self.stream.read() 577 end_marker = self.stream.get_marker() 578 self.tokens.append(Value(start_marker, end_marker)) 579 580 def fetch_alias(self): 581 582 # ALIAS could be a simple key. 583 self.save_possible_simple_key() 584 585 # No simple keys after ALIAS. 586 self.allow_simple_key = False 587 588 # Scan and add ALIAS. 589 self.scan_anchor(Alias) 590 591 def fetch_anchor(self): 592 593 # ANCHOR could start a simple key. 
594 self.save_possible_simple_key() 595 596 # No simple keys after ANCHOR. 597 self.allow_simple_key = False 598 599 # Scan and add ANCHOR. 600 self.scan_anchor(Anchor) 601 602 def fetch_tag(self): 603 604 # TAG could start a simple key. 605 self.save_possible_simple_key() 606 607 # No simple keys after TAG. 608 self.allow_simple_key = False 609 610 # Scan and add TAG. 611 self.scan_tag() 612 613 def fetch_literal(self): 614 self.fetch_block_scalar(folded=False) 615 616 def fetch_folded(self): 617 self.fetch_block_scalar(folded=True) 618 619 def fetch_block_scalar(self, folded): 620 621 # A simple key may follow a block scalar. 622 self.allow_simple_key = True 623 624 # Reset possible simple key on the current level. 625 self.remove_possible_simple_key() 626 627 # Scan and add SCALAR. 628 self.scan_block_scalar(folded) 629 630 def fetch_single(self): 631 self.fetch_flow_scalar(double=False) 632 633 def fetch_double(self): 634 self.fetch_flow_scalar(double=True) 635 636 def fetch_flow_scalar(self, double): 637 638 # A flow scalar could be a simple key. 639 self.save_possible_simple_key() 640 641 # No simple keys after flow scalars. 642 self.allow_simple_key = False 643 644 # Scan and add SCALAR. 645 self.scan_flow_scalar(double) 646 647 def fetch_plain(self): 648 649 # A plain scalar could be a simple key. 650 self.save_possible_simple_key() 651 652 # No simple keys after plain scalars. But note that `scan_plain` will 653 # change this flag if the scan is finished at the beginning of the 654 # line. 655 self.allow_simple_key = False 656 657 # Scan and add SCALAR. May change `allow_simple_key`. 658 self.scan_plain() 659 660 # Checkers. 661 206 662 def check_directive(self): 207 663 208 # Checking for 209 # /* The beginning of the line */ '%' 664 # DIRECTIVE: ^ '%' ... 210 665 # The '%' indicator is already checked. 
211 666 if self.stream.column == 0: … … 214 669 def check_document_start(self): 215 670 216 # Checking for 217 # /* The beginning of the line */ '---' /* Space or EOL */ 671 # DOCUMENT-START: ^ '---' (' '|'\n') 218 672 if self.stream.column == 0: 219 673 prefix = self.stream.peek(4) 220 if prefix[:3] == u'---' and prefix[3] in u' \t\r\n\x85\u2028\u2029':674 if prefix[:3] == u'---' and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029': 221 675 return True 222 676 223 def fetch_document_start(self):224 225 # Set the current intendation to -1.226 self.unwind_indents(-1)227 228 # No block collections after '---'.229 self.allow_block_collection = False230 231 # No flow simple keys (not needed -- we are in the block context).232 self.allow_flow_simple_keys = False233 234 # Reset possible simple keys (not needed -- EOL should have reset it).235 self.possible_simple_keys = {}236 237 start_marker = self.stream.get_marker()238 239 # The characters are already checked, just move forward.240 self.stream.read(3)241 242 end_marker = self.stream.get_marker()243 244 # Add DOCUMENT-START.245 self.tokens.append(DocumentStartToken(start_marker, end_marker))246 247 248 677 def check_document_end(self): 678 679 # DOCUMENT-END: ^ '...' (' '|'\n') 249 680 if self.stream.column == 0: 250 681 prefix = self.stream.peek(4) 251 if prefix[:3] == u'...' and prefix[3] in u' \t\r\n\x85\u2028\u2029':682 if prefix[:3] == u'...' and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029': 252 683 return True 253 684 254 def fetch_document_end(self): 255 # The same code as `fetch_document_start`. 256 257 # Set the current intendation to -1. 258 self.unwind_indents(-1) 259 260 # Reset everything (not really needed). 
261 self.allow_block_collection = False 262 self.allow_flow_simple_keys = False 263 self.possible_simple_keys = {} 264 685 def check_entry(self): 686 687 # ENTRY(flow context): ',' 688 if self.flow_level: 689 return self.stream.peek() == u',' 690 691 # ENTRY(block context): '-' (' '|'\n') 692 else: 693 prefix = self.stream.peek(2) 694 return prefix[0] == u'-' and prefix[1] in u'\0 \t\r\n\x85\u2028\u2029' 695 696 def check_key(self): 697 698 # KEY(flow context): '?' 699 if self.flow_level: 700 return True 701 702 # KEY(block context): '?' (' '|'\n') 703 else: 704 prefix = self.stream.peek(2) 705 return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029' 706 707 def check_value(self): 708 709 # VALUE(flow context): ':' 710 if self.flow_level: 711 return True 712 713 # VALUE(block context): ':' (' '|'\n') 714 else: 715 prefix = self.stream.peek(2) 716 return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029' 717 718 def check_plain(self): 719 return True 720 721 # Scanners. 722 723 def scan_to_next_token(self): 724 found = False 725 while not found: 726 while self.stream.peek() == u' ': 727 self.stream.read() 728 if self.stream.peek() == u'#': 729 while self.stream.peek() not in u'\r\n': 730 self.stream.read() 731 if self.stream.peek() in u'\r\n': 732 self.stream.read() 733 if not self.flow_level: 734 self.allow_simple_key = True 735 else: 736 found = True 737 738 def scan_directive(self): 739 marker = self.stream.get_marker() 740 if self.stream.peek(5) == u'%YAML ': 741 self.tokens.append(YAMLDirective(1, 1, marker, marker)) 742 elif self.stream.peek(4) == u'%TAG ': 743 self.tokens.append(TagDirective(marker, marker)) 744 else: 745 self.tokens.append(ReservedDirective('', marker, marker)) 746 while self.stream.peek() not in u'\0\r\n': 747 self.stream.read() 748 self.stream.read() 749 750 def scan_anchor(self, TokenClass): 265 751 start_marker = self.stream.get_marker() 266 267 # The characters are already checked, just move forward. 
268 self.stream.read(3) 269 752 while self.stream.peek() not in u'\0 \t\r\n,:': 753 self.stream.read() 270 754 end_marker = self.stream.get_marker() 271 272 # Add DOCUMENT-END. 273 self.tokens.append(DocumentEndToken(start_marker, end_marker)) 274 275 276 277 # Tokens: 278 # YAML_DIRECTIVE: ^ '%' YAML ' '+ (version: \d+ '.' \d+) s-l-comments 279 # TAG_DIRECTIVE: ^ % TAG ' '+ (handle: '!' (word-char* '!')? ) (prefix: uri-char+) s-l-comments 280 # RESERVED_DIRECTIVE: ^ '%' (directive-name: ns-char+) (' '+ (directive-parameter: ns-char+))* s-l-comments 281 # DOCUMENT_START: ^ '---' (' ' | b-any) 282 # DOCUMENT_END: ^ ... (' ' | b-any) 283 # TAG: '!' ( ('<' uri-char+ '>') | uri-char* ) (' ' | b-any) 284 # ANCHOR: '&' ns-char+ <-- bug 285 # ALIAS: * ns-char+ <-- bug 286 # ENTRY(block): '-' (' ' | b-any) 287 # KEY(block): '?' (' ' | b-any) 288 # VALUE(block): ':' (' ' | b-any) 289 # FLOW_SEQ_START: '[' 290 # FLOW_SEQ_END: ']' 291 # FLOW_MAP_START: '{' 292 # FLOW_MAP_END: '}' 293 # KEY(flow): '?' 294 # VALUE(flow): ':' 295 # ENTRY(flow): ',' 296 # PLAIN: (plain-char - indicator) | ([-?:] plain-char) ... <-- bugs 297 # DOUBLE_QUOTED: '"' ... 298 # SINGLE_QUOTED: ''' ... 299 # LITERAL: '|' ... 300 # FOLDED: '>' ... 301 # BLOCK_SEQ_START: indentation before '-'. 302 # BLOCK_MAP_START: indentation before '?' or a simple key. 303 # BLOCK_END: no indentation 304 # LINE: end of line 305 306 # b-generic: \r \n | \r | \n | #x85 307 # b-specific: #x2028 | #x2029 308 # b-any: b-generic | b-specific 309 # hex-digit: [0-9A-Fa-f] 310 # word-char: [0-9A-Za-z-] 311 # uri-char: word-char | % hex-digit hex-digit | [;/?:@&=+$,_.!~*'()[]] 312 313 # Production rules: 314 # stream :== implicit_document? explicit_document* END 315 # explicit_document :== DIRECTIVE* DOCUMENT_START block_node? DOCUMENT_END? 316 # implicit_document :== block_node DOCUMENT_END? 317 # block_node :== ALIAS | properties? block_content 318 # flow_node :== ALIAS | properties? 
flow_content 319 # properties :== TAG ANCHOR? | ANCHOR TAG? 320 # block_content :== block_collection | flow_collection | SCALAR 321 # flow_content :== flow_collection | SCALAR 322 # block_collection :== block_sequence | block_mapping 323 # block_sequence :== BLOCK_SEQ_START (ENTRY block_node?)* BLOCK_END 324 # block_mapping :== BLOCK_MAP_START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK_END 325 # block_node_or_indentless_sequence :== ALIAS | properties? (block_content | indentless_block_sequence) 326 # indentless_block_sequence :== (ENTRY block_node?)+ 327 # flow_collection :== flow_sequence | flow_mapping 328 # flow_sequence :== FLOW_SEQ_START (flow_sequence_entry ENTRY)* flow_sequence_entry? FLOW_SEQ_END 329 # flow_sequence_entry :== flow_node | KEY flow_node (VALUE flow_node?)? 330 # flow_mapping :== FLOW_MAP_START flow_mapping_entry ENTRY)* flow_mapping_entry? FLOW_MAP_END 331 # flow_mapping_entry :== flow_node | KEY flow_node (VALUE flow_node?)? 
332 333 # FIRST(rule) sets: 334 # stream: {} 335 # explicit_document: { DIRECTIVE DOCUMENT_START } 336 # implicit_document: block_node 337 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START } 338 # flow_node: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START } 339 # block_content: { BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START SCALAR } 340 # flow_content: { FLOW_SEQ_START FLOW_MAP_START SCALAR } 341 # block_collection: { BLOCK_SEQ_START BLOCK_MAP_START } 342 # flow_collection: { FLOW_SEQ_START FLOW_MAP_START } 343 # block_sequence: { BLOCK_SEQ_START } 344 # block_mapping: { BLOCK_MAP_START } 345 # block_node_or_indentless_sequence: { ALIAS TAG ANCHOR SCALAR BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START ENTRY } 346 # indentless_sequence: { ENTRY } 347 # flow_collection: { FLOW_SEQ_START FLOW_MAP_START } 348 # flow_sequence: { FLOW_SEQ_START } 349 # flow_mapping: { FLOW_MAP_START } 350 # flow_sequence_entry: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START KEY } 351 # flow_mapping_entry: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START KEY } 352 353 class Marker(object): 354 355 def __init__(self, source, data, index, length=0): 356 self.source = source 357 self.data = data 358 self.index = index 359 self.length = length 360 self._line = None 361 self._position = None 362 363 def line(self): 364 if not self._line: 365 self._make_line_position() 366 return self._line 367 368 def position(self): 369 if not self._position: 370 self._make_line_position() 371 return self._position 372 373 def _make_line_position(self): 374 allow_block_collection = self.data.rfind('\n', 0, self.index)+1 375 line_end = self.data.find('\n', self.index)+1 376 if line_end == 0: 377 line_end = len(self.data) 378 self._line = (allow_block_collection, line_end) 379 row = self.data.count('\n', 0, allow_block_collection) 380 col = self.index-allow_block_collection 381 self._position = (row, col) 
382 383 class Error(Exception): 384 385 def __init__(self, message=None, marker=None): 386 Exception.__init__(self) 387 self.message = message 388 self.marker = marker 389 390 def __str__(self): 391 if self.marker is not None: 392 row, col = self.marker.position() 393 start, end = self.marker.line() 394 error_position = "source \"%s\", line %s, column %s:\n%s\n" \ 395 % (self.marker.source, row+1, col+1, self.marker.data[start:end].rstrip().encode('utf-8')) 396 error_pointer = " " * col + "^\n" 397 else: 398 error_position = "" 399 error_pointer = "" 400 if self.message is not None: 401 error_message = self.message 402 else: 403 error_message = "YAML error" 404 return error_position+error_pointer+error_message 405 406 class _Scanner: 407 408 def scan(self, source, data): 409 self.source = source 410 self.data = data 411 self.flow_level = 0 412 self.indents = [] 413 self.indent = -1 414 self.index = 0 415 self.line = 0 416 self.column = 0 417 self.allow_block_collection = True 418 self.guess_simple_key = False 419 self.guess_simple_key_token = None 420 self.guess_simple_key_indent = None 421 self.allow_flow_key = False 422 self.guess_flow_key_levels = [] 423 self.guess_flow_key_tokens = [] 424 self.tokens = [] 425 while self.eat_ignored() or self.fetch_token(): 426 pass 427 return self.tokens 428 429 def eat_ignored(self): 430 result = False 431 while self.eat_ignored_spaces() or self.eat_ignored_comment() or self.eat_ignored_newline(): 432 result = True 433 return result 434 435 def eat_ignored_spaces(self): 436 result = False 437 while self.index < len(self.data) and self.data[self.index] == ' ': 438 self.index += 1 439 self.column += 1 440 result = True 441 return result 442 443 def eat_ignored_comment(self): 444 if self.index < len(self.data) and self.data[self.index] == '#': 445 self.eat_line() 446 return False 447 448 def eat_line(self): 449 result = False 450 while self.index < len(self.data) and self.data[self.index] not in '\r\n': 451 self.index += 1 452 
self.column += 1 453 result = True 454 return result 455 456 def eat_ignored_newline(self): 457 if self.index < len(self.data) and self.data[self.index] in '\r\n': 458 if self.data[self.index:self.index+2] == '\r\n': 459 self.index += 2 460 else: 461 self.index += 1 462 self.line += 1 463 self.column = 0 464 self.allow_block_collection = True 465 return True 466 return False 467 468 def eat_ns(self): 469 result = False 470 while self.index < len(self.data) and self.data[self.index] not in ' \t\r\n': 471 self.index += 1 472 self.column += 1 473 result = True 474 return result 475 476 def eat_indent(self, indent=0): 477 if indent < self.indent: 478 indent = self.indent 479 if self.column != 0: 480 return False 481 count = 0 482 while self.index < len(self.data) and self.data[self.index] == ' ' and count < indent: 483 self.index += 1 484 self.column += 1 485 count += 1 486 return count == indent 487 488 def eat_double_quoted(self): 489 if self.index < len(self.data) and self.data[self.index] == '"': 490 self.index += 1 491 self.column += 1 492 while self.index < len(self.data) and self.data[self.index] != '"': 493 if self.data[self.index:self.index+2] in ['\\\\', '\\"']: 494 self.index += 2 495 self.column += 2 496 elif self.data[self.index] in '\r\n': 497 self.eat_ignored_newline() 498 if not self.eat_indent(1): 499 self.error("Invalid indentation") 500 else: 501 self.index += 1 502 self.column += 1 503 if self.index < len(self.data) and self.data[self.index] == '"': 504 self.index += 1 505 self.column += 1 506 return True 507 else: 508 self.error("unclosed double quoted scalar") 509 else: 510 return False 511 512 def eat_single_quoted(self): 513 if self.index < len(self.data) and self.data[self.index] == '\'': 514 self.index += 1 515 self.column += 1 516 while self.index < len(self.data) and \ 517 (self.data[self.index] != '\'' or self.data[self.index:self.index+2] == '\'\''): 518 if self.data[self.index:self.index+2] == '\'\'': 519 self.index += 2 520 self.column 
+= 2 521 elif self.data[self.index] in '\r\n': 522 self.eat_ignored_newline() 523 if not self.eat_indent(1): 524 self.error("Invalid indentation") 525 else: 526 self.index += 1 527 self.column += 1 528 if self.index < len(self.data) and self.data[self.index] == '\'': 529 self.index += 1 530 self.column += 1 531 return True 532 else: 533 self.error("unclosed single quoted scalar") 534 else: 535 return False 536 537 def eat_folded(self): 538 self.eat_block_scalar() 539 540 def eat_literal(self): 541 self.eat_block_scalar() 542 543 def eat_block_scalar(self): 544 if self.index < len(self.data) and self.data[self.index] in '>|': 545 self.eat_line() 546 if not self.eat_ignored_newline(): 547 return True 548 indent = self.indent+1 549 if indent < 1: 550 indent = 1 551 while (self.eat_indent(indent) and ((self.eat_line() and self.eat_ignored_newline()) or (self.eat_ignored_newline()))) or \ 552 (self.eat_ignored_comment() and self.eat_ignored_newline()) or \ 553 self.eat_ignored_newline(): 554 pass 555 return True 556 return False 557 558 def eat_block_plain(self): 559 return self.eat_plain(block=True) 560 561 def eat_flow_plain(self): 562 return self.eat_plain(block=False) 563 564 def eat_plain(self, block): 755 self.tokens.append(TokenClass('', start_marker, end_marker)) 756 757 def scan_tag(self): 758 start_marker = self.stream.get_marker() 759 while self.stream.peek() not in u'\0 \t\r\n': 760 self.stream.read() 761 end_marker = self.stream.get_marker() 762 self.tokens.append(Tag('', start_marker, end_marker)) 763 764 def scan_block_scalar(self, folded): 765 start_marker = self.stream.get_marker() 565 766 indent = self.indent+1 566 767 if indent < 1: 567 768 indent = 1 568 if self.index < len(self.data): 569 if self.data[self.index] not in ' \t\r\n-?:,[]{}#&*!|>\'"%@`' or \ 570 (block and self.data[self.index] == '-' and self.data[self.index:self.index+2] not in ['-', '- ', '-\r', '-\n']) or \ 571 (block and self.data[self.index] == '?' 
and self.data[self.index:self.index+2] not in ['?', '? ', '?\r', '?\n']) or \ 572 (block and self.data[self.index] == ':' and self.data[self.index:self.index+2] not in [':', ': ', ':\r', ':\n']): 573 if block and self.allow_block_collection: 574 self.guessing_simple_key() 575 if self.flow_level and self.allow_flow_key: 576 self.guess_flow_key_levels.append(self.flow_level) 577 self.guess_flow_key_tokens.append(len(self.tokens)) 578 self.allow_flow_key = False 579 self.index += 1 580 self.column += 1 581 space = False 582 while True: 583 self.eat_ignored_spaces() 584 while self.index < len(self.data) and ( 585 self.data[self.index] not in '\r\n?:,[]{}#' or 586 (not space and self.data[self.index] == '#') or 587 (block and self.data[self.index] in '?,[]{}') or 588 (block and self.data[self.index] == ':' and self.data[self.index:self.index+2] not in [':', ': ', ':\r', ':\n'])): 589 space = self.data[self.index] not in ' \t' 590 self.index += 1 591 self.column += 1 592 self.allow_block_collection = False 593 if not (self.eat_ignored_newline() and self.eat_indent(indent)): 594 break 595 space = True 596 return True 597 return False 598 599 def no_simple_key(self): 600 self.guess_simple_key = False 601 self.guess_simple_key_token = None 602 self.guess_simple_key_indent = None 603 604 def guessing_simple_key(self): 605 self.guess_simple_key = True 606 self.guess_simple_key_token = len(self.tokens) 607 self.guess_simple_key_indent = self.column 608 609 def unwind_indents(self, level): 610 while self.indent > level: 611 if self.flow_level: 612 self.error("Invalid indentation") 613 self.tokens.append('BLOCK_END') 614 self.indent = self.indents.pop() 615 self.no_simple_key() 616 617 def fetch_token(self): 618 self.unwind_indents(self.column) 619 if self.index < len(self.data): 620 if self.column == 0: 621 if self.data[self.index] == '%': 622 self.tokens.append('DIRECTIVE') 623 self.eat_line() 624 self.no_simple_key() 625 return True 626 if self.data[self.index:self.index+3] 
== '---' and \ 627 (not self.data[self.index+3:self.index+4] or self.data[self.index+3:self.index+4] in ' \r\n'): 628 self.unwind_indents(-1) 629 self.tokens.append('DOCUMENT_START') 630 self.index += 3 631 self.column += 3 632 self.allow_block_collection = False 633 self.allow_flow_key = False 634 self.guess_flow_keys = [] 635 self.no_simple_key() 636 return True 637 if self.data[self.index:self.index+3] == '...' and \ 638 (not self.data[self.index+3:self.index+4] or self.data[self.index+3:self.index+4] in ' \r\n'): 639 self.unwind_indents(-1) 640 self.tokens.append('DOCUMENT_END') 641 self.index += 3 642 self.column += 3 643 self.allow_block_collection = False 644 self.allow_flow_key = False 645 self.guess_flow_keys = [] 646 self.no_simple_key() 647 return True 648 if self.data[self.index] in '[]{}': 649 if self.data[self.index] == '[': 650 self.flow_level += 1 651 self.allow_flow_key = True 652 self.tokens.append('FLOW_SEQ_START') 653 elif self.data[self.index] == '{': 654 self.flow_level += 1 655 self.allow_flow_key = True 656 self.tokens.append('FLOW_MAP_START') 657 elif self.data[self.index] == ']': 658 if not self.flow_level: 659 self.error("Extra ]") 660 self.flow_level -= 1 661 self.allow_flow_key = False 662 self.tokens.append('FLOW_SEQ_END') 663 else: 664 if not self.flow_level: 665 self.error("Extra }") 666 self.flow_level -= 1 667 self.allow_flow_key = False 668 self.tokens.append('FLOW_MAP_END') 669 while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] > self.flow_level: 670 self.guess_flow_key_levels.pop() 671 self.guess_flow_key_tokens.pop() 672 self.index += 1 673 self.column += 1 674 self.allow_block_collection = False 675 return True 676 if self.data[self.index] in '!&*': 677 if self.flow_level and self.allow_flow_key: 678 self.guess_flow_key_levels.append(self.flow_level) 679 self.guess_flow_key_tokens.append(len(self.tokens)) 680 if not self.flow_level and self.allow_block_collection: 681 self.guessing_simple_key() 682 if 
self.data[self.index] == '!': 683 self.tokens.append('TAG') 684 elif self.data[self.index] == '&': 685 self.tokens.append('ANCHOR') 686 else: 687 self.tokens.append('ALIAS') 688 self.eat_ns() 689 self.allow_flow_key = False 690 self.allow_block_collection = False 691 return True 692 if self.data[self.index] == '"': 693 if self.flow_level and self.allow_flow_key: 694 self.guess_flow_key_levels.append(self.flow_level) 695 self.guess_flow_key_tokens.append(len(self.tokens)) 696 if not self.flow_level and self.allow_block_collection: 697 self.guessing_simple_key() 698 self.tokens.append('SCALAR') 699 self.eat_double_quoted() 700 self.allow_flow_key = False 701 self.allow_block_collection = False 702 return True 703 if self.data[self.index] == '\'': 704 if self.flow_level and self.allow_flow_key: 705 self.guess_flow_key_levels.append(self.flow_level) 706 self.guess_flow_key_tokens.append(len(self.tokens)) 707 if not self.flow_level and self.allow_block_collection: 708 self.guessing_simple_key() 709 self.tokens.append('SCALAR') 710 self.eat_single_quoted() 711 self.allow_flow_key = False 712 self.allow_block_collection = False 713 return True 714 if not self.flow_level: 715 if self.data[self.index] in '-?:' and \ 716 (not self.data[self.index+1:self.index+2] or self.data[self.index+1:self.index+2] in ' \r\n'): 717 if self.guess_simple_key and self.data[self.index] == ':': 718 self.tokens.insert(self.guess_simple_key_token, 'KEY') 719 if self.guess_simple_key_indent > self.indent: 720 self.indents.append(self.indent) 721 self.indent = self.guess_simple_key_indent 722 self.tokens.insert(self.guess_simple_key_token, 'BLOCK_MAP_START') 723 self.tokens.append('VALUE') 724 self.no_simple_key() 725 self.index += 1 726 self.column += 1 727 self.allow_block_collection = False 728 return True 729 else: 730 if not self.allow_block_collection: 731 self.error("Block collection should start at the beginning of the line") 732 if self.column > self.indent: 733 
self.indents.append(self.indent) 734 self.indent = self.column 735 if self.data[self.index] == '-': 736 self.tokens.append('BLOCK_SEQ_START') 737 else: 738 self.tokens.append('BLOCK_MAP_START') 739 if self.data[self.index] == '-': 740 self.tokens.append('ENTRY') 741 elif self.data[self.index] == '?': 742 self.tokens.append('KEY') 743 else: 744 self.tokens.append('VALUE') 745 self.index += 1 746 self.column += 1 747 #self.allow_block_collection = False 748 self.allow_block_collection = True 749 self.no_simple_key() 750 return True 751 if self.data[self.index] == '>': 752 self.no_simple_key() 753 self.tokens.append('SCALAR') 754 self.eat_folded() 755 self.allow_block_collection = True 756 return True 757 if self.data[self.index] == '|': 758 self.no_simple_key() 759 self.tokens.append('SCALAR') 760 self.eat_literal() 761 self.allow_block_collection = True 762 return True 763 if self.eat_block_plain(): 764 self.tokens.append('SCALAR') 765 return True 769 while True: 770 while self.stream.peek() and self.stream.peek() and self.stream.peek() not in u'\0\r\n': 771 self.stream.read() 772 if self.stream.peek() != u'\0': 773 self.stream.read() 774 count = 0 775 while count < indent and self.stream.peek() == u' ': 776 self.stream.read() 777 count += 1 778 if count < indent and self.stream.peek() not in u'#\r\n': 779 break 780 self.tokens.append(Scalar('', False, start_marker, start_marker)) 781 782 def scan_flow_scalar(self, double): 783 marker = self.stream.get_marker() 784 quote = self.stream.read() 785 while self.stream.peek() != quote: 786 if double and self.stream.peek() == u'\\': 787 self.stream.read(2) 788 elif not double and self.stream.peek(3)[1:] == u'\'\'': 789 self.stream.read(3) 766 790 else: 767 if self.data[self.index] in ',?:': 768 if self.data[self.index] == ',': 769 self.tokens.append('ENTRY') 770 while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] >= self.flow_level: 771 self.guess_flow_key_levels.pop() 772 self.guess_flow_key_tokens.pop() 
773 self.allow_flow_key = True 774 elif self.data[self.index] == '?': 775 self.tokens.append('KEY') 776 while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] >= self.flow_level: 777 self.guess_flow_key_levels.pop() 778 self.guess_flow_key_tokens.pop() 779 self.allow_flow_key = False 780 else: 781 self.tokens.append('VALUE') 782 if self.guess_flow_key_levels and self.guess_flow_key_levels[-1] == self.flow_level: 783 self.guess_flow_key_levels.pop() 784 index = self.guess_flow_key_tokens.pop() 785 self.tokens.insert(index, 'KEY') 786 self.allow_flow_key =False 787 self.index += 1 788 self.column += 1 789 return True 790 if self.eat_flow_plain(): 791 self.tokens.append('SCALAR') 792 return True 793 self.error("Invalid token") 794 else: 795 self.unwind_indents(-1) 796 797 def error(self, message): 798 raise Error(message, Marker(self.source, self.data, self.index)) 799 800 class Parser: 801 802 def parse(self, source, data): 803 scanner = Scanner() 804 self.tokens = scanner.scan(source, data) 805 self.tokens.append('END') 806 documents = self.parse_stream() 807 if len(documents) == 1: 808 return documents[0] 809 return documents 810 811 def parse_stream(self): 812 documents = [] 813 if self.tokens[0] not in ['DIRECTIVE', 'DOCUMENT_START', 'END']: 814 documents.append(self.parse_block_node()) 815 while self.tokens[0] != 'END': 816 while self.tokens[0] == 'DIRECTIVE': 817 self.tokens.pop(0) 818 if self.tokens[0] != 'DOCUMENT_START': 819 self.error('DOCUMENT_START is expected') 820 self.tokens.pop(0) 821 if self.tokens[0] in ['DIRECTIVE', 'DOCUMENT_START', 'DOCUMENT_END', 'END']: 822 documents.append(None) 823 else: 824 documents.append(self.parse_block_node()) 825 while self.tokens[0] == 'DOCUMENT_END': 826 self.tokens.pop(0) 827 if self.tokens[0] != 'END': 828 self.error("END is expected") 829 return tuple(documents) 830 831 def parse_block_node(self): 832 if self.tokens[0] == 'ALIAS': 833 self.tokens.pop(0) 834 return '*' 835 if self.tokens[0] == 'TAG': 
836 self.tokens.pop(0) 837 if self.tokens[0] == 'ANCHOR': 838 self.tokens.pop(0) 839 elif self.tokens[0] == 'ANCHOR': 840 self.tokens.pop(0) 841 if self.tokens[0] == 'TAG': 842 self.tokens.pop(0) 843 return self.parse_block_content() 844 845 def parse_flow_node(self): 846 if self.tokens[0] == 'ALIAS': 847 self.tokens.pop(0) 848 return '*' 849 if self.tokens[0] == 'TAG': 850 self.tokens.pop(0) 851 if self.tokens[0] == 'ANCHOR': 852 self.tokens.pop(0) 853 elif self.tokens[0] == 'ANCHOR': 854 self.tokens.pop(0) 855 if self.tokens[0] == 'TAG': 856 self.tokens.pop(0) 857 return self.parse_flow_content() 858 859 def parse_block_node_or_indentless_sequence(self): 860 if self.tokens[0] == 'ALIAS': 861 self.tokens.pop(0) 862 return '*' 863 if self.tokens[0] == 'TAG': 864 self.tokens.pop(0) 865 if self.tokens[0] == 'ANCHOR': 866 self.tokens.pop(0) 867 elif self.tokens[0] == 'ANCHOR': 868 self.tokens.pop(0) 869 if self.tokens[0] == 'TAG': 870 self.tokens.pop(0) 871 if self.tokens[0] == 'ENTRY': 872 return self.parse_indentless_sequence(self) 873 return self.parse_block_content() 874 875 def parse_block_content(self): 876 if self.tokens[0] == 'SCALAR': 877 self.tokens.pop(0) 878 return True 879 elif self.tokens[0] == 'BLOCK_SEQ_START': 880 return self.parse_block_sequence() 881 elif self.tokens[0] == 'BLOCK_MAP_START': 882 return self.parse_block_mapping() 883 elif self.tokens[0] == 'FLOW_SEQ_START': 884 return self.parse_flow_sequence() 885 elif self.tokens[0] == 'FLOW_MAP_START': 886 return self.parse_flow_mapping() 887 else: 888 self.error('block content is expected') 889 890 def parse_flow_content(self): 891 if self.tokens[0] == 'SCALAR': 892 self.tokens.pop(0) 893 return True 894 elif self.tokens[0] == 'FLOW_SEQ_START': 895 return self.parse_flow_sequence() 896 elif self.tokens[0] == 'FLOW_MAP_START': 897 return self.parse_flow_mapping() 898 else: 899 self.error('flow content is expected') 900 901 def parse_block_sequence(self): 902 sequence = [] 903 if self.tokens[0] != 
'BLOCK_SEQ_START': 904 self.error('BLOCK_SEQ_START is expected') 905 self.tokens.pop(0) 906 while self.tokens[0] == 'ENTRY': 907 self.tokens.pop(0) 908 if self.tokens[0] not in ['ENTRY', 'BLOCK_END']: 909 sequence.append(self.parse_block_node()) 910 else: 911 sequence.append(None) 912 if self.tokens[0] != 'BLOCK_END': 913 self.error('BLOCK_END is expected') 914 self.tokens.pop(0) 915 return sequence 916 917 def parse_indentless_sequence(self): 918 sequence = [] 919 while self.tokens[0] == 'ENTRY': 920 self.tokens.pop(0) 921 if self.tokens[0] not in ['ENTRY']: 922 sequence.append(self.parse_block_node()) 923 else: 924 sequence.append(None) 925 return sequence 926 927 def parse_block_mapping(self): 928 mapping = [] 929 if self.tokens[0] != 'BLOCK_MAP_START': 930 self.error('BLOCK_MAP_START is expected') 931 self.tokens.pop(0) 932 while self.tokens[0] in ['KEY', 'VALUE']: 933 key = None 934 value = None 935 if self.tokens[0] == 'KEY': 936 self.tokens.pop(0) 937 if self.tokens[0] not in ['KEY', 'VALUE', 'BLOCK_END']: 938 key = self.parse_block_node_or_indentless_sequence() 939 if self.tokens[0] == 'VALUE': 940 self.tokens.pop(0) 941 if self.tokens[0] not in ['KEY', 'VALUE', 'BLOCK_END']: 942 value = self.parse_block_node_or_indentless_sequence() 943 mapping.append((key, value)) 944 if self.tokens[0] != 'BLOCK_END': 945 self.error('BLOCK_END is expected') 946 self.tokens.pop(0) 947 return mapping 948 949 def parse_flow_sequence(self): 950 sequence = [] 951 if self.tokens[0] != 'FLOW_SEQ_START': 952 self.error('FLOW_SEQ_START is expected') 953 self.tokens.pop(0) 954 while self.tokens[0] != 'FLOW_SEQ_END': 955 if self.tokens[0] == 'KEY': 956 self.tokens.pop(0) 957 key = None 958 value = None 959 if self.tokens[0] != 'VALUE': 960 key = self.parse_flow_node() 961 if self.tokens[0] == 'VALUE': 962 self.tokens.pop(0) 963 if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']: 964 value = self.parse_flow_node() 965 sequence.append([(key, value)]) 966 else: 967 
sequence.append(self.parse_flow_node()) 968 if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']: 969 self.error("ENTRY or FLOW_SEQ_END is expected") 970 if self.tokens[0] == 'ENTRY': 971 self.tokens.pop(0) 972 if self.tokens[0] != 'FLOW_SEQ_END': 973 self.error('FLOW_SEQ_END is expected') 974 self.tokens.pop(0) 975 return sequence 976 977 def parse_flow_mapping(self): 978 mapping = [] 979 if self.tokens[0] != 'FLOW_MAP_START': 980 self.error('FLOW_MAP_START is expected') 981 self.tokens.pop(0) 982 while self.tokens[0] != 'FLOW_MAP_END': 983 if self.tokens[0] == 'KEY': 984 self.tokens.pop(0) 985 key = None 986 value = None 987 if self.tokens[0] != 'VALUE': 988 key = self.parse_flow_node() 989 if self.tokens[0] == 'VALUE': 990 self.tokens.pop(0) 991 if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']: 992 value = self.parse_flow_node() 993 mapping.append((key, value)) 994 else: 995 mapping.append((self.parse_flow_node(), None)) 996 if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']: 997 self.error("ENTRY or FLOW_MAP_END is expected") 998 if self.tokens[0] == 'ENTRY': 999 self.tokens.pop(0) 1000 if self.tokens[0] != 'FLOW_MAP_END': 1001 self.error('FLOW_MAP_END is expected') 1002 self.tokens.pop(0) 1003 return mapping 1004 1005 def error(self, message): 1006 raise Error(message+': '+str(self.tokens)) 1007 791 self.stream.read(1) 792 self.stream.read(1) 793 self.tokens.append(Scalar('', False, marker, marker)) 794 795 def scan_plain(self): 796 indent = self.indent+1 797 if indent < 1: 798 indent = 1 799 space = False 800 marker = self.stream.get_marker() 801 while True: 802 while self.stream.peek() == u' ': 803 self.stream.read() 804 space = True 805 while self.stream.peek() not in u'\0\r\n?:,[]{}#' \ 806 or (not space and self.stream.peek() == '#') \ 807 or (not self.flow_level and self.stream.peek() in '?,[]{}') \ 808 or (not self.flow_level and self.stream.peek() == ':' and self.stream.peek(2)[1] not in u' \0\r\n'): 809 space = self.stream.peek() not in u' \t' 810 
self.stream.read() 811 self.allow_simple_key = False 812 if self.stream.peek() not in u'\r\n': 813 break 814 while self.stream.peek() in u'\r\n': 815 self.stream.read() 816 if not self.flow_level: 817 self.allow_simple_key = True 818 count = 0 819 while self.stream.peek() == u' ' and count < indent: 820 self.stream.read() 821 count += 1 822 if count < indent: 823 break 824 space = True 825 self.tokens.append(Scalar('', True, marker, marker)) 826 827 def invalid_token(self): 828 self.fail("invalid token") 829 830 def fail(self, message): 831 raise ScannerError(message) 832 -
branches/pyyaml3000/tests/test_appliance.py
r39 r43 27 27 add_tests = classmethod(add_tests) 28 28 29 class Node: 30 def __repr__(self): 31 args = [] 32 for attribute in ['anchor', 'tag', 'value']: 33 if hasattr(self, attribute): 34 args.append(repr(getattr(self, attribute))) 35 return "%s(%s)" % (self.__class__.__name__, ''.join(args)) 36 37 class AliasNode(Node): 38 def __init__(self, anchor): 39 self.anchor = anchor 40 41 class ScalarNode(Node): 42 def __init__(self, anchor, tag, value): 43 self.anchor = anchor 44 self.tag = tag 45 self.value = value 46 47 class SequenceNode(Node): 48 def __init__(self, anchor, tag, value): 49 self.anchor = anchor 50 self.tag = tag 51 self.value = value 52 53 class MappingNode(Node): 54 def __init__(self, anchor, tag, value): 55 self.anchor = anchor 56 self.tag = tag 57 self.value = value 58 59 class Token: 60 def __repr__(self): 61 args = [] 62 if hasattr(self, 'value'): 63 args.append(repr(self.value)) 64 return "%s(%s)" % (self.__class__.__name__, ''.join(args)) 65 66 class EndToken(Token): 67 pass 68 69 class DirectiveToken(Token): 70 pass 71 72 class DocumentStartToken(Token): 73 pass 74 75 class SequenceStartToken(Token): 76 pass 77 78 class MappingStartToken(Token): 79 pass 80 81 class SequenceEndToken(Token): 82 pass 83 84 class MappingEndToken(Token): 85 pass 86 87 class KeyToken(Token): 88 pass 89 90 class ValueToken(Token): 91 pass 92 93 class EntryToken(Token): 94 pass 95 96 class AliasToken(Token): 97 def __init__(self, value): 98 self.value = value 99 100 class AnchorToken(Token): 101 def __init__(self, value): 102 self.value = value 103 104 class TagToken(Token): 105 def __init__(self, value): 106 self.value = value 107 108 class ScalarToken(Token): 109 def __init__(self, value): 110 self.value = value 111 112 class Error(Exception): 113 pass 114 115 class CanonicalScanner: 116 117 def __init__(self, source, data): 118 self.source = source 119 self.data = unicode(data, 'utf-8')+u'\0' 120 self.index = 0 121 122 def scan(self): 123 #print 
self.data[self.index:] 124 tokens = [] 125 while True: 126 self.find_token() 127 ch = self.data[self.index] 128 if ch == u'\0': 129 tokens.append(EndToken()) 130 break 131 elif ch == u'%': 132 tokens.append(self.scan_directive()) 133 elif ch == u'-' and self.data[self.index:self.index+3] == u'---': 134 self.index += 3 135 tokens.append(DocumentStartToken()) 136 elif ch == u'[': 137 self.index += 1 138 tokens.append(SequenceStartToken()) 139 elif ch == u'{': 140 self.index += 1 141 tokens.append(MappingStartToken()) 142 elif ch == u']': 143 self.index += 1 144 tokens.append(SequenceEndToken()) 145 elif ch == u'}': 146 self.index += 1 147 tokens.append(MappingEndToken()) 148 elif ch == u'?': 149 self.index += 1 150 tokens.append(KeyToken()) 151 elif ch == u':': 152 self.index += 1 153 tokens.append(ValueToken()) 154 elif ch == u',': 155 self.index += 1 156 tokens.append(EntryToken()) 157 elif ch == u'*' or ch == u'&': 158 tokens.append(self.scan_alias()) 159 elif ch == u'!': 160 tokens.append(self.scan_tag()) 161 elif ch == u'"': 162 tokens.append(self.scan_scalar()) 163 else: 164 raise Error("invalid token") 165 return tokens 166 167 DIRECTIVE = u'%YAML 1.1' 168 169 def scan_directive(self): 170 if self.data[self.index:self.index+len(self.DIRECTIVE)] == self.DIRECTIVE and \ 171 self.data[self.index+len(self.DIRECTIVE)] in u' \n\0': 172 self.index += len(self.DIRECTIVE) 173 return DirectiveToken() 174 175 def scan_alias(self): 176 if self.data[self.index] == u'*': 177 TokenClass = AliasToken 178 else: 179 TokenClass = AnchorToken 180 self.index += 1 181 start = self.index 182 while self.data[self.index] not in u', \n\0': 183 self.index += 1 184 value = self.data[start:self.index] 185 return TokenClass(value) 186 187 def scan_tag(self): 188 self.index += 1 189 start = self.index 190 while self.data[self.index] not in u' \n\0': 191 self.index += 1 192 value = self.data[start:self.index] 193 if value[0] == u'!': 194 value = 'tag:yaml.org,2002:'+value[1:] 195 else: 196 
value = value[1:-1] 197 return TagToken(value) 198 199 QUOTE_CODES = { 200 'x': 2, 201 'u': 4, 202 'U': 8, 203 } 204 205 QUOTE_REPLACES = { 206 u'\\': u'\\', 207 u'\"': u'\"', 208 u' ': u' ', 209 u'a': u'\x07', 210 u'b': u'\x08', 211 u'e': u'\x1B', 212 u'f': u'\x0C', 213 u'n': u'\x0A', 214 u'r': u'\x0D', 215 u't': u'\x09', 216 u'v': u'\x0B', 217 u'N': u'\u0085', 218 u'L': u'\u2028', 219 u'P': u'\u2029', 220 u'_': u'_', 221 u'0': u'\x00', 222 223 } 224 225 def scan_scalar(self): 226 self.index += 1 227 chunks = [] 228 start = self.index 229 ignore_spaces = False 230 while self.data[self.index] != u'"': 231 if self.data[self.index] == u'\\': 232 ignore_spaces = False 233 chunks.append(self.data[start:self.index]) 234 self.index += 1 235 ch = self.data[self.index] 236 self.index += 1 237 if ch == u'\n': 238 ignore_spaces = True 239 elif ch in self.QUOTE_CODES: 240 length = self.QUOTE_CODES[ch] 241 code = int(self.data[self.index:self.index+length], 16) 242 chunks.append(unichr(code)) 243 self.index += length 244 else: 245 chunks.append(self.QUOTE_REPLACES[ch]) 246 start = self.index 247 elif self.data[self.index] == u'\n': 248 chunks.append(u' ') 249 self.index += 1 250 ignore_spaces = True 251 elif ignore_spaces and self.data[self.index] == u' ': 252 self.index += 1 253 start = self.index 254 else: 255 ignore_spaces = False 256 self.index += 1 257 chunks.append(self.data[start:self.index]) 258 self.index += 1 259 return ScalarToken(u''.join(chunks)) 260 261 def find_token(self): 262 found = False 263 while not found: 264 while self.data[self.index] in u' \t': 265 self.index += 1 266 if self.data[self.index] == u'#': 267 while self.data[self.index] != u'\n': 268 self.index += 1 269 if self.data[self.index] == u'\n': 270 self.index += 1 271 else: 272 found = True 273 274 class CanonicalParser: 275 276 def __init__(self, source, data): 277 self.scanner = CanonicalScanner(source, data) 278 279 # stream: document* END 280 def parse_stream(self): 281 documents = [] 282 
while not self.test_token(EndToken): 283 if self.test_token(DirectiveToken, DocumentStartToken): 284 documents.append(self.parse_document()) 285 else: 286 raise Error("document is expected, got "+repr(self.tokens[self.index])) 287 return documents 288 289 # document: DIRECTIVE? DOCUMENT-START node? 290 def parse_document(self): 291 node = None 292 if self.test_token(DirectiveToken): 293 self.consume_token(DirectiveToken) 294 self.consume_token(DocumentStartToken) 295 if self.test_token(TagToken, AliasToken, AnchorToken, TagToken, 296 SequenceStartToken, MappingStartToken, ScalarToken): 297 node = self.parse_node() 298 return node 299 300 # node: ALIAS | ANCHOR? TAG? (SCALAR|sequence|mapping) 301 def parse_node(self): 302 if self.test_token(AliasToken): 303 return AliasNode(self.get_value()) 304 else: 305 anchor = None 306 if self.test_token(AnchorToken): 307 anchor = self.get_value() 308 tag = None 309 if self.test_token(TagToken): 310 tag = self.get_value() 311 if self.test_token(ScalarToken): 312 return ScalarNode(anchor, tag, self.get_value()) 313 elif self.test_token(SequenceStartToken): 314 return SequenceNode(anchor, tag, self.parse_sequence()) 315 elif self.test_token(MappingStartToken): 316 return MappingNode(anchor, tag, self.parse_mapping()) 317 else: 318 raise Error("SCALAR, '[', or '{' is expected, got "+repr(self.tokens[self.index])) 319 320 # sequence: SEQUENCE-START (node (ENTRY node)*)? ENTRY? SEQUENCE-END 321 def parse_sequence(self): 322 values = [] 323 self.consume_token(SequenceStartToken) 324 if not self.test_token(SequenceEndToken): 325 values.append(self.parse_node()) 326 while not self.test_token(SequenceEndToken): 327 self.consume_token(EntryToken) 328 if not self.test_token(SequenceEndToken): 329 values.append(self.parse_node()) 330 self.consume_token(SequenceEndToken) 331 return values 332 333 # mapping: MAPPING-START (map_entry (ENTRY map_entry)*)? ENTRY? 
MAPPING-END 334 def parse_mapping(self): 335 values = [] 336 self.consume_token(MappingStartToken) 337 if not self.test_token(MappingEndToken): 338 values.append(self.parse_map_entry()) 339 while not self.test_token(MappingEndToken): 340 self.consume_token(EntryToken) 341 if not self.test_token(MappingEndToken): 342 values.append(self.parse_map_entry()) 343 self.consume_token(MappingEndToken) 344 return values 345 346 # map_entry: KEY node VALUE node 347 def parse_map_entry(self): 348 self.consume_token(KeyToken) 349 key = self.parse_node() 350 self.consume_token(ValueToken) 351 value = self.parse_node() 352 return (key, value) 353 354 def test_token(self, *choices): 355 for choice in choices: 356 if isinstance(self.tokens[self.index], choice): 357 return True 358 return False 359 360 def consume_token(self, cls): 361 if not isinstance(self.tokens[self.index], cls): 362 raise Error("unexpected token "+repr(self.tokens[self.index])) 363 self.index += 1 364 365 def get_value(self): 366 value = self.tokens[self.index].value 367 self.index += 1 368 return value 369 370 def parse(self): 371 self.tokens = self.scanner.scan() 372 self.index = 0 373 return self.parse_stream() 374 -
branches/pyyaml3000/tests/test_tokens.py
r39 r43 2 2 import test_appliance 3 3 4 from yaml.scanner import Scanner4 from yaml.scanner import * 5 5 6 6 class TestTokens(test_appliance.TestAppliance): … … 26 26 27 27 replaces = { 28 'DIRECTIVE': '%', 29 'DOCUMENT_START': '---', 30 'DOCUMENT_END': '...', 31 'ALIAS': '*', 32 'ANCHOR': '&', 33 'TAG': '!', 34 'SCALAR': '_', 35 'BLOCK_SEQ_START': '[[', 36 'BLOCK_MAP_START': '{{', 37 'BLOCK_END': ']}', 38 'FLOW_SEQ_START': '[', 39 'FLOW_SEQ_END': ']', 40 'FLOW_MAP_START': '{', 41 'FLOW_MAP_END': '}', 42 'ENTRY': ',', 43 'KEY': '?', 44 'VALUE': ':', 28 YAMLDirective: '%', 29 TagDirective: '%', 30 ReservedDirective: '%', 31 DocumentStart: '---', 32 DocumentEnd: '...', 33 Alias: '*', 34 Anchor: '&', 35 Tag: '!', 36 Scalar: '_', 37 BlockSequenceStart: '[[', 38 BlockMappingStart: '{{', 39 BlockEnd: ']}', 40 FlowSequenceStart: '[', 41 FlowSequenceEnd: ']', 42 FlowMappingStart: '{', 43 FlowMappingEnd: '}', 44 Entry: ',', 45 Key: '?', 46 Value: ':', 45 47 } 46 48 … … 49 51 tokens2 = file(tokens_filename, 'rb').read().split() 50 52 try: 51 scanner = Scanner() 52 tokens1 = scanner.scan(data_filename, file(data_filename, 'rb').read()) 53 tokens1 = [self.replaces[t] for t in tokens1] 53 scanner = Scanner(data_filename, file(data_filename, 'rb').read()) 54 tokens1 = [] 55 while not isinstance(scanner.peek_token(), End): 56 tokens1.append(scanner.get_token()) 57 tokens1 = [self.replaces[t.__class__] for t in tokens1] 54 58 self.failUnlessEqual(tokens1, tokens2) 55 59 except: 56 60 print 61 print "DATA:" 62 print file(data_filename, 'rb').read() 57 63 print "TOKENS1:", tokens1 58 64 print "TOKENS2:", tokens2 … … 61 67 TestTokens.add_tests('testTokens', '.data', '.tokens') 62 68 69 class TestScanner(test_appliance.TestAppliance): 70 71 def _testScanner(self, test_name, data_filename, canonical_filename): 72 for filename in [canonical_filename, data_filename]: 73 tokens = None 74 try: 75 scanner = Scanner(filename, file(filename, 'rb').read()) 76 tokens = [] 77 while not 
isinstance(scanner.peek_token(), End): 78 tokens.append(scanner.get_token().__class__.__name__) 79 except: 80 print 81 print "DATA:" 82 print file(data_filename, 'rb').read() 83 print "TOKENS:", tokens 84 raise 85 86 TestScanner.add_tests('testScanner', '.data', '.canonical') 87 -
branches/pyyaml3000/tests/test_yaml.py
r39 r43 3 3 4 4 from test_marker import * 5 from test_canonical import * 5 6 from test_tokens import * 6 7
Note: See TracChangeset
for help on using the changeset viewer.
