source: pyyaml/trunk/lib/yaml/scanner.py @ 222

Revision 222, 51.4 KB checked in by xi, 8 years ago (diff)

Subclass all base classes from object.

Hold references to the objects being represented (should fix #22).

The value of a mapping node is represented as a list of pairs (key, value)
now.

Sort dictionary items (fix #23).

Recursive structures are now loaded and dumped correctly, including complex
structures like recursive tuples (fix #5). Thanks Peter Murphy for the patches.
To make it possible, representer functions are allowed to be generators.
In this case, the first generated value is an object. Other values produced
by the representer are ignored.

Make Representer not try to guess !!pairs when a list is represented.
You need to construct a !!pairs node explicitly now.

Do not check for duplicate mapping keys as it didn't work correctly anyway.

Line 
1
2# Scanner produces tokens of the following types:
3# STREAM-START
4# STREAM-END
5# DIRECTIVE(name, value)
6# DOCUMENT-START
7# DOCUMENT-END
8# BLOCK-SEQUENCE-START
9# BLOCK-MAPPING-START
10# BLOCK-END
11# FLOW-SEQUENCE-START
12# FLOW-MAPPING-START
13# FLOW-SEQUENCE-END
14# FLOW-MAPPING-END
15# BLOCK-ENTRY
16# FLOW-ENTRY
17# KEY
18# VALUE
19# ALIAS(value)
20# ANCHOR(value)
21# TAG(value)
22# SCALAR(value, plain, style)
23#
24# Read comments in the Scanner code for more details.
25#
26
27__all__ = ['Scanner', 'ScannerError']
28
29from error import MarkedYAMLError
30from tokens import *
31
32class ScannerError(MarkedYAMLError):
33    pass
34
35class SimpleKey(object):
36    # See below simple keys treatment.
37
38    def __init__(self, token_number, required, index, line, column, mark):
39        self.token_number = token_number
40        self.required = required
41        self.index = index
42        self.line = line
43        self.column = column
44        self.mark = mark
45
46class Scanner(object):
47
48    def __init__(self):
49        """Initialize the scanner."""
50        # It is assumed that Scanner and Reader will have a common descendant.
51        # Reader do the dirty work of checking for BOM and converting the
52        # input data to Unicode. It also adds NUL to the end.
53        #
54        # Reader supports the following methods
55        #   self.peek(i=0)       # peek the next i-th character
56        #   self.prefix(l=1)     # peek the next l characters
57        #   self.forward(l=1)    # read the next l characters and move the pointer.
58
59        # Had we reached the end of the stream?
60        self.done = False
61
62        # The number of unclosed '{' and '['. `flow_level == 0` means block
63        # context.
64        self.flow_level = 0
65
66        # List of processed tokens that are not yet emitted.
67        self.tokens = []
68
69        # Add the STREAM-START token.
70        self.fetch_stream_start()
71
72        # Number of tokens that were emitted through the `get_token` method.
73        self.tokens_taken = 0
74
75        # The current indentation level.
76        self.indent = -1
77
78        # Past indentation levels.
79        self.indents = []
80
81        # Variables related to simple keys treatment.
82
83        # A simple key is a key that is not denoted by the '?' indicator.
84        # Example of simple keys:
85        #   ---
86        #   block simple key: value
87        #   ? not a simple key:
88        #   : { flow simple key: value }
89        # We emit the KEY token before all keys, so when we find a potential
90        # simple key, we try to locate the corresponding ':' indicator.
91        # Simple keys should be limited to a single line and 1024 characters.
92
93        # Can a simple key start at the current position? A simple key may
94        # start:
95        # - at the beginning of the line, not counting indentation spaces
96        #       (in block context),
97        # - after '{', '[', ',' (in the flow context),
98        # - after '?', ':', '-' (in the block context).
99        # In the block context, this flag also signifies if a block collection
100        # may start at the current position.
101        self.allow_simple_key = True
102
103        # Keep track of possible simple keys. This is a dictionary. The key
104        # is `flow_level`; there can be no more that one possible simple key
105        # for each level. The value is a SimpleKey record:
106        #   (token_number, required, index, line, column, mark)
107        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
108        # '[', or '{' tokens.
109        self.possible_simple_keys = {}
110
111    # Public methods.
112
113    def check_token(self, *choices):
114        # Check if the next token is one of the given types.
115        while self.need_more_tokens():
116            self.fetch_more_tokens()
117        if self.tokens:
118            if not choices:
119                return True
120            for choice in choices:
121                if isinstance(self.tokens[0], choice):
122                    return True
123        return False
124
125    def peek_token(self):
126        # Return the next token, but do not delete if from the queue.
127        while self.need_more_tokens():
128            self.fetch_more_tokens()
129        if self.tokens:
130            return self.tokens[0]
131
132    def get_token(self):
133        # Return the next token.
134        while self.need_more_tokens():
135            self.fetch_more_tokens()
136        if self.tokens:
137            self.tokens_taken += 1
138            return self.tokens.pop(0)
139
140    # Private methods.
141
142    def need_more_tokens(self):
143        if self.done:
144            return False
145        if not self.tokens:
146            return True
147        # The current token may be a potential simple key, so we
148        # need to look further.
149        self.stale_possible_simple_keys()
150        if self.next_possible_simple_key() == self.tokens_taken:
151            return True
152
153    def fetch_more_tokens(self):
154
155        # Eat whitespaces and comments until we reach the next token.
156        self.scan_to_next_token()
157
158        # Remove obsolete possible simple keys.
159        self.stale_possible_simple_keys()
160
161        # Compare the current indentation and column. It may add some tokens
162        # and decrease the current indentation level.
163        self.unwind_indent(self.column)
164
165        # Peek the next character.
166        ch = self.peek()
167
168        # Is it the end of stream?
169        if ch == u'\0':
170            return self.fetch_stream_end()
171
172        # Is it a directive?
173        if ch == u'%' and self.check_directive():
174            return self.fetch_directive()
175
176        # Is it the document start?
177        if ch == u'-' and self.check_document_start():
178            return self.fetch_document_start()
179
180        # Is it the document end?
181        if ch == u'.' and self.check_document_end():
182            return self.fetch_document_end()
183
184        # TODO: support for BOM within a stream.
185        #if ch == u'\uFEFF':
186        #    return self.fetch_bom()    <-- issue BOMToken
187
188        # Note: the order of the following checks is NOT significant.
189
190        # Is it the flow sequence start indicator?
191        if ch == u'[':
192            return self.fetch_flow_sequence_start()
193
194        # Is it the flow mapping start indicator?
195        if ch == u'{':
196            return self.fetch_flow_mapping_start()
197
198        # Is it the flow sequence end indicator?
199        if ch == u']':
200            return self.fetch_flow_sequence_end()
201
202        # Is it the flow mapping end indicator?
203        if ch == u'}':
204            return self.fetch_flow_mapping_end()
205
206        # Is it the flow entry indicator?
207        if ch == u',':
208            return self.fetch_flow_entry()
209
210        # Is it the block entry indicator?
211        if ch == u'-' and self.check_block_entry():
212            return self.fetch_block_entry()
213
214        # Is it the key indicator?
215        if ch == u'?' and self.check_key():
216            return self.fetch_key()
217
218        # Is it the value indicator?
219        if ch == u':' and self.check_value():
220            return self.fetch_value()
221
222        # Is it an alias?
223        if ch == u'*':
224            return self.fetch_alias()
225
226        # Is it an anchor?
227        if ch == u'&':
228            return self.fetch_anchor()
229
230        # Is it a tag?
231        if ch == u'!':
232            return self.fetch_tag()
233
234        # Is it a literal scalar?
235        if ch == u'|' and not self.flow_level:
236            return self.fetch_literal()
237
238        # Is it a folded scalar?
239        if ch == u'>' and not self.flow_level:
240            return self.fetch_folded()
241
242        # Is it a single quoted scalar?
243        if ch == u'\'':
244            return self.fetch_single()
245
246        # Is it a double quoted scalar?
247        if ch == u'\"':
248            return self.fetch_double()
249
250        # It must be a plain scalar then.
251        if self.check_plain():
252            return self.fetch_plain()
253
254        # No? It's an error. Let's produce a nice error message.
255        raise ScannerError("while scanning for the next token", None,
256                "found character %r that cannot start any token"
257                % ch.encode('utf-8'), self.get_mark())
258
259    # Simple keys treatment.
260
261    def next_possible_simple_key(self):
262        # Return the number of the nearest possible simple key. Actually we
263        # don't need to loop through the whole dictionary. We may replace it
264        # with the following code:
265        #   if not self.possible_simple_keys:
266        #       return None
267        #   return self.possible_simple_keys[
268        #           min(self.possible_simple_keys.keys())].token_number
269        min_token_number = None
270        for level in self.possible_simple_keys:
271            key = self.possible_simple_keys[level]
272            if min_token_number is None or key.token_number < min_token_number:
273                min_token_number = key.token_number
274        return min_token_number
275
276    def stale_possible_simple_keys(self):
277        # Remove entries that are no longer possible simple keys. According to
278        # the YAML specification, simple keys
279        # - should be limited to a single line,
280        # - should be no longer than 1024 characters.
281        # Disabling this procedure will allow simple keys of any length and
282        # height (may cause problems if indentation is broken though).
283        for level in self.possible_simple_keys.keys():
284            key = self.possible_simple_keys[level]
285            if key.line != self.line  \
286                    or self.index-key.index > 1024:
287                if key.required:
288                    raise ScannerError("while scanning a simple key", key.mark,
289                            "could not found expected ':'", self.get_mark())
290                del self.possible_simple_keys[level]
291
292    def save_possible_simple_key(self):
293        # The next token may start a simple key. We check if it's possible
294        # and save its position. This function is called for
295        #   ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
296
297        # Check if a simple key is required at the current position.
298        required = not self.flow_level and self.indent == self.column
299
300        # A simple key is required only if it is the first token in the current
301        # line. Therefore it is always allowed.
302        assert self.allow_simple_key or not required
303
304        # The next token might be a simple key. Let's save it's number and
305        # position.
306        if self.allow_simple_key:
307            self.remove_possible_simple_key()
308            token_number = self.tokens_taken+len(self.tokens)
309            key = SimpleKey(token_number, required,
310                    self.index, self.line, self.column, self.get_mark())
311            self.possible_simple_keys[self.flow_level] = key
312
313    def remove_possible_simple_key(self):
314        # Remove the saved possible key position at the current flow level.
315        if self.flow_level in self.possible_simple_keys:
316            key = self.possible_simple_keys[self.flow_level]
317           
318            if key.required:
319                raise ScannerError("while scanning a simple key", key.mark,
320                        "could not found expected ':'", self.get_mark())
321
322            del self.possible_simple_keys[self.flow_level]
323
324    # Indentation functions.
325
326    def unwind_indent(self, column):
327
328        ## In flow context, tokens should respect indentation.
329        ## Actually the condition should be `self.indent >= column` according to
330        ## the spec. But this condition will prohibit intuitively correct
331        ## constructions such as
332        ## key : {
333        ## }
334        #if self.flow_level and self.indent > column:
335        #    raise ScannerError(None, None,
336        #            "invalid intendation or unclosed '[' or '{'",
337        #            self.get_mark())
338
339        # In the flow context, indentation is ignored. We make the scanner less
340        # restrictive then specification requires.
341        if self.flow_level:
342            return
343
344        # In block context, we may need to issue the BLOCK-END tokens.
345        while self.indent > column:
346            mark = self.get_mark()
347            self.indent = self.indents.pop()
348            self.tokens.append(BlockEndToken(mark, mark))
349
350    def add_indent(self, column):
351        # Check if we need to increase indentation.
352        if self.indent < column:
353            self.indents.append(self.indent)
354            self.indent = column
355            return True
356        return False
357
358    # Fetchers.
359
360    def fetch_stream_start(self):
361        # We always add STREAM-START as the first token and STREAM-END as the
362        # last token.
363
364        # Read the token.
365        mark = self.get_mark()
366       
367        # Add STREAM-START.
368        self.tokens.append(StreamStartToken(mark, mark,
369            encoding=self.encoding))
370       
371
372    def fetch_stream_end(self):
373
374        # Set the current intendation to -1.
375        self.unwind_indent(-1)
376
377        # Reset everything (not really needed).
378        self.allow_simple_key = False
379        self.possible_simple_keys = {}
380
381        # Read the token.
382        mark = self.get_mark()
383       
384        # Add STREAM-END.
385        self.tokens.append(StreamEndToken(mark, mark))
386
387        # The steam is finished.
388        self.done = True
389
390    def fetch_directive(self):
391       
392        # Set the current intendation to -1.
393        self.unwind_indent(-1)
394
395        # Reset simple keys.
396        self.remove_possible_simple_key()
397        self.allow_simple_key = False
398
399        # Scan and add DIRECTIVE.
400        self.tokens.append(self.scan_directive())
401
402    def fetch_document_start(self):
403        self.fetch_document_indicator(DocumentStartToken)
404
405    def fetch_document_end(self):
406        self.fetch_document_indicator(DocumentEndToken)
407
408    def fetch_document_indicator(self, TokenClass):
409
410        # Set the current intendation to -1.
411        self.unwind_indent(-1)
412
413        # Reset simple keys. Note that there could not be a block collection
414        # after '---'.
415        self.remove_possible_simple_key()
416        self.allow_simple_key = False
417
418        # Add DOCUMENT-START or DOCUMENT-END.
419        start_mark = self.get_mark()
420        self.forward(3)
421        end_mark = self.get_mark()
422        self.tokens.append(TokenClass(start_mark, end_mark))
423
424    def fetch_flow_sequence_start(self):
425        self.fetch_flow_collection_start(FlowSequenceStartToken)
426
427    def fetch_flow_mapping_start(self):
428        self.fetch_flow_collection_start(FlowMappingStartToken)
429
430    def fetch_flow_collection_start(self, TokenClass):
431
432        # '[' and '{' may start a simple key.
433        self.save_possible_simple_key()
434
435        # Increase the flow level.
436        self.flow_level += 1
437
438        # Simple keys are allowed after '[' and '{'.
439        self.allow_simple_key = True
440
441        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
442        start_mark = self.get_mark()
443        self.forward()
444        end_mark = self.get_mark()
445        self.tokens.append(TokenClass(start_mark, end_mark))
446
447    def fetch_flow_sequence_end(self):
448        self.fetch_flow_collection_end(FlowSequenceEndToken)
449
450    def fetch_flow_mapping_end(self):
451        self.fetch_flow_collection_end(FlowMappingEndToken)
452
453    def fetch_flow_collection_end(self, TokenClass):
454
455        # Reset possible simple key on the current level.
456        self.remove_possible_simple_key()
457
458        # Decrease the flow level.
459        self.flow_level -= 1
460
461        # No simple keys after ']' or '}'.
462        self.allow_simple_key = False
463
464        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
465        start_mark = self.get_mark()
466        self.forward()
467        end_mark = self.get_mark()
468        self.tokens.append(TokenClass(start_mark, end_mark))
469
470    def fetch_flow_entry(self):
471
472        # Simple keys are allowed after ','.
473        self.allow_simple_key = True
474
475        # Reset possible simple key on the current level.
476        self.remove_possible_simple_key()
477
478        # Add FLOW-ENTRY.
479        start_mark = self.get_mark()
480        self.forward()
481        end_mark = self.get_mark()
482        self.tokens.append(FlowEntryToken(start_mark, end_mark))
483
484    def fetch_block_entry(self):
485
486        # Block context needs additional checks.
487        if not self.flow_level:
488
489            # Are we allowed to start a new entry?
490            if not self.allow_simple_key:
491                raise ScannerError(None, None,
492                        "sequence entries are not allowed here",
493                        self.get_mark())
494
495            # We may need to add BLOCK-SEQUENCE-START.
496            if self.add_indent(self.column):
497                mark = self.get_mark()
498                self.tokens.append(BlockSequenceStartToken(mark, mark))
499
500        # It's an error for the block entry to occur in the flow context,
501        # but we let the parser detect this.
502        else:
503            pass
504
505        # Simple keys are allowed after '-'.
506        self.allow_simple_key = True
507
508        # Reset possible simple key on the current level.
509        self.remove_possible_simple_key()
510
511        # Add BLOCK-ENTRY.
512        start_mark = self.get_mark()
513        self.forward()
514        end_mark = self.get_mark()
515        self.tokens.append(BlockEntryToken(start_mark, end_mark))
516
517    def fetch_key(self):
518       
519        # Block context needs additional checks.
520        if not self.flow_level:
521
522            # Are we allowed to start a key (not nessesary a simple)?
523            if not self.allow_simple_key:
524                raise ScannerError(None, None,
525                        "mapping keys are not allowed here",
526                        self.get_mark())
527
528            # We may need to add BLOCK-MAPPING-START.
529            if self.add_indent(self.column):
530                mark = self.get_mark()
531                self.tokens.append(BlockMappingStartToken(mark, mark))
532
533        # Simple keys are allowed after '?' in the block context.
534        self.allow_simple_key = not self.flow_level
535
536        # Reset possible simple key on the current level.
537        self.remove_possible_simple_key()
538
539        # Add KEY.
540        start_mark = self.get_mark()
541        self.forward()
542        end_mark = self.get_mark()
543        self.tokens.append(KeyToken(start_mark, end_mark))
544
545    def fetch_value(self):
546
547        # Do we determine a simple key?
548        if self.flow_level in self.possible_simple_keys:
549
550            # Add KEY.
551            key = self.possible_simple_keys[self.flow_level]
552            del self.possible_simple_keys[self.flow_level]
553            self.tokens.insert(key.token_number-self.tokens_taken,
554                    KeyToken(key.mark, key.mark))
555
556            # If this key starts a new block mapping, we need to add
557            # BLOCK-MAPPING-START.
558            if not self.flow_level:
559                if self.add_indent(key.column):
560                    self.tokens.insert(key.token_number-self.tokens_taken,
561                            BlockMappingStartToken(key.mark, key.mark))
562
563            # There cannot be two simple keys one after another.
564            self.allow_simple_key = False
565
566        # It must be a part of a complex key.
567        else:
568           
569            # Block context needs additional checks.
570            # (Do we really need them? They will be catched by the parser
571            # anyway.)
572            if not self.flow_level:
573
574                # We are allowed to start a complex value if and only if
575                # we can start a simple key.
576                if not self.allow_simple_key:
577                    raise ScannerError(None, None,
578                            "mapping values are not allowed here",
579                            self.get_mark())
580
581            # If this value starts a new block mapping, we need to add
582            # BLOCK-MAPPING-START.  It will be detected as an error later by
583            # the parser.
584            if not self.flow_level:
585                if self.add_indent(self.column):
586                    mark = self.get_mark()
587                    self.tokens.append(BlockMappingStartToken(mark, mark))
588
589            # Simple keys are allowed after ':' in the block context.
590            self.allow_simple_key = not self.flow_level
591
592            # Reset possible simple key on the current level.
593            self.remove_possible_simple_key()
594
595        # Add VALUE.
596        start_mark = self.get_mark()
597        self.forward()
598        end_mark = self.get_mark()
599        self.tokens.append(ValueToken(start_mark, end_mark))
600
601    def fetch_alias(self):
602
603        # ALIAS could be a simple key.
604        self.save_possible_simple_key()
605
606        # No simple keys after ALIAS.
607        self.allow_simple_key = False
608
609        # Scan and add ALIAS.
610        self.tokens.append(self.scan_anchor(AliasToken))
611
612    def fetch_anchor(self):
613
614        # ANCHOR could start a simple key.
615        self.save_possible_simple_key()
616
617        # No simple keys after ANCHOR.
618        self.allow_simple_key = False
619
620        # Scan and add ANCHOR.
621        self.tokens.append(self.scan_anchor(AnchorToken))
622
623    def fetch_tag(self):
624
625        # TAG could start a simple key.
626        self.save_possible_simple_key()
627
628        # No simple keys after TAG.
629        self.allow_simple_key = False
630
631        # Scan and add TAG.
632        self.tokens.append(self.scan_tag())
633
634    def fetch_literal(self):
635        self.fetch_block_scalar(style='|')
636
637    def fetch_folded(self):
638        self.fetch_block_scalar(style='>')
639
640    def fetch_block_scalar(self, style):
641
642        # A simple key may follow a block scalar.
643        self.allow_simple_key = True
644
645        # Reset possible simple key on the current level.
646        self.remove_possible_simple_key()
647
648        # Scan and add SCALAR.
649        self.tokens.append(self.scan_block_scalar(style))
650
651    def fetch_single(self):
652        self.fetch_flow_scalar(style='\'')
653
654    def fetch_double(self):
655        self.fetch_flow_scalar(style='"')
656
657    def fetch_flow_scalar(self, style):
658
659        # A flow scalar could be a simple key.
660        self.save_possible_simple_key()
661
662        # No simple keys after flow scalars.
663        self.allow_simple_key = False
664
665        # Scan and add SCALAR.
666        self.tokens.append(self.scan_flow_scalar(style))
667
668    def fetch_plain(self):
669
670        # A plain scalar could be a simple key.
671        self.save_possible_simple_key()
672
673        # No simple keys after plain scalars. But note that `scan_plain` will
674        # change this flag if the scan is finished at the beginning of the
675        # line.
676        self.allow_simple_key = False
677
678        # Scan and add SCALAR. May change `allow_simple_key`.
679        self.tokens.append(self.scan_plain())
680
681    # Checkers.
682
683    def check_directive(self):
684
685        # DIRECTIVE:        ^ '%' ...
686        # The '%' indicator is already checked.
687        if self.column == 0:
688            return True
689
690    def check_document_start(self):
691
692        # DOCUMENT-START:   ^ '---' (' '|'\n')
693        if self.column == 0:
694            if self.prefix(3) == u'---'  \
695                    and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
696                return True
697
698    def check_document_end(self):
699
700        # DOCUMENT-END:     ^ '...' (' '|'\n')
701        if self.column == 0:
702            if self.prefix(3) == u'...'  \
703                    and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
704                return True
705
706    def check_block_entry(self):
707
708        # BLOCK-ENTRY:      '-' (' '|'\n')
709        return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
710
711    def check_key(self):
712
713        # KEY(flow context):    '?'
714        if self.flow_level:
715            return True
716
717        # KEY(block context):   '?' (' '|'\n')
718        else:
719            return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
720
721    def check_value(self):
722
723        # VALUE(flow context):  ':'
724        if self.flow_level:
725            return True
726
727        # VALUE(block context): ':' (' '|'\n')
728        else:
729            return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
730
731    def check_plain(self):
732
733        # A plain scalar may start with any non-space character except:
734        #   '-', '?', ':', ',', '[', ']', '{', '}',
735        #   '#', '&', '*', '!', '|', '>', '\'', '\"',
736        #   '%', '@', '`'.
737        #
738        # It may also start with
739        #   '-', '?', ':'
740        # if it is followed by a non-space character.
741        #
742        # Note that we limit the last rule to the block context (except the
743        # '-' character) because we want the flow context to be space
744        # independent.
745        ch = self.peek()
746        return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`'  \
747                or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029'
748                        and (ch == u'-' or (not self.flow_level and ch in u'?:')))
749
750    # Scanners.
751
752    def scan_to_next_token(self):
753        # We ignore spaces, line breaks and comments.
754        # If we find a line break in the block context, we set the flag
755        # `allow_simple_key` on.
756        # The byte order mark is stripped if it's the first character in the
757        # stream. We do not yet support BOM inside the stream as the
758        # specification requires. Any such mark will be considered as a part
759        # of the document.
760        #
761        # TODO: We need to make tab handling rules more sane. A good rule is
762        #   Tabs cannot precede tokens
763        #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
764        #   KEY(block), VALUE(block), BLOCK-ENTRY
765        # So the checking code is
766        #   if <TAB>:
767        #       self.allow_simple_keys = False
768        # We also need to add the check for `allow_simple_keys == True` to
769        # `unwind_indent` before issuing BLOCK-END.
770        # Scanners for block, flow, and plain scalars need to be modified.
771
772        if self.index == 0 and self.peek() == u'\uFEFF':
773            self.forward()
774        found = False
775        while not found:
776            while self.peek() == u' ':
777                self.forward()
778            if self.peek() == u'#':
779                while self.peek() not in u'\0\r\n\x85\u2028\u2029':
780                    self.forward()
781            if self.scan_line_break():
782                if not self.flow_level:
783                    self.allow_simple_key = True
784            else:
785                found = True
786
787    def scan_directive(self):
788        # See the specification for details.
789        start_mark = self.get_mark()
790        self.forward()
791        name = self.scan_directive_name(start_mark)
792        value = None
793        if name == u'YAML':
794            value = self.scan_yaml_directive_value(start_mark)
795            end_mark = self.get_mark()
796        elif name == u'TAG':
797            value = self.scan_tag_directive_value(start_mark)
798            end_mark = self.get_mark()
799        else:
800            end_mark = self.get_mark()
801            while self.peek() not in u'\0\r\n\x85\u2028\u2029':
802                self.forward()
803        self.scan_directive_ignored_line(start_mark)
804        return DirectiveToken(name, value, start_mark, end_mark)
805
806    def scan_directive_name(self, start_mark):
807        # See the specification for details.
808        length = 0
809        ch = self.peek(length)
810        while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
811                or ch in u'-_':
812            length += 1
813            ch = self.peek(length)
814        if not length:
815            raise ScannerError("while scanning a directive", start_mark,
816                    "expected alphabetic or numeric character, but found %r"
817                    % ch.encode('utf-8'), self.get_mark())
818        value = self.prefix(length)
819        self.forward(length)
820        ch = self.peek()
821        if ch not in u'\0 \r\n\x85\u2028\u2029':
822            raise ScannerError("while scanning a directive", start_mark,
823                    "expected alphabetic or numeric character, but found %r"
824                    % ch.encode('utf-8'), self.get_mark())
825        return value
826
827    def scan_yaml_directive_value(self, start_mark):
828        # See the specification for details.
829        while self.peek() == u' ':
830            self.forward()
831        major = self.scan_yaml_directive_number(start_mark)
832        if self.peek() != '.':
833            raise ScannerError("while scanning a directive", start_mark,
834                    "expected a digit or '.', but found %r"
835                    % self.peek().encode('utf-8'),
836                    self.get_mark())
837        self.forward()
838        minor = self.scan_yaml_directive_number(start_mark)
839        if self.peek() not in u'\0 \r\n\x85\u2028\u2029':
840            raise ScannerError("while scanning a directive", start_mark,
841                    "expected a digit or ' ', but found %r"
842                    % self.peek().encode('utf-8'),
843                    self.get_mark())
844        return (major, minor)
845
846    def scan_yaml_directive_number(self, start_mark):
847        # See the specification for details.
848        ch = self.peek()
849        if not (u'0' <= ch <= '9'):
850            raise ScannerError("while scanning a directive", start_mark,
851                    "expected a digit, but found %r" % ch.encode('utf-8'),
852                    self.get_mark())
853        length = 0
854        while u'0' <= self.peek(length) <= u'9':
855            length += 1
856        value = int(self.prefix(length))
857        self.forward(length)
858        return value
859
860    def scan_tag_directive_value(self, start_mark):
861        # See the specification for details.
862        while self.peek() == u' ':
863            self.forward()
864        handle = self.scan_tag_directive_handle(start_mark)
865        while self.peek() == u' ':
866            self.forward()
867        prefix = self.scan_tag_directive_prefix(start_mark)
868        return (handle, prefix)
869
870    def scan_tag_directive_handle(self, start_mark):
871        # See the specification for details.
872        value = self.scan_tag_handle('directive', start_mark)
873        ch = self.peek()
874        if ch != u' ':
875            raise ScannerError("while scanning a directive", start_mark,
876                    "expected ' ', but found %r" % ch.encode('utf-8'),
877                    self.get_mark())
878        return value
879
880    def scan_tag_directive_prefix(self, start_mark):
881        # See the specification for details.
882        value = self.scan_tag_uri('directive', start_mark)
883        ch = self.peek()
884        if ch not in u'\0 \r\n\x85\u2028\u2029':
885            raise ScannerError("while scanning a directive", start_mark,
886                    "expected ' ', but found %r" % ch.encode('utf-8'),
887                    self.get_mark())
888        return value
889
890    def scan_directive_ignored_line(self, start_mark):
891        # See the specification for details.
892        while self.peek() == u' ':
893            self.forward()
894        if self.peek() == u'#':
895            while self.peek() not in u'\0\r\n\x85\u2028\u2029':
896                self.forward()
897        ch = self.peek()
898        if ch not in u'\0\r\n\x85\u2028\u2029':
899            raise ScannerError("while scanning a directive", start_mark,
900                    "expected a comment or a line break, but found %r"
901                        % ch.encode('utf-8'), self.get_mark())
902        self.scan_line_break()
903
904    def scan_anchor(self, TokenClass):
905        # The specification does not restrict characters for anchors and
906        # aliases. This may lead to problems, for instance, the document:
907        #   [ *alias, value ]
908        # can be interpteted in two ways, as
909        #   [ "value" ]
910        # and
911        #   [ *alias , "value" ]
912        # Therefore we restrict aliases to numbers and ASCII letters.
913        start_mark = self.get_mark()
914        indicator = self.peek()
915        if indicator == '*':
916            name = 'alias'
917        else:
918            name = 'anchor'
919        self.forward()
920        length = 0
921        ch = self.peek(length)
922        while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
923                or ch in u'-_':
924            length += 1
925            ch = self.peek(length)
926        if not length:
927            raise ScannerError("while scanning an %s" % name, start_mark,
928                    "expected alphabetic or numeric character, but found %r"
929                    % ch.encode('utf-8'), self.get_mark())
930        value = self.prefix(length)
931        self.forward(length)
932        ch = self.peek()
933        if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
934            raise ScannerError("while scanning an %s" % name, start_mark,
935                    "expected alphabetic or numeric character, but found %r"
936                    % ch.encode('utf-8'), self.get_mark())
937        end_mark = self.get_mark()
938        return TokenClass(value, start_mark, end_mark)
939
940    def scan_tag(self):
941        # See the specification for details.
942        start_mark = self.get_mark()
943        ch = self.peek(1)
944        if ch == u'<':
945            handle = None
946            self.forward(2)
947            suffix = self.scan_tag_uri('tag', start_mark)
948            if self.peek() != u'>':
949                raise ScannerError("while parsing a tag", start_mark,
950                        "expected '>', but found %r" % self.peek().encode('utf-8'),
951                        self.get_mark())
952            self.forward()
953        elif ch in u'\0 \t\r\n\x85\u2028\u2029':
954            handle = None
955            suffix = u'!'
956            self.forward()
957        else:
958            length = 1
959            use_handle = False
960            while ch not in u'\0 \r\n\x85\u2028\u2029':
961                if ch == u'!':
962                    use_handle = True
963                    break
964                length += 1
965                ch = self.peek(length)
966            handle = u'!'
967            if use_handle:
968                handle = self.scan_tag_handle('tag', start_mark)
969            else:
970                handle = u'!'
971                self.forward()
972            suffix = self.scan_tag_uri('tag', start_mark)
973        ch = self.peek()
974        if ch not in u'\0 \r\n\x85\u2028\u2029':
975            raise ScannerError("while scanning a tag", start_mark,
976                    "expected ' ', but found %r" % ch.encode('utf-8'),
977                    self.get_mark())
978        value = (handle, suffix)
979        end_mark = self.get_mark()
980        return TagToken(value, start_mark, end_mark)
981
982    def scan_block_scalar(self, style):
983        # See the specification for details.
984
985        if style == '>':
986            folded = True
987        else:
988            folded = False
989
990        chunks = []
991        start_mark = self.get_mark()
992
993        # Scan the header.
994        self.forward()
995        chomping, increment = self.scan_block_scalar_indicators(start_mark)
996        self.scan_block_scalar_ignored_line(start_mark)
997
998        # Determine the indentation level and go to the first non-empty line.
999        min_indent = self.indent+1
1000        if min_indent < 1:
1001            min_indent = 1
1002        if increment is None:
1003            breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
1004            indent = max(min_indent, max_indent)
1005        else:
1006            indent = min_indent+increment-1
1007            breaks, end_mark = self.scan_block_scalar_breaks(indent)
1008        line_break = u''
1009
1010        # Scan the inner part of the block scalar.
1011        while self.column == indent and self.peek() != u'\0':
1012            chunks.extend(breaks)
1013            leading_non_space = self.peek() not in u' \t'
1014            length = 0
1015            while self.peek(length) not in u'\0\r\n\x85\u2028\u2029':
1016                length += 1
1017            chunks.append(self.prefix(length))
1018            self.forward(length)
1019            line_break = self.scan_line_break()
1020            breaks, end_mark = self.scan_block_scalar_breaks(indent)
1021            if self.column == indent and self.peek() != u'\0':
1022
1023                # Unfortunately, folding rules are ambiguous.
1024                #
1025                # This is the folding according to the specification:
1026               
1027                if folded and line_break == u'\n'   \
1028                        and leading_non_space and self.peek() not in u' \t':
1029                    if not breaks:
1030                        chunks.append(u' ')
1031                else:
1032                    chunks.append(line_break)
1033               
1034                # This is Clark Evans's interpretation (also in the spec
1035                # examples):
1036                #
1037                #if folded and line_break == u'\n':
1038                #    if not breaks:
1039                #        if self.peek() not in ' \t':
1040                #            chunks.append(u' ')
1041                #        else:
1042                #            chunks.append(line_break)
1043                #else:
1044                #    chunks.append(line_break)
1045            else:
1046                break
1047
1048        # Chomp the tail.
1049        if chomping is not False:
1050            chunks.append(line_break)
1051        if chomping is True:
1052            chunks.extend(breaks)
1053
1054        # We are done.
1055        return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
1056                style)
1057
1058    def scan_block_scalar_indicators(self, start_mark):
1059        # See the specification for details.
1060        chomping = None
1061        increment = None
1062        ch = self.peek()
1063        if ch in u'+-':
1064            if ch == '+':
1065                chomping = True
1066            else:
1067                chomping = False
1068            self.forward()
1069            ch = self.peek()
1070            if ch in u'0123456789':
1071                increment = int(ch)
1072                if increment == 0:
1073                    raise ScannerError("while scanning a block scalar", start_mark,
1074                            "expected indentation indicator in the range 1-9, but found 0",
1075                            self.get_mark())
1076                self.forward()
1077        elif ch in u'0123456789':
1078            increment = int(ch)
1079            if increment == 0:
1080                raise ScannerError("while scanning a block scalar", start_mark,
1081                        "expected indentation indicator in the range 1-9, but found 0",
1082                        self.get_mark())
1083            self.forward()
1084            ch = self.peek()
1085            if ch in u'+-':
1086                if ch == '+':
1087                    chomping = True
1088                else:
1089                    chomping = False
1090                self.forward()
1091        ch = self.peek()
1092        if ch not in u'\0 \r\n\x85\u2028\u2029':
1093            raise ScannerError("while scanning a block scalar", start_mark,
1094                    "expected chomping or indentation indicators, but found %r"
1095                        % ch.encode('utf-8'), self.get_mark())
1096        return chomping, increment
1097
1098    def scan_block_scalar_ignored_line(self, start_mark):
1099        # See the specification for details.
1100        while self.peek() == u' ':
1101            self.forward()
1102        if self.peek() == u'#':
1103            while self.peek() not in u'\0\r\n\x85\u2028\u2029':
1104                self.forward()
1105        ch = self.peek()
1106        if ch not in u'\0\r\n\x85\u2028\u2029':
1107            raise ScannerError("while scanning a block scalar", start_mark,
1108                    "expected a comment or a line break, but found %r"
1109                        % ch.encode('utf-8'), self.get_mark())
1110        self.scan_line_break()
1111
1112    def scan_block_scalar_indentation(self):
1113        # See the specification for details.
1114        chunks = []
1115        max_indent = 0
1116        end_mark = self.get_mark()
1117        while self.peek() in u' \r\n\x85\u2028\u2029':
1118            if self.peek() != u' ':
1119                chunks.append(self.scan_line_break())
1120                end_mark = self.get_mark()
1121            else:
1122                self.forward()
1123                if self.column > max_indent:
1124                    max_indent = self.column
1125        return chunks, max_indent, end_mark
1126
1127    def scan_block_scalar_breaks(self, indent):
1128        # See the specification for details.
1129        chunks = []
1130        end_mark = self.get_mark()
1131        while self.column < indent and self.peek() == u' ':
1132            self.forward()
1133        while self.peek() in u'\r\n\x85\u2028\u2029':
1134            chunks.append(self.scan_line_break())
1135            end_mark = self.get_mark()
1136            while self.column < indent and self.peek() == u' ':
1137                self.forward()
1138        return chunks, end_mark
1139
1140    def scan_flow_scalar(self, style):
1141        # See the specification for details.
1142        # Note that we loose indentation rules for quoted scalars. Quoted
1143        # scalars don't need to adhere indentation because " and ' clearly
1144        # mark the beginning and the end of them. Therefore we are less
1145        # restrictive then the specification requires. We only need to check
1146        # that document separators are not included in scalars.
1147        if style == '"':
1148            double = True
1149        else:
1150            double = False
1151        chunks = []
1152        start_mark = self.get_mark()
1153        quote = self.peek()
1154        self.forward()
1155        chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
1156        while self.peek() != quote:
1157            chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
1158            chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
1159        self.forward()
1160        end_mark = self.get_mark()
1161        return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
1162                style)
1163
1164    ESCAPE_REPLACEMENTS = {
1165        u'0':   u'\0',
1166        u'a':   u'\x07',
1167        u'b':   u'\x08',
1168        u't':   u'\x09',
1169        u'\t':  u'\x09',
1170        u'n':   u'\x0A',
1171        u'v':   u'\x0B',
1172        u'f':   u'\x0C',
1173        u'r':   u'\x0D',
1174        u'e':   u'\x1B',
1175        u' ':   u'\x20',
1176        u'\"':  u'\"',
1177        u'\\':  u'\\',
1178        u'N':   u'\x85',
1179        u'_':   u'\xA0',
1180        u'L':   u'\u2028',
1181        u'P':   u'\u2029',
1182    }
1183
1184    ESCAPE_CODES = {
1185        u'x':   2,
1186        u'u':   4,
1187        u'U':   8,
1188    }
1189
1190    def scan_flow_scalar_non_spaces(self, double, start_mark):
1191        # See the specification for details.
1192        chunks = []
1193        while True:
1194            length = 0
1195            while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
1196                length += 1
1197            if length:
1198                chunks.append(self.prefix(length))
1199                self.forward(length)
1200            ch = self.peek()
1201            if not double and ch == u'\'' and self.peek(1) == u'\'':
1202                chunks.append(u'\'')
1203                self.forward(2)
1204            elif (double and ch == u'\'') or (not double and ch in u'\"\\'):
1205                chunks.append(ch)
1206                self.forward()
1207            elif double and ch == u'\\':
1208                self.forward()
1209                ch = self.peek()
1210                if ch in self.ESCAPE_REPLACEMENTS:
1211                    chunks.append(self.ESCAPE_REPLACEMENTS[ch])
1212                    self.forward()
1213                elif ch in self.ESCAPE_CODES:
1214                    length = self.ESCAPE_CODES[ch]
1215                    self.forward()
1216                    for k in range(length):
1217                        if self.peek(k) not in u'0123456789ABCDEFabcdef':
1218                            raise ScannerError("while scanning a double-quoted scalar", start_mark,
1219                                    "expected escape sequence of %d hexdecimal numbers, but found %r" %
1220                                        (length, self.peek(k).encode('utf-8')), self.get_mark())
1221                    code = int(self.prefix(length), 16)
1222                    chunks.append(unichr(code))
1223                    self.forward(length)
1224                elif ch in u'\r\n\x85\u2028\u2029':
1225                    self.scan_line_break()
1226                    chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
1227                else:
1228                    raise ScannerError("while scanning a double-quoted scalar", start_mark,
1229                            "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark())
1230            else:
1231                return chunks
1232
1233    def scan_flow_scalar_spaces(self, double, start_mark):
1234        # See the specification for details.
1235        chunks = []
1236        length = 0
1237        while self.peek(length) in u' \t':
1238            length += 1
1239        whitespaces = self.prefix(length)
1240        self.forward(length)
1241        ch = self.peek()
1242        if ch == u'\0':
1243            raise ScannerError("while scanning a quoted scalar", start_mark,
1244                    "found unexpected end of stream", self.get_mark())
1245        elif ch in u'\r\n\x85\u2028\u2029':
1246            line_break = self.scan_line_break()
1247            breaks = self.scan_flow_scalar_breaks(double, start_mark)
1248            if line_break != u'\n':
1249                chunks.append(line_break)
1250            elif not breaks:
1251                chunks.append(u' ')
1252            chunks.extend(breaks)
1253        else:
1254            chunks.append(whitespaces)
1255        return chunks
1256
1257    def scan_flow_scalar_breaks(self, double, start_mark):
1258        # See the specification for details.
1259        chunks = []
1260        while True:
1261            # Instead of checking indentation, we check for document
1262            # separators.
1263            prefix = self.prefix(3)
1264            if (prefix == u'---' or prefix == u'...')   \
1265                    and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
1266                raise ScannerError("while scanning a quoted scalar", start_mark,
1267                        "found unexpected document separator", self.get_mark())
1268            while self.peek() in u' \t':
1269                self.forward()
1270            if self.peek() in u'\r\n\x85\u2028\u2029':
1271                chunks.append(self.scan_line_break())
1272            else:
1273                return chunks
1274
1275    def scan_plain(self):
1276        # See the specification for details.
1277        # We add an additional restriction for the flow context:
1278        #   plain scalars in the flow context cannot contain ',', ':' and '?'.
1279        # We also keep track of the `allow_simple_key` flag here.
1280        # Indentation rules are loosed for the flow context.
1281        chunks = []
1282        start_mark = self.get_mark()
1283        end_mark = start_mark
1284        indent = self.indent+1
1285        # We allow zero indentation for scalars, but then we need to check for
1286        # document separators at the beginning of the line.
1287        #if indent == 0:
1288        #    indent = 1
1289        spaces = []
1290        while True:
1291            length = 0
1292            if self.peek() == u'#':
1293                break
1294            while True:
1295                ch = self.peek(length)
1296                if ch in u'\0 \t\r\n\x85\u2028\u2029'   \
1297                        or (not self.flow_level and ch == u':' and
1298                                self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \
1299                        or (self.flow_level and ch in u',:?[]{}'):
1300                    break
1301                length += 1
1302            # It's not clear what we should do with ':' in the flow context.
1303            if (self.flow_level and ch == u':'
1304                    and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'):
1305                self.forward(length)
1306                raise ScannerError("while scanning a plain scalar", start_mark,
1307                    "found unexpected ':'", self.get_mark(),
1308                    "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
1309            if length == 0:
1310                break
1311            self.allow_simple_key = False
1312            chunks.extend(spaces)
1313            chunks.append(self.prefix(length))
1314            self.forward(length)
1315            end_mark = self.get_mark()
1316            spaces = self.scan_plain_spaces(indent, start_mark)
1317            if not spaces or self.peek() == u'#' \
1318                    or (not self.flow_level and self.column < indent):
1319                break
1320        return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
1321
1322    def scan_plain_spaces(self, indent, start_mark):
1323        # See the specification for details.
1324        # The specification is really confusing about tabs in plain scalars.
1325        # We just forbid them completely. Do not use tabs in YAML!
1326        chunks = []
1327        length = 0
1328        while self.peek(length) in u' ':
1329            length += 1
1330        whitespaces = self.prefix(length)
1331        self.forward(length)
1332        ch = self.peek()
1333        if ch in u'\r\n\x85\u2028\u2029':
1334            line_break = self.scan_line_break()
1335            self.allow_simple_key = True
1336            prefix = self.prefix(3)
1337            if (prefix == u'---' or prefix == u'...')   \
1338                    and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
1339                return
1340            breaks = []
1341            while self.peek() in u' \r\n\x85\u2028\u2029':
1342                if self.peek() == ' ':
1343                    self.forward()
1344                else:
1345                    breaks.append(self.scan_line_break())
1346                    prefix = self.prefix(3)
1347                    if (prefix == u'---' or prefix == u'...')   \
1348                            and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
1349                        return
1350            if line_break != u'\n':
1351                chunks.append(line_break)
1352            elif not breaks:
1353                chunks.append(u' ')
1354            chunks.extend(breaks)
1355        elif whitespaces:
1356            chunks.append(whitespaces)
1357        return chunks
1358
1359    def scan_tag_handle(self, name, start_mark):
1360        # See the specification for details.
1361        # For some strange reasons, the specification does not allow '_' in
1362        # tag handles. I have allowed it anyway.
1363        ch = self.peek()
1364        if ch != u'!':
1365            raise ScannerError("while scanning a %s" % name, start_mark,
1366                    "expected '!', but found %r" % ch.encode('utf-8'),
1367                    self.get_mark())
1368        length = 1
1369        ch = self.peek(length)
1370        if ch != u' ':
1371            while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
1372                    or ch in u'-_':
1373                length += 1
1374                ch = self.peek(length)
1375            if ch != u'!':
1376                self.forward(length)
1377                raise ScannerError("while scanning a %s" % name, start_mark,
1378                        "expected '!', but found %r" % ch.encode('utf-8'),
1379                        self.get_mark())
1380            length += 1
1381        value = self.prefix(length)
1382        self.forward(length)
1383        return value
1384
1385    def scan_tag_uri(self, name, start_mark):
1386        # See the specification for details.
1387        # Note: we do not check if URI is well-formed.
1388        chunks = []
1389        length = 0
1390        ch = self.peek(length)
1391        while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
1392                or ch in u'-;/?:@&=+$,_.!~*\'()[]%':
1393            if ch == u'%':
1394                chunks.append(self.prefix(length))
1395                self.forward(length)
1396                length = 0
1397                chunks.append(self.scan_uri_escapes(name, start_mark))
1398            else:
1399                length += 1
1400            ch = self.peek(length)
1401        if length:
1402            chunks.append(self.prefix(length))
1403            self.forward(length)
1404            length = 0
1405        if not chunks:
1406            raise ScannerError("while parsing a %s" % name, start_mark,
1407                    "expected URI, but found %r" % ch.encode('utf-8'),
1408                    self.get_mark())
1409        return u''.join(chunks)
1410
1411    def scan_uri_escapes(self, name, start_mark):
1412        # See the specification for details.
1413        bytes = []
1414        mark = self.get_mark()
1415        while self.peek() == u'%':
1416            self.forward()
1417            for k in range(2):
1418                if self.peek(k) not in u'0123456789ABCDEFabcdef':
1419                    raise ScannerError("while scanning a %s" % name, start_mark,
1420                            "expected URI escape sequence of 2 hexdecimal numbers, but found %r" %
1421                                (self.peek(k).encode('utf-8')), self.get_mark())
1422            bytes.append(chr(int(self.prefix(2), 16)))
1423            self.forward(2)
1424        try:
1425            value = unicode(''.join(bytes), 'utf-8')
1426        except UnicodeDecodeError, exc:
1427            raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
1428        return value
1429
1430    def scan_line_break(self):
1431        # Transforms:
1432        #   '\r\n'      :   '\n'
1433        #   '\r'        :   '\n'
1434        #   '\n'        :   '\n'
1435        #   '\x85'      :   '\n'
1436        #   '\u2028'    :   '\u2028'
1437        #   '\u2029     :   '\u2029'
1438        #   default     :   ''
1439        ch = self.peek()
1440        if ch in u'\r\n\x85':
1441            if self.prefix(2) == u'\r\n':
1442                self.forward(2)
1443            else:
1444                self.forward()
1445            return u'\n'
1446        elif ch in u'\u2028\u2029':
1447            self.forward()
1448            return ch
1449        return u''
1450
1451#try:
1452#    import psyco
1453#    psyco.bind(Scanner)
1454#except ImportError:
1455#    pass
1456
Note: See TracBrowser for help on using the repository browser.