source: branches/pyyaml3000/lib/yaml/scanner.py @ 47

Revision 47, 27.5 KB checked in by xi, 9 years ago (diff)

Working on the scanner.

RevLine 
[39]1
# Tokens:
# YAML-DIRECTIVE(major_version, minor_version), TAG-DIRECTIVE(handle, prefix)
# RESERVED-DIRECTIVE(name)
# DOCUMENT-START, DOCUMENT-END
# BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END
# FLOW-SEQUENCE-START, FLOW-MAPPING-START, FLOW-SEQUENCE-END, FLOW-MAPPING-END
# ENTRY, KEY, VALUE
# ALIAS(name), ANCHOR(name), TAG(value), SCALAR(value, plain)
10
[46]11__all__ = ['Scanner', 'ScannerError']
[43]12
[46]13from error import YAMLError
14from tokens import *
[39]15
class ScannerError(YAMLError):
    """Error raised by the scanner.

    The error is described by up to two (text, marker) pairs -- the
    `context` in which the scanner was working and the `problem` it ran
    into -- followed by an optional free-form `description`.  Any part may
    be None, in which case it is omitted from the rendered message.
    """

    # TODO:
    # ScannerError: while reading a quoted string
    #         in '...', line 5, column 10:
    # key: "valu\?e"
    #      ^
    # got unknown quote character '?'
    #         in '...', line 5, column 15:
    # key: "valu\?e"
    #            ^

    def __init__(self, context=None, context_marker=None,
            problem=None, problem_marker=None, description=None):
        # What the scanner was doing, and where that activity started.
        self.context, self.context_marker = context, context_marker
        # What went wrong, and where it was detected.
        self.problem, self.problem_marker = problem, problem_marker
        # Optional trailing free-form text.
        self.description = description

    def __str__(self):
        """Render the non-None parts, one per line, context first."""
        report = []
        # A marker is only printed when the text it belongs to is present.
        if self.context is not None:
            report.append(self.context)
            if self.context_marker is not None:
                report.append(str(self.context_marker))
        if self.problem is not None:
            report.append(self.problem)
            if self.problem_marker is not None:
                report.append(str(self.problem_marker))
        if self.description is not None:
            report.append(self.description)
        return '\n'.join(report)
45
class SimpleKey:
    """Record describing a position at which a simple key may start.

    It only stores the values handed to the constructor:
      token_number -- number of the token that would start the key,
      required     -- whether a key is required at this position,
      index, line, column -- reader position of the candidate,
      marker       -- reader marker for the same position.
    """

    def __init__(self, token_number, required, index, line, column, marker):
        # Identity of the candidate token and whether it has to be a key.
        self.token_number, self.required = token_number, required
        # Where in the input the candidate starts.
        self.index, self.line, self.column = index, line, column
        # Marker kept for error reporting and for the inserted KEY token.
        self.marker = marker
54
class Scanner:
    """Split the characters supplied by a reader into YAML tokens.

    Tokens are produced lazily and queued in `self.tokens`.  The two public
    methods, `peek_token` and `get_token`, fetch as many tokens as needed to
    decide whether the token at the head of the queue starts a simple key
    (in which case a KEY token has to be inserted in front of it).
    """

    def __init__(self, reader):
        """Initialize the scanner.

        `reader` is the input stream.  The scanner relies on its `peek(k=1)`,
        `forward(k=1)` and `get_marker()` methods and on its `index`, `line`
        and `column` attributes.
        """
        # The input stream. The Reader class does the dirty work of checking
        # for BOM and converting the input data to Unicode. It also adds NUL
        # to the end.
        #
        # Reader supports the following methods
        #   self.reader.peek(k=1)      # peek the next k characters
        #   self.reader.forward(k=1)   # read the next k characters and move
        #                              # the pointer
        self.reader = reader

        # Had we reached the end of the stream?
        self.done = False

        # The number of unclosed '{' and '['. `flow_level == 0` means block
        # context.
        self.flow_level = 0

        # List of processed tokens that are not yet emitted.
        self.tokens = []

        # Number of tokens that were emitted through the `get_token` method.
        self.tokens_taken = 0

        # The current indentation level.
        self.indent = -1

        # Past indentation levels.
        self.indents = []

        # Variables related to simple keys treatment.

        # A simple key is a key that is not denoted by the '?' indicator.
        # Example of simple keys:
        #   ---
        #   block simple key: value
        #   ? not a simple key:
        #   : { flow simple key: value }
        # We emit the KEY token before all keys, so when we find a potential
        # simple key, we try to locate the corresponding ':' indicator.
        # Simple keys should be limited to a single line and 1024 characters.

        # Can a simple key start at the current position? A simple key may
        # start:
        # - at the beginning of the line, not counting indentation spaces
        #       (in block context),
        # - after '{', '[', ',' (in the flow context),
        # - after '?', ':', '-' (in the block context).
        # In the block context, this flag also signifies if a block
        # collection may start at the current position.
        self.allow_simple_key = True

        # Keep track of possible simple keys. This is a dictionary. The key
        # is `flow_level`; there can be no more than one possible simple key
        # for each level. The value is a SimpleKey record:
        #   (token_number, required, index, line, column, marker)
        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
        # '[', or '{' tokens.
        self.possible_simple_keys = {}

    # Two public methods.

    def peek_token(self):
        """Return the current token without removing it from the queue.

        Returns None when the queue is empty.
        """
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            return self.tokens[0]

    def get_token(self):
        """Return the current token and remove it from the pending tokens.

        Returns None when the queue is empty.
        """
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            self.tokens_taken += 1
            return self.tokens.pop(0)

    # Private methods.

    def need_more_tokens(self):
        """Tell whether more input must be scanned before emitting a token."""
        if self.done:
            return False
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        self.stale_possible_simple_keys()
        if self.next_possible_simple_key() == self.tokens_taken:
            return True
        return False

    def fetch_more_tokens(self):
        """Scan the next token(s) and append them to `self.tokens`."""

        # Eat whitespaces and comments until we reach the next token.
        self.scan_to_next_token()

        # Remove obsolete possible simple keys.
        self.stale_possible_simple_keys()

        # Compare the current indentation and column. It may add some tokens
        # and decrease the current indentation level.
        self.unwind_indent(self.reader.column)

        # Peek the next character.
        ch = self.reader.peek()

        # Is it the end of reader?
        if ch == u'\0':
            return self.fetch_end()

        # Is it a directive?
        if ch == u'%' and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == u'-' and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == u'.' and self.check_document_end():
            return self.fetch_document_end()

        # Note: the order of the following checks is NOT significant.

        # Is it the flow sequence start indicator?
        if ch == u'[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == u'{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == u']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == u'}':
            return self.fetch_flow_mapping_end()

        # Is it the entry indicator?
        if ch in u'-,' and self.check_entry():
            return self.fetch_entry()

        # Is it the key indicator?
        if ch == u'?' and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == u':' and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == u'*':
            return self.fetch_alias()

        # Is it an anchor?
        if ch == u'&':
            return self.fetch_anchor()

        # Is it a tag?
        if ch == u'!':
            return self.fetch_tag()

        # Is it a literal scalar?
        if ch == u'|' and not self.flow_level:
            return self.fetch_literal()

        # Is it a folded scalar?
        if ch == u'>' and not self.flow_level:
            return self.fetch_folded()

        # Is it a single quoted scalar?
        if ch == u'\'':
            return self.fetch_single()

        # Is it a double quoted scalar?
        if ch == u'\"':
            return self.fetch_double()

        # It must be a plain scalar then.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error. Let's produce a nice error message.
        self.invalid_token()

    # Simple keys treatment.

    def next_possible_simple_key(self):
        """Return the smallest token number among the possible simple keys.

        Returns None when no possible simple key is recorded.
        """
        nearest = None
        for key in self.possible_simple_keys.values():
            if nearest is None or key.token_number < nearest:
                nearest = key.token_number
        return nearest

    def stale_possible_simple_keys(self):
        """Drop recorded keys that can no longer be simple keys.

        According to the YAML specification, simple keys
        - should be limited to a single line,
        - should be no longer than 1024 characters.
        Disabling this procedure will allow simple keys of any length and
        height (may cause problems if indentation is broken though).

        Raises ScannerError if a dropped key was required.
        """
        # Iterate over a snapshot: entries are deleted inside the loop, which
        # is an error when iterating the live dictionary.
        for level in list(self.possible_simple_keys):
            key = self.possible_simple_keys[level]
            if key.line != self.reader.line  \
                    or self.reader.index-key.index > 1024:
                if key.required:
                    raise ScannerError("while scanning a simple key", key.marker,
                            "could not found expected ':'", self.reader.get_marker())
                del self.possible_simple_keys[level]

    def save_possible_simple_key(self):
        """Record the current position as a possible simple key start.

        This function is called for
          ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
        Nothing is recorded unless `allow_simple_key` is set.
        """

        # Check if a simple key is required at the current position.
        required = not self.flow_level and self.indent == self.reader.column

        # A simple key is required only if it is the first token in the current
        # line. Therefore it is always allowed.
        assert self.allow_simple_key or not required

        # The next token might be a simple key. Let's save its number and
        # position.
        if self.allow_simple_key:
            self.remove_possible_simple_key()
            token_number = self.tokens_taken+len(self.tokens)
            index = self.reader.index
            line = self.reader.line
            column = self.reader.column
            marker = self.reader.get_marker()
            key = SimpleKey(token_number, required,
                    index, line, column, marker)
            self.possible_simple_keys[self.flow_level] = key

    def remove_possible_simple_key(self):
        """Forget the possible simple key saved at the current flow level.

        Raises ScannerError if that key was required, because the ':' it
        needs can no longer follow.
        """
        if self.flow_level in self.possible_simple_keys:
            key = self.possible_simple_keys[self.flow_level]

            # A required key that is discarded means the expected ':' never
            # appeared; report it instead of relying on `assert`, which is
            # stripped under -O.
            if key.required:
                raise ScannerError("while scanning a simple key", key.marker,
                        "could not found expected ':'", self.reader.get_marker())

            # Actually drop the record, so that a later ':' cannot be matched
            # against a key that is no longer valid.
            del self.possible_simple_keys[self.flow_level]

    # Indentation functions.

    def unwind_indent(self, column):
        """Close block collections that are indented deeper than `column`.

        Appends one BLOCK-END token per closed indentation level.  Raises
        ScannerError in the flow context if `column` is to the left of the
        current indentation.
        """

        # In flow context, tokens should respect indentation.
        # Actually the condition should be `self.indent >= column` according to
        # the spec. But this condition will prohibit intuitively correct
        # constructions such as
        # key : {
        # }
        if self.flow_level and self.indent > column:
            raise ScannerError(None, None,
                    "invalid intendation or unclosed '[' or '{'",
                    self.reader.get_marker())

        # In block context, we may need to issue the BLOCK-END tokens.
        while self.indent > column:
            marker = self.reader.get_marker()
            self.indent = self.indents.pop()
            self.tokens.append(BlockEndToken(marker, marker))

    def add_indent(self, column):
        """Push a new indentation level if `column` is deeper than the current.

        Returns True if the indentation was increased, False otherwise.
        """
        if self.indent < column:
            self.indents.append(self.indent)
            self.indent = column
            return True
        return False

    # Fetchers.

    def fetch_end(self):
        """Emit the pending BLOCK-END tokens and END; mark the scanner done."""

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset everything (not really needed).
        self.allow_simple_key = False
        self.possible_simple_keys = {}

        # Read the token.
        marker = self.reader.get_marker()

        # Add END.
        self.tokens.append(StreamEndToken(marker, marker))

        # The reader is ended.
        self.done = True

    def fetch_directive(self):
        """Emit a directive token for a line starting with '%'."""

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Scan and add DIRECTIVE.
        self.tokens.append(self.scan_directive())

    def fetch_document_start(self):
        """Emit DOCUMENT-START for '---'."""
        self.fetch_document_indicator(DocumentStartToken)

    def fetch_document_end(self):
        """Emit DOCUMENT-END for '...'."""
        self.fetch_document_indicator(DocumentEndToken)

    def fetch_document_indicator(self, TokenClass):
        """Consume a three-character document indicator and emit its token."""

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys. Note that there could not be a block collection
        # after '---'.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Add DOCUMENT-START or DOCUMENT-END.
        start_marker = self.reader.get_marker()
        self.reader.forward(3)
        end_marker = self.reader.get_marker()
        self.tokens.append(TokenClass(start_marker, end_marker))

    def fetch_flow_sequence_start(self):
        """Emit FLOW-SEQUENCE-START for '['."""
        self.fetch_flow_collection_start(FlowSequenceStartToken)

    def fetch_flow_mapping_start(self):
        """Emit FLOW-MAPPING-START for '{'."""
        self.fetch_flow_collection_start(FlowMappingStartToken)

    def fetch_flow_collection_start(self, TokenClass):
        """Enter a flow collection and emit its start token."""

        # '[' and '{' may start a simple key.
        self.save_possible_simple_key()

        # Increase the flow level.
        self.flow_level += 1

        # Simple keys are allowed after '[' and '{'.
        self.allow_simple_key = True

        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
        start_marker = self.reader.get_marker()
        self.reader.forward()
        end_marker = self.reader.get_marker()
        self.tokens.append(TokenClass(start_marker, end_marker))

    def fetch_flow_sequence_end(self):
        """Emit FLOW-SEQUENCE-END for ']'."""
        self.fetch_flow_collection_end(FlowSequenceEndToken)

    def fetch_flow_mapping_end(self):
        """Emit FLOW-MAPPING-END for '}'."""
        self.fetch_flow_collection_end(FlowMappingEndToken)

    def fetch_flow_collection_end(self, TokenClass):
        """Leave a flow collection and emit its end token."""

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Decrease the flow level.
        self.flow_level -= 1

        # No simple keys after ']' or '}'.
        self.allow_simple_key = False

        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
        start_marker = self.reader.get_marker()
        self.reader.forward()
        end_marker = self.reader.get_marker()
        self.tokens.append(TokenClass(start_marker, end_marker))

    def fetch_entry(self):
        """Emit ENTRY for '-' (block) or ',' (flow).

        In the block context BLOCK-SEQUENCE-START is emitted first when the
        entry opens a new indentation level.  Raises ScannerError if a block
        entry is not allowed at this position.
        """

        # Block context needs additional checks.
        if not self.flow_level:

            # Are we allowed to start a new entry?
            if not self.allow_simple_key:
                raise ScannerError(None, None,
                        "sequence entries are not allowed here",
                        self.reader.get_marker())

            # We may need to add BLOCK-SEQUENCE-START.
            if self.add_indent(self.reader.column):
                marker = self.reader.get_marker()
                self.tokens.append(BlockSequenceStartToken(marker, marker))

        # Simple keys are allowed after '-' and ','.
        self.allow_simple_key = True

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add ENTRY.
        start_marker = self.reader.get_marker()
        self.reader.forward()
        end_marker = self.reader.get_marker()
        self.tokens.append(EntryToken(start_marker, end_marker))

    def fetch_key(self):
        """Emit KEY for the '?' indicator.

        In the block context BLOCK-MAPPING-START is emitted first when the
        key opens a new indentation level.  Raises ScannerError if a block
        key is not allowed at this position.
        """

        # Block context needs additional checks.
        if not self.flow_level:

            # Are we allowed to start a key (not necessarily a simple one)?
            if not self.allow_simple_key:
                raise ScannerError(None, None,
                        "mapping keys are not allowed here",
                        self.reader.get_marker())

            # We may need to add BLOCK-MAPPING-START.
            if self.add_indent(self.reader.column):
                marker = self.reader.get_marker()
                self.tokens.append(BlockMappingStartToken(marker, marker))

        # Simple keys are allowed after '?' in the block context.
        self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add KEY.
        start_marker = self.reader.get_marker()
        self.reader.forward()
        end_marker = self.reader.get_marker()
        self.tokens.append(KeyToken(start_marker, end_marker))

    def fetch_value(self):
        """Emit VALUE for ':' and, for a simple key, the KEY preceding it.

        When a possible simple key is recorded at the current flow level, a
        KEY token (and, in the block context, possibly BLOCK-MAPPING-START)
        is inserted in front of the token that started the key.  Raises
        ScannerError if a block value is not allowed at this position.
        """

        # Do we determine a simple key?
        if self.flow_level in self.possible_simple_keys:

            # Add KEY.
            key = self.possible_simple_keys[self.flow_level]
            del self.possible_simple_keys[self.flow_level]
            self.tokens.insert(key.token_number-self.tokens_taken,
                    KeyToken(key.marker, key.marker))

            # If this key starts a new block mapping, we need to add
            # BLOCK-MAPPING-START.
            if not self.flow_level:
                if self.add_indent(key.column):
                    self.tokens.insert(key.token_number-self.tokens_taken,
                            BlockMappingStartToken(key.marker, key.marker))

            # There cannot be two simple keys one after another.
            self.allow_simple_key = False

        # It must be a part of a complex key.
        else:

            # Block context needs additional checks.
            # (Do we really need them? They will be caught by the parser
            # anyway.)
            if not self.flow_level:

                # We are allowed to start a complex value if and only if
                # we can start a simple key.
                if not self.allow_simple_key:
                    raise ScannerError(None, None,
                            "mapping values are not allowed here",
                            self.reader.get_marker())

            # Simple keys are allowed after ':' in the block context.
            self.allow_simple_key = not self.flow_level

            # Reset possible simple key on the current level.
            self.remove_possible_simple_key()

        # Add VALUE.
        start_marker = self.reader.get_marker()
        self.reader.forward()
        end_marker = self.reader.get_marker()
        self.tokens.append(ValueToken(start_marker, end_marker))

    def fetch_alias(self):
        """Emit ALIAS for '*'."""

        # ALIAS could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after ALIAS.
        self.allow_simple_key = False

        # Scan and add ALIAS.
        self.tokens.append(self.scan_anchor(AliasToken))

    def fetch_anchor(self):
        """Emit ANCHOR for '&'."""

        # ANCHOR could start a simple key.
        self.save_possible_simple_key()

        # No simple keys after ANCHOR.
        self.allow_simple_key = False

        # Scan and add ANCHOR.
        self.tokens.append(self.scan_anchor(AnchorToken))

    def fetch_tag(self):
        """Emit TAG for '!'."""

        # TAG could start a simple key.
        self.save_possible_simple_key()

        # No simple keys after TAG.
        self.allow_simple_key = False

        # Scan and add TAG.
        self.tokens.append(self.scan_tag())

    def fetch_literal(self):
        """Emit SCALAR for a literal ('|') block scalar."""
        self.fetch_block_scalar(folded=False)

    def fetch_folded(self):
        """Emit SCALAR for a folded ('>') block scalar."""
        self.fetch_block_scalar(folded=True)

    def fetch_block_scalar(self, folded):
        """Scan a block scalar and emit its SCALAR token."""

        # A simple key may follow a block scalar.
        self.allow_simple_key = True

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Scan and add SCALAR.
        self.tokens.append(self.scan_block_scalar(folded))

    def fetch_single(self):
        """Emit SCALAR for a single quoted scalar."""
        self.fetch_flow_scalar(double=False)

    def fetch_double(self):
        """Emit SCALAR for a double quoted scalar."""
        self.fetch_flow_scalar(double=True)

    def fetch_flow_scalar(self, double):
        """Scan a quoted scalar and emit its SCALAR token."""

        # A flow scalar could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after flow scalars.
        self.allow_simple_key = False

        # Scan and add SCALAR.
        self.tokens.append(self.scan_flow_scalar(double))

    def fetch_plain(self):
        """Scan a plain scalar and emit its SCALAR token."""

        # A plain scalar could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after plain scalars. But note that `scan_plain` will
        # change this flag if the scan is finished at the beginning of the
        # line.
        self.allow_simple_key = False

        # Scan and add SCALAR. May change `allow_simple_key`.
        self.tokens.append(self.scan_plain())

    # Checkers.

    def check_directive(self):
        """Return True if '%' is at column 0 (None otherwise)."""

        # DIRECTIVE:        ^ '%' ...
        # The '%' indicator is already checked.
        if self.reader.column == 0:
            return True

    def check_document_start(self):
        """Return True if the reader is at a '---' document start marker."""

        # DOCUMENT-START:   ^ '---' (' '|'\n')
        if self.reader.column == 0:
            prefix = self.reader.peek(4)
            if prefix[:3] == u'---' and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029':
                return True

    def check_document_end(self):
        """Return True if the reader is at a '...' document end marker."""

        # DOCUMENT-END:     ^ '...' (' '|'\n')
        if self.reader.column == 0:
            prefix = self.reader.peek(4)
            if prefix[:3] == u'...' and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029':
                return True

    def check_entry(self):
        """Tell whether the next character is an entry indicator."""

        # ENTRY(flow context):      ','
        if self.flow_level:
            return self.reader.peek() == u','

        # ENTRY(block context):     '-' (' '|'\n')
        else:
            prefix = self.reader.peek(2)
            return prefix[0] == u'-' and prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'

    def check_key(self):
        """Tell whether the '?' at the current position is a key indicator."""

        # KEY(flow context):    '?'
        if self.flow_level:
            return True

        # KEY(block context):   '?' (' '|'\n')
        else:
            prefix = self.reader.peek(2)
            return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'

    def check_value(self):
        """Tell whether the ':' at the current position is a value indicator."""

        # VALUE(flow context):  ':'
        if self.flow_level:
            return True

        # VALUE(block context): ':' (' '|'\n')
        else:
            prefix = self.reader.peek(2)
            return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'

    def check_plain(self):
        """Tell whether a plain scalar may start here (currently always)."""
        return True

    # Scanners.

    def scan_to_next_token(self):
        """Skip spaces, comments and line breaks up to the next token.

        If a line break is found in the block context, the flag
        `allow_simple_key` is switched on.
        """
        found = False
        while not found:
            while self.reader.peek() == u' ':
                self.reader.forward()
            if self.reader.peek() == u'#':
                while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
                    self.reader.forward()
            if self.scan_line_break():
                if not self.flow_level:
                    self.allow_simple_key = True
            else:
                found = True

    def scan_directive(self):
        """Consume a directive line and return the matching directive token.

        The token payloads are placeholders; the directive arguments are not
        parsed.
        """
        marker = self.reader.get_marker()
        # `peek(k)` yields k characters, so the count has to match the length
        # of the literal it is compared with.
        if self.reader.peek(6) == u'%YAML ':
            token = YAMLDirectiveToken(1, 1, marker, marker)
        elif self.reader.peek(5) == u'%TAG ':
            # NOTE(review): the token list at the top of the file describes
            # TAG-DIRECTIVE(handle, prefix); confirm that the TagDirectiveToken
            # constructor accepts only the two markers.
            token = TagDirectiveToken(marker, marker)
        else:
            token = ReservedDirectiveToken('', marker, marker)
        while self.reader.peek() not in u'\0\r\n':
            self.reader.forward()
        # Step over the line break, but never past the terminating NUL.
        if self.reader.peek() != u'\0':
            self.reader.forward()
        return token

    def scan_anchor(self, TokenClass):
        """Consume an anchor or alias and return a `TokenClass` token.

        The name is not extracted yet; an empty string is stored.
        """
        start_marker = self.reader.get_marker()
        while self.reader.peek() not in u'\0 \t\r\n,:':
            self.reader.forward()
        end_marker = self.reader.get_marker()
        return TokenClass('', start_marker, end_marker)

    def scan_tag(self):
        """Consume a tag and return a TAG token with an empty value."""
        start_marker = self.reader.get_marker()
        while self.reader.peek() not in u'\0 \t\r\n':
            self.reader.forward()
        end_marker = self.reader.get_marker()
        return TagToken('', start_marker, end_marker)

    def scan_block_scalar(self, folded):
        """Consume a block scalar and return a SCALAR token.

        `folded` is accepted but not used yet; the scalar value is not
        collected (an empty string is stored).
        """
        start_marker = self.reader.get_marker()
        indent = self.indent+1
        if indent < 1:
            indent = 1
        while True:
            # Skip the rest of the current line.
            while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
                self.reader.forward()
            if self.reader.peek() != u'\0':
                self.reader.forward()
            # Eat up to `indent` leading spaces of the next line.
            count = 0
            while count < indent and self.reader.peek() == u' ':
                self.reader.forward()
                count += 1
            # A less indented line that is neither empty nor a comment ends
            # the scalar.
            if count < indent and self.reader.peek() not in u'#\r\n\x85\u2028\u2029':
                break
        return ScalarToken('', False, start_marker, start_marker)

    def scan_flow_scalar(self, double):
        """Consume a quoted scalar and return a SCALAR token.

        `double` selects double-quoted (backslash escapes) or single-quoted
        ('' escapes a quote) syntax.  The scalar value is not collected (an
        empty string is stored).  Raises ScannerError if the stream ends
        before the closing quote.
        """
        marker = self.reader.get_marker()
        quote = self.reader.peek()
        self.reader.forward()
        while True:
            ch = self.reader.peek()
            if ch == u'\0':
                raise ScannerError("while scanning a quoted scalar", marker,
                        "found unexpected end of stream",
                        self.reader.get_marker())
            if ch == quote:
                # In a single quoted scalar a doubled quote is an escaped
                # quote, wherever it occurs; anything else closes the scalar.
                if double or self.reader.peek(2)[1:] != u'\'':
                    break
                self.reader.forward(2)
            elif double and ch == u'\\' and self.reader.peek(2)[1:] != u'\0':
                # Skip the backslash together with the escaped character.
                self.reader.forward(2)
            else:
                self.reader.forward()
        # Step over the closing quote.
        self.reader.forward()
        return ScalarToken('', False, marker, marker)

    def scan_plain(self):
        """Consume a plain scalar and return a SCALAR token.

        May switch `allow_simple_key` on when the scan crosses a line break
        in the block context.  The scalar value is not collected (an empty
        string is stored).
        """
        indent = self.indent+1
        if indent < 1:
            indent = 1
        # True when the previously consumed character was whitespace; a '#'
        # starts a comment only in that case.
        space = False
        marker = self.reader.get_marker()
        while True:
            while self.reader.peek() == u' ':
                self.reader.forward()
                space = True
            while True:
                ch = self.reader.peek()
                if ch in u'\0\r\n?:,[]{}#':
                    block = not self.flow_level
                    # These special characters are still part of the scalar:
                    # '#' glued to the preceding text, flow indicators in the
                    # block context, and ':' not followed by a space or break
                    # in the block context.
                    if not ((not space and ch == u'#')
                            or (block and ch in u'?,[]{}')
                            or (block and ch == u':'
                                and self.reader.peek(2)[1] not in u' \0\r\n')):
                        break
                space = ch in u' \t'
                self.reader.forward()
                self.allow_simple_key = False
            if self.reader.peek() not in u'\r\n':
                break
            while self.reader.peek() in u'\r\n':
                self.reader.forward()
                if not self.flow_level:
                    self.allow_simple_key = True
            count = 0
            while self.reader.peek() == u' ' and count < indent:
                self.reader.forward()
                count += 1
            # A continuation line must be indented deeper than the parent.
            if count < indent:
                break
            space = True
        return ScalarToken('', True, marker, marker)

    def scan_line_break(self):
        """Consume one line break, if present, and return its normal form.

        Transforms:
          '\\r\\n'      :   '\\n'
          '\\r'        :   '\\n'
          '\\n'        :   '\\n'
          '\\x85'      :   '\\n'
          '\\u2028'    :   '\\u2028'
          '\\u2029'    :   '\\u2029'
          default     :   ''
        """
        ch = self.reader.peek()
        if ch in u'\r\n\x85':
            if self.reader.peek(2) == u'\r\n':
                self.reader.forward(2)
            else:
                self.reader.forward()
            return u'\n'
        elif ch in u'\u2028\u2029':
            self.reader.forward()
            return ch
        return u''

    def invalid_token(self):
        """Raise ScannerError for a character that cannot start any token."""
        raise ScannerError(None, None,
                "invalid token", self.reader.get_marker())
821
[45]822#try:
823#    import psyco
824#    psyco.bind(Scanner)
825#except ImportError:
826#    pass
827
Note: See TracBrowser for help on using the repository browser.