source: branches/pyyaml3000/lib/yaml/scanner.py @ 45


# Tokens:
# YAML-DIRECTIVE(major_version, minor_version), TAG-DIRECTIVE(handle, prefix)
# RESERVED-DIRECTIVE(name)
# DOCUMENT-START, DOCUMENT-END
# BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END
# FLOW-SEQUENCE-START, FLOW-MAPPING-START, FLOW-SEQUENCE-END, FLOW-MAPPING-END
# ENTRY, KEY, VALUE
# ALIAS(name), ANCHOR(name), TAG(value), SCALAR(value, plain)


from marker import Marker
#from error import YAMLError
from stream import Stream

#class ScannerError(YAMLError):
class ScannerError(Exception):
    pass

class Token:
    def __init__(self, start_marker, end_marker):
        self.start_marker = start_marker
        self.end_marker = end_marker

class DirectiveToken(Token):
    pass

class YAMLDirectiveToken(DirectiveToken):
    def __init__(self, major_version, minor_version, start_marker, end_marker):
        self.major_version = major_version
        self.minor_version = minor_version
        self.start_marker = start_marker
        self.end_marker = end_marker

class TagDirectiveToken(DirectiveToken):
    pass

class ReservedDirectiveToken(DirectiveToken):
    def __init__(self, name, start_marker, end_marker):
        self.name = name
        self.start_marker = start_marker
        self.end_marker = end_marker

class DocumentStartToken(Token):
    pass

class DocumentEndToken(Token):
    pass

class EndToken(Token):
    pass

class BlockSequenceStartToken(Token):
    pass

class BlockMappingStartToken(Token):
    pass

class BlockEndToken(Token):
    pass

class FlowSequenceStartToken(Token):
    pass

class FlowMappingStartToken(Token):
    pass

class FlowSequenceEndToken(Token):
    pass

class FlowMappingEndToken(Token):
    pass

class KeyToken(Token):
    pass

class ValueToken(Token):
    pass

class EntryToken(Token):
    pass

class AliasToken(Token):
    def __init__(self, value, start_marker, end_marker):
        self.value = value
        self.start_marker = start_marker
        self.end_marker = end_marker

class AnchorToken(Token):
    def __init__(self, value, start_marker, end_marker):
        self.value = value
        self.start_marker = start_marker
        self.end_marker = end_marker

class TagToken(Token):
    def __init__(self, value, start_marker, end_marker):
        self.value = value
        self.start_marker = start_marker
        self.end_marker = end_marker

class ScalarToken(Token):
    def __init__(self, value, plain, start_marker, end_marker):
        self.value = value
        self.plain = plain
        self.start_marker = start_marker
        self.end_marker = end_marker

class SimpleKey:
    def __init__(self, token_number, required, index, line, column, marker):
        self.token_number = token_number
        self.required = required
        self.index = index
        self.line = line
        self.column = column
        self.marker = marker

class Scanner:

    def __init__(self, source, data):
        """Initialize the scanner."""
        # The input stream. The Stream class does the dirty work of checking
        # for a BOM and converting the input data to Unicode. It also adds
        # NUL to the end.
        #
        # Stream supports the following methods:
        #   self.stream.peek(k=1)      # peek the next k characters
        #   self.stream.forward(k=1)   # read the next k characters and move
        #                              # the pointer
        self.stream = Stream(source, data)
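        # A minimal usage sketch of this interface (assuming, as described
        # above, that peek() returns u'\0' once the end of the input is
        # reached):
        #   ch = self.stream.peek()            # look at the next character
        #   if ch == u'%':
        #       self.stream.forward()          # consume it, move the pointer
        #   marker = self.stream.get_marker()  # remember the current position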

        # Have we reached the end of the stream?
        self.done = False

        # The number of unclosed '{' and '['. `flow_level == 0` means block
        # context.
        self.flow_level = 0

        # List of processed tokens that are not yet emitted.
        self.tokens = []

        # Number of tokens that were emitted through the `get_token` method.
        self.tokens_taken = 0

        # The current indentation level.
        self.indent = -1

        # Past indentation levels.
        self.indents = []

        # Variables related to simple key treatment.

        # A simple key is a key that is not denoted by the '?' indicator.
        # Examples of simple keys:
        #   ---
        #   block simple key: value
        #   ? not a simple key:
        #   : { flow simple key: value }
        # We emit the KEY token before all keys, so when we find a potential
        # simple key, we try to locate the corresponding ':' indicator.
        # Simple keys should be limited to a single line and 1024 characters.

        # Can a simple key start at the current position? A simple key may
        # start:
        # - at the beginning of the line, not counting indentation spaces
        #       (in block context),
        # - after '{', '[', ',' (in the flow context),
        # - after '?', ':', '-' (in the block context).
        # In the block context, this flag also signifies whether a block
        # collection may start at the current position.
        self.allow_simple_key = True

        # Keep track of possible simple keys. This is a dictionary. The key
        # is `flow_level`; there can be no more than one possible simple key
        # for each level. The value is a SimpleKey record:
        #   (token_number, required, index, line, column, marker)
        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
        # '[', or '{' tokens.
        self.possible_simple_keys = {}

    # Two public methods.

    def peek_token(self):
        """Get the current token."""
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            return self.tokens[0]

    def get_token(self):
        """Get the current token and remove it from the list of pending tokens."""
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            self.tokens_taken += 1
            return self.tokens.pop(0)

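    # A usage sketch (illustrative only; it assumes `data` is a unicode
    # string accepted by Stream): pull tokens until the EndToken appears.
    #
    #   scanner = Scanner("<string>", u"- foo\n- bar\n")
    #   while True:
    #       token = scanner.get_token()
    #       if token is None or isinstance(token, EndToken):
    #           break
    #       print token.__class__.__name__
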
    # Private methods.

    def need_more_tokens(self):
        if self.done:
            return False
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        self.stale_possible_simple_keys()
        if self.next_possible_simple_key() == self.tokens_taken:
            return True

    def fetch_more_tokens(self):

        # Eat whitespaces and comments until we reach the next token.
        self.scan_to_next_token()

        # Remove obsolete possible simple keys.
        self.stale_possible_simple_keys()

        # Compare the current indentation and column. It may add some tokens
        # and decrease the current indentation level.
        self.unwind_indent(self.stream.column)

        #print
        #print self.stream.get_marker().get_snippet()

        # Peek the next character.
        ch = self.stream.peek()

        # Is it the end of stream?
        if ch == u'\0':
            return self.fetch_end()

        # Is it a directive?
        if ch == u'%' and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == u'-' and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == u'.' and self.check_document_end():
            return self.fetch_document_end()

        # Note: the order of the following checks is NOT significant.

        # Is it the flow sequence start indicator?
        if ch == u'[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == u'{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == u']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == u'}':
            return self.fetch_flow_mapping_end()

        # Is it the entry indicator?
        if ch in u'-,' and self.check_entry():
            return self.fetch_entry()

        # Is it the key indicator?
        if ch == u'?' and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == u':' and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == u'*':
            return self.fetch_alias()

        # Is it an anchor?
        if ch == u'&':
            return self.fetch_anchor()

        # Is it a tag?
        if ch == u'!':
            return self.fetch_tag()

        # Is it a literal scalar?
        if ch == u'|' and not self.flow_level:
            return self.fetch_literal()

        # Is it a folded scalar?
        if ch == u'>' and not self.flow_level:
            return self.fetch_folded()

        # Is it a single quoted scalar?
        if ch == u'\'':
            return self.fetch_single()

        # Is it a double quoted scalar?
        if ch == u'\"':
            return self.fetch_double()

        # It must be a plain scalar then.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error. Let's produce a nice error message.
        self.invalid_token()

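    # For illustration only: with the placeholder scanners further below
    # (which leave scalar values empty), an input such as
    #   - foo
    #   - bar
    # is expected to dispatch roughly into
    #   BLOCK-SEQUENCE-START, ENTRY, SCALAR, ENTRY, SCALAR, BLOCK-END, END.
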
    # Simple key treatment.

    def next_possible_simple_key(self):
        # Return the number of the nearest possible simple key. Actually we
        # don't need to loop through the whole dictionary. We may replace it
        # with the following code:
        #   if not self.possible_simple_keys:
        #       return None
        #   return self.possible_simple_keys[
        #           min(self.possible_simple_keys.keys())].token_number
        min_token_number = None
        for level in self.possible_simple_keys:
            key = self.possible_simple_keys[level]
            if min_token_number is None or key.token_number < min_token_number:
                min_token_number = key.token_number
        return min_token_number

    def stale_possible_simple_keys(self):
        # Remove entries that are no longer possible simple keys. According to
        # the YAML specification, simple keys
        # - should be limited to a single line,
        # - should be no longer than 1024 characters.
        # Disabling this procedure would allow simple keys of any length,
        # spanning any number of lines (though it may cause problems if the
        # indentation is broken).
        for level in self.possible_simple_keys.keys():
            key = self.possible_simple_keys[level]
            if key.line != self.stream.line  \
                    or self.stream.index-key.index > 1024:
                if key.required:
                    self.fail("simple key is required")
                del self.possible_simple_keys[level]

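    # An illustrative example: in the flow mapping
    #   { first: 1,
    #     second: 2 }
    # the word 'first' is saved as a possible simple key; when the ':' on the
    # same line is reached, fetch_value turns it into a KEY token. If the ':'
    # only appeared on a later line, the saved entry would be dropped by
    # stale_possible_simple_keys instead.
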
    def save_possible_simple_key(self):
        # The next token may start a simple key. We check if it's possible
        # and save its position. This function is called for
        #   ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.

        # Check if a simple key is required at the current position.
        required = not self.flow_level and self.indent == self.stream.column

        # The next token might be a simple key. Let's save its number and
        # position.
        if self.allow_simple_key:
            self.remove_possible_simple_key()
            token_number = self.tokens_taken+len(self.tokens)
            index = self.stream.index
            line = self.stream.line
            column = self.stream.column
            marker = self.stream.get_marker()
            key = SimpleKey(token_number, required,
                    index, line, column, marker)
            self.possible_simple_keys[self.flow_level] = key

        # A simple key is required at the current position.
        elif required:
            self.fail("simple key is required")

    def remove_possible_simple_key(self):
        # Remove the saved possible key position at the current flow level.
        if self.flow_level in self.possible_simple_keys:
            key = self.possible_simple_keys[self.flow_level]
            if key.required:
                self.fail("simple key is required")
            del self.possible_simple_keys[self.flow_level]

    # Indentation functions.

    def unwind_indent(self, column):

        # In flow context, tokens should respect indentation.
        if self.flow_level and self.indent > column:
            self.fail("invalid indentation in the flow context")

        # In block context, we may need to issue the BLOCK-END tokens.
        while self.indent > column:
            marker = self.stream.get_marker()
            self.indent = self.indents.pop()
            self.tokens.append(BlockEndToken(marker, marker))

    def add_indent(self, column):
        # Check if we need to increase indentation.
        if self.indent < column:
            self.indents.append(self.indent)
            self.indent = column
            return True
        return False

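    # A sketch of how the indent stack evolves (illustrative only): for
    #   a:
    #     b: 1
    # add_indent(0) pushes -1 and add_indent(2) pushes 0 onto self.indents;
    # at the end of the stream unwind_indent(-1) pops them back, emitting one
    # BLOCK-END token per popped level.
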
    # Fetchers.

    def fetch_end(self):

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset everything (not really needed).
        self.allow_simple_key = False
        self.possible_simple_keys = {}

        # Read the token.
        marker = self.stream.get_marker()

        # Add END.
        self.tokens.append(EndToken(marker, marker))

        # The stream is ended.
        self.done = True

    def fetch_directive(self):

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Scan and add DIRECTIVE.
        self.scan_directive()

    def fetch_document_start(self):
        self.fetch_document_indicator(DocumentStartToken)

    def fetch_document_end(self):
        self.fetch_document_indicator(DocumentEndToken)

    def fetch_document_indicator(self, TokenClass):

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys. Note that there cannot be a block collection
        # after '---'.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Add DOCUMENT-START or DOCUMENT-END.
        start_marker = self.stream.get_marker()
        self.stream.forward(3)
        end_marker = self.stream.get_marker()
        self.tokens.append(TokenClass(start_marker, end_marker))

    def fetch_flow_sequence_start(self):
        self.fetch_flow_collection_start(FlowSequenceStartToken)

    def fetch_flow_mapping_start(self):
        self.fetch_flow_collection_start(FlowMappingStartToken)

    def fetch_flow_collection_start(self, TokenClass):

        # '[' and '{' may start a simple key.
        self.save_possible_simple_key()

        # Increase the flow level.
        self.flow_level += 1

        # Simple keys are allowed after '[' and '{'.
        self.allow_simple_key = True

        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
        start_marker = self.stream.get_marker()
        self.stream.forward()
        end_marker = self.stream.get_marker()
        self.tokens.append(TokenClass(start_marker, end_marker))

    def fetch_flow_sequence_end(self):
        self.fetch_flow_collection_end(FlowSequenceeEndToken if False else FlowSequenceEndToken)

    def fetch_flow_mapping_end(self):
        self.fetch_flow_collection_end(FlowMappingEndToken)

    def fetch_flow_collection_end(self, TokenClass):

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Decrease the flow level.
        self.flow_level -= 1

        # No simple keys after ']' or '}'.
        self.allow_simple_key = False

        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
        start_marker = self.stream.get_marker()
        self.stream.forward()
        end_marker = self.stream.get_marker()
        self.tokens.append(TokenClass(start_marker, end_marker))

    def fetch_entry(self):

        # Block context needs additional checks.
        if not self.flow_level:

            # Are we allowed to start a new entry?
            if not self.allow_simple_key:
                self.fail("Cannot start a new entry here")

            # We may need to add BLOCK-SEQUENCE-START.
            if self.add_indent(self.stream.column):
                marker = self.stream.get_marker()
                self.tokens.append(BlockSequenceStartToken(marker, marker))

        # Simple keys are allowed after '-' and ','.
        self.allow_simple_key = True

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add ENTRY.
        start_marker = self.stream.get_marker()
        self.stream.forward()
        end_marker = self.stream.get_marker()
        self.tokens.append(EntryToken(start_marker, end_marker))

    def fetch_key(self):

        # Block context needs additional checks.
        if not self.flow_level:

            # Are we allowed to start a key (not necessarily a simple one)?
            if not self.allow_simple_key:
                self.fail("Cannot start a new key here")

            # We may need to add BLOCK-MAPPING-START.
            if self.add_indent(self.stream.column):
                marker = self.stream.get_marker()
                self.tokens.append(BlockMappingStartToken(marker, marker))

        # Simple keys are allowed after '?' in the block context.
        self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add KEY.
        start_marker = self.stream.get_marker()
        self.stream.forward()
        end_marker = self.stream.get_marker()
        self.tokens.append(KeyToken(start_marker, end_marker))

    def fetch_value(self):

        # Have we found a simple key?
        if self.flow_level in self.possible_simple_keys:

            # Add KEY.
            key = self.possible_simple_keys[self.flow_level]
            del self.possible_simple_keys[self.flow_level]
            self.tokens.insert(key.token_number-self.tokens_taken,
                    KeyToken(key.marker, key.marker))

            # If this key starts a new block mapping, we need to add
            # BLOCK-MAPPING-START.
            if not self.flow_level:
                if self.add_indent(key.column):
                    self.tokens.insert(key.token_number-self.tokens_taken,
                            BlockMappingStartToken(key.marker, key.marker))

            # There cannot be two simple keys one after another.
            self.allow_simple_key = False

        # It must be part of a complex key.
        else:

            # Simple keys are allowed after ':' in the block context.
            self.allow_simple_key = not self.flow_level

            # Reset possible simple key on the current level.
            self.remove_possible_simple_key()

        # Add VALUE.
        start_marker = self.stream.get_marker()
        self.stream.forward()
        end_marker = self.stream.get_marker()
        self.tokens.append(ValueToken(start_marker, end_marker))

    def fetch_alias(self):

        # ALIAS could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after ALIAS.
        self.allow_simple_key = False

        # Scan and add ALIAS.
        self.scan_anchor(AliasToken)

    def fetch_anchor(self):

        # ANCHOR could start a simple key.
        self.save_possible_simple_key()

        # No simple keys after ANCHOR.
        self.allow_simple_key = False

        # Scan and add ANCHOR.
        self.scan_anchor(AnchorToken)

    def fetch_tag(self):

        # TAG could start a simple key.
        self.save_possible_simple_key()

        # No simple keys after TAG.
        self.allow_simple_key = False

        # Scan and add TAG.
        self.scan_tag()

    def fetch_literal(self):
        self.fetch_block_scalar(folded=False)

    def fetch_folded(self):
        self.fetch_block_scalar(folded=True)

    def fetch_block_scalar(self, folded):

        # A simple key may follow a block scalar.
        self.allow_simple_key = True

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Scan and add SCALAR.
        self.scan_block_scalar(folded)

    def fetch_single(self):
        self.fetch_flow_scalar(double=False)

    def fetch_double(self):
        self.fetch_flow_scalar(double=True)

    def fetch_flow_scalar(self, double):

        # A flow scalar could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after flow scalars.
        self.allow_simple_key = False

        # Scan and add SCALAR.
        self.scan_flow_scalar(double)

    def fetch_plain(self):

        # A plain scalar could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after plain scalars. But note that `scan_plain` will
        # change this flag if the scan finishes at the beginning of a line.
        self.allow_simple_key = False

        # Scan and add SCALAR. May change `allow_simple_key`.
        self.scan_plain()

    # Checkers.

    def check_directive(self):

        # DIRECTIVE:        ^ '%' ...
        # The '%' indicator is already checked.
        if self.stream.column == 0:
            return True

    def check_document_start(self):

        # DOCUMENT-START:   ^ '---' (' '|'\n')
        if self.stream.column == 0:
            prefix = self.stream.peek(4)
            if prefix[:3] == u'---' and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029':
                return True

    def check_document_end(self):

        # DOCUMENT-END:     ^ '...' (' '|'\n')
        if self.stream.column == 0:
            prefix = self.stream.peek(4)
            if prefix[:3] == u'...' and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029':
                return True

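    # For example, at column 0 the prefix u'--- ' (or u'...' followed by a
    # break) satisfies the checkers above, while u'---foo' does not, because
    # its fourth character is not a space, tab, break, or NUL.
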
    def check_entry(self):

        # ENTRY(flow context):      ','
        if self.flow_level:
            return self.stream.peek() == u','

        # ENTRY(block context):     '-' (' '|'\n')
        else:
            prefix = self.stream.peek(2)
            return prefix[0] == u'-' and prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'

    def check_key(self):

        # KEY(flow context):    '?'
        if self.flow_level:
            return True

        # KEY(block context):   '?' (' '|'\n')
        else:
            prefix = self.stream.peek(2)
            return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'

    def check_value(self):

        # VALUE(flow context):  ':'
        if self.flow_level:
            return True

        # VALUE(block context): ':' (' '|'\n')
        else:
            prefix = self.stream.peek(2)
            return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'

    def check_plain(self):
        return True

    # Scanners.

    def scan_to_next_token(self):
        found = False
        while not found:
            while self.stream.peek() == u' ':
                self.stream.forward()
            if self.stream.peek() == u'#':
                # Skip the comment up to the line break (or the end of the
                # stream, so that a trailing comment cannot loop forever).
                while self.stream.peek() not in u'\0\r\n':
                    self.stream.forward()
            if self.stream.peek() in u'\r\n':
                self.stream.forward()
                if not self.flow_level:
                    self.allow_simple_key = True
            else:
                found = True

    def scan_directive(self):
        # A placeholder scanner: it only distinguishes the directive kind and
        # skips the rest of the line.
        marker = self.stream.get_marker()
        if self.stream.peek(6) == u'%YAML ':
            self.tokens.append(YAMLDirectiveToken(1, 1, marker, marker))
        elif self.stream.peek(5) == u'%TAG ':
            self.tokens.append(TagDirectiveToken(marker, marker))
        else:
            self.tokens.append(ReservedDirectiveToken('', marker, marker))
        while self.stream.peek() not in u'\0\r\n':
            self.stream.forward()
        self.stream.forward()

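    # Illustrative only: with the placeholder above, the line
    #   %YAML 1.1
    # yields YAMLDirectiveToken(1, 1, ...) (the version numbers are not read
    # from the input yet), and any unrecognized directive is reduced to a
    # ReservedDirectiveToken with an empty name.
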
    def scan_anchor(self, TokenClass):
        # A placeholder scanner: the anchor/alias name is skipped and the
        # token value is left empty.
        start_marker = self.stream.get_marker()
        while self.stream.peek() not in u'\0 \t\r\n,:':
            self.stream.forward()
        end_marker = self.stream.get_marker()
        self.tokens.append(TokenClass('', start_marker, end_marker))

    def scan_tag(self):
        # A placeholder scanner: the tag is skipped and the token value is
        # left empty.
        start_marker = self.stream.get_marker()
        while self.stream.peek() not in u'\0 \t\r\n':
            self.stream.forward()
        end_marker = self.stream.get_marker()
        self.tokens.append(TagToken('', start_marker, end_marker))

    def scan_block_scalar(self, folded):
        # A placeholder scanner: the scalar body is skipped line by line
        # until it reaches a non-empty, non-comment line indented less than
        # the scalar (or the end of the stream).
        start_marker = self.stream.get_marker()
        indent = self.indent+1
        if indent < 1:
            indent = 1
        while True:
            while self.stream.peek() not in u'\0\r\n\x85\u2028\u2029':
                self.stream.forward()
            if self.stream.peek() != u'\0':
                self.stream.forward()
            count = 0
            while count < indent and self.stream.peek() == u' ':
                self.stream.forward()
                count += 1
            if count < indent and self.stream.peek() not in u'#\r\n\x85\u2028\u2029':
                break
        self.tokens.append(ScalarToken('', False, start_marker, start_marker))

    def scan_flow_scalar(self, double):
        # A placeholder scanner: the quoted content is skipped (honoring '\\'
        # escapes in double-quoted and '' escapes in single-quoted scalars)
        # and the token value is left empty.
        marker = self.stream.get_marker()
        quote = self.stream.peek()
        self.stream.forward()
        while self.stream.peek() != quote:
            if double and self.stream.peek() == u'\\':
                self.stream.forward(2)
            elif not double and self.stream.peek(3)[1:] == u'\'\'':
                self.stream.forward(3)
            else:
                self.stream.forward(1)
        self.stream.forward(1)
        self.tokens.append(ScalarToken('', False, marker, marker))

    def scan_plain(self):
        # A placeholder scanner: the plain scalar is skipped and the token
        # value is left empty.
        indent = self.indent+1
        if indent < 1:
            indent = 1
        space = False
        marker = self.stream.get_marker()
        while True:
            while self.stream.peek() == u' ':
                self.stream.forward()
                space = True
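            # The loop below stops at indicators that cannot be part of a
            # plain scalar in the current context. Illustrative only: in
            # block context u'foo: bar' stops before ': ' (colon followed by
            # a space), while u'foo:bar' is consumed whole because its colon
            # is not followed by a space or break.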
            while self.stream.peek() not in u'\0\r\n?:,[]{}#'   \
                    or (not space and self.stream.peek() == '#')    \
                    or (not self.flow_level and self.stream.peek() in '?,[]{}') \
                    or (not self.flow_level and self.stream.peek() == ':' and self.stream.peek(2)[1] not in u' \0\r\n'):
                space = self.stream.peek() not in u' \t'
                self.stream.forward()
                self.allow_simple_key = False
            if self.stream.peek() not in u'\r\n':
                break
            while self.stream.peek() in u'\r\n':
                self.stream.forward()
                if not self.flow_level:
                    self.allow_simple_key = True
            count = 0
            while self.stream.peek() == u' ' and count < indent:
                self.stream.forward()
                count += 1
            if count < indent:
                break
            space = True
        self.tokens.append(ScalarToken('', True, marker, marker))

    def invalid_token(self):
        self.fail("invalid token")

    def fail(self, message):
        raise ScannerError(message)

#try:
#    import psyco
#    psyco.bind(Scanner)
#except ImportError:
#    pass
