Ticket #205: scanner12.py

File scanner12.py, 2.4 KB (added by peterkmurphy@…, 3 years ago)

Contains a new implementation of scan_anchor.

Line 
1from yaml.scanner import Scanner, ScannerError;
2from yaml.tokens import *;
3from yaml.reader import *
4from yaml.parser import *
5from yaml.composer import *
6from yaml.constructor import *
7from yaml.resolver import *
8
9import yaml;
10
class Scanner12(Scanner):
    """Scanner variant whose anchor/alias names follow YAML 1.2 more closely.

    Only ``scan_anchor`` is overridden.  Instead of restricting names to
    ASCII letters, digits, ``-`` and ``_``, a name runs until the first
    whitespace, line break, NUL or flow indicator (``,``, ``[``, ``]``,
    ``{``, ``}``).
    """

    # Characters that terminate an anchor/alias name: NUL, space, tab, the
    # line-break characters, and the flow indicators , [ ] { }.
    _ANCHOR_NAME_STOP = u'\0 \t\r\n\x85\u2028\u2029,[]{}'

    # Characters allowed directly after a complete name.  Because the name
    # loop only stops on a character from _ANCHOR_NAME_STOP, the only
    # characters that can actually fail this check are '[' and '{'.
    _ANCHOR_FOLLOWERS = u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`'

    def __init__(self):
        """Initialise the base scanner; this subclass adds no state."""
        super(Scanner12, self).__init__()

    def scan_anchor(self, TokenClass):
        """Scan an anchor (``&name``) or alias (``*name``) token.

        The indicator character at the current position decides whether the
        token is reported as an alias (``*``) or an anchor (anything else)
        in error messages.  The name is every character after the indicator
        up to, but not including, the first character found in
        ``_ANCHOR_NAME_STOP``.

        Relies on ``get_mark``, ``peek``, ``forward`` and ``prefix``, which
        are not defined in this class and must be supplied by the object
        this scanner is mixed into.

        Args:
            TokenClass: callable invoked as
                ``TokenClass(value, start_mark, end_mark)`` to build the
                returned token.

        Returns:
            The object produced by ``TokenClass`` for the scanned name.

        Raises:
            ScannerError: if the name is empty, or if the character right
                after the name is not in ``_ANCHOR_FOLLOWERS``.
        """
        start_mark = self.get_mark()
        indicator = self.peek()
        if indicator == u'*':
            name = 'alias'
        else:
            name = 'anchor'
        self.forward()  # step over the '&' / '*' indicator

        # Measure the name without consuming it yet.
        length = 0
        ch = self.peek(length)
        while ch not in self._ANCHOR_NAME_STOP:
            length += 1
            ch = self.peek(length)
        if not length:
            raise ScannerError("while scanning an %s" % name, start_mark,
                    "expected at least one name character, but found %r"
                    % ch.encode('utf-8'), self.get_mark())

        value = self.prefix(length)
        self.forward(length)

        # The name must not run straight into the start of a flow collection.
        ch = self.peek()
        if ch not in self._ANCHOR_FOLLOWERS:
            raise ScannerError("while scanning an %s" % name, start_mark,
                    "expected a separator after the name, but found %r"
                    % ch.encode('utf-8'), self.get_mark())
        end_mark = self.get_mark()
        return TokenClass(value, start_mark, end_mark)
55
if __name__ == "__main__":

    class Loader12(Reader, Scanner12, Parser, Composer, Constructor, Resolver):
        """Loader built from the yaml stages with Scanner12 as the scanner."""

        def __init__(self, stream):
            # Each base is initialised explicitly, in the order the bases
            # are listed; only Reader receives the input stream.
            Reader.__init__(self, stream)
            Scanner12.__init__(self)
            Parser.__init__(self)
            Composer.__init__(self)
            Constructor.__init__(self)
            Resolver.__init__(self)

    # Demo: compose the sample document with the custom loader and print its
    # serialised form.  The context manager closes the file even if
    # composing raises.
    with open("bandbio.yml") as stream:
        composition = yaml.compose(stream, Loader12)
    # Single-argument print(...) behaves the same under the Python 2 print
    # statement and the Python 3 print function.
    print(yaml.serialize(composition))