source: branches/pyyaml3000/lib/yaml/parser.py @ 47

Revision 47, 11.3 KB checked in by xi, 8 years ago (diff)

Working on the scanner.

RevLine 
[43]1
2# Production rules:
3# stream            ::= implicit_document? explicit_document* END
4# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
5# implicit_document ::= block_node DOCUMENT-END?
6# block_node    ::= ALIAS | properties? block_content
7# flow_node     ::= ALIAS | properties? flow_content
8# properties    ::= TAG ANCHOR? | ANCHOR TAG?
9# block_content     ::= block_collection | flow_collection | SCALAR
10# flow_content      ::= flow_collection | SCALAR
11# block_collection  ::= block_sequence | block_mapping
12# block_sequence    ::= BLOCK-SEQUENCE-START (ENTRY block_node?)* BLOCK-END
13# block_mapping     ::= BLOCK-MAPPING-START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK-END
14# block_node_or_indentless_sequence ::= ALIAS | properties? (block_content | indentless_block_sequence)
15# indentless_block_sequence         ::= (ENTRY block_node?)+
16# flow_collection   ::= flow_sequence | flow_mapping
17# flow_sequence     ::= FLOW-SEQUENCE-START (flow_sequence_entry ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END
18# flow_mapping      ::= FLOW-MAPPING-START (flow_mapping_entry ENTRY)* flow_mapping_entry? FLOW-MAPPING-END
19# flow_sequence_entry   ::= flow_node | KEY flow_node (VALUE flow_node?)?
20# flow_mapping_entry    ::= flow_node | KEY flow_node (VALUE flow_node?)?
21
22# FIRST(rule) sets:
23# stream: {}
24# explicit_document: { DIRECTIVE DOCUMENT-START }
25# implicit_document: block_node
26# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
27# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
28# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
29# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
30# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
31# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
32# block_sequence: { BLOCK-SEQUENCE-START }
33# block_mapping: { BLOCK-MAPPING-START }
34# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START ENTRY }
35# indentless_block_sequence: { ENTRY }
36# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
37# flow_sequence: { FLOW-SEQUENCE-START }
38# flow_mapping: { FLOW-MAPPING-START }
39# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
40# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
41
[46]42from error import YAMLError
43from tokens import *
[44]44
class ParserError(YAMLError):
    """Error raised by ``Parser.fail`` when the token stream cannot be parsed."""
47
class Node:
    """Base class of the parse-tree nodes; provides a shared ``repr``."""

    def __repr__(self):
        """Render the node as ``ClassName(anchor, tag, value)``.

        Only those of the three attributes that the instance actually has
        are shown, always in the order anchor, tag, value.
        """
        shown = [repr(getattr(self, name))
                 for name in ('anchor', 'tag', 'value')
                 if hasattr(self, name)]
        return "%s(%s)" % (self.__class__.__name__, ', '.join(shown))
55
class AliasNode(Node):
    """Node standing for an alias; it carries only the referenced anchor."""

    def __init__(self, anchor):
        """Remember `anchor`, the value taken from the alias token."""
        self.anchor = anchor
59
class ScalarNode(Node):
    """Node produced for a scalar token."""

    def __init__(self, anchor, tag, value):
        """Store the optional `anchor` and `tag` plus the scalar `value`."""
        self.anchor, self.tag, self.value = anchor, tag, value
65
class SequenceNode(Node):
    """Node produced for a block, flow or indentless sequence."""

    def __init__(self, anchor, tag, value):
        """Store the optional `anchor` and `tag` plus `value`, the entry list."""
        self.anchor, self.tag, self.value = anchor, tag, value
71
class MappingNode(Node):
    """Node produced for a block or flow mapping."""

    def __init__(self, anchor, tag, value):
        """Store the optional `anchor` and `tag` plus `value`.

        The parser passes `value` as a list of ``(key, value)`` pairs.
        """
        self.anchor, self.tag, self.value = anchor, tag, value
77
class Parser:
    """Recursive-descent parser that builds node trees from scanner tokens.

    The parser needs a scanner object offering ``peek_token()`` (return the
    next token without consuming it) and ``get_token()`` (consume and return
    it).  Every ``parse_*`` method implements one production rule of the
    grammar and returns either a node or the raw content of a node.
    Syntax errors are reported by raising ``ParserError`` (see ``fail``).
    """

    def __init__(self, scanner):
        """Attach the parser to `scanner`, the source of tokens."""
        self.scanner = scanner

    def is_token(self, *choices):
        """Return True if the next token is an instance of any of `choices`.

        The token is only peeked at, never consumed.  With no `choices`
        the result is always False.
        """
        # isinstance() accepts a tuple of classes, so no explicit loop is
        # needed; an empty tuple yields False.
        return isinstance(self.scanner.peek_token(), choices)

    def get_token(self):
        """Consume the next token from the scanner and return it."""
        return self.scanner.get_token()

    def fail(self, message):
        """Raise ``ParserError`` with `message` and a snippet of the input.

        The snippet comes from the start marker of the next (offending)
        token.
        """
        marker = self.scanner.peek_token().start_marker
        raise ParserError(message+':\n'+marker.get_snippet())

    def _expect(self, token_class, message):
        """Consume and return the next token; fail unless it is a `token_class`."""
        if not self.is_token(token_class):
            self.fail(message)
        return self.get_token()

    def _skip(self, token_class):
        """Consume every consecutive token that is a `token_class`."""
        while self.is_token(token_class):
            self.get_token()

    def parse(self):
        """Parse the whole stream; see ``parse_stream`` for the result."""
        return self.parse_stream()

    def parse_stream(self):
        """Parse ``implicit_document? explicit_document* END``.

        Returns a list with one entry per document: the root node, or None
        for an explicit document without content.  The STREAM-END token is
        left unconsumed.
        """
        documents = []
        # Implicit document: content that is not introduced by directives
        # or a DOCUMENT-START marker.
        if not self.is_token(DirectiveToken, DocumentStartToken, StreamEndToken):
            documents.append(self.parse_block_node())
            # The grammar allows DOCUMENT-END after an implicit document as
            # well; without this the loop below would demand DOCUMENT-START.
            self._skip(DocumentEndToken)
        # Explicit documents.  The loop ends only on STREAM-END, so no
        # further check is required afterwards.
        while not self.is_token(StreamEndToken):
            self._skip(DirectiveToken)
            self._expect(DocumentStartToken, 'DOCUMENT-START is expected')
            if self.is_token(DirectiveToken,
                    DocumentStartToken, DocumentEndToken, StreamEndToken):
                # DOCUMENT-START immediately followed by the next document
                # or the end of the stream: an empty document.
                documents.append(None)
            else:
                documents.append(self.parse_block_node())
            self._skip(DocumentEndToken)
        return documents

    def parse_block_node(self):
        """Parse ``block_node`` and return the resulting node."""
        return self.parse_node(block=True)

    def parse_flow_node(self):
        """Parse ``flow_node`` and return the resulting node."""
        return self.parse_node()

    def parse_block_node_or_indentless_sequence(self):
        """Parse ``block_node_or_indentless_sequence`` and return the node."""
        return self.parse_node(block=True, indentless_sequence=True)

    def parse_node(self, block=False, indentless_sequence=False):
        """Parse an alias, or optional properties followed by content.

        `block` selects block content (which also admits flow collections)
        instead of flow content.  `indentless_sequence` additionally allows
        the content to be a run of ENTRY-introduced items without a
        BLOCK-SEQUENCE-START token.

        Returns an ``AliasNode``, ``ScalarNode``, ``SequenceNode`` or
        ``MappingNode``; raises ``ParserError`` if no content is found.
        """
        if self.is_token(AliasToken):
            return AliasNode(self.get_token().value)
        # properties ::= TAG ANCHOR? | ANCHOR TAG?
        anchor = None
        tag = None
        if self.is_token(AnchorToken):
            anchor = self.get_token().value
            if self.is_token(TagToken):
                tag = self.get_token().value
        elif self.is_token(TagToken):
            tag = self.get_token().value
            if self.is_token(AnchorToken):
                anchor = self.get_token().value
        if indentless_sequence and self.is_token(EntryToken):
            return SequenceNode(anchor, tag, self.parse_indentless_sequence())
        # Choose the node class from the first content token before that
        # token is consumed by the content parser.
        if self.is_token(ScalarToken):
            NodeClass = ScalarNode
        elif self.is_token(BlockSequenceStartToken, FlowSequenceStartToken):
            NodeClass = SequenceNode
        elif self.is_token(BlockMappingStartToken, FlowMappingStartToken):
            NodeClass = MappingNode
        else:
            # No content token: the content parser below raises ParserError,
            # so this placeholder is never called.
            NodeClass = None
        if block:
            value = self.parse_block_content()
        else:
            value = self.parse_flow_content()
        return NodeClass(anchor, tag, value)

    def parse_block_content(self):
        """Parse ``block_content`` and return its raw value.

        The value is the scalar token's value, a list of entries, or a list
        of ``(key, value)`` pairs.  Raises ``ParserError`` on anything else.
        """
        if self.is_token(ScalarToken):
            return self.get_token().value
        elif self.is_token(BlockSequenceStartToken):
            return self.parse_block_sequence()
        elif self.is_token(BlockMappingStartToken):
            return self.parse_block_mapping()
        elif self.is_token(FlowSequenceStartToken):
            return self.parse_flow_sequence()
        elif self.is_token(FlowMappingStartToken):
            return self.parse_flow_mapping()
        else:
            self.fail('block content is expected')

    def parse_flow_content(self):
        """Parse ``flow_content`` and return its raw value.

        Like ``parse_block_content`` but block collections are rejected.
        """
        if self.is_token(ScalarToken):
            return self.get_token().value
        elif self.is_token(FlowSequenceStartToken):
            return self.parse_flow_sequence()
        elif self.is_token(FlowMappingStartToken):
            return self.parse_flow_mapping()
        else:
            self.fail('flow content is expected')

    def parse_block_sequence(self):
        """Parse ``BLOCK-SEQUENCE-START (ENTRY block_node?)* BLOCK-END``.

        Returns the list of entry nodes; an empty entry is stored as None.
        """
        sequence = []
        self._expect(BlockSequenceStartToken, 'BLOCK-SEQUENCE-START is expected')
        while self.is_token(EntryToken):
            self.get_token()
            if not self.is_token(EntryToken, BlockEndToken):
                sequence.append(self.parse_block_node())
            else:
                sequence.append(None)
        self._expect(BlockEndToken, 'BLOCK-END is expected')
        return sequence

    def parse_indentless_sequence(self):
        """Parse ``(ENTRY block_node?)+`` and return the list of entries.

        An indentless sequence has no BLOCK-END of its own: it is ended by
        whatever continues the enclosing block mapping.  An entry directly
        followed by ENTRY, KEY, VALUE or BLOCK-END is therefore empty and
        stored as None.
        """
        sequence = []
        while self.is_token(EntryToken):
            self.get_token()
            if not self.is_token(EntryToken,
                    KeyToken, ValueToken, BlockEndToken):
                sequence.append(self.parse_block_node())
            else:
                sequence.append(None)
        return sequence

    def parse_block_mapping(self):
        """Parse a block mapping and return a list of ``(key, value)`` pairs.

        Either half of a pair may be missing, in which case it is None.
        """
        mapping = []
        self._expect(BlockMappingStartToken, 'BLOCK-MAPPING-START is expected')
        while self.is_token(KeyToken, ValueToken):
            key = None
            value = None
            if self.is_token(KeyToken):
                self.get_token()
                if not self.is_token(KeyToken, ValueToken, BlockEndToken):
                    key = self.parse_block_node_or_indentless_sequence()
            if self.is_token(ValueToken):
                self.get_token()
                if not self.is_token(KeyToken, ValueToken, BlockEndToken):
                    value = self.parse_block_node_or_indentless_sequence()
            mapping.append((key, value))
        self._expect(BlockEndToken, 'BLOCK-END is expected')
        return mapping

    def _parse_flow_pair(self, end_token):
        """Parse ``KEY flow_node? (VALUE flow_node?)?`` inside a flow collection.

        The next token must be KEY.  `end_token` is the class of the token
        closing the enclosing collection.  Returns a ``(key, value)`` tuple
        whose missing parts are None.
        """
        self.get_token()
        key = None
        value = None
        if not self.is_token(ValueToken):
            key = self.parse_flow_node()
        if self.is_token(ValueToken):
            self.get_token()
            if not self.is_token(EntryToken, end_token):
                value = self.parse_flow_node()
        return key, value

    def parse_flow_sequence(self):
        """Parse a flow sequence and return the list of entry nodes.

        An entry written as ``KEY ... VALUE ...`` becomes a single-pair
        ``MappingNode`` without anchor or tag.
        """
        sequence = []
        self._expect(FlowSequenceStartToken, 'FLOW-SEQUENCE-START is expected')
        while not self.is_token(FlowSequenceEndToken):
            if self.is_token(KeyToken):
                pair = self._parse_flow_pair(FlowSequenceEndToken)
                sequence.append(MappingNode(None, None, [pair]))
            else:
                sequence.append(self.parse_flow_node())
            if not self.is_token(EntryToken, FlowSequenceEndToken):
                self.fail("ENTRY or FLOW-SEQUENCE-END are expected")
            if self.is_token(EntryToken):
                self.get_token()
        # The loop ends only on FLOW-SEQUENCE-END; consume it.
        self.get_token()
        return sequence

    def parse_flow_mapping(self):
        """Parse a flow mapping and return a list of ``(key, value)`` pairs.

        An entry without a KEY indicator is stored as ``(node, None)``.
        """
        mapping = []
        self._expect(FlowMappingStartToken, 'FLOW-MAPPING-START is expected')
        while not self.is_token(FlowMappingEndToken):
            if self.is_token(KeyToken):
                mapping.append(self._parse_flow_pair(FlowMappingEndToken))
            else:
                mapping.append((self.parse_flow_node(), None))
            if not self.is_token(EntryToken, FlowMappingEndToken):
                self.fail("ENTRY or FLOW-MAPPING-END are expected")
            if self.is_token(EntryToken):
                self.get_token()
        # The loop ends only on FLOW-MAPPING-END; consume it.
        self.get_token()
        return mapping
[43]288
Note: See TracBrowser for help on using the repository browser.