source: libyaml/trunk/src/scanner.c @ 185

Revision 185, 28.4 KB checked in by xi, 8 years ago (diff)

Implementing Scanner: macros and high-level functions.

RevLine 
[184]1
2/*
3 * Introduction
4 * ************
5 *
6 * The following notes assume that you are familiar with the YAML specification
7 * (http://yaml.org/spec/cvs/current.html).  We mostly follow it, although in
8 * some cases we are less restrictive than it requires.
9 *
10 * The process of transforming a YAML stream into a sequence of events is
11 * divided into two steps: Scanning and Parsing.
12 *
13 * The Scanner transforms the input stream into a sequence of tokens, while the
14 * Parser transforms the sequence of tokens produced by the Scanner into a
15 * sequence of parsing events.
16 *
17 * The Scanner is rather clever and complicated. The Parser, on the contrary,
18 * is a straightforward implementation of a recursive-descent parser (or,
19 * LL(1) parser, as it is usually called).
20 *
21 * Actually there are two issues of Scanning that might be called "clever", the
22 * rest is quite straightforward.  The issues are "block collection start" and
23 * "simple keys".  Both issues are explained below in detail.
24 *
25 * Here the Scanning step is explained and implemented.  We start with the list
26 * of all the tokens produced by the Scanner together with short descriptions.
27 *
28 * Now, tokens:
29 *
30 *      STREAM-START(encoding)          # The stream start.
31 *      STREAM-END                      # The stream end.
32 *      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
33 *      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
34 *      DOCUMENT-START                  # '---'
35 *      DOCUMENT-END                    # '...'
36 *      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
37 *      BLOCK-MAPPING-START             # sequence or a block mapping.
38 *      BLOCK-END                       # Indentation decrease.
39 *      FLOW-SEQUENCE-START             # '['
40 *      FLOW-SEQUENCE-END               # ']'
41 *      FLOW-MAPPING-START              # '{'
42 *      FLOW-MAPPING-END                # '}'
43 *      BLOCK-ENTRY                     # '-'
44 *      FLOW-ENTRY                      # ','
45 *      KEY                             # '?' or nothing (simple keys).
46 *      VALUE                           # ':'
47 *      ALIAS(anchor)                   # '*anchor'
48 *      ANCHOR(anchor)                  # '&anchor'
49 *      TAG(handle,suffix)              # '!handle!suffix'
50 *      SCALAR(value,style)             # A scalar.
51 *
52 * The following two tokens are "virtual" tokens denoting the beginning and the
53 * end of the stream:
54 *
55 *      STREAM-START(encoding)
56 *      STREAM-END
57 *
58 * We pass the information about the input stream encoding with the
59 * STREAM-START token.
60 *
61 * The next two tokens are responsible for directives:
62 *
63 *      VERSION-DIRECTIVE(major,minor)
64 *      TAG-DIRECTIVE(handle,prefix)
65 *
66 * Example:
67 *
68 *      %YAML   1.1
69 *      %TAG    !   !foo
70 *      %TAG    !yaml!  tag:yaml.org,2002:
71 *      ---
72 *
73 * The corresponding sequence of tokens:
74 *
75 *      STREAM-START(utf-8)
76 *      VERSION-DIRECTIVE(1,1)
77 *      TAG-DIRECTIVE("!","!foo")
78 *      TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79 *      DOCUMENT-START
80 *      STREAM-END
81 *
82 * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83 * line.
84 *
85 * The document start and end indicators are represented by:
86 *
87 *      DOCUMENT-START
88 *      DOCUMENT-END
89 *
90 * Note that if a YAML stream contains an implicit document (without '---'
91 * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92 * produced.
93 *
94 * In the following examples, we present whole documents together with the
95 * produced tokens.
96 *
97 *      1. An implicit document:
98 *
99 *          'a scalar'
100 *
101 *      Tokens:
102 *
103 *          STREAM-START(utf-8)
104 *          SCALAR("a scalar",single-quoted)
105 *          STREAM-END
106 *
107 *      2. An explicit document:
108 *
109 *          ---
110 *          'a scalar'
111 *          ...
112 *
113 *      Tokens:
114 *
115 *          STREAM-START(utf-8)
116 *          DOCUMENT-START
117 *          SCALAR("a scalar",single-quoted)
118 *          DOCUMENT-END
119 *          STREAM-END
120 *
121 *      3. Several documents in a stream:
122 *
123 *          'a scalar'
124 *          ---
125 *          'another scalar'
126 *          ---
127 *          'yet another scalar'
128 *
129 *      Tokens:
130 *
131 *          STREAM-START(utf-8)
132 *          SCALAR("a scalar",single-quoted)
133 *          DOCUMENT-START
134 *          SCALAR("another scalar",single-quoted)
135 *          DOCUMENT-START
136 *          SCALAR("yet another scalar",single-quoted)
137 *          STREAM-END
138 *
139 * We have already introduced the SCALAR token above.  The following tokens are
140 * used to describe aliases, anchors, tags, and scalars:
141 *
142 *      ALIAS(anchor)
143 *      ANCHOR(anchor)
144 *      TAG(handle,suffix)
145 *      SCALAR(value,style)
146 *
147 * The following series of examples illustrate the usage of these tokens:
148 *
149 *      1. A recursive sequence:
150 *
151 *          &A [ *A ]
152 *
153 *      Tokens:
154 *
155 *          STREAM-START(utf-8)
156 *          ANCHOR("A")
157 *          FLOW-SEQUENCE-START
158 *          ALIAS("A")
159 *          FLOW-SEQUENCE-END
160 *          STREAM-END
161 *
162 *      2. A tagged scalar:
163 *
164 *          !!float "3.14"  # A good approximation.
165 *
166 *      Tokens:
167 *
168 *          STREAM-START(utf-8)
169 *          TAG("!!","float")
170 *          SCALAR("3.14",double-quoted)
171 *          STREAM-END
172 *
173 *      3. Various scalar styles:
174 *
175 *          --- # Implicit empty plain scalars do not produce tokens.
176 *          --- a plain scalar
177 *          --- 'a single-quoted scalar'
178 *          --- "a double-quoted scalar"
179 *          --- |-
180 *            a literal scalar
181 *          --- >-
182 *            a folded
183 *            scalar
184 *
185 *      Tokens:
186 *
187 *          STREAM-START(utf-8)
188 *          DOCUMENT-START
189 *          DOCUMENT-START
190 *          SCALAR("a plain scalar",plain)
191 *          DOCUMENT-START
192 *          SCALAR("a single-quoted scalar",single-quoted)
193 *          DOCUMENT-START
194 *          SCALAR("a double-quoted scalar",double-quoted)
195 *          DOCUMENT-START
196 *          SCALAR("a literal scalar",literal)
197 *          DOCUMENT-START
198 *          SCALAR("a folded scalar",folded)
199 *          STREAM-END
200 *
201 * Now it's time to review collection-related tokens. We will start with
202 * flow collections:
203 *
204 *      FLOW-SEQUENCE-START
205 *      FLOW-SEQUENCE-END
206 *      FLOW-MAPPING-START
207 *      FLOW-MAPPING-END
208 *      FLOW-ENTRY
209 *      KEY
210 *      VALUE
211 *
212 * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213 * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214 * correspondingly.  FLOW-ENTRY represents the ',' indicator.  Finally the
215 * indicators '?' and ':', which are used for denoting mapping keys and values,
216 * are represented by the KEY and VALUE tokens.
217 *
218 * The following examples show flow collections:
219 *
220 *      1. A flow sequence:
221 *
222 *          [item 1, item 2, item 3]
223 *
224 *      Tokens:
225 *
226 *          STREAM-START(utf-8)
227 *          FLOW-SEQUENCE-START
228 *          SCALAR("item 1",plain)
229 *          FLOW-ENTRY
230 *          SCALAR("item 2",plain)
231 *          FLOW-ENTRY
232 *          SCALAR("item 3",plain)
233 *          FLOW-SEQUENCE-END
234 *          STREAM-END
235 *
236 *      2. A flow mapping:
237 *
238 *          {
239 *              a simple key: a value,  # Note that the KEY token is produced.
240 *              ? a complex key: another value,
241 *          }
242 *
243 *      Tokens:
244 *
245 *          STREAM-START(utf-8)
246 *          FLOW-MAPPING-START
247 *          KEY
248 *          SCALAR("a simple key",plain)
249 *          VALUE
250 *          SCALAR("a value",plain)
251 *          FLOW-ENTRY
252 *          KEY
253 *          SCALAR("a complex key",plain)
254 *          VALUE
255 *          SCALAR("another value",plain)
256 *          FLOW-ENTRY
257 *          FLOW-MAPPING-END
258 *          STREAM-END
259 *
260 * A simple key is a key which is not denoted by the '?' indicator.  Note that
261 * the Scanner still produces the KEY token whenever it encounters a simple key.
262 *
263 * For scanning block collections, the following tokens are used (note that we
264 * repeat KEY and VALUE here):
265 *
266 *      BLOCK-SEQUENCE-START
267 *      BLOCK-MAPPING-START
268 *      BLOCK-END
269 *      BLOCK-ENTRY
270 *      KEY
271 *      VALUE
272 *
273 * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274 * increase that precedes a block collection (cf. the INDENT token in Python).
275 * The token BLOCK-END denotes indentation decrease that ends a block collection
276 * (cf. the DEDENT token in Python).  However YAML has some syntax peculiarities
277 * that make detection of these tokens more complex.
278 *
279 * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280 * '-', '?', and ':' correspondingly.
281 *
282 * The following examples show how the tokens BLOCK-SEQUENCE-START,
283 * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284 *
285 *      1. Block sequences:
286 *
287 *          - item 1
288 *          - item 2
289 *          -
290 *            - item 3.1
291 *            - item 3.2
292 *          -
293 *            key 1: value 1
294 *            key 2: value 2
295 *
296 *      Tokens:
297 *
298 *          STREAM-START(utf-8)
299 *          BLOCK-SEQUENCE-START
300 *          BLOCK-ENTRY
301 *          SCALAR("item 1",plain)
302 *          BLOCK-ENTRY
303 *          SCALAR("item 2",plain)
304 *          BLOCK-ENTRY
305 *          BLOCK-SEQUENCE-START
306 *          BLOCK-ENTRY
307 *          SCALAR("item 3.1",plain)
308 *          BLOCK-ENTRY
309 *          SCALAR("item 3.2",plain)
310 *          BLOCK-END
311 *          BLOCK-ENTRY
312 *          BLOCK-MAPPING-START
313 *          KEY
314 *          SCALAR("key 1",plain)
315 *          VALUE
316 *          SCALAR("value 1",plain)
317 *          KEY
318 *          SCALAR("key 2",plain)
319 *          VALUE
320 *          SCALAR("value 2",plain)
321 *          BLOCK-END
322 *          BLOCK-END
323 *          STREAM-END
324 *
325 *      2. Block mappings:
326 *
327 *          a simple key: a value   # The KEY token is produced here.
328 *          ? a complex key
329 *          : another value
330 *          a mapping:
331 *            key 1: value 1
332 *            key 2: value 2
333 *          a sequence:
334 *            - item 1
335 *            - item 2
336 *
337 *      Tokens:
338 *
339 *          STREAM-START(utf-8)
340 *          BLOCK-MAPPING-START
341 *          KEY
342 *          SCALAR("a simple key",plain)
343 *          VALUE
344 *          SCALAR("a value",plain)
345 *          KEY
346 *          SCALAR("a complex key",plain)
347 *          VALUE
348 *          SCALAR("another value",plain)
349 *          KEY
350 *          SCALAR("a mapping",plain)
 *          VALUE
351 *          BLOCK-MAPPING-START
352 *          KEY
353 *          SCALAR("key 1",plain)
354 *          VALUE
355 *          SCALAR("value 1",plain)
356 *          KEY
357 *          SCALAR("key 2",plain)
358 *          VALUE
359 *          SCALAR("value 2",plain)
360 *          BLOCK-END
361 *          KEY
362 *          SCALAR("a sequence",plain)
363 *          VALUE
364 *          BLOCK-SEQUENCE-START
365 *          BLOCK-ENTRY
366 *          SCALAR("item 1",plain)
367 *          BLOCK-ENTRY
368 *          SCALAR("item 2",plain)
369 *          BLOCK-END
370 *          BLOCK-END
371 *          STREAM-END
372 *
373 * YAML does not always require to start a new block collection from a new
374 * line.  If the current line contains only '-', '?', and ':' indicators, a new
375 * block collection may start at the current line.  The following examples
376 * illustrate this case:
377 *
378 *      1. Collections in a sequence:
379 *
380 *          - - item 1
381 *            - item 2
382 *          - key 1: value 1
383 *            key 2: value 2
384 *          - ? complex key
385 *            : complex value
386 *
387 *      Tokens:
388 *
389 *          STREAM-START(utf-8)
390 *          BLOCK-SEQUENCE-START
391 *          BLOCK-ENTRY
392 *          BLOCK-SEQUENCE-START
393 *          BLOCK-ENTRY
394 *          SCALAR("item 1",plain)
395 *          BLOCK-ENTRY
396 *          SCALAR("item 2",plain)
397 *          BLOCK-END
398 *          BLOCK-ENTRY
399 *          BLOCK-MAPPING-START
400 *          KEY
401 *          SCALAR("key 1",plain)
402 *          VALUE
403 *          SCALAR("value 1",plain)
404 *          KEY
405 *          SCALAR("key 2",plain)
406 *          VALUE
407 *          SCALAR("value 2",plain)
408 *          BLOCK-END
409 *          BLOCK-ENTRY
410 *          BLOCK-MAPPING-START
411 *          KEY
412 *          SCALAR("complex key",plain)
413 *          VALUE
414 *          SCALAR("complex value",plain)
415 *          BLOCK-END
416 *          BLOCK-END
417 *          STREAM-END
418 *
419 *      2. Collections in a mapping:
420 *
421 *          ? a sequence
422 *          : - item 1
423 *            - item 2
424 *          ? a mapping
425 *          : key 1: value 1
426 *            key 2: value 2
427 *
428 *      Tokens:
429 *
430 *          STREAM-START(utf-8)
431 *          BLOCK-MAPPING-START
432 *          KEY
433 *          SCALAR("a sequence",plain)
434 *          VALUE
435 *          BLOCK-SEQUENCE-START
436 *          BLOCK-ENTRY
437 *          SCALAR("item 1",plain)
438 *          BLOCK-ENTRY
439 *          SCALAR("item 2",plain)
440 *          BLOCK-END
441 *          KEY
442 *          SCALAR("a mapping",plain)
443 *          VALUE
444 *          BLOCK-MAPPING-START
445 *          KEY
446 *          SCALAR("key 1",plain)
447 *          VALUE
448 *          SCALAR("value 1",plain)
449 *          KEY
450 *          SCALAR("key 2",plain)
451 *          VALUE
452 *          SCALAR("value 2",plain)
453 *          BLOCK-END
454 *          BLOCK-END
455 *          STREAM-END
456 *
457 * YAML also permits non-indented sequences if they are included into a block
458 * mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
459 *
460 *      key:
461 *      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
462 *      - item 2
463 *
464 * Tokens:
465 *
466 *      STREAM-START(utf-8)
467 *      BLOCK-MAPPING-START
468 *      KEY
469 *      SCALAR("key",plain)
470 *      VALUE
471 *      BLOCK-ENTRY
472 *      SCALAR("item 1",plain)
473 *      BLOCK-ENTRY
474 *      SCALAR("item 2",plain)
475 *      BLOCK-END
476 */
477
478#if HAVE_CONFIG_H
479#include <config.h>
480#endif
481
482#include <yaml/yaml.h>
483
484#include <assert.h>
485
/*
 * Scanner helper macros.
 *
 * All of the macros below expand their arguments more than once, so the
 * arguments must be free of side effects.  Every argument is parenthesized
 * in the expansion so that casts and compound expressions may be passed
 * safely.
 */

/*
 * Ensure that the buffer contains the required number of characters.
 * Return 1 on success, 0 on failure (reader error or memory error).
 */

#define UPDATE(parser,length)       \
    ((parser)->unread >= (length)   \
        ? 1                         \
        : yaml_parser_update_buffer((parser), (length)))

/*
 * Check the octet at the specified position.
 *
 * NOTE(review): the octet is read relative to parser->buffer; confirm that
 * this member denotes the current read position and not the start of the
 * working buffer.
 */

#define CHECK_AT(parser,octet,offset)   \
    ((parser)->buffer[offset] == (yaml_char_t)(octet))

/*
 * Check the current octet in the buffer.
 */

#define CHECK(parser,octet) CHECK_AT((parser),(octet),0)

/*
 * Check if the character at the specified position is NUL.
 */

#define IS_Z_AT(parser,offset)    CHECK_AT((parser),'\0',(offset))

#define IS_Z(parser)    IS_Z_AT((parser),0)

/*
 * Check if the character at the specified position is space.
 */

#define IS_SPACE_AT(parser,offset)  CHECK_AT((parser),' ',(offset))

#define IS_SPACE(parser)    IS_SPACE_AT((parser),0)

/*
 * Check if the character at the specified position is tab.
 */

#define IS_TAB_AT(parser,offset)    CHECK_AT((parser),'\t',(offset))

#define IS_TAB(parser)  IS_TAB_AT((parser),0)

/*
 * Check if the character at the specified position is blank (space or tab).
 */

#define IS_BLANK_AT(parser,offset)  \
    (IS_SPACE_AT((parser),(offset)) || IS_TAB_AT((parser),(offset)))

#define IS_BLANK(parser)    IS_BLANK_AT((parser),0)

/*
 * Check if the character at the specified position is a line break.
 *
 * The multi-octet breaks are matched by their UTF-8 encodings; the trailing
 * octets are only inspected after the leading octet has matched.
 */

#define IS_BREAK_AT(parser,offset)                                          \
    (CHECK_AT((parser),'\r',(offset))                   /* CR (#xD) */      \
     || CHECK_AT((parser),'\n',(offset))                /* LF (#xA) */      \
     || (CHECK_AT((parser),'\xC2',(offset))                                 \
         && CHECK_AT((parser),'\x85',((offset)+1)))     /* NEL (#x85) */    \
     || (CHECK_AT((parser),'\xE2',(offset))                                 \
         && CHECK_AT((parser),'\x80',((offset)+1))                          \
         && CHECK_AT((parser),'\xA8',((offset)+2)))     /* LS (#x2028) */   \
     || (CHECK_AT((parser),'\xE2',(offset))                                 \
         && CHECK_AT((parser),'\x80',((offset)+1))                          \
         && CHECK_AT((parser),'\xA9',((offset)+2))))    /* PS (#x2029) */

#define IS_BREAK(parser)    IS_BREAK_AT((parser),0)

/*
 * Check if the character is a line break or NUL.
 */

#define IS_BREAKZ_AT(parser,offset) \
    (IS_BREAK_AT((parser),(offset)) || IS_Z_AT((parser),(offset)))

#define IS_BREAKZ(parser)   IS_BREAKZ_AT((parser),0)

/*
 * Check if the character is a line break, space, or NUL.
 */

#define IS_SPACEZ_AT(parser,offset) \
    (IS_SPACE_AT((parser),(offset)) || IS_BREAKZ_AT((parser),(offset)))

#define IS_SPACEZ(parser)   IS_SPACEZ_AT((parser),0)

/*
 * Check if the character is a line break, space, tab, or NUL.
 */

#define IS_BLANKZ_AT(parser,offset) \
    (IS_BLANK_AT((parser),(offset)) || IS_BREAKZ_AT((parser),(offset)))

#define IS_BLANKZ(parser)   IS_BLANKZ_AT((parser),0)
586
587/*
[184]588 * Public API declarations.
589 */
590
591YAML_DECLARE(yaml_token_t *)
592yaml_parser_get_token(yaml_parser_t *parser);
593
594YAML_DECLARE(yaml_token_t *)
595yaml_parser_peek_token(yaml_parser_t *parser);
596
597/*
[185]598 * Error handling.
599 */
600
601static int
602yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
603        yaml_mark_t context_mark, const char *problem);
604
605static yaml_mark_t
606yaml_parser_get_mark(yaml_parser_t *parser);
607
608/*
[184]609 * High-level token API.
610 */
611
612static int
613yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
614
615static int
616yaml_parser_fetch_next_token(yaml_parser_t *parser);
617
618/*
619 * Potential simple keys.
620 */
621
622static int
623yaml_parser_stale_simple_keys(yaml_parser_t *parser);
624
625static int
626yaml_parser_save_simple_key(yaml_parser_t *parser);
627
628static int
629yaml_parser_remove_simple_key(yaml_parser_t *parser);
630
631/*
632 * Indentation treatment.
633 */
634
635static int
[185]636yaml_parser_roll_indent(yaml_parser_t *parser, int column);
[184]637
638static int
[185]639yaml_parser_unroll_indent(yaml_parser_t *parser, int column);
[184]640
641/*
642 * Token fetchers.
643 */
644
645static int
646yaml_parser_fetch_stream_start(yaml_parser_t *parser);
647
648static int
649yaml_parser_fetch_stream_end(yaml_parser_t *parser);
650
651static int
652yaml_parser_fetch_directive(yaml_parser_t *parser);
653
654static int
655yaml_parser_fetch_document_start(yaml_parser_t *parser);
656
657static int
658yaml_parser_fetch_document_end(yaml_parser_t *parser);
659
660static int
661yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
662        yaml_token_type_t type);
663
664static int
665yaml_parser_fetch_flow_sequence_start(yaml_parser_t *parser);
666
667static int
668yaml_parser_fetch_flow_mapping_start(yaml_parser_t *parser);
669
670static int
671yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
672        yaml_token_type_t type);
673
674static int
675yaml_parser_fetch_flow_sequence_end(yaml_parser_t *parser);
676
677static int
678yaml_parser_fetch_flow_mapping_end(yaml_parser_t *parser);
679
680static int
681yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
682        yaml_token_type_t type);
683
684static int
685yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
686
687static int
688yaml_parser_fetch_block_entry(yaml_parser_t *parser);
689
690static int
691yaml_parser_fetch_key(yaml_parser_t *parser);
692
693static int
694yaml_parser_fetch_value(yaml_parser_t *parser);
695
696static int
697yaml_parser_fetch_alias(yaml_parser_t *parser);
698
699static int
700yaml_parser_fetch_anchor(yaml_parser_t *parser);
701
702static int
703yaml_parser_fetch_tag(yaml_parser_t *parser);
704
705static int
706yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
707
708static int
709yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
710
711static int
712yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
713
714/*
715 * Token scanners.
716 */
717
718static int
719yaml_parser_scan_to_next_token(yaml_parser_t *parser);
720
721static yaml_token_t *
722yaml_parser_scan_directive(yaml_parser_t *parser);
723
724static int
725yaml_parser_scan_directive_name(yaml_parser_t *parser,
726        yaml_mark_t start_mark, yaml_char_t **name);
727
728static int
729yaml_parser_scan_yaml_directive_value(yaml_parser_t *parser,
730        yaml_mark_t start_mark, int *major, int *minor);
731
732static int
733yaml_parser_scan_yaml_directive_number(yaml_parser_t *parser,
734        yaml_mark_t start_mark, int *number);
735
736static int
737yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
738        yaml_char_t **handle, yaml_char_t **prefix);
739
740static yaml_token_t *
741yaml_parser_scan_anchor(yaml_parser_t *parser,
742        yaml_token_type_t type);
743
744static yaml_token_t *
745yaml_parser_scan_tag(yaml_parser_t *parser);
746
747static int
748yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
749        yaml_mark_t start_mark, yaml_char_t **handle);
750
751static int
752yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
753        yaml_mark_t start_mark, yaml_char_t **url);
754
755static yaml_token_t *
756yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal);
757
758static int
759yaml_parser_scan_block_scalar_indicators(yaml_parser_t *parser,
760        yaml_mark_t start_mark, int *chomping, int *increment);
761
762static yaml_token_t *
763yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single);
764
765static yaml_token_t *
766yaml_parser_scan_plain_scalar(yaml_parser_t *parser);
767
[185]768/*
769 * Get the next token and remove it from the tokens queue.
770 */
771
772YAML_DECLARE(yaml_token_t *)
773yaml_parser_get_token(yaml_parser_t *parser)
774{
775    yaml_token_t *token;
776
777    assert(parser); /* Non-NULL parser object is expected. */
778    assert(!parser->stream_end_produced);   /* No tokens after STREAM-END. */
779
780    /* Ensure that the tokens queue contains enough tokens. */
781
782    if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
783
784    /* Fetch the next token from the queue. */
785
786    token = parser->tokens[parser->tokens_head];
787
788    /* Move the queue head. */
789
790    parser->tokens[parser->tokens_head++] = NULL;
791    if (parser->tokens_head == parser->tokens_size)
792        parser->tokens_head = 0;
793
794    parser->tokens_parsed++;
795
796    return token;
797}
798
799/*
800 * Get the next token, but don't remove it from the queue.
801 */
802
803YAML_DECLARE(yaml_token_t *)
804yaml_parser_peek_token(yaml_parser_t *parser)
805{
806    assert(parser); /* Non-NULL parser object is expected. */
807    assert(!parser->stream_end_produced);   /* No tokens after STREAM-END. */
808
809    /* Ensure that the tokens queue contains enough tokens. */
810
811    if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
812
813    /* Fetch the next token from the queue. */
814
815    return parser->tokens[parser->tokens_head];
816}
817
818/*
819 * Set the scanner error and return 0.
820 */
821
822static int
823yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
824        yaml_mark_t context_mark, const char *problem)
825{
826    parser->error = YAML_SCANNER_ERROR;
827    parser->context = context;
828    parser->context_mark = context_mark;
829    parser->problem = problem;
830    parser->problem_mark = yaml_parser_get_mark(parser);
831}
832
833/*
834 * Get the mark for the current buffer position.
835 */
836
837static yaml_mark_t
838yaml_parser_get_mark(yaml_parser_t *parser)
839{
840    yaml_mark_t mark = { parser->index, parser->line, parser->column };
841
842    return mark;
843}
844
845
846/*
847 * Ensure that the tokens queue contains at least one token which can be
848 * returned to the Parser.
849 */
850
851static int
852yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
853{
854    int need_more_tokens;
855    int k;
856
857    /* While we need more tokens to fetch, do it. */
858
859    while (1)
860    {
861        /*
862         * Check if we really need to fetch more tokens.
863         */
864
865        need_more_tokens = 0;
866
867        if (parser->tokens_head == parser->tokens_tail)
868        {
869            /* Queue is empty. */
870
871            need_more_tokens = 1;
872        }
873        else
874        {
875            /* Check if any potential simple key may occupy the head position. */
876
877            for (k = 0; k <= parser->flow_level; k++) {
878                yaml_simple_key_t *simple_key = parser->simple_keys[k];
879                if (simple_key
880                        && (simple_key->token_number == parser->tokens_parsed)) {
881                    need_more_tokens = 1;
882                    break;
883                }
884            }
885        }
886
887        /* We are finished. */
888
889        if (!need_more_tokens)
890            break;
891
892        /* Fetch the next token. */
893
894        if (!yaml_parser_fetch_next_token(parser))
895            return 0;
896    }
897
898    return 1;
899}
900
901/*
902 * The dispatcher for token fetchers.
903 */
904
905static int
906yaml_parser_fetch_next_token(yaml_parser_t *parser)
907{
908    /* Ensure that the buffer is initialized. */
909
910    if (!UPDATE(parser, 1))
911        return 0;
912
913    /* Check if we just started scanning.  Fetch STREAM-START then. */
914
915    if (!parser->stream_start_produced)
916        return yaml_parser_fetch_stream_start(parser);
917
918    /* Eat whitespaces and comments until we reach the next token. */
919
920    if (!yaml_parser_scan_to_next_token(parser))
921        return 0;
922
923    /* Check the indentation level against the current column. */
924
925    if (!yaml_parser_unroll_indent(parser, parser->column))
926        return 0;
927
928    /*
929     * Ensure that the buffer contains at least 4 characters.  4 is the length
930     * of the longest indicators ('--- ' and '... ').
931     */
932
933    if (!UPDATE(parser, 4))
934        return 0;
935
936    /* Is it the end of the stream? */
937
938    if (IS_Z(parser))
939        return yaml_parser_fetch_stream_end(parser);
940
941    /* Is it a directive? */
942
943    if (parser->column == 0 && CHECK(parser, '%'))
944        return yaml_parser_fetch_directive(parser);
945
946    /* Is it the document start indicator? */
947
948    if (parser->column == 0
949            && CHECK_AT(parser, '-', 0)
950            && CHECK_AT(parser, '-', 1)
951            && CHECK_AT(parser, '-', 2)
952            && IS_BLANKZ_AT(parser, 3))
953        return yaml_parser_fetch_document_start(parser);
954
955    /* Is it the document end indicator? */
956
957    if (parser->column == 0
958            && CHECK_AT(parser, '.', 0)
959            && CHECK_AT(parser, '.', 1)
960            && CHECK_AT(parser, '.', 2)
961            && IS_BLANKZ_AT(parser, 3))
962        return yaml_parser_fetch_document_start(parser);
963
964    /* Is it the flow sequence start indicator? */
965
966    if (CHECK(parser, '['))
967        return yaml_parser_fetch_flow_sequence_start(parser);
968
969    /* Is it the flow mapping start indicator? */
970
971    if (CHECK(parser, '{'))
972        return yaml_parser_fetch_flow_mapping_start(parser);
973
974    /* Is it the flow sequence end indicator? */
975
976    if (CHECK(parser, ']'))
977        return yaml_parser_fetch_flow_sequence_end(parser);
978
979    /* Is it the flow mapping end indicator? */
980
981    if (CHECK(parser, '}'))
982        return yaml_parser_fetch_flow_mapping_end(parser);
983
984    /* Is it the flow entry indicator? */
985
986    if (CHECK(parser, ','))
987        return yaml_parser_fetch_flow_entry(parser);
988
989    /* Is it the block entry indicator? */
990
991    if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1))
992        return yaml_parser_fetch_block_entry(parser);
993
994    /* Is it the key indicator? */
995
996    if (CHECK(parser, '?') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1)))
997        return yaml_parser_fetch_key(parser);
998
999    /* Is it the value indicator? */
1000
1001    if (CHECK(parser, ':') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1)))
1002        return yaml_parser_fetch_value(parser);
1003
1004    /* Is it an alias? */
1005
1006    if (CHECK(parser, '*'))
1007        return yaml_parser_fetch_alias(parser);
1008
1009    /* Is it an anchor? */
1010
1011    if (CHECK(parser, '&'))
1012        return yaml_parser_fetch_anchor(parser);
1013
1014    /* Is it a tag? */
1015
1016    if (CHECK(parser, '!'))
1017        return yaml_parser_fetch_tag(parser);
1018
1019    /* Is it a literal scalar? */
1020
1021    if (CHECK(parser, '|') && !parser->flow_level)
1022        return yaml_parser_fetch_block_scalar(parser, 1);
1023
1024    /* Is it a folded scalar? */
1025
1026    if (CHECK(parser, '>') && !parser->flow_level)
1027        return yaml_parser_fetch_block_scalar(parser, 0);
1028
1029    /* Is it a single-quoted scalar? */
1030
1031    if (CHECK(parser, '\''))
1032        return yaml_parser_fetch_flow_scalar(parser, 1);
1033
1034    /* Is it a double-quoted scalar? */
1035
1036    if (CHECK(parser, '"'))
1037        return yaml_parser_fetch_flow_scalar(parser, 0);
1038
1039    /*
1040     * Is it a plain scalar?
1041     *
1042     * A plain scalar may start with any non-blank characters except
1043     *
1044     *      '-', '?', ':', ',', '[', ']', '{', '}',
1045     *      '#', '&', '*', '!', '|', '>', '\'', '\"',
1046     *      '%', '@', '`'.
1047     *
1048     * In the block context, it may also start with the characters
1049     *
1050     *      '-', '?', ':'
1051     *
1052     * if it is followed by a non-space character.
1053     *
1054     * The last rule is more restrictive than the specification requires.
1055     */
1056
1057    if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?')
1058                || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[')
1059                || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}')
1060                || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*')
1061                || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>')
1062                || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%')
1063                || CHECK(parser, '@') || CHECK(parser, '`')) ||
1064            (!parser->flow_level &&
1065             (CHECK(parser, '-') || CHECK(parser, '?') || CHECK(parser, ':')) &&
1066             IS_BLANKZ_AT(parser, 1)))
1067        return yaml_parser_fetch_plain_scalar(parser);
1068
1069    /*
1070     * If we don't determine the token type so far, it is an error.
1071     */
1072
1073    return yaml_parser_set_scanner_error(parser, "while scanning for the next token",
1074            yaml_parser_get_mark(parser), "found character that cannot start any token");
1075}
1076
Note: See TracBrowser for help on using the repository browser.