| | 487 | * Ensure that the buffer contains the required number of characters. |
|---|
| | 488 | * Return 1 on success, 0 on failure (reader error or memory error). |
|---|
| | 489 | */ |
|---|
| | 490 | |
|---|
/*
 * UPDATE(parser, length) evaluates to 1 straight away when the parser already
 * has at least `length` unread characters; otherwise it evaluates to the
 * result of yaml_parser_update_buffer(parser, length).
 *
 * Both arguments are parenthesized in the expansion so that expressions such
 * as `&state` can be passed safely.  Note that `length` may be evaluated
 * twice, so it must not have side effects.
 */

#define UPDATE(parser,length)                                                   \
    ((parser)->unread >= (length)                                               \
        ? 1                                                                     \
        : yaml_parser_update_buffer((parser), (length)))
|---|
| | 495 | |
|---|
/*
 * Character classification macros.
 *
 * Every macro below inspects parser->buffer relative to the current position:
 * the *_AT(parser, offset) forms look `offset` octets ahead, the short forms
 * look at offset 0.  All macro arguments are parenthesized in the expansions,
 * so arbitrary expressions may be passed; arguments may be evaluated more
 * than once and therefore must not have side effects.
 */

/*
 * Check the octet at the specified position.
 *
 * The expected octet is cast to yaml_char_t before the comparison so that
 * literals such as '\xC2' compare against the buffer element type.
 */

#define CHECK_AT(parser,octet,offset)                                           \
    ((parser)->buffer[(offset)] == (yaml_char_t)(octet))

/*
 * Check the current octet in the buffer.
 */

#define CHECK(parser,octet) CHECK_AT((parser),(octet),0)

/*
 * Check if the character at the specified position is NUL.
 */

#define IS_Z_AT(parser,offset)  CHECK_AT((parser),'\0',(offset))

#define IS_Z(parser)    IS_Z_AT((parser),0)

/*
 * Check if the character at the specified position is space.
 */

#define IS_SPACE_AT(parser,offset)  CHECK_AT((parser),' ',(offset))

#define IS_SPACE(parser)    IS_SPACE_AT((parser),0)

/*
 * Check if the character at the specified position is tab.
 */

#define IS_TAB_AT(parser,offset)    CHECK_AT((parser),'\t',(offset))

#define IS_TAB(parser)  IS_TAB_AT((parser),0)

/*
 * Check if the character at the specified position is blank (space or tab).
 */

#define IS_BLANK_AT(parser,offset)                                              \
    (IS_SPACE_AT((parser),(offset)) || IS_TAB_AT((parser),(offset)))

#define IS_BLANK(parser)    IS_BLANK_AT((parser),0)

/*
 * Check if the character at the specified position is a line break.
 *
 * The multi-octet breaks are matched octet by octet; the `&&` chains only
 * look at the following octets once the leading octet has matched.
 */

#define IS_BREAK_AT(parser,offset)                                              \
    (CHECK_AT((parser),'\r',(offset))               /* CR (#xD) */              \
     || CHECK_AT((parser),'\n',(offset))            /* LF (#xA) */              \
     || (CHECK_AT((parser),'\xC2',(offset))                                     \
         && CHECK_AT((parser),'\x85',((offset)+1))) /* NEL (#x85) */            \
     || (CHECK_AT((parser),'\xE2',(offset))                                     \
         && CHECK_AT((parser),'\x80',((offset)+1))                              \
         && CHECK_AT((parser),'\xA8',((offset)+2))) /* LS (#x2028) */           \
     || (CHECK_AT((parser),'\xE2',(offset))                                     \
         && CHECK_AT((parser),'\x80',((offset)+1))                              \
         && CHECK_AT((parser),'\xA9',((offset)+2)))) /* PS (#x2029) */

#define IS_BREAK(parser)    IS_BREAK_AT((parser),0)

/*
 * Check if the character is a line break or NUL.
 */

#define IS_BREAKZ_AT(parser,offset)                                             \
    (IS_BREAK_AT((parser),(offset)) || IS_Z_AT((parser),(offset)))

#define IS_BREAKZ(parser)   IS_BREAKZ_AT((parser),0)

/*
 * Check if the character is a line break, space, or NUL.
 */

#define IS_SPACEZ_AT(parser,offset)                                             \
    (IS_SPACE_AT((parser),(offset)) || IS_BREAKZ_AT((parser),(offset)))

#define IS_SPACEZ(parser)   IS_SPACEZ_AT((parser),0)

/*
 * Check if the character is a line break, space, tab, or NUL.
 */

#define IS_BLANKZ_AT(parser,offset)                                             \
    (IS_BLANK_AT((parser),(offset)) || IS_BREAKZ_AT((parser),(offset)))

#define IS_BLANKZ(parser)   IS_BLANKZ_AT((parser),0)
|---|
| | 586 | |
|---|
| | 587 | /* |
|---|
| | 768 | /* |
|---|
| | 769 | * Get the next token and remove it from the tokens queue. |
|---|
| | 770 | */ |
|---|
| | 771 | |
|---|
| | 772 | YAML_DECLARE(yaml_token_t *) |
|---|
| | 773 | yaml_parser_get_token(yaml_parser_t *parser) |
|---|
| | 774 | { |
|---|
| | 775 | yaml_token_t *token; |
|---|
| | 776 | |
|---|
| | 777 | assert(parser); /* Non-NULL parser object is expected. */ |
|---|
| | 778 | assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */ |
|---|
| | 779 | |
|---|
| | 780 | /* Ensure that the tokens queue contains enough tokens. */ |
|---|
| | 781 | |
|---|
| | 782 | if (!yaml_parser_fetch_more_tokens(parser)) return NULL; |
|---|
| | 783 | |
|---|
| | 784 | /* Fetch the next token from the queue. */ |
|---|
| | 785 | |
|---|
| | 786 | token = parser->tokens[parser->tokens_head]; |
|---|
| | 787 | |
|---|
| | 788 | /* Move the queue head. */ |
|---|
| | 789 | |
|---|
| | 790 | parser->tokens[parser->tokens_head++] = NULL; |
|---|
| | 791 | if (parser->tokens_head == parser->tokens_size) |
|---|
| | 792 | parser->tokens_head = 0; |
|---|
| | 793 | |
|---|
| | 794 | parser->tokens_parsed++; |
|---|
| | 795 | |
|---|
| | 796 | return token; |
|---|
| | 797 | } |
|---|
| | 798 | |
|---|
| | 799 | /* |
|---|
| | 800 | * Get the next token, but don't remove it from the queue. |
|---|
| | 801 | */ |
|---|
| | 802 | |
|---|
| | 803 | YAML_DECLARE(yaml_token_t *) |
|---|
| | 804 | yaml_parser_peek_token(yaml_parser_t *parser) |
|---|
| | 805 | { |
|---|
| | 806 | assert(parser); /* Non-NULL parser object is expected. */ |
|---|
| | 807 | assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */ |
|---|
| | 808 | |
|---|
| | 809 | /* Ensure that the tokens queue contains enough tokens. */ |
|---|
| | 810 | |
|---|
| | 811 | if (!yaml_parser_fetch_more_tokens(parser)) return NULL; |
|---|
| | 812 | |
|---|
| | 813 | /* Fetch the next token from the queue. */ |
|---|
| | 814 | |
|---|
| | 815 | return parser->tokens[parser->tokens_head]; |
|---|
| | 816 | } |
|---|
| | 817 | |
|---|
| | 818 | /* |
|---|
| | 819 | * Set the scanner error and return 0. |
|---|
| | 820 | */ |
|---|
| | 821 | |
|---|
| | 822 | static int |
|---|
| | 823 | yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, |
|---|
| | 824 | yaml_mark_t context_mark, const char *problem) |
|---|
| | 825 | { |
|---|
| | 826 | parser->error = YAML_SCANNER_ERROR; |
|---|
| | 827 | parser->context = context; |
|---|
| | 828 | parser->context_mark = context_mark; |
|---|
| | 829 | parser->problem = problem; |
|---|
| | 830 | parser->problem_mark = yaml_parser_get_mark(parser); |
|---|
| | 831 | } |
|---|
| | 832 | |
|---|
| | 833 | /* |
|---|
| | 834 | * Get the mark for the current buffer position. |
|---|
| | 835 | */ |
|---|
| | 836 | |
|---|
| | 837 | static yaml_mark_t |
|---|
| | 838 | yaml_parser_get_mark(yaml_parser_t *parser) |
|---|
| | 839 | { |
|---|
| | 840 | yaml_mark_t mark = { parser->index, parser->line, parser->column }; |
|---|
| | 841 | |
|---|
| | 842 | return mark; |
|---|
| | 843 | } |
|---|
| | 844 | |
|---|
| | 845 | |
|---|
| | 846 | /* |
|---|
| | 847 | * Ensure that the tokens queue contains at least one token which can be |
|---|
| | 848 | * returned to the Parser. |
|---|
| | 849 | */ |
|---|
| | 850 | |
|---|
| | 851 | static int |
|---|
| | 852 | yaml_parser_fetch_more_tokens(yaml_parser_t *parser) |
|---|
| | 853 | { |
|---|
| | 854 | int need_more_tokens; |
|---|
| | 855 | int k; |
|---|
| | 856 | |
|---|
| | 857 | /* While we need more tokens to fetch, do it. */ |
|---|
| | 858 | |
|---|
| | 859 | while (1) |
|---|
| | 860 | { |
|---|
| | 861 | /* |
|---|
| | 862 | * Check if we really need to fetch more tokens. |
|---|
| | 863 | */ |
|---|
| | 864 | |
|---|
| | 865 | need_more_tokens = 0; |
|---|
| | 866 | |
|---|
| | 867 | if (parser->tokens_head == parser->tokens_tail) |
|---|
| | 868 | { |
|---|
| | 869 | /* Queue is empty. */ |
|---|
| | 870 | |
|---|
| | 871 | need_more_tokens = 1; |
|---|
| | 872 | } |
|---|
| | 873 | else |
|---|
| | 874 | { |
|---|
| | 875 | /* Check if any potential simple key may occupy the head position. */ |
|---|
| | 876 | |
|---|
| | 877 | for (k = 0; k <= parser->flow_level; k++) { |
|---|
| | 878 | yaml_simple_key_t *simple_key = parser->simple_keys[k]; |
|---|
| | 879 | if (simple_key |
|---|
| | 880 | && (simple_key->token_number == parser->tokens_parsed)) { |
|---|
| | 881 | need_more_tokens = 1; |
|---|
| | 882 | break; |
|---|
| | 883 | } |
|---|
| | 884 | } |
|---|
| | 885 | } |
|---|
| | 886 | |
|---|
| | 887 | /* We are finished. */ |
|---|
| | 888 | |
|---|
| | 889 | if (!need_more_tokens) |
|---|
| | 890 | break; |
|---|
| | 891 | |
|---|
| | 892 | /* Fetch the next token. */ |
|---|
| | 893 | |
|---|
| | 894 | if (!yaml_parser_fetch_next_token(parser)) |
|---|
| | 895 | return 0; |
|---|
| | 896 | } |
|---|
| | 897 | |
|---|
| | 898 | return 1; |
|---|
| | 899 | } |
|---|
| | 900 | |
|---|
| | 901 | /* |
|---|
| | 902 | * The dispatcher for token fetchers. |
|---|
| | 903 | */ |
|---|
| | 904 | |
|---|
| | 905 | static int |
|---|
| | 906 | yaml_parser_fetch_next_token(yaml_parser_t *parser) |
|---|
| | 907 | { |
|---|
| | 908 | /* Ensure that the buffer is initialized. */ |
|---|
| | 909 | |
|---|
| | 910 | if (!UPDATE(parser, 1)) |
|---|
| | 911 | return 0; |
|---|
| | 912 | |
|---|
| | 913 | /* Check if we just started scanning. Fetch STREAM-START then. */ |
|---|
| | 914 | |
|---|
| | 915 | if (!parser->stream_start_produced) |
|---|
| | 916 | return yaml_parser_fetch_stream_start(parser); |
|---|
| | 917 | |
|---|
| | 918 | /* Eat whitespaces and comments until we reach the next token. */ |
|---|
| | 919 | |
|---|
| | 920 | if (!yaml_parser_scan_to_next_token(parser)) |
|---|
| | 921 | return 0; |
|---|
| | 922 | |
|---|
| | 923 | /* Check the indentation level against the current column. */ |
|---|
| | 924 | |
|---|
| | 925 | if (!yaml_parser_unroll_indent(parser, parser->column)) |
|---|
| | 926 | return 0; |
|---|
| | 927 | |
|---|
| | 928 | /* |
|---|
| | 929 | * Ensure that the buffer contains at least 4 characters. 4 is the length |
|---|
| | 930 | * of the longest indicators ('--- ' and '... '). |
|---|
| | 931 | */ |
|---|
| | 932 | |
|---|
| | 933 | if (!UPDATE(parser, 4)) |
|---|
| | 934 | return 0; |
|---|
| | 935 | |
|---|
| | 936 | /* Is it the end of the stream? */ |
|---|
| | 937 | |
|---|
| | 938 | if (IS_Z(parser)) |
|---|
| | 939 | return yaml_parser_fetch_stream_end(parser); |
|---|
| | 940 | |
|---|
| | 941 | /* Is it a directive? */ |
|---|
| | 942 | |
|---|
| | 943 | if (parser->column == 0 && CHECK(parser, '%')) |
|---|
| | 944 | return yaml_parser_fetch_directive(parser); |
|---|
| | 945 | |
|---|
| | 946 | /* Is it the document start indicator? */ |
|---|
| | 947 | |
|---|
| | 948 | if (parser->column == 0 |
|---|
| | 949 | && CHECK_AT(parser, '-', 0) |
|---|
| | 950 | && CHECK_AT(parser, '-', 1) |
|---|
| | 951 | && CHECK_AT(parser, '-', 2) |
|---|
| | 952 | && IS_BLANKZ_AT(parser, 3)) |
|---|
| | 953 | return yaml_parser_fetch_document_start(parser); |
|---|
| | 954 | |
|---|
| | 955 | /* Is it the document end indicator? */ |
|---|
| | 956 | |
|---|
| | 957 | if (parser->column == 0 |
|---|
| | 958 | && CHECK_AT(parser, '.', 0) |
|---|
| | 959 | && CHECK_AT(parser, '.', 1) |
|---|
| | 960 | && CHECK_AT(parser, '.', 2) |
|---|
| | 961 | && IS_BLANKZ_AT(parser, 3)) |
|---|
| | 962 | return yaml_parser_fetch_document_start(parser); |
|---|
| | 963 | |
|---|
| | 964 | /* Is it the flow sequence start indicator? */ |
|---|
| | 965 | |
|---|
| | 966 | if (CHECK(parser, '[')) |
|---|
| | 967 | return yaml_parser_fetch_flow_sequence_start(parser); |
|---|
| | 968 | |
|---|
| | 969 | /* Is it the flow mapping start indicator? */ |
|---|
| | 970 | |
|---|
| | 971 | if (CHECK(parser, '{')) |
|---|
| | 972 | return yaml_parser_fetch_flow_mapping_start(parser); |
|---|
| | 973 | |
|---|
| | 974 | /* Is it the flow sequence end indicator? */ |
|---|
| | 975 | |
|---|
| | 976 | if (CHECK(parser, ']')) |
|---|
| | 977 | return yaml_parser_fetch_flow_sequence_end(parser); |
|---|
| | 978 | |
|---|
| | 979 | /* Is it the flow mapping end indicator? */ |
|---|
| | 980 | |
|---|
| | 981 | if (CHECK(parser, '}')) |
|---|
| | 982 | return yaml_parser_fetch_flow_mapping_end(parser); |
|---|
| | 983 | |
|---|
| | 984 | /* Is it the flow entry indicator? */ |
|---|
| | 985 | |
|---|
| | 986 | if (CHECK(parser, ',')) |
|---|
| | 987 | return yaml_parser_fetch_flow_entry(parser); |
|---|
| | 988 | |
|---|
| | 989 | /* Is it the block entry indicator? */ |
|---|
| | 990 | |
|---|
| | 991 | if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1)) |
|---|
| | 992 | return yaml_parser_fetch_block_entry(parser); |
|---|
| | 993 | |
|---|
| | 994 | /* Is it the key indicator? */ |
|---|
| | 995 | |
|---|
| | 996 | if (CHECK(parser, '?') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1))) |
|---|
| | 997 | return yaml_parser_fetch_key(parser); |
|---|
| | 998 | |
|---|
| | 999 | /* Is it the value indicator? */ |
|---|
| | 1000 | |
|---|
| | 1001 | if (CHECK(parser, ':') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1))) |
|---|
| | 1002 | return yaml_parser_fetch_value(parser); |
|---|
| | 1003 | |
|---|
| | 1004 | /* Is it an alias? */ |
|---|
| | 1005 | |
|---|
| | 1006 | if (CHECK(parser, '*')) |
|---|
| | 1007 | return yaml_parser_fetch_alias(parser); |
|---|
| | 1008 | |
|---|
| | 1009 | /* Is it an anchor? */ |
|---|
| | 1010 | |
|---|
| | 1011 | if (CHECK(parser, '&')) |
|---|
| | 1012 | return yaml_parser_fetch_anchor(parser); |
|---|
| | 1013 | |
|---|
| | 1014 | /* Is it a tag? */ |
|---|
| | 1015 | |
|---|
| | 1016 | if (CHECK(parser, '!')) |
|---|
| | 1017 | return yaml_parser_fetch_tag(parser); |
|---|
| | 1018 | |
|---|
| | 1019 | /* Is it a literal scalar? */ |
|---|
| | 1020 | |
|---|
| | 1021 | if (CHECK(parser, '|') && !parser->flow_level) |
|---|
| | 1022 | return yaml_parser_fetch_block_scalar(parser, 1); |
|---|
| | 1023 | |
|---|
| | 1024 | /* Is it a folded scalar? */ |
|---|
| | 1025 | |
|---|
| | 1026 | if (CHECK(parser, '>') && !parser->flow_level) |
|---|
| | 1027 | return yaml_parser_fetch_block_scalar(parser, 0); |
|---|
| | 1028 | |
|---|
| | 1029 | /* Is it a single-quoted scalar? */ |
|---|
| | 1030 | |
|---|
| | 1031 | if (CHECK(parser, '\'')) |
|---|
| | 1032 | return yaml_parser_fetch_flow_scalar(parser, 1); |
|---|
| | 1033 | |
|---|
| | 1034 | /* Is it a double-quoted scalar? */ |
|---|
| | 1035 | |
|---|
| | 1036 | if (CHECK(parser, '"')) |
|---|
| | 1037 | return yaml_parser_fetch_flow_scalar(parser, 0); |
|---|
| | 1038 | |
|---|
| | 1039 | /* |
|---|
| | 1040 | * Is it a plain scalar? |
|---|
| | 1041 | * |
|---|
| | 1042 | * A plain scalar may start with any non-blank characters except |
|---|
| | 1043 | * |
|---|
| | 1044 | * '-', '?', ':', ',', '[', ']', '{', '}', |
|---|
| | 1045 | * '#', '&', '*', '!', '|', '>', '\'', '\"', |
|---|
| | 1046 | * '%', '@', '`'. |
|---|
| | 1047 | * |
|---|
| | 1048 | * In the block context, it may also start with the characters |
|---|
| | 1049 | * |
|---|
| | 1050 | * '-', '?', ':' |
|---|
| | 1051 | * |
|---|
| | 1052 | * if it is followed by a non-space character. |
|---|
| | 1053 | * |
|---|
| | 1054 | * The last rule is more restrictive than the specification requires. |
|---|
| | 1055 | */ |
|---|
| | 1056 | |
|---|
| | 1057 | if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?') |
|---|
| | 1058 | || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[') |
|---|
| | 1059 | || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}') |
|---|
| | 1060 | || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*') |
|---|
| | 1061 | || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>') |
|---|
| | 1062 | || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%') |
|---|
| | 1063 | || CHECK(parser, '@') || CHECK(parser, '`')) || |
|---|
| | 1064 | (!parser->flow_level && |
|---|
| | 1065 | (CHECK(parser, '-') || CHECK(parser, '?') || CHECK(parser, ':')) && |
|---|
| | 1066 | IS_BLANKZ_AT(parser, 1))) |
|---|
| | 1067 | return yaml_parser_fetch_plain_scalar(parser); |
|---|
| | 1068 | |
|---|
| | 1069 | /* |
|---|
| | 1070 | * If we don't determine the token type so far, it is an error. |
|---|
| | 1071 | */ |
|---|
| | 1072 | |
|---|
| | 1073 | return yaml_parser_set_scanner_error(parser, "while scanning for the next token", |
|---|
| | 1074 | yaml_parser_get_mark(parser), "found character that cannot start any token"); |
|---|
| | 1075 | } |
|---|
| | 1076 | |
|---|