Changeset 190
- Timestamp:
- 06/18/06 13:20:25 (7 years ago)
- Location:
- libyaml/trunk/src
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
libyaml/trunk/src/api.c
r185 r190 269 269 270 270 YAML_DECLARE(yaml_token_t *) 271 yaml_stream_start_token (yaml_encoding_t encoding,271 yaml_stream_start_token_new(yaml_encoding_t encoding, 272 272 yaml_mark_t start_mark, yaml_mark_t end_mark) 273 273 { … … 287 287 288 288 YAML_DECLARE(yaml_token_t *) 289 yaml_stream_end_token (yaml_mark_t start_mark, yaml_mark_t end_mark)289 yaml_stream_end_token_new(yaml_mark_t start_mark, yaml_mark_t end_mark) 290 290 { 291 291 yaml_token_t *token = yaml_token_new(YAML_STREAM_END_TOKEN, -
libyaml/trunk/src/scanner.c
r189 r190 983 983 yaml_mark_t start_mark, yaml_mark_t *end_mark); 984 984 985 static int986 yaml_parser_scan_block_scalar_indicators(yaml_parser_t *parser,987 yaml_mark_t start_mark, int *chomping, int *increment);988 989 985 static yaml_token_t * 990 986 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single); … … 3250 3246 yaml_mark_t end_mark; 3251 3247 yaml_string_t string = yaml_parser_new_string(parser); 3252 yaml_string_t l ine_break = yaml_parser_new_string(parser);3253 yaml_string_t breaks = yaml_parser_new_string(parser);3248 yaml_string_t leading_break = yaml_parser_new_string(parser); 3249 yaml_string_t trailing_breaks = yaml_parser_new_string(parser); 3254 3250 yaml_token_t *token = NULL; 3255 3251 int chomping = 0; … … 3260 3256 3261 3257 if (!string.buffer) goto error; 3262 if (!l ine_break.buffer) goto error;3263 if (! breaks.buffer) goto error;3258 if (!leading_break.buffer) goto error; 3259 if (!trailing_breaks.buffer) goto error; 3264 3260 3265 3261 /* Eat the indicator '|' or '>'. */ … … 3368 3364 /* Scan the leading line breaks and determine the indentation level if needed. */ 3369 3365 3370 if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, & breaks,3366 if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, 3371 3367 start_mark, &end_mark)) goto error; 3372 3368 … … 3387 3383 /* Check if we need to fold the leading line break. */ 3388 3384 3389 if (!literal && (*l ine_break.buffer == '\n')3385 if (!literal && (*leading_break.buffer == '\n') 3390 3386 && !leading_blank && !trailing_blank) 3391 3387 { 3392 3388 /* Do we need to join the lines by space? */ 3393 3389 3394 if (* breaks.buffer == '\0') {3390 if (*trailing_breaks.buffer == '\0') { 3395 3391 if (!RESIZE(parser, string)) goto error; 3396 3392 *(string.pointer ++) = ' '; 3397 3393 } 3398 3394 3399 yaml_parser_clear_string(parser, &l ine_break);3395 yaml_parser_clear_string(parser, &leading_break); 3400 3396 } 3401 3397 else { 3402 if (!JOIN(parser, string, l ine_break)) goto error;3398 if (!JOIN(parser, string, leading_break)) goto error; 3403 3399 } 3404 3400 3405 3401 /* Append the remaining line breaks. */ 3406 3402 3407 if (!JOIN(parser, string, breaks)) goto error;3403 if (!JOIN(parser, string, trailing_breaks)) goto error; 3408 3404 3409 3405 /* Is it a leading whitespace? */ … … 3423 3419 if (!UPDATE(parser, 2)) goto error; 3424 3420 3425 COPY_LINE(parser, l ine_break);3421 COPY_LINE(parser, leading_break); 3426 3422 3427 3423 /* Eat the following intendation spaces and line breaks. */ 3428 3424 3429 3425 if (!yaml_parser_scan_block_scalar_breaks(parser, 3430 &indent, & breaks, start_mark, &end_mark)) goto error;3426 &indent, &trailing_breaks, start_mark, &end_mark)) goto error; 3431 3427 } 3432 3428 … … 3434 3430 3435 3431 if (chomping != -1) { 3436 if (!JOIN(parser, string, l ine_break)) goto error;3432 if (!JOIN(parser, string, leading_break)) goto error; 3437 3433 } 3438 3434 if (chomping == 1) { 3439 if (!JOIN(parser, string, breaks)) goto error;3435 if (!JOIN(parser, string, trailing_breaks)) goto error; 3440 3436 } 3441 3437 … … 3450 3446 } 3451 3447 3452 yaml_free(l ine_break.buffer);3453 yaml_free( breaks.buffer);3448 yaml_free(leading_break.buffer); 3449 yaml_free(trailing_breaks.buffer); 3454 3450 3455 3451 return token; … … 3457 3453 error: 3458 3454 yaml_free(string.buffer); 3459 yaml_free(l ine_break.buffer);3460 yaml_free( breaks.buffer);3455 yaml_free(leading_break.buffer); 3456 yaml_free(trailing_breaks.buffer); 3461 3457 3462 3458 return NULL; … … 3525 3521 } 3526 3522 3523 /* 3524 * Scan a quoted scalar. 3525 */ 3526 3527 static yaml_token_t * 3528 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) 3529 { 3530 yaml_mark_t start_mark; 3531 yaml_mark_t end_mark; 3532 yaml_string_t string = yaml_parser_new_string(parser); 3533 yaml_string_t leading_break = yaml_parser_new_string(parser); 3534 yaml_string_t trailing_breaks = yaml_parser_new_string(parser); 3535 yaml_string_t whitespaces = yaml_parser_new_string(parser); 3536 yaml_token_t *token = NULL; 3537 int leading_blanks; 3538 3539 if (!string.buffer) goto error; 3540 if (!leading_break.buffer) goto error; 3541 if (!trailing_breaks.buffer) goto error; 3542 if (!whitespaces.buffer) goto error; 3543 3544 /* Eat the left quote. */ 3545 3546 start_mark = yaml_parser_get_mark(parser); 3547 3548 FORWARD(parser); 3549 3550 /* Consume the content of the quoted scalar. */ 3551 3552 while (1) 3553 { 3554 /* Check that there are no document indicators at the beginning of the line. */ 3555 3556 if (!UPDATE(parser, 4)) goto error; 3557 3558 if (parser->column == 0 && 3559 ((CHECK_AT(parser, '-', 0) && 3560 CHECK_AT(parser, '-', 1) && 3561 CHECK_AT(parser, '-', 2)) || 3562 (CHECK_AT(parser, '.', 0) && 3563 CHECK_AT(parser, '.', 1) && 3564 CHECK_AT(parser, '.', 2))) && 3565 IS_BLANKZ_AT(parser, 3)) 3566 { 3567 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 3568 start_mark, "found unexpected document indicator"); 3569 goto error; 3570 } 3571 3572 /* Check for EOF. */ 3573 3574 if (IS_Z(parser)) { 3575 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 3576 start_mark, "found unexpected end of stream"); 3577 goto error; 3578 } 3579 3580 /* Consume non-blank characters. */ 3581 3582 if (!UPDATE(parser, 2)) goto error; 3583 if (!RESIZE(parser, string)) goto error; 3584 3585 leading_blanks = 0; 3586 3587 while (!IS_BLANKZ(parser)) 3588 { 3589 /* Check for an escaped single quote. */ 3590 3591 if (single && CHECK_AT(parser, '\'', 0) && CHECK_AT(parser, '\'', 1)) 3592 { 3593 *(string.pointer++) = '\''; 3594 FORWARD(parser); 3595 FORWARD(parser); 3596 } 3597 3598 /* Check for the right quote. */ 3599 3600 else if (CHECK(parser, single ? '\'' : '"')) 3601 { 3602 break; 3603 } 3604 3605 /* Check for an escaped line break. */ 3606 3607 else if (!single && CHECK(parser, '\\') && IS_BREAK_AT(parser, 1)) 3608 { 3609 if (!UPDATE(parser, 3)) goto error; 3610 FORWARD(parser); 3611 FORWARD_LINE(parser); 3612 leading_blanks = 1; 3613 break; 3614 } 3615 3616 /* Check for an escape sequence. */ 3617 3618 else if (!single && CHECK(parser, '\\')) 3619 { 3620 int code_length = 0; 3621 3622 /* Check the escape character. */ 3623 3624 switch (parser->pointer[1]) 3625 { 3626 case '0': 3627 *(string.pointer++) = '\0'; 3628 break; 3629 3630 case 'a': 3631 *(string.pointer++) = '\x07'; 3632 break; 3633 3634 case 'b': 3635 *(string.pointer++) = '\x08'; 3636 break; 3637 3638 case 't': 3639 case '\t': 3640 *(string.pointer++) = '\x09'; 3641 break; 3642 3643 case 'n': 3644 *(string.pointer++) = '\x0A'; 3645 break; 3646 3647 case 'v': 3648 *(string.pointer++) = '\x0B'; 3649 break; 3650 3651 case 'f': 3652 *(string.pointer++) = '\x0C'; 3653 break; 3654 3655 case 'r': 3656 *(string.pointer++) = '\x0D'; 3657 break; 3658 3659 case 'e': 3660 *(string.pointer++) = '\x1B'; 3661 break; 3662 3663 case ' ': 3664 *(string.pointer++) = '\x20'; 3665 break; 3666 3667 case '"': 3668 *(string.pointer++) = '"'; 3669 break; 3670 3671 case '\'': 3672 *(string.pointer++) = '\''; 3673 break; 3674 3675 case 'N': /* NEL (#x85) */ 3676 *(string.pointer++) = '\xC2'; 3677 *(string.pointer++) = '\x85'; 3678 break; 3679 3680 case '_': /* #xA0 */ 3681 *(string.pointer++) = '\xC2'; 3682 *(string.pointer++) = '\xA0'; 3683 break; 3684 3685 case 'L': /* LS (#x2028) */ 3686 *(string.pointer++) = '\xE2'; 3687 *(string.pointer++) = '\x80'; 3688 *(string.pointer++) = '\xA8'; 3689 break; 3690 3691 case 'P': /* PS (#x2029) */ 3692 *(string.pointer++) = '\xE2'; 3693 *(string.pointer++) = '\x80'; 3694 *(string.pointer++) = '\xA8'; 3695 break; 3696 3697 case 'x': 3698 code_length = 2; 3699 break; 3700 3701 case 'u': 3702 code_length = 4; 3703 break; 3704 3705 case 'U': 3706 code_length = 8; 3707 break; 3708 3709 default: 3710 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 3711 start_mark, "found unknown escape character"); 3712 goto error; 3713 } 3714 3715 FORWARD(parser); 3716 FORWARD(parser); 3717 3718 /* Consume an arbitrary escape code. */ 3719 3720 if (code_length) 3721 { 3722 unsigned int value = 0; 3723 int k; 3724 3725 /* Scan the character value. */ 3726 3727 if (!UPDATE(parser, code_length)) goto error; 3728 3729 for (k = 0; k < code_length; k ++) { 3730 if (!IS_HEX_AT(parser, k)) { 3731 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 3732 start_mark, "did not find expected hexdecimal number"); 3733 goto error; 3734 } 3735 value = (value << 4) + AS_HEX_AT(parser, k); 3736 } 3737 3738 /* Check the value and write the character. */ 3739 3740 if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) { 3741 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 3742 start_mark, "found invalid Unicode character escape code"); 3743 goto error; 3744 } 3745 3746 if (value <= 0x7F) { 3747 *(string.pointer++) = value; 3748 } 3749 else if (value <= 0x7FF) { 3750 *(string.pointer++) = 0xC0 + (value >> 6); 3751 *(string.pointer++) = 0x80 + (value & 0x3F); 3752 } 3753 else if (value <= 0xFFFF) { 3754 *(string.pointer++) = 0xE0 + (value >> 12); 3755 *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F); 3756 *(string.pointer++) = 0x80 + (value & 0x3F); 3757 } 3758 else { 3759 *(string.pointer++) = 0xF0 + (value >> 18); 3760 *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F); 3761 *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F); 3762 *(string.pointer++) = 0x80 + (value & 0x3F); 3763 } 3764 3765 /* Advance the pointer. */ 3766 3767 for (k = 0; k < code_length; k ++) { 3768 FORWARD(parser); 3769 } 3770 } 3771 } 3772 3773 else 3774 { 3775 /* It is a non-escaped non-blank character. */ 3776 3777 COPY(parser, string); 3778 } 3779 3780 if (!UPDATE(parser, 2)) goto error; 3781 if (!RESIZE(parser, string)) goto error; 3782 } 3783 3784 /* Check if we are at the end of the scalar. */ 3785 3786 if (CHECK(parser, single ? '\'' : '"')) 3787 break; 3788 3789 /* Consume blank characters. */ 3790 3791 if (!UPDATE(parser, 1)) goto error; 3792 3793 while (IS_BLANK(parser) || IS_BREAK(parser)) 3794 { 3795 if (IS_BLANK(parser)) 3796 { 3797 /* Consume a space or a tab character. */ 3798 3799 if (!leading_blanks) { 3800 if (!RESIZE(parser, whitespaces)) goto error; 3801 COPY(parser, whitespaces); 3802 } 3803 } 3804 else 3805 { 3806 if (!UPDATE(parser, 2)) goto error; 3807 3808 /* Check if it is a first line break. */ 3809 3810 if (!leading_blanks) 3811 { 3812 yaml_parser_clear_string(parser, &whitespaces); 3813 COPY_LINE(parser, leading_break); 3814 leading_blanks = 1; 3815 } 3816 else 3817 { 3818 if (!RESIZE(parser, trailing_breaks)) goto error; 3819 COPY_LINE(parser, trailing_breaks); 3820 } 3821 } 3822 if (!UPDATE(parser, 1)) goto error; 3823 } 3824 3825 /* Join the whitespaces or fold line breaks. */ 3826 3827 if (!RESIZE(parser, string)) goto error; 3828 3829 if (leading_blanks) 3830 { 3831 /* Do we need to fold line breaks? */ 3832 3833 if (leading_break.buffer[0] == '\n') { 3834 if (trailing_breaks.buffer[0] == '\0') { 3835 *(string.pointer++) = ' '; 3836 } 3837 else { 3838 if (!JOIN(parser, string, trailing_breaks)) goto error; 3839 } 3840 yaml_parser_clear_string(parser, &leading_break); 3841 } 3842 else { 3843 if (!JOIN(parser, string, leading_break)) goto error; 3844 if (!JOIN(parser, string, trailing_breaks)) goto error; 3845 } 3846 } 3847 else 3848 { 3849 if (!JOIN(parser, string, whitespaces)) goto error; 3850 } 3851 } 3852 3853 /* Eat the right quote. */ 3854 3855 FORWARD(parser); 3856 3857 end_mark = yaml_parser_get_mark(parser); 3858 3859 /* Create a token. */ 3860 3861 token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer, 3862 single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE, 3863 start_mark, end_mark); 3864 if (!token) { 3865 parser->error = YAML_MEMORY_ERROR; 3866 return 0; 3867 } 3868 3869 yaml_free(leading_break.buffer); 3870 yaml_free(trailing_breaks.buffer); 3871 yaml_free(whitespaces.buffer); 3872 3873 return token; 3874 3875 error: 3876 yaml_free(string.buffer); 3877 yaml_free(leading_break.buffer); 3878 yaml_free(trailing_breaks.buffer); 3879 yaml_free(whitespaces.buffer); 3880 3881 return NULL; 3882 } 3883 3884 /* 3885 * Scan a plain scalar. 3886 */ 3887 3888 static yaml_token_t * 3889 yaml_parser_scan_plain_scalar(yaml_parser_t *parser) 3890 { 3891 yaml_mark_t start_mark; 3892 yaml_mark_t end_mark; 3893 yaml_string_t string = yaml_parser_new_string(parser); 3894 yaml_string_t leading_break = yaml_parser_new_string(parser); 3895 yaml_string_t trailing_breaks = yaml_parser_new_string(parser); 3896 yaml_string_t whitespaces = yaml_parser_new_string(parser); 3897 yaml_token_t *token = NULL; 3898 int leading_blanks = 0; 3899 int indent = parser->indent+1; 3900 3901 if (!string.buffer) goto error; 3902 if (!leading_break.buffer) goto error; 3903 if (!trailing_breaks.buffer) goto error; 3904 if (!whitespaces.buffer) goto error; 3905 3906 start_mark = yaml_parser_get_mark(parser); 3907 3908 /* Consume the content of the plain scalar. */ 3909 3910 while (1) 3911 { 3912 /* Check for a document indicator. */ 3913 3914 if (!UPDATE(parser, 4)) goto error; 3915 3916 if (parser->column == 0 && 3917 ((CHECK_AT(parser, '-', 0) && 3918 CHECK_AT(parser, '-', 1) && 3919 CHECK_AT(parser, '-', 2)) || 3920 (CHECK_AT(parser, '.', 0) && 3921 CHECK_AT(parser, '.', 1) && 3922 CHECK_AT(parser, '.', 2))) && 3923 IS_BLANKZ_AT(parser, 3)) break; 3924 3925 /* Check for a comment. */ 3926 3927 if (CHECK(parser, '#')) 3928 break; 3929 3930 /* Consume non-blank characters. */ 3931 3932 while (!IS_BLANKZ(parser)) 3933 { 3934 /* Check for 'x:x' in the flow context. */ 3935 3936 if (parser->flow_level && CHECK(parser, ':') && !IS_BLANKZ_AT(parser, 1)) { 3937 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", 3938 start_mark, "found unexpected ':'"); 3939 goto error; 3940 } 3941 3942 /* Check for indicators that may end a plain scalar. */ 3943 3944 if ((CHECK(parser, ':') && IS_BLANKZ_AT(parser, 1)) || 3945 (parser->flow_level && 3946 (CHECK(parser, ',') || CHECK(parser, ':') || 3947 CHECK(parser, '?') || CHECK(parser, '[') || 3948 CHECK(parser, ']') || CHECK(parser, '{') || 3949 CHECK(parser, '}')))) 3950 break; 3951 3952 /* Check if we need to join whitespaces and breaks. */ 3953 3954 if (leading_blanks || whitespaces.buffer != whitespaces.pointer) 3955 { 3956 if (!RESIZE(parser, string)) goto error; 3957 3958 if (leading_blanks) 3959 { 3960 /* Do we need to fold line breaks? */ 3961 3962 if (leading_break.buffer[0] == '\n') { 3963 if (trailing_breaks.buffer[0] == '\0') { 3964 *(string.pointer++) = ' '; 3965 } 3966 else { 3967 if (!JOIN(parser, string, trailing_breaks)) goto error; 3968 } 3969 yaml_parser_clear_string(parser, &leading_break); 3970 } 3971 else { 3972 if (!JOIN(parser, string, leading_break)) goto error; 3973 if (!JOIN(parser, string, trailing_breaks)) goto error; 3974 } 3975 3976 leading_blanks = 0; 3977 } 3978 else 3979 { 3980 if (!JOIN(parser, string, whitespaces)) goto error; 3981 } 3982 } 3983 3984 /* Copy the character. */ 3985 3986 if (!RESIZE(parser, string)) goto error; 3987 3988 COPY(parser, string); 3989 3990 end_mark = yaml_parser_get_mark(parser); 3991 3992 if (!UPDATE(parser, 2)) goto error; 3993 } 3994 3995 /* Is it the end? */ 3996 3997 if (!(IS_BLANK(parser) || IS_BREAK(parser))) 3998 break; 3999 4000 /* Consume blank characters. */ 4001 4002 if (!UPDATE(parser, 1)) goto error; 4003 4004 while (IS_BLANK(parser) || IS_BREAK(parser)) 4005 { 4006 if (IS_BLANK(parser)) 4007 { 4008 /* Check for tab character that abuse intendation. */ 4009 4010 if (leading_blanks && parser->column < indent && IS_TAB(parser)) { 4011 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", 4012 start_mark, "found a tab character that violate intendation"); 4013 break; 4014 } 4015 4016 /* Consume a space or a tab character. */ 4017 4018 if (!leading_blanks) { 4019 if (!RESIZE(parser, whitespaces)) goto error; 4020 COPY(parser, whitespaces); 4021 } 4022 } 4023 else 4024 { 4025 if (!UPDATE(parser, 2)) goto error; 4026 4027 /* Check if it is a first line break. */ 4028 4029 if (!leading_blanks) 4030 { 4031 yaml_parser_clear_string(parser, &whitespaces); 4032 COPY_LINE(parser, leading_break); 4033 leading_blanks = 1; 4034 } 4035 else 4036 { 4037 if (!RESIZE(parser, trailing_breaks)) goto error; 4038 COPY_LINE(parser, trailing_breaks); 4039 } 4040 } 4041 if (!UPDATE(parser, 1)) goto error; 4042 } 4043 4044 /* Check intendation level. */ 4045 4046 if (parser->column < indent) 4047 break; 4048 } 4049 4050 /* Create a token. */ 4051 4052 token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer, 4053 YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark); 4054 if (!token) { 4055 parser->error = YAML_MEMORY_ERROR; 4056 return 0; 4057 } 4058 4059 /* Note that we change the 'simple_key_allowed' flag. */ 4060 4061 if (leading_blanks) { 4062 parser->simple_key_allowed = 1; 4063 } 4064 4065 yaml_free(leading_break.buffer); 4066 yaml_free(trailing_breaks.buffer); 4067 yaml_free(whitespaces.buffer); 4068 4069 return token; 4070 4071 error: 4072 yaml_free(string.buffer); 4073 yaml_free(leading_break.buffer); 4074 yaml_free(trailing_breaks.buffer); 4075 yaml_free(whitespaces.buffer); 4076 4077 return NULL; 4078 } 4079
Note: See TracChangeset
for help on using the changeset viewer.
