POSIX.1 National Language Support API for MinGW
Revisión | bd8c6e043589d7809cd1645087b04afb3f2e59fc (tree) |
---|---|
Tiempo | 2007-05-15 04:50:17 |
Autor | Keith Marshall <keithmarshall@user...> |
Committer | Keith Marshall |
Support all escape sequences required by POSIX 1003.1.
@@ -1,3 +1,14 @@ | ||
1 | +2007-05-14 Keith Marshall <keithmarshall@users.sourceforge.net> | |
2 | + | |
3 | + Support all escape sequences required by POSIX 1003.1. | |
4 | + | |
5 | + * mcsource.c (OCTAL_SEQUENCE_DECODE): New manifest constant. | |
6 | + (HEXADECIMAL_SEQUENCE_DECODE): Ditto, but not currently used. | |
7 | + (mc_source): Interpret "\n", "\r", "\b", "\t", "\v" and "\f" | |
8 | + standard escapes, and also "\ddd" generic octal digit sequence | |
9 | + escapes, appearing in message text definitions; "\\" is also | |
10 | + implicitly handled as required. | |
11 | + | |
1 | 12 | 2007-05-12 Keith Marshall <keithmarshall@users.sourceforge.net> |
2 | 13 | |
3 | 14 | Add support for `delset' directive. |
@@ -204,7 +204,7 @@ char *mc_update_workspace( char *buf, char *cache, unsigned int count ) | ||
204 | 204 | |
205 | 205 | struct msgdict *mc_source( const char *input ) |
206 | 206 | { |
207 | -# define CODESET_DECLARED codeset_decl_src, codeset_decl_lineno | |
207 | +# define CODESET_DECLARED codeset_decl_src, codeset_decl_lineno | |
208 | 208 | |
209 | 209 | long accumulator; |
210 | 210 | int fd, input_fd, count; |
@@ -229,6 +229,20 @@ struct msgdict *mc_source( const char *input ) | ||
229 | 229 | static iconv_t iconv_map[2] = {(iconv_t)(-1), (iconv_t)(-1)}; |
230 | 230 | char *messages; off_t msgloc, headroom; |
231 | 231 | |
232 | + /* | |
233 | + * This `shift' state index is used to control interpretation | |
234 | + * of octal escape sequences in message text; for normal text | |
235 | + * processing, it should be set to zero. | |
236 | + */ | |
237 | + unsigned shift = 0; | |
238 | + /* | |
239 | + * Other shift states supported, (they define the number of bits | |
240 | + * by which the accumulator must be shifted to the left, in order | |
241 | + * to multiply it by the associated number base), are:-- | |
242 | + */ | |
243 | +# define OCTAL_SEQUENCE_DECODE 3 | |
244 | +# define HEXADECIMAL_SEQUENCE_DECODE 4 | |
245 | + | |
232 | 246 | const char *dev_stdin = "/dev/stdin"; |
233 | 247 | if( (strcmp( input, "-") == 0) || (strcmp( input, dev_stdin ) == 0) ) |
234 | 248 | { |
@@ -300,7 +314,7 @@ struct msgdict *mc_source( const char *input ) | ||
300 | 314 | if( (status & (NEWLINE | CONTINUED)) == NEWLINE ) |
301 | 315 | { |
302 | 316 | /* When this new line is NOT simply a logical continuation |
303 | - * of the previous line... | |
317 | + * of the previous line ... | |
304 | 318 | */ |
305 | 319 | status &= ~MSGTEXT; |
306 | 320 | dfprintf(( stderr, "\n\n%s:%d:new input record", input, linenum )); |
@@ -362,7 +376,7 @@ struct msgdict *mc_source( const char *input ) | ||
362 | 376 | */ |
363 | 377 | if( id == keyword ) |
364 | 378 | { |
365 | - /* But, we didn't find any keyword... | |
379 | + /* But, we didn't find any keyword ... | |
366 | 380 | * |
367 | 381 | * This is a comment line, but it may be the special case of |
368 | 382 | * a codeset declaration comment, so we can't simply ignore it; |
@@ -431,14 +445,14 @@ struct msgdict *mc_source( const char *input ) | ||
431 | 445 | |
432 | 446 | else if( status & NUMERIC ) |
433 | 447 | { |
434 | - /* We are parsing a numeric value... | |
435 | - */ | |
448 | + /* We are parsing a numeric value ... | |
449 | + */ | |
436 | 450 | if( isdigit( c ) ) |
437 | 451 | { |
438 | - /* ...and the current character is part of the number, | |
452 | + /* ... and the current character is part of the number, | |
439 | 453 | * so add it into the accumulator. |
440 | 454 | */ |
441 | - accumulator = accumulator * 10 + c - L'0'; | |
455 | + accumulator = accumulator * 10 + c - L'0'; | |
442 | 456 | } |
443 | 457 | |
444 | 458 | else if( isspace( c ) ) |
@@ -616,105 +630,197 @@ struct msgdict *mc_source( const char *input ) | ||
616 | 630 | else if( status & MSGTEXT ) |
617 | 631 | { |
618 | 632 | /* We are compiling a message ... |
619 | - * Continue scanning the current input line, | |
620 | - * until we find the end-of-line marker. | |
621 | - */ | |
622 | - if( c != L'\n' ) | |
623 | - { | |
624 | - /* We haven't reached end-of-line yet... | |
625 | - * Check for other characters with special significance. | |
626 | - */ | |
627 | - if( status & ESCAPE ) | |
628 | - { | |
629 | - /* The current input character was escaped... | |
630 | - * Clear the ESCAPE flag, and interpret this case. | |
631 | - */ | |
632 | - size_t len = 0; | |
633 | - status &= ~ESCAPE; | |
634 | - dfprintf(( stderr, "%s:%u:", input, linenum )); | |
635 | - switch ( c ) | |
636 | - { | |
637 | - case L'r': /* embed a carriage return */ | |
638 | - len = mc_add_escape( iconv_map, messages + msgloc, L'\r' ); | |
639 | - break; | |
640 | - | |
641 | - case L'n': /* embed a newline */ | |
642 | - len = mc_add_escape( iconv_map, messages + msgloc, L'\n' ); | |
643 | - break; | |
644 | - | |
645 | - default: /* not a special case; just pass it through */ | |
646 | - xcount += skip; | |
647 | - dfprintf(( stderr, "pass through escape code: %0#4.4x", c )); | |
648 | - } | |
649 | - if( len > (size_t)(0) ) | |
633 | + */ | |
634 | + if( shift ) | |
635 | + { | |
636 | + /* The current input character is either part of an | |
637 | + * escaped octal digit sequence, or it terminates one. | |
638 | + */ | |
639 | + size_t len = 0; | |
640 | + switch( c ) | |
641 | + { | |
642 | + case L'0' ... L'7': | |
643 | + /* | |
644 | + * This is a continuation of the sequence ... | |
645 | + */ | |
646 | + accumulator = (accumulator << shift) + c - L'0'; | |
647 | + break; | |
648 | + | |
649 | + default: | |
650 | + /* | |
651 | + * This is the character immediately following | |
652 | + * an encoded octal digit sequence ... | |
653 | + */ | |
654 | + if( (accumulator > 0) && ((len = | |
655 | + mc_add_escape( iconv_map, messages + msgloc, accumulator )) | |
656 | + > (size_t)(0)) ) | |
657 | + { | |
658 | + headroom -= len; | |
659 | + msgloc += len; | |
660 | + } | |
661 | + shift = 0; | |
662 | + } | |
663 | + } | |
664 | + /* Do not use `else' here; the `shift' state may have changed | |
665 | + * since the preceding check, in which case, we may also need | |
666 | + * to do this ... | |
667 | + */ | |
668 | + if( shift == 0 ) | |
669 | + { | |
670 | + /* Continue scanning the current input line, | |
671 | + * until we find the end-of-line marker. | |
672 | + */ | |
673 | + if( c != L'\n' ) | |
674 | + { | |
675 | + /* We haven't reached end-of-line yet ... | |
676 | + * Check for other characters with special significance. | |
677 | + */ | |
678 | + if( status & ESCAPE ) | |
650 | 679 | { |
651 | - headroom -= len; | |
652 | - msgloc += len; | |
680 | + /* The current input character was escaped ... | |
681 | + * Clear the ESCAPE flag, and interpret this case. | |
682 | + */ | |
683 | + size_t len = 0; | |
684 | + status &= ~ESCAPE; | |
685 | + dfprintf(( stderr, "%s:%u:", input, linenum )); | |
686 | + switch( c ) | |
687 | + { | |
688 | + /* Thus, for the standard escape sequences ... | |
689 | + */ | |
690 | + case L'b': | |
691 | + /* | |
692 | + * The "\b" escape sequence is to be interpreted as | |
693 | + * a literal backspace; encode it ... | |
694 | + */ | |
695 | + len = mc_add_escape( iconv_map, messages + msgloc, L'\b' ); | |
696 | + break; | |
697 | + | |
698 | + case L'r': | |
699 | + /* | |
700 | + * Similarly for "\r", which is to be encoded as | |
701 | + * a carriage return ... | |
702 | + */ | |
703 | + len = mc_add_escape( iconv_map, messages + msgloc, L'\r' ); | |
704 | + break; | |
705 | + | |
706 | + case L'n': | |
707 | + /* | |
708 | + * And for "\n", representing a newline ... | |
709 | + */ | |
710 | + len = mc_add_escape( iconv_map, messages + msgloc, L'\n' ); | |
711 | + break; | |
712 | + | |
713 | + case L't': | |
714 | + /* | |
715 | + * ... "\t", representing a horizontal tab ... | |
716 | + */ | |
717 | + len = mc_add_escape( iconv_map, messages + msgloc, L'\t' ); | |
718 | + break; | |
719 | + | |
720 | + case L'v': | |
721 | + /* | |
722 | + * ... "\v", representing a vertical tab ... | |
723 | + */ | |
724 | + len = mc_add_escape( iconv_map, messages + msgloc, L'\v' ); | |
725 | + break; | |
726 | + | |
727 | + case L'f': | |
728 | + /* | |
729 | + * ... and "\f", representing a form feed. | |
730 | + */ | |
731 | + len = mc_add_escape( iconv_map, messages + msgloc, L'\f' ); | |
732 | + break; | |
733 | + | |
734 | + case L'0' ... L'7': | |
735 | + /* | |
736 | + * This is the first in a "\ddd" octal digit sequence; | |
737 | + * initialise the accumulator, and activate the appropriate | |
738 | + * shift state, to capture the remaining digits. | |
739 | + */ | |
740 | + accumulator = c - L'0'; | |
741 | + shift = OCTAL_SEQUENCE_DECODE; | |
742 | + break; | |
743 | + | |
744 | + default: | |
745 | + /* | |
746 | + * Anything else is not a special case; we can simply pass it | |
747 | + * through as a regular character. Notice that we don't need | |
748 | + * to treat "\\" as special; this default action produces the | |
749 | + * desired effect. | |
750 | + */ | |
751 | + xcount += skip; | |
752 | + dfprintf(( stderr, "pass through escape code: %0#4.4x", c )); | |
753 | + } | |
754 | + if( len > (size_t)(0) ) | |
755 | + { | |
756 | + headroom -= len; | |
757 | + msgloc += len; | |
758 | + } | |
653 | 759 | } |
654 | - } | |
655 | 760 | |
656 | - else if( c == L'\\' ) | |
657 | - { | |
658 | - /* This is the escape character... | |
659 | - * Set the parser flags, so that any cached message data is flushed, | |
660 | - * and switch to ESCAPE mode, to interpret the next character. | |
661 | - */ | |
662 | - status |= FLUSH | ESCAPE; | |
663 | - } | |
761 | + else if( c == L'\\' ) | |
762 | + { | |
763 | + /* This is the escape character ... | |
764 | + * Set the parser flags, so that cached message data is flushed, | |
765 | + * and switch to ESCAPE mode, to interpret the next character. | |
766 | + */ | |
767 | + status |= FLUSH | ESCAPE; | |
768 | + } | |
664 | 769 | |
665 | - else if( c == quote ) | |
666 | - { | |
667 | - dfprintf(( stderr, "\n%s:%u:%s quoted context", input, linenum, (status & QUOTED) ? "end" : "begin" )); | |
668 | - status = (status ^ QUOTED) | FLUSH; | |
669 | - } | |
770 | + else if( c == quote ) | |
771 | + { | |
772 | + dfprintf(( stderr, "\n%s:%u:%s quoted context", input, linenum, (status & QUOTED) ? "end" : "begin" )); | |
773 | + status = (status ^ QUOTED) | FLUSH; | |
774 | + } | |
670 | 775 | |
671 | - else | |
672 | - { | |
673 | - xcount += skip; | |
674 | - dfputc(( c, stderr )); | |
675 | - } | |
676 | - } | |
677 | - if( count < ICONV_MB_LEN_MAX ) | |
678 | - { | |
679 | - skip = 0; | |
680 | - status |= FLUSH; | |
776 | + else | |
777 | + { | |
778 | + xcount += skip; | |
779 | + dfputc(( c, stderr )); | |
780 | + } | |
781 | + } | |
782 | + if( count < ICONV_MB_LEN_MAX ) | |
783 | + { | |
784 | + skip = 0; | |
785 | + status |= FLUSH; | |
786 | + } | |
681 | 787 | } |
682 | - } | |
683 | 788 | |
684 | - if( c == L'\n' ) | |
685 | - { | |
686 | - /* Mark the end of the current input line, | |
687 | - * and schedule any pending message data from this line | |
688 | - * for flushing to the message collection buffer. | |
689 | - */ | |
690 | - status |= NEWLINE | FLUSH; | |
789 | + if( c == L'\n' ) | |
790 | + { | |
791 | + /* Mark the end of the current input line, | |
792 | + * and schedule any pending message data from this line | |
793 | + * for flushing to the message collection buffer. | |
794 | + */ | |
795 | + status |= NEWLINE | FLUSH; | |
691 | 796 | |
692 | - /* If "QUOTED" context remains active, at the end of this line, | |
693 | - * then we have an implicit continuation, so force it. | |
694 | - */ | |
695 | - if( (status & QUOTED) == QUOTED ) | |
696 | - status |= CONTINUED; | |
797 | + /* If "QUOTED" context remains active, at the end of this line, | |
798 | + * then we have an implicit continuation, so force it. | |
799 | + */ | |
800 | + if( (status & QUOTED) == QUOTED ) | |
801 | + status |= CONTINUED; | |
697 | 802 | |
698 | - /* Clean up the context of any pending directive processing. | |
699 | - */ | |
700 | - switch( status & CATEGORY ) | |
701 | - { | |
702 | - case DEFQUOTE: | |
703 | - /* | |
704 | - * If we see end of line with a DEFQUOTE pending, | |
705 | - * then there was no defining character with the "quote" directive, | |
706 | - * so we must disable "quote" character recognition. | |
707 | - */ | |
708 | - quote = L'\0'; | |
709 | - dfprintf(( stderr, ": none assigned" )); | |
710 | - break; | |
711 | - } | |
803 | + /* Clean up the context of any pending directive processing. | |
804 | + */ | |
805 | + switch( status & CATEGORY ) | |
806 | + { | |
807 | + case DEFQUOTE: | |
808 | + /* | |
809 | + * If we see end of line with a DEFQUOTE pending, then | |
810 | + * there was no defining character with the "quote" directive, | |
811 | + * so we must disable "quote" character recognition. | |
812 | + */ | |
813 | + quote = L'\0'; | |
814 | + dfprintf(( stderr, ": none assigned" )); | |
815 | + break; | |
816 | + } | |
712 | 817 | |
713 | - if( (status & CONTINUED) == 0 ) | |
714 | - { | |
715 | - status &= ~ENCODED; | |
716 | - } | |
717 | - } | |
818 | + if( (status & CONTINUED) == 0 ) | |
819 | + { | |
820 | + status &= ~ENCODED; | |
821 | + } | |
822 | + } | |
823 | + } | |
718 | 824 | } |
719 | 825 | |
720 | 826 | if( status & FLUSH ) |
@@ -801,12 +907,12 @@ struct msgdict *mc_source( const char *input ) | ||
801 | 907 | status &= ~MSGTEXT; |
802 | 908 | } |
803 | 909 | /* |
804 | - * At the end of the current input file... | |
910 | + * At the end of the current input file ... | |
805 | 911 | * Check that the parser finished in an appropriate termination state. |
806 | 912 | */ |
807 | 913 | if( status & QUOTED ) |
808 | 914 | { |
809 | - /* Abnormal termination... | |
915 | + /* Abnormal termination ... | |
810 | 916 | * EOF was encountered within a quoted literal, before the closing |
811 | 917 | * quote was found; diagnose abnormal termination state. |
812 | 918 | */ |
@@ -815,7 +921,7 @@ struct msgdict *mc_source( const char *input ) | ||
815 | 921 | |
816 | 922 | if( (status & NEWLINE) != NEWLINE ) |
817 | 923 | { |
818 | - /* Abnormal termination... | |
924 | + /* Abnormal termination ... | |
819 | 925 | * The input file lacks a terminating newline; diagnose abnormal |
820 | 926 | * termination state. |
821 | 927 | */ |
@@ -824,7 +930,7 @@ struct msgdict *mc_source( const char *input ) | ||
824 | 930 | |
825 | 931 | if( status & MSGTEXT ) |
826 | 932 | { |
827 | - /* Abnormal termination... | |
933 | + /* Abnormal termination ... | |
828 | 934 | * EOF was encountered while parsing a continued message definition; |
829 | 935 | * diagnose abnormal termination state, and mark incomplete message |
830 | 936 | * for deletion. |
@@ -864,4 +970,4 @@ struct msgdict *mc_source( const char *input ) | ||
864 | 970 | return head; |
865 | 971 | } |
866 | 972 | |
867 | -/* $RCSfile$Revision: 1.5 $: end of file */ | |
973 | +/* $RCSfile$Revision: 1.6 $: end of file */ |