[uClibc-cvs] uClibc/libc/misc/wchar Makefile, 1.4, 1.5 wchar.c, 1.10, 1.11 wstdio.c, 1.3, 1.4
Manuel Novoa III
mjn3 at uclibc.org
Fri Aug 1 20:09:24 UTC 2003
- Previous message: [uClibc-cvs] uClibc/libc/misc/time Makefile, 1.32, 1.33 time.c, 1.12, 1.13
- Next message: [uClibc-cvs] uClibc/include xlocale.h, NONE, 1.1 ctype.h, 1.15, 1.16 langinfo.h, 1.5, 1.6 libintl.h, 1.1, 1.2 locale.h, 1.5, 1.6 signal.h, 1.6, 1.7 stdio.h, 1.27, 1.28 stdlib.h, 1.39, 1.40 string.h, 1.23, 1.24 time.h, 1.6, 1.7 wchar.h, 1.2, 1.3 wctype.h, 1.2, 1.3
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /var/cvs/uClibc/libc/misc/wchar
In directory winder:/tmp/cvs-serv28208/libc/misc/wchar
Modified Files:
Makefile wchar.c wstdio.c
Log Message:
Add a new *scanf implementation, including the *wscanf functions.
Should be standards compliant and with several optional features,
including support for hexadecimal float notation, locale awareness,
glibc-like locale-specific digit grouping with the `'' flag, and
positional arg support. I tested it pretty well (finding several
bugs in glibc's scanf in the process), but it is brand new so be
aware.
The *wprintf functions now support floating point output. Also, a
couple of bugs were squashed. Finally, %a/%A conversions are
now implemented.
Implement the glibc xlocale interface for thread-specific locale
support. Also add the various *_l(args, locale_t loc_arg) funcs.
NOTE!!! setlocale() is NOT threadsafe! NOTE!!!
The strto{floating point} conversion functions are now locale aware.
They also now support hexadecimal floating point notation.
Add the wcsto{floating point} conversion functions.
Fix a bug in mktime() related to dst. Note that unlike glibc's mktime,
uClibc's version always normalizes the struct tm before attempting
to determine the correct dst setting if tm_isdst == -1 on entry.
Add a stub version of the libintl functions. (untested)
Fixed a known memory leak in setlocale() related to the collation data.
Add lots of new config options (which Erik agreed to sort out :-),
including finally exposing some of the stripped down stdio configs.
Be careful with those though, as they haven't been tested in a
long time.
(temporary) GOTCHAs...
The ctype functions are currently incorrect for 8-bit locales. They
will be fixed shortly.
The ctype functions are now table-based, resulting in larger statically
linked binaries. I'll be adding an option to use the old approach
in the stub locale configuration.
Index: Makefile
===================================================================
RCS file: /var/cvs/uClibc/libc/misc/wchar/Makefile,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- Makefile 22 Nov 2002 03:05:20 -0000 1.4
+++ Makefile 1 Aug 2003 20:08:51 -0000 1.5
@@ -42,7 +42,6 @@
# getwc (fgetwc alias) getwc_unlocked (fgetwc_unlocked alias)
# putwc (fputwc alias) putwc_unlocked (fputwc_unlocked alias)
-# fwscanf wscanf swscanf vfwscanf vwscanf vswscanf
# wcsftime
OBJS=$(MOBJ1) $(MOBJ2)
Index: wchar.c
===================================================================
RCS file: /var/cvs/uClibc/libc/misc/wchar/wchar.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -d -r1.10 -r1.11
--- wchar.c 23 Nov 2002 16:43:57 -0000 1.10
+++ wchar.c 1 Aug 2003 20:08:51 -0000 1.11
@@ -86,6 +86,9 @@
* Add a couple of ugly hacks to support *wprintf.
* Add a mini iconv() and iconv implementation (requires locale support).
*
+ * Aug 1, 2003
+ * Bug fix for mbrtowc.
+ *
* Manuel
*/
@@ -101,13 +104,39 @@
#include <assert.h>
#include <locale.h>
#include <wchar.h>
+#include <bits/uClibc_uwchar.h>
+/**********************************************************************/
#ifdef __UCLIBC_HAS_LOCALE__
-#define ENCODING (__global_locale.encoding)
+#ifdef __UCLIBC_MJN3_ONLY__
+#ifdef L_iswspace
+/* generates one warning */
+#warning TODO: Fix Cc2wc* and Cwc2c* defines!
+#endif
+#endif /* __UCLIBC_MJN3_ONLY__ */
+
+#define ENCODING ((__UCLIBC_CURLOCALE_DATA).encoding)
+
+#define Cc2wc_IDX_SHIFT __LOCALE_DATA_Cc2wc_IDX_SHIFT
+#define Cc2wc_ROW_LEN __LOCALE_DATA_Cc2wc_ROW_LEN
+#define Cwc2c_DOMAIN_MAX __LOCALE_DATA_Cwc2c_DOMAIN_MAX
+#define Cwc2c_TI_SHIFT __LOCALE_DATA_Cwc2c_TI_SHIFT
+#define Cwc2c_TT_SHIFT __LOCALE_DATA_Cwc2c_TT_SHIFT
+#define Cwc2c_TI_LEN __LOCALE_DATA_Cwc2c_TI_LEN
+
#ifndef __CTYPE_HAS_UTF_8_LOCALES
#warning __CTYPE_HAS_UTF_8_LOCALES not set!
#endif
-#else
+
+#else /* __UCLIBC_HAS_LOCALE__ */
+
+#ifdef __UCLIBC_MJN3_ONLY__
+#ifdef L_btowc
+/* emit only once */
+#warning fix preprocessor logic testing locale settings
+#endif
+#endif
+
#define ENCODING (__ctype_encoding_7_bit)
#ifdef __CTYPE_HAS_8_BIT_LOCALES
#error __CTYPE_HAS_8_BIT_LOCALES is defined!
@@ -117,7 +146,9 @@
#endif
#undef L__wchar_utf8sntowcs
#undef L__wchar_wcsntoutf8s
-#endif
+
+#endif /* __UCLIBC_HAS_LOCALE__ */
+/**********************************************************************/
#if WCHAR_MAX > 0xffffUL
#define UTF_8_MAX_LEN 6
@@ -266,11 +297,18 @@
#ifdef __CTYPE_HAS_UTF_8_LOCALES
/* Need to do this here since mbsrtowcs doesn't allow incompletes. */
if (ENCODING == __ctype_encoding_utf8) {
+ if (!pwc) {
+ pwc = wcbuf;
+ }
r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
}
#endif
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning TODO: This adds a trailing nul!
+#endif /* __UCLIBC_MJN3_ONLY__ */
+
r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
if (((ssize_t) r) >= 0) {
@@ -291,7 +329,10 @@
size_t wcrtomb(register char *__restrict s, wchar_t wc,
mbstate_t *__restrict ps)
{
- wchar_t wcbuf[2];
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning TODO: Should wcsnrtombs nul-terminate unconditionally? Check glibc.
+#endif /* __UCLIBC_MJN3_ONLY__ */
+ wchar_t wcbuf[1];
const wchar_t *pwc;
size_t r;
char buf[MB_LEN_MAX];
@@ -303,9 +344,8 @@
pwc = wcbuf;
wcbuf[0] = wc;
- wcbuf[1] = 0;
- r = __wcsnrtombs(s, &pwc, SIZE_MAX, MB_LEN_MAX, ps);
+ r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);
return (r != 0) ? r : 1;
}
@@ -418,7 +458,7 @@
if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
mask = 0x40;
#ifdef __UCLIBC_MJN3_ONLY__
-#warning fix range for 16 bit wides
+#warning TODO: Fix range for 16 bit wchar_t case.
#endif
if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
goto START;
@@ -495,7 +535,6 @@
COMPLETE:
*pwc = wc;
pwc += incr;
-
}
#ifdef DECODER
while (--count);
@@ -684,8 +723,8 @@
while (count) {
if ((wc = ((unsigned char)(*s))) >= 0x80) { /* Non-ASCII... */
wc -= 0x80;
- wc = __global_locale.tbl8c2wc[
- (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
+ wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
+ (__UCLIBC_CURLOCALE_DATA.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
<< Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
if (!wc) {
goto BAD;
@@ -797,12 +836,12 @@
} else {
u = 0;
if (wc <= Cwc2c_DOMAIN_MAX) {
- u = __global_locale.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
+ u = __UCLIBC_CURLOCALE_DATA.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
+ Cwc2c_TT_SHIFT)];
- u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+ u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+ ((wc >> Cwc2c_TT_SHIFT)
& ((1 << Cwc2c_TI_SHIFT)-1))];
- u = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
+ u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
+ (u << Cwc2c_TT_SHIFT)
+ (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
}
@@ -859,7 +898,8 @@
#ifdef L_wcswidth
#ifdef __UCLIBC_MJN3_ONLY__
-#warning if we start doing translit, wcwidth and wcswidth will need updating.
+#warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating.
+#warning TODO: Update wcwidth to match latest by Kuhn.
#endif
#if defined(__UCLIBC_HAS_LOCALE__) && \
@@ -1163,7 +1203,7 @@
*
*/
-const unsigned char codesets[] =
+const unsigned char __iconv_codesets[] =
"\x0a\xe0""WCHAR_T\x00" /* superset of UCS-4 but platform-endian */
#if __BYTE_ORDER == __BIG_ENDIAN
"\x08\xec""UCS-4\x00" /* always BE */
@@ -1201,7 +1241,7 @@
const unsigned char *s;
int codeset;
- for (s = codesets ; *s ; s += *s) {
+ for (s = __iconv_codesets ; *s ; s += *s) {
if (!strcasecmp(s+2, name)) {
return s[1];
}
@@ -1212,10 +1252,10 @@
/* TODO: maybe CODESET_LIST + *s ??? */
/* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
codeset = 2;
- s = CODESET_LIST;
+ s = __LOCALE_DATA_CODESET_LIST;
do {
++codeset; /* Increment codeset first. */
- if (!strcasecmp(CODESET_LIST+*s, name)) {
+ if (!strcasecmp(__LOCALE_DATA_CODESET_LIST+*s, name)) {
return codeset;
}
} while (*++s);
@@ -1223,7 +1263,7 @@
return 0; /* No matching codeset! */
}
-iconv_t iconv_open(const char *tocode, const char *fromcode)
+iconv_t weak_function iconv_open(const char *tocode, const char *fromcode)
{
register _UC_iconv_t *px;
int tocodeset, fromcodeset;
@@ -1244,16 +1284,17 @@
return (iconv_t)(-1);
}
-int iconv_close(iconv_t cd)
+int weak_function iconv_close(iconv_t cd)
{
free(cd);
return 0;
}
-size_t iconv(iconv_t cd, char **__restrict inbuf,
- size_t *__restrict inbytesleft,
- char **__restrict outbuf, size_t *__restrict outbytesleft)
+size_t weak_function iconv(iconv_t cd, char **__restrict inbuf,
+ size_t *__restrict inbytesleft,
+ char **__restrict outbuf,
+ size_t *__restrict outbytesleft)
{
_UC_iconv_t *px = (_UC_iconv_t *) cd;
size_t nrcount, r;
@@ -1362,9 +1403,9 @@
return (size_t)(-1);
}
#ifdef __UCLIBC_MJN3_ONLY__
-#warning optimize this
+#warning TODO: optimize this.
#endif
- if (p != NULL) { /* incomplet char case */
+ if (p != NULL) { /* incomplete char case */
goto INVALID;
}
p = *inbuf + 1; /* nul */
@@ -1374,10 +1415,10 @@
if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
goto ILLEGAL;
} else { /* some other 8-bit ascii-extension codeset */
- const codeset_8_bit_t *c8b
+ const __codeset_8_bit_t *c8b
= __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
wc -= 0x80;
- wc = __global_locale.tbl8c2wc[
+ wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
(c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
<< Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
if (!wc) {
@@ -1439,7 +1480,7 @@
r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1);
if (r != (size_t)(-1)) {
#ifdef __UCLIBC_MJN3_ONLY__
-#warning what happens for a nul?
+#warning TODO: What happens for a nul?
#endif
if (r == 0) {
if (wc != 0) {
@@ -1458,14 +1499,14 @@
**outbuf = wc;
} else {
if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
- const codeset_8_bit_t *c8b
+ const __codeset_8_bit_t *c8b
= __locale_mmap->codeset_8_bit + px->tocodeset - 3;
__uwchar_t u;
u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
- u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+ u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+ ((wc >> Cwc2c_TT_SHIFT)
& ((1 << Cwc2c_TI_SHIFT)-1))];
- wc = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
+ wc = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
+ (u << Cwc2c_TT_SHIFT)
+ (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
if (wc) {
@@ -1497,7 +1538,7 @@
#include <stdarg.h>
#include <libgen.h>
-extern const unsigned char codesets[];
+extern const unsigned char __iconv_codesets[];
#define IBUF BUFSIZ
#define OBUF BUFSIZ
@@ -1572,12 +1613,12 @@
if (opts[5]) { /* -l */
fprintf(stderr, "Recognized codesets:\n");
- for (s = codesets ; *s ; s += *s) {
+ for (s = __iconv_codesets ; *s ; s += *s) {
fprintf(stderr," %s\n", s+2);
}
- s = CODESET_LIST;
+ s = __LOCALE_DATA_CODESET_LIST;
do {
- fprintf(stderr," %s\n", CODESET_LIST+ (unsigned char)(*s));
+ fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
} while (*++s);
return EXIT_SUCCESS;
Index: wstdio.c
===================================================================
RCS file: /var/cvs/uClibc/libc/misc/wchar/wstdio.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- wstdio.c 22 Nov 2002 03:05:20 -0000 1.3
+++ wstdio.c 1 Aug 2003 20:08:51 -0000 1.4
@@ -171,7 +171,6 @@
size_t r;
unsigned char c[1];
unsigned char sbuf[1];
- unsigned char ungot_width; /* Support ftell after wscanf ungetwc. */
wi = WEOF; /* Prepare for failure. */
@@ -183,8 +182,18 @@
stream->modeflags |= __FLAG_WIDE;
if (stream->modeflags & __MASK_UNGOT) {/* Any ungetwc()s? */
- assert( (stream->modeflags & (__FLAG_READING|__FLAG_ERROR))
- == __FLAG_READING);
+
+ assert(stream->modeflags & __FLAG_READING);
+
+/* assert( (stream->modeflags & (__FLAG_READING|__FLAG_ERROR)) */
+/* == __FLAG_READING); */
+
+ if ((((stream->modeflags & __MASK_UNGOT) > 1) || stream->ungot[1])) {
+ stream->ungot_width[0] = 0; /* Application ungot... */
+ } else {
+ stream->ungot_width[0] = stream->ungot_width[1]; /* scanf ungot */
+ }
+
wi = stream->ungot[(--stream->modeflags) & __MASK_UNGOT];
stream->ungot[1] = 0;
goto DONE;
@@ -196,7 +205,9 @@
++stream->bufend;
}
- ungot_width = 0;
+ if (stream->state.mask == 0) { /* If last was a complete char */
+ stream->ungot_width[0] = 0; /* then reset the width. */
+ }
LOOP:
if ((n = stream->bufread - stream->bufpos) == 0) {
@@ -204,12 +215,12 @@
}
r = mbrtowc(wc, stream->bufpos, n, &stream->state);
- if (((ssize_t) r) >= 0) { /* Single byte... */
+ if (((ssize_t) r) >= 0) { /* Success... */
if (r == 0) { /* Nul wide char... means 0 byte for us so */
++r; /* increment r and handle below as single. */
}
stream->bufpos += r;
- stream->ungot_width[0] = ungot_width + r;
+ stream->ungot_width[0] += r;
wi = *wc;
goto DONE;
}
@@ -217,7 +228,7 @@
if (r == ((size_t) -2)) {
/* Potentially valid but incomplete and no more buffered. */
stream->bufpos += n; /* Update bufpos for stream. */
- ungot_width += n;
+ stream->ungot_width[0] += n;
FILL_BUFFER:
if (_stdio_fread(c, (size_t) 1, stream) > 0) {
assert(stream->bufpos == stream->bufstart + 1);
@@ -371,7 +382,8 @@
#ifdef L_ungetwc
/*
* Note: This is the application-callable ungetwc. If wscanf calls this, it
- * should also set stream->ungot[1] to 0 if this is the only ungot.
+ * should also set stream->ungot[1] to 0 if this is the only ungot, as well
+ * as reset stream->ungot_width[1] for use by _stdio_adjpos().
*/
/* Reentrant. */
@@ -389,8 +401,7 @@
}
stream->modeflags |= __FLAG_WIDE;
- /* If can't read or there's been an error, or c == EOF, or ungot slots
- * already filled, then return EOF */
+ /* If can't read or c == WEOF or ungot slots already filled, then fail. */
if ((stream->modeflags
& (__MASK_UNGOT2|__FLAG_WRITEONLY
#ifndef __STDIO_AUTO_RW_TRANSITION
@@ -406,14 +417,18 @@
/* ungot_width */
#ifdef __STDIO_BUFFERS
- /* TODO: shouldn't allow writing??? */
+#ifdef __STDIO_AUTO_RW_TRANSITION
if (stream->modeflags & __FLAG_WRITING) {
fflush_unlocked(stream); /* Commit any write-buffered chars. */
}
+#endif /* __STDIO_AUTO_RW_TRANSITION */
#endif /* __STDIO_BUFFERS */
/* Clear EOF and WRITING flags, and set READING FLAG */
stream->modeflags &= ~(__FLAG_EOF|__FLAG_WRITING);
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning CONSIDER: Is setting the reading flag after an ungetwc necessary?
+#endif /* __UCLIBC_MJN3_ONLY__ */
stream->modeflags |= __FLAG_READING;
stream->ungot[1] = 1; /* Flag as app ungetc call; wscanf fixes up. */
stream->ungot[(stream->modeflags++) & __MASK_UNGOT] = c;
- Previous message: [uClibc-cvs] uClibc/libc/misc/time Makefile, 1.32, 1.33 time.c, 1.12, 1.13
- Next message: [uClibc-cvs] uClibc/include xlocale.h, NONE, 1.1 ctype.h, 1.15, 1.16 langinfo.h, 1.5, 1.6 libintl.h, 1.1, 1.2 locale.h, 1.5, 1.6 signal.h, 1.6, 1.7 stdio.h, 1.27, 1.28 stdlib.h, 1.39, 1.40 string.h, 1.23, 1.24 time.h, 1.6, 1.7 wchar.h, 1.2, 1.3 wctype.h, 1.2, 1.3
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the uClibc-cvs
mailing list