[uClibc-cvs] uClibc/libc/misc/wchar Makefile, 1.4, 1.5 wchar.c, 1.10, 1.11 wstdio.c, 1.3, 1.4

Manuel Novoa III mjn3 at uclibc.org
Fri Aug 1 20:09:24 UTC 2003


Update of /var/cvs/uClibc/libc/misc/wchar
In directory winder:/tmp/cvs-serv28208/libc/misc/wchar

Modified Files:
	Makefile wchar.c wstdio.c 
Log Message:
Add a new *scanf implementation, including the *wscanf functions.
  Should be standards compliant and with several optional features,
  including support for hexadecimal float notation, locale awareness,
  glibc-like locale-specific digit grouping with the `'' flag, and
  positional arg support.  I tested it pretty well (finding several
  bugs in glibc's scanf in the process), but it is brand new so be
  aware.

The *wprintf functions now support floating point output.  Also, a
  couple of bugs were squashed.  Finally, %a/%A conversions are
  now implemented.

Implement the glibc xlocale interface for thread-specific locale
  support.  Also add the various *_l(args, locale_t loc_arg) funcs.

  NOTE!!!  setlocale() is NOT threadsafe!  NOTE!!!

The strto{floating point} conversion functions are now locale aware.
  They also now support hexadecimal floating point notation.

Add the wcsto{floating point} conversion functions.

Fix a bug in mktime() related to dst.  Note that unlike glibc's mktime,
  uClibc's version always normalizes the struct tm before attempting
  to determine the correct dst setting if tm_isdst == -1 on entry.

Add a stub version of the libintl functions.  (untested)

Fixed a known memory leak in setlocale() related to the collation data.

Add lots of new config options (which Erik agreed to sort out :-),
  including finally exposing some of the stripped down stdio configs.
  Be careful with those though, as they haven't been tested in a
  long time.


(temporary) GOTCHAs...

The ctype functions are currently incorrect for 8-bit locales.  They
  will be fixed shortly.

The ctype functions are now table-based, resulting in larger statically
  linked binaries.  I'll be adding an option to use the old approach
  in the stub locale configuration.




Index: Makefile
===================================================================
RCS file: /var/cvs/uClibc/libc/misc/wchar/Makefile,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- Makefile	22 Nov 2002 03:05:20 -0000	1.4
+++ Makefile	1 Aug 2003 20:08:51 -0000	1.5
@@ -42,7 +42,6 @@
 # getwc (fgetwc alias) getwc_unlocked (fgetwc_unlocked alias)
 # putwc (fputwc alias) putwc_unlocked (fputwc_unlocked alias)
 
-# fwscanf  wscanf  swscanf  vfwscanf  vwscanf  vswscanf
 # wcsftime
 
 OBJS=$(MOBJ1) $(MOBJ2)

Index: wchar.c
===================================================================
RCS file: /var/cvs/uClibc/libc/misc/wchar/wchar.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -d -r1.10 -r1.11
--- wchar.c	23 Nov 2002 16:43:57 -0000	1.10
+++ wchar.c	1 Aug 2003 20:08:51 -0000	1.11
@@ -86,6 +86,9 @@
  * Add a couple of ugly hacks to support *wprintf.
  * Add a mini iconv() and iconv implementation (requires locale support).
  *
+ * Aug 1, 2003
+ * Bug fix for mbrtowc.
+ *
  * Manuel
  */
 
@@ -101,13 +104,39 @@
 #include <assert.h>
 #include <locale.h>
 #include <wchar.h>
+#include <bits/uClibc_uwchar.h>
 
+/**********************************************************************/
 #ifdef __UCLIBC_HAS_LOCALE__
-#define ENCODING (__global_locale.encoding)
+#ifdef __UCLIBC_MJN3_ONLY__
+#ifdef L_iswspace
+/* generates one warning */
+#warning TODO: Fix Cc2wc* and Cwc2c* defines!
+#endif
+#endif /* __UCLIBC_MJN3_ONLY__ */
+
+#define ENCODING		((__UCLIBC_CURLOCALE_DATA).encoding)
+
+#define Cc2wc_IDX_SHIFT		__LOCALE_DATA_Cc2wc_IDX_SHIFT
+#define Cc2wc_ROW_LEN		__LOCALE_DATA_Cc2wc_ROW_LEN
+#define Cwc2c_DOMAIN_MAX	__LOCALE_DATA_Cwc2c_DOMAIN_MAX
+#define Cwc2c_TI_SHIFT		__LOCALE_DATA_Cwc2c_TI_SHIFT
+#define Cwc2c_TT_SHIFT		__LOCALE_DATA_Cwc2c_TT_SHIFT
+#define Cwc2c_TI_LEN		__LOCALE_DATA_Cwc2c_TI_LEN
+
 #ifndef __CTYPE_HAS_UTF_8_LOCALES
 #warning __CTYPE_HAS_UTF_8_LOCALES not set!
 #endif
-#else
+
+#else  /* __UCLIBC_HAS_LOCALE__ */
+
+#ifdef __UCLIBC_MJN3_ONLY__
+#ifdef L_btowc
+/* emit only once */
+#warning fix preprocessor logic testing locale settings
+#endif
+#endif
+
 #define ENCODING (__ctype_encoding_7_bit)
 #ifdef __CTYPE_HAS_8_BIT_LOCALES
 #error __CTYPE_HAS_8_BIT_LOCALES is defined!
@@ -117,7 +146,9 @@
 #endif
 #undef L__wchar_utf8sntowcs
 #undef L__wchar_wcsntoutf8s
-#endif
+
+#endif /* __UCLIBC_HAS_LOCALE__ */
+/**********************************************************************/
 
 #if WCHAR_MAX > 0xffffUL
 #define UTF_8_MAX_LEN 6
@@ -266,11 +297,18 @@
 #ifdef __CTYPE_HAS_UTF_8_LOCALES
 	/* Need to do this here since mbsrtowcs doesn't allow incompletes. */
 	if (ENCODING == __ctype_encoding_utf8) {
+		if (!pwc) {
+			pwc = wcbuf;
+		}
 		r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
 		return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
 	}
 #endif
 
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning TODO: This adds a trailing nul!
+#endif /* __UCLIBC_MJN3_ONLY__ */
+
 	r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
 
 	if (((ssize_t) r) >= 0) {
@@ -291,7 +329,10 @@
 size_t wcrtomb(register char *__restrict s, wchar_t wc,
 			   mbstate_t *__restrict ps)
 {
-	wchar_t wcbuf[2];
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning TODO: Should wcsnrtombs nul-terminate unconditionally?  Check glibc.
+#endif /* __UCLIBC_MJN3_ONLY__ */
+	wchar_t wcbuf[1];
 	const wchar_t *pwc;
 	size_t r;
 	char buf[MB_LEN_MAX];
@@ -303,9 +344,8 @@
 
 	pwc = wcbuf;
 	wcbuf[0] = wc;
-	wcbuf[1] = 0;
 
-	r = __wcsnrtombs(s, &pwc, SIZE_MAX, MB_LEN_MAX, ps);
+	r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);
 	return (r != 0) ? r : 1;
 }
 
@@ -418,7 +458,7 @@
 		if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
 			mask = 0x40;
 #ifdef __UCLIBC_MJN3_ONLY__
-#warning fix range for 16 bit wides
+#warning TODO: Fix range for 16 bit wchar_t case.
 #endif
 			if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
 				goto START;
@@ -495,7 +535,6 @@
 	COMPLETE:
 		*pwc = wc;
 		pwc += incr;
-
 	}
 #ifdef DECODER
 	while (--count);
@@ -684,8 +723,8 @@
 		while (count) {
 			if ((wc = ((unsigned char)(*s))) >= 0x80) {	/* Non-ASCII... */
 				wc -= 0x80;
-				wc = __global_locale.tbl8c2wc[
-						  (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
+				wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
+						  (__UCLIBC_CURLOCALE_DATA.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
 						   << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
 				if (!wc) {
 					goto BAD;
@@ -797,12 +836,12 @@
 			} else {
 				u = 0;
 				if (wc <= Cwc2c_DOMAIN_MAX) {
-					u = __global_locale.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
+					u = __UCLIBC_CURLOCALE_DATA.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
 														+ Cwc2c_TT_SHIFT)];
-					u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+					u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
 									+ ((wc >> Cwc2c_TT_SHIFT)
 									   & ((1 << Cwc2c_TI_SHIFT)-1))];
-					u = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
+					u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
 									+ (u << Cwc2c_TT_SHIFT)
 									+ (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
 				}
@@ -859,7 +898,8 @@
 #ifdef L_wcswidth
 
 #ifdef __UCLIBC_MJN3_ONLY__
-#warning if we start doing translit, wcwidth and wcswidth will need updating.
+#warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating.
+#warning TODO: Update wcwidth to match latest by Kuhn.
 #endif
 
 #if defined(__UCLIBC_HAS_LOCALE__) && \
@@ -1163,7 +1203,7 @@
  *
  */
 
-const unsigned char codesets[] =
+const unsigned char __iconv_codesets[] =
 	"\x0a\xe0""WCHAR_T\x00"		/* superset of UCS-4 but platform-endian */
 #if __BYTE_ORDER == __BIG_ENDIAN
 	"\x08\xec""UCS-4\x00"		/* always BE */
@@ -1201,7 +1241,7 @@
 	const unsigned char *s;
 	int codeset;
 
-	for (s = codesets ; *s ; s += *s) {
+	for (s = __iconv_codesets ; *s ; s += *s) {
 		if (!strcasecmp(s+2, name)) {
 			return s[1];
 		}
@@ -1212,10 +1252,10 @@
 	/* TODO: maybe CODESET_LIST + *s ??? */
 	/* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
 	codeset = 2;
-	s = CODESET_LIST;
+	s = __LOCALE_DATA_CODESET_LIST;
 	do {
 		++codeset;		/* Increment codeset first. */
-		if (!strcasecmp(CODESET_LIST+*s, name)) {
+		if (!strcasecmp(__LOCALE_DATA_CODESET_LIST+*s, name)) {
 			return codeset;
 		}
 	} while (*++s);
@@ -1223,7 +1263,7 @@
 	return 0;			/* No matching codeset! */
 }
 
-iconv_t iconv_open(const char *tocode, const char *fromcode)
+iconv_t weak_function iconv_open(const char *tocode, const char *fromcode)
 {
 	register _UC_iconv_t *px;
 	int tocodeset, fromcodeset;
@@ -1244,16 +1284,17 @@
 	return (iconv_t)(-1);
 }
 
-int iconv_close(iconv_t cd)
+int weak_function iconv_close(iconv_t cd)
 {
 	free(cd);
 
 	return 0;
 }
 
-size_t iconv(iconv_t cd, char **__restrict inbuf,
-			 size_t *__restrict inbytesleft,
-		     char **__restrict outbuf, size_t *__restrict outbytesleft)
+size_t weak_function iconv(iconv_t cd, char **__restrict inbuf,
+						   size_t *__restrict inbytesleft,
+						   char **__restrict outbuf,
+						   size_t *__restrict outbytesleft)
 {
 	_UC_iconv_t *px = (_UC_iconv_t *) cd;
 	size_t nrcount, r;
@@ -1362,9 +1403,9 @@
 					return (size_t)(-1);
 				}
 #ifdef __UCLIBC_MJN3_ONLY__
-#warning optimize this
+#warning TODO: optimize this.
 #endif
-				if (p != NULL) { /* incomplet char case */
+				if (p != NULL) { /* incomplete char case */
 					goto INVALID;
 				}
 				p = *inbuf + 1;	/* nul */
@@ -1374,10 +1415,10 @@
 			if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
 				goto ILLEGAL;
 			} else {			/* some other 8-bit ascii-extension codeset */
-				const codeset_8_bit_t *c8b
+				const __codeset_8_bit_t *c8b
 					= __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
 				wc -= 0x80;
-				wc = __global_locale.tbl8c2wc[
+				wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
 							 (c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
 							  << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
 				if (!wc) {
@@ -1439,7 +1480,7 @@
 				r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1);
 				if (r != (size_t)(-1)) {
 #ifdef __UCLIBC_MJN3_ONLY__
-#warning what happens for a nul?
+#warning TODO: What happens for a nul?
 #endif
 					if (r == 0) {
 						if (wc != 0) {
@@ -1458,14 +1499,14 @@
 				**outbuf = wc;
 		} else {
 			if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
-				const codeset_8_bit_t *c8b
+				const __codeset_8_bit_t *c8b
 					= __locale_mmap->codeset_8_bit + px->tocodeset - 3;
 				__uwchar_t u;
 				u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
-				u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+				u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
 						 + ((wc >> Cwc2c_TT_SHIFT)
 							& ((1 << Cwc2c_TI_SHIFT)-1))];
-				wc = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
+				wc = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
 						 + (u << Cwc2c_TT_SHIFT)
 						 + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
 				if (wc) {
@@ -1497,7 +1538,7 @@
 #include <stdarg.h>
 #include <libgen.h>
 
-extern const unsigned char codesets[];
+extern const unsigned char __iconv_codesets[];
 
 #define IBUF BUFSIZ
 #define OBUF BUFSIZ
@@ -1572,12 +1613,12 @@
 
 	if (opts[5]) {				/* -l */
 		fprintf(stderr, "Recognized codesets:\n");
-		for (s = codesets ; *s ; s += *s) {
+		for (s = __iconv_codesets ; *s ; s += *s) {
 			fprintf(stderr,"  %s\n", s+2);
 		}
-		s = CODESET_LIST;
+		s = __LOCALE_DATA_CODESET_LIST;
 		do {
-			fprintf(stderr,"  %s\n", CODESET_LIST+ (unsigned char)(*s));
+			fprintf(stderr,"  %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
 		} while (*++s);
 
 		return EXIT_SUCCESS;

Index: wstdio.c
===================================================================
RCS file: /var/cvs/uClibc/libc/misc/wchar/wstdio.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- wstdio.c	22 Nov 2002 03:05:20 -0000	1.3
+++ wstdio.c	1 Aug 2003 20:08:51 -0000	1.4
@@ -171,7 +171,6 @@
 	size_t r;
 	unsigned char c[1];
 	unsigned char sbuf[1];
-	unsigned char ungot_width;	/* Support ftell after wscanf ungetwc. */
 
 	wi = WEOF;					/* Prepare for failure. */
 
@@ -183,8 +182,18 @@
 	stream->modeflags |= __FLAG_WIDE;
 
 	if (stream->modeflags & __MASK_UNGOT) {/* Any ungetwc()s? */
-		assert( (stream->modeflags & (__FLAG_READING|__FLAG_ERROR))
-				== __FLAG_READING);
+
+		assert(stream->modeflags & __FLAG_READING);
+
+/* 		assert( (stream->modeflags & (__FLAG_READING|__FLAG_ERROR)) */
+/* 				== __FLAG_READING); */
+
+		if ((((stream->modeflags & __MASK_UNGOT) > 1) || stream->ungot[1])) {
+			stream->ungot_width[0] = 0;	/* Application ungot... */
+		} else {
+			stream->ungot_width[0] = stream->ungot_width[1]; /* scanf ungot */
+		}
+
 		wi = stream->ungot[(--stream->modeflags) & __MASK_UNGOT];
 		stream->ungot[1] = 0;
 		goto DONE;
@@ -196,7 +205,9 @@
 		++stream->bufend;
 	}
 
-	ungot_width = 0;
+	if (stream->state.mask == 0) { /* If last was a complete char */
+		stream->ungot_width[0] = 0;	/* then reset the width. */
+	}
 
  LOOP:
 	if ((n = stream->bufread - stream->bufpos) == 0) {
@@ -204,12 +215,12 @@
 	}
 
 	r = mbrtowc(wc, stream->bufpos, n, &stream->state);
-	if (((ssize_t) r) >= 0) {	/* Single byte... */
+	if (((ssize_t) r) >= 0) {	/* Success... */
 		if (r == 0) {			/* Nul wide char... means 0 byte for us so */
 			++r;				/* increment r and handle below as single. */
 		}
 		stream->bufpos += r;
-		stream->ungot_width[0] = ungot_width + r;
+		stream->ungot_width[0] += r;
 		wi = *wc;
 		goto DONE;
 	}
@@ -217,7 +228,7 @@
 	if (r == ((size_t) -2)) {
 		/* Potentially valid but incomplete and no more buffered. */
 		stream->bufpos += n;	/* Update bufpos for stream. */
-		ungot_width += n;
+		stream->ungot_width[0] += n;
 	FILL_BUFFER:
 		if (_stdio_fread(c, (size_t) 1, stream) > 0) {
 			assert(stream->bufpos == stream->bufstart + 1);
@@ -371,7 +382,8 @@
 #ifdef L_ungetwc
 /*
  * Note: This is the application-callable ungetwc.  If wscanf calls this, it
- * should also set stream->ungot[1] to 0 if this is the only ungot.
+ * should also set stream->ungot[1] to 0 if this is the only ungot, as well
+ * as reset stream->ungot_width[1] for use by _stdio_adjpos().
  */
 
 /* Reentrant. */
@@ -389,8 +401,7 @@
 	}
 	stream->modeflags |= __FLAG_WIDE;
 
-	/* If can't read or there's been an error, or c == EOF, or ungot slots
-	 * already filled, then return EOF */
+	/* If can't read or c == WEOF or ungot slots already filled, then fail. */
 	if ((stream->modeflags
 		 & (__MASK_UNGOT2|__FLAG_WRITEONLY
 #ifndef __STDIO_AUTO_RW_TRANSITION
@@ -406,14 +417,18 @@
 /*  ungot_width */
 
 #ifdef __STDIO_BUFFERS
-								/* TODO: shouldn't allow writing??? */
+#ifdef __STDIO_AUTO_RW_TRANSITION
 	if (stream->modeflags & __FLAG_WRITING) {
 		fflush_unlocked(stream); /* Commit any write-buffered chars. */
 	}
+#endif /* __STDIO_AUTO_RW_TRANSITION */
 #endif /* __STDIO_BUFFERS */
 
 	/* Clear EOF and WRITING flags, and set READING FLAG */
 	stream->modeflags &= ~(__FLAG_EOF|__FLAG_WRITING);
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning CONSIDER: Is setting the reading flag after an ungetwc necessary?
+#endif /* __UCLIBC_MJN3_ONLY__ */
 	stream->modeflags |= __FLAG_READING;
 	stream->ungot[1] = 1;		/* Flag as app ungetc call; wscanf fixes up. */
 	stream->ungot[(stream->modeflags++) & __MASK_UNGOT] = c;




More information about the uClibc-cvs mailing list