svn commit: trunk/busybox: coreutils testsuite

vda at busybox.net vda at busybox.net
Sun Mar 1 04:50:18 UTC 2009


Author: vda
Date: 2009-03-01 04:50:18 +0000 (Sun, 01 Mar 2009)
New Revision: 25471

Log:
tr: support [:xdigit:], fix handling of ranges and [x]'s.
 add testsuite entry for each of 3 bugs fixed.

function                                             old     new   delta      
static.classes                                        73      82      +9      
expand                                              1738    1743      +5      
complement                                            74      72      -2      
tr_main                                              472     463      -9      
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/2 up/down: 14/-11)              Total: 3 bytes



Added:
   trunk/busybox/testsuite/tr.tests

Modified:
   trunk/busybox/coreutils/tr.c


Changeset:
Modified: trunk/busybox/coreutils/tr.c
===================================================================
--- trunk/busybox/coreutils/tr.c	2009-02-27 22:51:30 UTC (rev 25470)
+++ trunk/busybox/coreutils/tr.c	2009-03-01 04:50:18 UTC (rev 25471)
@@ -23,11 +23,11 @@
 #define ASCII 0377
 
 static void map(char *pvector,
-		unsigned char *string1, unsigned int string1_len,
-		unsigned char *string2, unsigned int string2_len)
+		unsigned char *string1, unsigned string1_len,
+		unsigned char *string2, unsigned string2_len)
 {
 	char last = '0';
-	unsigned int i, j;
+	unsigned i, j;
 
 	for (j = 0, i = 0; i < string1_len; i++) {
 		if (string2_len <= j)
@@ -39,12 +39,11 @@
 
 /* supported constructs:
  *   Ranges,  e.g.,  0-9   ==>  0123456789
- *   Ranges,  e.g.,  [0-9] ==>  0123456789
  *   Escapes, e.g.,  \a    ==>  Control-G
  *   Character classes, e.g. [:upper:] ==> A...Z
  *   Equiv classess, e.g. [=A=] ==> A   (hmmmmmmm?)
  */
-static unsigned int expand(const char *arg, char *buffer)
+static unsigned expand(const char *arg, char *buffer)
 {
 	char *buffer_start = buffer;
 	unsigned i; /* can't be unsigned char: must be able to hold 256 */
@@ -77,7 +76,8 @@
 				static const char classes[] ALIGN1 =
 					"alpha"CLO "alnum"CLO "digit"CLO
 					"lower"CLO "upper"CLO "space"CLO
-					"blank"CLO "punct"CLO "cntrl"CLO;
+					"blank"CLO "punct"CLO "cntrl"CLO
+					"xdigit"CLO;
 #define CLASS_invalid 0 /* we increment the retval */
 #define CLASS_alpha 1
 #define CLASS_alnum 2
@@ -88,16 +88,17 @@
 #define CLASS_blank 7
 #define CLASS_punct 8
 #define CLASS_cntrl 9
-//#define CLASS_xdigit 10
+#define CLASS_xdigit 10
 //#define CLASS_graph 11
 //#define CLASS_print 12
 				smalluint j;
-				{ /* not really pretty.. */
-					char *tmp = xstrndup(arg, 7); // warning: xdigit would need 8, not 7
+				{
+					/* xdigit needs 8, not 7 */
+					char *tmp = xstrndup(arg, 7 + (arg[0]=='x'));
 					j = index_in_strings(classes, tmp) + 1;
 					free(tmp);
 				}
-				if (j == CLASS_alnum || j == CLASS_digit) {
+				if (j == CLASS_alnum || j == CLASS_digit || j == CLASS_xdigit) {
 					for (i = '0'; i <= '9'; i++)
 						*buffer++ = i;
 				}
@@ -125,6 +126,12 @@
 						 || (j == CLASS_cntrl && iscntrl(i)))
 							*buffer++ = i;
 				}
+				if (j == CLASS_xdigit) {
+					for (i = 'A'; i <= 'F'; i++) {
+						*buffer++ = i;
+						*buffer++ = i | 0x20;
+					}
+				}
 				if (j == CLASS_invalid) {
 					*buffer++ = '[';
 					*buffer++ = ':';
@@ -140,19 +147,14 @@
 				arg += 3;	/* skip CHAR=] */
 				continue;
 			}
-			if (i == '\0' || *arg != '-') { /* not [x-...] - copy verbatim */
-				*buffer++ = '[';
-				arg--; /* points to x */
-				continue; /* copy all, including eventual ']' */
-			}
-			/* [x-z] */
-			arg++; /* skip - */
-			if (arg[0] == '\0' || arg[1] != ']')
-				bb_show_usage();
-			ac = *arg++;
-			while (i <= ac)
-				*buffer++ = i++;
-			arg++;	/* skip ] */
+			/* The rest of [xyz... cases is treated as normal
+			 * string, '[' has no special meaning here:
+			 * tr "[a-z]" "[A-Z]" can be written as tr "a-z" "A-Z",
+			 * also try tr "[a-z]" "_A-Z+" and you'll see that
+			 * [] is not special here.
+			 */
+			*buffer++ = '[';
+			arg--; /* points to x */
 			continue;
 		}
 		*buffer++ = *arg++;
@@ -162,29 +164,30 @@
 
 static int complement(char *buffer, int buffer_len)
 {
-	int i, j, ix;
+	int ch, j, len;
 	char conv[ASCII + 2];
 
-	ix = 0;
-	for (i = '\0'; i <= ASCII; i++) {
+	len = 0;
+	for (ch = '\0'; ch <= ASCII; ch++) {
 		for (j = 0; j < buffer_len; j++)
-			if (buffer[j] == i)
-				break;
-		if (j == buffer_len)
-			conv[ix++] = i & ASCII;
+			if (buffer[j] == ch)
+				goto next_ch;
+		/* Didn't find it */
+		conv[len++] = (char) ch;
+ next_ch:
+		continue;
 	}
-	memcpy(buffer, conv, ix);
-	return ix;
+	memcpy(buffer, conv, len);
+	return len;
 }
 
 int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 int tr_main(int argc UNUSED_PARAM, char **argv)
 {
-	int output_length = 0, input_length;
 	int i;
 	smalluint flags;
-	ssize_t read_chars = 0;
-	size_t in_index = 0, out_index = 0;
+	ssize_t read_chars;
+	size_t in_index, out_index;
 	unsigned last = UCHAR_MAX + 1; /* not equal to any char */
 	unsigned char coded, c;
 	unsigned char *output = xmalloc(BUFSIZ);
@@ -206,6 +209,9 @@
 
 #define tr_buf bb_common_bufsiz1
 	if (*argv != NULL) {
+		int output_length = 0;
+		int input_length;
+
 		input_length = expand(*argv++, tr_buf);
 		if (flags & TR_OPT_complement)
 			input_length = complement(tr_buf, input_length);
@@ -221,30 +227,33 @@
 			outvec[output[i]] = TRUE;
 	}
 
+	goto start_from;
+
 	for (;;) {
 		/* If we're out of input, flush output and read more input. */
 		if ((ssize_t)in_index == read_chars) {
 			if (out_index) {
 				xwrite(STDOUT_FILENO, (char *)output, out_index);
+ start_from:
 				out_index = 0;
 			}
 			read_chars = safe_read(STDIN_FILENO, tr_buf, BUFSIZ);
 			if (read_chars <= 0) {
 				if (read_chars < 0)
 					bb_perror_msg_and_die(bb_msg_read_error);
-				exit(EXIT_SUCCESS);
+				break;
 			}
 			in_index = 0;
 		}
 		c = tr_buf[in_index++];
-		coded = vector[c];
 		if ((flags & TR_OPT_delete) && invec[c])
 			continue;
+		coded = vector[c];
 		if ((flags & TR_OPT_squeeze_reps) && last == coded
 		 && (invec[c] || outvec[coded]))
 			continue;
 		output[out_index++] = last = coded;
 	}
-	/* NOTREACHED */
+
 	return EXIT_SUCCESS;
 }

Added: trunk/busybox/testsuite/tr.tests
===================================================================
--- trunk/busybox/testsuite/tr.tests	                        (rev 0)
+++ trunk/busybox/testsuite/tr.tests	2009-03-01 04:50:18 UTC (rev 25471)
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+# Copyright 2009 by Denys Vlasenko <vda.linux at googlemail.com>
+# Licensed under GPL v2, see file LICENSE for details.
+
+. testing.sh
+
+# testing "description" "arguments" "result" "infile" "stdin"
+
+testing "tr does not treat [] in [a-z] as special" \
+	"tr '[q-z]' '_Q-Z+'" \
+	"_QWe+" "" "[qwe]"
+
+testing "tr understands 0-9A-F" \
+	"tr -cd '[0-9A-F]'" \
+	"19AF" "" "19AFH\n"
+
+testing "tr understands [:xdigit:]" \
+	"tr -cd '[:xdigit:]'" \
+	"19AF" "" "19AFH\n"
+
+exit $FAILCOUNT



More information about the busybox-cvs mailing list