[git commit master] ls: unicode fixes

Denys Vlasenko vda.linux at googlemail.com
Sun Jan 31 04:15:38 UTC 2010


commit: http://git.busybox.net/busybox/commit/?id=d8528b8e56bab7643722e4453121882d23c23c07
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 TODO_unicode              |    2 +-
 coreutils/ls.c            |  412 ++++++++++++++++++++++++---------------------
 include/libbb.h           |   19 ++-
 include/unicode.h         |    5 -
 libbb/Kbuild              |    1 +
 libbb/printable_string.c  |   65 +++++++
 testsuite/ls.mk_uni_tests |  111 ++++++++++++
 testsuite/ls.tests        |  136 +++++++++++++++
 8 files changed, 545 insertions(+), 206 deletions(-)
 create mode 100644 libbb/printable_string.c
 create mode 100644 testsuite/ls.mk_uni_tests
 create mode 100755 testsuite/ls.tests

diff --git a/TODO_unicode b/TODO_unicode
index c29fd93..b310e8d 100644
--- a/TODO_unicode
+++ b/TODO_unicode
@@ -7,7 +7,7 @@ dumpleases
 Applets which may need unicode handling (more extensive than sanitizing
 of filenames in error messages):
 
-ls - uses unicode_strlen, not scrlen
+ls - work in progress
 expand, unexpand - uses unicode_strlen, not scrlen
 ash, hush through lineedit - uses unicode_strlen, not scrlen
 top - need to sanitize process args
diff --git a/coreutils/ls.c b/coreutils/ls.c
index 6c898b7..d004ce8 100644
--- a/coreutils/ls.c
+++ b/coreutils/ls.c
@@ -241,9 +241,6 @@ struct dnode {
 	IF_SELINUX(security_context_t sid;)
 };
 
-static struct dnode **list_dir(const char *, unsigned *);
-static unsigned list_single(const struct dnode *);
-
 struct globals {
 #if ENABLE_FEATURE_LS_COLOR
 	smallint show_color;
@@ -528,31 +525,236 @@ static void dnsort(struct dnode **dn, int size)
 #endif
 
 
-static void showfiles(struct dnode **dn, unsigned nfiles)
+static unsigned calc_name_len(const char *name)
+{
+	unsigned len;
+	uni_stat_t uni_stat;
+
+	// TODO: quote tab as \t, etc, if -Q
+	name = printable_string(&uni_stat, name);
+
+	if (!(option_mask32 & OPT_Q)) {
+		return uni_stat.unicode_width;
+	}
+
+	len = 2 + uni_stat.unicode_width;
+	while (*name) {
+		if (*name == '"' || *name == '\\') {
+			len++;
+		}
+		name++;
+	}
+	return len;
+}
+
+
+/* Return the number of used columns.
+ * Note that only STYLE_COLUMNS uses return value.
+ * STYLE_SINGLE and STYLE_LONG don't care.
+ * coreutils 7.2 also supports:
+ * ls -b (--escape) = octal escapes (although it doesn't seem to work)
+ * ls -N (--literal) = not escape at all
+ */
+static unsigned print_name(const char *name)
+{
+	unsigned len;
+	uni_stat_t uni_stat;
+
+	// TODO: quote tab as \t, etc, if -Q
+	name = printable_string(&uni_stat, name);
+
+	if (!(option_mask32 & OPT_Q)) {
+		fputs(name, stdout);
+		return uni_stat.unicode_width;
+	}
+
+	len = 2 + uni_stat.unicode_width;
+	putchar('"');
+	while (*name) {
+		if (*name == '"' || *name == '\\') {
+			putchar('\\');
+			len++;
+		}
+		putchar(*name++);
+	}
+	putchar('"');
+	return len;
+}
+
+/* Return the number of used columns.
+ * Note that only STYLE_COLUMNS uses return value,
+ * STYLE_SINGLE and STYLE_LONG don't care.
+ */
+static NOINLINE unsigned list_single(const struct dnode *dn)
 {
-	unsigned i, ncols, nrows, row, nc;
 	unsigned column = 0;
-	unsigned nexttab = 0;
-	unsigned column_width = 0; /* for STYLE_LONG and STYLE_SINGLE not used */
+	char *lpath = lpath; /* for compiler */
+#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
+	struct stat info;
+	char append;
+#endif
 
 	/* Never happens:
-	if (dn == NULL || nfiles < 1)
-		return;
+	if (dn->fullname == NULL)
+		return 0;
 	*/
 
-	if (all_fmt & STYLE_LONG) {
+#if ENABLE_FEATURE_LS_FILETYPES
+	append = append_char(dn->dstat.st_mode);
+#endif
+
+	/* Do readlink early, so that if it fails, error message
+	 * does not appear *inside* the "ls -l" line */
+	if (all_fmt & LIST_SYMLINK)
+		if (S_ISLNK(dn->dstat.st_mode))
+			lpath = xmalloc_readlink_or_warn(dn->fullname);
+
+	if (all_fmt & LIST_INO)
+		column += printf("%7llu ", (long long) dn->dstat.st_ino);
+	if (all_fmt & LIST_BLOCKS)
+		column += printf("%4"OFF_FMT"u ", (off_t) (dn->dstat.st_blocks >> 1));
+	if (all_fmt & LIST_MODEBITS)
+		column += printf("%-10s ", (char *) bb_mode_string(dn->dstat.st_mode));
+	if (all_fmt & LIST_NLINKS)
+		column += printf("%4lu ", (long) dn->dstat.st_nlink);
+#if ENABLE_FEATURE_LS_USERNAME
+	if (all_fmt & LIST_ID_NAME) {
+		if (option_mask32 & OPT_g) {
+			column += printf("%-8.8s ",
+				get_cached_username(dn->dstat.st_uid));
+		} else {
+			column += printf("%-8.8s %-8.8s ",
+				get_cached_username(dn->dstat.st_uid),
+				get_cached_groupname(dn->dstat.st_gid));
+		}
+	}
+#endif
+	if (all_fmt & LIST_ID_NUMERIC) {
+		if (option_mask32 & OPT_g)
+			column += printf("%-8u ", (int) dn->dstat.st_uid);
+		else
+			column += printf("%-8u %-8u ",
+					(int) dn->dstat.st_uid,
+					(int) dn->dstat.st_gid);
+	}
+	if (all_fmt & (LIST_SIZE /*|LIST_DEV*/ )) {
+		if (S_ISBLK(dn->dstat.st_mode) || S_ISCHR(dn->dstat.st_mode)) {
+			column += printf("%4u, %3u ",
+					(int) major(dn->dstat.st_rdev),
+					(int) minor(dn->dstat.st_rdev));
+		} else {
+			if (all_fmt & LS_DISP_HR) {
+				column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ",
+					/* print st_size, show one fractional, use suffixes */
+					make_human_readable_str(dn->dstat.st_size, 1, 0)
+				);
+			} else {
+				column += printf("%9"OFF_FMT"u ", (off_t) dn->dstat.st_size);
+			}
+		}
+	}
+#if ENABLE_FEATURE_LS_TIMESTAMPS
+	if (all_fmt & (LIST_FULLTIME|LIST_DATE_TIME)) {
+		char *filetime;
+		time_t ttime = dn->dstat.st_mtime;
+		if (all_fmt & TIME_ACCESS)
+			ttime = dn->dstat.st_atime;
+		if (all_fmt & TIME_CHANGE)
+			ttime = dn->dstat.st_ctime;
+		filetime = ctime(&ttime);
+		/* filetime's format: "Wed Jun 30 21:49:08 1993\n" */
+		if (all_fmt & LIST_FULLTIME)
+			column += printf("%.24s ", filetime);
+		else { /* LIST_DATE_TIME */
+			/* current_time_t ~== time(NULL) */
+			time_t age = current_time_t - ttime;
+			printf("%.6s ", filetime + 4); /* "Jun 30" */
+			if (age < 3600L * 24 * 365 / 2 && age > -15 * 60) {
+				/* hh:mm if less than 6 months old */
+				printf("%.5s ", filetime + 11);
+			} else { /* year. buggy if year > 9999 ;) */
+				printf(" %.4s ", filetime + 20);
+			}
+			column += 13;
+		}
+	}
+#endif
+#if ENABLE_SELINUX
+	if (all_fmt & LIST_CONTEXT) {
+		column += printf("%-32s ", dn->sid ? dn->sid : "unknown");
+		freecon(dn->sid);
+	}
+#endif
+	if (all_fmt & LIST_FILENAME) {
+#if ENABLE_FEATURE_LS_COLOR
+		if (show_color) {
+			info.st_mode = 0; /* for fgcolor() */
+			lstat(dn->fullname, &info);
+			printf("\033[%u;%um", bold(info.st_mode),
+					fgcolor(info.st_mode));
+		}
+#endif
+		column += print_name(dn->name);
+		if (show_color) {
+			printf("\033[0m");
+		}
+	}
+	if (all_fmt & LIST_SYMLINK) {
+		if (S_ISLNK(dn->dstat.st_mode) && lpath) {
+			printf(" -> ");
+#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
+#if ENABLE_FEATURE_LS_COLOR
+			info.st_mode = 0; /* for fgcolor() */
+#endif
+			if (stat(dn->fullname, &info) == 0) {
+				append = append_char(info.st_mode);
+			}
+#endif
+#if ENABLE_FEATURE_LS_COLOR
+			if (show_color) {
+				printf("\033[%u;%um", bold(info.st_mode),
+					   fgcolor(info.st_mode));
+			}
+#endif
+			column += print_name(lpath) + 4;
+			if (show_color) {
+				printf("\033[0m");
+			}
+			free(lpath);
+		}
+	}
+#if ENABLE_FEATURE_LS_FILETYPES
+	if (all_fmt & LIST_FILETYPE) {
+		if (append) {
+			putchar(append);
+			column++;
+		}
+	}
+#endif
+
+	return column;
+}
+
+static void showfiles(struct dnode **dn, unsigned nfiles)
+{
+	unsigned i, ncols, nrows, row, nc;
+	unsigned column = 0;
+	unsigned nexttab = 0;
+	unsigned column_width = 0; /* used only by STYLE_COLUMNS */
+
+	if (all_fmt & STYLE_LONG) { /* STYLE_LONG or STYLE_SINGLE */
 		ncols = 1;
 	} else {
 		/* find the longest file name, use that as the column width */
 		for (i = 0; dn[i]; i++) {
-			int len = unicode_strlen(dn[i]->name);
+			int len = calc_name_len(dn[i]->name);
 			if (column_width < len)
 				column_width = len;
 		}
 		column_width += tabstops +
 			IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + )
-			             ((all_fmt & LIST_INO) ? 8 : 0) +
-			             ((all_fmt & LIST_BLOCKS) ? 5 : 0);
+				((all_fmt & LIST_INO) ? 8 : 0) +
+				((all_fmt & LIST_BLOCKS) ? 5 : 0);
 		ncols = (int) (terminal_width / column_width);
 	}
 
@@ -618,6 +820,8 @@ static off_t calculate_blocks(struct dnode **dn)
 #endif
 
 
+static struct dnode **list_dir(const char *, unsigned *);
+
 static void showdirs(struct dnode **dn, int first)
 {
 	unsigned nfiles;
@@ -733,188 +937,6 @@ static struct dnode **list_dir(const char *path, unsigned *nfiles_p)
 }
 
 
-static int print_name(const char *name)
-{
-	if (option_mask32 & OPT_Q) {
-#if ENABLE_FEATURE_ASSUME_UNICODE
-		unsigned len = 2 + unicode_strlen(name);
-#else
-		unsigned len = 2;
-#endif
-		putchar('"');
-		while (*name) {
-			if (*name == '"') {
-				putchar('\\');
-				len++;
-			}
-			putchar(*name++);
-			if (!ENABLE_FEATURE_ASSUME_UNICODE)
-				len++;
-		}
-		putchar('"');
-		return len;
-	}
-	/* No -Q: */
-#if ENABLE_FEATURE_ASSUME_UNICODE
-	fputs(name, stdout);
-	return unicode_strlen(name);
-#else
-	return printf("%s", name);
-#endif
-}
-
-
-static NOINLINE unsigned list_single(const struct dnode *dn)
-{
-	unsigned column = 0;
-	char *lpath = lpath; /* for compiler */
-#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
-	struct stat info;
-	char append;
-#endif
-
-	/* Never happens:
-	if (dn->fullname == NULL)
-		return 0;
-	*/
-
-#if ENABLE_FEATURE_LS_FILETYPES
-	append = append_char(dn->dstat.st_mode);
-#endif
-
-	/* Do readlink early, so that if it fails, error message
-	 * does not appear *inside* the "ls -l" line */
-	if (all_fmt & LIST_SYMLINK)
-		if (S_ISLNK(dn->dstat.st_mode))
-			lpath = xmalloc_readlink_or_warn(dn->fullname);
-
-	if (all_fmt & LIST_INO)
-		column += printf("%7llu ", (long long) dn->dstat.st_ino);
-	if (all_fmt & LIST_BLOCKS)
-		column += printf("%4"OFF_FMT"u ", (off_t) (dn->dstat.st_blocks >> 1));
-	if (all_fmt & LIST_MODEBITS)
-		column += printf("%-10s ", (char *) bb_mode_string(dn->dstat.st_mode));
-	if (all_fmt & LIST_NLINKS)
-		column += printf("%4lu ", (long) dn->dstat.st_nlink);
-#if ENABLE_FEATURE_LS_USERNAME
-	if (all_fmt & LIST_ID_NAME) {
-		if (option_mask32 & OPT_g) {
-			column += printf("%-8.8s ",
-				get_cached_username(dn->dstat.st_uid));
-		} else {
-			column += printf("%-8.8s %-8.8s ",
-				get_cached_username(dn->dstat.st_uid),
-				get_cached_groupname(dn->dstat.st_gid));
-		}
-	}
-#endif
-	if (all_fmt & LIST_ID_NUMERIC) {
-		if (option_mask32 & OPT_g)
-			column += printf("%-8u ", (int) dn->dstat.st_uid);
-		else
-			column += printf("%-8u %-8u ",
-					(int) dn->dstat.st_uid,
-					(int) dn->dstat.st_gid);
-	}
-	if (all_fmt & (LIST_SIZE /*|LIST_DEV*/ )) {
-		if (S_ISBLK(dn->dstat.st_mode) || S_ISCHR(dn->dstat.st_mode)) {
-			column += printf("%4u, %3u ",
-					(int) major(dn->dstat.st_rdev),
-					(int) minor(dn->dstat.st_rdev));
-		} else {
-			if (all_fmt & LS_DISP_HR) {
-				column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ",
-					/* print st_size, show one fractional, use suffixes */
-					make_human_readable_str(dn->dstat.st_size, 1, 0)
-				);
-			} else {
-				column += printf("%9"OFF_FMT"u ", (off_t) dn->dstat.st_size);
-			}
-		}
-	}
-#if ENABLE_FEATURE_LS_TIMESTAMPS
-	if (all_fmt & (LIST_FULLTIME|LIST_DATE_TIME)) {
-		char *filetime;
-		time_t ttime = dn->dstat.st_mtime;
-		if (all_fmt & TIME_ACCESS)
-			ttime = dn->dstat.st_atime;
-		if (all_fmt & TIME_CHANGE)
-			ttime = dn->dstat.st_ctime;
-		filetime = ctime(&ttime);
-		/* filetime's format: "Wed Jun 30 21:49:08 1993\n" */
-		if (all_fmt & LIST_FULLTIME)
-			column += printf("%.24s ", filetime);
-		else { /* LIST_DATE_TIME */
-			/* current_time_t ~== time(NULL) */
-			time_t age = current_time_t - ttime;
-			printf("%.6s ", filetime + 4); /* "Jun 30" */
-			if (age < 3600L * 24 * 365 / 2 && age > -15 * 60) {
-				/* hh:mm if less than 6 months old */
-				printf("%.5s ", filetime + 11);
-			} else { /* year. buggy if year > 9999 ;) */
-				printf(" %.4s ", filetime + 20);
-			}
-			column += 13;
-		}
-	}
-#endif
-#if ENABLE_SELINUX
-	if (all_fmt & LIST_CONTEXT) {
-		column += printf("%-32s ", dn->sid ? dn->sid : "unknown");
-		freecon(dn->sid);
-	}
-#endif
-	if (all_fmt & LIST_FILENAME) {
-#if ENABLE_FEATURE_LS_COLOR
-		if (show_color) {
-			info.st_mode = 0; /* for fgcolor() */
-			lstat(dn->fullname, &info);
-			printf("\033[%u;%um", bold(info.st_mode),
-					fgcolor(info.st_mode));
-		}
-#endif
-		column += print_name(dn->name);
-		if (show_color) {
-			printf("\033[0m");
-		}
-	}
-	if (all_fmt & LIST_SYMLINK) {
-		if (S_ISLNK(dn->dstat.st_mode) && lpath) {
-			printf(" -> ");
-#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
-#if ENABLE_FEATURE_LS_COLOR
-			info.st_mode = 0; /* for fgcolor() */
-#endif
-			if (stat(dn->fullname, &info) == 0) {
-				append = append_char(info.st_mode);
-			}
-#endif
-#if ENABLE_FEATURE_LS_COLOR
-			if (show_color) {
-				printf("\033[%u;%um", bold(info.st_mode),
-					   fgcolor(info.st_mode));
-			}
-#endif
-			column += print_name(lpath) + 4;
-			if (show_color) {
-				printf("\033[0m");
-			}
-			free(lpath);
-		}
-	}
-#if ENABLE_FEATURE_LS_FILETYPES
-	if (all_fmt & LIST_FILETYPE) {
-		if (append) {
-			putchar(append);
-			column++;
-		}
-	}
-#endif
-
-	return column;
-}
-
-
 int ls_main(int argc UNUSED_PARAM, char **argv)
 {
 	struct dnode **dnd;
diff --git a/include/libbb.h b/include/libbb.h
index 73aea40..a86d644 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -577,11 +577,6 @@ char *strncpy_IFNAMSIZ(char *dst, const char *src) FAST_FUNC;
  * But potentially slow, don't use in one-billion-times loops */
 int bb_putchar(int ch) FAST_FUNC;
 char *xasprintf(const char *format, ...) __attribute__ ((format(printf, 1, 2))) FAST_FUNC RETURNS_MALLOC;
-/* Prints unprintable chars ch as ^C or M-c to file
- * (M-c is used only if ch is ORed with PRINTABLE_META),
- * else it is printed as-is (except for ch = 0x9b) */
-enum { PRINTABLE_META = 0x100 };
-void fputc_printable(int ch, FILE *file) FAST_FUNC;
 // gcc-4.1.1 still isn't good enough at optimizing it
 // (+200 bytes compared to macro)
 //static ALWAYS_INLINE
@@ -594,6 +589,20 @@ void fputc_printable(int ch, FILE *file) FAST_FUNC;
 #define NOT_LONE_CHAR(s,c) ((s)[0] != (c) || (s)[1])
 #define DOT_OR_DOTDOT(s) ((s)[0] == '.' && (!(s)[1] || ((s)[1] == '.' && !(s)[2])))
 
+typedef struct uni_stat_t {
+	unsigned byte_count;
+	unsigned unicode_count;
+	unsigned unicode_width;
+} uni_stat_t;
+/* Returns a string with unprintable chars replaced by '?' or
+ * SUBST_WCHAR. This function is unicode-aware. */
+const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str);
+/* Prints unprintable char ch as ^C or M-c to file
+ * (M-c is used only if ch is ORed with PRINTABLE_META),
+ * else it is printed as-is (except for ch = 0x9b) */
+enum { PRINTABLE_META = 0x100 };
+void fputc_printable(int ch, FILE *file) FAST_FUNC;
+
 /* dmalloc will redefine these to it's own implementation. It is safe
  * to have the prototypes here unconditionally.  */
 void *malloc_or_warn(size_t size) FAST_FUNC RETURNS_MALLOC;
diff --git a/include/unicode.h b/include/unicode.h
index f32e565..25ef740 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -23,11 +23,6 @@ size_t FAST_FUNC unicode_strlen(const char *string);
 enum {
 	UNI_FLAG_PAD = (1 << 0),
 };
-typedef struct uni_stat_t {
-	unsigned byte_count;
-	unsigned unicode_count;
-	unsigned unicode_width;
-} uni_stat_t;
 //UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src);
 //UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags);
 char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src);
diff --git a/libbb/Kbuild b/libbb/Kbuild
index 243626d..7e79310 100644
--- a/libbb/Kbuild
+++ b/libbb/Kbuild
@@ -73,6 +73,7 @@ lib-y += perror_nomsg_and_die.o
 lib-y += pidfile.o
 lib-y += platform.o
 lib-y += printable.o
+lib-y += printable_string.o
 lib-y += print_flags.o
 lib-y += process_escape_sequence.o
 lib-y += procps.o
diff --git a/libbb/printable_string.c b/libbb/printable_string.c
new file mode 100644
index 0000000..47565de
--- /dev/null
+++ b/libbb/printable_string.c
@@ -0,0 +1,65 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * Unicode support routines.
+ *
+ * Copyright (C) 2010 Denys Vlasenko
+ *
+ * Licensed under GPL version 2, see file LICENSE in this tarball for details.
+ */
+#include "libbb.h"
+#include "unicode.h"
+
+const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
+{
+	static char *saved[4];
+	static unsigned cur_saved; /* = 0 */
+
+	char *dst;
+	const char *s;
+
+	s = str;
+	while (1) {
+		unsigned char c = *s;
+		if (c == '\0') {
+			/* 99+% of inputs do not need conversion */
+			if (stats) {
+				stats->byte_count = (s - str);
+				stats->unicode_count = (s - str);
+				stats->unicode_width = (s - str);
+			}
+			return str;
+		}
+		if (c < ' ')
+			break;
+		if (c >= 0x7f)
+			break;
+		s++;
+	}
+
+#if ENABLE_FEATURE_ASSUME_UNICODE
+	dst = unicode_conv_to_printable(stats, str);
+#else
+	{
+		char *d = dst = xstrdup(str);
+		while (1) {
+			unsigned char c = *d;
+			if (c == '\0')
+				break;
+			if (c < ' ' || c >= 0x7f)
+				*d = '?';
+			d++;
+		}
+		if (stats) {
+			stats->byte_count = (d - dst);
+			stats->unicode_count = (d - dst);
+			stats->unicode_width = (d - dst);
+		}
+	}
+#endif
+
+	free(saved[cur_saved]);
+	saved[cur_saved] = dst;
+	cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);
+
+	return dst;
+}
diff --git a/testsuite/ls.mk_uni_tests b/testsuite/ls.mk_uni_tests
new file mode 100644
index 0000000..da0c29f
--- /dev/null
+++ b/testsuite/ls.mk_uni_tests
@@ -0,0 +1,111 @@
+# DO NOT EDIT THIS FILE! MOST TEXT EDITORS WILL DAMAGE IT!
+>'0001_1__Some_correct_UTF-8_text___________________________________________|'
+>'0002_2__Boundary_condition_test_cases_____________________________________|'
+>'0003_2.1__First_possible_sequence_of_a_certain_length_____________________|'
+>'0004_2.1.2__2_bytes__U-00000080_:________"€"______________________________|'
+>'0005_2.1.3__3_bytes__U-00000800_:________"à €"______________________________|'
+>'0006_2.1.4__4_bytes__U-00010000_:________"𐀀"______________________________|'
+>'0007_2.1.5__5_bytes__U-00200000_:________"øˆ€€€"______________________________|'
+>'0008_2.1.6__6_bytes__U-04000000_:________"ü„€€€€"______________________________|'
+>'0009_2.2__Last_possible_sequence_of_a_certain_length______________________|'
+>'0010_2.2.1__1_byte___U-0000007F_:________""______________________________|'
+>'0011_2.2.2__2_bytes__U-000007FF_:________"ß¿"______________________________|'
+>'0012_2.2.3__3_bytes__U-0000FFFF_:________"ï¿¿"______________________________|'
+>'0013_2.2.4__4_bytes__U-001FFFFF_:________"÷¿¿¿"______________________________|'
+>'0014_2.2.5__5_bytes__U-03FFFFFF_:________"û¿¿¿¿"______________________________|'
+>'0015_2.2.6__6_bytes__U-7FFFFFFF_:________"ý¿¿¿¿¿"______________________________|'
+>'0016_2.3__Other_boundary_conditions_______________________________________|'
+>'0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"퟿"___________________________________|'
+>'0018_2.3.2__U-0000E000_=_ee_80_80_=_""___________________________________|'
+>'0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"�"___________________________________|'
+>'0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"􏿿"________________________________|'
+>'0021_2.3.5__U-00110000_=_f4_90_80_80_=_"ô€€"________________________________|'
+>'0022_3__Malformed_sequences_______________________________________________|'
+>'0023_3.1__Unexpected_continuation_bytes___________________________________|'
+>'0024_3.1.1__First_continuation_byte_0x80:_"€"_____________________________|'
+>'0025_3.1.2__Last__continuation_byte_0xbf:_"¿"_____________________________|'
+>'0026_3.1.3__2_continuation_bytes:_"€¿"____________________________________|'
+>'0027_3.1.4__3_continuation_bytes:_"€¿€"___________________________________|'
+>'0028_3.1.5__4_continuation_bytes:_"€¿€¿"__________________________________|'
+>'0029_3.1.6__5_continuation_bytes:_"€¿€¿€"_________________________________|'
+>'0030_3.1.7__6_continuation_bytes:_"€¿€¿€¿"________________________________|'
+>'0031_3.1.8__7_continuation_bytes:_"€¿€¿€¿€"_______________________________|'
+>'0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|'
+>'0033____"€‚ƒ„…†‡ˆ‰Š‹ŒŽ_________________________________________________|'
+>'0034_____‘’“”•–—˜™š›œžŸ_________________________________________________|'
+>'0035_____ ¡¢£¤¥¦§¨©ª«¬­®¯_________________________________________________|'
+>'0036_____°±²³´µ¶·¸¹º»¼½¾¿"________________________________________________|'
+>'0037_3.2__Lonely_start_characters_________________________________________|'
+>'0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|'
+>'0039________each_followed_by_a_space_character:___________________________|'
+>'0040____"À_Á_Â_Ã_Ä_Å_Æ_Ç_È_É_Ê_Ë_Ì_Í_Î_Ï__________________________________|'
+>'0041_____Ð_Ñ_Ò_Ó_Ô_Õ_Ö_×_Ø_Ù_Ú_Û_Ü_Ý_Þ_ß_"________________________________|'
+>'0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|'
+>'0043________each_followed_by_a_space_character:___________________________|'
+>'0044____"à_á_â_ã_ä_å_æ_ç_è_é_ê_ë_ì_í_î_ï_"________________________________|'
+>'0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|'
+>'0046________each_followed_by_a_space_character:___________________________|'
+>'0047____"ð_ñ_ò_ó_ô_õ_ö_÷_"________________________________________________|'
+>'0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|'
+>'0049________each_followed_by_a_space_character:___________________________|'
+>'0050____"ø_ù_ú_û_"________________________________________________________|'
+>'0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|'
+>'0052________each_followed_by_a_space_character:___________________________|'
+>'0053____"ü_ý_"____________________________________________________________|'
+>'0054_3.3__Sequences_with_last_continuation_byte_missing___________________|'
+>'0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"À"______|'
+>'0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"à€"______|'
+>'0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"ð€€"______|'
+>'0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"ø€€€"______|'
+>'0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"ü€€€€"______|'
+>'0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"ß"______|'
+>'0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"ï¿"______|'
+>'0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"÷¿¿"______|'
+>'0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"û¿¿¿"______|'
+>'0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"ý¿¿¿¿"______|'
+>'0065_3.4__Concatenation_of_incomplete_sequences___________________________|'
+>'0066____"Àà€ð€€ø€€€ü€€€€ßï¿÷¿¿û¿¿¿ý¿¿¿¿"______________________________________________________|'
+>'0067_3.5__Impossible_bytes________________________________________________|'
+>'0068_3.5.1__fe_=_"þ"______________________________________________________|'
+>'0069_3.5.2__ff_=_"ÿ"______________________________________________________|'
+>'0070_3.5.3__fe_fe_ff_ff_=_"þþÿÿ"__________________________________________|'
+>'0071_4__Overlong_sequences________________________________________________|'
+>'0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|'
+>'0073_4.1.1_U+002F_=_c0_af_____________=_"À¯"_______________________________|'
+>'0074_4.1.2_U+002F_=_e0_80_af__________=_"à€¯"_______________________________|'
+>'0075_4.1.3_U+002F_=_f0_80_80_af_______=_"ð€€¯"_______________________________|'
+>'0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"ø€€€¯"_______________________________|'
+>'0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"ü€€€€¯"_______________________________|'
+>'0078_4.2__Maximum_overlong_sequences______________________________________|'
+>'0079_4.2.1__U-0000007F_=_c1_bf_____________=_"Á¿"__________________________|'
+>'0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"àŸ¿"__________________________|'
+>'0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"ð¿¿"__________________________|'
+>'0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"ø‡¿¿¿"__________________________|'
+>'0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"üƒ¿¿¿¿"__________________________|'
+>'0084_4.3__Overlong_representation_of_the_NUL_character____________________|'
+>'0085_4.3.1__U+0000_=_c0_80_____________=_"À€"______________________________|'
+>'0086_4.3.2__U+0000_=_e0_80_80__________=_"à€€"______________________________|'
+>'0087_4.3.3__U+0000_=_f0_80_80_80_______=_"ð€€€"______________________________|'
+>'0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"ø€€€€"______________________________|'
+>'0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"ü€€€€€"______________________________|'
+>'0090_5__Illegal_code_positions____________________________________________|'
+>'0091_5.1_Single_UTF-16_surrogates_________________________________________|'
+>'0092_5.1.1__U+D800_=_ed_a0_80_=_"í €"_______________________________________|'
+>'0093_5.1.2__U+DB7F_=_ed_ad_bf_=_"í­¿"_______________________________________|'
+>'0094_5.1.3__U+DB80_=_ed_ae_80_=_"í®€"_______________________________________|'
+>'0095_5.1.4__U+DBFF_=_ed_af_bf_=_"í¯¿"_______________________________________|'
+>'0096_5.1.5__U+DC00_=_ed_b0_80_=_"í°€"_______________________________________|'
+>'0097_5.1.6__U+DF80_=_ed_be_80_=_"í¾€"_______________________________________|'
+>'0098_5.1.7__U+DFFF_=_ed_bf_bf_=_"í¿¿"_______________________________________|'
+>'0099_5.2_Paired_UTF-16_surrogates_________________________________________|'
+>'0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_"𐀀"______________________|'
+>'0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_"𐏿"______________________|'
+>'0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_"í­¿í°€"______________________|'
+>'0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_"í­¿í¿¿"______________________|'
+>'0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_"󰀀"______________________|'
+>'0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_"󰏿"______________________|'
+>'0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_"􏰀"______________________|'
+>'0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_"􏿿"______________________|'
+>'0108_5.3_Other_illegal_code_positions_____________________________________|'
+>'0109_5.3.1__U+FFFE_=_ef_bf_be_=_"￾"_______________________________________|'
+>'0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"ï¿¿"_______________________________________|'
diff --git a/testsuite/ls.tests b/testsuite/ls.tests
new file mode 100755
index 0000000..b0c5da7
--- /dev/null
+++ b/testsuite/ls.tests
@@ -0,0 +1,136 @@
+#!/bin/sh
+# Copyright 2010 by Denys Vlasenko
+# Licensed under GPL v2, see file LICENSE for details.
+
+. ./testing.sh
+
+test -f "$bindir/.config" && . "$bindir/.config"
+
+rm -rf ls.testdir 2>/dev/null  # drop leftovers of an earlier run; rm complains on stderr
+mkdir ls.testdir || exit 1     # give up if the scratch directory cannot be created
+
+# testing "test name" "command" "expected result" "file input" "stdin"
+
+# NOTE: the expected output below is not fully correct yet - in a correct
+# result every | char would line up in the same column.
+test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
+&& test x"$CONFIG_SUBST_WCHAR" = x"63" \
+&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \
+&& testing "ls unicode test" \
+"(cd ls.testdir && sh ../ls.mk_uni_tests) && ls -1 ls.testdir" \
+'0001_1__Some_correct_UTF-8_text___________________________________________|
+0002_2__Boundary_condition_test_cases_____________________________________|
+0003_2.1__First_possible_sequence_of_a_certain_length_____________________|
+0004_2.1.2__2_bytes__U-00000080_:________"?"______________________________|
+0005_2.1.3__3_bytes__U-00000800_:________"?"______________________________|
+0006_2.1.4__4_bytes__U-00010000_:________"?"______________________________|
+0007_2.1.5__5_bytes__U-00200000_:________"?"______________________________|
+0008_2.1.6__6_bytes__U-04000000_:________"?"______________________________|
+0009_2.2__Last_possible_sequence_of_a_certain_length______________________|
+0010_2.2.1__1_byte___U-0000007F_:________"?"______________________________|
+0011_2.2.2__2_bytes__U-000007FF_:________"?"______________________________|
+0012_2.2.3__3_bytes__U-0000FFFF_:________"?"______________________________|
+0013_2.2.4__4_bytes__U-001FFFFF_:________"?"______________________________|
+0014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________|
+0015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________|
+0016_2.3__Other_boundary_conditions_______________________________________|
+0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"?"___________________________________|
+0018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________|
+0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"?"___________________________________|
+0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________|
+0021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________|
+0022_3__Malformed_sequences_______________________________________________|
+0023_3.1__Unexpected_continuation_bytes___________________________________|
+0024_3.1.1__First_continuation_byte_0x80:_"?"_____________________________|
+0025_3.1.2__Last__continuation_byte_0xbf:_"?"_____________________________|
+0026_3.1.3__2_continuation_bytes:_"??"____________________________________|
+0027_3.1.4__3_continuation_bytes:_"???"___________________________________|
+0028_3.1.5__4_continuation_bytes:_"????"__________________________________|
+0029_3.1.6__5_continuation_bytes:_"?????"_________________________________|
+0030_3.1.7__6_continuation_bytes:_"??????"________________________________|
+0031_3.1.8__7_continuation_bytes:_"???????"_______________________________|
+0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|
+0033____"????????????????_________________________________________________|
+0034_____????????????????_________________________________________________|
+0035_____????????????????_________________________________________________|
+0036_____????????????????"________________________________________________|
+0037_3.2__Lonely_start_characters_________________________________________|
+0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|
+0039________each_followed_by_a_space_character:___________________________|
+0040____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?__________________________________|
+0041_____?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________|
+0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|
+0043________each_followed_by_a_space_character:___________________________|
+0044____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________|
+0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|
+0046________each_followed_by_a_space_character:___________________________|
+0047____"?_?_?_?_?_?_?_?_"________________________________________________|
+0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|
+0049________each_followed_by_a_space_character:___________________________|
+0050____"?_?_?_?_"________________________________________________________|
+0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|
+0052________each_followed_by_a_space_character:___________________________|
+0053____"?_?_"____________________________________________________________|
+0054_3.3__Sequences_with_last_continuation_byte_missing___________________|
+0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______|
+0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"??"______|
+0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"???"______|
+0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"????"______|
+0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"?????"______|
+0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"?"______|
+0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"??"______|
+0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"???"______|
+0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"????"______|
+0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"?????"______|
+0065_3.4__Concatenation_of_incomplete_sequences___________________________|
+0066____"??????????????????????????????"______________________________________________________|
+0067_3.5__Impossible_bytes________________________________________________|
+0068_3.5.1__fe_=_"?"______________________________________________________|
+0069_3.5.2__ff_=_"?"______________________________________________________|
+0070_3.5.3__fe_fe_ff_ff_=_"????"__________________________________________|
+0071_4__Overlong_sequences________________________________________________|
+0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|
+0073_4.1.1_U+002F_=_c0_af_____________=_"??"_______________________________|
+0074_4.1.2_U+002F_=_e0_80_af__________=_"???"_______________________________|
+0075_4.1.3_U+002F_=_f0_80_80_af_______=_"????"_______________________________|
+0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"?????"_______________________________|
+0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"??????"_______________________________|
+0078_4.2__Maximum_overlong_sequences______________________________________|
+0079_4.2.1__U-0000007F_=_c1_bf_____________=_"??"__________________________|
+0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"?"__________________________|
+0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"?"__________________________|
+0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"?"__________________________|
+0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"?"__________________________|
+0084_4.3__Overlong_representation_of_the_NUL_character____________________|
+0085_4.3.1__U+0000_=_c0_80_____________=_"??"______________________________|
+0086_4.3.2__U+0000_=_e0_80_80__________=_"???"______________________________|
+0087_4.3.3__U+0000_=_f0_80_80_80_______=_"????"______________________________|
+0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"?????"______________________________|
+0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"??????"______________________________|
+0090_5__Illegal_code_positions____________________________________________|
+0091_5.1_Single_UTF-16_surrogates_________________________________________|
+0092_5.1.1__U+D800_=_ed_a0_80_=_"?"_______________________________________|
+0093_5.1.2__U+DB7F_=_ed_ad_bf_=_"?"_______________________________________|
+0094_5.1.3__U+DB80_=_ed_ae_80_=_"?"_______________________________________|
+0095_5.1.4__U+DBFF_=_ed_af_bf_=_"?"_______________________________________|
+0096_5.1.5__U+DC00_=_ed_b0_80_=_"?"_______________________________________|
+0097_5.1.6__U+DF80_=_ed_be_80_=_"?"_______________________________________|
+0098_5.1.7__U+DFFF_=_ed_bf_bf_=_"?"_______________________________________|
+0099_5.2_Paired_UTF-16_surrogates_________________________________________|
+0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_"??"______________________|
+0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_"??"______________________|
+0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_"??"______________________|
+0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_"??"______________________|
+0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_"??"______________________|
+0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_"??"______________________|
+0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_"??"______________________|
+0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_"??"______________________|
+0108_5.3_Other_illegal_code_positions_____________________________________|
+0109_5.3.1__U+FFFE_=_ef_bf_be_=_"?"_______________________________________|
+0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"?"_______________________________________|
+' "" ""
+
+# Clean up
+rm -rf ls.testdir 2>/dev/null
+
+exit $FAILCOUNT
-- 
1.6.3.3



More information about the busybox-cvs mailing list