[git commit] hush: make ${#var} unicode-aware

Denys Vlasenko vda.linux at googlemail.com
Wed Aug 13 07:57:44 UTC 2014


commit: http://git.busybox.net/busybox/commit/?id=c538d5bcc304d1ac99783de2337937c70a7013c7
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master

This mimics bash

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 shell/hush.c                             |   29 +++++++++++++++++++----------
 shell/hush_test/hush-misc/unicode1.right |    3 +++
 shell/hush_test/hush-misc/unicode1.tests |   13 +++++++++++++
 3 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/shell/hush.c b/shell/hush.c
index e1d0ece..7d35471 100644
--- a/shell/hush.c
+++ b/shell/hush.c
@@ -1977,6 +1977,22 @@ static struct variable *set_vars_and_save_old(char **strings)
 
 
 /*
+ * Unicode helper
+ */
+static void reinit_unicode_for_hush(void)
+{
+	/* Unicode support should be activated even if the locale is set
+	 * _during_ shell execution, not only if it was set at shell start.
+	 * Therefore, re-check LC_ALL, LC_CTYPE, LANG (in that order) every time:
+	 */
+	const char *s = get_local_var_value("LC_ALL");
+	if (!s) s = get_local_var_value("LC_CTYPE");
+	if (!s) s = get_local_var_value("LANG");
+	reinit_unicode(s);
+}
+
+
+/*
  * in_str support
  */
 static int FAST_FUNC static_get(struct in_str *i)
@@ -2042,15 +2058,7 @@ static void get_user_input(struct in_str *i)
 	/* Enable command line editing only while a command line
 	 * is actually being read */
 	do {
-		/* Unicode support should be activated even if LANG is set
-		 * _during_ shell execution, not only if it was set when
-		 * shell was started. Therefore, re-check LANG every time:
-		 */
-		const char *s = get_local_var_value("LC_ALL");
-		if (!s) s = get_local_var_value("LC_CTYPE");
-		if (!s) s = get_local_var_value("LANG");
-		reinit_unicode(s);
-
+		reinit_unicode_for_hush();
 		G.flag_SIGINT = 0;
 		/* buglet: SIGINT will not make new prompt to appear _at once_,
 		 * only after <Enter>. (^C will work) */
@@ -5028,8 +5036,9 @@ static NOINLINE const char *expand_one_var(char **to_be_freed_pp, char *arg, cha
 
 	/* Handle any expansions */
 	if (exp_op == 'L') {
+		reinit_unicode_for_hush();
 		debug_printf_expand("expand: length(%s)=", val);
-		val = utoa(val ? strlen(val) : 0);
+		val = utoa(val ? unicode_strlen(val) : 0);
 		debug_printf_expand("%s\n", val);
 	} else if (exp_op) {
 		if (exp_op == '%' || exp_op == '#') {
diff --git a/shell/hush_test/hush-misc/unicode1.right b/shell/hush_test/hush-misc/unicode1.right
new file mode 100644
index 0000000..d3bbbf6
--- /dev/null
+++ b/shell/hush_test/hush-misc/unicode1.right
@@ -0,0 +1,3 @@
+1
+1
+Ok
diff --git a/shell/hush_test/hush-misc/unicode1.tests b/shell/hush_test/hush-misc/unicode1.tests
new file mode 100755
index 0000000..8788ba9
--- /dev/null
+++ b/shell/hush_test/hush-misc/unicode1.tests
@@ -0,0 +1,13 @@
+LANG=en_US.UTF-8
+
+# A combining character U+0300
+a=`printf "\xcc\x80"`
+# Should print 1
+echo ${#a}
+
+# A Japanese katakana character U+30a3
+a=`printf "\xe3\x82\xa3"`
+# Should print 1
+echo ${#a}
+
+echo Ok


More information about the busybox-cvs mailing list