[BusyBox-cvs] CVS busybox/coreutils

CVS User landley landley at codepoet.org
Mon Jan 24 07:00:00 UTC 2005


Update of /var/cvs/busybox/coreutils
In directory nail:/tmp/cvs-serv3049/busybox/coreutils

Modified Files:
	Config.in sort.c 
Log Message:
Sort rewrite to be SUSv3 compliant.  New config option, updated help, and
a couple of infrastructure bits.


--- /var/cvs/busybox/coreutils/Config.in	2004/08/11 02:45:47	1.26
+++ /var/cvs/busybox/coreutils/Config.in	2005/01/24 06:59:59	1.27
@@ -398,6 +398,18 @@
 	help
 	  sort is used to sort lines of text in specified files.
 
+config CONFIG_SORT_BIG
+	bool "  full SuSv3 compliant sort (Support -ktcsbdfiozgM)"
+	default y
+	depends on CONFIG_SORT
+	help
+	  Without this, sort only supports  -r, -u, and an integer version
+	  of -n.  Selecting this adds sort keys, floating point support, and
+	  more.  This adds a little over 3k to a nonstatic build on x86.
+
+	  The SuSv3 sort standard is available at:
+	  http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html
+
 config CONFIG_STTY
 	bool "stty"
 	default n
--- /var/cvs/busybox/coreutils/sort.c	2003/03/19 09:11:34	1.35
+++ /var/cvs/busybox/coreutils/sort.c	2005/01/24 07:00:00	1.36
@@ -1,8 +1,8 @@
 /* vi: set sw=4 ts=4: */
 /*
- * Mini sort implementation for busybox
+ * SuS3 compliant sort implementation for busybox
  *
- * Copyright (C) 2000 by Matt Kraai <kraai at alumni.carnegiemellon.edu>
+ * Copyright (C) 2004 by Rob Landley <rob at landley.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -18,83 +18,321 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  *
+ * See SuS3 sort standard at:
+ * http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html
  */
 
-/* BB_AUDIT SUSv3 _NOT_ compliant -- a number of options are not supported. */
-/* http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html */
-
-/* Mar 16, 2003      Manuel Novoa III   (mjn3 at codepoet.org)
- *
- * Now does proper error checking on i/o.  Plus some space savings.
- */
-
+#include <ctype.h>
+#include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 #include <unistd.h>
 #include "busybox.h"
-#include "libcoreutils/coreutils.h"
 
-static int compare_ascii(const void *x, const void *y)
+int global_flags;
+
+/*
+	sort [-m][-o output][-bdfinru][-t char][-k keydef]... [file...]
+	sort -c [-bdfinru][-t char][-k keydef][file]
+*/
+
+/* These are sort types */
+#define FLAG_n			1		/* Numeric sort */
+#define FLAG_g			2		/* Sort using strtod() */
+#define FLAG_M			4		/* Sort date */
+/* ucsz apply to root level only, not keys.  b at root level implies bb */
+#define FLAG_u			8		/* Unique */
+#define FLAG_c			16		/* Check: no output, exit(!ordered) */
+#define FLAG_s			32		/* Stable sort, no ascii fallback at end */
+#define FLAG_z			64		/* Input is null terminated, not \n */
+/* These can be applied to search keys, the previous four can't */
+#define FLAG_b			128		/* Ignore leading blanks */
+#define FLAG_r			256		/* Reverse */
+#define FLAG_d			512		/* Ignore !(isalnum()|isspace()) */
+#define FLAG_f			1024	/* Force uppercase */
+#define FLAG_i			2048	/* Ignore !isprint() */
+#define FLAG_bb			32768	/* Ignore trailing blanks  */
+
+
+#ifdef CONFIG_SORT_BIG
+char key_separator;
+
+struct sort_key
 {
-	return strcmp(*(char **)x, *(char **)y);
-}
+	struct sort_key *next_key;	/* linked list */
+	unsigned short range[4];	/* start word, start char, end word, end char */
+	int flags;
+} *key_list;
 
-static int compare_numeric(const void *x, const void *y)
+static char *get_key(char *str, struct sort_key *key, int flags)
 {
-	int z = atoi(*(char **)x) - atoi(*(char **)y);
-	return z ? z : strcmp(*(char **)x, *(char **)y);
+	int start=0,end,len,i,j;
+
+	/* Special case whole string, so we don't have to make a copy */
+	if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
+		&& !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str;
+	/* Find start of key on first pass, end on second pass*/
+	len=strlen(str);
+
+	for(j=0;j<2;j++) {
+		if(!key->range[2*j]) end=len;
+		/* Loop through fields */
+		else {
+			end=0;
+			for(i=1;i<key->range[2*j]+j;i++) {
+				/* Skip leading blanks or first separator */
+				if(str[end]) {
+					if(key_separator) {
+						if(str[end]==key_separator) end++;
+					} else if(isspace(str[end]))
+						while(isspace(str[end])) end++;
+				}
+				/* Skip body of key */
+				for(;str[end];end++) {
+					if(key_separator) {
+						if(str[end]==key_separator) break;
+					} else if(isspace(str[end])) break;
+				}
+			}
+		}
+		if(!j) start=end;
+	}
+	/* Key with explicit separator starts after separator */
+	if(key_separator && str[start]==key_separator) start++;
+	/* Strip leading whitespace if necessary */
+	if(flags&FLAG_b) while(isspace(str[start])) start++;
+	/* Strip trailing whitespace if necessary */
+	if(flags&FLAG_bb) while(end>start && isspace(str[end-1])) end--;
+	/* Handle offsets on start and end */
+	if(key->range[3]) {
+		end+=key->range[3]-1;
+		if(end>len) end=len;
+	}
+	if(key->range[1]) {
+		start+=key->range[1]-1;
+		if(start>len) start=len;
+	}
+	/* Make the copy */
+	if(end<start) end=start;
+	str=bb_xstrndup(str+start,end-start);
+	/* Handle -d */
+	if(flags&FLAG_d) {
+		for(start=end=0;str[end];end++)
+			if(isspace(str[end]) || isalnum(str[end])) str[start++]=str[end];
+		str[start]=0;
+	}
+	/* Handle -i */
+	if(flags&FLAG_i) {
+		for(start=end=0;str[end];end++)
+			if(isprint(str[end])) str[start++]=str[end];
+		str[start]=0;
+	}
+	/* Handle -f */
+	if(flags*FLAG_f) for(i=0;str[i];i++) str[i]=toupper(str[i]);
+
+	return str;
 }
 
-int sort_main(int argc, char **argv)
+static struct sort_key *add_key(void)
 {
-	FILE *fp;
-	char *line, **lines = NULL;
-	int i, nlines = 0, inc;
-	int (*compare)(const void *, const void *) = compare_ascii;
+	struct sort_key **pkey=&key_list;
+	while(*pkey) pkey=&((*pkey)->next_key);
+	return *pkey=xcalloc(1,sizeof(struct sort_key));
+}
 
-	int flags;
+#define GET_LINE(fp) (global_flags&FLAG_z) ? bb_get_chunk_from_file(fp) \
+										   : bb_get_chomped_line_from_file(fp)
+#else
+#define GET_LINE(fp)		bb_get_chomped_line_from_file(fp)
+#endif
 
-	bb_default_error_retval = 2;
+/* Iterate through keys list and perform comparisons */
+static int compare_keys(const void *xarg, const void *yarg)
+{
+	int flags=global_flags,retval=0;
+	char *x,*y;
 
-	flags = bb_getopt_ulflags(argc, argv, "nru");
-	if (flags & 1) {
-		compare = compare_numeric;
+#ifdef CONFIG_SORT_BIG
+	struct sort_key *key;
+	
+	for(key=key_list;!retval && key;key=key->next_key) {
+		flags=(key->flags) ? key->flags : global_flags;
+		/* Chop out and modify key chunks, handling -dfib */
+		x=get_key(*(char **)xarg,key,flags);
+		y=get_key(*(char **)yarg,key,flags);
+#else
+	/* This curly bracket serves no purpose but to match the nesting
+	   level of the for() loop we're not using */
+	{
+		x=*(char **)xarg;
+		y=*(char **)yarg;
+#endif
+		/* Perform actual comparison */
+		switch(flags&7) {
+			default:
+				bb_error_msg_and_die("Unknown sort type.");
+				break;
+			/* Ascii sort */
+			case 0:
+				retval=strcmp(x,y);
+				break;
+#ifdef CONFIG_SORT_BIG
+			case FLAG_g:
+			{
+				char *xx,*yy;
+				double dx=strtod(x,&xx), dy=strtod(y,&yy);
+				/* not numbers < NaN < -infinity < numbers < +infinity) */
+				if(x==xx) retval=(y==yy ? 0 : -1);
+				else if(y==yy) retval=1;
+				else if(isnan(dx)) retval=isnan(dy) ? 0 : -1;
+				else if(isnan(dy)) retval=1;
+				else if(isinf(dx)) {
+					if(dx<0) retval=((isinf(dy) && dy<0) ? 0 : -1);
+					else retval=((isinf(dy) && dy>0) ? 0 : 1);
+				} else if(isinf(dy)) retval=dy<0 ? 1 : -1;
+				else retval=dx>dy ? 1 : (dx<dy ? -1 : 0);
+				break;
+			}
+			case FLAG_M:
+			{
+				struct tm thyme;
+				int dx;
+				char *xx,*yy;
+
+				xx=strptime(x,"%b",&thyme);
+				dx=thyme.tm_mon;
+				yy=strptime(y,"%b",&thyme);
+				if(!xx) retval=(!yy ? 0 : -1);
+				else if(!yy) retval=1;
+				else retval=(dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon);
+				break;
+			}
+			/* Full floating point version of -n */
+			case FLAG_n:
+			{
+				double dx=atof(x),dy=atof(y);
+				retval=dx>dy ? 1 : (dx<dy ? -1 : 0);
+				break;
+			}
+		}
+		/* Free key copies. */
+		if(x!=*(char **)xarg) free(x);
+		if(y!=*(char **)yarg) free(y);
+		if(retval) break;
+#else
+			/* Integer version of -n for tiny systems */
+			case FLAG_n:
+				retval=atoi(x)-atoi(y);
+				break;
+		}
+#endif
 	}
+	/* Perform fallback sort if necessary */
+	if(!retval && !(global_flags&FLAG_s))
+			retval=strcmp(*(char **)xarg, *(char **)yarg);
+	return ((flags&FLAG_r)?-1:1)*retval;
+}
 
-	argv += optind;
-	if (!*argv) {
-		*--argv = "-";
-	}
+int sort_main(int argc, char **argv)
+{
+	FILE *fp,*outfile=NULL;
+	int linecount=0,i,flag;
+	char *line,**lines=NULL,c,*optlist="ngMucszbrdfimS:T:o:k:t:";
 
-	do {
-		fp = xgetoptfile_sort_uniq(argv, "r");
-		while ((line = bb_get_chomped_line_from_file(fp)) != NULL) {
-			lines = xrealloc(lines, sizeof(char *) * (nlines + 1));
-			lines[nlines++] = line;
+	bb_default_error_retval = 2;
+	/* Parse command line options */
+	while((c=getopt(argc,argv,optlist))>0) {
+		line=index(optlist,c);
+		if(!line) bb_show_usage();
+		switch(*line) {
+#ifdef CONFIG_SORT_BIG
+			case 'o':
+				if(outfile) bb_error_msg_and_die("Too many -o.");
+				outfile=bb_xfopen(optarg,"w");
+				break;
+			case 't':
+				if(key_separator || optarg[1])
+					bb_error_msg_and_die("Too many -t.");
+				key_separator=*optarg;
+				break;
+			/* parse sort key */
+			case 'k':
+			{
+				struct sort_key *key=add_key();
+				char *temp, *temp2;
+
+				temp=optarg;
+				for(i=0;*temp;) {
+					/* Start of range */
+					key->range[2*i]=(unsigned short)strtol(temp,&temp,10);
+					if(*temp=='.')
+						key->range[(2*i)+1]=(unsigned short)strtol(temp+1,&temp,10);
+					for(;*temp;temp++) {
+						if(*temp==',' && !i++) {
+							temp++;
+							break;
+						} /* no else needed: fall through to syntax error
+							 because comma isn't in optlist */
+						temp2=index(optlist,*temp);
+						flag=(1<<(temp2-optlist));
+						if(!temp2 || (flag>FLAG_M && flag<FLAG_b))
+							bb_error_msg_and_die("Unknown key option.");
+						/* b after , means strip _trailing_ space */
+						if(i && flag==FLAG_b) flag=FLAG_bb;
+						key->flags|=flag;
+					}
+				}
+				break;
+			}
+#endif
+			default:
+				global_flags|=(1<<(line-optlist));
+				/* global b strips leading and trailing spaces */
+				if(global_flags&FLAG_b) global_flags|=FLAG_bb;
+				break;
 		}
-		bb_xferror(fp, *argv);
-		bb_fclose_nonstdin(fp);
-	} while (*++argv);
-
-	/* sort it */
-	qsort(lines, nlines, sizeof(char *), compare);
-
-	/* print it */
-	i = 0;
-	--nlines;
-	if ((inc = 1 - (flags & 2)) < 0) {	/* reverse */
-		i = nlines;
 	}
-	flags &= 4;
-
-	while (nlines >= 0) {
-		if (!flags || !nlines || strcmp(lines[i+inc], lines[i])) {
-			puts(lines[i]);
+	/* Open input files and read data */
+	for(i=argv[optind] ? optind : optind-1;argv[i];i++) {
+		if(i<optind || (*argv[i]=='-' && !argv[i][1])) fp=stdin;
+		else fp=bb_xfopen(argv[i],"r");
+		for(;;) {
+			line=GET_LINE(fp);
+			if(!line) break;
+			if(!(linecount&63))
+				lines=xrealloc(lines, sizeof(char *)*(linecount+64));
+			lines[linecount++]=line;
 		}
-		i += inc;
-		--nlines;
+		fclose(fp);
 	}
-
+#ifdef CONFIG_SORT_BIG
+	/* if no key, perform alphabetic sort */
+    if(!key_list) add_key()->range[0]=1;
+	/* handle -c */	
+	if(global_flags&FLAG_c) {
+		int j=(global_flags&FLAG_u) ? -1 : 0;
+		for(i=1;i<linecount;i++)
+			if(compare_keys(&lines[i-1],&lines[i])>j) {
+				fprintf(stderr,"Check line %d\n",i);
+				return 1;
+			}
+		return 0;
+	}
+#endif
+	/* Perform the actual sort */
+	qsort(lines,linecount,sizeof(char *),compare_keys);
+	/* handle -u */
+	if(global_flags&FLAG_u) {
+		for(flag=0,i=1;i<linecount;i++) {
+			if(!compare_keys(&lines[flag],&lines[i])) free(lines[i]);
+			else lines[++flag]=lines[i];
+		}
+		if(linecount) linecount=flag+1;
+	}
+	/* Print it */
+	if(!outfile) outfile=stdout;
+	for(i=0;i<linecount;i++) fprintf(outfile,"%s\n",lines[i]);
 	bb_fflush_stdout_and_exit(EXIT_SUCCESS);
 }



More information about the busybox-cvs mailing list