Add -h/--human-sort to sort(1) in coreutils 6.10: compare numbers that
carry a K, M, G, T, P, E, Z or Y suffix, such as the output of "du -h".

lib/strnumcmp*: numcompare and fraccompare gain optional end-pointer
outputs; strnumcmp2, strnumend and strexponent export the new helpers.
src/sort.c: new human_numeric key flag, option parsing and
human_numcompare.

Review fixes folded into this revision:
  * fraccompare no longer advances A or B past a character that is not
    the decimal point, so the reported end pointers stay inside the
    strings.
  * numcompare hands the end pointers to fraccompare in the same
    (swapped) order as the strings when both numbers are negative.
  * numeric_end reads bytes as unsigned char, as numcompare does.
  * human_numcompare negates the suffix rank of negative numbers, so
    that -1M sorts before -1K.
  * the unused numbers_identical label is gone.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; if a hunk is rejected, retry
with "patch -l".
NOTE(review): the option-letter buffer filled next to the "*p++ = 'h'"
hunk is declared outside this patch; confirm it has room for one more
letter.

--- ../build-tree.old/coreutils-6.10/lib/strnumcmp.c	2007-11-25 08:23:31.000000000 -0500
+++ ./coreutils-6.10/lib/strnumcmp.c	2009-01-02 04:21:35.000000000 -0500
@@ -27,5 +27,33 @@
 strnumcmp (char const *a, char const *b,
            int decimal_point, int thousands_sep)
 {
-  return numcompare (a, b, decimal_point, thousands_sep);
+  return numcompare (a, b, decimal_point, thousands_sep, NULL, NULL);
+}
+
+/* Like strnumcmp, but also report where the scan of A and B stopped.  */
+
+int
+strnumcmp2 (char const *a, char const *b,
+            int decimal_point, int thousands_sep,
+            char const **a_endptr, char const **b_endptr)
+{
+  return numcompare (a, b, decimal_point, thousands_sep, a_endptr, b_endptr);
+}
+
+
+/* Externally-visible name for numeric_end.  */
+
+char const *
+strnumend (char const *a,
+           int decimal_point, int thousands_sep)
+{
+  return numeric_end (a, decimal_point, thousands_sep);
+}
+
+/* Externally-visible name for exponent_order.  */
+
+int strexponent (char m, char const *exponents,
+                 int num_exponents)
+{
+  return exponent_order (m, exponents, num_exponents);
 }
--- ../build-tree.old/coreutils-6.10/lib/strnumcmp.h	2007-05-29 10:22:10.000000000 -0400
+++ ./coreutils-6.10/lib/strnumcmp.h	2009-01-02 04:20:19.000000000 -0500
@@ -1,2 +1,5 @@
 int strintcmp (char const *, char const *);
 int strnumcmp (char const *, char const *, int, int);
+int strnumcmp2 (char const *, char const *, int, int, char const **, char const **);
+char const *strnumend (char const *, int, int);
+int strexponent (char, char const *, int);
--- ../build-tree.old/coreutils-6.10/lib/strnumcmp-in.h	2007-11-25 08:23:31.000000000 -0500
+++ ./coreutils-6.10/lib/strnumcmp-in.h	2009-01-02 05:19:50.000000000 -0500
@@ -72,36 +72,58 @@
    return 0 */
 
 static inline int
-fraccompare (char const *a, char const *b, char decimal_point)
+fraccompare (char const *a, char const *b, char decimal_point,
+             char const **a_endptr, char const **b_endptr)
 {
+  /* Unless both end pointers were supplied, aim them at the local A
+     and B so that the stores below need no NULL checks.  */
+  if (a_endptr == NULL || b_endptr == NULL)
+    {
+      a_endptr = &a;
+      b_endptr = &b;
+    }
   if (*a == decimal_point && *b == decimal_point)
     {
       while (*++a == *++b)
         if (! ISDIGIT (*a))
-          return 0;
+          goto fraction_identical;
       if (ISDIGIT (*a) && ISDIGIT (*b))
-        return *a - *b;
+        goto fraction_difference;
       if (ISDIGIT (*a))
         goto a_trailing_nonzero;
       if (ISDIGIT (*b))
         goto b_trailing_nonzero;
-      return 0;
+      goto fraction_identical;
     }
-  else if (*a++ == decimal_point)
+  else if (*a == decimal_point)
     {
+      a++;
     a_trailing_nonzero:
       while (*a == NUMERIC_ZERO)
         a++;
+      *a_endptr = a;
+      *b_endptr = b;
       return ISDIGIT (*a);
     }
-  else if (*b++ == decimal_point)
+  else if (*b == decimal_point)
     {
+      b++;
     b_trailing_nonzero:
       while (*b == NUMERIC_ZERO)
         b++;
+      *a_endptr = a;
+      *b_endptr = b;
       return - ISDIGIT (*b);
     }
+fraction_identical:
+  *a_endptr = a;
+  *b_endptr = b;
   return 0;
+
+fraction_difference:
+  *a_endptr = a;
+  *b_endptr = b;
+  return *a - *b;
 }
 
 /* Compare strings A and B as numbers without explicitly converting
@@ -113,7 +135,8 @@
 
 static inline int
 numcompare (char const *a, char const *b,
-            int decimal_point, int thousands_sep)
+            int decimal_point, int thousands_sep,
+            char const **a_endptr, char const **b_endptr)
 {
   unsigned char tmpa = *a;
   unsigned char tmpb = *b;
@@ -121,6 +144,13 @@
   size_t log_a;
   size_t log_b;
 
+  /* Unless both end pointers were supplied, aim them at the local A
+     and B so that the stores below need no NULL checks.  */
+  if (a_endptr == NULL || b_endptr == NULL)
+    {
+      a_endptr = &a;
+      b_endptr = &b;
+    }
   if (tmpa == NEGATION_SIGN)
     {
       do
@@ -133,13 +163,15 @@
               tmpa = *++a;
             while (tmpa == NUMERIC_ZERO);
           if (ISDIGIT (tmpa))
-            return -1;
+            goto asmaller_shortcut;
           while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep)
             tmpb = *++b;
           if (tmpb == decimal_point)
             do
               tmpb = *++b;
             while (tmpb == NUMERIC_ZERO);
+          *a_endptr = a;
+          *b_endptr = b;
           return - ISDIGIT (tmpb);
         }
       do
@@ -158,7 +190,7 @@
 
       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
-        return fraccompare (b, a, decimal_point);
+        return fraccompare (b, a, decimal_point, b_endptr, a_endptr);
 
       tmp = tmpb - tmpa;
 
@@ -172,6 +204,8 @@
           tmpb = *++b;
         while (tmpb == thousands_sep);
 
+      *a_endptr = a;
+      *b_endptr = b;
       if (log_a != log_b)
         return log_a < log_b ? 1 : -1;
 
@@ -190,13 +224,15 @@
           tmpb = *++b;
         while (tmpb == NUMERIC_ZERO);
       if (ISDIGIT (tmpb))
-        return 1;
+        goto bsmaller_shortcut;
       while (tmpa == NUMERIC_ZERO || tmpa == thousands_sep)
         tmpa = *++a;
       if (tmpa == decimal_point)
         do
           tmpa = *++a;
         while (tmpa == NUMERIC_ZERO);
+      *a_endptr = a;
+      *b_endptr = b;
       return ISDIGIT (tmpa);
     }
   else
@@ -218,7 +254,7 @@
 
       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
-        return fraccompare (a, b, decimal_point);
+        return fraccompare (a, b, decimal_point, a_endptr, b_endptr);
 
       tmp = tmpa - tmpb;
 
@@ -232,6 +268,8 @@
           tmpb = *++b;
         while (tmpb == thousands_sep);
 
+      *a_endptr = a;
+      *b_endptr = b;
       if (log_a != log_b)
         return log_a < log_b ? -1 : 1;
 
@@ -240,6 +278,57 @@
 
       return tmp;
     }
+asmaller_shortcut:
+  *a_endptr = a;
+  *b_endptr = b;
+  return -1;
+bsmaller_shortcut:
+  *a_endptr = a;
+  *b_endptr = b;
+  return 1;
+}
+
+/* Get the order of the exponent of the multiplier specified by
+   A.  EXPONENT_IDENTIFIERS is an array of characters listing,
+   in increasing order, the list of recognized exponents (i.e.
+   K, M, G, T).  NUM_EXPONENTS is the number of elements in the
+   EXPONENT_IDENTIFIERS array.  Return the index of A in that array,
+   or -1 if A is not listed.  */
+
+static inline int
+exponent_order (char a, char const *exponent_identifiers,
+                int num_exponents)
+{
+  /* FIXME: add support for aliases (i.e. k/K, m/M, g/G) */
+  int i;
+  for (i = num_exponents - 1; i >= 0; i--)
+    {
+      if (a == exponent_identifiers[i])
+        break;
+    }
+  return i;
+}
+
+/* Return a pointer to the first character after the number at the
+   start of A: an optional NEGATION_SIGN followed by any run of digits,
+   DECIMAL_POINT and THOUSANDS_SEP characters.  */
+
+static inline char const *
+numeric_end (char const *a,
+             int decimal_point, int thousands_sep)
+{
+  unsigned char c;
+
+  if (*a == NEGATION_SIGN)
+    a++;
+  /* Read each byte as unsigned char, as numcompare does, so that bytes
+     above 0x7f are not sign-extended before being compared with the
+     int separators.  */
+  for (c = *a;
+       ISDIGIT (c) || c == decimal_point || c == thousands_sep;
+       c = *++a)
+    continue;
+  return a;
 }
 
 #endif
--- ../build-tree.old/coreutils-6.10/src/sort.c	2007-11-25 08:23:31.000000000 -0500
+++ ./coreutils-6.10/src/sort.c	2009-01-02 05:17:27.000000000 -0500
@@ -170,6 +170,9 @@
   bool random;                  /* Sort by random hash of key.  */
   bool general_numeric;         /* Flag for general, numeric comparison.
                                    Handle numbers in exponential notation. */
+  bool human_numeric;           /* Flag for human numeric comparison.
+                                   Handle numbers with a size suffix,
+                                   such as those printed by du -hs.  */
   bool month;                   /* Flag for comparison by month name. */
   bool reverse;                 /* Reverse the sense of comparison. */
   struct keyfield *next;        /* Next keyfield to try. */
@@ -202,6 +205,9 @@
 /* Translation table folding lower case to upper.  */
 static char fold_toupper[UCHAR_LIM];
 
+/* Suffixes recognized by human_numcompare, smallest first.  */
+static char const exponents[] = { 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' };
+
 #define MONTHS_PER_YEAR 12
 
 /* Table mapping month names to integers.
@@ -327,6 +333,8 @@
   -i, --ignore-nonprinting    consider only printable characters\n\
   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
   -n, --numeric-sort          compare according to string numerical value\n\
+  -h, --human-sort            compare according to string numerical value\n\
+                              with human-friendly suffixes\n\
   -R, --random-sort           sort by random hash of keys\n\
       --random-source=FILE    get random bytes from FILE (default /dev/urandom)\n\
   -r, --reverse               reverse the result of comparisons\n\
@@ -394,7 +402,7 @@
   RANDOM_SOURCE_OPTION
 };
 
-static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
+static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uy:z";
 
 static struct option const long_options[] =
 {
@@ -409,6 +417,7 @@
   {"merge", no_argument, NULL, 'm'},
   {"month-sort", no_argument, NULL, 'M'},
   {"numeric-sort", no_argument, NULL, 'n'},
+  {"human-sort", no_argument, NULL, 'h'},
   {"random-sort", no_argument, NULL, 'R'},
   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
   {"output", required_argument, NULL, 'o'},
@@ -1515,6 +1524,46 @@
 }
 
 static int
+human_numcompare (const char *a, const char *b)
+{
+  int raw_comparison;
+  int a_order, b_order;
+  bool a_negative, b_negative;
+
+  while (blanks[to_uchar (*a)])
+    a++;
+  while (blanks[to_uchar (*b)])
+    b++;
+
+  /* Note the signs now; strnumcmp2 overwrites A and B below.  */
+  a_negative = (*a == '-');
+  b_negative = (*b == '-');
+
+  raw_comparison = strnumcmp2 (a, b, decimal_point, thousands_sep, &a, &b);
+
+  a = strnumend (a, decimal_point, thousands_sep);
+  b = strnumend (b, decimal_point, thousands_sep);
+
+  /* FIXME: maybe add option to check for longer suffixes (i.e. gigabyte) */
+  /* FIXME: maybe add option to allow for spacing between number and suffix */
+  /* FIXME: maybe not use exponent if one of the strings uses an
+     unrecognized suffix that's not a blank */
+  /* Rank a missing or unrecognized suffix as 0 and K, M, ... as 1, 2, ...,
+     then negate the rank of a negative number so that -1M sorts
+     before -1K.  */
+  a_order = strexponent (*a, exponents, sizeof exponents) + 1;
+  b_order = strexponent (*b, exponents, sizeof exponents) + 1;
+  if (a_negative)
+    a_order = -a_order;
+  if (b_negative)
+    b_order = -b_order;
+
+  /* FIXME: a_order - b_order || raw_comparison can be used - would that
+     be faster? */
+  return (a_order == b_order ? raw_comparison : a_order - b_order);
+}
+
+static int
 general_numcompare (const char *sa, const char *sb)
 {
   /* FIXME: add option to warn about failed conversions.  */
@@ -1732,12 +1781,12 @@
 
       if (key->random)
         diff = compare_random (texta, lena, textb, lenb);
-      else if (key->numeric | key->general_numeric)
+      else if (key->numeric | key->general_numeric | key->human_numeric)
         {
           char savea = *lima, saveb = *limb;
 
           *lima = *limb = '\0';
-          diff = ((key->numeric ? numcompare : general_numcompare)
+          diff = ((key->numeric ? numcompare : key->general_numeric ? general_numcompare : human_numcompare)
                   (texta, textb));
           *lima = savea, *limb = saveb;
         }
@@ -2586,7 +2635,7 @@
   struct keyfield const *key;
 
   for (key = keylist; key; key = key->next)
-    if ((1 < (key->random + key->numeric + key->general_numeric + key->month
+    if ((1 < (key->random + key->numeric + key->general_numeric + key->human_numeric + key->month
               + !!key->ignore))
         || (key->random && key->translate))
       {
@@ -2598,6 +2647,8 @@
           *p++ = 'f';
         if (key->general_numeric)
           *p++ = 'g';
+        if (key->human_numeric)
+          *p++ = 'h';
         if (key->ignore == nonprinting)
           *p++ = 'i';
         if (key->month)
@@ -2687,6 +2738,9 @@
         case 'g':
           key->general_numeric = true;
           break;
+        case 'h':
+          key->human_numeric = true;
+          break;
         case 'i':
           /* Option order should not matter, so don't let -i override
              -d.  -d implies -i, but -i does not imply -d.  */
@@ -2830,7 +2884,7 @@
   gkey.sword = gkey.eword = SIZE_MAX;
   gkey.ignore = NULL;
   gkey.translate = NULL;
-  gkey.numeric = gkey.general_numeric = gkey.random = false;
+  gkey.numeric = gkey.general_numeric = gkey.human_numeric = gkey.random = false;
   gkey.month = gkey.reverse = false;
   gkey.skipsblanks = gkey.skipeblanks = false;
 
@@ -2906,6 +2960,7 @@
         case 'd':
         case 'f':
         case 'g':
+        case 'h':
         case 'i':
         case 'M':
         case 'n':
@@ -3087,7 +3142,7 @@
       if (! (key->ignore || key->translate
              || (key->skipsblanks | key->reverse
                  | key->skipeblanks | key->month | key->numeric
-                 | key->general_numeric
+                 | key->general_numeric | key->human_numeric
                  | key->random)))
         {
           key->ignore = gkey.ignore;
@@ -3097,6 +3152,7 @@
           key->month = gkey.month;
           key->numeric = gkey.numeric;
           key->general_numeric = gkey.general_numeric;
+          key->human_numeric = gkey.human_numeric;
           key->random = gkey.random;
           key->reverse = gkey.reverse;
         }
@@ -3106,7 +3162,7 @@
 
   if (!keylist && (gkey.ignore || gkey.translate
                    || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
-                       | gkey.numeric | gkey.general_numeric
+                       | gkey.numeric | gkey.general_numeric | gkey.human_numeric
                        | gkey.random)))
     {
       insertkey (&gkey);