Rewrite human.awk script - config - OpenBSD system configuration

commit 201ed9b68116b1989a02a424aee47a36881eab93
parent 0fca4783239df6808e61c7d0dcd8d050be0950dc
Author: Jacob R. Edwards <n/a>
Date:   Mon, 28 Nov 2022 20:55:01 -0600

Rewrite human.awk script

Initially I just wanted to be able to specify letters for the START
parameter, but ended up restructuring the whole program. Now it is
able to read numbers already using binary prefixes. Thus to treat
unmarked numbers as, say, kibibytes, you must do something like this:

	$ sed 's/$/K/' < kilobytes | human.awk

Diffstat:
M local/bin/bin/human.awk  | 70 ++++++++++++++++++++++++++++++++++++++++++----------------------------

1 file changed, 42 insertions(+), 28 deletions(-)
diff --git a/local/bin/bin/human.awk b/local/bin/bin/human.awk
@@ -2,42 +2,56 @@
 # Copyright 2021, 2022 Jacob R. Edwards
 # License: GPLv3
 #
-# human.awk -- Scale numbers using unit prefixes
+# human.awk -- Keep numbers short using SI units
 #
-# This awk(1) script tries to keep numeric input short by using
-# unit prefixes such as `Ki', `Mi', `Gi', etc. Whitespace in numeric
-# input is omitted from the result, non-numeric input is passed
-# through unchanged. Note that "numeric" is defined as `[0-9]+'
-# rendering negitive numbers "non-numeric".
+# When the first field (see awk(1) FS) is numeric (including SI
+# units), try and shorten it as much as possible while still keeping
+# it at or above 1 by using larger units.
 #
-# METRIC  Metric `decimal' or `binary' (default `binary')
-# START   Input size (1 for `K', 2 for `M', etc.) (default 0)
+# NOTE: Perhaps instead of using awk fields, I should extract the
+# first "word" (non-space byte sequence), process it with humanize(),
+# and insert it into the same place, thus preserving whitespace.
 
-function tohuman(n)
+function humanize(bytes, _, i, start)
 {
-	for (i = START; n >= MULT && i < MAX; ++i)
-		n = (n / MULT) + (n % MULT / MULT)
-	return i ? sprintf("%.2f%c%c", n, SIZES[i], SUFX) : n;
-}
+        # Not needed right now, but could be with other applications
+	#if (!match(bytes, bytereg))
+	#	return bytes;
 
-BEGIN {
-	MAX = split("KMGTPEZY", SIZES, "");
-	if (!METRIC || METRIC == "binary") {
-		MULT = 1024;
-		SUFX = "i";
-	} else if (METRIC == "decimal") {
-		MULT = 1000;
-		SUFX = "b";
+	if (!match(bytes, suffixreg)) {
+		start = 0;
 	} else {
-		printf("%s: '%s': Invalid metric.\n", ARGV[0], METRIC) \
-		     > "/dev/stderr";
-		exit(1);
+		start = numbers[substr(bytes, RSTART, 1)];
+		if (!start)
+			return bytes;
+		bytes = substr(bytes, 1, RSTART);
 	}
+
+	for (i = start; bytes >= base && (!i || letters[i]); ++i)
+		bytes = bytes / base;
+	return i ? sprintf("%.1f%s%s", bytes, letters[i], type) : bytes;
 }
 
-/^[ \t]*[0-9]+[ \t]*$/ {
-	print(tohuman($0));
-	next;
+BEGIN {
+	# May be lowercase if desired
+	suffixes = "KMGTPEZY";
+
+	# Binary prefix; decimal would be '1000' and 'B'
+	base = 1024;
+	type = "iB";
+
+	# Create tables
+	split(suffixes, letters, "");
+	for (letter in letters)
+		numbers[letters[letter]] = letter;
+
+	# Cache regexes
+	suffixreg = sprintf("[%s%s](%s)?", toupper(suffixes), tolower(suffixes), type);
+	bytereg = sprintf("[0-9]+(%s)?", suffixreg);
 }
 
-1
+{
+	if (match($1, bytereg))
+		$1 = humanize($1);
+	print;
+}

	config OpenBSD system configuration
	git clone git://jacobedwards.org/config
	Log \| Files \| Refs \| README