commit 2c1c9b6aef8dbce4cef555fbf9d09069beb7cc1c
parent 2436e1d42ae49264b8e4e930047a61ef34d6e41f
Author: Jacob R. Edwards <n/a>
Date:   Wed, 30 Nov 2022 16:31:03 -0600
Add numsep; a number separator program
This program finds numbers and adds commas to them to make them
easier to read. For example, it would take '1000000' and make it
'1,000,000'.
Diffstat:
2 files changed, 129 insertions(+), 0 deletions(-)
diff --git a/local/src/src/single/Makefile b/local/src/src/single/Makefile
@@ -1,5 +1,6 @@
 names =\
 	datediff\
+	numsep\
 	urldecode\
 	urlencode\
 
diff --git a/local/src/src/single/numsep.c b/local/src/src/single/numsep.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2022 Jacob R. Edwards <jacob@jacobedwards.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* 
+ * This program finds unseparated decimal numbers on their own (with
+ * either nothing or a blank before and after them on the line) and
+ * inserts a comma every third character from the right in the whole
+ * number.
+ */
+
+#include <errno.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Number of elements in the array X (X must be a real array, not a pointer). */
+#define Length(X) (sizeof(X) / sizeof((X)[0]))
+
+/*
+ * Copy fp to standard output line by line, inserting a comma before
+ * every third digit (counted from the right) of the whole-number part
+ * of each decimal number that stands on its own, i.e. that has either
+ * a blank or the line boundary on both sides. Numbers with fewer than
+ * four whole digits, and any fractional part, are left untouched.
+ *
+ * Returns 0 on success and -1 on failure. When regerr is not NULL it
+ * is set to 0 on entry and to the regcomp(3)/regexec(3) error code if
+ * the regex library caused the failure; otherwise errno describes the
+ * error.
+ */
+int
+numsep(FILE *fp, int *regerr)
+{
+	regex_t re;
+	/* [0] whole match, [1] leading delimiter, [2] whole-number part */
+	regmatch_t matches[3];
+	const regmatch_t *num = &matches[2];
+	char *line = NULL;
+	char *tmp;
+	size_t size = 0;
+	size_t used, need, ncommas, pos;
+	ssize_t len;
+	int r;
+	int ret = -1;
+
+	if (regerr)
+		*regerr = 0;
+
+	/*
+	 * The line handed to regexec() still ends with the newline read
+	 * by getline(), so a number at the end of a line is followed by
+	 * '\n' rather than by the end of the string; accept either.
+	 */
+	r = regcomp(&re,
+	    "(^|[ \t])([0-9][0-9][0-9][0-9]+)(\\.[0-9]+)?([ \t\n]|$)",
+	    REG_EXTENDED);
+	if (r) {
+		if (regerr)
+			*regerr = r;
+		return -1;
+	}
+
+	while ((len = getline(&line, &size, fp)) >= 0) {
+		used = (size_t)len;
+		/*
+		 * Once separated, a number no longer matches, so each
+		 * pass finds the next unseparated number on the line.
+		 */
+		while ((r = regexec(&re, line, Length(matches), matches, 0)) == 0) {
+			ncommas = (size_t)(num->rm_eo - num->rm_so - 1) / 3;
+
+			/*
+			 * Room for the text, the new commas and the NUL.
+			 * This cannot wrap: used fits in ssize_t and
+			 * ncommas is less than a third of it.
+			 */
+			need = used + ncommas + 1;
+			if (need > size) {
+				tmp = realloc(line, need);
+				if (!tmp)
+					goto out;
+				line = tmp;
+				size = need;
+			}
+
+			/* Walk left from the end of the number, three digits at a time. */
+			pos = (size_t)num->rm_eo;
+			while (pos - (size_t)num->rm_so > 3) {
+				pos -= 3;
+				memmove(line + pos + 1, line + pos, used - pos);
+				line[pos] = ',';
+				++used;
+			}
+			line[used] = '\0';
+		}
+		if (r != REG_NOMATCH) {
+			if (regerr)
+				*regerr = r;
+			goto out;
+		}
+		if (fwrite(line, 1, used, stdout) != used)
+			goto out;
+	}
+
+	/* getline() returns -1 both at end of file and on error. */
+	if (!ferror(fp) && feof(fp))
+		ret = 0;
+out:
+	free(line);
+	regfree(&re);
+	return ret;
+}
+
+/*
+ * Filter standard input to standard output through numsep() and
+ * report any failure on standard error.
+ *
+ * Returns 0 on success and 1 on error.
+ */
+int
+main(void)
+{
+	/* Stays 0 unless numsep() reports a regex library error. */
+	int regerr = 0;
+	char ebuf[128];
+
+	if (numsep(stdin, &regerr) == 0)
+		return 0;
+
+	if (!regerr) {
+		fprintf(stderr, "error: %s\n", strerror(errno));
+	} else {
+		/* The compiled regex is gone by now; a NULL preg is allowed. */
+		regerror(regerr, NULL, ebuf, sizeof(ebuf));
+		fprintf(stderr, "regex error: %s\n", ebuf);
+	}
+	return 1;
+}