commit 2c1c9b6aef8dbce4cef555fbf9d09069beb7cc1c
parent 2436e1d42ae49264b8e4e930047a61ef34d6e41f
Author: Jacob R. Edwards <n/a>
Date: Wed, 30 Nov 2022 16:31:03 -0600
Add numsep; a number separator program
This program finds numbers and adds commas to them to make them
easier to read. For example, it would take '1000000' and make it
'1,000,000'.
Diffstat:
2 files changed, 129 insertions(+), 0 deletions(-)
diff --git a/local/src/src/single/Makefile b/local/src/src/single/Makefile
@@ -1,5 +1,6 @@
names =\
datediff\
+ numsep\
urldecode\
urlencode\
diff --git a/local/src/src/single/numsep.c b/local/src/src/single/numsep.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2022 Jacob R. Edwards <jacob@jacobedwards.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * This program finds unseparated decimal numbers on their own (with
+ * either nothing or a blank before and after them on the line) and
+ * inserts a comma every third character from the right in the whole
+ * number.
+ */
+
+#include <errno.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Number of elements in the array X.  X must be a real array, not a
+ * pointer: for a pointer the result is meaningless.  The operand of
+ * sizeof is not evaluated, so X is never actually indexed.
+ */
+#define Length(X) (sizeof(X) / sizeof((X)[0]))
+
+/*
+ * Copy fp to standard output line by line, inserting a comma before
+ * every group of three digits (counted from the right) in each run of
+ * four or more consecutive decimal digits, e.g. "1000000" becomes
+ * "1,000,000".
+ *
+ * regerr may be NULL.  Otherwise *regerr is set to 0 on entry and to
+ * the regcomp(3)/regexec(3) error code if the regular expression
+ * machinery fails, so the caller can tell regex errors (use
+ * regerror(3)) apart from other failures (see errno).
+ *
+ * Returns 0 on success and -1 on failure.
+ *
+ * NOTE(review): the pattern does not anchor the number to blanks, so
+ * any run of four or more digits is separated, including the digits
+ * after a decimal point ("0.12345" becomes "0.12,345").
+ */
+int
+numsep(FILE *fp, int *regerr)
+{
+	char *line, *tmp;
+	size_t size, len, need;
+	size_t start, i, numlen, numcommas;
+	ssize_t n;
+	regex_t re;
+	regmatch_t matches[2];
+	regmatch_t *num;
+	int r;
+
+	if (regerr)
+		*regerr = 0;
+
+	/*
+	 * This regex would be preferable, but it doesn't work (^ and $
+	 * don't seem to be recognised inside a group, which agrees
+	 * with one interpretation of re_format(7)):
+	 * "(^|[ ])([0-9][0-9][0-9][0-9]+)(\\.[0-9]+)?([ ]|$)"
+	 * [[:<:]] and friends would be an alternative, but they are
+	 * deliberately avoided.
+	 */
+	r = regcomp(&re,
+	    "^[ ]*.*([0-9][0-9][0-9][0-9]+)(\\.[0-9]+)?[ ]*.*$",
+	    REG_EXTENDED);
+	if (r) {
+		if (regerr)
+			*regerr = r;
+		return -1;
+	}
+
+	size = 0;
+	line = NULL;
+	/* Only the first parenthesised group (the digits) is of interest. */
+	num = &matches[Length(matches) - 1];
+	while ((n = getline(&line, &size, fp)) >= 0) {
+		len = (size_t)n;
+		/*
+		 * Each pass separates one matched digit run; repeat until
+		 * no run of four or more digits is left on the line.
+		 */
+		while ((r = regexec(&re, line, Length(matches), matches,
+		    0)) == 0) {
+			start = (size_t)num->rm_so;
+			numlen = (size_t)num->rm_eo - start;
+			/* One comma per complete group of three after the first digit. */
+			numcommas = (numlen - 1) / 3;
+
+			/* Refuse to let len + numcommas + 1 wrap around. */
+			if (numcommas >= (size_t)-1 - len) {
+				errno = EOVERFLOW;
+				goto errfree;
+			}
+			/* Room for the line, the new commas and the NUL. */
+			need = len + numcommas + 1;
+			if (need > size) {
+				tmp = realloc(line, need);
+				if (!tmp)
+					goto errfree;
+				line = tmp;
+				size = need;
+			}
+
+			/* Walk leftwards from the end of the run, three digits at a time. */
+			i = (size_t)num->rm_eo;
+			while (i - start > 3) {
+				i -= 3;
+				memmove(line + i + 1, line + i, len - i);
+				line[i] = ',';
+				++len;
+			}
+			line[len] = '\0';
+		}
+		if (r != REG_NOMATCH) {
+			if (regerr)
+				*regerr = r;
+			goto errfree;
+		}
+		if (fwrite(line, 1, len, stdout) != len)
+			goto errfree;
+	}
+
+	free(line);
+	regfree(&re);
+	/* getline() returns -1 both at end of file and on a read error. */
+	return ferror(fp) ? -1 : 0;
+
+errfree:
+	free(line);
+	regfree(&re);
+	return -1;
+}
+
+/*
+ * Filter standard input through numsep() and report any failure on
+ * standard error.  Exits with status 0 on success and 1 on failure.
+ */
+int
+main(void)
+{
+	/* Zero means "not a regex error"; numsep() overwrites it otherwise. */
+	int regerr = 0;
+	char ebuf[128];
+
+	if (numsep(stdin, &regerr) == 0) {
+		/* Output is buffered, so write errors may only surface here. */
+		if (fflush(stdout) == 0)
+			return 0;
+		fprintf(stderr, "error: %s\n", strerror(errno));
+		return 1;
+	}
+
+	if (!regerr) {
+		fprintf(stderr, "error: %s\n", strerror(errno));
+	} else {
+		/* The compiled regex is gone by now; NULL yields a generic message. */
+		regerror(regerr, NULL, ebuf, sizeof(ebuf));
+		fprintf(stderr, "regex error: %s\n", ebuf);
+	}
+	return 1;
+}