commit 5692fa2841fef9238096b7ba5996cd726c5084bc
parent 4518d2b1e263f74224cba2c581ec3749f3ea1faa
Author: Jacob R. Edwards <jacob@jacobedwards.org>
Date: Wed, 24 Jul 2024 12:04:48 -0700
Fix numsep putting commas in numbers after decimal points
I also took the opportunity to rework the whole thing.
Diffstat:
1 file changed, 55 insertions(+), 90 deletions(-)
diff --git a/local/src/src/singles/numsep.c b/local/src/src/singles/numsep.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2022 Jacob R. Edwards <jacob@jacobedwards.org>
+ * Copyright 2022, 2024 Jacob R. Edwards <jacob@jacobedwards.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -21,108 +21,73 @@
* number.
*/
-#include <errno.h>
-#include <regex.h>
+#include <ctype.h>
#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#define Length(X) (sizeof(X) / sizeof(X[1]))
+int numsep_stream(FILE *input, FILE *output);
+size_t numsep_bufs(char *i, char *o, int *newword, size_t len);
/*
 * Filter standard input to standard output through numsep_stream().
 *
 * Returns 0 on success.  On failure a diagnostic is printed with
 * perror() and 1 is returned.
 */
int
main(void)
{
	/*
	 * stdout is buffered, so a failed write may only become visible
	 * when the buffer is flushed.  Flush explicitly so that such an
	 * error is reported instead of being silently lost at exit.
	 */
	if (numsep_stream(stdin, stdout) != 0 || fflush(stdout) != 0) {
		perror("Unable to separate numbers");
		return 1;
	}
	return 0;
}
- r = regcomp(&re,
- "^[ ]*.*([0-9][0-9][0-9][0-9]+)(\\.[0-9]+)?[ ]*.*$",
- REG_EXTENDED);
size_t numsep_bufs(char *ib, char *ob, int *newword, size_t len);

/*
 * Copy input to output, inserting thousands separators (see
 * numsep_bufs()) into numbers that begin a whitespace-delimited word.
 *
 * Returns 0 on success and non-zero if reading or writing failed.
 */
int
numsep_stream(FILE *input, FILE *output)
{
	/*
	 * Grouping n input bytes yields at most n + (n - 1) / 3 output
	 * bytes (a single n-digit number), so the read buffer must be no
	 * larger than three quarters of the write buffer.
	 */
	char wb[4096];
	char rb[sizeof(wb) / 4 * 3];
	size_t held;	/* digits carried over from the previous chunk */
	size_t want, got, avail, use, outlen, k;
	int newword;

	held = 0;
	newword = 1;
	for (;;) {
		want = sizeof(rb) - held;
		got = fread(rb + held, 1, want, input);
		avail = held + got;
		use = avail;

		if (got == want) {
			/*
			 * More input may follow.  Hold back a trailing run
			 * of digits so a number straddling two chunks is
			 * grouped as one number.  If the whole buffer is
			 * digits nothing can be held back; process it as is.
			 */
			while (use > 0 && isdigit((unsigned char)rb[use - 1]))
				--use;
			if (use == 0)
				use = avail;
		}

		if (use > 0) {
			outlen = numsep_bufs(rb, wb, &newword, use);
			if (fwrite(wb, 1, outlen, output) != outlen)
				return -1;
		}

		/* Move the held-back digits to the front (dest < src). */
		held = avail - use;
		for (k = 0; k < held; ++k)
			rb[k] = rb[use + k];

		if (got < want)
			break;	/* end of file or read error */
	}
	return ferror(input) ? -1 : 0;
}

/*
 * Copy len bytes from ib to ob, inserting a comma between each group
 * of three digits (counted from the right) in every run of digits that
 * starts a word.  Digits elsewhere in a word, such as those following
 * a decimal point, are copied unchanged.
 *
 * *newword is non-zero when the previous byte was whitespace (or there
 * was no previous byte); it is updated so the state can be carried
 * from one call to the next.
 *
 * ob must have room for len + (len - 1) / 3 bytes.  Returns the number
 * of bytes stored in ob.
 */
size_t
numsep_bufs(char *ib, char *ob, int *newword, size_t len)
{
	size_t ii, oi;
	size_t start, numlen, lead, i;
	unsigned char c;

	ii = 0;
	oi = 0;
	while (ii < len) {
		/* <ctype.h> functions require an unsigned char value. */
		c = (unsigned char)ib[ii];

		if (*newword && isdigit(c)) {
			start = ii;
			while (ii < len && isdigit((unsigned char)ib[ii]))
				++ii;
			numlen = ii - start;

			/* Number of digits before the first comma (0 means 3). */
			lead = numlen % 3;
			for (i = 0; i < numlen; ++i) {
				if (i > 0 && i % 3 == lead)
					ob[oi++] = ',';
				ob[oi++] = ib[start + i];
			}

			/*
			 * The previous byte is now a digit.  The byte after
			 * the number, if any, is handled by the next
			 * iteration; nothing is read past len.
			 */
			*newword = 0;
			continue;
		}

		*newword = isspace(c) != 0;
		ob[oi++] = ib[ii++];
	}

	return oi;
}