summaryrefslogtreecommitdiff
path: root/app/tools/halibut/misc.c
diff options
context:
space:
mode:
Diffstat (limited to 'app/tools/halibut/misc.c')
-rw-r--r--app/tools/halibut/misc.c580
1 files changed, 580 insertions, 0 deletions
diff --git a/app/tools/halibut/misc.c b/app/tools/halibut/misc.c
new file mode 100644
index 0000000..3f2483c
--- /dev/null
+++ b/app/tools/halibut/misc.c
@@ -0,0 +1,580 @@
+/*
+ * misc.c: miscellaneous useful items
+ */
+
+#include <stdarg.h>
+#include "halibut.h"
+
+char *adv(char *s) {
+ return s + 1 + strlen(s);
+}
+
+struct stackTag {
+ void **data;
+ int sp;
+ int size;
+};
+
+stack stk_new(void) {
+ stack s;
+
+ s = snew(struct stackTag);
+ s->sp = 0;
+ s->size = 0;
+ s->data = NULL;
+
+ return s;
+}
+
+void stk_free(stack s) {
+ sfree(s->data);
+ sfree(s);
+}
+
+void stk_push(stack s, void *item) {
+ if (s->size <= s->sp) {
+ s->size = s->sp + 32;
+ s->data = sresize(s->data, s->size, void *);
+ }
+ s->data[s->sp++] = item;
+}
+
+void *stk_pop(stack s) {
+ if (s->sp > 0)
+ return s->data[--s->sp];
+ else
+ return NULL;
+}
+
+void *stk_top(stack s) {
+ if (s->sp > 0)
+ return s->data[s->sp-1];
+ else
+ return NULL;
+}
+
+/*
+ * Small routines to amalgamate a string from an input source.
+ */
+const rdstring empty_rdstring = {0, 0, NULL};
+const rdstringc empty_rdstringc = {0, 0, NULL};
+
+void rdadd(rdstring *rs, wchar_t c) {
+ if (rs->pos >= rs->size-1) {
+ rs->size = rs->pos + 128;
+ rs->text = sresize(rs->text, rs->size, wchar_t);
+ }
+ rs->text[rs->pos++] = c;
+ rs->text[rs->pos] = 0;
+}
+void rdadds(rdstring *rs, wchar_t const *p) {
+ int len = ustrlen(p);
+ if (rs->pos >= rs->size - len) {
+ rs->size = rs->pos + len + 128;
+ rs->text = sresize(rs->text, rs->size, wchar_t);
+ }
+ ustrcpy(rs->text + rs->pos, p);
+ rs->pos += len;
+}
+wchar_t *rdtrim(rdstring *rs) {
+ rs->text = sresize(rs->text, rs->pos + 1, wchar_t);
+ return rs->text;
+}
+
+void rdaddc(rdstringc *rs, char c) {
+ if (rs->pos >= rs->size-1) {
+ rs->size = rs->pos + 128;
+ rs->text = sresize(rs->text, rs->size, char);
+ }
+ rs->text[rs->pos++] = c;
+ rs->text[rs->pos] = 0;
+}
+void rdaddsc(rdstringc *rs, char const *p) {
+ rdaddsn(rs, p, strlen(p));
+}
+void rdaddsn(rdstringc *rs, char const *p, int len) {
+ if (rs->pos >= rs->size - len) {
+ rs->size = rs->pos + len + 128;
+ rs->text = sresize(rs->text, rs->size, char);
+ }
+ memcpy(rs->text + rs->pos, p, len);
+ rs->pos += len;
+ rs->text[rs->pos] = 0;
+}
+char *rdtrimc(rdstringc *rs) {
+ rs->text = sresize(rs->text, rs->pos + 1, char);
+ return rs->text;
+}
+
+static int compare_wordlists_literally(word *a, word *b) {
+ int t;
+ while (a && b) {
+ if (a->type != b->type)
+ return (a->type < b->type ? -1 : +1); /* FIXME? */
+ t = a->type;
+ if ((t != word_Normal && t != word_Code &&
+ t != word_WeakCode && t != word_Emph) ||
+ a->alt || b->alt) {
+ int c;
+ if (a->text && b->text) {
+ c = ustricmp(a->text, b->text);
+ if (c)
+ return c;
+ }
+ c = compare_wordlists_literally(a->alt, b->alt);
+ if (c)
+ return c;
+ a = a->next;
+ b = b->next;
+ } else {
+ wchar_t *ap = a->text, *bp = b->text;
+ while (*ap && *bp) {
+ wchar_t ac = *ap, bc = *bp;
+ if (ac != bc)
+ return (ac < bc ? -1 : +1);
+ if (!*++ap && a->next && a->next->type == t && !a->next->alt)
+ a = a->next, ap = a->text;
+ if (!*++bp && b->next && b->next->type == t && !b->next->alt)
+ b = b->next, bp = b->text;
+ }
+ if (*ap || *bp)
+ return (*ap ? +1 : -1);
+ a = a->next;
+ b = b->next;
+ }
+ }
+
+ if (a || b)
+ return (a ? +1 : -1);
+ else
+ return 0;
+}
+
+int compare_wordlists(word *a, word *b) {
+ /*
+ * First we compare only the alphabetic content of the word
+ * lists, with case not a factor. If that comes out equal,
+ * _then_ we compare the word lists literally.
+ */
+ struct {
+ word *w;
+ int i;
+ wchar_t c;
+ } pos[2];
+
+ pos[0].w = a;
+ pos[1].w = b;
+ pos[0].i = pos[1].i = 0;
+
+ while (1) {
+ /*
+ * Find the next alphabetic character in each word list.
+ */
+ int k;
+
+ for (k = 0; k < 2; k++) {
+ /*
+ * Advance until we hit either an alphabetic character
+ * or the end of the word list.
+ */
+ while (1) {
+ if (!pos[k].w) {
+ /* End of word list. */
+ pos[k].c = 0;
+ break;
+ } else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
+ /* No characters remaining in this word; move on. */
+ pos[k].w = pos[k].w->next;
+ pos[k].i = 0;
+ } else if (!uisalpha(pos[k].w->text[pos[k].i])) {
+ /* This character isn't alphabetic; move on. */
+ pos[k].i++;
+ } else {
+ /* We have an alphabetic! Lowercase it and continue. */
+ pos[k].c = utolower(pos[k].w->text[pos[k].i]);
+ break;
+ }
+ }
+ }
+
+#ifdef HAS_WCSCOLL
+ {
+ wchar_t a[2], b[2];
+ int ret;
+
+ a[0] = pos[0].c;
+ b[0] = pos[1].c;
+ a[1] = b[1] = L'\0';
+
+ ret = wcscoll(a, b);
+ if (ret)
+ return ret;
+ }
+#else
+ if (pos[0].c < pos[1].c)
+ return -1;
+ else if (pos[0].c > pos[1].c)
+ return +1;
+#endif
+
+ if (!pos[0].c)
+ break; /* they're equal */
+
+ pos[0].i++;
+ pos[1].i++;
+ }
+
+ /*
+ * If we reach here, the strings were alphabetically equal, so
+ * compare in more detail.
+ */
+ return compare_wordlists_literally(a, b);
+}
+
+void mark_attr_ends(word *words)
+{
+ word *w, *wp;
+
+ wp = NULL;
+ for (w = words; w; w = w->next) {
+ int both;
+ if (!isvis(w->type))
+ /* Invisible elements should not affect this calculation */
+ continue;
+ both = (isattr(w->type) &&
+ wp && isattr(wp->type) &&
+ sameattr(wp->type, w->type));
+ w->aux |= both ? attr_Always : attr_First;
+ if (wp && !both) {
+ /* If previous considered word turns out to have been
+ * the end of a run, tidy it up. */
+ int wp_attr = attraux(wp->aux);
+ wp->aux = (wp->aux & ~attr_mask) |
+ ((wp_attr == attr_Always) ? attr_Last
+ /* attr_First */ : attr_Only);
+ }
+ wp = w;
+ }
+
+ /* Tidy up last word touched */
+ if (wp) {
+ int wp_attr = attraux(wp->aux);
+ wp->aux = (wp->aux & ~attr_mask) |
+ ((wp_attr == attr_Always) ? attr_Last
+ /* attr_First */ : attr_Only);
+ }
+}
+
+/*
+ * This function implements the optimal paragraph wrapping
+ * algorithm, pretty much as used in TeX. A cost function is
+ * defined for each line of the wrapped paragraph (typically some
+ * convex function of the difference between the line's length and
+ * its desired length), and a dynamic programming approach is used
+ * to optimise globally across all possible layouts of the
+ * paragraph to find the one with the minimum total cost.
+ *
+ * The function as implemented here gives a choice of two options
+ * for the cost function:
+ *
+ * - If `natural_space' is zero, then the algorithm attempts to
+ * make each line the maximum possible width (either `width' or
+ * `subsequentwidth' depending on whether it's the first line of
+ * the paragraph or not), and the cost function is simply the
+ * square of the unused space at the end of each line. This is a
+ * simple mechanism suitable for use in fixed-pitch environments
+ * such as plain text displayed on a terminal.
+ *
+ * - However, if `natural_space' is positive, the algorithm
+ * assumes the medium is fully graphical and that the width of
+ * space characters can be adjusted finely, and it attempts to
+ * make each _space character_ the width given in
+ * `natural_space'. (The provided width function should return
+ * the _minimum_ acceptable width of a space character in this
+ * case.) Therefore, the cost function for a line is dependent
+ * on the number of spaces on that line as well as the amount by
+ * which the line width differs from the optimum.
+ */
+wrappedline *wrap_para(word *text, int width, int subsequentwidth,
+ int (*widthfn)(void *, word *), void *ctx,
+ int natural_space) {
+ wrappedline *head = NULL, **ptr = &head;
+ int nwords, wordsize;
+ struct wrapword {
+ word *begin, *end;
+ int width;
+ int spacewidth;
+ int cost;
+ int nwords;
+ } *wrapwords;
+ int i, j, n;
+
+ /*
+ * Break the line up into wrappable components.
+ */
+ nwords = wordsize = 0;
+ wrapwords = NULL;
+ while (text) {
+ if (nwords >= wordsize) {
+ wordsize = nwords + 64;
+ wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
+ }
+ wrapwords[nwords].width = 0;
+ wrapwords[nwords].begin = text;
+ while (text) {
+ wrapwords[nwords].width += widthfn(ctx, text);
+ wrapwords[nwords].end = text->next;
+ if (text->next && (text->next->type == word_WhiteSpace ||
+ text->next->type == word_EmphSpace ||
+ text->breaks))
+ break;
+ text = text->next;
+ }
+ if (text && text->next && (text->next->type == word_WhiteSpace ||
+ text->next->type == word_EmphSpace)) {
+ wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
+ text = text->next;
+ } else {
+ wrapwords[nwords].spacewidth = 0;
+ }
+ nwords++;
+ if (text)
+ text = text->next;
+ }
+
+ /*
+ * Perform the dynamic wrapping algorithm: work backwards from
+ * nwords-1, determining the optimal wrapping for each terminal
+ * subsequence of the paragraph.
+ */
+ for (i = nwords; i-- ;) {
+ int best = -1;
+ int bestcost = 0;
+ int cost;
+ int linelen = 0, spacewidth = 0, minspacewidth = 0;
+ int nspaces;
+ int thiswidth = (i == 0 ? width : subsequentwidth);
+
+ j = 0;
+ nspaces = 0;
+ while (i+j < nwords) {
+ /*
+ * See what happens if we put j+1 words on this line.
+ */
+ if (spacewidth) {
+ nspaces++;
+ minspacewidth = spacewidth;
+ }
+ linelen += spacewidth + wrapwords[i+j].width;
+ spacewidth = wrapwords[i+j].spacewidth;
+ j++;
+ if (linelen > thiswidth) {
+ /*
+ * If we're over the width limit, abandon ship,
+ * _unless_ there is no best-effort yet (which will
+ * only happen if the first word is too long all by
+ * itself).
+ */
+ if (best > 0)
+ break;
+ }
+
+ /*
+ * Compute the cost of this line. The method of doing
+ * this differs hugely depending on whether
+ * natural_space is nonzero or not.
+ */
+ if (natural_space) {
+ if (!nspaces && linelen > thiswidth) {
+ /*
+ * Special case: if there are no spaces at all
+ * on the line because one single word is too
+ * long for its line, cost is zero because
+ * there's nothing we can do about it anyway.
+ */
+ cost = 0;
+ } else {
+ int shortfall = thiswidth - linelen;
+ int spaceextra = shortfall / (nspaces ? nspaces : 1);
+ int spaceshortfall = natural_space -
+ (minspacewidth + spaceextra);
+
+ if (i+j == nwords && spaceshortfall < 0) {
+ /*
+ * Special case: on the very last line of
+ * the paragraph, we don't score penalty
+ * points for having to _stretch_ the line,
+ * since we won't stretch it anyway.
+ * However, we score penalties as normal
+ * for having to squeeze it.
+ */
+ cost = 0;
+ } else {
+ /*
+ * Squaring this number is tricky since
+ * it's liable to be quite big. Let's
+ * divide it through by 256.
+ */
+ int x = spaceshortfall >> 8;
+ int xf = spaceshortfall & 0xFF;
+
+ /*
+ * Not counting strange variable-fixed-
+ * point oddities, we are computing
+ *
+ * (x+xf)^2 = x^2 + 2*x*xf + xf*xf
+ *
+ * except that _our_ xf is 256 times the
+ * one listed there.
+ */
+
+ cost = x * x;
+ cost += (2 * x * xf) >> 8;
+ }
+ }
+ } else {
+ if (i+j == nwords) {
+ /*
+ * Special case: if we're at the very end of the
+ * paragraph, we don't score penalty points for the
+ * white space left on the line.
+ */
+ cost = 0;
+ } else {
+ cost = (thiswidth-linelen) * (thiswidth-linelen);
+ }
+ }
+
+ /*
+ * Add in the cost of wrapping all lines after this
+ * point too.
+ */
+ if (i+j < nwords)
+ cost += wrapwords[i+j].cost;
+
+ /*
+ * We compare bestcost >= cost, not bestcost > cost,
+ * because in cases where the costs are identical we
+ * want to try to look like the greedy algorithm,
+ * because readers are likely to have spent a lot of
+ * time looking at greedy-wrapped paragraphs and
+ * there's no point violating the Principle of Least
+ * Surprise if it doesn't actually gain anything.
+ */
+ if (best < 0 || bestcost >= cost) {
+ bestcost = cost;
+ best = j;
+ }
+ }
+ /*
+ * Now we know the optimal answer for this terminal
+ * subsequence, so put it in wrapwords.
+ */
+ wrapwords[i].cost = bestcost;
+ wrapwords[i].nwords = best;
+ }
+
+ /*
+ * We've wrapped the paragraph. Now build the output
+ * `wrappedline' list.
+ */
+ i = 0;
+ while (i < nwords) {
+ wrappedline *w = snew(wrappedline);
+ *ptr = w;
+ ptr = &w->next;
+ w->next = NULL;
+
+ n = wrapwords[i].nwords;
+ w->begin = wrapwords[i].begin;
+ w->end = wrapwords[i+n-1].end;
+
+ /*
+ * Count along the words to find nspaces and shortfall.
+ */
+ w->nspaces = 0;
+ w->shortfall = width;
+ for (j = 0; j < n; j++) {
+ w->shortfall -= wrapwords[i+j].width;
+ if (j < n-1 && wrapwords[i+j].spacewidth) {
+ w->nspaces++;
+ w->shortfall -= wrapwords[i+j].spacewidth;
+ }
+ }
+ i += n;
+ }
+
+ sfree(wrapwords);
+
+ return head;
+}
+
+void wrap_free(wrappedline *w) {
+ while (w) {
+ wrappedline *t = w->next;
+ sfree(w);
+ w = t;
+ }
+}
+
+void cmdline_cfg_add(paragraph *cfg, char *string)
+{
+ wchar_t *ustring;
+ int upos, ulen, pos, len;
+
+ ulen = 0;
+ while (cfg->keyword[ulen])
+ ulen += 1 + ustrlen(cfg->keyword+ulen);
+ len = 0;
+ while (cfg->origkeyword[len])
+ len += 1 + strlen(cfg->origkeyword+len);
+
+ ustring = ufroma_locale_dup(string);
+
+ upos = ulen;
+ ulen += 2 + ustrlen(ustring);
+ cfg->keyword = sresize(cfg->keyword, ulen, wchar_t);
+ ustrcpy(cfg->keyword+upos, ustring);
+ cfg->keyword[ulen-1] = L'\0';
+
+ pos = len;
+ len += 2 + strlen(string);
+ cfg->origkeyword = sresize(cfg->origkeyword, len, char);
+ strcpy(cfg->origkeyword+pos, string);
+ cfg->origkeyword[len-1] = '\0';
+
+ sfree(ustring);
+}
+
+paragraph *cmdline_cfg_new(void)
+{
+ paragraph *p;
+
+ p = snew(paragraph);
+ memset(p, 0, sizeof(*p));
+ p->type = para_Config;
+ p->next = NULL;
+ p->fpos.filename = "<command line>";
+ p->fpos.line = p->fpos.col = -1;
+ p->keyword = ustrdup(L"\0");
+ p->origkeyword = dupstr("\0");
+
+ return p;
+}
+
+paragraph *cmdline_cfg_simple(char *string, ...)
+{
+ va_list ap;
+ char *s;
+ paragraph *p;
+
+ p = cmdline_cfg_new();
+ cmdline_cfg_add(p, string);
+
+ va_start(ap, string);
+ while ((s = va_arg(ap, char *)) != NULL)
+ cmdline_cfg_add(p, s);
+ va_end(ap);
+
+ return p;
+}