1 files changed, 580 insertions, 0 deletions
diff --git a/app/tools/halibut/misc.c b/app/tools/halibut/misc.c
new file mode 100644
index 0000000..3f2483c
--- /dev/null
+++ b/app/tools/halibut/misc.c
@@ -0,0 +1,580 @@
+/*
+ * misc.c: miscellaneous useful items
+ */
+
+#include <stdarg.h>
+#include "halibut.h"
+
+char *adv(char *s) {
+    return s + 1 + strlen(s);
+}
+
+struct stackTag {
+    void **data;
+    int sp;
+    int size;
+};
+
+stack stk_new(void) {
+    stack s;
+
+    s = snew(struct stackTag);
+    s->sp = 0;
+    s->size = 0;
+    s->data = NULL;
+
+    return s;
+}
+
+void stk_free(stack s) {
+    sfree(s->data);
+    sfree(s);
+}
+
+void stk_push(stack s, void *item) {
+    if (s->size <= s->sp) {
+	s->size = s->sp + 32;
+	s->data = sresize(s->data, s->size, void *);
+    }
+    s->data[s->sp++] = item;
+}
+
+void *stk_pop(stack s) {
+    if (s->sp > 0)
+	return s->data[--s->sp];
+    else
+	return NULL;
+}
+
+void *stk_top(stack s) {
+    if (s->sp > 0)
+	return s->data[s->sp-1];
+    else
+	return NULL;
+}
+
+/*
+ * Small routines to amalgamate a string from an input source.
+ */
+const rdstring empty_rdstring = {0, 0, NULL};
+const rdstringc empty_rdstringc = {0, 0, NULL};
+
+void rdadd(rdstring *rs, wchar_t c) {
+    if (rs->pos >= rs->size-1) {
+	rs->size = rs->pos + 128;
+	rs->text = sresize(rs->text, rs->size, wchar_t);
+    }
+    rs->text[rs->pos++] = c;
+    rs->text[rs->pos] = 0;
+}
+void rdadds(rdstring *rs, wchar_t const *p) {
+    int len = ustrlen(p);
+    if (rs->pos >= rs->size - len) {
+	rs->size = rs->pos + len + 128;
+	rs->text = sresize(rs->text, rs->size, wchar_t);
+    }
+    ustrcpy(rs->text + rs->pos, p);
+    rs->pos += len;
+}
+wchar_t *rdtrim(rdstring *rs) {
+    rs->text = sresize(rs->text, rs->pos + 1, wchar_t);
+    return rs->text;
+}
+
+void rdaddc(rdstringc *rs, char c) {
+    if (rs->pos >= rs->size-1) {
+	rs->size = rs->pos + 128;
+	rs->text = sresize(rs->text, rs->size, char);
+    }
+    rs->text[rs->pos++] = c;
+    rs->text[rs->pos] = 0;
+}
+void rdaddsc(rdstringc *rs, char const *p) {
+    rdaddsn(rs, p, strlen(p));
+}
+void rdaddsn(rdstringc *rs, char const *p, int len) {
+    if (rs->pos >= rs->size - len) {
+	rs->size = rs->pos + len + 128;
+	rs->text = sresize(rs->text, rs->size, char);
+    }
+    memcpy(rs->text + rs->pos, p, len);
+    rs->pos += len;
+    rs->text[rs->pos] = 0;
+}
+char *rdtrimc(rdstringc *rs) {
+    rs->text = sresize(rs->text, rs->pos + 1, char);
+    return rs->text;
+}
+
+static int compare_wordlists_literally(word *a, word *b) {
+    int t;
+    while (a && b) {
+	if (a->type != b->type)
+	    return (a->type < b->type ? -1 : +1);   /* FIXME? */
+	t = a->type;
+	if ((t != word_Normal && t != word_Code &&
+	     t != word_WeakCode && t != word_Emph) ||
+	    a->alt || b->alt) {
+	    int c;
+	    if (a->text && b->text) {
+		c = ustricmp(a->text, b->text);
+		if (c)
+		    return c;
+	    }
+	    c = compare_wordlists_literally(a->alt, b->alt);
+	    if (c)
+		return c;
+	    a = a->next;
+	    b = b->next;
+	} else {
+	    wchar_t *ap = a->text, *bp = b->text;
+	    while (*ap && *bp) {
+		wchar_t ac = *ap, bc = *bp;
+		if (ac != bc)
+		    return (ac < bc ? -1 : +1);
+		if (!*++ap && a->next && a->next->type == t && !a->next->alt)
+		    a = a->next, ap = a->text;
+		if (!*++bp && b->next && b->next->type == t && !b->next->alt)
+		    b = b->next, bp = b->text;
+	    }
+	    if (*ap || *bp)
+		return (*ap ? +1 : -1);
+	    a = a->next;
+	    b = b->next;
+	}
+    }
+
+    if (a || b)
+	return (a ? +1 : -1);
+    else
+	return 0;
+}
+
+int compare_wordlists(word *a, word *b) {
+    /*
+     * First we compare only the alphabetic content of the word
+     * lists, with case not a factor. If that comes out equal,
+     * _then_ we compare the word lists literally.
+     */
+    struct {
+	word *w;
+	int i;
+	wchar_t c;
+    } pos[2];
+
+    pos[0].w = a;
+    pos[1].w = b;
+    pos[0].i = pos[1].i = 0;
+
+    while (1) {
+	/*
+	 * Find the next alphabetic character in each word list.
+	 */
+	int k;
+
+	for (k = 0; k < 2; k++) {
+	    /*
+	     * Advance until we hit either an alphabetic character
+	     * or the end of the word list.
+	     */
+	    while (1) {
+		if (!pos[k].w) {
+		    /* End of word list. */
+		    pos[k].c = 0;
+		    break;
+		} else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
+		    /* No characters remaining in this word; move on. */
+		    pos[k].w = pos[k].w->next;
+		    pos[k].i = 0;
+		} else if (!uisalpha(pos[k].w->text[pos[k].i])) {
+		    /* This character isn't alphabetic; move on. */
+		    pos[k].i++;
+		} else {
+		    /* We have an alphabetic! Lowercase it and continue. */
+		    pos[k].c = utolower(pos[k].w->text[pos[k].i]);
+		    break;
+		}
+	    }
+	}
+
+#ifdef HAS_WCSCOLL
+	{
+	    wchar_t a[2], b[2];
+	    int ret;
+
+	    a[0] = pos[0].c;
+	    b[0] = pos[1].c;
+	    a[1] = b[1] = L'\0';
+
+	    ret = wcscoll(a, b);
+	    if (ret)
+		return ret;
+	}
+#else
+	if (pos[0].c < pos[1].c)
+	    return -1;
+	else if (pos[0].c > pos[1].c)
+	    return +1;
+#endif
+
+	if (!pos[0].c)
+	    break;		       /* they're equal */
+
+	pos[0].i++;
+	pos[1].i++;
+    }
+
+    /*
+     * If we reach here, the strings were alphabetically equal, so
+     * compare in more detail.
+     */
+    return compare_wordlists_literally(a, b);
+}
+
+void mark_attr_ends(word *words)
+{
+    word *w, *wp;
+
+    wp = NULL;
+    for (w = words; w; w = w->next) {
+	int both;
+	if (!isvis(w->type))
+	    /* Invisible elements should not affect this calculation */
+	    continue;
+	both = (isattr(w->type) &&
+		wp && isattr(wp->type) &&
+		sameattr(wp->type, w->type));
+	w->aux |= both ? attr_Always : attr_First;
+	if (wp && !both) {
+	    /* If previous considered word turns out to have been
+	     * the end of a run, tidy it up. */
+	    int wp_attr = attraux(wp->aux);
+	    wp->aux = (wp->aux & ~attr_mask) |
+		((wp_attr == attr_Always) ? attr_Last
+			 /* attr_First */ : attr_Only);
+	}
+	wp = w;
+    }
+
+    /* Tidy up last word touched */
+    if (wp) {
+	int wp_attr = attraux(wp->aux);
+	wp->aux = (wp->aux & ~attr_mask) |
+	    ((wp_attr == attr_Always) ? attr_Last
+		     /* attr_First */ : attr_Only);
+    }
+}
+
+/*
+ * This function implements the optimal paragraph wrapping
+ * algorithm, pretty much as used in TeX. A cost function is
+ * defined for each line of the wrapped paragraph (typically some
+ * convex function of the difference between the line's length and
+ * its desired length), and a dynamic programming approach is used
+ * to optimise globally across all possible layouts of the
+ * paragraph to find the one with the minimum total cost.
+ * 
+ * The function as implemented here gives a choice of two options
+ * for the cost function:
+ * 
+ *  - If `natural_space' is zero, then the algorithm attempts to
+ *    make each line the maximum possible width (either `width' or
+ *    `subsequentwidth' depending on whether it's the first line of
+ *    the paragraph or not), and the cost function is simply the
+ *    square of the unused space at the end of each line. This is a
+ *    simple mechanism suitable for use in fixed-pitch environments
+ *    such as plain text displayed on a terminal.
+ * 
+ *  - However, if `natural_space' is positive, the algorithm
+ *    assumes the medium is fully graphical and that the width of
+ *    space characters can be adjusted finely, and it attempts to
+ *    make each _space character_ the width given in
+ *    `natural_space'. (The provided width function should return
+ *    the _minimum_ acceptable width of a space character in this
+ *    case.) Therefore, the cost function for a line is dependent
+ *    on the number of spaces on that line as well as the amount by
+ *    which the line width differs from the optimum.
+ */
+wrappedline *wrap_para(word *text, int width, int subsequentwidth,
+		       int (*widthfn)(void *, word *), void *ctx,
+		       int natural_space) {
+    wrappedline *head = NULL, **ptr = &head;
+    int nwords, wordsize;
+    struct wrapword {
+	word *begin, *end;
+	int width;
+	int spacewidth;
+	int cost;
+	int nwords;
+    } *wrapwords;
+    int i, j, n;
+
+    /*
+     * Break the line up into wrappable components.
+     */
+    nwords = wordsize = 0;
+    wrapwords = NULL;
+    while (text) {
+	if (nwords >= wordsize) {
+	    wordsize = nwords + 64;
+	    wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
+	}
+	wrapwords[nwords].width = 0;
+	wrapwords[nwords].begin = text;
+	while (text) {
+	    wrapwords[nwords].width += widthfn(ctx, text);
+	    wrapwords[nwords].end = text->next;
+	    if (text->next && (text->next->type == word_WhiteSpace ||
+			       text->next->type == word_EmphSpace ||
+			       text->breaks))
+		break;
+	    text = text->next;
+	}
+	if (text && text->next && (text->next->type == word_WhiteSpace ||
+			   text->next->type == word_EmphSpace)) {
+	    wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
+	    text = text->next;
+	} else {
+	    wrapwords[nwords].spacewidth = 0;
+	}
+	nwords++;
+	if (text)
+	    text = text->next;
+    }
+
+    /*
+     * Perform the dynamic wrapping algorithm: work backwards from
+     * nwords-1, determining the optimal wrapping for each terminal
+     * subsequence of the paragraph.
+     */
+    for (i = nwords; i-- ;) {
+	int best = -1;
+	int bestcost = 0;
+	int cost;
+	int linelen = 0, spacewidth = 0, minspacewidth = 0;
+	int nspaces;
+	int thiswidth = (i == 0 ? width : subsequentwidth);
+
+	j = 0;
+	nspaces = 0;
+	while (i+j < nwords) {
+	    /*
+	     * See what happens if we put j+1 words on this line.
+	     */
+	    if (spacewidth) {
+		nspaces++;
+		minspacewidth = spacewidth;
+	    }
+	    linelen += spacewidth + wrapwords[i+j].width;
+	    spacewidth = wrapwords[i+j].spacewidth;
+	    j++;
+	    if (linelen > thiswidth) {
+		/*
+		 * If we're over the width limit, abandon ship,
+		 * _unless_ there is no best-effort yet (which will
+		 * only happen if the first word is too long all by
+		 * itself).
+		 */
+		if (best > 0)
+		    break;
+	    }
+
+	    /*
+	     * Compute the cost of this line. The method of doing
+	     * this differs hugely depending on whether
+	     * natural_space is nonzero or not.
+	     */
+	    if (natural_space) {
+		if (!nspaces && linelen > thiswidth) {
+		    /*
+		     * Special case: if there are no spaces at all
+		     * on the line because one single word is too
+		     * long for its line, cost is zero because
+		     * there's nothing we can do about it anyway.
+		     */
+		    cost = 0;
+		} else {
+		    int shortfall = thiswidth - linelen;
+		    int spaceextra = shortfall / (nspaces ? nspaces : 1);
+		    int spaceshortfall = natural_space -
+			(minspacewidth + spaceextra);
+
+		    if (i+j == nwords && spaceshortfall < 0) {
+			/*
+			 * Special case: on the very last line of
+			 * the paragraph, we don't score penalty
+			 * points for having to _stretch_ the line,
+			 * since we won't stretch it anyway.
+			 * However, we score penalties as normal
+			 * for having to squeeze it.
+			 */
+			cost = 0;
+		    } else {
+			/*
+			 * Squaring this number is tricky since
+			 * it's liable to be quite big. Let's
+			 * divide it through by 256.
+			 */
+			int x = spaceshortfall >> 8;
+			int xf = spaceshortfall & 0xFF;
+
+			/*
+			 * Not counting strange variable-fixed-
+			 * point oddities, we are computing
+			 * 
+			 *   (x+xf)^2 = x^2 + 2*x*xf + xf*xf
+			 * 
+			 * except that _our_ xf is 256 times the
+			 * one listed there.
+			 */
+
+			cost = x * x;
+			cost += (2 * x * xf) >> 8;
+		    }
+		}
+	    } else {
+		if (i+j == nwords) {
+		    /*
+		     * Special case: if we're at the very end of the
+		     * paragraph, we don't score penalty points for the
+		     * white space left on the line.
+		     */
+		    cost = 0;
+		} else {
+		    cost = (thiswidth-linelen) * (thiswidth-linelen);
+		}
+	    }
+
+	    /*
+	     * Add in the cost of wrapping all lines after this
+	     * point too.
+	     */
+	    if (i+j < nwords)
+		cost += wrapwords[i+j].cost;
+
+	    /*
+	     * We compare bestcost >= cost, not bestcost > cost,
+	     * because in cases where the costs are identical we
+	     * want to try to look like the greedy algorithm,
+	     * because readers are likely to have spent a lot of
+	     * time looking at greedy-wrapped paragraphs and
+	     * there's no point violating the Principle of Least
+	     * Surprise if it doesn't actually gain anything.
+	     */
+	    if (best < 0 || bestcost >= cost) {
+		bestcost = cost;
+		best = j;
+	    }
+	}
+	/*
+	 * Now we know the optimal answer for this terminal
+	 * subsequence, so put it in wrapwords.
+	 */
+	wrapwords[i].cost = bestcost;
+	wrapwords[i].nwords = best;
+    }
+
+    /*
+     * We've wrapped the paragraph. Now build the output
+     * `wrappedline' list.
+     */
+    i = 0;
+    while (i < nwords) {
+	wrappedline *w = snew(wrappedline);
+	*ptr = w;
+	ptr = &w->next;
+	w->next = NULL;
+
+	n = wrapwords[i].nwords;
+	w->begin = wrapwords[i].begin;
+	w->end = wrapwords[i+n-1].end;
+
+	/*
+	 * Count along the words to find nspaces and shortfall.
+	 */
+	w->nspaces = 0;
+	w->shortfall = width;
+	for (j = 0; j < n; j++) {
+	    w->shortfall -= wrapwords[i+j].width;
+	    if (j < n-1 && wrapwords[i+j].spacewidth) {
+		w->nspaces++;
+		w->shortfall -= wrapwords[i+j].spacewidth;
+	    }
+	}
+	i += n;
+    }
+
+    sfree(wrapwords);
+
+    return head;
+}
+
+void wrap_free(wrappedline *w) {
+    while (w) {
+	wrappedline *t = w->next;
+	sfree(w);
+	w = t;
+    }
+}
+
+void cmdline_cfg_add(paragraph *cfg, char *string)
+{
+    wchar_t *ustring;
+    int upos, ulen, pos, len;
+
+    ulen = 0;
+    while (cfg->keyword[ulen])
+	ulen += 1 + ustrlen(cfg->keyword+ulen);
+    len = 0;
+    while (cfg->origkeyword[len])
+	len += 1 + strlen(cfg->origkeyword+len);
+
+    ustring = ufroma_locale_dup(string);
+
+    upos = ulen;
+    ulen += 2 + ustrlen(ustring);
+    cfg->keyword = sresize(cfg->keyword, ulen, wchar_t);
+    ustrcpy(cfg->keyword+upos, ustring);
+    cfg->keyword[ulen-1] = L'\0';
+
+    pos = len;
+    len += 2 + strlen(string);
+    cfg->origkeyword = sresize(cfg->origkeyword, len, char);
+    strcpy(cfg->origkeyword+pos, string);
+    cfg->origkeyword[len-1] = '\0';
+
+    sfree(ustring);
+}
+
+paragraph *cmdline_cfg_new(void)
+{
+    paragraph *p;
+
+    p = snew(paragraph);
+    memset(p, 0, sizeof(*p));
+    p->type = para_Config;
+    p->next = NULL;
+    p->fpos.filename = "<command line>";
+    p->fpos.line = p->fpos.col = -1;
+    p->keyword = ustrdup(L"\0");
+    p->origkeyword = dupstr("\0");
+
+    return p;
+}
+
+paragraph *cmdline_cfg_simple(char *string, ...)
+{
+    va_list ap;
+    char *s;
+    paragraph *p;
+
+    p = cmdline_cfg_new();
+    cmdline_cfg_add(p, string);
+
+    va_start(ap, string);
+    while ((s = va_arg(ap, char *)) != NULL)
+	cmdline_cfg_add(p, s);
+    va_end(ap);
+
+    return p;
+}