diff options
Diffstat (limited to 'app/tools/halibut/index.c')
-rw-r--r-- | app/tools/halibut/index.c | 270 |
1 files changed, 270 insertions, 0 deletions
diff --git a/app/tools/halibut/index.c b/app/tools/halibut/index.c new file mode 100644 index 0000000..628934e --- /dev/null +++ b/app/tools/halibut/index.c @@ -0,0 +1,270 @@ +/* + * index.c: create and collate index data structures + */ + +#include <stdio.h> +#include <stdlib.h> +#include "halibut.h" + +static int compare_tags(void *av, void *bv); +static int compare_entries(void *av, void *bv); + +indexdata *make_index(void) { + indexdata *ret = snew(indexdata); + ret->tags = newtree234(compare_tags); + ret->entries = newtree234(compare_entries); + return ret; +} + +static indextag *make_indextag(void) { + indextag *ret = snew(indextag); + ret->name = NULL; + ret->implicit_text = NULL; + ret->explicit_texts = NULL; + ret->explicit_fpos = NULL; + ret->nexplicit = ret->explicit_size = ret->nrefs = 0; + ret->refs = NULL; + return ret; +} + +static int compare_tags(void *av, void *bv) { + indextag *a = (indextag *)av, *b = (indextag *)bv; + return ustricmp(a->name, b->name); +} + +static int compare_to_find_tag(void *av, void *bv) { + wchar_t *a = (wchar_t *)av; + indextag *b = (indextag *)bv; + return ustricmp(a, b->name); +} + +static int compare_entries(void *av, void *bv) { + indexentry *a = (indexentry *)av, *b = (indexentry *)bv; + return compare_wordlists(a->text, b->text); +} + +/* + * Back-end utility: find the indextag with a given name. + */ +indextag *index_findtag(indexdata *idx, wchar_t *name) { + return find234(idx->tags, name, compare_to_find_tag); +} + +/* + * Add a \IM. `tags' points to a zero-terminated chain of + * zero-terminated strings ("first\0second\0thirdandlast\0\0"). + * `text' points to a word list. + * + * Guarantee on calling sequence: all implicit merges are given + * before the explicit ones. + */ +void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text, + filepos *fpos) { + indextag *t, *existing; + + /* + * For an implicit merge, we want to remove all emphasis, + * because the chances are that the user didn't really want to + * index the term as emphasised. + */ + { + word *w; + + for (w = text; w; w = w->next) { + if (w->type == word_Emph) + w->type = word_Normal; + else if (w->type == word_EmphSpace) + w->type = word_WhiteSpace; + else if (w->type == word_EmphQuote) + w->type = word_Quote; + } + } + + /* + * FIXME: want to warn on overlapping source sets. + */ + for (; *tags; tags = uadv(tags)) { + t = make_indextag(); + t->name = tags; + existing = add234(idx->tags, t); + if (existing == t) { + /* + * Duplicate this so we can free it independently. + */ + t->name = ustrdup(tags); + + /* + * Every tag has an implicit \IM. So if this tag + * doesn't exist and we're explicit, then we should + * warn (and drop it, since it won't be referenced). + */ + if (is_explicit) { + error(err_nosuchidxtag, fpos, tags); + continue; + } + + /* + * Otherwise, this is a new tag with an implicit \IM. + */ + t->implicit_text = text; + t->implicit_fpos = *fpos; + } else { + if (!is_explicit) { + /* + * An implicit \IM for a tag that's had an implicit + * \IM before. FIXME: we should check the text + * against the existing text and warn on + * differences. And check the tag for case match + * against the existing tag, likewise. + */ + + /* + * Check the tag against its previous occurrence to + * see if the cases match. + */ + if (ustrcmp(t->name, existing->name)) { + error(err_indexcase, fpos, t->name, + &existing->implicit_fpos, existing->name); + } + + sfree(t); + } else { + /* + * An explicit \IM added to a valid tag. In + * particular, this removes the implicit \IM if + * present. + */ + sfree(t); + t = existing; + if (t->implicit_text) { + free_word_list(t->implicit_text); + t->implicit_text = NULL; + } + if (t->nexplicit >= t->explicit_size) { + t->explicit_size = t->nexplicit + 8; + t->explicit_texts = sresize(t->explicit_texts, + t->explicit_size, word *); + t->explicit_fpos = sresize(t->explicit_fpos, + t->explicit_size, filepos); + } + t->explicit_texts[t->nexplicit] = text; + t->explicit_fpos[t->nexplicit] = *fpos; + t->nexplicit++; + } + } + } +} + +/* + * Build the final-form index. We now have every tag, with every + * \IM, set up in a 2-3 tree indexed by tag. We now want to collate + * the RHSes of the \IMs, and sort by final form, and decorate the + * entries in the original 2-3 tree with pointers to the RHS + * entries. + */ +void build_index(indexdata *i) { + indextag *t; + word **ta; + filepos *fa; + int ti; + int j; + + for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) { + if (t->implicit_text) { + t->nrefs = 1; + ta = &t->implicit_text; + fa = &t->implicit_fpos; + } else { + t->nrefs = t->nexplicit; + ta = t->explicit_texts; + fa = t->explicit_fpos; + } + if (t->nrefs) { + t->refs = snewn(t->nrefs, indexentry *); + for (j = 0; j < t->nrefs; j++) { + indexentry *ent = snew(indexentry); + ent->text = *ta++; + ent->fpos = *fa++; + t->refs[j] = add234(i->entries, ent); + if (t->refs[j] != ent) /* duplicate */ + sfree(ent); + } + } + } +} + +void cleanup_index(indexdata *i) { + indextag *t; + indexentry *ent; + int ti; + + for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) { + sfree(t->name); + free_word_list(t->implicit_text); + sfree(t->explicit_texts); + sfree(t->refs); + sfree(t); + } + freetree234(i->tags); + for (ti = 0; (ent = (indexentry *)index234(i->entries, ti))!=NULL; ti++) { + sfree(ent); + } + freetree234(i->entries); + sfree(i); +} + +static void dbg_prtwordlist(int level, word *w); +static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text); + +void index_debug(indexdata *i) { + indextag *t; + indexentry *y; + int ti; + int j; + + printf("\nINDEX TAGS\n==========\n\n"); + for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) { + printf("\n"); + if (t->implicit_text) + dbg_prtmerge(0, t->name, t->implicit_text); + for (j = 0; j < t->nexplicit; j++) + dbg_prtmerge(1, t->name, t->explicit_texts[j]); + } + + printf("\nINDEX ENTRIES\n=============\n\n"); + for (ti = 0; (y = (indexentry *)index234(i->entries, ti)) != NULL; ti++) { + printf("\n"); + printf("{\n"); + dbg_prtwordlist(1, y->text); + printf("}\n"); + } +} + +static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) { + printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im"); + for (; *tag; tag++) + putchar(*tag); + printf("\" {\n"); + dbg_prtwordlist(1, text); + printf("}\n"); +} + +static void dbg_prtwordlist(int level, word *w) { + for (; w; w = w->next) { + wchar_t *wp; + printf("%*sword %d ", level*4, "", w->type); + if (w->text) { + printf("\""); + for (wp = w->text; *wp; wp++) + putchar(*wp); + printf("\""); + } else + printf("(no text)"); + if (w->alt) { + printf(" alt = {\n"); + dbg_prtwordlist(level+1, w->alt); + printf("%*s}", level*4, ""); + } + printf("\n"); + } +} |