diff options
Diffstat (limited to 'app/tools/halibut/charset/shiftjis.c')
-rw-r--r-- | app/tools/halibut/charset/shiftjis.c | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/app/tools/halibut/charset/shiftjis.c b/app/tools/halibut/charset/shiftjis.c new file mode 100644 index 0000000..4ad879f --- /dev/null +++ b/app/tools/halibut/charset/shiftjis.c @@ -0,0 +1,128 @@ +/* + * shiftjis.c - multibyte encoding of Shift-JIS + */ + +#ifndef ENUM_CHARSETS + +#include "charset.h" +#include "internal.h" + +/* + * Shift-JIS has no associated data, so `charset' may be ignored. + */ + +static void read_sjis(charset_spec const *charset, long int input_chr, + charset_state *state, + void (*emit)(void *ctx, long int output), void *emitctx) +{ + UNUSEDARG(charset); + + /* + * For reading Shift-JIS, state->s0 simply contains the single + * stored lead byte when we are half way through a double-byte + * character, or 0 if we aren't. + */ + + if (state->s0 == 0) { + if ((input_chr >= 0x81 && input_chr <= 0x9F) || + (input_chr >= 0xE0 && input_chr <= 0xEF)) { + /* + * Lead byte. Just store it. + */ + state->s0 = input_chr; + } else { + /* + * Anything else we translate through JIS X 0201. + */ + if (input_chr == 0x5C) + input_chr = 0xA5; + else if (input_chr == 0x7E) + input_chr = 0x203E; + else if (input_chr >= 0xA1 && input_chr <= 0xDF) + input_chr += 0xFF61 - 0xA1; + else if (input_chr < 0x80) + /* do nothing */; + else + input_chr = ERROR; + emit(emitctx, input_chr); + } + } else { + /* + * We have a stored lead byte. We expect a valid followup + * byte. + */ + if (input_chr >= 0x40 && input_chr <= 0xFC && input_chr != 0x7F) { + int r, c; + r = state->s0; + if (r >= 0xE0) r -= (0xE0 - 0xA0); + r -= 0x81; + c = input_chr; + if (c > 0x7F) c--; + c -= 0x40; + r *= 2; + if (c >= 94) + r++, c -= 94; + emit(emitctx, jisx0208_to_unicode(r, c)); + } else { + emit(emitctx, ERROR); + } + state->s0 = 0; + } +} + +/* + * Shift-JIS is a stateless multi-byte encoding (in the sense that + * just after any character has been completed, the state is always + * the same); hence when writing it, there is no need to use the + * charset_state. + */ + +static int write_sjis(charset_spec const *charset, long int input_chr, + charset_state *state, + void (*emit)(void *ctx, long int output), void *emitctx) +{ + UNUSEDARG(charset); + UNUSEDARG(state); + + if (input_chr == -1) + return TRUE; /* stateless; no cleanup required */ + + if (input_chr < 0x80 && input_chr != 0x5C && input_chr != 0x7E) { + emit(emitctx, input_chr); + return TRUE; + } else if (input_chr == 0xA5) { + emit(emitctx, 0x5C); + return TRUE; + } else if (input_chr == 0x203E) { + emit(emitctx, 0x7E); + return TRUE; + } else if (input_chr >= 0xFF61 && input_chr <= 0xFF9F) { + emit(emitctx, input_chr - (0xFF61 - 0xA1)); + return TRUE; + } else { + int r, c; + if (unicode_to_jisx0208(input_chr, &r, &c)) { + c += 94 * (r % 2); + r /= 2; + r += 0x81; + if (r >= 0xA0) r += 0xE0 - 0xA0; + c += 0x40; + if (c >= 0x7F) c++; + emit(emitctx, r); + emit(emitctx, c); + return TRUE; + } else { + return FALSE; + } + } +} + +const charset_spec charset_CS_SHIFT_JIS = { + CS_SHIFT_JIS, read_sjis, write_sjis, NULL +}; + +#else /* ENUM_CHARSETS */ + +ENUM_CHARSET(CS_SHIFT_JIS) + +#endif /* ENUM_CHARSETS */ |