/*
 * KAKASI (Kanji Kana Simple inversion program)
 * $Id: kakasi.c,v 1.2 2000/04/25 08:28:50 rug Exp $
 * Copyright (C) 1992
 * Hironobu Takahashi (takahasi@tiny.or.jp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either versions 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KAKASI, see the file COPYING.  If not, write to the Free
 * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
/*
  Modified by NOKUBI Takatsugu <knok@daionet.gr.jp>
  1999/05/09
     Fix kakasi_do returning no values.
  1999/04/14
     Add more variable initialization.
  1999/04/12
     Add initialization routine for some variables to function kakasi_getopt_argv.
  1999/03/04
     Rename PERLMOD macro to LIBRARY
  1999/01/08
      Add PERLMOD macro.
*/

#include <stdio.h>
#include "kakasi.h"
#ifdef LIBRARY
#include "libkakasi.h"
#endif

/* Element count of the fixed-size buffers in this file (cr_eat_string, n,
   and the c/r work buffers of the conversion loop). */
#define KAKASIBUF 1024

/* Option state.  Each of these is assigned by the option parser below. */
int romaji_type = HEPBURN;	/* -rk switches this to KUNREI */
int romaji_capitalize = 0;	/* -C: digest_out() upcases the first letter of n */
int romaji_upcase = 0;		/* -U: digest_out() upcases every letter of n */
int heiki_mode = 0;		/* -p ("list all readings" per the usage text); not read in this file */
int bunkatu_mode = 0;		/* -s: enables put_separator() */
int furigana_mode = 0;		/* -f: digest_out() prints source followed by [reading] */
int cr_eat_mode = 0;		/* -c: digest() may skip characters listed in cr_eat_string */
int flush_mode = 0;		/* -u: fflush(stdout) after output */
#ifdef WAKATIGAKI
int wakatigaki_mode = 0;	/* -w: digest_out() prints the source characters only */
int terminate_done = 0;		/* set to 1 after a pass-through (default-case) character,
				   0 after any other; put_separator() is a no-op while set */
int wo_mode = 0;		/* updated in the JIS83 branch of the conversion loop when
				   bunkatu_mode is on; not read in this file */
#endif /* WAKATIGAKI */

/* Set to 1 by the conversion loop when the current character is handled by
   the kanji slot (proc[7]); read by digest_out(). */
int kanji_digest;
/* Reset to 0 before the conversion loop; put_separator() promotes 1 to 2.
   NOTE(review): nothing in this file sets it to 1 -- presumably the output
   routines do; confirm. */
int separator_out;
/* Characters digest() is allowed to skip in cr_eat_mode: TAB, LF, CR, space,
   followed by whatever was given after -c / -w. */
char cr_eat_string[KAKASIBUF];
/* Conversion result buffer: filled by the proc function called from digest()
   and printed by digest_out(). */
Character n[KAKASIBUF];

#ifdef LIBRARY
extern FILE *kanwadict;
/* Conversion function per input class, indexed as the option parser assigns
   them: 0 -a, 1 -j, 2 -g, 3 -k, 4 -E, 5 -K, 6 -H, 7 -J.  NULL means "pass
   the character through unchanged". */
static int (*proc[8])()={NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
/* Original note (the order of the 3rd and 4th entries does not match the
   -g / -k assignments in the option parser -- TODO confirm against kakasi.h):
   ASCII, JISROMAN, KATAKANA, GRAPHIC, ZENKAKU-KIGOU, ZENKAKU-KATAKANA, ZENKAKU-HIRAGANA, KANJI, */
#endif

/* Defined elsewhere; compared against UNKNOWN after option parsing. */
extern int input_term_type;
extern int output_term_type;

/*
 * Copy the Character sequence starting at c into r, field by field, up to
 * and including the terminating element (type == OTHER and c1 == 0).
 * No length is checked: r must be large enough for the whole sequence.
 */
#ifdef LIBRARY
void digest_start_copy(c, r)
#else
static void digest_start_copy(c, r)
#endif
     Character *c;
     Character *r;
{
    int idx;

    for (idx = 0;; ++idx) {
	r[idx].type = c[idx].type;
	r[idx].c1 = c[idx].c1;
	r[idx].c2 = c[idx].c2;
	/* The terminator itself is copied before we stop. */
	if ((r[idx].type == OTHER) && (r[idx].c1 == 0))
	    break;
    }
}

/*
 * Request a separator: when bunkatu_mode is on (and, with WAKATIGAKI,
 * terminate_done is clear), advance separator_out from 1 to 2.  Any other
 * value of separator_out is left untouched.
 */
#ifdef LIBRARY
void put_separator()
#else
static void put_separator()
#endif
{
    if (! bunkatu_mode)
	return;
#ifdef WAKATIGAKI
    /* A pass-through character was just emitted: no separator wanted. */
    if (terminate_done)
	return;
#endif /* WAKATIGAKI */
    if (separator_out == 1)
	separator_out = 2;
}

/*
 * Hand every element of results to putkanji(), stopping at the first
 * element whose c1 is '\0' (that element is not output).
 */
#ifdef LIBRARY
void putchars(results)
#else
static void putchars(results)
#endif
     Character *results;
{
    Character *cur;

    for (cur = results; cur->c1 != '\0'; ++cur)
	putkanji(cur);
}

/*
 * Emit the result of one conversion step.
 *
 *   c   - source characters that were just converted
 *   ret - how many elements of c belong to this step
 *
 * The converted text is taken from the global buffer n.  For kanji input
 * (kanji_digest set) a separator is requested first and -C / -U upcasing is
 * applied to n in place.  Output form:
 *   kanji + furigana_mode   : source characters, then '[' n ']'
 *   kanji + wakatigaki_mode : source characters only (WAKATIGAKI builds)
 *   otherwise               : n only
 * stdout is flushed afterwards when flush_mode is set.
 */
#ifdef LIBRARY
void digest_out(c, ret)
#else
static void digest_out(c, ret)
#endif
     Character *c;
     int ret;
{
/* Upcase one Character in place if it is a lower-case ASCII/JISROMAN letter. */
#define DIGEST_OUT_UPCASE(ch) \
    do { \
	if (((ch)->type == ASCII) || ((ch)->type == JISROMAN)) \
	    if (('a' <= (ch)->c1) && ((ch)->c1 <= 'z')) \
		(ch)->c1 = (ch)->c1 - 0x20; \
    } while (0)

    Character bracket, *cur;
    int idx;
    int show_source = 0;	/* print c[0..ret) */
    int show_reading = 1;	/* print n */
    int bracketed = 0;		/* wrap n in '[' ... ']' */

    if (kanji_digest) {
	put_separator();
	if (romaji_capitalize) {
	    /* -C takes precedence over -U: only the first letter changes. */
	    DIGEST_OUT_UPCASE(&n[0]);
	} else if (romaji_upcase) {
	    for (cur = n; cur->c1 != '\0'; ++cur)
		DIGEST_OUT_UPCASE(cur);
	}

	if (furigana_mode) {
	    show_source = 1;
	    bracketed = 1;
	}
#ifdef WAKATIGAKI
	else if (wakatigaki_mode) {
	    show_source = 1;
	    show_reading = 0;
	}
#endif /* WAKATIGAKI */
    }

    if (show_source) {
	for (idx = 0; idx < ret; ++idx)
	    putkanji(c + idx);
    }
    if (show_reading) {
	if (bracketed) {
	    bracket.type = OTHER;
	    bracket.c1 = '[';
	    putkanji(&bracket);
	}
	putchars(n);
	if (bracketed) {
	    bracket.c1 = ']';
	    putkanji(&bracket);
	}
    }

    if (flush_mode) fflush(stdout);
#undef DIGEST_OUT_UPCASE
}

/*
 * Convert the characters at the front of c with proc, print the result and
 * drop the consumed characters from c.
 *
 *   c, clen - characters of the current run (terminated by an element with
 *             c1 == '\0') and their count
 *   r, rlen - working copy of c that may additionally hold characters
 *             skipped via cr_eat_string, and its count
 *   type    - Character type of the run being converted
 *   proc    - conversion function; called as proc(c, n), writes into the
 *             global buffer n and returns a count
 *
 * proc's return value is used as follows: 0 is treated as 1; a negative
 * value makes this function try to append one more input character to the
 * run and convert again (recursively).  When the run cannot be extended --
 * the next character has a different type and is not skippable, or the
 * buffers are full -- the negated value is used as the number of characters
 * consumed.
 *
 * Both c and r are assumed to hold KAKASIBUF elements, as the buffers of
 * the conversion loop in this file do.  Appending writes two elements (the
 * new character and the terminator), so it is only attempted while
 * rlen + 1 is still a valid index; clen never exceeds rlen, so the same
 * bound protects c.
 *
 * Returns rlen minus the number of characters consumed.
 */
#ifdef LIBRARY
int digest(c, clen, r, rlen, type, proc)
#else
static int digest(c, clen, r, rlen, type, proc)
#endif
     Character *c;
     int clen;
     Character *r;
     int rlen;
     int type;
     int (*proc)();
{
    int ret, i, j, k;
    Character new;
    char *p;

    ret = (* proc)(c, n);
    if (ret == 0) ret = 1;

    if (ret < 0) {
	/* Room is needed for r[rlen] and the terminator at r[rlen+1]. */
	if (rlen < KAKASIBUF - 1) {
	    getkanji(&new);
	    if(new.type == type) {
		/* Same kind of character: extend both buffers and retry. */
		r[rlen].type = c[clen].type = type;
		r[rlen].c1 = c[clen].c1 = new.c1;
		r[rlen].c2 = c[clen].c2 = new.c2;
		r[rlen+1].type = c[clen+1].type = OTHER;
		r[rlen+1].c1 = c[clen+1].c1 = '\0';
		return digest(c, clen+1, r, rlen+1, type, proc);
	    } else if (cr_eat_mode) {
		if ((new.type == ASCII) || (new.type == JISROMAN) || (new.type == OTHER)) {
		    /* NOTE(review): (unsigned)(*p) sign-extends where plain
		       char is signed, so bytes >= 0x80 in cr_eat_string can
		       only match if c1 is wide enough -- confirm c1's type. */
		    for (p = cr_eat_string; *p != '\0'; ++ p) {
			if ((unsigned)(*p) == new.c1) {
			    /* Skippable character: remember it in r only, so
			       that it is not fed to proc, and retry. */
			    r[rlen].type = new.type;
			    r[rlen].c1 = new.c1;
			    r[rlen].c2 = new.c2;
			    r[rlen+1].type = OTHER;
			    r[rlen+1].c1 = '\0';
			    return digest(c, clen, r, rlen+1, type, proc);
			}
		    }
		}
	    }
	    ungetkanji(&new);
	}
	/* Could not extend (different character or buffers full): settle
	   for what proc already matched. */
	ret = -ret;
    }

    digest_out(c, ret);

    /* Rebuild c from r, dropping the first ret elements whose type is
       `type`; everything else, including the terminator, is kept. */
    k = ret;
    j = 0;
    for (i = 0;; ++ i) {
	if ((r[i].type == type) && (k > 0)) {
	    -- k;
	} else {
	    c[j].type = r[i].type;
	    c[j].c1 = r[i].c1;
	    c[j].c2 = r[i].c2;
	    if (c[j].c1 == '\0')
		break;
	    ++ j;
	}
    }
    return rlen - ret;
}

/*
 * Shift the Character sequence in c towards the front by s elements:
 * c[i] receives c[i+s] for i = 0, 1, ... until the element with
 * c1 == '\0' has been moved (that terminator is copied too).
 */
#ifdef LIBRARY
void digest_shift(c, s)
#else
static void digest_shift(c, s)
#endif
     Character *c;
     int s;
{
    Character *dst = c;
    Character *src = c + s;

    /* Fields are copied one by one on purpose; the original author did not
       trust struct assignment on every compiler. */
    for (;;) {
	dst->type = src->type;
	dst->c1 = src->c1;
	dst->c2 = src->c2;
	if (src->c1 == '\0')
	    return;
	++ dst;
	++ src;
    }
}

/*
 * Option parsing and the conversion loop.
 *
 * The preprocessor shapes this span in two ways:
 *   - without LIBRARY it is one function, main(): parse the options, run
 *     the conversion loop, return 0.  An unrecognized option prints the
 *     usage text to stderr and calls exit(1).
 *   - with LIBRARY it is two functions:
 *       kakasi_getopt_argv(argc, argv) resets the option state, parses the
 *         options and returns 0, or 1 if an unrecognized option was seen;
 *       kakasi_do(str) passes str to setcharbuffer(), runs the conversion
 *         loop and returns the value of getpbstr().
 *
 * Option parsing stops at the first argument that does not start with '-'.
 * The remaining arguments are given to add_jisyo(), but only when a -J
 * conversion was selected (proc[7] != NULL).
 *
 * The text after -c / -w is appended to the default skip characters
 * (TAB, LF, CR, space) in cr_eat_string; it is truncated so that the result
 * always fits the buffer.
 */
#ifndef LIBRARY
int main(argc, argv)
     int argc;
     char **argv;
#else
int kakasi_getopt_argv(argc, argv)
     int argc;
     char **argv;
#endif
{
#ifdef LIBRARY
  int retval = 0;
#endif
#ifndef LIBRARY
    Character c[KAKASIBUF], r[KAKASIBUF];
    int clen, ptype, pctype;
    static int (*proc[8])()={NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
    /* Slots as assigned below: 0 -a, 1 -j, 2 -g, 3 -k, 4 -E, 5 -K, 6 -H, 7 -J.
       NULL means the character is passed through unchanged. */
#endif

#ifdef LIBRARY
    int i;
    /* Initialize some variables, so that every call starts from the
       defaults. */
    for (i = 0; i < 8; i ++) {
      proc[i] = NULL;
    }
    romaji_type = HEPBURN;
    heiki_mode = 0;
    bunkatu_mode = 0;
    furigana_mode = 0;
    cr_eat_mode = 0;
    romaji_capitalize = 0;
    romaji_upcase = 0;
    flush_mode = 0;
#ifdef WAKATIGAKI
    wakatigaki_mode = 0;
    terminate_done = 0;
    wo_mode = 0;
#endif /* WAKATIGAKI*/
#endif

    while(--argc > 0) {
	++ argv;
	if ((*argv)[0] != '-') break;
	/* (*argv)[1] selects the input class or flag, (*argv)[2] the target.
	   An unknown target resets the slot to NULL (pass through). */
	switch((*argv)[1]) {
	  case 'a':
	    switch((*argv)[2]) {
	      case 'j':	proc[0] = a2j; break;
	      case 'E':	proc[0] = a2E; break;
	      default:  proc[0] = NULL;
	    }
	    break;
	  case 'j':
	    switch((*argv)[2]) {
	      case 'a':	proc[1] = j2a; break;
	      case 'E':	proc[1] = j2E; break;
	      default:  proc[1] = NULL;
	    }
	    break;
	  case 'g':
	    switch((*argv)[2]) {
	      case 'a':	proc[2] = g2a; break;
	      case 'j':	proc[2] = g2j; break;
	      case 'E':	proc[2] = g2E; break;
	      default:  proc[2] = NULL;
	    }
	    break;
	  case 'k':
	    switch((*argv)[2]) {
	      case 'a':	proc[3] = k2a; break;
	      case 'j':	proc[3] = k2j; break;
	      case 'K':	proc[3] = k2K; break;
	      case 'H':	proc[3] = k2H; break;
	      default:  proc[3] = NULL;
	    }
	    break;
	  case 'E':
	    switch((*argv)[2]) {
	      case 'a':	proc[4] = E2a; break;
	      case 'j':	proc[4] = E2j; break;
	      default:  proc[4] = NULL;
	    }
	    break;
	  case 'K':
	    switch((*argv)[2]) {
	      case 'a':	proc[5] = K2a; break;
	      case 'j':	proc[5] = K2j; break;
	      case 'k':	proc[5] = K2k; break;
	      case 'H':	proc[5] = K2H; break;
	      default:  proc[5] = NULL;
	    }
	    break;
	  case 'H':
	    switch((*argv)[2]) {
	      case 'a':	proc[6] = H2a; break;
	      case 'j':	proc[6] = H2j; break;
	      case 'k':	proc[6] = H2k; break;
	      case 'K':	proc[6] = H2K; break;
	      default:  proc[6] = NULL;
	    }
	    break;
	  case 'J':
	    switch((*argv)[2]) {
	      case 'a':	proc[7] = J2a; break;
	      case 'j':	proc[7] = J2j; break;
	      case 'k':	proc[7] = J2k; break;
	      case 'K':	proc[7] = J2K; break;
	      case 'H':	proc[7] = J2H; break;
	      default:  proc[7] = NULL;
	    }
	    break;
	  case 'i':
	    /* The value may be attached (-ieuc) or the next argument (-i euc). */
	    if ((*argv)[2] != '\0')
		set_input_term(term_type_str((*argv)+2));
	    else
		if (argc > 1) {
		    -- argc;
		    set_input_term(term_type_str(*(++ argv)));
		}
	    break;
	  case 'o':
	    if ((*argv)[2] != '\0')
		set_output_term(term_type_str((*argv)+2));
	    else
		if (argc > 1) {
		    -- argc;
		    set_output_term(term_type_str(*(++ argv)));
		}
	    break;
	  case 'r':
	    if ((*argv)[2] == 'k')
		romaji_type = KUNREI;
	    break;
	  case 'p':
	    heiki_mode = 1;
	    break;
	  case 's':
	    bunkatu_mode = 1;
	    break;
	  case 'f':
	    furigana_mode = 1;
	    break;
	  case 'c':
	    cr_eat_mode = 1;
	    /* 4 fixed characters plus the NUL leave sizeof - 5 bytes for the
	       user-supplied list; the precision truncates anything longer
	       instead of overflowing cr_eat_string. */
	    sprintf(cr_eat_string, "\011\012\015 %.*s",
		    (int)sizeof(cr_eat_string) - 5, (*argv)+2);
	    break;
	  case 'C':
	    romaji_capitalize = 1;
	    break;
	  case 'U':
	    romaji_upcase = 1;
	    break;
	  case 'u':
	    flush_mode = 1;
	    break;
#ifdef WAKATIGAKI
	  case 'w':
	    wakatigaki_mode = 1;
	    bunkatu_mode = 1;
	    cr_eat_mode = 1;
	    /* Same bounded formatting as for -c. */
	    sprintf(cr_eat_string, "\011\012\015 %.*s",
		    (int)sizeof(cr_eat_string) - 5, (*argv)+2);
	    proc[5] = K2K;
	    proc[6] = H2H;
	    proc[7] = J2H;
	    break;
#endif /* WAKATIGAKI */
	  case '?':
	  default:
#ifndef LIBRARY
	    fprintf(stderr, "KAKASI - Kanji Kana Simple Inverter  Version %s\n", VERSION);
	    fprintf(stderr, "Copyright (C) 1992-1999 Hironobu Takahashi. All rights reserved.\n");
	    fprintf(stderr, "\n");
	    fprintf(stderr, "Usage: kakasi -a[jE] -j[aE] -g[ajE] -k[ajKH] -E[aj] -K[ajkH] -H[ajkK] -J[ajkKH]\n");
	    fprintf(stderr, "              -i{oldjis,newjis,dec,euc,sjis} -o{oldjis,newjis,dec,euc,sjis}\n");
	    fprintf(stderr, "              -r{hepburn,kunrei} -p -s -f -c\"chars\"  [jisyo1, jisyo2,,,]\n");
	    fprintf(stderr, "\n");
	    fprintf(stderr, "       Character Sets:\n");
	    fprintf(stderr, "       a: ascii        j: jisroman   g: graphic    k: kana (j,k     defined in jisx0201)\n");
	    fprintf(stderr, "       E: kigou        K: katakana   H: hiragana   J: kanji(E,K,H,J defined in jisx0208)\n");
	    fprintf(stderr, "\n");
	    fprintf(stderr, "       Options:\n");
	    fprintf(stderr, "       -i: input coding system   -o output coding system\n");
	    fprintf(stderr, "       -r: romaji conversion system\n");
	    fprintf(stderr, "       -p: list all readings (with -J option)\n");
	    fprintf(stderr, "       -s: insert separate characters (with -J option)\n");
	    fprintf(stderr, "       -f: furigana mode (with -J option)\n");
	    fprintf(stderr, "       -c: skip chars within jukugo (with -J option: default TAB CR LF BLANK)\n");
	    fprintf(stderr, "       -C: romaji Capitalize (with -Ja or -Jj option)\n");
	    fprintf(stderr, "       -U: romaji Upcase     (with -Ja or -Jj option)\n");
	    fprintf(stderr, "       -u: call fflush() after 1 character output\n");
#ifdef WAKATIGAKI
	    fprintf(stderr, "       -w: wakatigaki mode\n");
#endif /* WAKATIGAKI */
	    exit(1);
#else /* LIBRARY */
	    /* The library must not terminate the host program: just report. */
	    retval = 1;
#endif
	}
    }

    /* Output coding defaults to the input coding when only -i was given. */
    if ((input_term_type != UNKNOWN) && (output_term_type == UNKNOWN))
	set_output_term(input_term_type);

    /* Dictionaries are only needed for kanji conversion. */
    if (proc[7] != NULL) {
	init_jisyo();
	init_kanwa();
	for (; argc > 0; -- argc)
	    add_jisyo(*(argv ++));
    }

#ifdef LIBRARY
    return retval;
}

char *kakasi_do(str)
     char *str;
{
    Character c[KAKASIBUF], r[KAKASIBUF];
    int clen, ptype, pctype;

    setcharbuffer(str);
#endif

    /* Conversion loop.  c holds the characters still to be processed
       (clen of them, followed by a terminator), r is digest()'s scratch
       copy.  pctype remembers the class of the previous character so that
       put_separator() is called whenever the class changes. */
    pctype = OTHER;
    separator_out = 0;
    for(;;) {
	getkanji(c);
	/* type OTHER with c1 == 0xff marks the end of the input. */
	if ((c[0].type == OTHER) && (c[0].c1 == 0xff)) break;
	c[1].type = OTHER;
	c[1].c1 = '\0';
	clen = 1;
	while (clen > 0) {
	    kanji_digest = 0;
	    switch (c[0].type) {
	      case ASCII:
	      case JISROMAN:
	      case GRAPHIC:
	      case KATAKANA:
		/* For these classes the type value itself indexes proc. */
		if ((c[0].type != OTHER) && (c[0].type != pctype)) {
		    put_separator();
		    pctype = c[0].type;
		}
		if ((*proc[(int)(c[0].type)]) == NULL) {
		    putkanji(c); digest_shift(c, 1); -- clen;
		    if (flush_mode) fflush(stdout);
		} else {
		    digest_start_copy(c, r);
		    clen = digest(c, clen, r, clen, (int)(c[0].type), *proc[(int)(c[0].type)]);
		}
#ifdef WAKATIGAKI
		terminate_done = 0;
#endif /* WAKATIGAKI */
		break;
	      case JIS83:
		/* Pick the proc slot from the two-byte code:
		   7 kanji, 6 hiragana, 5 katakana, 4 everything else. */
		if (c[0].c1 >= 0xb0) {
		    ptype = 7;
		    kanji_digest = 1;
#ifdef WAKATIGAKI
		} else if ((c[0].c1 == 0xa1) && /* character code(\241\270),character code(\241\271),character code(\241\272) */
			   (c[0].c2 >= 0xb8 && c[0].c2 <= 0xba)) {
		    ptype = 7;
		    kanji_digest = 1;
		} else if ((c[0].c1 == 0xa5) && /* character code(\245\365),character code(\245\366) */
			   (c[0].c2 >= 0xf5 && c[0].c2 <= 0xf6)) {
		    ptype = 7;
		    kanji_digest = 1;
#endif /* WAKATIGAKI */
    		} else if (c[0].c1 == 0xa4) {
		    ptype = 6;
#ifdef WAKATIGAKI
		} else if ((c[0].c1 == 0xa1) && /* character code(\241\263),character code(\241\264),character code(\241\265),character code(\241\266) */
			   (c[0].c2 >= 0xb3 && c[0].c2 <= 0xb6)) {
		    ptype = 6;
#endif /* WAKATIGAKI */
		} else if (c[0].c1 == 0xa5) {
		    ptype = 5;
		} else if ((c[0].c1 == 0xa1) && (c[0].c2 == 0xbc)) {
		    ptype = 5;
		} else {
		    ptype = 4;
		}
		if (ptype != pctype) {
		    put_separator();
		    pctype = ptype;
#ifdef WAKATIGAKI		    
		    if( bunkatu_mode ) 
		        wo_mode = 0;     
		} else {
		    if( bunkatu_mode ) {
		        if( wo_mode != 2 )
			    wo_mode = 1;
		    }
#endif /* WAKATIGAKI */
		}
		if ((*proc[ptype]) == NULL) {
		    putkanji(c); digest_shift(c, 1); -- clen;
		    if (flush_mode) fflush(stdout);
		} else {
		    digest_start_copy(c, r);
		    clen = digest(c, clen, r, clen, JIS83, *proc[ptype]);
		}
#ifdef WAKATIGAKI
		terminate_done = 0;
#endif /* WAKATIGAKI */
		break;
	      default:
		/* Any other type is passed through unchanged. */
#ifdef WAKATIGAKI
		terminate_done = 1;
#endif /* WAKATIGAKI */
		putkanji(c); digest_shift(c, 1); -- clen;
#ifndef LIBRARY
		if (flush_mode) fflush(stdout);
#endif
	    }
	}
    }
#ifndef LIBRARY
    return 0;
#else /* LIBRARY */
    return getpbstr();
#endif
}

#ifdef LIBRARY
/*
 * Close the kanwadict stream if it is open and reset the pointer to NULL.
 * Returns 0 when a stream was closed, 1 when there was nothing to close.
 */
int kakasi_close_kanwadict()
{
    if (kanwadict == NULL)
	return 1;

    fclose(kanwadict);
    kanwadict = NULL;
    return 0;
}
#endif /* LIBRARY */
