#include "mendex.h"

/* NOTE(review): the next two #include directives carry no header name in
 * this copy of the file; the names appear to have been lost -- restore them
 * from the upstream source. */
#include
#include

#include "qsort.h"

#include "exkana.h"
#include "exvar.h"

#include "kp.h"

/* Size of the line buffer used by dicread() when counting lines. */
#define BUFFERLEN 4096
/* Size of the byte and UChar work buffers declared in dicvalread(). */
#define BUFFERLEN1 256
/* Sizes of the two error-message buffers declared in convert(). */
#define BUFFERLEN2 (BUFFERLEN1*3)
#define BUFFERLEN3 (BUFFERLEN2+50)

/* One dictionary entry: a pair of UChar strings.
 * NOTE(review): which slot holds the word and which the reading is not
 * visible in this part of the file -- confirm against dicvalread(). */
struct dictionary{ UChar* dic[2]; };

/* dictable/dlines: table and count for the dictionary file given to dicread().
 * envdic/elines:   table and count for the file named by the
 *                  INDEXDEFAULTDICTIONARY variable. */
static struct dictionary *dictable,*envdic;
static int dlines=0,elines=0;

static int dicvalread(const char *filename, struct dictionary *dicval, int line);

/*
 * Duplicate a NUL-terminated UChar string.
 *
 * Allocates u_strlen(string)+1 UChars with xmalloc() and copies `string`
 * into the new buffer with u_strcpy().  The value returned is whatever
 * u_strcpy() returns (presumably the destination buffer).  Nothing here
 * frees the buffer.
 */
UChar * u_xstrdup (const UChar *string)
{
	return u_strcpy(xmalloc((u_strlen(string)+1)*sizeof(UChar)), string);
}

/*
 * initialize kana table
 *
 * Builds `akasatana` and `aiueo` as shifted copies of AKASATANA and AIUEO,
 * then copies `akasatana` into `atama`.
 */
void initkanatable(void)
{
	int i;

	akasatana=u_xstrdup(AKASATANA);
	for (i=0;;i++) {
		if (akasatana[i]==0) break;
		akasatana[i]+=KATATOP-HIRATOP; /* hiragana -> katakana */
	}

	aiueo=u_xstrdup(AIUEO);
	for (i=0;;i++) {
		if (aiueo[i]==0) break;
		/* 0xd82c is the high surrogate shared by U+1B001, U+1B11F,
		 * U+1B121 and U+1B122; such pairs are handled on their low
		 * surrogate instead of being shifted by KATATOP-HIRATOP. */
		if (aiueo[i]==0xd82c) {
			i++; /* step onto the low surrogate */
			if (aiueo[i]==0xdd1f) aiueo[i]+=3; /* U+1B11F -> U+1B122 */
			if (aiueo[i]==0xdd21) {
				UChar ch[] = {0xd82c, 0xdc01, 0x0};
				/* NOTE(review): this stores into the source table AIUEO,
				 * not into the copy `aiueo` -- presumably intentional
				 * (U+1B001 is used there only when is_jpn_kana()
				 * accepts it); confirm. */
				if (is_jpn_kana(ch)) AIUEO[i]=0xdc01; /* U+1B121 -> U+1B001 */
			}
		}
		else
			aiueo[i]+=KATATOP-HIRATOP; /* hiragana -> katakana */
	}

	u_strcpy(atama,akasatana);
}

/*
 * get dictionary
 *
 * Loads up to two dictionaries:
 *   1. `filename`, when it is not NULL, into dictable/dlines;
 *   2. the file named by the INDEXDEFAULTDICTIONARY variable, when that is
 *      set and non-empty, into envdic/elines.
 * Each file is read once here to count entries and size the table, then
 * handed to dicvalread() to fill it.
 *
 * Returns 0 on every path visible here (`ecount` is never changed after its
 * initialisation to 0).
 */
int dicread(const char *filename)
{
	int i,ecount=0;
	const char *envfile;
	char buff[BUFFERLEN];
	FILE *fp;

	if (filename!=NULL) {
		filename = KP_find_file(&kp_dict,filename);
		/* Open the file only if kpse_in_name_ok() accepts the name. */
		if(kpse_in_name_ok(filename))
			fp=fopen(filename,"rb");
		else
			fp = NULL;
		if (fp==NULL) {
			warn_printf(efp,"Warning: Couldn't find dictionary file %s.\n",filename);
			/* Still try the environment dictionary. */
			goto ENV;
		}
		verb_printf(efp,"Scanning dictionary file %s.",filename);

		/* Count the fgets() reads that do not begin with CR, LF or NUL;
		 * the i-- cancels the loop increment for the others. */
		for (i=0;;i++) {
			if (fgets(buff,BUFFERLEN-1,fp)==NULL) break;
			if ((buff[0]=='\r')||(buff[0]=='\n')||(buff[0]=='\0')) i--;
		}
		fclose(fp);

		dictable=xmalloc(sizeof(struct dictionary)*i);

		dlines=dicvalread(filename,dictable,i);

		verb_printf(efp,"...done.\n");
	}

ENV:
	envfile=kpse_var_value("INDEXDEFAULTDICTIONARY");
	if ((envfile!=NULL)&&(strlen(envfile)!=0)) {
		envfile = KP_find_file(&kp_dict,envfile);
		if(kpse_in_name_ok(envfile))
			fp=fopen(envfile,"rb");
		else
			fp = NULL;
		if (fp==NULL) {
			warn_printf(efp,"Warning: Couldn't find environment dictionary file %s.\n",envfile);
			return ecount;
		}
		verb_printf(efp,"Scanning environment dictionary file %s.",envfile);

		/* Same counting loop as above.
		 * NOTE(review): the read limit here is 255, while the loop above
		 * uses BUFFERLEN-1 -- confirm whether the difference is intended. */
		for (i=0;;i++) {
			if (fgets(buff,255,fp)==NULL) break;
			if ((buff[0]=='\r')||(buff[0]=='\n')||(buff[0]=='\0')) i--;
		}
		fclose(fp);

		envdic=xmalloc(sizeof(struct dictionary)*i);

		elines=dicvalread(envfile,envdic,i);

		verb_printf(efp,"...done.\n");
	}

	return 0; /* FIXME: is this right? */
}

static int dcomp(const void *bf1, const void *bf2);

/*
 * read dictionary file
 *
 * Visible part: opens `filename` for reading when kpse_in_name_ok() accepts
 * it; otherwise prints a message to stderr and exits with status 255.
 */
static int dicvalread(const char *filename, struct dictionary *dicval, int line)
{
	int i,j,k;
	char buff[BUFFERLEN1],buff2[BUFFERLEN1];
	UChar ubuff[BUFFERLEN1],ubuff2[BUFFERLEN1];
	FILE *fp;

	if(kpse_in_name_ok(filename))
		fp=fopen(filename,"rb");
	else {
		fprintf(stderr, "upmendex: %s is forbidden to open for reading.\n",filename);
		exit(255);
	}

	/* NOTE(review): the text below is damaged in this copy of the file.
	 * Everything between the loop condition of this `for` and a later
	 * comparison on `(*buff2).dic[0][i]` appears to be missing --
	 * presumably the rest of dicvalread() and the start of dcomp().
	 * Restore it from the upstream source; nothing about the missing part
	 * can be documented from here. */
	for (i=0;i(*buff2).dic[0][i]) return -1;
	}
	return 0;
}

/*
 * convert to capital-hiragana character
 *
 * Visible part: walks `buff1` one code point at a time (one UChar, or two
 * when is_surrogate_pair() reports a pair) and writes the kept characters
 * to `buff2`, terminating it when the end of `buff1` is reached.
 * NOTE(review): the definition is cut off in this copy of the file, so the
 * fallback branch and the return value are not documented here.
 */
int convert(UChar *buff1, UChar *buff2)
{
	int i=0,j=0,k;
	char errbuff[BUFFERLEN2],errbuff2[BUFFERLEN3];
	int wclen;
	UChar buff3[3];

	while(1) {
		if (buff1[i]==L'\0') {
			buff2[j]=L'\0';
			break;
		}
		else {
			/* buff3 holds the current code point as a terminated string
			 * of wclen UChars, for the is_*() classifiers below. */
			wclen = is_surrogate_pair(&buff1[i]) ? 2 : 1;
			buff3[0] =buff1[i];
			if (wclen==2){ buff3[1] =buff1[i+1]; }
			buff3[wclen]=L'\0';

			/* With lorder==1, drop space, tab, U+00A0, U+202F, U+2060
			 * and U+FEFF. */
			if ( lorder==1 &&( buff1[i]==' ' || buff1[i]=='\t' || buff3[0]==0x00A0
				|| buff3[0]==0x202F || buff3[0]==0x2060 || buff3[0]==0xFEFF )) {
				i++;
			}
			else if (buff1[i]<0x20 && buff1[i]!='\t') {
				/* ignore control characters */
				i++;
			}
			else if (buff1[i]<0x7F) {
				/* Values below 0x7F that reach this point (including a
				 * tab that was not dropped above) are copied unchanged. */
				buff2[j]=buff1[i];
				i++;
				j++;
			}
			else if (buff1[i]<0xA0) {
				/* ignore control characters */
				i++;
			}
			/* Code points accepted by any of these classifiers are copied
			 * through unchanged, both UChars for a surrogate pair. */
			else if (is_latin(buff3)||is_cyrillic(buff3)||is_greek(buff3)
				||is_jpn_kana(buff3)||is_kor_hngl(buff3)||is_zhuyin(buff3)
				||is_numeric(buff3)==1||is_type_symbol(buff3)==1
				||is_devanagari(buff3)||is_thai(buff3)||is_arabic(buff3)||is_hebrew(buff3)
				||is_type_mark_or_punct(buff3)) {
				buff2[j]=buff3[0];
				if (wclen==2) buff2[j+1]=buff3[1];
				i+=wclen;
				j+=wclen;
			}
			else {
				/* NOTE(review): the visible text ends inside the header
				 * of the following loop. */
				for (k=0;k