/*
 * Mapping from Shree-Dev-0708 font code points to ISCII codes or code
 * sequences.
 *
 * The table is indexed by the font code point (0x00..0xFF).  Each entry is
 * one of:
 *   - NULL : no mapping is known for this code point;
 *   - ""   : the code point maps to nothing (treated as a non-rendering
 *            spacing glyph);
 *   - a NUL-terminated byte string holding the ISCII code(s).
 *
 * Note that some contextual substitutions and re-orderings are still needed
 * for a correct mapping; this table only supplies the per-glyph bytes.
 *
 * Frequency counts ("freq") come from a 2.7M-char sample of rediff.com.
 *
 * Comments use Unicode names (e.g. "dda") rather than ISCII ones (e.g.
 * "hard da"), except for "ri" instead of "vocal r".
 *
 * ISCII idioms used below:
 *   half-consonant:   C halant nukta      (C 0xE8 0xE9)
 *   conjunct C1 C2:   C1 halant C2        (C1 0xE8 C2)
 *   explicit halant:  C1 halant halant    (C1 0xE8 0xE8)
 *
 * The superscript ra (repha, "ra.sup") is mapped to 0xFE to make it easier
 * to re-order later on.
 */
unsigned char *SImap[] = {
    /* 00..1f: no mapping */
    /* 0: */ NULL,
    /* 1: */ NULL,
    /* 2: */ NULL,
    /* 3: */ NULL,
    /* 4: */ NULL,
    /* 5: */ NULL,
    /* 6: */ NULL,
    /* 7: */ NULL,
    /* 8: */ NULL,
    /* 9: */ NULL,
    /* a: */ NULL,
    /* b: */ NULL,
    /* c: */ NULL,
    /* d: */ NULL,
    /* e: */ NULL,
    /* f: */ NULL,
    /* 10: */ NULL,
    /* 11: */ NULL,
    /* 12: */ NULL,
    /* 13: */ NULL,
    /* 14: */ NULL,
    /* 15: */ NULL,
    /* 16: */ NULL,
    /* 17: */ NULL,
    /* 18: */ NULL,
    /* 19: */ NULL,
    /* 1a: */ NULL,
    /* 1b: */ NULL,
    /* 1c: */ NULL,
    /* 1d: */ NULL,
    /* 1e: */ NULL,
    /* 1f: */ NULL,

    /* 20: width 200 space */ " ",
        /* above renders as real space; freq 485,587/2.7M */
    /* 21: */ "!",
    /* 22: */ "`",
    /* 23: unknown, 0 in sample */ NULL,
    /* 24: width 156 space */ "",
        /* above is not rendered by MSWord or IE; freq 155,502/2.7M */
        /* what is it used for? */
    /* 25: */ "%",
    /* 26: danda (viram/full stop) */ "\xEA",
    /* 27: */ "'",
    /* 28: */ "(",
    /* 29: */ ")",
    /* 2a: */ "*",
    /* 2b: */ "+",
    /* 2c: */ ",",
    /* 2d: */ "-",
    /* 2e: */ ".",
    /* 2f: */ "/",
    /* 30: dev. digit 0 */ "\xF1",
    /* 31: dev. digit 1 */ "\xF2",
    /* 32: dev. digit 2 */ "\xF3",
    /* 33: dev. digit 3 */ "\xF4",
    /* 34: dev. digit 4 */ "\xF5",
    /* 35: dev. digit 5 */ "\xF6",
    /* 36: dev. digit 6 */ "\xF7",
    /* 37: dev. digit 7 */ "\xF8",
    /* 38: dev. digit 8 */ "\xF9",
    /* 39: dev. digit 9 */ "\xFA",
    /* 3a: */ ":",
    /* 3b: */ ";",
    /* 3c: */ NULL,
    /* 3d: */ "=",
    /* 3e: width 37 space */ "",
        /* freq 25,619/2.7M; probably is non-rendering? */
    /* 3f: */ "?",

    /* 40: avagraha = danda+nukta */ "\xEA\xE9",
    /* 41: vowel a */ "\xA4",
    /* 42: vowel i */ "\xA6",
    /* 43: vowel u */ "\xA8",
    /* 44: vowel uu */ "\xA9",
    /* 45: vowel e */ "\xAC",
    /* 46: conj. tta + ra (uncertain) */ NULL,
        /* above may be variant of "ri" - freq 40/2.7M */
    /* 47: vowel ri */ "\xAA",
    /* 48: consonant ka */ "\xB3",
    /* 49: consonant kha */ "\xB4",
    /* 4a: consonant ga */ "\xB5",
    /* 4b: consonant gha */ "\xB6",
    /* 4c: consonant nga */ "\xB7",
    /* 4d: consonant ca (I cha) */ "\xB8",
    /* 4e: consonant cha (I ccha) */ "\xB9",
    /* 4f: consonant ja */ "\xBA",
    /* 50: consonant jha */ "\xBB",
    /* 51: consonant tta */ "\xBD",
    /* 52: consonant ttha */ "\xBE",
    /* 53: consonant dda */ "\xBF",
    /* 54: consonant ddha */ "\xC0",
    /* 55: consonant nna */ "\xC1",
    /* 56: consonant ta */ "\xC2",
    /* 57: consonant tha */ "\xC3",
    /* 58: consonant da */ "\xC4",
    /* 59: consonant dha */ "\xC5",
    /* 5a: consonant na */ "\xC6",
    /* 5b: vowel i (matra) */ "\xDB",
        /* 0xDB is the i matra code used by 0x70/0x7b; 0xDA is aa_matra */
    /* 5c: consonant pha */ "\xC9",
    /* 5d: ra.sup+e_matra+anusvara */ "\xFE\xE1\xA2",
        /* above is freq 21,982/2.7M */
    /* 5e: consonant bha */ "\xCB",
    /* 5f: consonant ma */ "\xCC",
    /* 60: consonant ya */ "\xCD",
    /* 61: consonant ra */ "\xCF",
    /* 62: consonant la */ "\xD1",
    /* 63: not sure what this is */ NULL,
        /* something like la/lla? freq 2/2.7M */
    /* 64: consonant va */ "\xD4",
    /* 65: consonant sha */ "\xD5",
    /* 66: consonant ssa */ "\xD6",
    /* 67: consonant sa */ "\xD7",
    /* 68: consonant ha */ "\xD8",
    /* 69: consonant lla */ "\xD2",
    /* 6a: conjunct ka+ssa */ "\xB3\xE8\xD6",
    /* 6b: conjunct ja+nya */ "\xBA\xE8\xBC",
    /* 6c: conjunct sha+ra */ "\xD5\xE8\xCF",
    /* 6d: vowel aa_matra */ "\xDA",
    /* 6e: consonant pa */ "\xC8",
    /* 6f: vowel e_matra */ "\xE1",
    /* 70: vowel i_matra */ "\xDB",
        /* above is with wide superscript stroke */
    /* 71: vowel i_matra+anusvara */ "\xDB\xA2",
        /* above is with narrow superscript stroke */
    /* 72: vowel ii (matra) */ "\xDC",
        /* above is with narrow superscript stroke */
    /* 73: vowel ii_matra */ "\xDC",
        /* above is with wide superscript stroke */
    /* 74: vowel ii_matra+anusvara */ "\xDC\xA2",
        /* above is with narrow superscript stroke */
    /* 75: vowel ra.sup+ii_matra */ "\xFE\xDC",
        /* above is with narrow superscript stroke */
        /* ra.sup -> ra halant in ISCII (given appropriate rest of context)? */
    /* 76: ra.sup+ii_matra+anusvara */ "\xFE\xDC\xA2",
        /* check order of above; freq 1/2.7M */
    /* 77: vowel u_matra */ "\xDD",
    /* 78: vowel u_matra */ "\xDD",
        /* above = 77 lower down; freq 1/2.7M */
    /* 79: vowel uu_matra */ "\xDE",
    /* 7a: not sure what this is */ NULL,
        /* above freq 31/2.7M */
    /* 7b: vowel i_matra */ "\xDB",
        /* above has medium-width superscript stroke */
    /* 7c: vowel e_matra+anusvara */ "\xE1\xA2",
    /* 7d: vowel ra.sup+e_matra */ "\xFE\xE1",
    /* 7e: consonant ba */ "\xCA",
    /* 7f: undefined */ NULL,

    /* 80: undefined */ NULL,
    /* 81: undefined */ NULL,
    /* 82: half-form conjunct ja+ja */ "\xBA\xE8\xBA\xE8\xE9",
        /* better check above; freq is 27/2.7M */
    /* 83: conjunct ca+ca */ "\xB8\xE8\xB8",
    /* 84: conjunct la+la */ "\xD1\xE8\xD1",
    /* 85: visarga (uncertain) */ "\xA3",
    /* 86: conjunct ha+nna */ "\xD8\xE8\xC1",
        /* above is freq 0 */
    /* 87: conjunct ha+la */ "\xD8\xE8\xD1",
        /* above is freq 0 */
    /* 88: conjunct ha+va */ "\xD8\xE8\xD4",
        /* above is freq 45/2.7M */
    /* 89: combining form of va ? */ NULL,
        /* above is freq 0 */
    /* 8a: half-form ka */ "\xB3\xE8\xE9",
    /* 8b: nukta */ "\xE9",
        /* above has offset 225, width 5, freq 842/2.7M */
    /* 8c: unknown, freq 0/2.7M */ NULL,
    /* 8d: undefined */ NULL,
    /* 8e: undefined */ NULL,
    /* 8f: undefined */ NULL,
    /* 90: undefined */ NULL,
    /* 91: conjunct nga+ka */ "\xB7\xE8\xB3",
        /* above has freq 0/2.7M */
    /* 92: conjunct nga+kha */ "\xB7\xE8\xB4",
        /* above has freq 0 */
    /* 93: conjunct nga+ga */ "\xB7\xE8\xB5",
        /* above has freq 0 */
    /* 94: conjunct nga+gha */ "\xB7\xE8\xB6",
    /* 95: conjunct ha+na */ "\xD8\xE8\xC6",
    /* 96: conjunct dda+ddha */ "\xBF\xE8\xC0",
        /* above has freq 0 */
    /* 97: ra.sub */ "\xE8\xCF",
        /* above is caret-shaped subscript; encoded as "halant ra" so that */
        /* it joins the preceding consonant, the same way as 0xab */
    /* 98: kha nukta */ "\xB4\xE9",
    /* 99: half-consonant kha+nukta */ "\xB4\xE8\xE9\xE9",
        /* note that above has explicit nukta; is the order right? */
        /* above is freq 0 */
    /* 9a: unknown, freq 0 */ NULL,
    /* 9b: om sign */ "\xA2\xE9",
        /* above has freq 5/2.7M, ISCII idiom: anusvara+nukta */
    /* 9c: conjunct sha+va */ "\xD5\xE8\xD4",
        /* above has freq 326/2.7M */
    /* 9d: undefined */ NULL,
    /* 9e: undefined in font */ NULL,
        /* above has freq 1/2.7M */
    /* 9f: width 125 space */ "",
        /* freq 1,593/2.7M */

    /* a0: width 5 space */ "",
        /* freq 10/2.7M; probably is non-rendering */
    /* a1: vowel ai_matra */ "\xE2",
    /* a2: vowel ai_matra+anusvara */ "\xE2\xA2",
    /* a3: ra.sup + ai_matra */ "\xFE\xE2",
        /* above has freq 1 */
    /* a4: ra.sup+ai_matra+anusvara */ "\xFE\xE2\xA2",
        /* above has freq 0 */
    /* a5: vowel ri_matra */ "\xDF",
        /* above is c-shaped subscript */
    /* a6: vocalic rr matra */ NULL,
        /* above has freq 0 */
        /* above is Unicode 0944 -- what is ISCII? */
    /* a7: anusvara */ "\xA2",
    /* a8: anusvara */ "\xA2",
        /* above is version for narrow glyph? */
    /* a9: ra.sup */ "\xFE",
        /* above is superior diacritic for r; freq 20,740/2.7M */
        /* ISCII idiom: ra halant (+re-ordering?) */
    /* aa: ra.sup+anusvara */ "\xFE\xA2",
        /* above is freq 260/2.7M; re-ordering needed? */
    /* ab: == 8C but diff offset */ "\xE8\xCF",
        /* above has freq 7105/2.7M -- check! */
        /* appears to be slash form of RA(sub) e.g. combining with pha */
        /* and thus probably equivalent to "halant ra" */
    /* ac: conjunct nga+ka+ssa */ NULL,
        /* above has freq 0 */
    /* ad: just ascii hyphen? */ "-",
        /* above has freq 2752/2.7M */
    /* ae: unknown, freq 0 */ NULL,
    /* af: not sure what */ NULL,
        /* above has freq 2/2.7M */
    /* b0: candra o */ "\xE3",
        /* above has freq 1226; == ISCII "vowel aye" */
    /* b1: candrabindu */ "\xE3\xA2",
        /* above has freq 2 */
    /* b2: explicit halant */ "\xE8\xE8",
    /* b3: unknown, freq 9 */ NULL,
    /* b4: unknown, freq 1084 */ NULL,
        /* above is mid-left diacritic */
    /* b5: undefined */ NULL,
        /* above is undefined in font but freq 116/2.7M */
    /* b6: nga-ma conjunct */ "\xB7\xE8\xCC",
    /* b7: width 5 space */ "",
        /* above is freq 1357/2.7M */
        /* should it be ignored or printed as space? */
    /* b8: conjunct ka+ka */ "\xB3\xE8\xB3",
    /* b9: conjunct ka+va */ "\xB3\xE8\xD4",
    /* ba: ka-ta conjunct */ "\xB3\xE8\xC2",
        /* above is freq 1180/2.7M */
    /* bb: half-consonant kha */ "\xB4\xE8\xE9",
    /* bc: conjunct kha-ra */ "\xB4\xE8\xCF",
    /* bd: half-consonant ga */ "\xB5\xE8\xE9",
    /* be: conjunct ga+na */ "\xB5\xE8\xC6",
    /* bf: half-consonant gha */ "\xB6\xE8\xE9",

    /* c0: half-consonant ca */ "\xB8\xE8\xE9",
    /* c1: half-consonant ja */ "\xBA\xE8\xE9",
    /* c2: half-consonant jha */ "\xBB\xE8\xE9",
    /* c3: conjunct jha+tra */ "\xBB\xE8\xC2\xE8\xCF",
    /* c4: half-consonant nya */ "\xBC\xE8\xE9",
        /* above is ISCII (half-) jna */
    /* c5: conjunct tta+tta */ "\xBD\xE8\xBD",
    /* c6: conjunct tta+ttha */ "\xBD\xE8\xBE",
    /* c7: conjunct ttha+ttha */ "\xBE\xE8\xBE",
    /* c8: conjunct dda+dda */ "\xBF\xE8\xBF",
    /* c9: conjunct ddha+ddha */ "\xC0\xE8\xC0",
    /* ca: half-consonant nna */ "\xC1\xE8\xE9",
    /* cb: half-consonant ta */ "\xC2\xE8\xE9",
    /* cc: conjunct ta+ra */ "\xC2\xE8\xCF",
    /* cd: half conjunct ta+ra */ "\xC2\xE8\xCF\xE8\xE9",
        /* above is freq 0 */
    /* ce: half conjunct ta+ta */ "\xC2\xE8\xC2\xE8\xE9",
        /* trailing halant+nukta marks the half form, as for 0xcd; */
        /* glyph identification still to be checked */
    /* cf: half-consonant tha */ "\xC3\xE8\xE9",
    /* d0: conjunct da+ra */ "\xC4\xE8\xCF",
    /* d1: da+ri_matra */ "\xC4\xDF",
        /* above is freq 29 */
    /* d2: conjunct da+ga */ "\xC4\xE8\xB5",
        /* above is freq 2 */
    /* d3: conjunct da+gha */ "\xC4\xE8\xB6",
    /* d4: conjunct da+da */ "\xC4\xE8\xC4",
    /* d5: conjunct da+dha */ "\xC4\xE8\xC5",
    /* d6: conjunct da+na */ "\xC4\xE8\xC6",
    /* d7: conjunct da+ba */ "\xC4\xE8\xCA",
    /* d8: conjunct da+bha */ "\xC4\xE8\xCB",
        /* above is freq 4 */
    /* d9: conjunct da+ma */ "\xC4\xE8\xCC",
    /* da: conjunct da+ya */ "\xC4\xE8\xCD",
    /* db: conjunct da+va */ "\xC4\xE8\xD4",
    /* dc: half-consonant dha */ "\xC5\xE8\xE9",
    /* dd: half-consonant na */ "\xC6\xE8\xE9",
    /* de: conjunct na+na */ "\xC6\xE8\xC6",
    /* df: half-consonant pa */ "\xC8\xE8\xE9",

    /* e0: conjunct pa+ra */ "\xC8\xE8\xCF",
    /* e1: conjunct pa+ta */ "\xC8\xE8\xC2",
    /* e2: half-consonant pha */ "\xC9\xE8\xE9",
    /* e3: half-consonant ba */ "\xCA\xE8\xE9",
    /* e4: half-consonant bha */ "\xCB\xE8\xE9",
    /* e5: half-consonant ma */ "\xCC\xE8\xE9",
    /* e6: half-consonant ya */ "\xCD\xE8\xE9",
    /* e7: half-consonant nya */ "\xBC\xE8\xE9",
        /* really? above looks like conjunct */
    /* e8: ra+halant (uncertain), freq 0 */ NULL,
    /* e9: conjunct ra+u_matra */ "\xCF\xE8\xDD",
    /* ea: conjunct ra+uu_matra */ "\xCF\xE8\xDE",
    /* eb: half-consonant la */ "\xD1\xE8\xE9",
    /* ec: half-consonant va */ "\xD4\xE8\xE9",
    /* ed: half-consonant sha */ "\xD5\xE8\xE9",
    /* ee: half-consonant ssa */ "\xD6\xE8\xE9",
    /* ef: conj ssa+tta */ "\xD6\xE8\xBD",
        /* above is freq 2069/2.7M */
    /* f0: conjunct ssa+ttha */ "\xD6\xE8\xBE",
    /* f1: half-consonant sa */ "\xD7\xE8\xE9",
    /* f2: conjunct sa+ra */ "\xD7\xE8\xCF",
    /* f3: conjunct sa+tra */ "\xD7\xE8\xC2\xE8\xCF",
    /* f4: half-conj, components unknown, freq 52 */ NULL,
    /* f5: freq 0 */ NULL,
    /* f6: half-form conjunct ha+ya */ "\xD8\xE8\xCD\xE8\xE9",
        /* half form of f8 -- freq 122 */
    /* f7: conjunct ha+ma */ "\xD8\xE8\xCC",
    /* f8: conjunct ha+ya */ "\xD8\xE8\xCD",
        /* or ha+ri? - freq 5 */
    /* f9: half 0x69, freq 0 */ NULL,
    /* fa: half- conjunct ka+ssa */ "\xB3\xE8\xD6\xE8\xE9",
    /* fb: unknown, freq 5 */ NULL,
    /* fc: 0xFB + ca */ NULL,
    /* fd: 0xFB + na */ NULL,
    /* fe: u_matra */ "\xDD",
        /* above is freq 1660; shorter copy of 0x77 */
    /* ff: uu_matra */ "\xDE"
        /* above is freq 93; shorter copy of 0x79 */
};