Generating Nepali Unicode Sequence For Characters
Kushal Paudyal August 25th, 2010
While working on a personal translation project, I reached the localization phase. Because Nepali characters have to be written as Unicode escapes (\u09xx), typing strings in that form by hand was difficult. So I wrote this class, a Unicode generator, whose input is the ordinary English keys used to type Nepali on a romanized keyboard layout. For example, to write ‘Kushal’ in Nepali you would key in ‘kuSl’. When you pass ‘kuSl’ to the format method of the following class, you get back a string of the form ‘\u09xx\u09xx\u09xx\u09xx’, where each xx is replaced by the digits of the correct character. For instance, the Unicode code point for the Nepali letter ‘ka’ is \u0915. You can use the resulting string for display in GUIs.
/**
*UnicodeConverter.java
*Converts from standard keyboard input to nepali unicode characters.
*Written On: 12th November 2005
*Author: Kushal Paudyal (kushalzone@gmail.com)
* http://java.sanjaal.com
*/
package com.kushal.utilities;
import java.util.Vector;
import javax.swing.JOptionPane;
/**
 * Converts text typed with the Nepali romanized keyboard layout into the
 * corresponding Devanagari characters.
 *
 * <p>The conversion is a plain per-character table lookup: the position of an
 * input key inside {@link #standard} is the position of its Devanagari
 * replacement inside {@link #unicodeVector}. Characters that have no entry in
 * the key string are copied to the output unchanged.
 *
 * <p>Conversion is only performed when the converter was created for the
 * language code {@code "np"}; for any other language {@link #format(String)}
 * returns its argument as is.
 */
public class UnicodeConverter {

    /** Language code for which conversion is enabled. */
    private static final String NEPALI = "np";

    /**
     * Devanagari output characters, one single-character string per entry,
     * index-aligned with {@link #standard}. Filled by {@link #prepareVector()}.
     */
    Vector<String> unicodeVector;

    /**
     * Input keys, index-aligned with {@link #unicodeVector}.
     *
     * <p>Code points that have no key on the keyboard are represented by the
     * code point itself (written as a Unicode escape), so every slot holds
     * exactly one character and such a character simply maps to itself.
     *
     * <p>The digit keys are listed as 0..9 because the table stores the
     * Devanagari digits in the order zero..nine; listing them as 1..9,0 would
     * shift every digit by one.
     *
     * <p>NOTE(review): the keys for the consonants from U+0919 to U+0933 were
     * missing from the copy of this file under review and have been filled in
     * from the standard Nepali romanized layout (which agrees with every key
     * that survived and with the examples in {@link #main(String[])}) -
     * confirm against the original. The key for U+0950 was also missing; a
     * self-mapping placeholder is used for it - TODO confirm the intended key.
     */
    static String standard =
            " "                                                        // space
            + "0123456789"                                             // U+0966..U+096F digits
            + "VM:\u0904"                                              // U+0901..U+0904 signs
            + "HA[{fFZ\u090C\u090D\u090E]}\u0911\u0912OW"              // U+0905..U+0914 vowels
            + "kKgG<CcjJYqQxXNtTdDn\u0929pPbBmyr\u0931lL\u0934vSzsh"   // U+0915..U+0939 consonants
            + "`~aiIuUR\u0944\u0945\u0946eE\u0949\u094Aow/"            // U+093C..U+094D nukta, matras, halant
            + "\u094E\u094F\u0950\u0951|\u0953\u0954"                  // U+094E..U+0954
            + "\u0958\u0959";                                          // U+0958..U+0959

    /** Language code this converter was created for. */
    String language;

    /**
     * Creates a converter for the given language.
     *
     * @param language language code; the lookup table is prepared and
     *                 conversion enabled only for {@code "np"}
     */
    public UnicodeConverter(String language) {
        this.language = language;
        // Compare by value: reference comparison fails for any "np" that is
        // not the interned literal (e.g. one read from a file or user input).
        if (NEPALI.equals(language)) {
            prepareVector();
        }
    }

    /**
     * Converts romanized key input to Devanagari text.
     *
     * <p>Each character of {@code str} is looked up in {@link #standard}; when
     * found, the entry at the same index of {@link #unicodeVector} is appended
     * to the result, otherwise the character itself is appended.
     *
     * @param str text keyed in with the romanized layout; may be {@code null}
     * @return the converted text, or {@code str} itself when it is
     *         {@code null} or the converter's language is not {@code "np"}
     */
    public String format(String str) {
        if (str == null || !NEPALI.equals(language)) {
            return str;
        }
        // The language field can be changed after construction, so make sure
        // the table exists before it is used.
        if (unicodeVector == null) {
            prepareVector();
        }

        StringBuilder converted = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char key = str.charAt(i);
            int index = standard.indexOf(key);
            if (index < 0 || index >= unicodeVector.size()) {
                // Not a mapped key (or no table entry for it): keep as typed.
                converted.append(key);
            } else {
                converted.append(unicodeVector.elementAt(index));
            }
        }
        return converted.toString();
    }

    /**
     * Builds the table of Devanagari characters in the order expected by
     * {@link #standard}: a space, the digits U+0966..U+096F, then the blocks
     * U+0901..U+0939, U+093C..U+0954 and U+0958..U+0959.
     */
    public void prepareVector() {
        Vector<String> table = new Vector<String>();
        table.addElement(" ");              // space
        addRange(table, 0x0966, 0x096F);    // digits zero..nine
        addRange(table, 0x0901, 0x0939);    // signs, independent vowels, consonants
        addRange(table, 0x093C, 0x0954);    // nukta, avagraha, matras, halant, om, accents
        addRange(table, 0x0958, 0x0959);    // qa, khha
        unicodeVector = table;
    }

    /** Appends every code point from first to last (inclusive) as a one-character string. */
    private static void addRange(Vector<String> target, int first, int last) {
        for (int codePoint = first; codePoint <= last; codePoint++) {
            target.addElement(String.valueOf((char) codePoint));
        }
    }

    /**
     * Usage example: converts a sample sentence and shows it in a dialog.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        UnicodeConverter generator = new UnicodeConverter("np");
        /*
         * Usage: to print "mero naam Kushal ho" you would key in the
         * following on a romanized Unicode keyboard: mero nam kuSl ho
         */
        // String test = generator.format("raz/q/riy"); // Rastriya
        String test = generator.format("mero nam kuSl ho");
        JOptionPane.showMessageDialog(null, test);
    }
}
Sanjaal.com is owned and maintained by Sanjaal Corps, Nepal. The company offers Webhosting and Domain Registration Services, IT Solutions and Business Analysis. Sanjaal.com website features H1B Visa Information, Entertainment Portal, Link Directory Service, Free Articles, Free Open Source Tutorials on Java and J2EE Platform, Digital Photography, High Resolution Picture Gallery and Free Reliable Image Hosting Services. Future plan includes Open Source Software Development Portal, Technical Solutions and Customizable Movie and Music Arena. We would be introducing data backup, data recovery, data hosting and voip solutions. Stay free from phishing – our website does not ask for your credit card and banking information. Happy Surfing!
Originally posted 2008-07-12 21:11:09.