001 /** 002 * Copyright (c) 2003 Daffodil Software Ltd all rights reserved, 003 * Modifications Copyright (c) 2008 Regiscope Digital Imaging Co, LLC, All rights reserved. 004 * This program is free software; you can redistribute it and/or modify 005 * it under the terms of version 2 of the GNU General Public License as 006 * published by the Free Software Foundation. 007 * There are special exceptions to the terms and conditions of the GPL 008 * as it is applied to this software. See the GNU General Public License for more details. 009 * 010 * This program is distributed in the hope that it will be useful, 011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 013 * GNU General Public License for more details. 014 * 015 * You should have received a copy of the GNU General Public License 016 * along with this program; if not, write to the Free Software 017 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018 */ 019 020 package org.dbreplicator.replication; 021 022 import java.util.Locale; 023 import java.util.Map; 024 import java.util.Hashtable; 025 026 /** 027 * <p>Title: </p> 028 * <p>Description: </p> 029 * <p>Copyright: Copyright (c) 2005</p> 030 * <p>Company: </p> 031 * @author not attributable 032 * @version 1.0 033 */ 034 035 public class EncoderDecoder { 036 private static Map entities; 037 038 public EncoderDecoder() { 039 } 040 041 042 /** 043 * This method convert the ASCII value to UNICODE escape. 044 * 045 * The uppercase letters 'A' through 'Z' ('\u0041' through '\u005a'), 046 * The lowercase letters 'a' through 'z' ('\u0061' through '\u007a'), 047 * The digits '0' through '9' ('\u0030' through '\u0039'), 048 * The dash character '-' ('\u002d', HYPHEN-MINUS), 049 * The colon character ':' ('\u003a', COLON), and 050 * The underscore character '_' ('\u005f', LOW LINE). 051 * 052 * 053 * @param str 054 * @param escapeAscii 055 * @return String 056 */ 057 public static String escapeUnicodeStringOLD(String str, boolean escapeAscii) 058 { 059 String ostr = new String(); 060 for(int i=0; i<str.length(); i++) { 061 char ch = str.charAt(i); 062 if (!escapeAscii && ((ch >= 0x0020) && (ch <= 0x007e))) 063 ostr += ch ; 064 else { 065 ostr += "\\u" ; 066 String hex = Integer.toHexString(str.charAt(i) & 0xFFFF); 067 if (hex.length() == 2) 068 ostr += "00" ; 069 ostr += hex.toUpperCase(Locale.ENGLISH); 070 } 071 } 072 return (ostr); 073 } 074 075 076 public static String escapeUnicodeString1(String str, boolean escapeAscii) 077 { 078 StringBuffer ostr = new StringBuffer(); 079 for(int i=0; i<str.length(); i++) { 080 char ch = str.charAt(i); 081 if (!escapeAscii && ((ch >= 0x0020) && (ch <= 0x007e))){ 082 ostr.append(ch) ; 083 } 084 else { 085 ostr.append("\\u"); 086 String hex = Integer.toHexString(str.charAt(i) & 0xFFFF); 087 // System.out.println(ch+" hex string "+ hex); 088 if (hex.length() == 2) 089 ostr.append("00"); 090 else if(hex.length() == 1) 091 ostr.append("000"); 092 else if(hex.length() == 3) 093 ostr.append("0"); 094 095 ostr.append( hex.toUpperCase(Locale.ENGLISH)); 096 } 097 } 098 //System.out.println(" ostr.toString() ="+ostr.toString()); 099 return ostr.toString(); 100 } 101 102 103 private synchronized static Map getEntities() 104 { 105 if (entities==null) 106 { 107 entities=new Hashtable(); 108 //Quotation mark 109 entities.put("quot","\""); 110 //Ampersand 111 entities.put("amp","\u0026"); 112 //Less than 113 entities.put("lt","\u003C"); 114 //Greater than 115 entities.put("gt","\u003E"); 116 //Nonbreaking space 117 entities.put("nbsp","\u00A0"); 118 //Inverted exclamation point 119 entities.put("iexcl","\u00A1"); 120 //Cent sign 121 entities.put("cent","\u00A2"); 122 //Pound sign 123 entities.put("pound","\u00A3"); 124 //General currency sign 125 entities.put("curren","\u00A4"); 126 //Yen sign 127 entities.put("yen","\u00A5"); 128 //Broken vertical bar 129 entities.put("brvbar","\u00A6"); 130 //Section sign 131 entities.put("sect","\u00A7"); 132 //Umlaut 133 entities.put("uml","\u00A8"); 134 //Copyright 135 entities.put("copy","\u00A9"); 136 //Feminine ordinal 137 entities.put("ordf","\u00AA"); 138 //Left angle quote 139 entities.put("laquo","\u00AB"); 140 //Not sign 141 entities.put("not","\u00AC"); 142 //Soft hyphen 143 entities.put("shy","\u00AD"); 144 //Registered trademark 145 entities.put("reg","\u00AE"); 146 //Macron accent 147 entities.put("macr","\u00AF"); 148 //Degree sign 149 entities.put("deg","\u00B0"); 150 //Plus or minus 151 entities.put("plusmn","\u00B1"); 152 //Superscript 2 153 entities.put("sup2","\u00B2"); 154 //Superscript 3 155 entities.put("sup3","\u00B3"); 156 //Acute accent 157 entities.put("acute","\u00B4"); 158 //Micro sign (Greek mu) 159 entities.put("micro","\u00B5"); 160 //Paragraph sign 161 entities.put("para","\u00B6"); 162 //Middle dot 163 entities.put("middot","\u00B7"); 164 //Cedilla 165 entities.put("cedil","\u00B8"); 166 //Superscript 1 167 entities.put("sup1","\u00B9"); 168 //Masculine ordinal 169 entities.put("ordm","\u00BA"); 170 //Right angle quote 171 entities.put("raquo","\u00BB"); 172 //Fraction one-fourth 173 entities.put("frac14","\u00BC"); 174 //Fraction one-half 175 entities.put("frac12","\u00BD"); 176 //Fraction three-fourths 177 entities.put("frac34","\u00BE"); 178 //Inverted question mark 179 entities.put("iquest","\u00BF"); 180 //Capital A, grave accent 181 entities.put("Agrave","\u00C0"); 182 //Capital A, acute accent 183 entities.put("Aacute","\u00C1"); 184 //Capital A, circumflex accent 185 entities.put("Acirc","\u00C2"); 186 //Capital A, tilde 187 entities.put("Atilde","\u00C3"); 188 //Capital A, umlaut 189 entities.put("Auml","\u00C4"); 190 //Capital A, ring 191 entities.put("Aring","\u00C5"); 192 //Capital AE ligature 193 entities.put("AElig","\u00C6"); 194 //Capital C, cedilla 195 entities.put("Ccedil","\u00C7"); 196 //Capital E, grave accent 197 entities.put("Egrave","\u00C8"); 198 //Capital E, acute accent 199 entities.put("Eacute","\u00C9"); 200 //Capital E, circumflex accent 201 entities.put("Ecirc","\u00CA"); 202 //Capital E, umlaut 203 entities.put("Euml","\u00CB"); 204 //Capital I, grave accent 205 entities.put("Igrave","\u00CC"); 206 //Capital I, acute accent 207 entities.put("Iacute","\u00CD"); 208 //Capital I, circumflex accent 209 entities.put("Icirc","\u00CE"); 210 //Capital I, umlaut 211 entities.put("Iuml","\u00CF"); 212 //Capital eth, Icelandic 213 entities.put("ETH","\u00D0"); 214 //Capital N, tilde 215 entities.put("Ntilde","\u00D1"); 216 //Capital O, grave accent 217 entities.put("Ograve","\u00D2"); 218 //Capital O, acute accent 219 entities.put("Oacute","\u00D3"); 220 //Capital O, circumflex accent 221 entities.put("Ocirc","\u00D4"); 222 //Capital O, tilde 223 entities.put("Otilde","\u00D5"); 224 //Capital O, umlaut 225 entities.put("Ouml","\u00D6"); 226 //Multiply sign 227 entities.put("times","\u00D7"); 228 //Capital O, slash 229 entities.put("Oslash","\u00D8"); 230 //Capital U, grave accent 231 entities.put("Ugrave","\u00D9"); 232 //Capital U, acute accent 233 entities.put("Uacute","\u00DA"); 234 //Capital U, circumflex accent 235 entities.put("Ucirc","\u00DB"); 236 //Capital U, umlaut 237 entities.put("Uuml","\u00DC"); 238 //Capital Y, acute accent 239 entities.put("Yacute","\u00DD"); 240 //Capital thorn, Icelandic 241 entities.put("THORN","\u00DE"); 242 //Small sz ligature, German 243 entities.put("szlig","\u00DF"); 244 //Small a, grave accent 245 entities.put("agrave","\u00E0"); 246 //Small a, acute accent 247 entities.put("aacute","\u00E1"); 248 //Small a, circumflex accent 249 entities.put("acirc","\u00E2"); 250 //Small a, tilde 251 entities.put("atilde","\u00E3"); 252 //Small a, umlaut 253 entities.put("auml","\u00E4"); 254 //Small a, ring 255 entities.put("aring","\u00E5"); 256 //Small ae ligature 257 entities.put("aelig","\u00E6"); 258 //Small c, cedilla 259 entities.put("ccedil","\u00E7"); 260 //Small e, grave accent 261 entities.put("egrave","\u00E8"); 262 //Small e, acute accent 263 entities.put("eacute","\u00E9"); 264 //Small e, circumflex accent 265 entities.put("ecirc","\u00EA"); 266 //Small e, umlaut 267 entities.put("euml","\u00EB"); 268 //Small i, grave accent 269 entities.put("igrave","\u00EC"); 270 //Small i, acute accent 271 entities.put("iacute","\u00ED"); 272 //Small i, circumflex accent 273 entities.put("icirc","\u00EE"); 274 //Small i, umlaut 275 entities.put("iuml","\u00EF"); 276 //Small eth, Icelandic 277 entities.put("eth","\u00F0"); 278 //Small n, tilde 279 entities.put("ntilde","\u00F1"); 280 //Small o, grave accent 281 entities.put("ograve","\u00F2"); 282 //Small o, acute accent 283 entities.put("oacute","\u00F3"); 284 //Small o, circumflex accent 285 entities.put("ocirc","\u00F4"); 286 //Small o, tilde 287 entities.put("otilde","\u00F5"); 288 //Small o, umlaut 289 entities.put("ouml","\u00F6"); 290 //Division sign 291 entities.put("divide","\u00F7"); 292 //Small o, slash 293 entities.put("oslash","\u00F8"); 294 //Small u, grave accent 295 entities.put("ugrave","\u00F9"); 296 //Small u, acute accent 297 entities.put("uacute","\u00FA"); 298 //Small u, circumflex accent 299 entities.put("ucirc","\u00FB"); 300 //Small u, umlaut 301 entities.put("uuml","\u00FC"); 302 //Small y, acute accent 303 entities.put("yacute","\u00FD"); 304 //Small thorn, Icelandic 305 entities.put("thorn","\u00FE"); 306 //Small y, umlaut 307 entities.put("yuml","\u00FF"); 308 } 309 return entities; 310 } 311 312 313 /** 314 * This method convert the UNICODE value to ASCII 315 * For example '\u0061' to 'a' 316 * @param str 317 * @return String 318 */ 319 320 public static String decode(String str) 321 { 322 StringBuffer ostr = new StringBuffer(); 323 int i1=0; 324 int i2=0; 325 326 while(i2<str.length()) 327 { 328 i1 = str.indexOf("&",i2); 329 if (i1 == -1 ) { 330 ostr.append(str.substring(i2, str.length())); 331 break ; 332 } 333 ostr.append(str.substring(i2, i1)); 334 i2 = str.indexOf(";", i1); 335 if (i2 == -1 ) { 336 ostr.append(str.substring(i1, str.length())); 337 break ; 338 } 339 340 String tok = str.substring(i1+1, i2); 341 if (tok.charAt(0)=='#') 342 { 343 tok=tok.substring(1); 344 try { 345 int radix = 10 ; 346 if (tok.trim().charAt(0) == 'x') { 347 radix = 16 ; 348 tok = tok.substring(1,tok.length()); 349 } 350 ostr.append((char) Integer.parseInt(tok, radix)); 351 } catch (NumberFormatException exp) { 352 ostr.append('?'); 353 } 354 } else 355 { 356 tok=(String)getEntities().get(tok); 357 if (tok!=null) 358 ostr.append(tok); 359 else 360 ostr.append('?'); 361 } 362 i2++ ; 363 } 364 return ostr.toString(); 365 } 366 367 public static String decodeNew(String str){ 368 StringBuffer sb = new StringBuffer(); 369 int i =2,j=6; 370 //System.out.println("EncoderDecoder.decodeNew(str) ="+str); 371 while(i<=str.length()){ 372 String ss= str.substring(i,j); 373 char c2 =(char) Integer.parseInt(ss,16); 374 i+=6; 375 j+=6; 376 sb.append(c2); 377 } 378 return sb.toString(); 379 } 380 381 382 }

