package com.cloudera.util;
import java.nio.charset.Charset;
import java.util.Arrays;
import org.junit.Assert;
import org.junit.Test;
@Test
System.out.println("Default: " + Charset.defaultCharset());
String charset = Charset.defaultCharset().displayName();
System.out.println(charset);
}
@Test
System.out.println("== charsets");
for (String s : Charset.availableCharsets().keySet()) {
System.out.println(s);
}
}
byte[] b = new byte[256];
for (int i = 0; i < 256; i++) {
b[i] = (byte) i;
}
return b;
}
StringBuilder s = new StringBuilder();
for (int i = 0; i < b.length; i++) {
s.append(String.format("%02x ", b[i]));
}
return s.toString();
}
@Test
byte[] all = allbytes();
String auto = new String(all);
String utf8 = new String(all, Charset.forName("UTF-8"));
String ascii = new String(all, Charset.forName("US-ASCII"));
String latin1 = new String(all, Charset.forName("ISO-8859-1"));
System.out.printf("lengths: auto: %d utf8: %d ascii: %d latin1: %d\n", auto
.length(), utf8.length(), ascii.length(), latin1.length());
System.out.printf("bytelen: auto: %d utf8: %d ascii: %d latin1: %d\n", auto
.getBytes().length, utf8.getBytes(Charset.forName("UTF-8")).length,
ascii.getBytes(Charset.forName("US-ASCII")).length, latin1
.getBytes(Charset.forName("ISO-8859-1")).length);
System.out.printf("original : %s\n", dumpHex(all));
System.out.printf("auto : %s\n", dumpHex(auto.getBytes()));
System.out.printf("utf-8 : %s\n", dumpHex(utf8.getBytes(Charset
.forName("UTF-8"))));
System.out.printf("ascii : %s\n", dumpHex(ascii.getBytes(Charset
.forName("US-ASCII"))));
System.out.printf("latin1 : %s\n", dumpHex(latin1.getBytes(Charset
.forName("ISO-8859-1"))));
Assert.assertTrue(Arrays.equals(all, latin1
.getBytes(Charset.forName("ISO-8859-1"))));
}
}