I couldn’t find a way to identify those columns that actually had a character with a code point value greater than 255. As a result, I ended up writing the following. If you have a better way, please reply.
import java.sql.*;
/**
 * Scans rows returned by a query and reports which ones contain at least one
 * character whose code point is greater than 255, i.e. a character that does
 * not fit in a single byte.
 *
 * <p>For every row, column 2 is read as the text to inspect and column 1 is
 * read as an integer used to identify the row in the report. A progress line
 * is printed every 100 rows and a total is printed at the end.
 */
public class checkUnicode {

    /** Highest code point that is still treated as "not unicode" by this check. */
    private static final int MAX_SINGLE_BYTE_CODE_POINT = 255;

    /**
     * Returns the first code point in {@code text} that is greater than 255.
     *
     * <p>The string is walked code point by code point, so a surrogate pair is
     * reported as the single supplementary code point it encodes.
     *
     * @param text the value to inspect; may be {@code null} (e.g. a SQL NULL)
     * @return the first code point above 255, or {@code -1} if {@code text} is
     *         {@code null}, empty, or contains no such code point
     */
    static int firstCodePointAbove255(String text) {
        if (text == null) {
            return -1;
        }
        int index = 0;
        while (index < text.length()) {
            int codePoint = text.codePointAt(index);
            if (codePoint > MAX_SINGLE_BYTE_CODE_POINT) {
                return codePoint;
            }
            // Advance by 2 chars for a surrogate pair, 1 otherwise.
            index += Character.charCount(codePoint);
        }
        return -1;
    }

    /**
     * Connects to the database, runs the query and prints the report.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        try {
            // Explicit driver load; kept so the program also works where the
            // driver is not auto-registered.
            Class.forName("oracle.jdbc.driver.OracleDriver");

            // try-with-resources closes the result set, statement and connection
            // (in that order) on both the normal and the error path.
            try (Connection conn = DriverManager.getConnection("jdbc:oracle:thin:*****/****@******:2484/fake_db_service");
                 PreparedStatement pst = conn.prepareStatement("select fake_col1, fake_col2 from large_fake_table sample(1)");
                 ResultSet rst = pst.executeQuery()) {

                // Whole-number counters so row counts print as "100", not "100.0".
                long tot = 0;
                long found = 0;

                while (rst.next()) {
                    // getString returns null for SQL NULL; the helper treats that
                    // as "nothing found" instead of throwing.
                    int codePoint = firstCodePointAbove255(rst.getString(2));
                    if (codePoint >= 0) {
                        found++;
                        System.out.println(rst.getInt(1) + " has a code point value of " + codePoint);
                    }

                    tot++;
                    if (tot % 100 == 0) {
                        System.out.println("Have checked " + tot + " rows, " + (((double) found / tot) * 100) + "% of which have unicode.");
                    }
                }
                System.out.println("Checked " + tot + " rows");
            }
        }
        catch (ClassNotFoundException | SQLException e) {
            // Top-level boundary of a command-line tool: report and exit.
            e.printStackTrace();
        }
    }
}