Do we actually use characters that require more than one byte?

I couldn’t find a way to identify those columns that actually had a character with a code point value greater than 255. As a result, I ended up writing the following. If you have a better way, please reply.

import java.sql.*;

/**
 * Samples rows from a table and reports which ones hold, in the second
 * selected column, at least one character whose code point is above 255.
 *
 * <p>For every such row the value of the first selected column is printed
 * together with the first offending code point. A running percentage is
 * printed every {@code PROGRESS_INTERVAL} rows and the total row count at the end.
 */
public class checkUnicode {
  /** Highest code point that is NOT reported; anything above this counts as a hit. */
  private static final int MAX_NARROW_CODE_POINT = 255;

  /** Number of rows between two progress messages. */
  private static final int PROGRESS_INTERVAL = 100;

  /**
   * Runs the scan against the configured database and prints the findings to
   * standard output. Any failure (driver not found, SQL error, ...) is printed
   * as a stack trace and ends the scan.
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    try {
      Class.forName("oracle.jdbc.driver.OracleDriver");
      // try-with-resources closes the result set, statement and connection
      // (in that order) on both the normal and the exceptional path.
      try (Connection conn = DriverManager.getConnection("jdbc:oracle:thin:*****/****@******:2484/fake_db_service");
           PreparedStatement pst = conn.prepareStatement("select fake_col1, fake_col2 from large_fake_table sample(1)");
           ResultSet rst = pst.executeQuery()) {
        // Integral counters: rows are counted, not measured, and print as "100" rather than "100.0".
        long tot = 0;
        long found = 0;
        while (rst.next()) {
          // getString yields null for SQL NULL; the helper treats that as "nothing found".
          int wide = firstWideCodePoint(rst.getString(2));
          if (wide >= 0) {
            found++;
            System.out.println(rst.getInt(1) + " has a code point value of " + wide);
          }
          tot++;
          if (tot % PROGRESS_INTERVAL == 0) {
            System.out.println("Have checked " + tot + " rows, " + (100.0 * found / tot) + "% of which have unicode.");
          }
        }
        System.out.println("Checked " + tot + " rows");
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  /**
   * Finds the first code point in {@code val} that is greater than 255.
   *
   * @param val the text to inspect; may be {@code null}
   * @return the first code point above 255, or {@code -1} if {@code val} is
   *     {@code null}, empty, or contains no such code point
   */
  static int firstWideCodePoint(String val) {
    if (val == null) {
      return -1;
    }
    int i = 0;
    while (i < val.length()) {
      int cp = val.codePointAt(i);
      if (cp > MAX_NARROW_CODE_POINT) {
        return cp;
      }
      // Advance by whole code points so a surrogate pair is consumed as one unit.
      i += Character.charCount(cp);
    }
    return -1;
  }
}

Post navigation

Leave a Reply