Java 判斷中文字符

Jingbin_ 發布于2019-08-14 15:11 / 843人閱讀

摘要：項目偶爾需要對中文字符做一些處理，所以搜集了這個判斷中文字符的代碼片段，分享給大家。代碼提供三種判斷方式：根據漢字編碼範圍進行判斷、使用正則匹配漢字編碼範圍、以及使用 CJK 字符集（Unicode 區塊）判斷，並分別示範「是否包含漢字」與「是否全是漢字」。前兩種方式只識別 CJK 統一漢字，不包含中文的，。等符號。如果僅僅去判斷是否是中文，不需判斷中文標點的話，推薦使用正則去匹配，可能更高效點。

項目偶爾需要對中文字符做一些處理，所以搜集了這個判斷中文字符的代碼片段，分享給大家。

直接貼出代碼了，裡面有詳細的註釋。

package com.coder4j.main;

import java.util.regex.Pattern;

/**
 * Utilities for detecting Chinese characters in a string.
 *
 * <p>Three strategies are provided, each in a "contains any" ({@code has...})
 * and an "consists only of" ({@code is...}) flavour:
 * <ol>
 *   <li>{@code ...ByRange}: numeric comparison against the CJK Unified
 *       Ideographs block (U+4E00 to U+9FFF); Chinese punctuation is NOT
 *       counted.</li>
 *   <li>{@code ...ByReg}: the same block expressed as a regular expression;
 *       Chinese punctuation is NOT counted.</li>
 *   <li>{@code hasChinese}/{@code isChinese}: Unicode block lookup covering the
 *       ideograph blocks (including extensions A-D) as well as CJK, general
 *       and full-width punctuation.</li>
 * </ol>
 *
 * <p>All methods return {@code false} for a {@code null} argument.
 *
 * @author Chinaxiang
 * @date 2015-08-11
 *
 */
public class CheckChinese {

    /** First code point of the CJK Unified Ideographs block. */
    private static final int CJK_UNIFIED_FIRST = 0x4E00;

    /** Last code point of the CJK Unified Ideographs block. */
    private static final int CJK_UNIFIED_LAST = 0x9FFF;

    /**
     * One or more characters from the CJK Unified Ideographs block. Compiled
     * once because {@link Pattern} instances are immutable and reusable. The
     * doubled backslashes hand the escapes to the regex engine; without them
     * the class would match plain ASCII letters and digits instead.
     */
    private static final Pattern CJK_UNIFIED_PATTERN = Pattern.compile("[\\u4E00-\\u9FFF]+");

    /**
     * Demo driver: prints the result of every check for a set of sample
     * strings. The trailing comment on each line is the expected result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Pure ASCII / English (the embedded double quotes must be escaped)
        String s1 = "Hello,Tom.!@#$%^&*()_+-={}|[];\":\"?";
        // Chinese only (no Chinese punctuation)
        String s2 = "你好中國";
        // Chinese only (with Chinese punctuation)
        String s3 = "你好，中國。《》：“”‘’；（）【】！￥、";
        // Korean. NOTE(review): the original sample was lost to an encoding
        // error ("????"); this is a representative Hangul replacement.
        String s4 = "대한민국";
        // Japanese
        String s5 = "ぎじゅつ";
        // Special characters. NOTE(review): s6 looks like it was also lost to an
        // encoding error; '?' is plain ASCII, so every check still yields false.
        String s6 = "??";
        String s7 = "╃";
        String s8 = "╂";
        // Traditional Chinese
        String s9 = "蒼老師";

        // 1 Numeric code-point range check
        System.out.println("s1是否包含中文：" + hasChineseByRange(s1));// false
        System.out.println("s2是否包含中文：" + hasChineseByRange(s2));// true
        System.out.println("s3是否包含中文：" + hasChineseByRange(s3));// true
        System.out.println("s4是否包含中文：" + hasChineseByRange(s4));// false
        System.out.println("s5是否包含中文：" + hasChineseByRange(s5));// false
        System.out.println("s6是否包含中文：" + hasChineseByRange(s6));// false
        System.out.println("s7是否包含中文：" + hasChineseByRange(s7));// false
        System.out.println("s8是否包含中文：" + hasChineseByRange(s8));// false
        System.out.println("s9是否包含中文：" + hasChineseByRange(s9));// true
        System.out.println("-------分割線-------");
        System.out.println("s1是否全是中文：" + isChineseByRange(s1));// false
        System.out.println("s2是否全是中文：" + isChineseByRange(s2));// true
        System.out.println("s3是否全是中文：" + isChineseByRange(s3));// false: Chinese punctuation is outside the range
        System.out.println("s4是否全是中文：" + isChineseByRange(s4));// false
        System.out.println("s5是否全是中文：" + isChineseByRange(s5));// false
        System.out.println("s6是否全是中文：" + isChineseByRange(s6));// false
        System.out.println("s7是否全是中文：" + isChineseByRange(s7));// false
        System.out.println("s8是否全是中文：" + isChineseByRange(s8));// false
        System.out.println("s9是否全是中文：" + isChineseByRange(s9));// true
        System.out.println("-------分割線-------");
        // 2 Regular expression over the same range (same results as 1)
        System.out.println("s1是否包含中文：" + hasChineseByReg(s1));// false
        System.out.println("s2是否包含中文：" + hasChineseByReg(s2));// true
        System.out.println("s3是否包含中文：" + hasChineseByReg(s3));// true
        System.out.println("s4是否包含中文：" + hasChineseByReg(s4));// false
        System.out.println("s5是否包含中文：" + hasChineseByReg(s5));// false
        System.out.println("s6是否包含中文：" + hasChineseByReg(s6));// false
        System.out.println("s7是否包含中文：" + hasChineseByReg(s7));// false
        System.out.println("s8是否包含中文：" + hasChineseByReg(s8));// false
        System.out.println("s9是否包含中文：" + hasChineseByReg(s9));// true
        System.out.println("-------分割線-------");
        System.out.println("s1是否全是中文：" + isChineseByReg(s1));// false
        System.out.println("s2是否全是中文：" + isChineseByReg(s2));// true
        System.out.println("s3是否全是中文：" + isChineseByReg(s3));// false: Chinese punctuation is outside the range
        System.out.println("s4是否全是中文：" + isChineseByReg(s4));// false
        System.out.println("s5是否全是中文：" + isChineseByReg(s5));// false
        System.out.println("s6是否全是中文：" + isChineseByReg(s6));// false
        System.out.println("s7是否全是中文：" + isChineseByReg(s7));// false
        System.out.println("s8是否全是中文：" + isChineseByReg(s8));// false
        System.out.println("s9是否全是中文：" + isChineseByReg(s9));// true
        System.out.println("-------分割線-------");
        // 3 Unicode block (CJK character set) check
        System.out.println("s1是否包含中文：" + hasChinese(s1));// false
        System.out.println("s2是否包含中文：" + hasChinese(s2));// true
        System.out.println("s3是否包含中文：" + hasChinese(s3));// true
        System.out.println("s4是否包含中文：" + hasChinese(s4));// false
        System.out.println("s5是否包含中文：" + hasChinese(s5));// false
        System.out.println("s6是否包含中文：" + hasChinese(s6));// false
        System.out.println("s7是否包含中文：" + hasChinese(s7));// false
        System.out.println("s8是否包含中文：" + hasChinese(s8));// false
        System.out.println("s9是否包含中文：" + hasChinese(s9));// true
        System.out.println("-------分割線-------");
        System.out.println("s1是否全是中文：" + isChinese(s1));// false
        System.out.println("s2是否全是中文：" + isChinese(s2));// true
        System.out.println("s3是否全是中文：" + isChinese(s3));// true: Chinese punctuation is included here
        System.out.println("s4是否全是中文：" + isChinese(s4));// false
        System.out.println("s5是否全是中文：" + isChinese(s5));// false
        System.out.println("s6是否全是中文：" + isChinese(s6));// false
        System.out.println("s7是否全是中文：" + isChinese(s7));// false
        System.out.println("s8是否全是中文：" + isChinese(s8));// false
        System.out.println("s9是否全是中文：" + isChinese(s9));// true

    }

    /**
     * Tells whether the string contains at least one Chinese character,
     * where Chinese punctuation also counts as Chinese.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if any code point is Chinese; {@code false} if none
     *         is, or if {@code str} is {@code null} or empty
     */
    public static boolean hasChinese(String str) {
        if (str == null) {
            return false;
        }
        // Walk by code point, not by char: the extension B/C/D ideographs are
        // encoded as surrogate pairs and would never be recognised char-by-char.
        for (int i = 0; i < str.length();) {
            int codePoint = str.codePointAt(i);
            if (isChineseCodePoint(codePoint)) {
                return true;
            }
            i += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Tells whether every character of the string is Chinese, where Chinese
     * punctuation also counts as Chinese.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if every code point is Chinese (vacuously
     *         {@code true} for the empty string); {@code false} otherwise or
     *         if {@code str} is {@code null}
     */
    public static boolean isChinese(String str) {
        if (str == null) {
            return false;
        }
        // Code-point iteration for the same reason as in hasChinese(String).
        for (int i = 0; i < str.length();) {
            int codePoint = str.codePointAt(i);
            if (!isChineseCodePoint(codePoint)) {
                return false;
            }
            i += Character.charCount(codePoint);
        }
        return true;
    }

    /**
     * Tells whether a single code point is a Chinese character or Chinese
     * punctuation, judged by the Unicode block it belongs to.
     *
     * @param codePoint a valid Unicode code point
     * @return {@code true} if the code point lies in one of the accepted blocks
     */
    private static boolean isChineseCodePoint(int codePoint) {
        // of(int) returns null for code points outside any known block; the
        // identity comparisons below then simply all fail.
        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Tells whether the string contains at least one Chinese ideograph, using
     * a regular expression over the CJK Unified Ideographs range. Chinese
     * punctuation and symbols do not count.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if an ideograph occurs anywhere in {@code str};
     *         {@code false} otherwise or if {@code str} is {@code null}
     */
    public static boolean hasChineseByReg(String str) {
        if (str == null) {
            return false;
        }
        return CJK_UNIFIED_PATTERN.matcher(str).find();
    }

    /**
     * Tells whether the string consists solely of Chinese ideographs, using a
     * regular expression over the CJK Unified Ideographs range. Chinese
     * punctuation and symbols do not count.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if {@code str} is one or more ideographs and
     *         nothing else; {@code false} otherwise, including for the empty
     *         string (the pattern requires at least one character) and
     *         {@code null}
     */
    public static boolean isChineseByReg(String str) {
        if (str == null) {
            return false;
        }
        return CJK_UNIFIED_PATTERN.matcher(str).matches();
    }

    /**
     * Tells whether the string contains at least one Chinese ideograph, by
     * comparing each character against the CJK Unified Ideographs range.
     * Chinese punctuation and symbols do not count.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if any character is in the range; {@code false}
     *         otherwise or if {@code str} is {@code null}
     */
    public static boolean hasChineseByRange(String str) {
        if (str == null) {
            return false;
        }
        // The whole range sits in the Basic Multilingual Plane, so a plain
        // char loop is sufficient here.
        for (int i = 0; i < str.length(); i++) {
            if (isCjkUnified(str.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the string consists solely of Chinese ideographs, by
     * comparing each character against the CJK Unified Ideographs range.
     * Chinese punctuation and symbols do not count.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if every character is in the range (vacuously
     *         {@code true} for the empty string); {@code false} otherwise or
     *         if {@code str} is {@code null}
     */
    public static boolean isChineseByRange(String str) {
        if (str == null) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            if (!isCjkUnified(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether a UTF-16 code unit falls inside the CJK Unified
     * Ideographs range.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is between the first and last code
     *         point of the block, inclusive
     */
    private static boolean isCjkUnified(char c) {
        return c >= CJK_UNIFIED_FIRST && c <= CJK_UNIFIED_LAST;
    }

}

如果僅僅去判斷是否是中文，不需判斷中文標點的話，推薦使用正則去匹配，可能更高效點。

GPU云服務器云服務器 java判斷小寫字符 java語句判斷字符 java判斷中文相同字符判斷

文章版權歸作者所有，未經允許請勿轉載,若此文章存在違規行為，您可以聯系管理員刪除。

轉載請注明本文地址：http://specialneedsforspecialkids.com/yun/65430.html

【面試】Java基礎的那些事-Thr

摘要：為了提高程序運行的效率，在軟件發布后，檢查默認是被關閉的。注意不能保證原子性，不能代替，且其會阻止編譯器對代碼的優化。以下方法用來判斷一個字符串中是否包含中文字符。前言面試中對于技術職位，一般分筆試與面談，如果面試官的一些小問題你可以立馬找到對應的知識點擴展開來，那么這就是你的優勢，本系列將講述一些java面試中的事，不會很詳細，但是應該比較全面吧。主要內容 assert有什么作...

ShowerSun 2019-08-19 11:06 評論0 收藏0
Java 輸入/輸出 I/O流 RandomAccessFile

摘要：當使用節點流進行輸入輸出時，程序直接連接到實際的數據源，和時間的輸入輸出節點連接處理流則用于對一個已存在的流進行連接或封裝，通過封裝后的流來實現數據讀寫功能，處理流也被稱為高級流。文件的編碼文本文件就是字節序列，可以是任意編碼形式。在中文操作系統上直接創建文本文件，則該文本文件只能識別ANSI編碼，其他編碼方式會產生亂碼 package imooc.io; import java...

Eirunye 2019-08-14 17:17 評論0 收藏0
java實現瀏覽器下載文件，并解決兼容各瀏覽器的文件下載中文亂碼

摘要：瀏覽器下載代碼如下通過循環將讀入的文件的內容輸出到瀏覽器中如果文件名為中文，上面的代碼下載的文件名會亂碼。解決中文亂碼方法拿到瀏覽器請求的判斷是否包含，是則直接講文件名轉換為，否則使用轉換。場景描述：由于項目需求，需要支持瀏覽器下載文件，比如招聘網站的在線簡歷下載。瀏覽器下載代碼如下： public static void downloadFile(File file,...

tunny 2019-08-16 10:37 評論0 收藏0
Java編程基礎21——IO(字節流)

摘要：流按操作類型分為兩種字節流字節流可以操作任何數據因為在計算機中任何數據都是以字節的形式存儲的字符流字符流只能操作純字符數據，比較方便。 1_IO流概述及其分類 1.概念 IO流用來處理設備之間的數據傳輸 Java對數據的操作是通過流的方式 Java用于操作流的類都在IO包中流按流向分為兩種：輸入流，輸出流。流按操作類型分為兩種：字節流 : 字節流可以操作任何數據,因為在...

yanbingyun1990 2019-08-19 11:16 評論0 收藏0
java學習筆記 - 標識符和關鍵字

摘要：標識符和關鍵字標識符可以用來表示文件名，變量名，類名，接口名和成員方法等。字符集是字符集，該字符集一個字符占兩個字節。標示符就是由字母和數字組成的，除了關鍵字之外的字符序列。標識符和關鍵字標識符可以用來表示文件名，變量名，類名，接口名和成員方法等。關鍵字是Java語言中保留的一些英文單詞，具有特殊的含義。 java字符集是Unicode字符集，該字符集一個字符占兩個字節。 java...

qianfeng 2019-08-14 17:01 評論0 收藏0