java获取字符串编码函数

encoding.java

package org.loon.test.encoding;

/**
 * <p>
 * title: loonframework
 * </p>
 * <p>
 * description: Collection of the basic encoding type identifiers and the
 * names associated with each of them.
 * </p>
 * <p>
 * copyright: copyright (c) 2008
 * </p>
 * <p>
 * company: loonframework
 * </p>
 * <p>
 * license: http://www.apache.org/licenses/license-2.0
 * </p>
 *
 * @author chenpeng
 * @email:ceponline@yahoo.com.cn
 * @version 0.1
 */
public class encoding {

    // Supported character encodings. Each value is an index into the
    // name tables below, so the values must stay dense in [0, totalt).
    public static int gb2312 = 0;

    public static int gbk = 1;

    public static int big5 = 2;

    public static int utf8 = 3;

    public static int unicode = 4;

    public static int euc_kr = 5;

    public static int sjis = 6;

    public static int euc_jp = 7;

    public static int ascii = 8;

    public static int unknown = 9;

    // Number of entries in each name table.
    public static int totalt = 10;

    public final static int simp = 0;

    public final static int trad = 1;

    // Names used when resolving an encoding.
    public static String[] javaname;

    // Human-readable names.
    public static String[] nicename;

    // Character-set names as used in HTML.
    public static String[] htmlname;

    // The tables are static, so they are filled once at class-load time.
    // Populating them from the instance constructor left them null until the
    // first instance was created and reallocated them on every construction.
    static {
        javaname = new String[totalt];
        nicename = new String[totalt];
        htmlname = new String[totalt];

        javaname[gb2312] = "gb2312";
        javaname[gbk] = "gbk";
        javaname[big5] = "big5";
        javaname[utf8] = "utf8";
        javaname[unicode] = "unicode";
        javaname[euc_kr] = "euc_kr";
        javaname[sjis] = "sjis";
        javaname[euc_jp] = "euc_jp";
        javaname[ascii] = "ascii";
        javaname[unknown] = "iso8859_1";

        // HTML charset names
        htmlname[gb2312] = "gb2312";
        htmlname[gbk] = "gbk";
        htmlname[big5] = "big5";
        htmlname[utf8] = "utf-8";
        htmlname[unicode] = "utf-16";
        htmlname[euc_kr] = "euc-kr";
        htmlname[sjis] = "shift_jis";
        htmlname[euc_jp] = "euc-jp";
        htmlname[ascii] = "ascii";
        htmlname[unknown] = "iso8859-1";

        // readable names
        nicename[gb2312] = "gb-2312";
        nicename[gbk] = "gbk";
        nicename[big5] = "big5";
        nicename[utf8] = "utf-8";
        nicename[unicode] = "unicode";
        nicename[euc_kr] = "euc-kr";
        nicename[sjis] = "shift-jis";
        nicename[euc_jp] = "euc-jp";
        nicename[ascii] = "ascii";
        nicename[unknown] = "unknown";
    }

    /**
     * Creates an instance. The name tables are already populated by the
     * static initializer; the constructor is kept so that subclasses calling
     * {@code super()} and callers using {@code new encoding()} keep working.
     */
    public encoding() {
    }

    /**
     * Returns the three names registered for an encoding type, joined by
     * commas in the order java name, readable name, HTML name.
     *
     * @param type one of the encoding constants of this class
     * @return the interned string {@code "javaname,nicename,htmlname"}
     * @throws ArrayIndexOutOfBoundsException if {@code type} is not a valid
     *         table index
     */
    public String toencoding(final int type) {
        return (javaname[type] + "," + nicename[type] + "," + htmlname[type])
                .intern();
    }

}

encode.java(省略,见源码)

parseencoding.java

package org.loon.test.encoding;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;

/** *//**
 * <p>
 * title: loonframework
 * </p>
 * <p>
 * description:
 * </p>
 * <p>
 * copyright: copyright (c) 2008
 * </p>
 * <p>
 * company: loonframework
 * </p>
 * <p>
 * license: http://www.apache.org/licenses/license-2.0
 * </p>
 *
 * @author chenpeng
 * @email:ceponline@yahoo.com.cn
 * @version 0.1
 */
public class parseencoding extends encode ...{

    public parseencoding() ...{
        super();
        gb2312format = new int[94][94];
        gbkformat = new int[126][191];
        big5format = new int[94][158];
        euc_krformat = new int[94][94];
        jpformat = new int[94][94];

        // 初始化编码格式
        init();
    }

    public string getencoding(final string path) ...{
        return check(getencodevalue(path));
    }

    public string getencoding(final inputstream in) ...{
        return check(getencodevalue(in));
    }

    public string getencoding(final byte[] buffer) ...{
        return check(getencodevalue(buffer));
    }

    public string getencoding(final url url) ...{
        return check(getencodevalue(url));
    }

    private string check(final int result) ...{
        if (result == -1) ...{
            return nicename[unknown];
        }
        return nicename[result];
    }

    /** *//**
     * 解析指定字符串路径编码所用格式
     *
     * @param path
     * @return
     */
    private int getencodevalue(string path) ...{
        int express = unknown;
        if (path.startswith("http://")) ...{
            try ...{
                express = getencodevalue(new url(path));
            } catch (malformedurlexception e) ...{
                express = -1;
            }
        } else ...{
            express = getencodevalue(new file(path));
        }
        return express;
    }

    /** *//**
     *
     * 解析指定inputstream所用编码,返回或然率最高的编码类型数值
     *
     * @param in
     * @return
     */
    public int getencodevalue(inputstream in) ...{
        byte[] rawtext = new byte[8192];
        int bytesread = 0, byteoffset = 0;
        int express = unknown;
        inputstream stream = in;
        try ...{
            while ((bytesread = stream.read(rawtext, byteoffset, rawtext.length
                    - byteoffset)) > 0) ...{
                byteoffset += bytesread;
            }
            ;
            stream.close();
            express = getencodevalue(rawtext);
        } catch (exception e) ...{
            express = -1;
        }
        return express;
    }

    /** *//**
     * 解析指定url下数据所用编码,返回或然率最高的编码类型数值
     *
     * @param url
     * @return
     */
    public int getencodevalue(url url) ...{

        inputstream stream;
        try ...{
            stream = url.openstream();
        } catch (ioexception e) ...{
            stream = null;
        }

        return getencodevalue(stream);
    }

    /** *//**
     * 解析指定file所用编码,返回或然率最高的编码类型数值
     *
     * @param file
     * @return
     */
    public int getencodevalue(file file) ...{
        byte[] buffer;
        try ...{
            buffer = read(new fileinputstream(file));
        } catch (filenotfoundexception e) ...{
            buffer = null;
        }
        return getencodevalue(buffer);
    }

    /** *//**
     * 将inputstream转为byte[]
     *
     * @param inputstream
     * @return
     */
    private final byte[] read(final inputstream inputstream) ...{
        byte[] arraybyte = null;
        bytearrayoutputstream bytearrayoutputstream = new bytearrayoutputstream();
        byte[] bytes = new byte[8192];
        try ...{
            bytes = new byte[inputstream.available()];
            int read;
            while ((read = inputstream.read(bytes)) >= 0) ...{
                bytearrayoutputstream.write(bytes, 0, read);
            }
            arraybyte = bytearrayoutputstream.tobytearray();
        } catch (ioexception e) ...{
            return null;
        }
        return arraybyte;
    }

    /** *//**
     * 解析指定byte[]所用编码,返回或然率最高的数值类型
     *
     * @param content
     * @return
     */
    public int getencodevalue(byte[] content) ...{
        if (content == null)
            return -1;
        int[] scores;
        int index, maxscore = 0;
        int encoding = unknown;
        scores = new int[totalt];
        // 分配或然率
        scores[gb2312] = gb2312probability(content);
        scores[gbk] = gbkprobability(content);
        scores[big5] = big5probability(content);
        scores[utf8] = utf8probability(content);
        scores[unicode] = utf16probability(content);
        scores[euc_kr] = euc_krprobability(content);
        scores[ascii] = asciiprobability(content);
        scores[sjis] = sjisprobability(content);
        scores[euc_jp] = euc_jpprobability(content);
        scores[unknown] = 0;

        // 概率比较
        for (index = 0; index < totalt; index++) ...{
            if (scores[index] > maxscore) ...{
                // 索引
                encoding = index;
                // 最大几率
                maxscore = scores[index];
            }
        }
        // 返回或然率大于50%的数据
        if (maxscore <= 50) ...{
            encoding = unknown;
        }
        return encoding;
    }

    /** *//**
     * gb2312数据或然率计算
     *
     * @param content
     * @return
     */
    private int gb2312probability(byte[] content) ...{
        int i, rawtextlen = 0;

        int dbchars = 1, gbchars = 1;
        long gbformat = 0, totalformat = 1;
        float rangeval = 0, formatval = 0;
        int row, column;

        // 检查是否在亚洲汉字范围内
        rawtextlen = content.length;
        for (i = 0; i < rawtextlen - 1; i++) ...{
            if (content[i] >= 0) ...{
            } else ...{
                dbchars++;
                // 汉字gb码由两个字节组成,每个字节的范围是0xa1 ~ 0xfe
                if ((byte) 0xa1 <= content[i] && content[i] <= (byte) 0xf7
                        && (byte) 0xa1 <= content[i + 1]
                        && content[i + 1] <= (byte) 0xfe) ...{
                    gbchars++;
                    totalformat += 500;
                    row = content[i] + 256 - 0xa1;
                    column = content[i + 1] + 256 - 0xa1;
                    if (gb2312format[row][column] != 0) ...{
                        gbformat += gb2312format[row][column];
                    } else if (15 <= row && row < 55) ...{
                        // 在gb编码范围
                        gbformat += 200;
                    }

                }
                i++;
            }
        }
        rangeval = 50 * ((float) gbchars / (float) dbchars);
        formatval = 50 * ((float) gbformat / (float) totalformat);

        return (int) (rangeval + formatval);
    }

    /** *//**
     * gb2312或然率计算
     *
     * @param content
     * @return
     */
    private int gbkprobability(byte[] content) ...{
        int i, rawtextlen = 0;

        int dbchars = 1, gbchars = 1;
        long gbformat = 0, totalformat = 1;
        float rangeval = 0, formatval = 0;
        int row, column;
        rawtextlen = content.length;
        for (i = 0; i < rawtextlen - 1; i++) ...{
            if (content[i] >= 0) ...{
            } else ...{
                dbchars++;
                if ((byte) 0xa1 <= content[i] && content[i] <= (byte) 0xf7
                        && // gb范围
                        (byte) 0xa1 <= content[i + 1]
                        && content[i + 1] <= (byte) 0xfe) ...{
                    gbchars++;
                    totalformat += 500;
                    row = content[i] + 256 - 0xa1;
                    column = content[i + 1] + 256 - 0xa1;
                    if (gb2312format[row][column] != 0) ...{
                        gbformat += gb2312format[row][column];
                    } else if (15 <= row && row < 55) ...{
                        gbformat += 200;
                    }

                } else if ((byte) 0x81 <= content[i]
                        && content[i] <= (byte) 0xfe && // gb扩展区域
                        (((byte) 0x80 <= content[i + 1] && content[i + 1] <= (byte) 0xfe) || ((byte) 0x40 <= content[i + 1] && content[i + 1] <= (byte) 0x7e))) ...{
                    gbchars++;
                    totalformat += 500;
                    row = content[i] + 256 - 0x81;
                    if (0x40 <= content[i + 1] && content[i + 1] <= 0x7e) ...{
                        column = content[i + 1] - 0x40;
                    } else ...{
                        column = content[i + 1] + 256 - 0x40;
                    }
                    if (gbkformat[row][column] != 0) ...{
                        gbformat += gbkformat[row][column];
                    }
                }
                i++;
            }
        }
        rangeval = 50 * ((float) gbchars / (float) dbchars);
        formatval = 50 * ((float) gbformat / (float) totalformat);
        return (int) (rangeval + formatval) - 1;
    }

    /** *//**
     * 解析为big5的或然率
     *
     * @param content
     * @return
     */
    private int big5probability(byte[] content) ...{
        int i, rawtextlen = 0;
        int dbchars = 1, bfchars = 1;
        float rangeval = 0, formatval = 0;
        long bfformat = 0, totalformat = 1;
        int row, column;
        rawtextlen = content.length;
        for (i = 0; i < rawtextlen - 1; i++) ...{
            if (content[i] >= 0) ...{
            } else ...{
                dbchars++;
                if ((byte) 0xa1 <= content[i]
                        && content[i] <= (byte) 0xf9
                        && (((byte) 0x40 <= content[i + 1] && content[i + 1] <= (byte) 0x7e) || ((byte) 0xa1 <= content[i + 1] && content[i + 1] <= (byte) 0xfe))) ...{
                    bfchars++;
                    totalformat += 500;
                    row = content[i] + 256 - 0xa1;
                    if (0x40 <= content[i + 1] && content[i + 1] <= 0x7e) ...{
                        column = content[i + 1] - 0x40;
                    } else ...{
                        column = content[i + 1] + 256 - 0x61;
                    }
                    if (big5format[row][column] != 0) ...{
                        bfformat += big5format[row][column];
                    } else if (3 <= row && row <= 37) ...{
                        bfformat += 200;
                    }
                }
                i++;
            }
        }
        rangeval = 50 * ((float) bfchars / (float) dbchars);
        formatval = 50 * ((float) bfformat / (float) totalformat);

        return (int) (rangeval + formatval);
    }

    /** *//**
     * 在utf-8中的或然率
     *
     * @param content
     * @return
     */
    private int utf8probability(byte[] content) ...{
        int score = 0;
        int i, rawtextlen = 0;
        int goodbytes = 0, asciibytes = 0;
        // 检查是否为汉字可接受范围
        rawtextlen = content.length;
        for (i = 0; i < rawtextlen; i++) ...{
            if ((content[i] & (byte) 0x7f) == content[i]) ...{
                asciibytes++;
            } else if (-64 <= content[i] && content[i] <= -33
                    && i + 1 < rawtextlen && -128 <= content[i + 1]
                    && content[i + 1] <= -65) ...{
                goodbytes += 2;
                i++;
            } else if (-32 <= content[i] && content[i] <= -17
                    && i + 2 < rawtextlen && -128 <= content[i + 1]
                    && content[i + 1] <= -65 && -128 <= content[i + 2]
                    && content[i + 2] <= -65) ...{
                goodbytes += 3;
                i += 2;
            }
        }

        if (asciibytes == rawtextlen) ...{
            return 0;
        }

        score = (int) (100 * ((float) goodbytes / (float) (rawtextlen - asciibytes)));
        // 如果不高于98则减少到零
        if (score > 98) ...{
            return score;
        } else if (score > 95 && goodbytes > 30) ...{
            return score;
        } else ...{
            return 0;
        }

    }

    /** *//**
     * 检查为utf-16的或然率
     *
     * @param content
     * @return
     */
    private int utf16probability(byte[] content) ...{

        if (content.length > 1
                && ((byte) 0xfe == content[0] && (byte) 0xff == content[1])
                || ((byte) 0xff == content[0] && (byte) 0xfe == content[1])) ...{
            return 100;
        }
        return 0;
    }

    /** *//**
     * 检查为ascii的或然率
     *
     * @param content
     * @return
     */
    private int asciiprobability(byte[] content) ...{
        int score = 75;
        int i, rawtextlen;

        rawtextlen = content.length;

        for (i = 0; i < rawtextlen; i++) ...{
            if (content[i] < 0) ...{
                score = score - 5;
            } else if (content[i] == (byte) 0x1b) ...{ // esc (used by iso 2022)
                score = score - 5;
            }
            if (score <= 0) ...{
                return 0;
            }
        }
        return score;
    }

    /** *//**
     * 检查为euc_kr的或然率
     *
     * @param content
     * @return
     */
    private int euc_krprobability(byte[] content) ...{
        int i, rawtextlen = 0;

        int dbchars = 1, krchars = 1;
        long krformat = 0, totalformat = 1;
        float rangeval = 0, formatval = 0;
        int row, column;
        rawtextlen = content.length;
        for (i = 0; i < rawtextlen - 1; i++) ...{
            if (content[i] >= 0) ...{
            } else ...{
                dbchars++;
                if ((byte) 0xa1 <= content[i] && content[i] <= (byte) 0xfe
                        && (byte) 0xa1 <= content[i + 1]
                        && content[i + 1] <= (byte) 0xfe) ...{
                    krchars++;
                    totalformat += 500;
                    row = content[i] + 256 - 0xa1;
                    column = content[i + 1] + 256 - 0xa1;
                    if (euc_krformat[row][column] != 0) ...{
                        krformat += euc_krformat[row][column];
                    } else if (15 <= row && row < 55) ...{
                        krformat += 0;
                    }

                }
                i++;
            }
        }
        rangeval = 50 * ((float) krchars / (float) dbchars);
        formatval = 50 * ((float) krformat / (float) totalformat);

        return (int) (rangeval + formatval);
    }

    private int euc_jpprobability(byte[] content) ...{
        int i, rawtextlen = 0;

        int dbchars = 1, jpchars = 1;
        long jpformat = 0, totalformat = 1;
        float rangeval = 0, formatval = 0;
        int row, column;

        rawtextlen = content.length;
        for (i = 0; i < rawtextlen - 1; i++) ...{
            if (content[i] >= 0) ...{
            } else ...{
                dbchars++;
                if ((byte) 0xa1 <= content[i] && content[i] <= (byte) 0xfe
                        && (byte) 0xa1 <= content[i + 1]
                        && content[i + 1] <= (byte) 0xfe) ...{
                    jpchars++;
                    totalformat += 500;
                    row = content[i] + 256 - 0xa1;
                    column = content[i + 1] + 256 - 0xa1;
                    if (jpformat[row][column] != 0) ...{
                        jpformat += jpformat[row][column];
                    } else if (15 <= row && row < 55) ...{
                        jpformat += 0;
                    }

                }
                i++;
            }
        }
        rangeval = 50 * ((float) jpchars / (float) dbchars);
        formatval = 50 * ((float) jpformat / (float) totalformat);

        return (int) (rangeval + formatval);
    }

    private int sjisprobability(byte[] content) ...{
        int i, rawtextlen = 0;

        int dbchars = 1, jpchars = 1;
        long jpformat = 0, totalformat = 1;
        float rangeval = 0, formatval = 0;
        int row, column, adjust;

        rawtextlen = content.length;
        for (i = 0; i < rawtextlen - 1; i++) ...{
            if (content[i] >= 0) ...{
            } else ...{
                dbchars++;
                if (i + 1 < content.length
                        && (((byte) 0x81 <= content[i] && content[i] <= (byte) 0x9f) || ((byte) 0xe0 <= content[i] && content[i] <= (byte) 0xef))
                        && (((byte) 0x40 <= content[i + 1] && content[i + 1] <= (byte) 0x7e) || ((byte) 0x80 <= content[i + 1] && content[i + 1] <= (byte) 0xfc))) ...{
                    jpchars++;
                    totalformat += 500;
                    row = content[i] + 256;
                    column = content[i + 1] + 256;
                    if (column < 0x9f) ...{
                        adjust = 1;
                        if (column > 0x7f) ...{
                            column -= 0x20;
                        } else ...{
                            column -= 0x19;
                        }
                    } else ...{
                        adjust = 0;
                        column -= 0x7e;
                    }
                    if (row < 0xa0) ...{
                        row = ((row - 0x70) << 1) - adjust;
                    } else ...{
                        row = ((row - 0xb0) << 1) - adjust;
                    }

                    row -= 0x20;
                    column = 0x20;
                    if (row < jpformat.length && column < jpformat[row].length
                            && jpformat[row][column] != 0) ...{
                        jpformat += jpformat[row][column];
                    }
                    i++;
                } else if ((byte) 0xa1 <= content[i]
                        && content[i] <= (byte) 0xdf) ...{
                }

            }
        }
        rangeval = 50 * ((float) jpchars / (float) dbchars);
        formatval = 50 * ((float) jpformat / (float) totalformat);

        return (int) (rangeval + formatval) - 1;
    }

}

encodingtest.java

package org.loon.test.encoding;
/** *//**
 * <p>title: loonframework</p>
 * <p>description:</p>
 * <p>copyright: copyright (c) 2008</p>
 * <p>company: loonframework</p>
 * <p>license: http://www.apache.org/licenses/license-2.0</p>
 * @author chenpeng 
 * @email:ceponline@yahoo.com.cn
 * @version 0.1
 */
public class encodingtest ...{
    public static void main(string argc[]) ...{
        parseencoding parse;

        parse = new parseencoding();
       
         system.out.println("中国大陆:");
         system.out.println("测试字符串,编码格式="+parse.getencoding("百度".getbytes()));
         system.out.println("测试站点,编码格式="+parse.getencoding("http://www.111cn.net"));
         system.out.println();
         system.out.println("中国台湾:");
         system.out.println("测试字符串,编码格式="+parse.getencoding("".getbytes()));
         system.out.println("测试站点,编码格式="+parse.getencoding("http://tw.yahoo.com/"));
         system.out.println("测试站点(繁体字,utf编码),编码格式="+parse.getencoding("http://.tw/jute"));
         system.out.println();
         system.out.println("日本:");
         system.out.println("测试字符串,编码格式="+parse.getencoding("".getbytes()));
         system.out.println("测试站点,编码格式="+parse.getencoding("http://www.111cn.net"));
         system.out.println();
         system.out.println("自称蚩尤后代那群……:");
         system.out.println("测试站点,编码格式="+parse.getencoding("http://www.easyjava.co.kr/"));
       
    }
}

时间: 2024-10-16 08:54:28

java获取字符串编码函数的相关文章

php自动获取字符串编码函数mb_detect_encoding_php技巧

当在php中使用mb_detect_encoding函数进行编码识别时,很多人都碰到过识别编码有误的问题,例如对于GB2312和UTF-8,或者UTF-8和GBK(这里主要是对于cp936的判断),网上说是由于字符串较短时,mb_detect_encoding会出现误判. 例如: 复制代码 代码如下: $encode = mb_detect_encoding($keytitle, array("ASCII",'UTF-8',"GB2312","GBK",'B

oracle获取字符串长度函数length()和lengthb()

原文:oracle获取字符串长度函数length()和lengthb() lengthb(string)计算string所占的字节长度:返回字符串的长度,单位是字节 length(string)计算string所占的字符长度:返回字符串的长度,单位是字符 对于单字节字符,LENGTHB和LENGTH是一样的. 如可以用length('string')=lengthb('string')判断字符串是否含有中文. 注: 一个汉字在Oracle数据库里占多少字节跟数据库的字符集有关,UTF8时,长度为

escape() 字符串编码函数及其它js 编码函数

escape() 字符串编码函数及其它js 编码函数 escape(string) 定义和用法 escape() 函数可对字符串进行编码,这样就可以在所有的计算机上读取该字符串. 语法 escape(string)参数 描述 string 必需.要被转义或编码的字符串. 1:说明:所有空格.标点.重音符号以及其他非 ascii 字符都用 %xx 编码代替,其中 xx 等于表示该字符的十六进制数.例如,空格返回的是 "%20" .字符值大于 255 的以 "%uxxxx&quo

Java获取网页编码

    使用爬虫从网上抓取到一个网页内容,要想能正确显示,必须要获取网页的原始编码,否则会出现乱码.首先需要获取网页内容,最简单的办法就是通过JDK自带的HttpURLConnection类,要实现更复杂的抓取操作,请使用开源的爬虫框架,如Crawler4j,Web-Harvest,JSpider,WebMagic,Heritrix,Nutch等,我并不是来说爬虫相关技术的,只是网页内容的获取需要使用到爬虫技术,所以顺带提提有关爬虫的框架,具体你们自己去研究.这里为了简便起见,我就以JDK自带的

我的Java开发学习之旅------>工具类:Java获取字符串和文件进行MD5值

ps:这几天本人用百度云盘秒传了几部大片到云盘上,几个G的文件瞬秒竟然显示"上传成功"!这真让我目瞪口呆,要是这样的话,那得多快的网速,这绝对是不可能的,也许这仅是个假象.百度了一下才发现所谓的"秒传"是常见的"忽略式"上传方式,就是您上传了一个文件名为111.exe,MD5为一个数,有一个网友以前也上传一个叫222.exe,MD5和您上传的文件MD5码一模一样,所以这个文件上传到服务器上的时间就很短了,这是因为别人上传过这个文件,您上传这个文件

js中字符串编码函数escape()、encodeURI()、encodeURIComponent()区别详解_javascript技巧

JavaScript中有三个可以对字符串编码的函数,分别是: escape,encodeURI,encodeURIComponent,相应3个解码函数: unescape,decodeURI,decodeURIComponent . 下面简单介绍一下它们的区别 1 escape()函数 定义和用法 escape() 函数可对字符串进行编码,这样就可以在所有的计算机上读取该字符串. 语法 escape(string) 参数 描述 string 必需.要被转义或编码的字符串. 返回值 已编码的 st

mysql获取字符串长度函数(CHAR_LENGTH)_Mysql

length:   是计算字段的长度一个汉字是算三个字符,一个数字或字母算一个字符CHAR_LENGTH(str) 返回值为字符串str 的长度,长度的单位为字符.一个多字节字符算作一个单字符.对于一个包含五个二字节字符集, LENGTH()返回值为 10,而CHAR_LENGTH()的返回值为5.CHARACTER_LENGTH(str) CHARACTER_LENGTH()是CHAR_LENGTH()的同义词.BIT_LENGTH(str) 返回2进制长度.例如:可以查出用户名长度少于6个字

mysql获取字符串长度函数(CHAR_LENGTH)

length:   是计算字段的长度一个汉字是算三个字符,一个数字或字母算一个字符 CHAR_LENGTH(str) 返回值为字符串str 的长度,长度的单位为字符.一个多字节字符算作一个单字符.对于一个包含五个二字节字符集, LENGTH()返回值为 10,而CHAR_LENGTH()的返回值为5. CHARACTER_LENGTH(str) CHARACTER_LENGTH()是CHAR_LENGTH()的同义词. BIT_LENGTH(str) 返回2进制长度. 例如:可以查出用户名长度少

php判断字符串编码函数

mb_detect_encoding()($str);  代码如下 复制代码 //判断字符串是什么编码 if ($tag === mb_convert_encoding(mb_convert_encoding($tag, "GB2312", "UTF-8"), "UTF-8", "GB2312")) { } else {//如果是gb2312 的就转换为utf8的 $tag = mb_convert_encoding($tag