Android 汉字转拼音之JNI篇

package com.tool.hz2py;

import android.os.Bundle;
import android.app.Activity;
import android.view.Menu;
import android.widget.TextView;

public class MainActivity extends Activity {

	protected Hz2py hz2py;

	@Override
	protected void onCreate(Bundle savedInstanceState) {
		super.onCreate(savedInstanceState);
		setContentView(R.layout.activity_main);
		hz2py = new Hz2py();
		TextView view = (TextView) findViewById(R.id.text);
		view.setText(hz2py.hz2py("汉字转拼音"));
	}

	@Override
	public boolean onCreateOptionsMenu(Menu menu) {
		// Inflate the menu; this adds items to the action bar if it is present.
		getMenuInflater().inflate(R.menu.main, menu);
		return true;
	}

}

jni类:Hz2py

package com.tool.hz2py;

public class Hz2py {

	static {
		System.loadLibrary("Hz2py");
	};

	public native String hz2py(String text);

}

下面是C++头文件和代码

/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class com_tool_hz2py_Hz2py */

#ifndef _Included_com_tool_hz2py_Hz2py
#define _Included_com_tool_hz2py_Hz2py
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Class:     com_tool_hz2py_Hz2py
 * Method:    hz2py
 * Signature: (Ljava/lang/String;)Ljava/lang/String;
 */
JNIEXPORT jstring JNICALL Java_com_tool_hz2py_Hz2py_hz2py
  (JNIEnv *, jobject, jstring);

#ifdef __cplusplus
}
#endif
#endif

#include "hz2py.h"
#include <string.h>
#include "com_tool_hz2py_Hz2py.h"

#define HZ2PY_UTF8_CHECK_LENGTH 20
#define HZ2PY_FILE_READ_BUF_ARRAY_SIZE 1000
#define HZ2PY_INPUT_BUF_ARRAY_SIZE 1024
#define HZ2PY_OUTPUT_BUF_ARRAY_SIZE 2048
#define HZ2PY_STR_COPY(to, from, count) \
    ok = 1;\
    i = 0;\
    _tmp = from;\
    while(i < count)\
    {\
        if (*_tmp == '\0')\
        {\
            ok = 0;\
            break;\
        }\
        _tmp ++;\
        i ++;\
    }\
    if (ok)\
{\
    i = 0;\
    while(i < count)\
    {\
        *to = *from;\
        to ++;\
        from ++;\
        i ++;\
    }\
}\
else\
{\
    if (overage_buff != NULL)\
    {\
        while(*from != '\0')\
        {\
            *overage_buff = *from;\
            from ++;\
        }\
    }\
    break;\
}

//将utf8编码的字符串中的汉字解成拼音
// in 输入
// out 输出
// first_letter_only 是否只输出拼音首字母
// polyphone_support 是否输出多音字
// add_blank 是否在拼音之间追加空格
// convert_double_char 是否转换全角字符为半角字符
// overage_buff 末尾如果有多余的不能组成完整utf8字符的字节，将写到overage_buff，传NULL将输出到out

void utf8_to_pinyin(char *in, char *out, int first_letter_only,
		int polyphone_support, int add_blank, int convert_double_char,
		char *overage_buff) {
	int i = 0;
	char *utf = in;
	char *_tmp;
	char *_tmp2;
	char py_tmp[30] = "";
	char py_tmp2[30] = "";
	char *out_start_flag = out;
	int uni;
	int ok = 0;
	while (*utf != '\0') {
		if ((*utf >> 7) == 0) {
			HZ2PY_STR_COPY(out, utf, 1);
			//如果为一个字节加上#号分隔
			*out = '#'; //用#号做为分隔符
			out++;
			//去掉其它的英文只留汉字
			//只能搜索到汉字拼音里面字母
			//			out--;
			//			*out = ' ';
		}
		//两个字节
		else if ((*utf & 0xE0) == 0xC0) {
			HZ2PY_STR_COPY(out, utf, 2);
		}
		//三个字节
		else if ((*utf & 0xF0) == 0xE0) {
			if (*(utf + 1) != '\0' && *(utf + 2) != '\0') {
				uni = (((int) (*utf & 0x0F)) << 12)
						| (((int) (*(utf + 1) & 0x3F)) << 6)
						| (*(utf + 2) & 0x3F);

				if (uni > 19967 && uni < 40870) {
					memset(py_tmp, '\0', 30);
					memset(py_tmp2, '\0', 30);
					strcpy(py_tmp, _pinyin_table_[uni - 19968]);
					_tmp = py_tmp;
					_tmp2 = py_tmp2;

					if (first_letter_only == 1) {
						*_tmp2 = *_tmp;
						_tmp++;
						_tmp2++;
						while (*_tmp != '\0') {
							if (*_tmp == '|' || *(_tmp - 1) == '|') {
								*_tmp2 = *_tmp;
								_tmp2++;
							}
							_tmp++;
						}
					} else {
						strcpy(py_tmp2, py_tmp);
					}

					_tmp2 = py_tmp2;

					if (polyphone_support == 0) {
						while (*_tmp2 != '\0') {
							if (*_tmp2 == '|') {
								*_tmp2 = '\0';
								break;
							}
							_tmp2++;
						}
						_tmp2 = py_tmp2;
					}
					strcpy(out, _tmp2);
					out += strlen(_tmp2);
					if (add_blank) {
						*out = '#'; //用#号做为分隔符
						out++;
					}
					utf += 3;
				} else if (convert_double_char && uni > 65280 && uni < 65375) {
					*out = uni - 65248;
					out++;
					utf += 3;
				} else if (convert_double_char && uni == 12288) {
					*out = 32;
					out++;
					utf += 3;
				} else {
					HZ2PY_STR_COPY(out, utf, 3);
				}
			} else {
				HZ2PY_STR_COPY(out, utf, 3);
			}
		}
		//四个字节
		else if ((*utf & 0xF8) == 0xF0) {
			HZ2PY_STR_COPY(out, utf, 4);
		}
		//五个字节
		else if ((*utf & 0xFC) == 0xF8) {
			HZ2PY_STR_COPY(out, utf, 5);
		}
		//六个字节
		else if ((*utf & 0xFE) == 0xFC) {
			HZ2PY_STR_COPY(out, utf, 6);
		} else {
			if (overage_buff != NULL) {
				*overage_buff = *utf;
				overage_buff++;
			} else {
				HZ2PY_STR_COPY(out, utf, 1);
			}
			break;
		}
	}
}

//判断一个字符串是否为utf8编码
int is_utf8_string(char *utf) {
	int length = strlen(utf);
	int check_sub = 0;
	int i = 0;

	if (length > HZ2PY_UTF8_CHECK_LENGTH) {
		length = HZ2PY_UTF8_CHECK_LENGTH;
	}

	for (; i < length; i++) {
		if (check_sub == 0) {
			if ((utf[i] >> 7) == 0) {
				continue;
			} else if ((utf[i] & 0xE0) == 0xC0) {
				check_sub = 1;
			} else if ((utf[i] & 0xF0) == 0xE0) {
				check_sub = 2;
			} else if ((utf[i] & 0xF8) == 0xF0) {
				check_sub = 3;
			} else if ((utf[i] & 0xFC) == 0xF8) {
				check_sub = 4;
			} else if ((utf[i] & 0xFE) == 0xFC) {
				check_sub = 5;
			} else {
				return 0;
			}
		} else {
			if ((utf[i] & 0xC0) != 0x80) {
				return 0;
			}
			check_sub--;
		}
	}
	return 1;
}

int hztpy(const char *read_buff, char *outbuf) {
	char overage_buff[7] = { 0 };
	char *_tmp = NULL;
	char inbuf[HZ2PY_INPUT_BUF_ARRAY_SIZE] = { 0 };
	int add_blank = 1;
	int polyphone_support = 1;
	int first_letter_only = 0;
	int convert_double_char = 0;

	// first_letter_only 是否只输出拼音首字母
	// polyphone_support 是否输出多音字
	// add_blank 是否在拼音之间追加空格
	// convert_double_char 是否转换全角字符为半角字符
	// overage_buff 末尾如果有多余的不能组成完整utf8字符的字节，将写到overage_buff，传NULL将输出到out

	_tmp = inbuf;
	if (strlen(overage_buff)) {
		strcpy(_tmp, overage_buff);
		_tmp += strlen(overage_buff);
		memset(overage_buff, '\0', 7);
	}
	strcpy(_tmp, read_buff);
	if (!is_utf8_string(inbuf)) {
		return -1;
	}
	utf8_to_pinyin(inbuf, outbuf, first_letter_only, polyphone_support,
			add_blank, convert_double_char, overage_buff);
	return 1;
}

JNIEXPORT jstring JNICALL Java_com_tool_hz2py_Hz2py_hz2py(JNIEnv *env,
		jobject thiz, jstring text) {
	const char* pText = env->GetStringUTFChars(text, 0);
	char* oText = new char[512];//256中文
	memset(oText,0,512);
	hztpy(pText,oText);
	jstring returnText = env->NewStringUTF(oText);
	env->ReleaseStringUTFChars(text,pText);
	delete oText;
	return returnText;
}

头文件有点大，我直接上传给大家下载好了。
hz2py
直接编译好的.so共享库和java文件，注意，包名只能用这个，不能更改。
libHz2py

本文固定链接: http://www.ithtw.com/37.html
转载请注明: leehom 2014年08月05日于 IT十万为什么发表

时间： 2024-10-30 11:15:20

Android 汉字转拼音之JNI篇的相关文章

Android 汉字转拼音之工具篇

/* * Copyright (C) 2011 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://w

android实现汉字转拼音功能带多音字识别

android 汉字转拼音带多音字识别功能,供大家参考,具体内容如下问题来源在做地名按首字母排序的时候出现了这样一个bug.长沙会被翻译拼音成zhangsha,重庆会被翻译拼音成zhong qing.于是排序出了问题. 汉字转拼音库和多音字识别库 1.多音字对应的词汇库 2.文字的二进制大小对应的拼音库关键代码 1.我在这里首先将要转化的文字转化成对应的"gb2312"编码.汉字转化成二进制编码一般占两个字节,如果一个字节返回字符,如果是两个字节算一下偏移量.代码如下 /** *

利用IFELanguage分隔中文语句并对汉字加注拼音

这篇文档是讲如何利用IFELanguage接口实现对中文语句的分隔,并对词语和字加注拼音的方法. 首先感谢一下Zswang(伴水)兄弟,他的无私奉献精神和对Windows的深入研究值得我们每一个人学习.每次找到好东东他总是给我一份,包括这个IFELanguage接口资料.最初的资料源自一个日本网站,源代码是用VC8写的,Zswang(伴水)将其改写为一个适用于Delphi下的版本.原C++代码经ccrun(老妖)略作修改,在BCB6下调试通过.原VC8的版本和BCB6还有Zswang(伴水)写的

php汉字转拼音的示例

这篇文章主要介绍了php汉字转拼音的示例,需要的朋友可以参考下代码如下: <?php class Helper_Spell{ public $spellArray = array(); static public function getArray() { return unserialize(file_get_contents('pytable_without_tune.txt')); } /** * @desc 获取字符串的

C#汉字转拼音（支持多音字）

原文 C#汉字转拼音(支持多音字) 阅读目录首先在nuget引用对应的包简单的demo 汉字转拼音类封装源码分享之前由于项目需要,中间需要一个汉字转拼音和首拼的功能来做查询,感觉这种功能基本已经成熟化了,于是查找了相关的代码,首先引入眼帘的是下面两篇文章 1.C# 汉字转拼音(支持GB2312字符集中所有汉字) 2.[干货]JS版汉字与拼音互转终极方案,附简单的JS拼音输入法感谢两位博主,写的比较全也很详细,都有提供源码,大家可以参考下. 由于考虑到接口的需要,于是参考了第一篇,文章

C#----汉字转拼音

上一篇博客中介绍的是动态加载EasyUI控件显示到前台,里面包括按钮控件,而且每一个设备有可能有不同的命令和参数,不过总共可以显示的有八种不同的按钮,公用的,那如何实现不同的参数按钮点击的时候能够去加载相同的JS,而不用每次都去获取一个新的ID,于是就想到了一个办法,根据从数据库中获取的命令的数据,将汉字转化成拼音,这样就可以实现上面的结果. 代码如下: using System; using System.Collections.Generic; using System.Linq; usin

php 汉字转拼音 [包含20902个基本汉字+5059生僻字]

原文:php 汉字转拼音 [包含20902个基本汉字+5059生僻字] 昨天在转换拼音的时候发现个bug,有好多字都无法转换,不过也不能怪他,毕竟人家的库才8k,应该只有常用的.无奈上网找了下,发现一篇<最全的PHP汉字转拼音函数(共25961字,包含20902个基本汉字+5059生僻字)> 看着都屌,测试后也不错,都能识别,但问题是功能不够,,无奈,自己动手压缩字库(无损压缩),扩展功能.我用的是他 pinyin.php ~ 206KB 那个UTF8字库,经过压缩扩展后就剩 106K 了.当

asp.net 汉字转换拼音及首字母实现代码_实用技巧

Default.aspx页面复制代码代码如下: <%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http:

利用pinyin4j将汉字转换为拼音完整示例

MainActivity如下: package cn.cc; import android.app.Activity; import android.os.Bundle; /** * Demo描述: * 利用pinyin4j将汉字转换为拼音 * * 注意事项: * 添加pinyin4j.jar包 */ public class MainActivity extends Activity { @Override public void onCreate(Bundle savedInstanceSt