(1)安装gensim
pip install --upgrade setuptools
pip install gensim
sudo pip install pattern
(2)使用上次训练好的词向量vectors.bin
vectors.bin 的生成方法请参考:http://blog.csdn.net/u013378306/article/details/54616829
# -*- coding: utf-8 -*-
"""Query pre-trained word2vec vectors with gensim.

Loads vectors stored in the binary word2vec format and prints, in order:
the raw vector of one word, the result of an analogy-style query, the
cosine similarity of two words, the cosine similarity of two word lists,
and the word that does not fit with the rest of a list.

Every ``print`` call takes a single argument, so the script produces the
same output under Python 2 and Python 3.
"""
# Newer gensim releases expose load_word2vec_format on KeyedVectors;
# Word2Vec.load_word2vec_format is deprecated there and later removed.
# Fall back to the old entry point when KeyedVectors is not available.
try:
    from gensim.models import KeyedVectors as _VectorLoader
except ImportError:
    from gensim.models.word2vec import Word2Vec as _VectorLoader

model = _VectorLoader.load_word2vec_format(
    "/home/lhy/data/word2vec/w2v/trunk/vectors.bin", binary=True)

# Print the vector of the word "good".
print(model["good"])

# Top 5 words for which 'woman' and 'king' contribute positively and
# 'man' contributes negatively, in ranked order.
ll = model.most_similar(positive=['woman', 'king'], negative=['man'], topn=5)
print(ll)

# Cosine similarity between "good" and "bad".
sim1 = model.similarity(u'good', u'bad')
print(sim1)

# Cosine similarity between two word lists.
list1 = [u'good', u'bad']
list2 = [u'red', u'good']
list_sim1 = model.n_similarity(list1, list2)
print(list_sim1)

# Find the word that does not belong with the others.
# (Named `words` rather than `list` so the builtin is not shadowed.)
words = [u'good', u'bad', u'her', u'greet']
print(model.doesnt_match(words))
时间: 2024-10-25 20:44:38