Python自然语言处理 7.3 Training Classifier

编程入门行业动态更新时间:2024-10-19 15:36:11

Python<a href="https://www.elefans.com/category/jswz/34/1768401.html">自然语言处理</a> 7.3 Training Classifier

Python自然语言处理 7.3 Training Classifier

执行示例代码

# NOTE: this snippet expects `nltk` and `conll2000` (from `nltk.corpus`) to be
# in scope already; the imports are not part of the excerpt.


def npchunk_features(sentence, i, history):
    """Build the feature set for the token at position ``i``.

    Args:
        sentence: Sequence of ``(word, pos)`` pairs.
        i: Index of the token to describe.
        history: Chunk tags assigned to the preceding tokens. Accepted so the
            extractor signature matches what the tagger passes in, but not
            used by this minimal extractor.

    Returns:
        A dict holding a single feature, the token's part-of-speech tag.
    """
    _word, pos = sentence[i]
    return {"pos": pos}


class ConsecutiveNPChunkTagger(nltk.TaggerI):
    """Assign chunk tags to tokens one by one using a trained classifier."""

    def __init__(self, train_sents):
        """Train the classifier.

        Args:
            train_sents: Iterable of sentences, each a list of
                ``((word, pos), chunk_tag)`` pairs.
        """
        train_set = []
        for tagged_sent in train_sents:
            untagged_sent = nltk.tag.untag(tagged_sent)
            history = []
            for i, (_token, tag) in enumerate(tagged_sent):
                featureset = npchunk_features(untagged_sent, i, history)
                train_set.append((featureset, tag))
                history.append(tag)
        # algorithm='MEGAM' makes NLTK launch an external `megam` program,
        # which it locates through the MEGAM environment variable.
        # nltk.NaiveBayesClassifier.train(train_set) is the alternative the
        # original snippet kept commented out.
        self.classifier = nltk.MaxentClassifier.train(
            train_set, algorithm='MEGAM', trace=0)

    def tag(self, sentence):
        """Return ``[(token, chunk_tag), ...]`` for ``sentence``.

        Args:
            sentence: Sequence of ``(word, pos)`` pairs.
        """
        history = []
        for i, _token in enumerate(sentence):
            featureset = npchunk_features(sentence, i, history)
            history.append(self.classifier.classify(featureset))
        # Materialise the pairs: a bare zip() is a one-shot iterator on
        # Python 3 and would be empty on a second pass.
        return list(zip(sentence, history))


class ConsecutiveNPChunker(nltk.ChunkParserI):
    """Chunk parser that wraps :class:`ConsecutiveNPChunkTagger`."""

    def __init__(self, train_sents):
        """Convert chunk trees to tagger training data and train the tagger.

        Args:
            train_sents: Iterable of chunk trees.
        """
        tagged_sents = [
            [((w, t), c) for (w, t, c) in nltk.chunk.tree2conlltags(sent)]
            for sent in train_sents
        ]
        self.tagger = ConsecutiveNPChunkTagger(tagged_sents)

    def parse(self, sentence):
        """Chunk a sentence of ``(word, pos)`` pairs and return a tree."""
        tagged_sents = self.tagger.tag(sentence)
        conlltags = [(w, t, c) for ((w, t), c) in tagged_sents]
        return nltk.chunk.conlltags2tree(conlltags)


test_sents = conll2000.chunked_sents('test.txt', chunk_types=['NP'])
train_sents = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
chunker = ConsecutiveNPChunker(train_sents)
print(chunker.evaluate(test_sents))

报错

Traceback (most recent call last):
  File "E:/Python Practice/NLP/Chapter7.py", line 225, in <module>
    chunker = ConsecutiveNPChunker(train_sents)
  File "E:/Python Practice/NLP/Chapter7.py", line 216, in __init__
    self.tagger = ConsecutiveNPChunkTagger(tagged_sents)
  File "E:/Python Practice/NLP/Chapter7.py", line 201, in __init__
    train_set, algorithm='MEGAM', trace=0)  #
  File "D:\Anaconda3\lib\site-packages\nltk\classify\maxent.py", line 335, in train
    train_toks, trace, encoding, labels, gaussian_prior_sigma, **cutoffs
  File "D:\Anaconda3\lib\site-packages\nltk\classify\maxent.py", line 1483, in train_maxent_classifier_with_megam
    stdout = call_megam(options)
  File "D:\Anaconda3\lib\site-packages\nltk\classify\megam.py", line 168, in call_megam
    config_megam()
  File "D:\Anaconda3\lib\site-packages\nltk\classify\megam.py", line 57, in config_megam
    url=".html",
  File "D:\Anaconda3\lib\site-packages\nltk\internals.py", line 690, in find_binary
    name, path_to_bin, env_vars, searchpath, binary_names, url, verbose
  File "D:\Anaconda3\lib\site-packages\nltk\internals.py", line 674, in find_binary_iter
    path_to_bin or name, env_vars, searchpath, binary_names, url, verbose
  File "D:\Anaconda3\lib\site-packages\nltk\internals.py", line 632, in find_file_iter
    raise LookupError("\n\n%s\n%s\n%s" % (div, msg, div))
LookupError: ===========================================================================
NLTK was unable to find the megam file!
Use software specific configuration paramaters or set the MEGAM environment variable.

  For more information on megam, see:
    <http://www.umiacs.umd.edu/~hal/megam/index.html>
===========================================================================

通过StackOverflow，在这里下载MEGAM源文件Source: megam_src.tgz，下载之后解压，然后在代码中（程序的开始）为MEGAM添加环境变量

import os

# Tell NLTK where the megam executable lives. A raw string keeps the Windows
# backslashes literal instead of relying on unrecognised escape sequences
# (\A, \L, \s, \M, \m), which newer Python versions warn about.
# NOTE(review): this must point at a program Windows can actually run; the
# WinError 193 reported afterwards suggests this file is not one. Confirm.
os.environ["MEGAM"] = r'D:\Anaconda3\Lib\site-packages\MEGAM\megam-64'

再次运行，报错

  File "E:/Python Practice/NLP/Chapter7.py", line 204, in __init__
    train_set, algorithm='MEGAM', trace=0)  #
  File "D:\Anaconda3\lib\site-packages\nltk\classify\maxent.py", line 335, in train
    train_toks, trace, encoding, labels, gaussian_prior_sigma, **cutoffs
  File "D:\Anaconda3\lib\site-packages\nltk\classify\maxent.py", line 1483, in train_maxent_classifier_with_megam
    stdout = call_megam(options)
  File "D:\Anaconda3\lib\site-packages\nltk\classify\megam.py", line 172, in call_megam
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
  File "D:\Anaconda3\lib\subprocess.py", line 800, in __init__
    restore_signals, start_new_session)
  File "D:\Anaconda3\lib\subprocess.py", line 1207, in _execute_child
    startupinfo)
OSError: [WinError 193] %1 is not a valid Win32 application

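起初认为该错误的原因是 Python 版本是 64 位，而调用的库是 32 位的，所以解决方法便是安装 32 位的 Python。在官网下载一个 32 位的 Python，这里选择 python3.7.7 Windows x86 executable installer，安装成功，更换解释器之后还是报同样的错误……（补充说明：WinError 193 表示 subprocess 要启动的那个文件本身不是有效的 Windows 可执行程序，与 Python 解释器的位数无关；MEGAM 环境变量需要指向一个能在 Windows 上直接运行的 megam 可执行文件，这也解释了为什么更换 32 位解释器后错误依旧。）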

由于本人使用了Anaconda，又在Anaconda官网安装了一个32位的Anaconda，依旧报错。。。

有时间再来更新

更多推荐

Python自然语言处理 7.3 Training Classifier

本文发布于:2024-03-23 23:42:56，感谢您对本站的认可！

本文链接:https://www.elefans.com/category/jswz/34/1744226.html