使用 Python 对自然语言做文本分词等处理,在将处理结果写入 Excel 表格时出现 "Unexpected data type" 异常,代码和运行结果如下:
# coding:utf-8
"""Extract weighted keywords from essays stored in an Excel sheet.

Reads the essay column (second column, header row skipped) of the first
sheet in ``infile``, runs PyNLPIR keyword extraction on every essay, and
writes one keyword per row to ``outfile``: the word in column 0 and its
weight in column 1.
"""
import sys

import pynlpir
import xlrd
import xlwt

infile = '/Users/mac/Documents/essay_data.xlsx'
# NOTE(review): xlwt only produces the legacy binary .xls format, so a file
# saved under an .xlsx name may be rejected by Excel.  The path is kept as in
# the original script; consider renaming it to segment_data.xls.
outfile = '/Users/mac/Documents/segment_data.xlsx'


def _read_essays(path):
    """Return the text of every essay cell (column 1, rows 1..end) in *path*."""
    sheet = xlrd.open_workbook(path).sheet_by_index(0)
    essays = []
    for row in range(1, sheet.nrows):  # row 0 is the header
        # Only text cells can be segmented; skip numbers, dates and blanks.
        if sheet.cell_type(row, 1) != xlrd.XL_CELL_TEXT:
            continue
        text = sheet.cell_value(row, 1)
        if text.strip():
            essays.append(text)
    return essays


def _extract_keywords(essays):
    """Return a flat list of ``(word, weight)`` tuples for all *essays*."""
    keywords = []
    pynlpir.open()
    try:
        for essay in essays:
            keywords.extend(pynlpir.get_key_words(essay, weighted=True))
    finally:
        # Release the NLPIR library even if extraction fails part-way.
        pynlpir.close()
    return keywords


def _write_keywords(keywords, path):
    """Write *keywords* to a new workbook at *path*, one keyword per row."""
    book = xlwt.Workbook()
    sheet = book.add_sheet('segment_result')
    for row, (word, weight) in enumerate(keywords):
        print((word, weight))
        # A cell holds a single scalar.  Passing the whole (word, weight)
        # tuple makes xlwt treat it as rich-text segments, and the float
        # weight then raises "Unexpected data type".  Write each part to its
        # own column instead.
        sheet.write(row, 0, word)
        sheet.write(row, 1, weight)
    book.save(path)


def main():
    """Run the read -> extract -> write pipeline on ``infile``/``outfile``."""
    if sys.version_info[0] == 2:
        # Python 2 only: keep the original default-encoding workaround so
        # implicit str/unicode conversions of Chinese text keep working.
        reload(sys)  # noqa: F821 - builtin on Python 2
        sys.setdefaultencoding("utf-8")
    _write_keywords(_extract_keywords(_read_essays(infile)), outfile)


if __name__ == "__main__":
    main()

# *********** Run output of the original script (before the fix) ***********
# /usr/bin/python2.7 /Users/mac/PycharmProjects/myessay/process_data.py
# [2018-03-26 14:04:48] Cannot open file /Library/Python/2.7/site-packages/pynlpir/Data/NewWord.lst
# Cannot write log file /Library/Python/2.7/site-packages/pynlpir/Data/20180326.err!
# Traceback (most recent call last):
#   File "/Users/mac/PycharmProjects/myessay/process_data.py", line 42, in
#     sheet2.write(i, 0, r_list)
#   File "/Library/Python/2.7/site-packages/xlwt/Worksheet.py", line 1088, in write
#     self.row(r).write(c, label, style)
#   File "/Library/Python/2.7/site-packages/xlwt/Row.py", line 252, in write
#     self.__rich_text_helper(col, label, style, style_index)
#   File "/Library/Python/2.7/site-packages/xlwt/Row.py", line 278, in __rich_text_helper
#     raise Exception ("Unexpected data type %r" % type(data))
# Exception: Unexpected data type (u'\u7f51\u5e97', 10.35)
# Process finished with exit code 1
免责声明:本内容仅代表回答会员见解不代表天盟观点,请谨慎对待。
版权声明:作者保留权利,不代表天盟立场。
|
|
|
|