You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
AlphaGenerator/manual_tools/keys_organize.py

28 lines
818 B

import jieba
'''
数据库中读取数据集描述, 转换成标签
'''
def process_text(text):
    """Segment ``text`` with jieba and print the unique tokens that pass the filter.

    A token is discarded when it contains any of the filter substrings
    (whitespace/control escapes, a plain space, or the literal ``'10'``).

    Args:
        text: The string to segment; it is passed straight to ``jieba.lcut``.

    Returns:
        The list of unique surviving tokens in first-seen order. The same
        list is also printed.
    """
    # Substrings that disqualify a token.
    # NOTE(review): the list previously also held many '' entries, which look
    # like characters lost in transit -- restore them here if they can be
    # recovered. They are omitted because '' is a substring of every string,
    # so a single empty entry rejected every token and the output was always [].
    filter_list = ('\n', '\t', '\r', '\b', '\f', '\v', '10', ' ')

    # Segment the text with jieba.
    text_list = jieba.lcut(text)

    # Keep only tokens that contain none of the filter substrings.
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output is stable from run to run (a plain set has arbitrary order).
    results = list(dict.fromkeys(
        tl for tl in text_list
        if not any(fl in tl for fl in filter_list)
    ))
    print(results)
    return results