import jieba
'''
Read dataset descriptions from the database and convert them into tags.
'''


def process_text(text):
    """Split *text* into words with jieba and return the unique tag words.

    The text is tokenized with ``jieba.lcut``. A token is discarded when it
    contains any of the stop fragments below as a substring (so a token such
    as ``"2010"`` is dropped because it contains ``"10"``). The surviving
    tokens are deduplicated, keeping the order of first occurrence so the
    result is deterministic.

    Args:
        text: The description string to tokenize. ``None`` or an empty
            string yields an empty result.

    Returns:
        A list of unique tokens that contain none of the stop fragments.
        The same list is also printed to stdout, as the function has always
        done.
    """
    # Whitespace/control characters, punctuation and common filler words.
    # Matching is by substring, not by equality.
    stop_fragments = (
        '\n', '\t', '\r', '\b', '\f', '\v', '：', '的', '或', '10', '天',
        '了', '可', '是', '该', '，', ' ', '、', '让', '和', '集',
    )

    # Guard falsy input so None does not reach the tokenizer.
    tokens = jieba.lcut(text) if text else []

    tags = []
    seen = set()
    for token in tokens:
        if token in seen:
            continue
        if any(fragment in token for fragment in stop_fragments):
            continue
        seen.add(token)
        tags.append(token)

    # Printing is kept for backward compatibility with script-style usage;
    # the return value is what programmatic callers should rely on.
    print(tags)
    return tags