import os

INPUT_FILE = '初始文本.txt'  # path of the raw text file that main() reads
OUTPUT_FILE = '文本.txt'     # path main() writes the cleaned lines to (opened in 'w' mode, so it is overwritten)

def clean_special_format(text):
    """Return *text* with the "收起评价" marker and double quotes removed.

    Surrounding whitespace is trimmed both before and after the removals,
    so a line consisting only of whitespace, markers and quotes comes back
    as the empty string.
    """
    result = text.strip()

    # Order matters: the marker is removed before the ASCII double quote, so
    # a marker that is split by a quote is left in place.
    for fragment in ("收起评价", '"'):
        result = result.replace(fragment, "")

    # Removing fragments can expose new leading/trailing whitespace.
    return result.strip()

def main():
    """Clean INPUT_FILE line by line and write the survivors to OUTPUT_FILE.

    Every line of INPUT_FILE is passed through clean_special_format(); lines
    that come back empty are dropped, and the remaining ones are written to
    OUTPUT_FILE, one per line. Progress and a final summary (raw line count,
    kept line count, dropped line count) are printed to stdout.

    If INPUT_FILE does not exist, an error message is printed and the
    function returns without creating OUTPUT_FILE.
    """
    if not os.path.exists(INPUT_FILE):
        print(f"❌ 找不到文件: {INPUT_FILE}，请确认文件名。")
        return

    print(f"📖 正在读取 {INPUT_FILE} ...")

    # 'utf-8-sig' decodes UTF-8 with or without a leading BOM. With plain
    # 'utf-8' the BOM (U+FEFF) stays on the first line; str.strip() does not
    # treat it as whitespace, so it would leak into the output and make a
    # BOM-only first line count as valid data.
    # errors='ignore' is kept: undecodable bytes are dropped, not fatal.
    with open(INPUT_FILE, 'r', encoding='utf-8-sig', errors='ignore') as f:
        raw_lines = f.readlines()

    print(f"🔄 正在清洗 {len(raw_lines)} 行原始数据...")

    # Keep only lines that are non-empty after cleaning. Lines that held
    # nothing but tabs/whitespace become '' and are discarded here.
    valid_lines = [
        cleaned
        for cleaned in map(clean_special_format, raw_lines)
        if cleaned
    ]

    print(f"正在保存至 {OUTPUT_FILE} ...")
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        # One batched call instead of a write() per line.
        f.writelines(line + '\n' for line in valid_lines)

    print("-" * 30)
    print("✅ 清洗完成！")
    print(f"   原始行数: {len(raw_lines)}")
    print(f"   有效行数: {len(valid_lines)}")
    print(f"   🗑️ 剔除垃圾行: {len(raw_lines) - len(valid_lines)}")
    print("-" * 30)

if __name__ == "__main__":
    main()