1
0

gbk_to_utf8.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. import os, chardet, codecs, argparse
  2. def WriteFile(filePath, u, encoding="utf-8"):
  3. with codecs.open(filePath, "w", encoding) as f:
  4. f.write(u)
  5. def GBK_2_UTF8(src, dst):
  6. # 检测编码,coding可能检测不到编码,有异常
  7. f = open(src, "rb")
  8. coding = chardet.detect(f.read())["encoding"]
  9. f.close()
  10. if coding != "utf-8":
  11. with codecs.open(src, "r", coding) as f:
  12. try:
  13. WriteFile(dst, f.read(), encoding="utf-8")
  14. try:
  15. print(src + " " + coding + " to utf-8 converted!")
  16. except Exception:
  17. print("print error")
  18. except Exception:
  19. print(src +" "+ coding+ " read error")
  20. # 把目录中的文件编码由gbk转换为utf-8
  21. def ReadDirectoryFile(rootdir):
  22. for parent, dirnames, filenames in os.walk(rootdir):
  23. for dirname in dirnames:
  24. #递归函数,遍历所有子文件夹
  25. ReadDirectoryFile(dirname)
  26. for filename in filenames:
  27. if filename.endswith(".c") or filename.endswith(".cpp") or filename.endswith(".bas"):
  28. GBK_2_UTF8(os.path.join(parent, filename),
  29. os.path.join(parent, filename))
  30. if __name__ == "__main__":
  31. parser = argparse.ArgumentParser(description="从指定目录读取文件")
  32. parser.add_argument("src_path", help="源目录的路径")
  33. args = parser.parse_args()
  34. ReadDirectoryFile(args.src_path)