一个python脚本,把一个目录下所有文件转成utf8编码
The chardet package is available in the gentoo-china overlay, which can be added with:
layman -a gentoo-china
I have already filed a bug requesting that chardet be added to Portage.
However, there has been no response yet.
#! /usr/bin/python
import sys,chardet,shutil,os,tempfile
def convert(fname):
    """Re-encode the file at *fname* as UTF-8, in place.

    The file's encoding is guessed with ``chardet``. The file is rewritten
    only when the guess has a confidence above 0.9 and the detected
    encoding is not already UTF-8 (or plain ASCII, whose bytes are valid
    UTF-8 as-is). In every other case the file is left untouched.

    Args:
        fname: Path of the file to convert.

    Raises:
        IOError/OSError: If the file cannot be read or replaced.
        UnicodeDecodeError: If the content does not actually decode with
            the detected encoding.
        LookupError: If Python has no codec for the detected encoding.
    """
    # Read raw bytes: chardet works on undecoded data, and binary mode
    # keeps the content free of any newline or implicit-decoding changes.
    with open(fname, 'rb') as fp:
        raw = fp.read()

    # Detect once; the result carries both the encoding and the confidence.
    guess = chardet.detect(raw)
    encoding = guess['encoding']
    rate = guess['confidence']

    # chardet reports an encoding of None when it cannot guess at all.
    if not encoding or rate <= 0.9:
        return
    if encoding.lower() in ('utf-8', 'ascii'):
        return

    # Decode before creating the temp file, so a decoding failure leaves
    # nothing behind. bytes.decode works on both Python 2 and Python 3.
    converted = raw.decode(encoding).encode('utf-8')

    # Create the temp file next to the target so the final move stays on
    # one filesystem instead of falling back to copy-and-delete.
    target_dir = os.path.dirname(os.path.abspath(fname))
    tmp, tmpname = tempfile.mkstemp(dir=target_dir)
    try:
        # fdopen takes ownership of the descriptor and closes it on exit.
        with os.fdopen(tmp, 'wb') as out:
            out.write(converted)
        # mkstemp creates the file as 0600; keep the original permissions.
        shutil.copymode(fname, tmpname)
        shutil.move(tmpname, fname)
    except Exception:
        # Do not leave a stray temp file in the user's directory.
        if os.path.exists(tmpname):
            os.remove(tmpname)
        raise
if __name__ == "__main__":
    # Usage: <script> DIRECTORY
    # Converts every regular file directly inside DIRECTORY (not recursive).
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s DIRECTORY\n" % sys.argv[0])
        sys.exit(1)

    target_dir = sys.argv[1]
    for f in os.listdir(target_dir):
        pathname = os.path.join(target_dir, f)
        # listdir also yields subdirectories and other non-regular entries,
        # which cannot be opened and read as files; skip them.
        if os.path.isfile(pathname):
            convert(pathname)
layman -a gentoo-china
I have already filed a bug requesting that chardet be added to Portage.
However, there has been no response yet.
#! /usr/bin/python
import sys,chardet,shutil,os,tempfile
def convert(fname):
    """Re-encode the file at *fname* as UTF-8, in place.

    The file's encoding is guessed with ``chardet``. The file is rewritten
    only when the guess has a confidence above 0.9 and the detected
    encoding is not already UTF-8 (or plain ASCII, whose bytes are valid
    UTF-8 as-is). In every other case the file is left untouched.

    Args:
        fname: Path of the file to convert.

    Raises:
        IOError/OSError: If the file cannot be read or replaced.
        UnicodeDecodeError: If the content does not actually decode with
            the detected encoding.
        LookupError: If Python has no codec for the detected encoding.
    """
    # Read raw bytes: chardet works on undecoded data, and binary mode
    # keeps the content free of any newline or implicit-decoding changes.
    with open(fname, 'rb') as fp:
        raw = fp.read()

    # Detect once; the result carries both the encoding and the confidence.
    guess = chardet.detect(raw)
    encoding = guess['encoding']
    rate = guess['confidence']

    # chardet reports an encoding of None when it cannot guess at all.
    if not encoding or rate <= 0.9:
        return
    if encoding.lower() in ('utf-8', 'ascii'):
        return

    # Decode before creating the temp file, so a decoding failure leaves
    # nothing behind. bytes.decode works on both Python 2 and Python 3.
    converted = raw.decode(encoding).encode('utf-8')

    # Create the temp file next to the target so the final move stays on
    # one filesystem instead of falling back to copy-and-delete.
    target_dir = os.path.dirname(os.path.abspath(fname))
    tmp, tmpname = tempfile.mkstemp(dir=target_dir)
    try:
        # fdopen takes ownership of the descriptor and closes it on exit.
        with os.fdopen(tmp, 'wb') as out:
            out.write(converted)
        # mkstemp creates the file as 0600; keep the original permissions.
        shutil.copymode(fname, tmpname)
        shutil.move(tmpname, fname)
    except Exception:
        # Do not leave a stray temp file in the user's directory.
        if os.path.exists(tmpname):
            os.remove(tmpname)
        raise
if __name__ == "__main__":
    # Usage: <script> DIRECTORY
    # Converts every regular file directly inside DIRECTORY (not recursive).
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s DIRECTORY\n" % sys.argv[0])
        sys.exit(1)

    target_dir = sys.argv[1]
    for f in os.listdir(target_dir):
        pathname = os.path.join(target_dir, f)
        # listdir also yields subdirectories and other non-regular entries,
        # which cannot be opened and read as files; skip them.
        if os.path.isfile(pathname):
            convert(pathname)
Comments