Python文本处理
来自CloudWiki
Python编码解码
# -*- coding: utf-8 -*-
# Save this file as UTF-8; otherwise the non-ASCII literals below cannot be parsed.
#
# Demo: encode a Unicode string to bytes and decode it back, once with UTF-8
# and once with GBK, then round-trip the string through a GBK-encoded file.

# Named `text` rather than `str` so the builtin str() is not shadowed.
text = "我是中国人"
print(f'Unicode字符串为"{text}"')

# UTF-8 round trip: str -> bytes -> str
byte0 = text.encode("utf-8")
print(f'Unicode字符串"{text}"以utf-8编码得到字节串[{byte0}]')
str0 = byte0.decode("utf-8")
print(f'将utf-8字节串[{byte0}]解码得到Unicode字符串"{str0}"')

# GBK round trip: same text, different byte representation
byte1 = text.encode("gbk")
print(f'Unicode字符串"{text}"以gbk编码得到字节串[{byte1}]')
str1 = byte1.decode("gbk")
print(f'将gbk字节串[{byte1}]解码得到Unicode字符串"{str1}"')

# In text mode the `encoding` argument makes open() do the encode/decode step;
# the same encoding must be used for writing and reading.
print(f'以文本方式将Unicode字符串"{text}"写入a.txt')
with open("a.txt", "w", encoding="gbk") as f:
    f.write(text)

print("以文本方式读取 a.txt 的内容")
with open("a.txt", "r", encoding="gbk") as f:
    print(f.read())
Python文件读写
# -*- coding: utf-8 -*-
# Demo of the "w", "a", "r" and "rb" modes of open().
# Every file is opened in a `with` block so it is closed even if a read or
# write raises.

# "w": truncate the file if it exists, create it otherwise.
with open("wb.txt", "w", encoding="utf-8") as f:
    f.write("测试w方式写入,如果文件存在,则清空内容后写入,如果文件不存在则创建\n")

# "a": append to the end of the file if it exists, create it otherwise.
with open("wb.txt", "a", encoding="utf-8") as f:
    f.write("测试a方式写入,如果文件存在,在文件内容后最后追加写入,如果文件不存在则创建")

# "r": text mode, f.read() returns a str decoded with the given encoding.
with open("wb.txt", "r", encoding="utf-8") as f:
    data = f.read()
print(type(data))
print(data)

# "rb": binary mode, f.read() returns the raw bytes; decoding is up to the caller.
with open("wb.txt", "rb") as f:
    data = f.read()
print(type(data))
print(data)
print('将读取的字符对象解码:')
print(data.decode('utf-8'))
例:在文件中定位
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
# Time: 2018/5/23 22:56:26
# Description: positioning inside a file with seek() (absolute and end-relative)
# File Name: seek_file.py

# "rb+" opens for reading and writing without truncating, but it requires the
# file to exist; fall back to "wb+" (create) so the demo also runs on a fresh
# directory.
try:
    f = open("tmp.txt", "rb+")
except FileNotFoundError:
    f = open("tmp.txt", "wb+")

# Binary mode is used on purpose: end-relative seeks with a non-zero offset
# are only supported for binary files.
with f:
    f.write(b"abcdefghi")

    f.seek(5)  # absolute offset 5 -> the sixth byte of the file
    sixth_byte = f.read(1)
    print(sixth_byte)

    f.seek(-3, 2)  # whence=2: offset counted from the end -> third byte from the end
    third_from_end = f.read(1)
    print(third_from_end)
例:基于seek实现类似Linux命令tail -f的功能(文件名为lx_tailf.py)
# encoding=utf-8
# Minimal `tail -f`: print whatever gets appended to tmp.txt, polling every
# 0.2 seconds. Runs until interrupted (e.g. Ctrl+C).
import codecs
import time

# A read may end in the middle of a multi-byte UTF-8 character when the writer
# has not finished flushing it. An incremental decoder buffers the incomplete
# tail and completes it on the next chunk instead of raising UnicodeDecodeError.
decoder = codecs.getincrementaldecoder('utf-8')()

with open('tmp.txt', 'rb') as f:
    f.seek(0, 2)  # jump to the end of the file: only new content is shown
    while True:
        chunk = f.read()  # everything appended since the previous read
        if chunk:
            # flush so output without a trailing newline shows up right away
            print(decoder.decode(chunk), end='', flush=True)
        else:
            time.sleep(0.2)  # nothing new yet: sleep briefly before polling again
当tmp.txt追加新的内容时,新内容会被程序立即打印出来。
对大文件进行读写:
import os


def replace_in_file(path: str = 'a.txt', old: str = '中国人',
                    new: str = 'Chinese', encoding: str = 'utf-8') -> None:
    """Replace every occurrence of *old* with *new* in a text file.

    The file is streamed line by line into a hidden swap file next to it
    (``.<name>.swap``), so memory use stays bounded by the longest line no
    matter how large the file is. The swap file then replaces the original.

    Args:
        path: File to rewrite in place.
        old: Substring to search for. Matches are found within a single line
            only; an *old* that spans a line break is never matched.
        new: Replacement text.
        encoding: Encoding used for both reading and writing. The file must
            really be stored in this encoding; e.g. reading a GBK-encoded
            file as UTF-8 raises UnicodeDecodeError.

    Raises:
        OSError: If the file cannot be read or the swap file cannot be
            written or moved into place.
        UnicodeDecodeError: If the file content does not match *encoding*.
    """
    directory, filename = os.path.split(path)
    swap_path = os.path.join(directory, '.' + filename + '.swap')

    with open(path, encoding=encoding) as read_f, \
            open(swap_path, 'w', encoding=encoding) as write_f:
        # Iterating the file object yields one line at a time instead of
        # loading the whole file into memory.
        for line in read_f:
            write_f.write(line.replace(old, new))

    # os.replace overwrites the destination in a single step; a separate
    # remove + rename would lose the original if interrupted in between.
    os.replace(swap_path, path)


if __name__ == '__main__':
    replace_in_file()