“Python文本处理”的版本间的差异
来自CloudWiki
(创建页面,内容为“==Python编码解码== <nowiki># -*- coding: utf-8 -*- # 本文件应该保存为utf-8编码,否则会报错 str = "我是中国人" print(f'Unicode字符串为"…”) |
|||
第50行: | 第50行: | ||
f.close() | f.close() | ||
</nowiki> | </nowiki> | ||
+ | |||
+ | 例:在文件中定位 | ||
+ | |||
+ | <nowiki># -*- coding: utf-8 -*- | ||
+ | # !/usr/local/bin/python | ||
+ | # Time: 2018/5/23 22:56:26 | ||
+ | # Description: | ||
+ | # File Name: seek_file.py | ||
+ | |||
+ | f = open("tmp.txt", "rb+") | ||
+ | f.write(b"abcdefghi") | ||
+ | f.seek(5) # 移动到文件的第六个字节 | ||
+ | print(f.read(1)) | ||
+ | f.seek(-3, 2) # 移动到文件的倒数第三字节 | ||
+ | print(f.read(1))</nowiki> | ||
+ | |||
+ | 例:基于seek实现类似Linux命令tail -f的功能(文件名为lx_tailf.py) | ||
+ | |||
+ | <nowiki># encoding=utf-8 | ||
+ | |||
+ | import time | ||
+ | |||
+ | with open('tmp.txt', 'rb') as f: | ||
+ | f.seek(0, 2) # 将光标移动至文件末尾 | ||
+ | while True: # 实时显示文件新增加的内容 | ||
+ | line = f.read() | ||
+ | if line: | ||
+ | print(line.decode('utf-8'), end='') | ||
+ | else: | ||
+ | time.sleep(0.2) # 读取完毕后短暂的睡眠</nowiki> | ||
+ | |||
+ | |||
+ | 当tmp.txt追加新的内容时,新内容会被程序立即打印出来。 | ||
+ | |||
+ | 对大文件进行读写: | ||
+ | |||
+ | <nowiki>import os | ||
+ | with open('a.txt',encoding="utf-8") as read_f,open('.a.txt.swap','w',encoding="utf-8") as write_f: | ||
+ | for line in read_f:#对可迭代对象f逐行操作,防止内存溢出 | ||
+ | line=line.replace('中国人','Chinese') | ||
+ | write_f.write(line) | ||
+ | os.remove('a.txt') | ||
+ | os.rename('.a.txt.swap','a.txt')</nowiki> | ||
+ | |||
+ | ===读写配置文件=== | ||
+ | |||
+ | ===读写XML文件=== |
2020年1月17日 (五) 00:35的版本
Python编码解码
# -*- coding: utf-8 -*-
# Encoding/decoding demo: round-trips one Unicode string through UTF-8 and
# GBK, then writes it to a.txt as GBK-encoded text and reads it back.
# This file should be saved as UTF-8, otherwise an error is raised.

# Named `text` rather than `str` so the built-in str type is not shadowed.
text = "我是中国人"
print(f'Unicode字符串为"{text}"')

# str -> bytes -> str using UTF-8.
byte0 = text.encode("utf-8")
print(f'Unicode字符串"{text}"以utf-8编码得到字节串[{byte0}]')
str0 = byte0.decode("utf-8")
print(f'将utf-8字节串[{byte0}]解码得到Unicode字符串"{str0}"')

# str -> bytes -> str using GBK.
byte1 = text.encode("gbk")
print(f'Unicode字符串"{text}"以gbk编码得到字节串[{byte1}]')
str1 = byte1.decode("gbk")
print(f'将gbk字节串[{byte1}]解码得到Unicode字符串"{str1}"')

# Text-mode I/O: the `encoding` argument performs the same conversion.
print(f'以文本方式将Unicode字符串"{text}"写入a.txt')
with open("a.txt", "w", encoding="gbk") as f:
    f.write(text)

print("以文本方式读取 a.txt 的内容")
with open("a.txt", "r", encoding="gbk") as f:
    print(f.read())
Python文件读写
# -*- coding: utf-8 -*-
# File read/write demo: writes wb.txt in "w" mode, appends in "a" mode, then
# reads it back once as text and once as raw bytes.
# Every handle is managed by `with`, so it is closed even if a call raises.

# "w": truncate the file if it exists, create it otherwise.
with open("wb.txt", "w", encoding="utf-8") as f:
    f.write("测试w方式写入,如果文件存在,则清空内容后写入,如果文件不存在则创建\n")

# "a": append after the existing content, create the file if it is missing.
with open("wb.txt", "a", encoding="utf-8") as f:
    f.write("测试a方式写入,如果文件存在,在文件内容后最后追加写入,如果文件不存在则创建")

# Text mode: f.read() returns a str object.
with open("wb.txt", "r", encoding="utf-8") as f:
    data = f.read()
    print(type(data))
    print(data)

# Binary mode: f.read() returns a bytes object that must be decoded manually.
with open("wb.txt", "rb") as f:
    data = f.read()
    print(type(data))
    print(data)
    print('将读取的字符对象解码:')
    print(data.decode('utf-8'))
例:在文件中定位
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
# Time: 2018/5/23 22:56:26
# Description: demonstrates positioning inside a file with seek().
# File Name: seek_file.py

# "rb+" requires the file to exist; fall back to "wb+" (create) only when it
# is missing, so an existing tmp.txt keeps any content beyond the 9 bytes
# overwritten below.
try:
    f = open("tmp.txt", "rb+")
except FileNotFoundError:
    f = open("tmp.txt", "wb+")

# `with` guarantees the handle is closed (and the write flushed) on exit.
with f:
    f.write(b"abcdefghi")
    f.seek(5)  # move to the sixth byte of the file (offset 5 from the start)
    print(f.read(1))
    f.seek(-3, 2)  # whence=2: move to the third byte counted from the end
    print(f.read(1))
例:基于seek实现类似Linux命令tail -f的功能(文件名为lx_tailf.py)
# encoding=utf-8
# Minimal `tail -f`: print whatever gets appended to tmp.txt, polling forever.

import codecs
import time

# An incremental decoder buffers a trailing partial multi-byte sequence, so a
# UTF-8 character split across two reads does not raise UnicodeDecodeError.
decoder = codecs.getincrementaldecoder('utf-8')()

with open('tmp.txt', 'rb') as f:
    f.seek(0, 2)  # move the cursor to the end of the file
    while True:  # keep showing content newly appended to the file
        chunk = f.read()
        if chunk:
            # flush so output without a trailing newline appears right away
            print(decoder.decode(chunk), end='', flush=True)
        else:
            time.sleep(0.2)  # nothing new yet: sleep briefly before polling again
当tmp.txt追加新的内容时,新内容会被程序立即打印出来。
对大文件进行读写:
import os

# Rewrite a.txt line by line through a swap file, replacing '中国人' with
# 'Chinese' in every line.
with open('a.txt', encoding="utf-8") as read_f, \
        open('.a.txt.swap', 'w', encoding="utf-8") as write_f:
    # Iterate the file object line by line so the whole file is never loaded
    # into memory at once.
    for line in read_f:
        line = line.replace('中国人', 'Chinese')
        write_f.write(line)

# os.replace overwrites the destination in a single step, so there is no
# moment at which a.txt is missing (unlike os.remove followed by os.rename).
os.replace('.a.txt.swap', 'a.txt')