Python文本处理

Python编码解码

# -*- coding: utf-8 -*-
# This file should be saved as UTF-8, otherwise an error will be reported.
#
# Demonstrates the str <-> bytes round trip with two codecs (UTF-8 and GBK),
# then writes and reads the same text through a GBK-encoded text file.

# Named `text` rather than `str` so the built-in `str` type is not shadowed.
text = "我是中国人"
print(f'Unicode字符串为"{text}"')

# str.encode() turns text into bytes; bytes.decode() with the same codec
# restores the original text.
byte0 = text.encode("utf-8")
print(f'Unicode字符串"{text}"以utf-8编码得到字节串[{byte0}]')
str0 = byte0.decode("utf-8")
print(f'将utf-8字节串[{byte0}]解码得到Unicode字符串"{str0}"')

# The same text produces a different byte sequence under GBK.
byte1 = text.encode("gbk")
print(f'Unicode字符串"{text}"以gbk编码得到字节串[{byte1}]')
str1 = byte1.decode("gbk")
print(f'将gbk字节串[{byte1}]解码得到Unicode字符串"{str1}"')

print(f'以文本方式将Unicode字符串"{text}"写入a.txt')

# In text mode the `encoding` argument makes open() do the encode step.
with open("a.txt", "w", encoding="gbk") as f:
    f.write(text)

print("以文本方式读取 a.txt 的内容")
# Reading must use the codec the file was written with (GBK here).
with open("a.txt", "r", encoding="gbk") as f:
    print(f.read())

Python文件读写

# -*- coding: utf-8 -*-
# Demonstrates the "w", "a", "r" and "rb" modes of open() on one file.
# Each handle is opened in a `with` block so it is closed even if a read or
# write raises.

# "w": truncate the file if it exists, create it otherwise.
with open("wb.txt", "w", encoding="utf-8") as f:
    f.write("测试w方式写入，如果文件存在，则清空内容后写入，如果文件不存在则创建\n")

# "a": append to the end of the file if it exists, create it otherwise.
with open("wb.txt", "a", encoding="utf-8") as f:
    f.write("测试a方式写入，如果文件存在，在文件内容后最后追加写入，如果文件不存在则创建")

# Text mode: f.read() returns a str object.
with open("wb.txt", "r", encoding="utf-8") as f:
    data = f.read()
print(type(data))
print(data)

# Binary mode: f.read() returns a bytes object, which has to be decoded
# explicitly to get text back.
with open("wb.txt", "rb") as f:
    data = f.read()
print(type(data))
print(data)
print('将读取的字符对象解码：')
print(data.decode('utf-8'))

例：在文件中定位

# -*- coding: utf-8 -*-
# !/usr/local/bin/python
# Time: 2018/5/23 22:56:26
# Description: Demonstrates absolute and end-relative positioning with seek().
# File Name: seek_file.py

# "wb+" is used instead of "rb+": it creates tmp.txt when it is missing
# ("rb+" raises FileNotFoundError) and truncates it, so the file holds
# exactly the nine bytes written below and the seek from the end is
# deterministic. NOTE: any previous content of tmp.txt is discarded.
with open("tmp.txt", "wb+") as f:
    f.write(b"abcdefghi")

    f.seek(5)  # absolute offset 5, i.e. the sixth byte of the file
    sixth_byte = f.read(1)
    print(sixth_byte)

    f.seek(-3, 2)  # whence=2: offset is relative to the end of the file
    third_from_end = f.read(1)
    print(third_from_end)

例：基于seek实现类似Linux命令tail -f的功能（文件名为lx_tailf.py）

# encoding=utf-8
# Minimal `tail -f`: print whatever is appended to tmp.txt, polling for new
# data. Runs until interrupted (e.g. Ctrl-C).

import codecs
import time

# An incremental decoder keeps a trailing, incomplete multi-byte sequence
# buffered until the remaining bytes arrive. Decoding each chunk on its own
# with bytes.decode() raises UnicodeDecodeError whenever a read happens to end
# in the middle of a character.
decoder = codecs.getincrementaldecoder('utf-8')()

with open('tmp.txt', 'rb') as f:
    f.seek(0, 2)  # move to the end of the file; only new content is shown
    while True:  # keep displaying newly appended content
        chunk = f.read()
        if chunk:
            # flush so that output without a trailing newline shows up at once
            print(decoder.decode(chunk), end='', flush=True)
        else:
            time.sleep(0.2)  # nothing new yet: sleep briefly instead of spinning

当tmp.txt追加新的内容时，新内容会被程序立即打印出来。

对大文件进行读写：

import os


def replace_in_file(path, old, new, encoding="utf-8"):
    """Replace every occurrence of *old* with *new* in the text file *path*.

    The file is streamed line by line into a hidden swap file next to it
    (``.<name>.swap``), so memory use does not grow with the file size. The
    swap file then replaces the original.

    Args:
        path: File to rewrite. It must be decodable with *encoding*.
        old: Substring to search for.
        new: Replacement text.
        encoding: Codec used for both reading and writing.

    Raises:
        OSError: If the file cannot be read or the swap file cannot be
            written or moved into place.
        UnicodeDecodeError: If the file content is not valid for *encoding*.
    """
    directory, filename = os.path.split(path)
    swap_path = os.path.join(directory, "." + filename + ".swap")
    try:
        with open(path, encoding=encoding) as read_f, \
                open(swap_path, "w", encoding=encoding) as write_f:
            # Iterating the file object yields one line at a time, which
            # keeps memory bounded for large files.
            for line in read_f:
                write_f.write(line.replace(old, new))
    except BaseException:
        # Do not leave a half-written swap file behind on failure.
        if os.path.exists(swap_path):
            os.remove(swap_path)
        raise
    # os.replace overwrites the destination in one step, so there is no
    # moment at which the target file is missing (unlike remove + rename).
    os.replace(swap_path, path)


if __name__ == "__main__":
    replace_in_file('a.txt', '中国人', 'Chinese')

Python文本处理

目录

Python编码解码

Python文件读写

读写配置文件

读写XML文件

导航菜单

个人工具

命名空间

变种

视图

更多

搜索

导航

工具