18 io流处理
有时候你需要把字符串当作文件来操作——比如测试文件读写逻辑、在内存中拼接CSV数据、处理网络响应……这时候StringIO和BytesIO就派上用场了,它们是内存中的"虚拟文件"。
一、StringIO:内存文本流
1.1 基本用法
python
from io import StringIO
# 创建StringIO对象
buffer = StringIO()
# 写入数据
buffer.write("Hello, ")
buffer.write("World!\n")
buffer.write("第二行\n")
# 获取内容
content = buffer.getvalue()
print(content)
# Hello, World!
# 第二行
# 关闭
buffer.close()
1.2 带初始值
python
from io import StringIO
# 创建时就带内容
buffer = StringIO("初始内容\n")
# 读取
print(buffer.read()) # 初始内容
# 回到开头
buffer.seek(0)
print(buffer.readline()) # 初始内容
1.3 用with语句
python
from io import StringIO
# The context manager closes the buffer automatically when the block exits.
with StringIO() as buffer:
    buffer.write("Hello\n")
    buffer.write("World\n")
    # Read the contents before leaving the block: calling getvalue() on a
    # closed StringIO raises ValueError.
    content = buffer.getvalue()
print(content)
二、BytesIO:内存二进制流
2.1 基本用法
python
from io import BytesIO
# 创建BytesIO对象
buffer = BytesIO()
# 写入二进制数据
buffer.write(b"Hello, ")
buffer.write(b"World!")
# 获取内容
content = buffer.getvalue()
print(content) # b'Hello, World!'
buffer.close()
2.2 带初始值
python
from io import BytesIO
buffer = BytesIO(b"binary data")
print(buffer.read()) # b'binary data'
三、文件接口
StringIO和BytesIO都实现了完整的文件接口:
3.1 读取方法
python
from io import StringIO
buffer = StringIO("line1\nline2\nline3\n")
# read():读取全部
buffer.seek(0)
print(buffer.read()) # line1\nline2\nline3\n
# readline():读取一行
buffer.seek(0)
print(buffer.readline()) # line1\n
# readlines():读取所有行
buffer.seek(0)
print(buffer.readlines()) # ['line1\n', 'line2\n', 'line3\n']
# 迭代
buffer.seek(0)
for line in buffer:
    print(line.strip())
3.2 写入方法
python
from io import StringIO
buffer = StringIO()
# write():写入字符串
buffer.write("Hello")
# writelines():写入多个字符串
buffer.writelines([" World", "!"])
print(buffer.getvalue()) # Hello World!
3.3 定位方法
python
from io import StringIO
buffer = StringIO("Hello World")
# tell():返回当前位置
print(buffer.tell()) # 0
# seek():移动位置
buffer.seek(6)
print(buffer.read()) # World
# seek(0):回到开头
buffer.seek(0)
print(buffer.read()) # Hello World
四、实用场景
4.1 测试文件操作
python
from io import StringIO
def process_file(file_obj):
    """Return the lines of a text file object, stripped and upper-cased.

    Args:
        file_obj: Any object whose ``readlines()`` method returns strings,
            such as an open text file or an ``io.StringIO``.

    Returns:
        A list with one entry per line. Surrounding whitespace (including
        the trailing newline) is removed and the text is upper-cased.
    """
    lines = file_obj.readlines()
    return [line.strip().upper() for line in lines]
# 测试时不需要真实文件
test_data = StringIO("hello\nworld\npython\n")
result = process_file(test_data)
print(result) # ['HELLO', 'WORLD', 'PYTHON']
4.2 内存中生成CSV
python
from io import StringIO
import csv
buffer = StringIO()
writer = csv.writer(buffer)
writer.writerow(["name", "age", "city"])
writer.writerow(["大志", 28, "北京"])
writer.writerow(["小明", 25, "上海"])
csv_content = buffer.getvalue()
print(csv_content)
4.3 捕获print输出
python
from io import StringIO
import sys
# Redirect stdout to an in-memory buffer.
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
    print("这行不会显示在终端")
    print("这行也不会")
    # Collect everything that was printed while the redirect was active.
    output = sys.stdout.getvalue()
finally:
    # Always put the real stdout back, even if the captured code raises;
    # otherwise all later output would silently vanish into the buffer.
    sys.stdout = old_stdout
print(f"捕获到: {output}")
更优雅的方式:
python
from io import StringIO
from contextlib import redirect_stdout
with StringIO() as buffer:
    # redirect_stdout restores the previous sys.stdout when its block exits.
    with redirect_stdout(buffer):
        print("捕获这行")
        print("也捕获这行")
    # Read the captured text after the redirect ends but before the buffer
    # is closed by the outer context manager.
    output = buffer.getvalue()
print(f"捕获到: {output}")
4.4 处理网络响应
python
from io import BytesIO
import json
# 模拟网络响应
response_data = b'{"name": "大志", "age": 28}'
buffer = BytesIO(response_data)
# 像读文件一样处理
data = json.loads(buffer.read())
print(data) # {'name': '大志', 'age': 28}
4.5 临时文件替代
python
from io import BytesIO
def compress_data(data):
    """Gzip-compress a bytes object entirely in memory.

    Args:
        data: The raw bytes to compress.

    Returns:
        The complete gzip stream (header, compressed payload, trailer) as bytes.
    """
    import gzip
    buffer = BytesIO()
    with gzip.GzipFile(fileobj=buffer, mode='wb') as f:
        f.write(data)
    # Read the buffer only after the GzipFile has been closed: closing flushes
    # the remaining compressed data and the trailer, and leaves `buffer` open.
    return buffer.getvalue()
def decompress_data(data):
    """Decompress a gzip stream held in memory.

    Args:
        data: Bytes containing a complete gzip stream.

    Returns:
        The decompressed payload as bytes.
    """
    import gzip
    # Wrap the bytes in a BytesIO so GzipFile can read them like a file.
    buffer = BytesIO(data)
    with gzip.GzipFile(fileobj=buffer, mode='rb') as f:
        return f.read()
original = b"Hello World " * 1000
compressed = compress_data(original)
decompressed = decompress_data(compressed)
print(f"原始: {len(original)}字节")
print(f"压缩: {len(compressed)}字节")
print(f"解压后一致: {original == decompressed}")
五、编码处理
5.1 StringIO与编码
python
from io import StringIO
# StringIO处理的是字符串,不需要指定编码
buffer = StringIO()
buffer.write("你好世界")
print(buffer.getvalue()) # 你好世界
5.2 BytesIO与编码
python
from io import BytesIO
# BytesIO处理的是字节
buffer = BytesIO()
# 字符串需要先编码
text = "你好世界"
buffer.write(text.encode("utf-8"))
# 读取后需要解码
content = buffer.getvalue().decode("utf-8")
print(content) # 你好世界
5.3 文本与二进制转换
python
from io import StringIO, BytesIO
# 文本 → 二进制
text_buffer = StringIO("Hello 你好")
text = text_buffer.getvalue()
bytes_buffer = BytesIO(text.encode("utf-8"))
# 二进制 → 文本
bytes_data = bytes_buffer.getvalue()
text_buffer = StringIO(bytes_data.decode("utf-8"))
print(text_buffer.getvalue()) # Hello 你好
六、性能考虑
6.1 大量拼接
python
from io import StringIO
# Not recommended: string concatenation (a new object is created each time).
result = ""
for i in range(10000):
    result += str(i)
# Recommended: accumulate the pieces in a StringIO buffer.
buffer = StringIO()
for i in range(10000):
    buffer.write(str(i))
result = buffer.getvalue()
# 更推荐:用join
result = "".join(str(i) for i in range(10000))
6.2 内存使用
python
from io import BytesIO
# BytesIO会把所有数据存在内存中
# 大文件应该用真实文件或流式处理
buffer = BytesIO()
for i in range(1000000):
    # Every encoded line written here stays in the in-memory buffer.
    buffer.write(f"line {i}\n".encode())
# 这会占用大量内存
七、与其他模块配合
7.1 与csv模块
python
from io import StringIO
import csv
# Write and read CSV entirely in memory.
output = StringIO()
writer = csv.writer(output)
writer.writerow(["name", "age"])
writer.writerow(["大志", 28])
csv_string = output.getvalue()
# Parse the CSV text back. The buffer is named csv_input so that it does not
# shadow the built-in input() function.
csv_input = StringIO(csv_string)
reader = csv.reader(csv_input)
for row in reader:
    print(row)
7.2 与json模块
python
from io import StringIO
import json
# 在内存中读写JSON
buffer = StringIO()
json.dump({"name": "大志", "age": 28}, buffer, ensure_ascii=False)
json_string = buffer.getvalue()
# 解析JSON
data = json.loads(json_string)
print(data)
7.3 与gzip模块
python
from io import BytesIO
import gzip
# Compress in memory.
text = "Hello World " * 1000
buffer = BytesIO()
with gzip.GzipFile(fileobj=buffer, mode='wb') as f:
    f.write(text.encode())
# Read the result only after the GzipFile is closed, so that the flushed data
# and the gzip trailer are included.
compressed = buffer.getvalue()
print(f"压缩率: {len(compressed) / len(text):.2%}")
八、总结
io模块的核心类:
| 类 | 用途 |
|---|---|
| StringIO | 内存文本流 |
| BytesIO | 内存二进制流 |
两者都支持:
- read()/readline()/readlines()
- write()/writelines()
- seek()/tell()
- getvalue() 获取全部内容
- with语句
使用场景:
- 测试时模拟文件
- 内存中生成数据
- 捕获输出
- 处理网络响应
- 替代临时文件
StringIO处理字符串,BytesIO处理字节,根据数据类型选择。