需求:先从数据集中留出 100 张图片作为测试集,其余图片再按 0.7 的比例(训练集 : 验证集 = 7 : 3)划分为训练集和验证集

基于以上需求编写该脚本,代码如下:
import os
import time
import numpy as np
import shutil


def main(splitNum=0.7, testNum=100,
         imagePath='./wormPic/images/', txtPath='./wormPic/txt/',
         seed=None):
    """Split an image/label dataset into train, val and test folders.

    ``testNum`` images are held out as the test set first; the remaining
    images are divided into train and val, with ``splitNum`` being the
    fraction that goes to train. Files are copied (never moved) into a new
    ``./dataSet-<timestamp>/{train,val,test}/{images,labels}`` tree created
    in the current working directory.

    Args:
        splitNum: Fraction (0..1) of the non-test images assigned to train.
        testNum: Number of images reserved for the test set.
        imagePath: Directory containing the ``.jpg`` images.
        txtPath: Directory containing one ``<image stem>.txt`` label file
            per image.
        seed: Optional seed for the shuffle; pass an int to make the split
            reproducible. ``None`` gives a different split on every run.

    Returns:
        The path of the dataset directory that was created.

    Raises:
        ValueError: If ``splitNum`` is outside [0, 1], ``testNum`` is
            negative, or there are fewer images than ``testNum``.
    """
    if not 0 <= splitNum <= 1:
        raise ValueError('splitNum must be between 0 and 1, got %r' % (splitNum,))
    if testNum < 0:
        raise ValueError('testNum must be non-negative, got %r' % (testNum,))

    # Keep only .jpg files so stray files in the image folder cannot break
    # the copy step. Sorting makes a seeded shuffle independent of the
    # arbitrary order returned by os.listdir.
    imageNames = sorted(
        name for name in os.listdir(imagePath)
        if os.path.splitext(name)[1].lower() == '.jpg'
    )

    totalNum = len(imageNames) - testNum
    if totalNum < 0:
        raise ValueError(
            'need at least %d images for the test set, found only %d in %s'
            % (testNum, len(imageNames), imagePath)
        )
    trainNum = int(splitNum * totalNum)
    valNum = totalNum - trainNum
    print('训练集划分:'+str(trainNum),
          '验证集划分:'+str(valNum),
          '测试集划分:'+str(testNum))

    # Shuffle through a local generator so the global NumPy random state is
    # left untouched.
    order = np.random.default_rng(seed).permutation(len(imageNames))
    shuffled = [imageNames[i] for i in order]
    subsets = {
        'train': shuffled[:trainNum],
        'val': shuffled[trainNum:trainNum + valNum],
        'test': shuffled[trainNum + valNum:],
    }

    # time.asctime() pads single-digit days with an extra space; splitting
    # on whitespace keeps the folder name free of doubled dashes.
    stamp = '-'.join(time.asctime().split()).replace(':', '-')
    datasetDir = os.path.join('.', 'dataSet-' + stamp)

    missingLabels = []
    for subset, names in subsets.items():
        imageDir = os.path.join(datasetDir, subset, 'images')
        labelDir = os.path.join(datasetDir, subset, 'labels')
        os.makedirs(imageDir, exist_ok=True)
        os.makedirs(labelDir, exist_ok=True)

        for imageName in names:
            shutil.copy(os.path.join(imagePath, imageName),
                        os.path.join(imageDir, imageName))

            # The label shares the image's stem; splitext strips only the
            # real extension, unlike str.replace('.jpg', '').
            labelName = os.path.splitext(imageName)[0] + '.txt'
            labelSrc = os.path.join(txtPath, labelName)
            if os.path.isfile(labelSrc):
                shutil.copy(labelSrc, os.path.join(labelDir, labelName))
            else:
                # Report instead of aborting halfway through the copy.
                missingLabels.append(imageName)

    if missingLabels:
        print('警告:以下 '+str(len(missingLabels))+' 张图片没有对应的标签文件: '
              + ', '.join(missingLabels))

    return datasetDir

if __name__ == '__main__':
    # Run the split only when the file is executed as a script, so that
    # importing it has no side effects.
    main()
    print('运行完毕')

脚本运行后会产生一个文件夹,类似于:dataSet-Fri-Jul-19-15-32-49-2024

其二级目录含有 test、val、train 文件夹

其三级目录含有 images、labels 文件夹

内部存有已经分配好的图片与标签。