大文件的分割与合并

1.整体思路

 我们经常会遇到大文件的传输问题:大文件整体传输非常慢,效率很低。因此可以采用分而治之的思想,先把大文件分割成若干个小文件分别传输,最后再把这些小文件合并还原成完整的文件。如果是跨机器传输,可以在发送端开启若干个线程,每个线程负责分割出文件的一部分,然后通过 socket 发送给服务端;服务端同样可以用多个线程接收各个分片,最后再进行合并。

2.整体代码

1.文件工具类代码

public class {
public void mergePartFiles(String dirpath, String partFileSuffix, String mergeFileName) throws Exception {
ArrayList<File> partFiles = getDirFiles(dirpath, partFileSuffix);
Collections.sort(partFiles, new FileComparator());
int partFileSize = (int) partFiles.get(0).length();
RandomAccessFile randomAccessFile = new RandomAccessFile(mergeFileName, "rw");
// System.out.println("文件长度"+partFileSize * (partFiles.size() - 1) + partFiles.get(partFiles.size() - 1).length());
randomAccessFile.setLength(partFileSize * (partFiles.size() - 1) + partFiles.get(partFiles.size() - 1).length());
randomAccessFile.close();
for (File partFile : partFiles) {
System.out.println(partFile.getName() + partFile.length());
}
ThreadPoolExecutor threadPool = new ThreadPoolExecutor(partFiles.size(), partFiles.size() * 3, 1, TimeUnit.SECONDS, new ArrayBlockingQueue<Runnable>(partFiles.size() * 2));
for (int i = 0; i < partFiles.size(); i++) {
threadPool.execute(new MergeRunnable(i * partFileSize, mergeFileName, partFiles.get(i)));
}
System.out.println("成功了!!!");
}
/**
* 获得目录下的文件
*
* @param dirpath
* @param suffix
* @return
*/
private ArrayList<File> getDirFiles(String dirpath, final String suffix) {
File path = new File(dirpath);
File[] fileArr = path.listFiles(new FilenameFilter() {
public boolean accept(File dir, String name) {
String lowerName = name.toLowerCase();
String filesuffix = suffix.toLowerCase();
if (lowerName.endsWith(filesuffix)) {
return true;
} else
return false;
}
});
ArrayList<File> files = new ArrayList<File>();
for (File file : fileArr) {
if (file.isFile()) {
files.add(file);
}
}
return files;
}
public List<String> splitBySize(String fileName, int count) throws Exception {
List<String> parts = new ArrayList<String>();
File file = new File(fileName);
// int byteSize = (int) Math.ceil(file.length() / (double)count);
List<Integer> bytes = new ArrayList<Integer>();
int countLen = (count+"").length();
testLen((int) file.length(), count, bytes);
ThreadPoolExecutor threadpool = new ThreadPoolExecutor(count, count * 3, 1, TimeUnit.SECONDS, new ArrayBlockingQueue<Runnable>(count * 2));
for (int i = 0; i < count; i++) {
String partFileName = file.getName() + "_" + leftPad(i+"",countLen,'0') + ".part";
threadpool.execute(new SplitRunnable(bytes.get(i), i * bytes.get(i == 0 ? 0 : i - 1), partFileName, file));
parts.add(partFileName);
}
return parts;
}
private String leftPad(String str, int countLen, char c) {
if (str.length() > countLen) {
return str;
}
char[] chs = new char[countLen];
Arrays.fill(chs, c);
char[] src = str.toCharArray();
System.arraycopy(src, 0, chs, countLen - src.length, src.length);
return new String(chs);
}
public static void main(String[] args) throws Exception {
new FileUtils().mergePartFiles("D:\test\frj\", ".mp4", "D:\test\frj\merge.mp4"); //1,626,409,091
}
/***
* 构建文件大小集合
*
* @param len
* @param count
* @param arr
*/
public static void testLen(Integer len, int count, List<Integer> arr) {
int i = len / count;
int j = 0;
if (len % count == 0) {
while (j < count) {
arr.add(i);
j++;
}
} else {
int sum = 0;
while (j < count - 1) {
sum += i;
arr.add(i);
j++;
}
arr.add(len - sum);
}
System.out.println(arr);
}
/**
* 根据文件名称 比较文件
*/
private class FileComparator implements Comparator<File> {
public int compare(File o1, File o2) {
return o1.getName().compareToIgnoreCase(o2.getName());
}
}
}

2.文件分解线程主要代码

/**
 * Task that copies one slice of a file into its own part file.
 *
 * <p>The slice starts at {@code startPos} in {@code originFile} and is at most
 * {@code byteSize} bytes long. The part file is written to the hard-coded
 * directory {@code d:/test/frj/}.
 */
public class SplitRunnable implements Runnable {
    /** Upper bound for the copy buffer so that a large part is not held in memory at once. */
    private static final int BUFFER_SIZE = 64 * 1024;

    int byteSize;
    String partFileName;
    File originFile;
    int startPos;
    /** Not used by the local-file variant; kept for the socket-based variant. */
    Socket socket;

    /**
     * @param byteSize     number of bytes that belong to this part
     * @param startPos     offset of the part inside {@code originFile}
     * @param partFileName name of the part file to create
     * @param originFile   file that is being split
     * @throws IOException declared for the socket-based variant, which opens a connection here
     */
    public SplitRunnable(int byteSize, int startPos, String partFileName, File originFile) throws IOException {
        this.startPos = startPos;
        this.byteSize = byteSize;
        this.partFileName = partFileName;
        this.originFile = originFile;
        // socket = new Socket("localhost",8088);
    }

    /**
     * Copies the slice into the part file. Failures are printed to standard
     * error; both files are closed in every case.
     */
    public void run() {
        RandomAccessFile source = null;
        OutputStream target = null;
        try {
            source = new RandomAccessFile(originFile, "r");
            source.seek(startPos);
            // Local-file variant. A socket-based variant would instead send the
            // part name, its length and then the bytes through a DataOutputStream.
            target = new FileOutputStream("d:/test/frj/" + partFileName);

            byte[] buffer = new byte[Math.max(1, Math.min(byteSize, BUFFER_SIZE))];
            int remaining = byteSize;
            // A single read() may return fewer bytes than requested, so loop
            // until the whole slice is copied or the end of the file is reached.
            while (remaining > 0) {
                int read = source.read(buffer, 0, Math.min(buffer.length, remaining));
                if (read < 0) {
                    break;
                }
                target.write(buffer, 0, read);
                remaining -= read;
            }
            target.flush();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (target != null) {
                try {
                    target.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (source != null) {
                try {
                    source.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

3.文件合并线程主要代码

/**
 * Task that writes the content of one part file into the merged file,
 * starting at a fixed offset.
 */
public class MergeRunnable implements Runnable {
    /** Size of the copy buffer; the part is streamed instead of loaded as a whole. */
    private static final int BUFFER_SIZE = 64 * 1024;

    long startPos;
    String mergeFileName;
    File partFile;

    /**
     * @param startPos      offset in the merged file at which this part begins
     * @param mergeFileName path of the merged file
     * @param partFile      part file whose content is copied
     */
    public MergeRunnable(long startPos, String mergeFileName, File partFile) {
        this.startPos = startPos;
        this.mergeFileName = mergeFileName;
        this.partFile = partFile;
    }

    /**
     * Copies the part file into the merged file at {@code startPos}. Failures
     * are printed to standard error; both files are closed in every case.
     */
    public void run() {
        RandomAccessFile target = null;
        FileInputStream source = null;
        try {
            target = new RandomAccessFile(mergeFileName, "rw");
            target.seek(startPos);
            source = new FileInputStream(partFile);
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            // Read until end of stream: available() is only an estimate and a
            // single read() is not guaranteed to fill the buffer.
            while ((read = source.read(buffer)) != -1) {
                target.write(buffer, 0, read);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (source != null) {
                try {
                    source.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (target != null) {
                try {
                    target.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }
}