re0:从零开始的javascript

需求:爬取网页时需要同时对多个分页进行爬取,并且能够在爬取过程中随时暂停。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/**
 * Demo 1: run a list of async tasks one after another on a setTimeout chain,
 * and pause the chain from the outside by flipping a flag.
 */

/**
 * Wraps a value in an async task.
 * @param {*} x - value the task resolves to
 * @returns {function(): Promise<*>} task that resolves to `x` when called
 */
const builder = (x) => async () => x;

// Each task resolves to a snippet of JavaScript source text. The template
// literal is required: the source string is compiled later by `new Function`.
const tasks = Array.from({ length: 1120 }, (_, k) => builder(`console.log(${k})`));

let index = 0; // position of the next task to execute
let timer; // handle of the pending timeout, kept so the pause can cancel it
let canRun = true; // set to false to pause the chain

/**
 * Schedules the next task. When the timeout fires, the task at `index` is
 * awaited, its source is compiled and executed, and the chain re-arms itself
 * while `canRun` is still true and tasks remain.
 */
function run() {
  timer = setTimeout(async () => {
    // Re-check here so a pause requested after scheduling is honoured, and so
    // the chain ends cleanly instead of calling `undefined` past the last task.
    if (!canRun || index >= tasks.length) {
      return;
    }
    const funcSource = await tasks[index]();
    // Advance only after the task has been fetched so that task 0 is not skipped.
    index++;
    // NOTE(review): `new Function` executes arbitrary source text. It is only
    // acceptable here because every task string is generated locally above.
    const func = new Function(funcSource);
    func();
    if (canRun) {
      run();
    }
  });
}

run();

setTimeout(function () {
  console.log("change canRun");
  canRun = false;
  // Cancel the timeout that is still pending; clearing it from inside its own
  // callback (after it has fired) has no effect.
  clearTimeout(timer);
}, 10);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/**
 * Synchronously blocks the calling thread by spinning until the wall clock
 * has advanced by `milliSeconds`. Nothing else (timers, promises, I/O
 * callbacks) can run while this loop is spinning.
 * @param {number} milliSeconds - how long to block, in milliseconds
 * @returns {undefined}
 */
function sleep(milliSeconds) {
  const deadline = Date.now() + milliSeconds;
  while (Date.now() < deadline) {
    // busy-wait on purpose
  }
}
/**
 * Blocks for 300 ms via `sleep`, then resolves to a random boolean that is
 * true when `Math.random()` exceeds 0.1.
 * @returns {Promise<boolean>}
 */
const getRandom = async () => {
  sleep(300);
  const roll = Math.random();
  return roll > 0.1;
};
/**
 * Wraps a value in an async task.
 * @param {*} x - value the task resolves to
 * @returns {function(): Promise<*>} task that resolves to `x` when called
 */
const builder = (x) => async () => x;

// 1120 tasks, each resolving to a snippet of JavaScript source text. The
// template literal (backticks) is required: without it `${k}` is a syntax
// error, and the consumer needs a string to compile.
const tasks = Array.from({ length: 1120 }, (_, k) => builder(`console.log(${k})`));
/**
 * Walks `tasks` in order. Before each task it awaits `getRandom()`; a falsy
 * answer ends the walk, otherwise the task is awaited and the source text it
 * resolves to is compiled with `new Function` and executed.
 * @returns {Promise<void>}
 */
async function run() {
  for (const task of tasks) {
    const keepGoing = await getRandom();
    if (!keepGoing) {
      return;
    }
    const source = await task();
    const compiled = new Function(source);
    compiled();
  }
}

run();
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
const EventEmitter = require('events');
/**
 * Wraps a value in an async task.
 * @param {*} x - value the task resolves to
 * @returns {function(): Promise<*>} task that resolves to `x` when called
 */
const builder = (x) => async () => x;

// 1120 tasks, each resolving to a snippet of JavaScript source text. The
// template literal (backticks) is required: without it `${k}` is a syntax
// error, and the consumer needs a string to compile.
const tasks = Array.from({ length: 1120 }, (_, k) => builder(`console.log(${k})`));
/**
 * Blocking delay: spins in a tight loop until `milliSeconds` of wall-clock
 * time have elapsed. The thread does nothing else while it spins.
 * @param {number} milliSeconds - how long to block, in milliseconds
 * @returns {undefined}
 */
function sleep(milliSeconds) {
  for (const wakeAt = Date.now() + milliSeconds; Date.now() < wakeAt; ) {
    // intentionally empty: busy-wait
  }
}
/**
 * Blocks for 300 ms via `sleep`, then resolves to a random boolean that is
 * true when `Math.random()` exceeds 0.1.
 * @returns {Promise<boolean>}
 */
const getRandom = async () => {
  sleep(300);
  const sample = Math.random();
  return sample > 0.1;
};
/**
 * Runs the module-level `tasks` one at a time on a setTimeout chain and emits
 * a `'next'` event carrying each task's resolved value.
 *
 * The queue starts itself on construction. `_stop()` pauses it and `_start()`
 * resumes from the task at `_index`.
 */
class Queue extends EventEmitter {
  constructor() {
    super();
    this._index = 0; // position of the next task to run
    this._stopped = false; // true between _stop() and the next _start()
    this._start();
  }

  /**
   * Arms the timer for the task at `_index`. After that task resolves and
   * `'next'` has been emitted, the queue re-arms itself unless it has been
   * stopped in the meantime or the task list is exhausted.
   */
  _start() {
    this._stopped = false;
    // Drop any timer that is still pending so calling _start() twice does not
    // create two chains running in parallel.
    clearTimeout(this._timer);
    this._timer = setTimeout(async () => {
      // End cleanly at the end of the list instead of calling `undefined`.
      if (this._stopped || this._index >= tasks.length) {
        return;
      }
      const source = await tasks[this._index]();
      this.emit('next', source);
      this._index++;
      // _stop() may have been called while the task was awaited or from a
      // 'next' listener; clearTimeout cannot help then (this timer has already
      // fired), so the flag is what prevents re-arming.
      if (!this._stopped) {
        this._start();
      }
    }, 0);
  }

  /** Pauses the queue: cancels the pending timer and blocks re-arming. */
  _stop() {
    this._stopped = true;
    clearTimeout(this._timer);
  }
}
// Build the queue; its constructor calls `_start()`, so tasks begin running
// as soon as the current synchronous code has finished.
const q = new Queue();

// Each 'next' event carries JavaScript source text; compile it and run it.
const runSource = async (source) => {
  const compiled = new Function(source);
  compiled();
};
q.on('next', runSource);

// Ask the queue to stop after 100 ms.
setTimeout(() => {
  q._stop();
}, 100);