node爬虫偷取笑话上传数据库

node爬虫(笑话)

很喜欢async、await

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
const fs = require('fs');
const fetch = require('node-fetch');
const cheerio = require('cheerio');

// Base address of the listing pages; spider() appends the page number to it.
let url = 'http://www.budejie.com/text/';

// knex query builder configured for the MySQL database that receives the jokes.
// NOTE(review): the host and credentials are hard-coded here; consider loading
// them from the environment instead — confirm against how this script is deployed.
const mysql = require('knex')({
  client: 'mysql',
  connection: {
    host: '118.25.43.168',
    port: '3306',
    user: '***',
    password: '***',
    database: '***',
  }
});

// Accumulates the rows scraped from every page until they are uploaded.
let joke = [];

// Kick off the crawl. start() is async, so a rejection handler is attached here
// to report the failure and mark the process as failed instead of leaving the
// promise floating.
start().catch((err) => {
  console.error('crawler failed:', err);
  process.exitCode = 1;
});

/**
 * Crawl pages 0 through 49 one after another, collect every scraped row in the
 * module-level `joke` array, then upload the collected rows to the database.
 *
 * Each page is fetched through runHelper(), so a page that does not finish
 * within runHelper's default time limit yields `null` and is skipped.
 *
 * @returns {Promise<void>} Resolves once the upload has finished and the
 *   database connection pool has been released; rejects if the upload fails.
 */
async function start() {
  try {
    for (let i = 0; i < 50; i++) {
      let res;
      try {
        res = await runHelper(spider(i));
      } catch (err) {
        // A single failing page must not throw away everything collected so far.
        console.error(`page ${i} failed:`, err);
        continue;
      }
      if (res) {
        // Progress output: page index and number of rows found on that page.
        // (`res` is an array, so the previous `res.name` was always undefined.)
        console.log(i, res.length);
        joke = joke.concat(res);
      }
    }
    // Await the upload so that failures surface to the caller of start().
    await upload(joke);
  } finally {
    // Release the knex connection pool; otherwise its open connections keep
    // the Node process alive after the work is done.
    await mysql.destroy();
  }
}

/**
 * Download one listing page and extract its entries.
 *
 * The page address is the module-level `url` with `page` appended. Every
 * element matching `.j-r-list>ul>li` produces one record.
 *
 * @param {number} page - Page number appended to the base URL.
 * @returns {Promise<Array<{name: string, avatar: (string|undefined), joke: string}>>}
 *   One record per list item: `name` is the text of `.u-user-name`, `avatar`
 *   is the `data-original` attribute of the first `img`, and `joke` is the
 *   text of `.j-r-list-c-desc a`. Empty when nothing matches.
 */
async function spider(page) {
  const currentUrl = `${url}${page}`;
  const response = await fetch(currentUrl);
  const html = await response.text();
  const $ = cheerio.load(html);

  const data = [];
  // `.each` is the side-effect iterator; `.map` would build a result set that is thrown away.
  $('.j-r-list>ul>li').each((index, item) => {
    const $item = $(item);
    data.push({
      name: $item.find('.u-user-name').text(),
      avatar: $item.find('img').attr('data-original'),
      joke: $item.find('.j-r-list-c-desc a').text(),
    });
  });
  return data;
}


/**
 * Race a promise against a time limit.
 *
 * @param {Promise<*>} fn - The pending work to wait for (a promise, despite the name).
 * @param {number} [time=2000] - Time limit in milliseconds.
 * @returns {Promise<*>} Settles like `fn` when it finishes first; resolves to
 *   `null` when the time limit elapses first. The losing work is not
 *   cancelled, only ignored.
 */
async function runHelper(fn, time = 2000) {
  let timer;
  const limit = new Promise((resolve) => {
    timer = setTimeout(() => {
      resolve(null);
    }, time);
  });

  try {
    return await Promise.race([limit, fn]);
  } finally {
    // Without this, a timer that lost the race stays pending and keeps the
    // event loop alive for the remainder of `time`.
    clearTimeout(timer);
  }
}


/**
 * Insert the given rows into the `kuan_joke` table, one insert per row and
 * strictly in order, logging the result of each insert.
 *
 * @param {Array<Object>} joke - Rows to insert.
 * @returns {Promise<void>} Resolves after the last insert has completed.
 */
async function upload(joke) {
  for (const row of joke) {
    const inserted = await mysql('kuan_joke').insert(row);
    console.log(inserted);
  }
}