Phú Nguyễn

26 bundles
1 file3 months ago
2

truyentranhtuan chapter crawler

index.js

"use strict";

const fs = require("fs");
const puppeteer = require("puppeteer");
const cheerio = require("cheerio");
const request = require("request-promise");
const req = require("request");
const url = require("url");

/**
 * Fetches the manga's index page, extracts its title and chapter list, and
 * makes sure the download directory for that title exists.
 *
 * @returns {Promise<{pages: Array<{url: string, name: string}>, title: string}>}
 *     The manga title (text of the page's <h1>) and its chapters. Each chapter
 *     is prepended as it is read, so `pages` is in reverse document order.
 * @throws {Error} If the request fails or the page contains no chapter links.
 */
async function connectServer() {
    // Named so that it does not shadow the `url` module required at file level.
    const mangaUrl = "http://truyentranhtuan.com/sins/";

    // request-promise resolves with the response body; no callback is needed.
    const htmlContent = await request.get(mangaUrl);

    const $ = cheerio.load(htmlContent);
    const mangaTitle = $("h1").text();
    const chapters = [];

    $("#manga-chapter")
        .find(".chapter-name")
        .each((index, chapter) => {
            const link = $(chapter)
                .find("a")
                .first();
            const href = link.attr("href");

            // Skip rows without a usable link instead of crashing on them.
            if (!href) {
                return;
            }

            chapters.unshift({ url: href, name: link.text() });
        });

    // An empty array is truthy, so the list has to be checked by length.
    if (chapters.length === 0) {
        throw new Error(`No chapters found at ${mangaUrl}`);
    }

    // `recursive` also creates ./downloaded and is a no-op when the directory exists.
    fs.mkdirSync(`./downloaded/${mangaTitle}`, { recursive: true });

    return { pages: chapters, title: mangaTitle };
}

/**
 * Visits every chapter with a headless browser and collects the image URLs
 * found inside the chapter's `#viewer` element, creating one download
 * directory per chapter along the way.
 *
 * @param {{pages: Array<{url: string, name: string}>, title: string}} chapterPages
 *     Manga title plus the chapters to visit, as resolved by connectServer().
 * @returns {Promise<Array<{url: string[], path: string}>>} One entry per chapter
 *     holding its image URLs and target directory. Entries are prepended, so
 *     the result is in reverse order of `chapterPages.pages`.
 * @throws {Error} If the browser cannot be launched or a chapter fails to load.
 */
async function getPages(chapterPages) {
    const tempPages = [];
    const browser = await puppeteer.launch();

    try {
        const page = await browser.newPage();

        for (const chapter of chapterPages.pages) {
            const path = `./downloaded/${chapterPages.title}/${chapter.name}/`;

            // `recursive` creates missing parents and is a no-op when the directory exists.
            fs.mkdirSync(path, { recursive: true });

            // timeout: 0 disables the navigation timeout.
            await page.goto(chapter.url, { timeout: 0 });

            // Only images inside #viewer are wanted; a chapter without a viewer
            // yields an empty list instead of a null-handle TypeError.
            const imagesURL = await page.$$eval("#viewer img", images =>
                images.map(img => img.src)
            );

            tempPages.unshift({ url: imagesURL, path });
        }
    } finally {
        // Always release the browser process, even when a chapter fails.
        await browser.close();
    }

    return tempPages;
}

/**
 * Extracts the file name (last path segment) from an image link.
 *
 * @param {string} imgLink - Absolute URL of the image.
 * @returns {string|null} The file name, or null when the link is not a valid
 *     http(s) URL or its path has no final segment.
 */
function getImageFileName(imgLink) {
    let parsedLink;

    try {
        parsedLink = new URL(imgLink);
    } catch (err) {
        // Blank or malformed src values cannot be parsed; the caller logs the skip.
        return null;
    }

    if (parsedLink.protocol !== "http:" && parsedLink.protocol !== "https:") {
        return null;
    }

    const imgName = /[^/]*$/.exec(parsedLink.pathname)[0];

    return imgName === "" ? null : imgName;
}

/**
 * Starts downloading every image of every chapter into the chapter's directory.
 *
 * The function only kicks off the transfers; they complete asynchronously after
 * it returns. Links that are blank, malformed, not http(s) or that carry no file
 * name are logged and skipped.
 *
 * @param {Array<{url: string[], path: string}>} [chapters=[]] - Chapters with
 *     their image links and target directory, as resolved by getPages().
 * @returns {void}
 */
function DownloadPages(chapters = []) {
    for (const chapter of chapters || []) {
        const downloadPath = chapter.path;

        for (const imgLink of Array.from(chapter.url || [])) {
            const imgName = getImageFileName(imgLink);

            if (imgName === null) {
                console.log(`Skipping unsupported image link: ${imgLink}`);
                continue;
            }

            const fileStream = fs.createWriteStream(`${downloadPath}/${imgName}`);

            // Without a listener a failed write would crash the process with
            // an unhandled "error" event.
            fileStream.on("error", err => {
                console.log(err);
            });

            req.get(imgLink)
                .on("error", err => {
                    console.log(err);
                    // pipe() does not end the destination when the source errors.
                    fileStream.end();
                })
                .pipe(fileStream);
        }
    }
}

/**
 * Runs the crawler end to end: resolves the chapter list, collects each
 * chapter's image URLs and then starts the downloads.
 *
 * Any failure along the chain is logged and reported through a non-zero
 * exit code.
 */
function main() {
    // `recursive` makes this a no-op when the directory already exists.
    fs.mkdirSync("./downloaded", { recursive: true });

    connectServer()
        .then(getPages)
        .then(chapters => {
            DownloadPages(chapters);
        })
        .catch(error => {
            console.log(error);
            process.exitCode = 1;
        });
}

// Script entry point: start the crawl as soon as the file is executed.
main();