Quick Starts

Run Puppeteer on an ECS instance

This topic provides a tutorial on how to run Puppeteer on an Elastic Compute Service (ECS) instance. Puppeteer is a powerful Node.js browser automation library that allows you to create web applications for automated testing and web crawling. By the time you complete this tutorial, you will understand how to:

  • Configure a runtime environment for Puppeteer

  • Write simple code to use the Google search engine to search for keywords and obtain the search results

Procedure

Step 1 Create an ECS instance that runs an Ubuntu operating system in the ECS console

image

Step 2 Install Node.js, npm, and n

Puppeteer is a Node.js library. Before you can install Puppeteer, you must install Node.js and npm. Download the Node.js installation package that corresponds to your instance operating system from the Node.js website and install Node.js as prompted. n is a lightweight and easy-to-use Node.js version manager. We recommend that you use n to switch between Node.js versions.

apt-get update && apt install -y nodejs
curl -0 -L https://npmjs.org/install.sh | sudo sh
# After you run the command, exit the shell. Then, reconnect to the instance to make the environment variables take effect.
# Switch Node.js to a stable version.
npm cache clean -f && npm install -g n && n stable

Step 3 Check whether a runtime environment for Puppeteer is properly installed

image

Step 4 Install Puppeteer

npm i puppeteer

Check whether Puppeteer is properly installed.

image

Step 5 Install Chrome dependencies

apt-get install -y fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf gconf-service libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget sudo libgbm-dev libxshmfence1 --no-install-recommends

Step 6 Run sample code

After you perform the preceding steps, you can write a Puppeteer script and run the script.

In the script, import the Puppeteer library, create a browser object, and then use the object to perform operations such as opening web pages, taking screenshots, and filling out forms.

In this example, we want to use the Google search engine to search for keywords. Create a file that is named auto_search.js and write the following sample code in the file.

const puppeteer = require('puppeteer');

/**
 * Resolve a selector to a single element handle.
 *
 * @param {object} page - Page-like object; its `$` and `$x` methods are called.
 * @param {*} selector - A selector string, or an already-resolved element
 *     handle. Non-string values are returned unchanged.
 * @returns {Promise<*>} The handle found for the selector, the non-string
 *     value that was passed in, or a falsy value when nothing matched.
 */
const EL = async function (page, selector) {
    let el = selector;
    // `typeof` is safe for null/undefined, whereas reading `.constructor`
    // on those values throws. String objects are still accepted.
    if (typeof selector === 'string' || selector instanceof String) {
        try {
            el = await page.$(selector);
        } catch (e) {
            // Any failure of page.$ (presumably an invalid CSS selector)
            // triggers a second attempt that treats the string as XPath.
            const matches = await page.$x(selector);
            el = matches.length > 0 ? matches[0] : null;
        }
    }
    if (!el) {
        console.log("can not found selector ", selector);
    }
    return el;
};

/**
 * Scroll an element towards the vertical middle of the window and move the
 * mouse pointer over its center once scrolling has settled.
 *
 * Does nothing when the element cannot be resolved or has no bounding box.
 *
 * @param {object} page - Puppeteer page; `page.mouse.wheel` and
 *     `page.mouse.move` are used.
 * @param {*} selector - Selector string or element handle, resolved via EL.
 * @returns {Promise<void>}
 */
const hover = async function (page, selector) {
    const el = await EL(page, selector);
    if (!el) {
        return;
    }
    let box = await el.boundingBox();
    if (!box) {
        return;
    }
    // Same height as the --window-size argument used when launching the browser.
    const vp = {height: 842};
    const deltaY = 0 - (vp.height / 2 - box.y);
    await page.mouse.wheel({deltaY: deltaY});

    // Poll until the element's y position is identical on two consecutive
    // reads, i.e. the scroll has stopped moving it.
    let prevY = box.y;
    for (;;) {
        await waiteFor(100);
        box = await el.boundingBox();
        if (!box) {
            // The element lost its box while scrolling; nothing left to hover.
            return;
        }
        if (box.y === prevY) {
            const x = Math.round(box.x + box.width / 2);
            const y = Math.round(box.y + box.height / 2);
            await page.mouse.move(x, y);
            await waiteFor(300);
            return;
        }
        prevY = box.y;
    }
};

/**
 * Left-click the center of an element, then pause for one second.
 *
 * Does nothing when the element cannot be resolved or has no bounding box,
 * which mirrors how hover treats the same situations.
 *
 * @param {object} page - Puppeteer page; `page.mouse.click` is used.
 * @param {*} selector - Selector string or element handle, resolved via EL.
 * @returns {Promise<void>}
 */
const click = async function (page, selector) {
    const el = await EL(page, selector);
    if (!el) {
        // EL has already logged the missing selector.
        return;
    }
    const box = await el.boundingBox();
    if (!box) {
        return;
    }
    const x = Math.round(box.x + box.width / 2);
    const y = Math.round(box.y + box.height / 2);
    await page.mouse.click(x, y, {button: "left", clickCount: 1});
    await waiteFor(1000);
};
    
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} mil - Delay passed to setTimeout.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
const waiteFor = (mil) =>
    new Promise((done) => {
        setTimeout(done, mil);
    });

/**
 * Wait up to five seconds for the page to navigate.
 *
 * Best-effort: any error from `page.waitForNavigation` (including the
 * timeout) is ignored and the function resolves to `undefined`.
 *
 * @param {object} page - Object exposing `waitForNavigation(options)`.
 * @returns {Promise<*>} Whatever `page.waitForNavigation` resolved to, or
 *     `undefined` when it failed.
 */
const waitForNavigation = async (page) => {
    const options = {timeout: 5000};
    let outcome;
    try {
        outcome = await page.waitForNavigation(options);
    } catch (ignored) {
        // Deliberately silent: callers continue whether or not a navigation happened.
    }
    return outcome;
};
    
/**
 * Script entry point: launch a browser, search Google for each keyword, and
 * print the rank and link of every visible result.
 *
 * The browser is closed in a `finally` block so a failure part-way through
 * does not leave a browser process running.
 */
(async () => {
    const USER_HOME = process.env.HOME || process.env.USERPROFILE;
    const lunchParam = {
        headless: false,
        args: [
            "--window-size=1366,842",
            "--no-sandbox",
            "--disable-gpu",
            `--user-data-dir=${USER_HOME}/.chrome/user_profile`,
        ],
        defaultViewport: null,
        dumpio: false,
    };

    let browser;
    try {
        try {
            browser = await puppeteer.launch(lunchParam);
        } catch (e) {
            //windows linux adaption
            // A headed launch failed (presumably no display is available),
            // so retry with the same options in headless mode.
            console.log("headless false launch failure try headless launch");
            lunchParam.headless = true;
            browser = await puppeteer.launch(lunchParam);
        }
        console.log("start param ", lunchParam);
        console.log(await browser.version(), await browser.userAgent(), await browser.wsEndpoint());

        const pages = await browser.pages();
        const page = pages[0];

        // A fixed desktop user agent is always used for the search page.
        const fua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36";
        console.log("use user-agent:", fua);
        await page.setUserAgent(fua);
        await page.goto('https://www.google.com');

        const keywords = ["alibaba cloud ecs", "alibaba cloud rds"];
        console.log("batch kwords ,", keywords);

        // Keywords are searched from the last entry to the first.
        for (const kw of [...keywords].reverse()) {
            // Clear whatever is currently in the search box before typing.
            const q = await page.$('[name="q"]');
            const qwords = await page.evaluate((qn) => qn.value, q);
            console.log("Clear Search Keyword [", qwords, "]");
            await page.focus('[name="q"]');
            await page.keyboard.down('Shift');
            for (let i = 0; i < qwords.length; i++) {
                await page.keyboard.press('ArrowRight');
            }
            await page.keyboard.up('Shift');
            await page.keyboard.press('Backspace');

            console.log("Search Keyword [", kw, "]");
            await page.keyboard.type(kw, {delay: 200});
            await page.keyboard.press('Enter');
            await waitForNavigation(page);
            console.log("search success ");

            // NOTE(review): "yuRUbf" is presumably the class of Google's result
            // containers and may change; `page.$x` is reportedly unavailable in
            // newer Puppeteer releases - confirm against the installed version.
            const resultXPath = '//*[@id="search"]//*[@class="yuRUbf"]';
            let slist = await page.$x(resultXPath);
            let rank = 0;
            for (let i = 0; i < slist.length; i++) {
                try {
                    // Re-query on every iteration so the handle used is fresh.
                    slist = await page.$x(resultXPath);
                    const item = slist[i];
                    const boundingBox = await item.boundingBox();
                    if (boundingBox) {
                        const alink = await item.$('a');
                        const itemHref = await page.evaluate((alk) => alk.href, alink);
                        rank++;
                        await hover(page, alink);
                        console.log(rank, itemHref);
                    }
                } catch (e) {
                    // One bad result must not abort the remaining ones.
                    console.log(e);
                }
            }
        }

        console.log("search end...");
        await waiteFor(5000);
    } finally {
        // `browser` stays undefined when both launch attempts failed.
        if (browser) {
            await browser.close();
        }
    }
})().catch((err) => {
    // Report the failure and keep a non-zero exit status.
    console.error(err);
    process.exitCode = 1;
});

Step 7 Run the node auto_search.js command

image

Recommended Reading

This topic describes how to run Puppeteer on an ECS instance. Alibaba Cloud ECS is a cloud computing service that allows you to purchase, deploy, and manage virtual machines (instances) in Alibaba Cloud. ECS provides the following benefits: high performance, scalability, security, reliability, elasticity, support for big data, and support across various regions and zones. Alibaba Cloud ECS can meet your business requirements for various scenarios.

For more information about ECS, see ECS documentation.

Was this helpful?

open