Java版的Puppeteer爬虫框架

Java版的Puppeteer爬虫框架

首页 > 游戏大全 > 流氓精英2地牢爬虫 | 更新时间:2024-05-09
GitHub地址:https://github.com/fanyong920/jvppeteer

快速开始

自动下载最新chromium并启动:

package com.ruiyun.example; import com.ruiyun.jvppeteer.core.Puppeteer; import com.ruiyun.jvppeteer.core.browser.Browser; import com.ruiyun.jvppeteer.core.browser.BrowserFetcher; import java.io.IOException; import java.util.concurrent.ExecutionException; /** * 展示下载最新的chromuim浏览器的例子 */ public class DownloadChromiumExample2 { public static void main(String[] args) throws IOException, InterruptedException, ExecutionException { Puppeteer puppeteer = new Puppeteer(); //创建下载实例 BrowserFetcher browserFetcher = puppeteer.createBrowserFetcher(); //下载最新版本的chromuim browserFetcher.download(); Browser browser = Puppeteer.launch(false); String version = browser.version(); System.out.println(version); } }

爬取整个页面的内容:

package com.ruiyun.example; import com.ruiyun.jvppeteer.core.Puppeteer; import com.ruiyun.jvppeteer.options.LaunchOptions; import com.ruiyun.jvppeteer.options.OptionsBuilder; import com.ruiyun.jvppeteer.core.browser.Browser; import com.ruiyun.jvppeteer.core.page.Page; import java.io.IOException; import java.util.ArrayList; public class PageContentExample { public static void main(String[] args) throws InterruptedException, IOException { String path = new String("F:\\java教程\\49期\\vuejs\\puppeteer\\.local-chromium\\win64-722234\\chrome-win\\chrome.exe".getBytes(),"UTF-8"); // String path ="D:\\develop\\project\\toString\\chrome-win\\chrome.exe"; ArrayList<String> arrayList = new ArrayList<>(); LaunchOptions options = new OptionsBuilder().withArgs(arrayList).withHeadless(false).withExecutablePath(path).build(); arrayList.add("--no-sandbox"); arrayList.add("--disable-setuid-sandbox"); Browser browser = Puppeteer.launch(options); Page page = browser.newPage(); page.goTo("https://www.baidu.com/?tn=98012088_10_dg&ch=3"); String content = page.content(); System.out.println("=======================content==============" content); } }

截图

package com.ruiyun.example; import com.ruiyun.jvppeteer.core.Puppeteer; import com.ruiyun.jvppeteer.core.browser.Browser; import com.ruiyun.jvppeteer.core.page.Page; import com.ruiyun.jvppeteer.options.Clip; import com.ruiyun.jvppeteer.options.LaunchOptions; import com.ruiyun.jvppeteer.options.OptionsBuilder; import com.ruiyun.jvppeteer.options.ScreenshotOptions; import java.util.ArrayList; public class PagescreenshotExample { public static void main(String[] args) throws Exception { // String path = new String("F:\\java教程\\49期\\vuejs\\puppeteer\\.local-chromium\\win64-722234\\chrome-win\\chrome.exe".getBytes(),"UTF-8"); ArrayList<String> arrayList = new ArrayList<>(); String path = "D:\\develop\\project\\toString\\chrome-win\\chrome.exe"; LaunchOptions options = new OptionsBuilder().withArgs(arrayList).withHeadless(true).withExecutablePath(path).build(); arrayList.add("--no-sandbox"); arrayList.add("--disable-setuid-sandbox"); Browser browser = Puppeteer.launch(options); Page page = browser.newPage(); page.goTo("https://www.baidu.com/?tn=98012088_10_dg&ch=3"); // ScreenshotOptions screenshotOptions = new ScreenshotOptions(); // //设置截图范围 // Clip clip = new Clip(1.0,1.56,400,400); // screenshotOptions.setClip(clip); // //设置存放的路径 // screenshotOptions.setPath("test.png"); // page.screenshot(screenshotOptions); ScreenshotOptions screenshotOptions = new ScreenshotOptions(); //设置截图范围 Clip clip = new Clip(1.0,1.56,400,400); screenshotOptions.setClip(clip); //设置存放的路径 screenshotOptions.setPath("test.png"); page.screenshot(screenshotOptions); } }

文件选择

package com.ruiyun.example; import com.ruiyun.jvppeteer.core.Puppeteer; import com.ruiyun.jvppeteer.core.browser.Browser; import com.ruiyun.jvppeteer.core.page.ElementHandle; import com.ruiyun.jvppeteer.core.page.FileChooser; import com.ruiyun.jvppeteer.core.page.Page; import com.ruiyun.jvppeteer.options.LaunchOptions; import com.ruiyun.jvppeteer.options.OptionsBuilder; import com.ruiyun.jvppeteer.options.PageNavigateOptions; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; public class PageFileChooserExample { public static void main(String[] args) throws InterruptedException, ExecutionException, IOException { // String path = new String("F:\\java教程\\49期\\vuejs\\puppeteer\\.local-chromium\\win64-722234\\chrome-win\\chrome.exe".getBytes(),"UTF-8"); ArrayList<String> arrayList = new ArrayList<>(); String path = "D:\\develop\\project\\toString\\chrome-win\\chrome.exe"; LaunchOptions options = new OptionsBuilder().withArgs(arrayList).withHeadless(false).withExecutablePath(path).build(); arrayList.add("--no-sandbox"); arrayList.add("--disable-setuid-sandbox"); Browser browser = Puppeteer.launch(options); Page page = browser.newPage(); PageNavigateOptions options1 = new PageNavigateOptions(); options1.setWaitUntil(Arrays.asList("domcontentloaded")); page.goTo("https://www.baidu.com/?tn=98012088_10_dg&ch=3"); Future<FileChooser> fileChooserFuture = page.waitForFileChooser(30000); ElementHandle elementHandle = page.$("#form > span.bg.s_ipt_wr.quickdelete-wrap > span.soutu-btn"); elementHandle.click(); //点击选择文件的按钮 ElementHandle button = page.$("#form > div > div.soutu-state-normal > div.upload-wrap > input"); button.click(); //等待一个选择文件的弹窗事件返回 FileChooser fileChooser = fileChooserFuture.get(); //选择本地的文件 List<String> paths = new ArrayList<>(); paths.add("C:\\Users\\howay\\Desktop\\sunway.png"); fileChooser.accept(paths); } }

除此之外,还有更多的功能,Jvppeteer可以做:

  • 生成页面 PDF。
  • 抓取 SPA(单页应用)并生成预渲染内容(即“SSR”(服务器端渲染))。
  • 自动提交表单,进行 UI 测试,键盘输入等。
  • 创建一个时时更新的自动化测试环境。 使用最新的 JavaScript 和浏览器功能直接在最新版本的Chrome中执行测试。
  • 捕获网站的 timeline trace,用来帮助分析性能问题。
  • 测试浏览器扩展。
  • 查看全文
    大家还看了
    也许喜欢
    更多游戏

    Copyright © 2024 妖气游戏网 www.17u1u.com All Rights Reserved