Interfacing HTTPBuilder and HTMLUnit... some code

Posted by Misha Koshelev on Stack Overflow See other posts from Stack Overflow or by Misha Koshelev
Published on 2010-06-11T04:32:06Z Indexed on 2010/06/11 4:43 UTC
Read the original article Hit count: 464

Ok, this isn't even a question:

import com.gargoylesoftware.htmlunit.HttpMethod
import com.gargoylesoftware.htmlunit.WebClient
import com.gargoylesoftware.htmlunit.WebResponseData
import com.gargoylesoftware.htmlunit.WebResponseImpl
import com.gargoylesoftware.htmlunit.util.Cookie
import com.gargoylesoftware.htmlunit.util.NameValuePair

import static groovyx.net.http.ContentType.TEXT

import java.io.File

import java.util.logging.Logger

import org.apache.http.impl.cookie.BasicClientCookie

/**
 * HTTPBuilder class
 *
 * Wraps a {@code groovyx.net.http.HTTPBuilder} and remembers the most recent
 * response (text, response object, URI and HTTP method) so that the page can
 * afterwards be run through HTMLUnit to execute its Javascript.
 *
 * Every request/processing method returns the current page text parsed by
 * {@code XmlSlurper} (via {@link #saveText()}).
 *
 * Instances hold mutable per-request state in plain fields and perform no
 * synchronization of their own.
 *
 * @author Misha Koshelev
 */
class HTTPBuilder {
  /**
   * HTTP Builder - implement this way to avoid underlying logging output
   */
  def httpBuilder

  /**
   * Logger
   */
  def logger

  /**
   * Directory for storing HTML files, if any
   */
  def saveDirectory=null

  /**
   * Index of current HTML file in directory
   */
  def saveIdx=1

  /**
   * Current page text
   */
  def text=null

  /**
   * Response for processJavascript (Complex Version)
   */
  def resp=null

  /**
   * URI for processJavascript (Complex Version)
   */
  def uri=null

  /**
   * HttpMethod for processJavascript (Complex Version)
   */
  def method=null

  /**
   * Default constructor
   *
   * Creates the wrapped groovyx HTTPBuilder and a java.util.logging logger
   * named after this class.
   */
  public HTTPBuilder() {
    // New HTTPBuilder
    httpBuilder=new groovyx.net.http.HTTPBuilder()

    // Logging
    logger=Logger.getLogger(this.class.name)
  }

  /**
   * Constructor that allows saving output files for testing
   *
   * @param saveDirectory directory into which each page text is written
   * @param saveIdx       number used for the first saved file name
   */
  public HTTPBuilder(saveDirectory,saveIdx) {
    this()
    this.saveDirectory=saveDirectory
    this.saveIdx=saveIdx
  }

  /**
   * Save text and return corresponding XmlSlurper object
   *
   * When a save directory is set, the current text is written to
   * {@code <saveDirectory>/<saveIdx>.html} and the index is incremented.
   *
   * @return the current text parsed with the NekoHTML SAX parser
   */
  public saveText() {
    if (saveDirectory) {
      def file=new File(saveDirectory.toString()+File.separator+saveIdx+".html")
      logger.finest "HTTPBuilder.saveText: file=\""+file.toString()+"\""
      // Overwrite instead of append (file<<text appends): a file left over
      // from an earlier run must not end up with two documents in it.
      file.text=text
      saveIdx++
    }
    new XmlSlurper(new org.cyberneko.html.parsers.SAXParser()).parseText(text)
  }

  /**
   * Wrapper around supertype get method
   *
   * The request is always issued with content type TEXT so the raw page text
   * can be captured. The caller's map is not modified.
   *
   * @param args request arguments passed to the wrapped builder (e.g. uri)
   * @return result of {@link #saveText()} for the received page
   */
  public Object get(Map<String,?> args) {
    logger.finer "HTTPBuilder.get: args=\""+args+"\""
    // Work on a copy so the caller's map is left untouched
    def request=new LinkedHashMap(args)
    request.contentType=TEXT
    httpBuilder.get(request) { resp,reader->
      captureResponse(resp,reader,request.uri,HttpMethod.GET)
    }
  }

  /**
   * Wrapper around supertype post method
   *
   * The request is always issued with content type TEXT so the raw page text
   * can be captured. The caller's map is not modified.
   *
   * @param args request arguments passed to the wrapped builder (e.g. uri)
   * @return result of {@link #saveText()} for the received page
   */
  public Object post(Map<String,?> args) {
    logger.finer "HTTPBuilder.post: args=\""+args+"\""
    // Work on a copy so the caller's map is left untouched
    def request=new LinkedHashMap(args)
    request.contentType=TEXT
    httpBuilder.post(request) { resp,reader->
      captureResponse(resp,reader,request.uri,HttpMethod.POST)
    }
  }

  /**
   * Remember the response details needed by processJavascript and save the text
   */
  private captureResponse(response,reader,requestUri,httpMethod) {
    text=reader.text
    this.resp=response
    this.uri=requestUri
    this.method=httpMethod
    saveText()
  }

  /**
   * Load cookies from specified file
   *
   * Expects the format written by {@link #saveCookies}: a serialized list of
   * maps. "name" and "value" feed the cookie constructor; every other entry
   * is assigned to the cookie property of the same name.
   *
   * NOTE(review): this uses Java object deserialization - only load files
   * from a trusted source.
   *
   * @param file file to read the cookies from
   */
  def loadCookies(file) {
    logger.finer "HTTPBuilder.loadCookies: file=\""+file.toString()+"\""
    file.withObjectInputStream { ois->
      ois.readObject().each { cookieMap->
        def cookie=new BasicClientCookie(cookieMap.name,cookieMap.value)
        cookieMap.each { key,value->
          // name/value were consumed by the constructor above
          if (key!="name"&&key!="value") {
            cookie."${key}"=value
          }
        }
        httpBuilder.client.cookieStore.addCookie(cookie)
      }
    }
  }

  /**
   * Save cookies to specified file
   *
   * Each cookie in the wrapped client's cookie store is written as a map of
   * version, name, value, domain, path, expiryDate and secure.
   *
   * @param file file to write the cookies to
   */
  def saveCookies(file) {
    logger.finer "HTTPBuilder.saveCookies: file=\""+file.toString()+"\""
    def cookieMaps=httpBuilder.client.cookieStore.getCookies().collect { cookie->
      [version:cookie.version,
       name:cookie.name,
       value:cookie.value,
       domain:cookie.domain,
       path:cookie.path,
       expiryDate:cookie.expiryDate,
       // keep the secure flag so it survives a save/load round trip
       secure:cookie.isSecure()]
    }
    file.withObjectOutputStream { oos->
      oos.writeObject(cookieMaps)
    }
  }

  /**
   * Process Javascript using HTMLUnit (Simple Version)
   *
   * Writes the current text to a temporary file, loads that file in an
   * HTMLUnit WebClient, waits up to 10 seconds for background Javascript and
   * replaces the current text with the resulting page XML.
   *
   * @return result of {@link #saveText()} for the processed page
   */
  def processJavascript() {
    logger.finer "HTTPBuilder.processJavascript (Simple)"
    def tempFile=File.createTempFile("HTMLUnit","")
    try {
      tempFile<<text
      def webClient=new WebClient()
      try {
        // toURI().toURL() yields a valid file URL on every platform
        def page=webClient.getPage(tempFile.toURI().toURL())
        webClient.waitForBackgroundJavaScript(10000)
        text=page.asXml()
      } finally {
        webClient.closeAllWindows()
      }
    } finally {
      // Clean up even when loading or script execution fails
      tempFile.delete()
    }
    saveText()
  }

  /**
   * Process Javascript using HTMLUnit (Complex Version)
   * Closure, if specified, used to determine presence of necessary elements
   *
   * Rebuilds the last response (headers, status, body, URI, method) as an
   * HTMLUnit response, copies the HTTP client's cookies into the WebClient
   * and loads the response into its current window. If a closure is given it
   * is called with the page up to 20 times, 500 ms apart, until it returns
   * a true value.
   *
   * @param closure optional condition called with the HTMLUnit page
   * @return result of {@link #saveText()} for the processed page
   * @throws IllegalStateException if no get() or post() has been performed yet
   */
  def processJavascript(closure) {
    logger.finer "HTTPBuilder.processJavascript (Complex)"

    if (resp==null) {
      throw new IllegalStateException("processJavascript(closure) requires a prior get() or post()")
    }

    // Convert response headers
    def headers=resp.allHeaders.collect { header->
      new NameValuePair(header.name,header.value)
    }
    def responseData=new WebResponseData(text.bytes,resp.statusLine.statusCode,resp.statusLine.toString(),headers)
    def response=new WebResponseImpl(responseData,uri.toURL(),method,0)

    def webClient=new WebClient()
    try {
      // Transfer cookies
      httpBuilder.client.cookieStore.getCookies().each { cookie->
        webClient.cookieManager.addCookie(new Cookie(cookie.domain,cookie.name,cookie.value,cookie.path,cookie.expiryDate,cookie.isSecure()))
      }
      def page=webClient.loadWebResponseInto(response,webClient.getCurrentWindow())

      // Wait for condition
      if (closure) {
        for (i in 1..20) {
          if (closure(page)) {
            break
          }
          synchronized(page) {
            page.wait(500)
          }
        }
      }

      // Return text
      text=page.asXml()
    } finally {
      // Release the client's windows even when processing fails
      webClient.closeAllWindows()
    }
    saveText()
  }
}

This allows one to interface HTTPBuilder with HTMLUnit. Enjoy!

Misha

© Stack Overflow or respective owner

Related posts about JavaScript

Related posts about groovy