(: Content of http-client module :)

xquery version "3.0";

(:
 : Copyright 2006-2009 The FLWOR Foundation.
 :
 : Licensed under the Apache License, Version 2.0 (the "License");
 : you may not use this file except in compliance with the License.
 : You may obtain a copy of the License at
 :
 : http://www.apache.org/licenses/LICENSE-2.0
 :
 : Unless required by applicable law or agreed to in writing, software
 : distributed under the License is distributed on an "AS IS" BASIS,
 : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 : See the License for the specific language governing permissions and
 : limitations under the License.
:)

(:~
 : <p>
 : This module provides an implementation of the
 : <a href="http://expath.org/modules/http-client/">EXPath Http Client</a>.
 : It provides functions for making HTTP requests and is a superset of the
 : module specified by EXPath.
 : Specifically, it implements the <code>http:send-request()</code> functions
 : as specified by EXPath. Moreover, it adds an additional function
 : <code>http:read()</code> (with several arities for the sake of ease).
 : </p>
 :
 : <p>
 : In general, both functions take a description of the HTTP request to make
 : as parameter, execute the request, and return a representation of the HTTP
 : response. For instance, in the following code snippet, we fetch the Zorba
 : home page:
 : </p>
 : <pre class="ace-static" ace-mode="xquery"><![CDATA[import module namespace http = "http://expath.org/ns/http-client";
 : 
 : http:send-request(
 :  <http:request href="http://zorba.io" method="get" />
 : )
 : ]]></pre>
 : 
 : <p>
 : The <code>http:send-request()</code> functions are declared as sequential. 
 : Sequential functions are allowed to have side effects. For example, most probably,
 : an HTTP POST request is a request that has side effects because it adds/changes
 : a remote resource. Sequential functions are specified in the
 : <a href="http://zorba.io/documentation/2.9/zorba/scripting_tutorial.html">XQuery Scripting Extension</a>.
 : In contrast, the http:read() functions are not declared as sequential -
 : they are declared as nondeterministic though, which
 : means that several calls may return different results.
 : HTTP requests performed using these functions are <b>not</b> allowed to have
 : side effects.
 : </p>
 :
 : <p>
 : The response is returned as a sequence of one or more items. The first
 : item is an <code>http:response</code> element with much the same
 : structure as an <code>http:request</code> element, but without the content itself.
 : The content is returned as the second item (or several items in case of
 : a multipart response) as a string, a document node, or a binary item.
 : This depends on the content-type returned.
 : Specifically, the rules are as follows:
 : 
 : <ul>
 :  <li>A document node is returned if the media type has a MIME type of
 :     text/xml, application/xml, text/xml-external-parsed-entity, or
 :     application/xml-external-parsed-entity, as defined in [RFC 3023]
 :     (except that application/xml-dtd is considered a text media type).
 :    MIME types ending in +xml are also XML media types.</li>
 :  <li>A document node is returned if the media type has a MIME type of
 :      text/html. In order to be able to make HTML parseable, tidy is automatically
 :      invoked. If you want to prevent that, you can also set your own content-type
 :      by setting the override-media-type attribute in the request element.
 :       For tidying, the following <a href="http://tidy.sourceforge.net/docs/quickref.html">options</a>
 :       will be used:
 :       <ul>
 :         <li>TidyXmlOut=yes</li>
 :         <li>TidyDoctypeMode=TidyDoctypeOmit</li>
 :         <li>TidyQuoteNbsp=yes</li>
 :         <li>TidyCharEncoding="utf8"</li>
 :         <li>TidyNewline="LF"</li>
 :       </ul>
 :  </li>
 :  <li>An xs:string item is returned if the media type has a text MIME type,
 :     i.e. beginning with text/.</li>
 :  <li>An xs:base64Binary item is returned for all the other media types.</li>
 : </ul>
 : </p>
 :
 :
 : <p>
 : The structure of a request element is defined in the schema that is imported
 : by this module. The details are described in the
 : <a href="http://expath.org/spec/http-client#d2e183">specification</a>.
 : Analogously, the response element is also described in this
 : <a href="http://expath.org/spec/http-client#d2e491">specification</a>.
 : </p>
 :
 : @author Federico Cavalieri, Markus Pilman
 : @see <a href="http://www.w3.org/TR/xquery-3/#FunctionDeclns">XQuery 3.0: Function Declaration</a>
 : @library <a href="http://curl.haxx.se/">cURL Library</a>
 : @project EXPath/EXPath HTTP Client
 :
 :)
module namespace http = "http://expath.org/ns/http-client";

import module namespace http-wrapper = "http://zorba.io/modules/http-client-wrapper";
import module namespace err = "http://expath.org/ns/error";

import module namespace tidy="http://www.zorba-xquery.com/modules/converters/html";
import schema namespace tidy-options="http://www.zorba-xquery.com/modules/converters/html-options";

import schema namespace https = "http://expath.org/ns/http-client";

declare namespace an = "http://zorba.io/annotations";

declare namespace ver = "http://zorba.io/options/versioning";
declare option ver:module-version "1.0";

(:~
 : Sends an HTTP request and returns the corresponding response.
 :
 : <p>
 : This function is declared as sequential (see XQuery Scripting).
 : Sequential functions are allowed to have side effects; an HTTP POST
 : request, for example, most probably has side effects because it
 : adds or changes a remote resource.
 : </p>
 :
 : <p>
 : The request is delegated to
 : <code>http-wrapper:http-sequential-request()</code>. The items it returns
 : are then post-processed by the private <code>http:tidy-result()</code>
 : function, which receives the value of the request's
 : <code>override-media-type</code> attribute (if any).
 : </p>
 :
 : @param $request Contains the various parameters of the request.
 :   See the
 :   <a href="http://expath.org/spec/http-client#d2e183">specification</a>
 :   for a full description of the structure of this element.
 : @param $href is the HTTP or HTTPS URI to send the request to. It must be a valid
 :  xs:anyURI, but is declared as a string to be able to pass literal strings
 :  (without requiring an explicit cast to xs:anyURI).
 : @param $bodies is the request body content, for HTTP methods that can
 :  contain a body in the request (e.g. POST and PUT). It is an error if this
 :  parameter is not the empty sequence for methods that must not carry a
 :  body (DELETE, GET, HEAD and OPTIONS).
 : @return a sequence of items, where the first item is an element of type
 :  http:responseType. The response element is also described in the
 :  <a href="http://expath.org/spec/http-client#d2e483">specification</a>.
 :  If there is one (or several, in case of multipart) response body, the response bodies
 :  are the next items in the sequence.
 : @error err:HC005 if a type error (XPTY0004) is raised while the request
 :  is sent or its result is post-processed; it is reported with the message
 :  "The request element is not valid."
 :)
declare %an:sequential function http:send-request(
  $request as element()?,
  $href as xs:string?,
  $bodies as item()*) as item()+
{
  try
  {
    {
      variable $response := http-wrapper:http-sequential-request($request, $href, $bodies);
      (: Read after the request has been performed, as in the original ordering. :)
      variable $forced-media-type := fn:data($request/@override-media-type);
      http:tidy-result($response, $forced-media-type)
    }
  }
  catch XPTY0004
  {
    (: Any type error in the block above is surfaced as the EXPath HC005 error. :)
    fn:error($err:HC005, "The request element is not valid.")
  }
};

(:~
 : Convenience function taking only the request element.
 :
 : Calling this function is equivalent to calling
 :
 : <code>
 : http:send-request($request, (), ())
 : </code>
 :
 : Unlike the two- and three-parameter variants, the request element is
 : mandatory here (it is declared as <code>element()</code>, not
 : <code>element()?</code>).
 :
 : @see documentation of <a href="#send-request-3">send-request</a> with three parameters.
 :
 : @param $request see the request parameter of the sequential
 :  <a href="#send-request-3">send-request</a> function with three parameters.
 : @return see the return value of the sequential
 :  <a href="#send-request-3">send-request</a> function with three parameters.
 : @error err:HC005 if a type error (XPTY0004) reaches this function; it is
 :  reported with the message "The request element is not valid."
 :)
declare %an:sequential function http:send-request(
  $request as element()) as item()+
{
  try
  {
    (: No separate URI and no body content: both are passed as the empty sequence. :)
    http:send-request($request, (), ())
  }
  catch XPTY0004
  {
    fn:error($err:HC005, "The request element is not valid.")
  }
};

(:~
 : Convenience function taking the request element and the target URI.
 :
 : Calling this function is equivalent to calling
 :
 : <code>
 : http:send-request($request, $href, ())
 : </code>
 :
 : @see documentation of <a href="#send-request-3">send-request</a> with three parameters.
 :
 : @param $request see the request parameter of the sequential
 :  <a href="#send-request-3">send-request</a> function with three parameters.
 : @param $href see the href parameter of the sequential
 :  <a href="#send-request-3">send-request</a> function with three parameters.
 : @return see the return value of the sequential
 :  <a href="#send-request-3">send-request</a> function with three parameters.
 : @error err:HC005 if a type error (XPTY0004) reaches this function; it is
 :  reported with the message "The request element is not valid."
 :)
declare %an:sequential function http:send-request(
  $request as element()?,
  $href as xs:string?) as item()+
{
  try
  {
    (: No body content: the bodies argument is the empty sequence. :)
    http:send-request($request, $href, ())
  }
  catch XPTY0004
  {
    fn:error($err:HC005, "The request element is not valid.")
  }
};

(:~
 : Post-processes the items returned for an HTTP request.
 :
 : The first item of $result is returned unchanged. Every following item is
 : treated as a response body: if its effective media type is
 : <code>text/html</code> it is passed through <code>tidy:parse()</code>,
 : otherwise it is returned as is.
 :
 : The effective media type of the body at position N (counted from the
 : second item of $result) is $override-media-type when that is supplied;
 : otherwise it is the first <code>media-type</code> attribute value found on
 : the <code>https:body</code> elements below the first item that are the
 : N-th <code>https:body</code> child of their parent.
 :
 : NOTE(review): the comparison is an exact match against "text/html"; a
 : value carrying parameters (e.g. a charset) would not match. Confirm what
 : the wrapper module emits in the media-type attribute.
 :
 : @param $result the response item followed by zero or more body items.
 : @param $override-media-type a media type that, when present, takes
 :  precedence over the media type recorded in the response item.
 : @return the first item of $result followed by the (possibly tidied) bodies.
 :)
declare %private function http:tidy-result($result as item()+, $override-media-type as xs:string?) as item()+
{
  let $response := $result[1]
  return (
    $response,
    for $payload at $index in fn:subsequence($result, 2)
    let $declared-types := fn:data($response//https:body[$index]/@media-type)
    (: The override, when given, wins; otherwise the first declared type is used. :)
    let $effective-type := ($override-media-type, $declared-types)[1]
    return
      if ($effective-type eq "text/html")
      then tidy:parse($payload)
      else $payload
  )
};