Content of normalization module

xquery version "3.0";

(:
 : Copyright 2006-2009 The FLWOR Foundation.
 :
 : Licensed under the Apache License, Version 2.0 (the "License");
 : you may not use this file except in compliance with the License.
 : You may obtain a copy of the License at
 :
 : http://www.apache.org/licenses/LICENSE-2.0
 :
 : Unless required by applicable law or agreed to in writing, software
 : distributed under the License is distributed on an "AS IS" BASIS,
 : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 : See the License for the specific language governing permissions and
 : limitations under the License.
 :)

(:~
 : <p>This library module provides data normalization functions for processing calendar dates, 
 : temporal values, currency values, units of measurement, location names and postal addresses.
 : These functions are particularly useful for converting different data representations into canonical formats.</p>
 :
 : <p>The logic contained in this module is not specific to any particular XQuery implementation.</p>
 :
 : @author Bruno Martins and Diogo Simões
 : @project Zorba/Data Cleaning/Normalization
 :)

module namespace normalization = "http://zorba.io/modules/data-cleaning/normalization";

import module namespace http = "http://www.zorba-xquery.com/modules/http-client";

declare namespace an = "http://zorba.io/annotations";
declare namespace ver = "http://zorba.io/options/versioning";
declare option ver:module-version "2.0";

(:~
 : <p>Converts a given string representation of a date value into a date representation valid according 
 : to the corresponding XML Schema type.</p>
 :
 :
 : @param $sd The string representation for the date
 : @param $format An optional parameter denoting the format used to represent the date in the string, according to a 
 : sequence of conversion specifications. In the format string, a conversion specification is introduced by '%', usually followed 
 : by a single letter or 'O' or 'E' and then a single letter. Any character in the format string that is not part of a conversion 
 : specification is interpreted literally, and the string '%%' gives '%'. The supported conversion specifications are as follows:
 : <pre>
 : '%b' Abbreviated month name in the current locale.
 : '%B' Full month name in the current locale.
 : '%d' Day of the month as decimal number (01-31).
 : '%m' Month as decimal number (01-12).
 : '%x' Date, locale-specific.
 : '%y' Year without century (00-99).
 : '%Y' Year with century.
 : '%C' Century (00-99): the integer part of the year divided by 100.
 : '%D' Locale-specific date format such as '%m/%d/%y'.
 : '%e' Day of the month as decimal number (1-31), with a leading space for a single-digit number.
 : '%F' Equivalent to %Y-%m-%d (the ISO 8601 date format).
 : '%h' Equivalent to '%b'. 
 :</pre>
 :
 : @return The date value resulting from the conversion.
 : @example test/Queries/data-cleaning/normalization/to-date.xq
 :)
declare function normalization:to-date ( $sd as xs:string, $format as xs:string? ) as xs:string{
  (: Overview: split $format into its conversion letters and $sd into value
     tokens, pick the year, month and day tokens by matching positions, join
     them as "year-month-day" and hand the result to normalization:check-date.

     NOTE(review): the declared return type is xs:string, yet the final
     else-branch yields the empty sequence when fewer than two tokens are found
     in $sd. normalization:to-time declares xs:string? for the same situation;
     confirm which contract is intended before changing the signature. :)

  (: Month dictionary; month elements are matched through their abrv children
     or their @name attribute and contribute their @value attribute. :)
  let $dictionary := normalization:month-dictionary()

  (: Conversion letters of the format string. The first token produced by the
     split is discarded; it is the empty string when $format starts with '%'. :)
  let $format-tokens := tokenize($format, "[ %\-/:]+")[position()>1]

  (: Value tokens of the date string. Without any of the separators '-', '/'
     or ' ' the input is treated as digits-letters-digits (e.g. 24OCT2002) and
     split into leading digits, the letters, and trailing digits. :)
  let $sd-tokens :=
    if (contains($sd, "-") or contains($sd, "/") or contains($sd, " "))
    then tokenize($sd, "[ \-/]+")
    else
      let $ydtoken := tokenize(replace($sd, "[A-Za-z]", " "), " ")
      let $ft := $ydtoken[position()=1]
      let $lt := $ydtoken[last()]
      let $mtoken := replace($sd, "[0-9]", "")
      return ($ft, $mtoken, $lt)
  return
    if (count($sd-tokens) > 1)
    then
      let $year :=
        (: %F is year-month-day, so the year is the first token. :)
        if ($format-tokens = "F") then string($sd-tokens[position() = 1])
        (: %D is month/day/two-digit-year.
           NOTE(review): the century is hard-coded to "19". :)
        else if ($format-tokens = "D") then concat("19", string($sd-tokens[position() = 3]))
        else if ($format-tokens = "Y") then string($sd-tokens[position() = index-of($format-tokens, "Y")])
        else if ($format-tokens = "y") then
          (
            (: NOTE(review): the century token is decremented by one before
               being prefixed, which differs from the '%C' description in the
               function documentation - confirm the intended meaning. :)
            if ($format-tokens = "C")
            then concat(
                   string(number(string($sd-tokens[position() = index-of($format-tokens, "C")])) - 1),
                   string($sd-tokens[position() = index-of($format-tokens, "y")]))
            else concat("19", string($sd-tokens[position() = index-of($format-tokens, "y")]))
          )
        (: Marker for "year not determined". :)
        else "YND"

      let $month :=
        (: %h and %b: abbreviated month name, compared with the abrv text as is. :)
        if ($format-tokens = "h")
        then string($dictionary//month[abrv/text() = $sd-tokens[position() = index-of($format-tokens, "h")]]/@value)
        else if ($format-tokens = "b")
        then string($dictionary//month[abrv/text() = $sd-tokens[position() = index-of($format-tokens, "b")]]/@value)
        (: %B: full month name, compared case-insensitively with @name. :)
        else if ($format-tokens = "B")
        then string($dictionary//month[lower-case(@name) =
                      lower-case($sd-tokens[position() = index-of($format-tokens, "B")])]/@value)
        else if ($format-tokens = "F") then string($sd-tokens[position() = 2])
        else if ($format-tokens = "D") then string($sd-tokens[position() = 1])
        else if ($format-tokens = "m") then string($sd-tokens[position() = index-of($format-tokens, "m")])
        (: Marker for "month not determined". :)
        else "MND"

      let $day :=
        if ($format-tokens = "F") then string($sd-tokens[position() = 3])
        else if ($format-tokens = "D") then string($sd-tokens[position() = 2])
        else if ($format-tokens = "d") then $sd-tokens[position() = index-of($format-tokens, "d")]
        else if ($format-tokens = "e") then
          (
            (: %e days may have one or two digits; only pad the short form so
               that a value such as 15 is not turned into 015. :)
            let $e-day := string($sd-tokens[position() = index-of($format-tokens, "e")])
            return
              if (string-length($e-day) < 2)
              then concat("0", $e-day)
              else $e-day
          )
        (: Marker for "day not determined". :)
        else "DND"

      let $result := concat($year, "-", $month, "-", $day)

      return normalization:check-date($result)
    else ()

};

(:~
 : <p>Converts a given string representation of a time value into a time representation valid according to 
 : the corresponding XML Schema type.</p>
 :
 :
 : @param $sd The string representation for the time.
 : @param $format An optional parameter denoting the format used to represent the time in the string, according to a sequence of 
 : conversion specifications. In the format string, a conversion specification is introduced by '%', usually followed by a single 
 : letter or 'O' or 'E' and then a single letter. Any character in the format string that is not part of a conversion specification 
 : is interpreted literally, and the string '%%' gives '%'. The supported conversion specifications are as follows:
 : <p/>
 : <pre class="ace-static">
 : '%H' Hours as decimal number (00-23).
 : '%I' Hours as decimal number (01-12).
 : '%M' Minute as decimal number (00-59).
 : '%p' AM/PM indicator in the locale. Used in conjunction with '%I' and *not* with '%H'.
 : '%S' Second as decimal number (00-61), allowing for up to two leap-seconds.
 : '%X' Time, locale-specific.
 : '%z' Offset from Greenwich, so '-0900' is 9 hours west of Greenwich.
 : '%Z' Time zone as a character string.
 : '%k' The 24-hour clock time with single digits preceded by a blank.
 : '%l' The 12-hour clock time with single digits preceded by a blank.
 : '%r' The 12-hour clock time (using the locale's AM or PM).
 : '%R' Equivalent to '%H:%M'.
 : '%T' Equivalent to '%H:%M:%S'.
 :</pre>
 :
 : @error normalization:NOTSUPPORTED if the date type is not known to the service. 
 : @return The time value resulting from the conversion.
 : @example test/Queries/data-cleaning/normalization/to-time.xq
 :)
declare function normalization:to-time ( $sd as xs:string, $format as xs:string? ) as xs:string?{
  (: Overview: split $format into conversion letters and $sd into value tokens,
     pick hours, minutes and seconds by matching positions, optionally shift the
     clock by a time-zone offset (%Z or %z), and pass the "hours:minutes:seconds"
     string to normalization:check-time. Returns the empty sequence when $sd
     does not split into at least two tokens. :)

  (: Dictionary of timeZone elements; @name is matched and @value supplies a
     signed offset string. :)
  let $timezoneDict := normalization:timeZone-dictionary()

  (: Expand the composite specifications %R and %T before tokenizing. :)
  let $format-string := replace(replace ($format, '%R', '%H:%M'), '%T', '%H:%M:%S')

  (: Conversion letters and literal words of the format. The first token of
     the split is discarded; it is empty when the format starts with '%'. :)
  let $format-tokens := tokenize($format-string, "( |%|:)+")[position()>1]

  (: Value tokens; each single ' ', ':' or '.' is a separator, so doubled
     separators produce empty tokens. :)
  let $sd-tokens :=
    if (contains($sd, ":") or contains($sd, ".") or contains($sd, " "))
    then tokenize ($sd, "[ :\.]")
    else ()
  return
    if (count($sd-tokens) > 1)
    then
      let $hours :=
        if ($format-tokens = "T") then string($sd-tokens[position() = 1])
        (: %X takes the value from the current time, not from $sd. :)
        else if ($format-tokens = "X") then substring(string(current-time()), 1, 2)
        else if ($format-tokens = "R") then string($sd-tokens[position() = 1])
        else if ($format-tokens = "H") then string($sd-tokens[position() = index-of($format-tokens, "H")])
        else if ($format-tokens = "k") then
          (
            (: %k hours may be a single digit; pad to two digits. :)
            let $k-hour := string($sd-tokens[position() = index-of($format-tokens, "k")])
            return
              if (string-length($k-hour) = 1)
              then concat("0", $k-hour)
              else $k-hour
          )
        (: %r: hour is the first token and the AM/PM marker the fourth. :)
        else if ($format-tokens = "r") then
          normalization:meridiem-to-24h(
            string($sd-tokens[position() = 1]),
            string($sd-tokens[position() = 4]),
            false())
        (: %I and %l need a %p marker; without it no hour can be derived. :)
        else if ($format-tokens = "I") then
          (
            if ($format-tokens = "p")
            then normalization:meridiem-to-24h(
                   string($sd-tokens[position() = index-of($format-tokens, "I")]),
                   string($sd-tokens[position() = index-of($format-tokens, "p")]),
                   false())
            else ()
          )
        else if ($format-tokens = "l") then
          (
            if ($format-tokens = "p")
            then normalization:meridiem-to-24h(
                   string($sd-tokens[position() = index-of($format-tokens, "l")]),
                   string($sd-tokens[position() = index-of($format-tokens, "p")]),
                   true())
            else ()
          )
        (: Marker for "hours not determined". :)
        else "HND"

      let $minutes :=
        if ($format-tokens = "T") then string($sd-tokens[position() = 2])
        else if ($format-tokens = "X") then substring(string(current-time()), 4, 2)
        else if ($format-tokens = "R") then string($sd-tokens[position() = 2])
        else if ($format-tokens = "r") then string($sd-tokens[position() = 2])
        else if ($format-tokens = "M") then string($sd-tokens[position() = index-of($format-tokens, "M")])
        (: Marker for "minutes not determined". :)
        else "MND"

      let $seconds :=
        if ($format-tokens = "T") then string($sd-tokens[position() = 3])
        else if ($format-tokens = "X") then substring(string(current-time()), 7, 2)
        else if ($format-tokens = "r") then string($sd-tokens[position() = 3])
        else if ($format-tokens = "R") then "00"
        else if ($format-tokens = "S") then $sd-tokens[position() = index-of($format-tokens, "S")]
        (: NOTE(review): 'e' is a date specification and is not listed among
           the supported time formats; this branch looks like a leftover from
           to-date and is kept only to preserve existing behaviour. :)
        else if ($format-tokens = "e")
        then concat("0", string($sd-tokens[position() = index-of($format-tokens, "e")]))
        (: Seconds default to zero when the format does not mention them. :)
        else "00"

      let $result :=
        if ($format-tokens = "Z") then
          (
            (: Named time zone: look the offset up in the dictionary. :)
            let $shifted :=
              normalization:shift-clock-by-offset(
                $hours,
                $minutes,
                string($timezoneDict//timeZone/@value[../@name = $sd-tokens[position() = index-of($format-tokens, "Z")]]))
            return
              if (exists($shifted))
              then concat($shifted, ":", $seconds)
              else ()
          )
        else if ($format-tokens = "z") then
          (
            (: Numeric offset: the token itself is the signed offset string. :)
            let $shifted :=
              normalization:shift-clock-by-offset(
                $hours,
                $minutes,
                string($sd-tokens[position() = index-of($format-tokens, "z")]))
            return
              if (exists($shifted))
              then concat($shifted, ":", $seconds)
              else ()
          )
        else concat($hours, ":", $minutes, ":", $seconds)

      return normalization:check-time($result)
    else ()

};

(:~
 : <p>Converts an hour given on a 12-hour clock into its 24-hour form.</p>
 :
 : @param $hour The hour token as found in the input string.
 : @param $meridiem The AM/PM marker, compared case-insensitively.
 : @param $pad Whether a single-digit morning hour is padded to two digits.
 : @return The 24-hour value as a string; 12 AM yields "00" and 12 PM yields "12".
 : The empty sequence is returned when the marker is neither "am" nor "pm".
 :)
declare %private function normalization:meridiem-to-24h (
    $hour as xs:string,
    $meridiem as xs:string,
    $pad as xs:boolean ) as xs:string? {
  let $marker := lower-case($meridiem)
  return
    if ($marker = "am") then
      (
        (: Twelve in the morning is midnight, i.e. hour zero. :)
        if ($hour = "12") then "00"
        else if ($pad and string-length($hour) = 1) then concat("0", $hour)
        else $hour
      )
    else if ($marker = "pm") then
      (
        (: Noon stays 12; every other afternoon hour moves forward by 12. :)
        if ($hour = "12") then "12"
        else string(number($hour) + 12)
      )
    else ()
};

(:~
 : <p>Shifts a clock value by a signed offset of the form sign, two hour digits,
 : two minute digits (for instance "+0130"). A '+' offset is added to the clock
 : value and a '-' offset is subtracted from it; hours wrap around 24 and
 : minutes around 60, carrying or borrowing one hour as needed.</p>
 :
 : @param $hours The hour value as a string (may be empty when it could not be determined).
 : @param $minutes The minute value as a string.
 : @param $offset The signed offset string.
 : @return The shifted value as "hours:minutes", each part padded to two digits,
 : or the empty sequence when the offset does not start with '+' or '-'.
 :)
declare %private function normalization:shift-clock-by-offset (
    $hours as xs:string?,
    $minutes as xs:string,
    $offset as xs:string ) as xs:string? {
  let $sign := substring($offset, 1, 1)
  let $offset-hours := number(substring($offset, 2, 2))
  let $offset-minutes := number(substring($offset, 4, 2))
  return
    if ($sign = '+' or $sign = '-')
    then
      let $hour-value :=
        if ($sign = '+')
        then
          (: Carry one hour when the minute sum passes the end of the hour. :)
          let $carry := if (number($minutes) + $offset-minutes > 59) then 1 else 0
          return ($carry + number($hours) + $offset-hours) mod 24
        else
          (: Borrow one hour when the minute difference drops below zero. :)
          let $borrow := if (number($minutes) - $offset-minutes < 0) then -1 else 0
          let $raw := $borrow + number($hours) - $offset-hours
          return
            (: mod keeps the sign of the dividend, so wrap negative results
               into the previous day. :)
            if ($raw mod 24 >= 0)
            then $raw mod 24
            else (24 + $raw) mod 24
      let $minute-value :=
        if ($sign = '+')
        then (number($minutes) + $offset-minutes) mod 60
        else
          let $difference := number($minutes) - $offset-minutes
          return
            if ($difference mod 60 >= 0)
            then $difference mod 60
            else (60 + $difference) mod 60
      let $hour-text := string($hour-value)
      let $minute-text := string($minute-value)
      return
        concat(
          if (string-length($hour-text) = 2) then $hour-text else concat("0", $hour-text),
          ":",
          if (string-length($minute-text) = 2) then $minute-text else concat("0", $minute-text))
    else ()
};

(:~
 : <p>Converts a given string representation of a dateTime value into a dateTime representation 
 : valid according to the corresponding XML Schema type.</p>
 :
 :
 : @param $sd The string representation for the dateTime.
 : @param $format An optional parameter denoting the format used to represent the dateTime in the string, according to a sequence 
 : of conversion specifications. In the format string, a conversion specification is introduced by '%', usually followed by a single 
 : letter or 'O' or 'E' and then a single letter. Any character in the format string that is not part of a conversion specification 
 : is interpreted literally, and the string '%%' gives '%'. The supported conversion specifications are as follows:
 : <p/>
 : <pre class="ace-static">
 : '%b' Abbreviated month name in the current locale.
 : '%B' Full month name in the current locale.
 : '%c' Date and time, locale-specific.
 : '%C' Century (00-99): the integer part of the year divided by 100.
 : '%d' Day of the month as decimal number (01-31).
 : '%H' Hours as decimal number (00-23).
 : '%I' Hours as decimal number (01-12).
 : '%j' Day of year as decimal number (001-366).
 : '%m' Month as decimal number (01-12).
 : '%M' Minute as decimal number (00-59).
 : '%p' AM/PM indicator in the locale. Used in conjunction with '%I' and *not* with '%H'.
 : '%S' Second as decimal number (00-61), allowing for up to two leap-seconds.
 : '%x' Date, locale-specific.
 : '%X' Time, locale-specific.
 : '%y' Year without century (00-99).
 : '%Y' Year with century.
 : '%z' Offset from Greenwich, so '-0900' is 9 hours west of Greenwich.
 : '%Z' Time zone as a character string.
 : '%D' Locale-specific date format such as '%m/%d/%y': ISO C99 says it should be that exact format.
 : '%e' Day of the month as decimal number (1-31), with a leading space for a single-digit number.
 : '%F' Equivalent to %Y-%m-%d (the ISO 8601 date format).
 : '%g' The last two digits of the week-based year (see '%V').
 : '%G' The week-based year (see '%V') as a decimal number.
 : '%h' Equivalent to '%b'.
 : '%k' The 24-hour clock time with single digits preceded by a blank.
 : '%l' The 12-hour clock time with single digits preceded by a blank.
 : '%r' The 12-hour clock time (using the locale's AM or PM).
 : '%R' Equivalent to '%H:%M'.
 : '%T' Equivalent to '%H:%M:%S'.
 :</pre>
 :
 : @error normalization:NOTSUPPORTED if the dateTime type is not known to the service. 
 : @return The dateTime value resulting from the conversion.
 : @example test/Queries/data-cleaning/normalization/to-dateTime.xq
 :)
declare function normalization:to-dateTime ( $sd as xs:string, $format as xs:string? ) as xs:string {
  let $timezoneDict := normalization:timeZone-dictionary()
  let $monthDict := normalization:month-dictionary()
  let $format-string := replace(replace(replace ($format, '%R', '%H:%M'), '%T', '%H:%M:%S'), '%F', '%Y-%m-%d')  
  let $format-tokens := tokenize($format-string, "[ %\-/:\.]+")[position()>1]  
  let $sdt := 
 	if (contains($sd, ":") or contains($sd, ".") or contains($sd, " ") or contains($sd, "-") 
		or contains($sd, "/"))
 	then tokenize ($sd, "[ \-/:\.]+")
 	else ()
  let $sdtok :=  
    	if ((count(index-of($format-tokens, "z")) != 0) and (not(contains($sdt[last()], "+"))))
          then ($sdt[position() != last()], concat("-", $sdt[position() = last()]))
          else $sdt
  let $sd-tokens :=
    for $a in $sdtok
       return
  	if (matches($a, "[0-9][0-9][A-Za-z]+[0-9][0-9]+"))
          then (let $ydtoken := tokenize(replace($a, "[A-Za-z]", " "), " ")
	     let $ft := $ydtoken[position()=1]
	     let $lt := $ydtoken[last()]
	     let $mtoken := replace($a, "[0-9]", "") return ($ft, $mtoken, $lt))
          else $a
  let $timeFormat := tokenize($format, "[ :\.\-]")[position()>1]
  let $dateFormat := tokenize($format, "[ :\.\-]")[position()=1]
   return
 if (count($sd-tokens)>1) 
	then  	
		(:Date:)
		let $year := 
			if (count(index-of($format-tokens, "F")) != 0) then string($sd-tokens[position() = 1])
			else	
			
			if (count(index-of($format-tokens, "D")) != 0) then concat("19", string($sd-tokens[position() = 3]))
			else

			if (count(index-of($format-tokens, "Y")) != 0)
			then string($sd-tokens[position() = index-of($format-tokens, "Y")]) else 
				
			if (count(index-of($format-tokens, "y")) != 0)
			then 
			  if(count(index-of($format-tokens, "C")) !=0)
			  then concat(string(number(string($sd-tokens[position() = index-of($format-tokens, "C")]))-1), string($sd-tokens[position() = index-of($format-tokens, "y")]))

			  else 
			   concat("19", string($sd-tokens[position() = index-of($format-tokens, "y")]))

			else "YND"

		let $month := 
			if (count(index-of($format-tokens, "h")) != 0)
			then string($monthDict//month[abrv/text() = $sd-tokens[position() = index-of($format-tokens, "h")]]/@value) else
			
			if (count(index-of($format-tokens, "b")) != 0)
			then string($monthDict//month[abrv/text() = $sd-tokens[position() = index-of($format-tokens, "b")]]/@value)
			else 
			
			if (count(index-of($format-tokens, "B")) != 0)
			then string($monthDict//month[lower-case(@name) = 
				lower-case($sd-tokens[position() = index-of($format-tokens, "B")])]/@value)
			   
			else 

			if (count(index-of($format-tokens, "F")) != 0)
			then string($sd-tokens[position() = 2])	
			else 
			
			if (count(index-of($format-tokens, "D")) != 0) then string($sd-tokens[position() = 1])
			else

			if (count(index-of($format-tokens, "m")) != 0)
			then string($sd-tokens[position() = index-of($format-tokens, "m")])
			
			else "MND"

	      	let $day := 
			if (count(index-of($format-tokens, "F")) != 0) 
			then string($sd-tokens[position() = 3]) else 
			
			if (count(index-of($format-tokens, "D")) != 0) then string($sd-tokens[position() = 2])
			else
  
			if (count(index-of($format-tokens, "d")) != 0) 
			then $sd-tokens[position() = index-of($format-tokens, "d")] else

			if (count(index-of($format-tokens, "e")) != 0)
			then concat("0", string($sd-tokens[position() = index-of($format-tokens, "e")]))
			else "DND"
	
		(:Time:)
		let $hours := 
			if (count(index-of($format-tokens, "T")) != 0) then string($sd-tokens[position() = 1])
			else	
			
			if (count(index-of($format-tokens, "X")) != 0) then substring(string(current-time()),1,2)
			else

			if (count(index-of($format-tokens, "R")) != 0) then string($sd-tokens[position() = 1])
			else

			if (count(index-of($format-tokens, "H")) != 0)
			then string($sd-tokens[position() = index-of($format-tokens, "H")]) else 
		
			if (count(index-of($format-tokens, "k")) != 0)
			then if(string-length(string($sd-tokens[position() = index-of($format-tokens, "k")]))=1)
			     then concat("0", string($sd-tokens[position() = index-of($format-tokens, "k")]))
			     else string($sd-tokens[position() = index-of($format-tokens, "k")])  
			else	
			
			if (count(index-of($format-tokens, "r")) != 0)
			then 
			  if(lower-case(string($sd-tokens[position() = 4]))="am")
			  then string($sd-tokens[position() = 1])
			  else if(lower-case(string($sd-tokens[position() = 4]))="pm")
			       then if(string($sd-tokens[position() = 1])="12") then 12
				  else string(number(string($sd-tokens[position() = 1]))+12)
			       else()
			
			else

			if (count(index-of($format-tokens, "I")) != 0)
			then 
			  if(count(index-of($format-tokens, "p")) !=0)
			  then if (lower-case(string($sd-tokens[position() = 
					index-of($format-tokens, "p")]))="am")
			       then string($sd-tokens[position() = index-of($format-tokens, "I")]) 
			       else if (lower-case(string($sd-tokens[position() = 
					index-of($format-tokens, "p")]))="pm")
				  then if (string($sd-tokens[position() = index-of($format-tokens, "I")])="12")
				       then "12"
				       else string(number(string($sd-tokens[position() = index-of($format-tokens, "I")]))+12)
				  else()
			  else()
			
			else
			if (count(index-of($format-tokens, "l")) != 0)
			then 
			  if(count(index-of($format-tokens, "p")) !=0)
			  then if (lower-case(string($sd-tokens[position() = 
					index-of($format-tokens, "p")]))="am")
			       then if(string-length(string($sd-tokens[position() = index-of($format-tokens, "l")]))=1)
				  then concat("0", string($sd-tokens[position() = index-of($format-tokens, "l")]))
				  else string($sd-tokens[position() = index-of($format-tokens, "l")])  
				
			       else if (lower-case(string($sd-tokens[position() = index-of($format-tokens, "p")]))="pm")
				  then if (string($sd-tokens[position() = index-of($format-tokens, "l")])="12")
				       then "12"
				       else string(number(string($sd-tokens[position() = index-of($format-tokens, "l")]))+12)
				  else()
			  			
			else ()
			
			
			else "HND"

		let $minutes := 
			
			if (count(index-of($format-tokens, "T")) != 0)
			then string($sd-tokens[position() = 2])	
			else 
			
			if (count(index-of($format-tokens, "X")) != 0) then substring(string(current-time()),4,2)
			else

			if (count(index-of($format-tokens, "R")) != 0) then string($sd-tokens[position() = 2])
			else

			if (count(index-of($format-tokens, "r")) != 0)
			then string($sd-tokens[position() = 2])	
			else 

			if (count(index-of($format-tokens, "M")) != 0)
			then string($sd-tokens[position() = index-of($format-tokens, "M")])
			
			else "MND"

	      	let $seconds := 
			if (count(index-of($format-tokens, "T")) != 0) 
			then string($sd-tokens[position() = 3]) else 
			
			if (count(index-of($format-tokens, "X")) != 0) then substring(string(current-time()),7,2)
			else

			if (count(index-of($format-tokens, "r")) != 0)
			then string($sd-tokens[position() = 3])	
			else 

			if (count(index-of($format-tokens, "R")) != 0) then "00"
			else

			if (count(index-of($format-tokens, "S")) != 0) 
			then $sd-tokens[position() = index-of($format-tokens, "S")] else

			if (count(index-of($format-tokens, "e")) != 0)
			then concat("0", string($sd-tokens[position() = index-of($format-tokens, "e")]))
			else "00"
	
	let $result :=

	if (count(index-of($format-tokens, "Z")) != 0) 
	then
	   if (substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() = index-of($format-tokens, "Z")]]),1,1)='+')
	   then let $complement := 
		  if (number($minutes)+number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
			 index-of($format-tokens, "Z")]]),4,2)) > 59) 		  
		  then 1 
		  else 0

		let $dayscomplement := 
		  if (number($complement) + number($hours) + number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() 			=index-of($format-tokens, "Z")]]),2,2)) >= 24)
		  then 1
		  else 0

		let $monthscomplement :=
		  if(($dayscomplement + number($day) > 28) and (compare($month, '02') = 0) and (number($year) mod 4 != 0))
		  then 1  
		  else 
		    if(($dayscomplement + number($day) > 30) and ((compare($month, '04') = 0) or (compare($month, '06') = 0) or (compare($month, '09') = 0) or (compare($month, '11') = 0)))
		    then 1
		    else 
		      if(($dayscomplement + number($day) > 31) and ((compare($month, '04') = 0) or (compare($month, '01') = 0) or (compare($month, '03') = 0) or (compare($month, '05') = 0) or (compare($month, '07') = 0) or (compare($month, '08') = 0) or (compare($month, '10') = 0) or (compare($month, '12') = 0)))
		      then 1
		      else 
		        if(($dayscomplement + number($day) > 29) and (compare($month, '02') = 0) and (number($year) mod 4 = 0))
		        then 1
		        else 0

		let $ryear := 
		  if ($monthscomplement + number($month) > 12)
		  then string(number($year) + 1)
		  else $year

		let $daywcompl := 
		  if ($monthscomplement = 1)
		  then 1
		  else number($day) + $dayscomplement

		let $monthwcompl :=
		  if($monthscomplement + number($month) <= 12)
		  then number($month) + $monthscomplement		
		  else 1
 
		let $rday := 
		  if (string-length(string($daywcompl)) = 1)
		  then concat ('0', string($daywcompl))
		  else string($daywcompl)

		let $rmonth :=
		  if (string-length(string($monthwcompl)) = 1)
		  then concat ('0', string($monthwcompl))
		  else string($monthwcompl)

		let $rhours := 
		  if (string-length(string(
			(number($complement) + number($hours) + 
	                        number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2))) mod 24)) = 2) 
		  then (string(
			(number($complement) + number($hours) + 
			    number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2))) mod 24))
		  else concat("0",
			string(
			(number($complement) + number($hours) + 
			    number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2))) mod 24))	
		  
	          let $rminutes := 
		  if (string-length(string(
			(number($minutes)+
			    number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),4,2))) mod 60)) = 2) 
		  then (string(
			(number($minutes)+
			    number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),4,2))) mod 60))
		  else concat("0",
			string(
			(number($minutes)+
			    number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),4,2))) mod 60))

			
	          return concat($ryear, "-", $rmonth, "-", $rday, "T", $rhours, ":", $rminutes, ":", $seconds)
	     else 
	     
	     if (substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() = 
				index-of($format-tokens, "Z")]]),1,1)='-')
	     then	
		let $complement := 
		  if (number($minutes)-number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2)) < 0) 		  
		  then -1 
		  else 0
	
		let $dayscomplement := 
		  if (number($complement) - number($hours) - number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position()=
				 index-of($format-tokens, "Z")]]),2,2)) < 0)
		  then -1
		  else 0

		let $monthcomplement :=
		  if(number($day) + $dayscomplement < 1)
		  then -1
		  else 0
	
		let $yearcomplement :=
		  if(number($month) + $monthcomplement< 1)
		  then -1
		  else 0

		let $daywcompl := 
		  if ($monthcomplement = 0)
		  then number($day) + $dayscomplement 
		  else 
		   if ( (number($month) = 5) or (number($month) = 7) or (number($month) = 10) or (number($month) = 12))
		   then 30
		   else 
		    if((number($month) = 4) or (number($month) = 6) or (number($month) = 9) or (number($month) = 11) or (number($month) = 2) or 			(number($month) = 1) or (number($month) = 8))
		    then 31
		    else 
		      if((number($month) = 3) and (number($year) mod 4 != 0))
		      then 28
		      else 
		        if((number($month) = 3) and (number($year) mod 4 = 0))
		        then 29
		        else number($day) + $dayscomplement
	  	      	
		let $monthwcompl:=
		  if($yearcomplement = 0)
		  then number($month) + $monthcomplement
		  else 12

		let $ryear := 
		  number($year) + $yearcomplement

		let $rday := 
		  if (string-length(string($daywcompl)) = 1)
		  then concat ('0', string($daywcompl))
		  else string($daywcompl)

		let $rmonth :=
		  if (string-length(string($monthwcompl)) = 1)
		  then concat ('0', string($monthwcompl))
		  else string($monthwcompl)

		let $rhours :=
		     if( ((number($complement) + number($hours) -
			number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2))) mod 24) >= 0 )
		     then
		  	if (string-length(string(
		  	    (number($complement) + number($hours) -
	                          number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2))) mod 24)) = 2)
		   	then (string(
			  (number($complement) + number($hours) - 
			      number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2))) mod 24))
		  	else concat("0",
		  	  string(
			    (number($complement) + number($hours) - 
			      number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2))) mod 24))
		     else
		  	if (string-length(string(
		  	    (24 + number($complement) + number($hours) -
	                          number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2))) mod 24)) = 2)
		   	then (string(
			  (24 + number($complement) + number($hours) - 
			      number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2))) mod 24))
		  	else concat("0",
		  	  string(
			    (24 + number($complement) + -(number($hours) - 
			      number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2)))) mod 24))
		
		let $rminutes := 
		     if( ((number($minutes) -
			number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),4,2))) mod 60) >= 0 )
		     then
		  	if (string-length(string(
		  	    (number($minutes) -
	                          number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),4,2))) mod 60)) = 2)
		   	then (string(
			  (number($minutes) - 
			      number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),4,2))) mod 60))
		  	else concat("0",
		  	  string(
			    (number($minutes) - 
			      number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),4,2))) mod 60))
		     else
		  	if (string-length(string(
		  	    (60 - -(number($minutes) -
	                          number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),4,2)))) mod 60)) = 2)
		   	then (string(
			  (60 - -(number($minutes) - 
			      number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),4,2)))) mod 60))
		  	else concat("0",
		  	  string(
			    (60 - -(number($minutes) - 
			      number(substring(string($timezoneDict//timeZone/@value[../@name=$sd-tokens[position() =
				 index-of($format-tokens, "Z")]]),2,2)))) mod 60))

		return concat($ryear, "-", $rmonth, "-", $rday, "T", $rhours, ":", $rminutes, ":", $seconds)
	     else ()
	else	


	if (count(index-of($format-tokens, "z")) != 0) 
	then if (substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),1,1)='+')
	     then let $complement := 
		  if (number($minutes)+number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2)) > 59) 		  	  then 1 
		  else 0

		let $dayscomplement := 
		  if (number($complement) + number($hours) + number(substring(string($sd-tokens[position() = 					index-of($format-tokens, "z")]),2,2)) >= 24)
		  then 1
		  else 0

		let $monthscomplement :=
		  if(($dayscomplement + number($day) > 28) and (compare($month, '02') = 0) and (number($year) mod 4 != 0))
		  then 1  
		  else 
		    if(($dayscomplement + number($day) > 30) and ((compare($month, '04') = 0) or (compare($month, '06') = 0) or (compare($month, '09') = 0) or (compare($month, '11') = 0)))
		    then 1
		    else 
		      if(($dayscomplement + number($day) > 31) and ((compare($month, '04') = 0) or (compare($month, '01') = 0) or (compare($month, '03') = 0) or (compare($month, '05') = 0) or (compare($month, '07') = 0) or (compare($month, '08') = 0) or (compare($month, '10') = 0) or (compare($month, '12') = 0)))
		      then 1
		      else 
		        if(($dayscomplement + number($day) > 29) and (compare($month, '02') = 0) and (number($year) mod 4 = 0))
		        then 1
		        else 0

		let $ryear := 
		  if ($monthscomplement + number($month) > 12)
		  then string(number($year) + 1)
		  else $year

		let $daywcompl := 
		  if ($monthscomplement = 1)
		  then 1
		  else number($day) + $dayscomplement

		let $monthwcompl :=
		  if($monthscomplement + number($month) <= 12)
		  then number($month) + $monthscomplement		
		  else 1
 
		let $rday := 
		  if (string-length(string($daywcompl)) = 1)
		  then concat ('0', string($daywcompl))
		  else string($daywcompl)

		let $rmonth :=
		  if (string-length(string($monthwcompl)) = 1)
		  then concat ('0', string($monthwcompl))
		  else string($monthwcompl)

		let $rhours := 
		  if (string-length(string(
			(number($complement) + number($hours) + 
	                        number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2))) mod 24)) = 2) 
		  then (string(
			(number($complement) + number($hours) + 
			    number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2))) mod 24))
		  else concat("0",
			string(
			(number($complement) + number($hours) + 
			    number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2))) mod 24))	
		  
	          let $rminutes := 
		  if (string-length(string(
			(number($minutes)+
			    number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2))) mod 60)) = 2) 
		  then (string(
			(number($minutes)+
			    number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2))) mod 60))
		  else concat("0",
			string(
			(number($minutes)+
			    number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2))) mod 60))

			
	          return concat($ryear, "-", $rmonth, "-", $rday, "T", $rhours, ":", $rminutes, ":", $seconds)
	     else 
	     
	     if (substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),1,1)='-')
	     then	
		let $complement := 
		  if (number($minutes)-number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2)) < 0) 		  	  then -1 
		  else 0
	
		let $dayscomplement := 
		  if (number($complement) - number($hours) - number(substring(string($sd-tokens[position() = 					index-of($format-tokens, "z")]),2,2)) < 0)
		  then -1
		  else 0

		let $monthcomplement :=
		  if(number($day) + $dayscomplement< 1)
		  then -1
		  else 0
	
		let $yearcomplement :=
		  if(number($month) + $monthcomplement< 1)
		  then -1
		  else 0

		let $daywcompl := 
		  if ($monthcomplement = 0)
		  then number($day) + $dayscomplement 
		  else 
		   if ( (number($month) = 5) or (number($month) = 7) or (number($month) = 10) or (number($month) = 12))
		   then 30
		   else 
		    if((number($month) = 4) or (number($month) = 6) or (number($month) = 9) or (number($month) = 11) or (number($month) = 2) or 			(number($month) = 1) or (number($month) = 8))
		    then 31
		    else 
		      if((number($month) = 3) and (number($year) mod 4 != 0))
		      then 28
		      else 
		        if((number($month) = 3) and (number($year) mod 4 = 0))
		        then 29
		        else number($day) + $dayscomplement
	  	      	
		let $monthwcompl:=
		  if($yearcomplement = 0)
		  then number($month) + $monthcomplement
		  else 12

		let $ryear := 
		  number($year) + $yearcomplement

		let $rday := 
		  if (string-length(string($daywcompl)) = 1)
		  then concat ('0', string($daywcompl))
		  else string($daywcompl)

		let $rmonth :=
		  if (string-length(string($monthwcompl)) = 1)
		  then concat ('0', string($monthwcompl))
		  else string($monthwcompl)

		let $rhours :=
		     if( ((number($complement) + number($hours) -
			number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2))) mod 24) >= 0 )
		     then
		  	if (string-length(string(
		  	    (number($complement) + number($hours) -
	                          number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2))) mod 24)) = 2)
		   	then (string(
			  (number($complement) + number($hours) - 
			      number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2))) mod 24))
		  	else concat("0",
		  	  string(
			    (number($complement) + number($hours) - 
			      number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2))) mod 24))
		     else
		  	if (string-length(string(
		  	    (24 + number($complement) + number($hours) -
	                          number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2))) mod 24)) = 2)
		   	then (string(
			  (24 + number($complement) + number($hours) - 
			      number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2))) mod 24))
		  	else concat("0",
		  	  string(
			    (24 + number($complement) + -(number($hours) - 
			      number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2)))) mod 24))
		
		let $rminutes := 
		     if( ((number($minutes) -
			number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2))) mod 60) >= 0 )
		     then
		  	if (string-length(string(
		  	    (number($minutes) -
	                          number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2))) mod 60)) = 2)
		   	then (string(
			  (number($minutes) - 
			      number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2))) mod 60))
		  	else concat("0",
		  	  string(
			    (number($minutes) - 
			      number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2))) mod 60))
		     else
		  	if (string-length(string(
		  	    (60 - -(number($minutes) -
	                          number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2)))) mod 60)) = 2)
		   	then (string(
			  (60 - -(number($minutes) - 
			      number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),4,2)))) mod 60))
		  	else concat("0",
		  	  string(
			    (60 - -(number($minutes) -
			      number(substring(string($sd-tokens[position() = index-of($format-tokens, "z")]),2,2)))) mod 60))

		return concat($ryear, "-", $rmonth, "-", $rday, "T", $rhours, ":", $rminutes, ":", $seconds)
	     else ()
	else
	 concat($year, "-", $month, "-", $day, "T", $hours, ":", $minutes, ":", $seconds)

	return 
	normalization:check-dateTime($result)
	else()
};

(:~
 : <p>Uses an address normalization Web service to convert a postal address given as input into a 
 : canonical representation format.</p>
 :
 : <p>Every address component is percent-encoded before it is placed in the request URL (spaces are
 : sent as '+'), and the components are joined with commas. The second item returned by the HTTP
 : client is searched for country, state, county, city, neighborhood, street and house elements;
 : their non-empty string values are returned with duplicates removed.</p>
 :
 : @param $addr A sequence of strings encoding an address, where each string in the sequence corresponds to a different component (e.g., street, city, country, etc.) of the address.
 : @return A sequence of strings with the address encoded in a canonical format, where each string in the sequence corresponds to a different component (e.g., street, city, country, etc.) of the address.
 : @example test/Queries/data-cleaning/normalization/normalize-address.xq
 :)
declare %an:nondeterministic  function normalization:normalize-address ( $addr as xs:string* ) as xs:string* {

  (: The application identifier is left empty here. :)
  let $id   := ""
  let $url  := "http://where.yahooapis.com/geocode?q="
  (: Percent-encode each component so reserved characters cannot break the query string,
     then restore the '+' form for spaces. :)
  let $q2   := string-join(for $i in $addr return replace(encode-for-uri($i), "%20", "+"), ",")
  (: The ampersand has to be written as an entity reference inside a string literal. :)
  let $call := concat($url, $q2, "&amp;appid=", $id)
  let $doc  := http:get-node($call)[2]
  return distinct-values(
    (: Iterate value by value so that several elements with the same name do not
       raise a type error in string-length(). :)
    for $component in ("country", "state", "county", "city", "neighborhood", "street", "house")
    for $value in $doc//*[local-name() eq $component]/string(.)
    where string-length($value) > 0
    return $value )
};

(:~
 : <p>Placeholder for a function that is meant to use a phone number normalization Web service to 
 : convert a phone number given as input into a canonical representation.</p>
 : <p/>
 : <p><b> Attention : This function is still not implemented and always returns the empty sequence. </b></p> 
 :
 : @param $addr The strings encoding a phone number (currently ignored).
 : @return The empty sequence, because no normalization is performed yet.
 :)
declare function normalization:normalize-phone ( $addr as xs:string* ) as xs:string* {
  (: Not implemented: the argument is ignored. :)
  ()
};

(:~
 : <p>Internal auxiliary function that returns an XML representation for a dictionary that contains the 
 : time-shift value associated to different time-zone abbreviations.</p>
 :
 : <p>Each entry is a timeZone element whose name attribute holds the abbreviation and whose value
 : attribute holds the shift as a sign followed by two hour digits and two minute digits
 : (for instance "+0530"), without any separator. Alternative meanings of ambiguous abbreviations
 : are kept as XML comments next to the entry that is in effect.</p>
 :
 : <p>The entry "YEKY" is a historical misspelling of "YEKT"; both names are provided so that
 : existing callers keep working.</p>
 :
 : @return The dictionary element holding one timeZone element per supported abbreviation.
 :)
declare %private function normalization:timeZone-dictionary() as element(){
	<dictionary>
		<timeZone name="A" value="+0100"/>
		<timeZone name="ADT" value="-0300"/>
		<timeZone name="AFT" value="+0430"/>
		<timeZone name="AKDT" value="-0800"/>
		<timeZone name="AKST" value="-0900"/>
		<timeZone name="ALMT" value="+0600"/>
		<timeZone name="AMST" value="+0500"/>
		<!--<timeZone name="AMST" value="-0300"/>-->
		<timeZone name="AMT" value="+0400"/>
		<!--<timeZone name="AMT" value="-0400"/>-->
		<timeZone name="ANAST" value="+1200"/>
		<timeZone name="ANAT" value="+1200"/>
		<timeZone name="AQTT" value="+0500"/>
		<timeZone name="ART" value="-0300"/>
		<timeZone name="AST" value="-0400"/>
		<timeZone name="AZOST" value="+0000"/>
		<timeZone name="AZOT" value="-0100"/>
		<timeZone name="AZST" value="+0500"/>
		<timeZone name="AZT" value="+0400"/>
		<timeZone name="B" value="+0200"/>
		<timeZone name="BNT" value="+0800"/>
		<timeZone name="BOT" value="-0400"/>
		<timeZone name="BRST" value="-0200"/>
		<timeZone name="BRT" value="-0300"/>
		<!--<timeZone name="BST" value="+0600"/>-->
		<timeZone name="BST" value="+0100"/>
		<timeZone name="BTT" value="+0600"/>
		<timeZone name="C" value="+0300"/>
		<timeZone name="CAST" value="+0800"/>
		<timeZone name="CAT" value="+0200"/>
		<timeZone name="CCT" value="+0630"/>
		<!--<timeZone name="CDT" value="+1030"/>-->
		<!--<timeZone name="CDT" value="-0400"/>-->
		<timeZone name="CDT" value="-0500"/>
		<timeZone name="CEST" value="+0200"/>
		<timeZone name="CET" value="+0100"/>
		<timeZone name="CHADT" value="+1345"/>
		<timeZone name="CHAST" value="+1245"/>
		<timeZone name="CKT" value="-1000"/>
		<timeZone name="CLST" value="-0300"/>
		<timeZone name="CLT" value="-0400"/>
		<timeZone name="COT" value="-0500"/>
		<!--<timeZone name="CST" value="+0800"/>-->
		<!--<timeZone name="CST" value="+0930"/>-->
		<!--<timeZone name="CST" value="-0600"/>-->
		<!--<timeZone name="CST" value="-0500"/>-->
		<timeZone name="CST" value="-0600"/>
		<timeZone name="CVT" value="-0100"/>
		<timeZone name="CXT" value="+0700"/>
		<timeZone name="ChST" value="+1000"/>
		<timeZone name="D" value="+0400"/>
		<timeZone name="DAVT" value="+0700"/>
		<timeZone name="E" value="+0500"/>
		<timeZone name="EASST" value="-0500"/>
		<timeZone name="EAST" value="-0600"/>
		<timeZone name="EAT" value="+0300"/>
		<timeZone name="ECT" value="-0500"/>
		<!--<timeZone name="EDT" value="+1100"/>-->
		<timeZone name="EDT" value="-0400"/>
		<timeZone name="EEST" value="+0300"/>
		<timeZone name="EET" value="+0200"/>
		<timeZone name="EGST" value="+0000"/>
		<timeZone name="EGT" value="-0100"/>
		<timeZone name="EST" value="+1000"/>
		<!--<timeZone name="EST" value="-0500"/>-->
		<timeZone name="ET" value="-0500"/>
		<timeZone name="F" value="+0600"/>
		<timeZone name="FJST" value="+1300"/>
		<timeZone name="FJT" value="+1200"/>
		<timeZone name="FKST" value="-0300"/>
		<timeZone name="FKT" value="-0400"/>
		<timeZone name="FNT" value="-0200"/>
		<timeZone name="G" value="+0700"/>
		<timeZone name="GALT" value="-0600"/>
		<timeZone name="GAMT" value="-0900"/>
		<timeZone name="GET" value="+0400"/>
		<timeZone name="GFT" value="-0300"/>
		<timeZone name="GILT" value="+1200"/>
		<timeZone name="GMT" value="+0000"/>
		<timeZone name="GST" value="+0400"/>
		<timeZone name="GYT" value="-0400"/>
		<timeZone name="H" value="+0800"/>
		<timeZone name="HAA" value="-0300"/>
		<timeZone name="HAC" value="-0500"/>
		<timeZone name="HADT" value="-0900"/>
		<timeZone name="HAE" value="-0400"/>
		<timeZone name="HAP" value="-0700"/>
		<timeZone name="HAR" value="-0600"/>
		<timeZone name="HAST" value="-1000"/>
		<timeZone name="HAT" value="-0230"/>
		<timeZone name="HAY" value="-0800"/>
		<timeZone name="HKT" value="+0800"/>
		<timeZone name="HLV" value="-0430"/>
		<timeZone name="HNA" value="-0400"/>
		<timeZone name="HNC" value="-0600"/>
		<timeZone name="HNE" value="-0500"/>
		<timeZone name="HNP" value="-0800"/>
		<timeZone name="HNR" value="-0700"/>
		<timeZone name="HNT" value="-0330"/>
		<timeZone name="I" value="+0900"/>
		<timeZone name="ICT" value="+0700"/>
		<timeZone name="IDT" value="+0300"/>
		<timeZone name="IOT" value="+0600"/>
		<timeZone name="IRDT" value="+0430"/>
		<timeZone name="IRKST" value="+0900"/>
		<timeZone name="IRKT" value="+0800"/>
		<timeZone name="IRST" value="+0330"/>
		<!--<timeZone name="IST" value="+0200"/>-->
		<timeZone name="IST" value="+0530"/>
		<!--<timeZone name="IST" value="+0100"/>-->
		<timeZone name="JST" value="+0900"/>
		<timeZone name="K" value="+1000"/>
		<timeZone name="KGT" value="+0600"/>
		<timeZone name="KRAST" value="+0800"/>
		<timeZone name="KRAT" value="+0700"/>
		<timeZone name="KST" value="+0900"/>
		<timeZone name="KUYT" value="+0400"/>
		<timeZone name="L" value="+1100"/>
		<timeZone name="LHDT" value="+1100"/>
		<timeZone name="LHST" value="+1030"/>
		<timeZone name="LINT" value="+1400"/>
		<timeZone name="M" value="+1200"/>
		<timeZone name="MAGST" value="+1200"/>
		<timeZone name="MAGT" value="+1100"/>
		<timeZone name="MART" value="-0930"/>
		<timeZone name="MAWT" value="+0500"/>
		<timeZone name="MDT" value="-0600"/>
		<timeZone name="MHT" value="+1200"/>
		<timeZone name="MMT" value="+0630"/>
		<timeZone name="MSD" value="+0400"/>
		<timeZone name="MSK" value="+0300"/>
		<timeZone name="MST" value="-0700"/>
		<timeZone name="MUT" value="+0400"/>
		<timeZone name="MVT" value="+0500"/>
		<timeZone name="MYT" value="+0800"/>
		<timeZone name="N" value="-0100"/>
		<timeZone name="NCT" value="+1100"/>
		<timeZone name="NDT" value="-0230"/>
		<timeZone name="NFT" value="+1130"/>
		<timeZone name="NOVST" value="+0700"/>
		<timeZone name="NOVT" value="+0600"/>
		<timeZone name="NPT" value="+0545"/>
		<timeZone name="NST" value="-0330"/>
		<timeZone name="NUT" value="-1100"/>
		<timeZone name="NZDT" value="+1300"/>
		<timeZone name="NZST" value="+1200"/>
		<timeZone name="O" value="-0200"/>
		<timeZone name="OMSST" value="+0700"/>
		<timeZone name="OMST" value="+0600"/>
		<timeZone name="P" value="-0300"/>
		<timeZone name="PDT" value="-0700"/>
		<timeZone name="PET" value="-0500"/>
		<timeZone name="PETST" value="+1200"/>
		<timeZone name="PETT" value="+1200"/>
		<timeZone name="PGT" value="+1000"/>
		<timeZone name="PHOT" value="+1300"/>
		<timeZone name="PHT" value="+0800"/>
		<timeZone name="PKT" value="+0500"/>
		<timeZone name="PMDT" value="-0200"/>
		<timeZone name="PMST" value="-0300"/>
		<timeZone name="PONT" value="+1100"/>
		<timeZone name="PST" value="-0800"/>
		<timeZone name="PT" value="-0800"/>
		<timeZone name="PWT" value="+0900"/>
		<timeZone name="PYST" value="-0300"/>
		<timeZone name="PYT" value="-0400"/>
		<timeZone name="Q" value="-0400"/>
		<timeZone name="R" value="-0500"/>
		<timeZone name="RET" value="+0400"/>
		<timeZone name="S" value="-0600"/>
		<timeZone name="SAMT" value="+0400"/>
		<timeZone name="SAST" value="+0200"/>
		<timeZone name="SBT" value="+1100"/>
		<timeZone name="SCT" value="+0400"/>
		<timeZone name="SGT" value="+0800"/>
		<timeZone name="SRT" value="-0300"/>
		<timeZone name="SST" value="-1100"/>
		<timeZone name="T" value="-0700"/>
		<timeZone name="TAHT" value="-1000"/>
		<timeZone name="TFT" value="+0500"/>
		<timeZone name="TJT" value="+0500"/>
		<timeZone name="TKT" value="-1000"/>
		<timeZone name="TLT" value="+0900"/>
		<timeZone name="TMT" value="+0500"/>
		<timeZone name="TVT" value="+1200"/>
		<timeZone name="U" value="-0800"/>
		<timeZone name="ULAT" value="+0800"/>
		<timeZone name="UTC" value="+0000"/>
		<timeZone name="UYST" value="-0200"/>
		<timeZone name="UYT" value="-0300"/>
		<timeZone name="UZT" value="+0500"/>
		<timeZone name="V" value="-0900"/>
		<timeZone name="VET" value="-0430"/>
		<timeZone name="VLAST" value="+1100"/>
		<timeZone name="VLAT" value="+1000"/>
		<timeZone name="VUT" value="+1100"/>
		<timeZone name="W" value="-1000"/>
		<timeZone name="WAST" value="+0200"/>
		<timeZone name="WAT" value="+0100"/>
		<timeZone name="WDT" value="+0900"/>
		<timeZone name="WEST" value="+0100"/>
		<timeZone name="WET" value="+0000"/>
		<timeZone name="WFT" value="+1200"/>
		<timeZone name="WGST" value="-0200"/>
		<timeZone name="WGT" value="-0300"/>
		<timeZone name="WIB" value="+0700"/>
		<timeZone name="WIT" value="+0900"/>
		<timeZone name="WITA" value="+0800"/>
		<!--<timeZone name="WST" value="+0100"/>-->
		<!--<timeZone name="WST" value="-1100"/>-->
		<timeZone name="WST" value="+0800"/>
		<timeZone name="WT" value="+0000"/>
		<timeZone name="X" value="-1100"/>
		<timeZone name="Y" value="-1200"/>
		<timeZone name="YAKST" value="+1000"/>
		<timeZone name="YAKT" value="+0900"/>
		<timeZone name="YAPT" value="+1000"/>
		<timeZone name="YEKST" value="+0600"/>
		<timeZone name="YEKT" value="+0500"/>
		<timeZone name="YEKY" value="+0500"/>
		<timeZone name="Z" value="+0000"/>
	</dictionary>
};

(:~
 : <p>Internal auxiliary function that returns an XML representation for a dictionary that contains a 
 : numeric value associated to different month name abbreviations.</p>
 :
 : <p>Each month element carries the full English month name in its name attribute, the two-digit
 : month number in its value attribute, and one abrv child per accepted spelling of the three-letter
 : abbreviation (title case, lower case and upper case).</p>
 :
 : @return The dictionary element holding one month element per calendar month.
 :)
declare %private function normalization:month-dictionary() as element(){
<dictionary>
	<month name="January" value="01">
		<abrv>Jan</abrv>
		<abrv>jan</abrv>
		<abrv>JAN</abrv>
	</month>
	<month name="February" value="02">
		<abrv>Feb</abrv>
		<abrv>feb</abrv>
		<abrv>FEB</abrv>
	</month>
	<month name="March" value="03">
		<abrv>Mar</abrv>
		<abrv>mar</abrv>
		<abrv>MAR</abrv>
	</month>
	<month name="April" value="04">
		<abrv>Apr</abrv>
		<abrv>apr</abrv>
		<abrv>APR</abrv>
	</month>
	<month name="May" value="05">
		<abrv>May</abrv>
		<abrv>MAY</abrv>
		<abrv>may</abrv>
	</month>
	<month name="June" value="06">
		<abrv>Jun</abrv>
		<abrv>jun</abrv>
		<abrv>JUN</abrv>
	</month>
	<month name="July" value="07">
		<abrv>Jul</abrv>
		<abrv>jul</abrv>
		<abrv>JUL</abrv>
	</month>
	<month name="August" value="08">
		<abrv>aug</abrv>
		<abrv>Aug</abrv>
		<abrv>AUG</abrv>
	</month>
	<month name="September" value="09">
		<abrv>sep</abrv>
		<abrv>Sep</abrv>
		<abrv>SEP</abrv>
	</month>
	<month name="October" value="10">
		<abrv>oct</abrv>
		<abrv>OCT</abrv>
		<abrv>Oct</abrv>
	</month>
	<month name="November" value="11">
		<abrv>nov</abrv>
		<abrv>Nov</abrv>
		<abrv>NOV</abrv>
	</month>
	<month name="December" value="12">
		<abrv>dec</abrv>
		<abrv>Dec</abrv>
		<abrv>DEC</abrv>
	</month>
</dictionary>
};

(:~
 : <p>Internal auxiliary function that checks if a string is in xs:dateTime format</p>
 :
 : <p>The string is returned exactly as given. It is not rebuilt from its parts, so values whose
 : year is negative, has leading zeros or has more than four digits are preserved.</p>
 :
 : @param $dateTime The string representation for the dateTime.
 : @return The dateTime string if it represents the xs:dateTime format.
 : @error err:FORG0001 if the string cannot be cast to xs:dateTime.
 :)
declare %private function normalization:check-dateTime($dateTime as xs:string) as xs:string{
 if ($dateTime castable as xs:dateTime)
 then $dateTime
 else
   (: Same error code that a failed cast to xs:dateTime raises. :)
   error(QName("http://www.w3.org/2005/xqt-errors", "err:FORG0001"),
         concat("Invalid xs:dateTime value: ", $dateTime))
};

(:~
 : <p>Internal auxiliary function that checks if a string is in xs:date format</p>
 :
 : <p>The string is returned exactly as given. It is not rebuilt from its parts, so values whose
 : year is negative, has leading zeros or has more than four digits are preserved.</p>
 :
 : @param $date The string representation for the date.
 : @return The date string if it represents the xs:date format.
 : @error err:FORG0001 if the string cannot be cast to xs:date.
 :)
declare %private function normalization:check-date($date as xs:string) as xs:string{
 if ($date castable as xs:date)
 then $date
 else
   (: Same error code that a failed cast to xs:date raises. :)
   error(QName("http://www.w3.org/2005/xqt-errors", "err:FORG0001"),
         concat("Invalid xs:date value: ", $date))
};

(:~
 : <p>Internal auxiliary function that checks if a string is in xs:time format</p>
 :
 :
 : @param $Time The string representation for the time.
 : @return The time string, unchanged, if it represents the xs:time format.
 : @error err:FORG0001 if the string cannot be cast to xs:time.
 :)
declare %private function normalization:check-time($Time as xs:string) as xs:string{
 if ($Time castable as xs:time)
 then $Time
 else
   (: Raise the error a failed cast to xs:time would raise, rather than returning the
      empty sequence, which the declared return type does not allow. :)
   error(QName("http://www.w3.org/2005/xqt-errors", "err:FORG0001"),
         concat("Invalid xs:time value: ", $Time))
};