Difference between revisions of "Archive Module"

From BaseX Documentation
Jump to navigation Jump to search
Line 1: Line 1:
This [[Module Library|XQuery Module]] contains functions to handle ZIP archives. New ZIP archives can be created, existing archives can be updated, and the archive entries can be listed and extracted. This module will soon replace the existing [[ZIP Module]] ([http://spex.basex.org/index.php?title=ZIP_Module more information]).
+
This [[Module Library|XQuery Module]] contains functions to handle ZIP archives. New ZIP archives can be created, existing archives can be updated, and the archive entries can be listed and extracted. This module may soon replace the existing [[ZIP Module]] ([http://spex.basex.org/index.php?title=ZIP_Module more information]).
  
 
=Conventions=
 
=Conventions=
  
All functions in this module are assigned to the {{Code|http://basex.org/modules/zip2}} namespace, which is statically bound to the {{Code|zip2}} prefix.<br/>
+
All functions in this module are assigned to the {{Code|http://basex.org/modules/archive}} namespace, which is statically bound to the {{Code|archive}} prefix.<br/>
 
All errors are assigned to the {{Code|http://basex.org/errors}} namespace, which is statically bound to the {{Code|bxerr}} prefix.
 
All errors are assigned to the {{Code|http://basex.org/errors}} namespace, which is statically bound to the {{Code|bxerr}} prefix.
  
 
=Functions=
 
=Functions=
  
==zip2:create==
+
==archive:create==
 
{| width='100%'
 
{| width='100%'
 
|-
 
|-
 
| width='90' | '''Signatures'''
 
| width='90' | '''Signatures'''
|{{Func|zip2:create|$entries as element(entry)*, $contents as item()*|xs:base64Binary}}<br />
+
|{{Func|archive:create|$entries as element(entry)*, $contents as item()*|xs:base64Binary}}<br />
 
|-
 
|-
 
| '''Summary'''
 
| '''Summary'''
Line 33: Line 33:
 
|The following one-liner creates an archive {{Code|archive.zip}} with one file {{Code|file.txt}}:
 
|The following one-liner creates an archive {{Code|archive.zip}} with one file {{Code|file.txt}}:
 
<pre class="brush:xquery">
 
<pre class="brush:xquery">
zip2:create(<entry>file.txt</entry>, 'Hello World')
+
archive:create(<entry>file.txt</entry>, 'Hello World')
 
</pre>
 
</pre>
 
The following function creates an archive {{Code|mp3.zip}}, which contains all MP3 files of a local directory:
 
The following function creates an archive {{Code|mp3.zip}}, which contains all MP3 files of a local directory:
Line 39: Line 39:
 
let $path  := 'audio/'
 
let $path  := 'audio/'
 
let $files := file:list($path, true(), '*.mp3')
 
let $files := file:list($path, true(), '*.mp3')
let $zip  := zip2:create(
+
let $zip  := archive:create(
 
   for $f in $files return <entry>{ $f }</entry>,
 
   for $f in $files return <entry>{ $f }</entry>,
 
   for $f in $files return file:read-binary($path || $f)
 
   for $f in $files return file:read-binary($path || $f)
Line 46: Line 46:
 
|}
 
|}
  
==zip2:entries==
+
==archive:entries==
  
 
{| width='100%'
 
{| width='100%'
 
|-
 
|-
 
| width='90' | '''Signatures'''
 
| width='90' | '''Signatures'''
|{{Func|zip2:entries|$zip as xs:base64Binary|element(entry)*}}<br />
+
|{{Func|archive:entries|$zip as xs:base64Binary|element(entry)*}}<br />
 
|-
 
|-
 
| '''Summary'''
 
| '''Summary'''
Line 71: Line 71:
 
|Sums up the file sizes of all entries of a JAR file:
 
|Sums up the file sizes of all entries of a JAR file:
 
<pre class="brush:xquery">
 
<pre class="brush:xquery">
sum(zip2:entries(file:read-binary('zip.zip'))/@size)
+
sum(archive:entries(file:read-binary('zip.zip'))/@size)
 
</pre>
 
</pre>
 
|}
 
|}
  
==zip2:extract-text==
+
==archive:extract-text==
  
 
{| width='100%'
 
{| width='100%'
 
|-
 
|-
 
| width='90' | '''Signatures'''
 
| width='90' | '''Signatures'''
|{{Func|zip2:extract-text|$zip as xs:base64Binary|xs:string*}}<br/>{{Func|zip2:extract-text|$zip as xs:base64Binary, $entry-names as xs:string*|xs:string*}}<br/>{{Func|zip2:extract-text|$zip as xs:base64Binary, $entry-names as xs:string*, $encoding as xs:string|xs:string*}}<br/>
+
|{{Func|archive:extract-text|$zip as xs:base64Binary|xs:string*}}<br/>{{Func|archive:extract-text|$zip as xs:base64Binary, $entry-names as xs:string*|xs:string*}}<br/>{{Func|archive:extract-text|$zip as xs:base64Binary, $entry-names as xs:string*, $encoding as xs:string|xs:string*}}<br/>
 
|-
 
|-
 
| '''Summary'''
 
| '''Summary'''
Line 92: Line 92:
 
<pre class="brush:xquery">
 
<pre class="brush:xquery">
 
let $archive := file:read-binary("documents.zip")
 
let $archive := file:read-binary("documents.zip")
for $entry in zip2:entries($archive)[ends-with(., '.txt')]
+
for $entry in archive:entries($archive)[ends-with(., '.txt')]
return zip2:extract-text($archive, $entry)
+
return archive:extract-text($archive, $entry)
 
</pre>
 
</pre>
 
|}
 
|}
  
==zip2:extract-binary==
+
==archive:extract-binary==
  
 
{| width='100%'
 
{| width='100%'
 
|-
 
|-
 
| width='90' | '''Signatures'''
 
| width='90' | '''Signatures'''
|{{Func|zip2:extract-binary|$zip as xs:base64Binary|xs:string*}}<br/>{{Func|zip2:extract-binary|$zip as xs:base64Binary, $entry-names as xs:string*|xs:base64Binary*}}
+
|{{Func|archive:extract-binary|$zip as xs:base64Binary|xs:string*}}<br/>{{Func|archive:extract-binary|$zip as xs:base64Binary, $entry-names as xs:string*|xs:base64Binary*}}
 
|-
 
|-
 
| '''Summary'''
 
| '''Summary'''
Line 114: Line 114:
 
<pre class="brush:xquery">
 
<pre class="brush:xquery">
 
let $archive  := file:read-binary('archive.zip')
 
let $archive  := file:read-binary('archive.zip')
let $entries  := zip2:entries($archive)
+
let $entries  := archive:entries($archive)
let $contents := zip2:extract-binary($archive)
+
let $contents := archive:extract-binary($archive)
 
for $entry at $p in $entries
 
for $entry at $p in $entries
 
return file:write-binary($entry, $contents[$p])
 
return file:write-binary($entry, $contents[$p])
Line 121: Line 121:
 
|}
 
|}
  
==zip2:update==
+
==archive:update==
  
 
{| width='100%'
 
{| width='100%'
 
|-
 
|-
 
| width='90' | '''Signatures'''
 
| width='90' | '''Signatures'''
|{{Func|zip2:update|$zip as xs:base64Binary, $entries as element(entry)*, $contents as item()*|xs:base64Binary}}
+
|{{Func|archive:update|$zip as xs:base64Binary, $entries as element(entry)*, $contents as item()*|xs:base64Binary}}
 
|-
 
|-
 
| '''Summary'''
 
| '''Summary'''
|Adds new entries and replaces existing entries in a zip archive.<br/>The format of {{Code|$entries}} and {{Code|$contents}} is the same as for [[#zip2:create|zip2:create]].
+
|Adds new entries and replaces existing entries in a zip archive.<br/>The format of {{Code|$entries}} and {{Code|$contents}} is the same as for [[#archive:create|archive:create]].
 
|-
 
|-
 
| '''Errors'''
 
| '''Errors'''
Line 143: Line 143:
 
let $archive := file:read-binary($input)
 
let $archive := file:read-binary($input)
 
let $entry  :=
 
let $entry  :=
   copy $c := fn:parse-xml(zip2:extract-text($archive, $doc))
+
   copy $c := fn:parse-xml(archive:extract-text($archive, $doc))
 
   modify replace value of node $c//*[text() = "HELLO WORLD!"] with "HELLO UNIVERSE!"
 
   modify replace value of node $c//*[text() = "HELLO WORLD!"] with "HELLO UNIVERSE!"
 
   return fn:serialize($c)
 
   return fn:serialize($c)
let $updated := zip2:update($archive, <entry>{ $doc }</entry>, $entry)
+
let $updated := archive:update($archive, <entry>{ $doc }</entry>, $entry)
 
return file:write-binary($output, $updated)
 
return file:write-binary($output, $updated)
 
</pre>
 
</pre>
 
|}
 
|}
  
==zip2:delete==
+
==archive:delete==
  
 
{| width='100%'
 
{| width='100%'
 
|-
 
|-
 
| width='90' | '''Signatures'''
 
| width='90' | '''Signatures'''
|{{Func|zip2:delete|$zip as xs:base64Binary, $entry-names as xs:string*|xs:base64Binary}}
+
|{{Func|archive:delete|$zip as xs:base64Binary, $entry-names as xs:string*|xs:base64Binary}}
 
|-
 
|-
 
| '''Summary'''
 
| '''Summary'''
Line 168: Line 168:
 
<pre class="brush:xquery">
 
<pre class="brush:xquery">
 
let $zip := file:read-binary('old.zip')
 
let $zip := file:read-binary('old.zip')
let $entries := zip2:entries($zip)[matches(., '\.x?html?$', 'i')]
+
let $entries := archive:entries($zip)[matches(., '\.x?html?$', 'i')]
return file:write-binary('new.zip', zip2:delete($zip, $entries))
+
return file:write-binary('new.zip', archive:delete($zip, $entries))
 
</pre>
 
</pre>
 
|}
 
|}

Revision as of 18:39, 30 May 2012

This XQuery Module contains functions to handle ZIP archives. New ZIP archives can be created, existing archives can be updated, and the archive entries can be listed and extracted. This module may soon replace the existing ZIP Module (more information).

Conventions

All functions in this module are assigned to the http://basex.org/modules/archive namespace, which is statically bound to the archive prefix.
All errors are assigned to the http://basex.org/errors namespace, which is statically bound to the bxerr prefix.

Functions

archive:create

Signatures archive:create($entries as element(entry)*, $contents as item()*) as xs:base64Binary
Summary Creates a new ZIP archive from the specified entries and contents.
The $entries descriptors contain meta information required to create new ZIP entries. Besides the mandatory entry name, which is specified in a text node, further optional attributes can be specified:
  • last-modified: timestamp, specified as xs:dateTime (default: current time)
  • compression-level: 0-9, 0 = uncompressed (default: 8)
  • encoding: for textual entries (default: UTF-8)

An example:

<entry last-modified='2011-11-11T11:11:11'
       compression-level='9'
       encoding='US-ASCII'>hello.txt</entry>

The actual $contents must be xs:string or xs:base64Binary items.

Errors FOZ20001: the number of entries and contents differs.
FOZ20002: (some of) the contents are not of type xs:string or xs:base64Binary.
FOZ20003: entry descriptors contain invalid entry names, timestamps or compression levels.
FOZ20004: the specified encoding is invalid or not supported, or the string conversion failed.
FOZ29999: archive creation failed for some other reason.
Examples The following one-liner creates an archive archive.zip with one file file.txt:
archive:create(<entry>file.txt</entry>, 'Hello World')

The following query creates an archive mp3.zip, which contains all MP3 files of a local directory:

let $path  := 'audio/'
let $files := file:list($path, true(), '*.mp3')
let $zip   := archive:create(
  for $f in $files return <entry>{ $f }</entry>,
  for $f in $files return file:read-binary($path || $f)
)
return file:write-binary('mp3.zip', $zip)

archive:entries

Signatures archive:entries($zip as xs:base64Binary) as element(entry)*
Summary Returns the entry descriptors of the given zip archive. A descriptor contains the following attributes:
  • size: original file size
  • last-modified: timestamp, formatted as xs:dateTime
  • compressed-size: compressed file size

An example:

<entry size="1840" last-modified="2009-03-20T03:30:32" compressed-size="672">
  doc/index.html
</entry>
Errors FOZ29999: ZIP processing failed for some other reason.
Examples Sums up the file sizes of all entries of a ZIP archive:
sum(archive:entries(file:read-binary('zip.zip'))/@size)

archive:extract-text

Signatures archive:extract-text($zip as xs:base64Binary) as xs:string*
archive:extract-text($zip as xs:base64Binary, $entry-names as xs:string*) as xs:string*
archive:extract-text($zip as xs:base64Binary, $entry-names as xs:string*, $encoding as xs:string) as xs:string*
Summary Extracts archive entries and returns them as texts.
The returned entries can be limited to $entry-names.
The optional parameter $encoding defines the encoding of the file.
Errors FOZ20004: the specified encoding is invalid or not supported, or the string conversion failed.
FOZ29999: ZIP processing failed for some other reason.
Examples The following expression extracts all .txt files from an archive:
let $archive := file:read-binary("documents.zip")
for $entry in archive:entries($archive)[ends-with(., '.txt')]
return archive:extract-text($archive, $entry)

archive:extract-binary

Signatures archive:extract-binary($zip as xs:base64Binary) as xs:base64Binary*
archive:extract-binary($zip as xs:base64Binary, $entry-names as xs:string*) as xs:base64Binary*
Summary Extracts archive entries and returns them as binaries.
The returned entries can be limited to $entry-names.
Errors FOZ29999: ZIP processing failed for some other reason.
Examples This example unzips all files of an archive to the current directory:
let $archive  := file:read-binary('archive.zip')
let $entries  := archive:entries($archive)
let $contents := archive:extract-binary($archive)
for $entry at $p in $entries
return file:write-binary($entry, $contents[$p])

archive:update

Signatures archive:update($zip as xs:base64Binary, $entries as element(entry)*, $contents as item()*) as xs:base64Binary
Summary Adds new entries and replaces existing entries in a zip archive.
The format of $entries and $contents is the same as for archive:create.
Errors FOZ20001: the number of entries and contents differs.
FOZ20002: (some of) the contents are not of type xs:string or xs:base64Binary.
FOZ20003: entry descriptors contain invalid entry names, timestamps or compression levels.
FOZ20004: the specified encoding is invalid or not supported, or the string conversion failed.
FOZ29999: ZIP processing failed for some other reason.
Examples This example replaces texts in a Word document:
declare variable $input  := "HelloWorld.docx";
declare variable $output := "HelloUniverse.docx";
declare variable $doc    := "word/document.xml";
 
let $archive := file:read-binary($input)
let $entry   :=
  copy $c := fn:parse-xml(archive:extract-text($archive, $doc))
  modify replace value of node $c//*[text() = "HELLO WORLD!"] with "HELLO UNIVERSE!"
  return fn:serialize($c)
let $updated := archive:update($archive, <entry>{ $doc }</entry>, $entry)
return file:write-binary($output, $updated)

archive:delete

Signatures archive:delete($zip as xs:base64Binary, $entry-names as xs:string*) as xs:base64Binary
Summary Deletes entries from a zip archive.
The $entry-names argument contains the names of the entries to be deleted.
Errors FOZ29999: ZIP processing failed for some other reason.
Examples This example deletes all HTML files in an archive and creates a new file:
let $zip := file:read-binary('old.zip')
let $entries := archive:entries($zip)[matches(., '\.x?html?$', 'i')]
return file:write-binary('new.zip', archive:delete($zip, $entries))

Errors

Code Description
FOZ20001 The number of specified entries and contents differs.
FOZ20002 (Some of) the contents are not of type xs:string or xs:base64Binary.
FOZ20003 Entry descriptors contain invalid entry names, timestamps or compression levels.
FOZ20004 The specified encoding is invalid or not supported, or the string conversion failed.
FOZ29999 ZIP processing failed for some other reason.

Changelog

The module was introduced with Version 7.3.