Difference between revisions of "Statistics"
Jump to navigation
Jump to search
(17 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
− | + | This article is part of the [[Advanced User's Guide]]. | |
+ | It lists statistics on various database instances that have been created with BaseX, with value and full-text indexes turned off. The URLs to the original sources, if available or public, are listed below. | ||
− | + | [[Databases]] in BaseX are light-weight. If a database limit is reached, you can distribute your documents across multiple database instances and access all of them with a single XQuery expression. | |
− | |||
− | |||
== Databases == | == Databases == | ||
+ | |||
{| class="wikitable sortable" | {| class="wikitable sortable" | ||
|- | |- | ||
!Instances | !Instances | ||
− | ! | + | !FileSize |
− | !# | + | !#Files |
− | ! | + | !DbSize |
− | !# | + | !#Nodes |
− | !# | + | !#Attr |
− | !# | + | !#ENames |
− | !# | + | !#ANames |
− | !# | + | !#URIs |
− | ! | + | !Height |
+ | |- | ||
+ | | '''Limits''' | ||
+ | |'''512 GiB'''<br/>(2^39 Bytes) | ||
+ | |'''536'870'912'''<br/>(2^29) | ||
+ | |''no limit''<br/> | ||
+ | |'''2'147'483'648'''<br/>(2^31) | ||
+ | |''no limit''<br/> | ||
+ | |'''32768'''<br/>(2^15) | ||
+ | |'''32768'''<br/>(2^15) | ||
+ | |'''256'''<br/>(2^8) | ||
+ | |''no limit''<br/> | ||
|- | |- | ||
| RuWikiHist | | RuWikiHist | ||
Line 23: | Line 34: | ||
|1 | |1 | ||
|416 GiB | |416 GiB | ||
− | |324 | + | |324'848'508 |
|3 | |3 | ||
|21 | |21 | ||
Line 34: | Line 45: | ||
| 1 | | 1 | ||
| 120 GiB | | 120 GiB | ||
− | | 179 | + | | 179'199'662 |
| 3 | | 3 | ||
| 21 | | 21 | ||
Line 45: | Line 56: | ||
| 1 | | 1 | ||
| 75 GiB | | 75 GiB | ||
− | | 134 | + | | 134'380'393 |
| 3 | | 3 | ||
| 21 | | 21 | ||
Line 56: | Line 67: | ||
| 1 | | 1 | ||
| 64 GiB | | 64 GiB | ||
− | | 1 | + | | 1'615'071'348 |
| 2 | | 2 | ||
| 74 | | 74 | ||
Line 67: | Line 78: | ||
| 1 | | 1 | ||
| 52 GiB | | 52 GiB | ||
− | | 401 | + | | 401'456'348 |
| 3 | | 3 | ||
| 21 | | 21 | ||
Line 78: | Line 89: | ||
| 379 | | 379 | ||
| 36 GiB | | 36 GiB | ||
− | | 1 | + | | 1'623'764'254 |
| 2 | | 2 | ||
| 84 | | 84 | ||
Line 89: | Line 100: | ||
| 1 | | 1 | ||
| 37 GiB | | 37 GiB | ||
− | | 1 | + | | 1'631'218'984 |
| 3 | | 3 | ||
| 245 | | 245 | ||
Line 96: | Line 107: | ||
| 9 | | 9 | ||
|- | |- | ||
− | | | + | | Inex2009 |
| 31 GiB | | 31 GiB | ||
− | | 2 | + | | 2'666'500 |
| 34 GiB | | 34 GiB | ||
− | | 1 | + | | 1'336'110'639 |
| 15 | | 15 | ||
− | | 28 | + | | 28'034 |
| 451 | | 451 | ||
| 1 | | 1 | ||
Line 109: | Line 120: | ||
| CoPhIR | | CoPhIR | ||
| 29 GiB | | 29 GiB | ||
− | | 10 | + | | 10'000'000 |
| 31 GiB | | 31 GiB | ||
− | | 1 | + | | 1'104'623'376 |
| 10 | | 10 | ||
| 42 | | 42 | ||
Line 122: | Line 133: | ||
| 1 | | 1 | ||
| 25 GiB | | 25 GiB | ||
− | | 198 | + | | 198'546'747 |
| 3 | | 3 | ||
| 24 | | 24 | ||
Line 133: | Line 144: | ||
| 1 | | 1 | ||
| 26 GiB | | 26 GiB | ||
− | | 645 | + | | 645'997'965 |
| 2 | | 2 | ||
| 74 | | 74 | ||
Line 144: | Line 155: | ||
| 1 | | 1 | ||
| 19 GiB | | 19 GiB | ||
− | | 860 | + | | 860'304'235 |
| 5 | | 5 | ||
| 7 | | 7 | ||
Line 155: | Line 166: | ||
| 1 | | 1 | ||
| 13 GiB | | 13 GiB | ||
− | | 432 | + | | 432'628'105 |
| 12 | | 12 | ||
| 26 | | 26 | ||
Line 164: | Line 175: | ||
| NewYorkTimes | | NewYorkTimes | ||
| 12 GiB | | 12 GiB | ||
− | | 1 | + | | 1'855'659 |
| 13 GiB | | 13 GiB | ||
− | | 280 | + | | 280'407'005 |
| 5 | | 5 | ||
| 41 | | 41 | ||
Line 177: | Line 188: | ||
| 1 | | 1 | ||
| 14 GiB | | 14 GiB | ||
− | | 589 | + | | 589'650'535 |
| 8 | | 8 | ||
| 47 | | 47 | ||
Line 188: | Line 199: | ||
| 1 | | 1 | ||
| 13 GiB | | 13 GiB | ||
− | | 323 | + | | 323'083'409 |
| 2 | | 2 | ||
| 74 | | 74 | ||
Line 197: | Line 208: | ||
| IntAct | | IntAct | ||
| 7973 MiB | | 7973 MiB | ||
− | | 25 | + | | 25'624 |
| 6717 MiB | | 6717 MiB | ||
− | | 297 | + | | 297'478'392 |
| 7 | | 7 | ||
| 64 | | 64 | ||
Line 210: | Line 221: | ||
| 1 | | 1 | ||
| 10 GiB | | 10 GiB | ||
− | | 443 | + | | 443'627'994 |
| 8 | | 8 | ||
| 61 | | 61 | ||
Line 221: | Line 232: | ||
| 1 | | 1 | ||
| 8028 MiB | | 8028 MiB | ||
− | | 395 | + | | 395'871'872 |
| 2 | | 2 | ||
| 22 | | 22 | ||
Line 232: | Line 243: | ||
| 1 | | 1 | ||
| 5171 MiB | | 5171 MiB | ||
− | | 6 | + | | 6'910'669 |
| 3 | | 3 | ||
| 19 | | 19 | ||
Line 243: | Line 254: | ||
| 1 | | 1 | ||
| 5422 MiB | | 5422 MiB | ||
− | | 241 | + | | 241'274'406 |
| 8 | | 8 | ||
| 70 | | 70 | ||
Line 254: | Line 265: | ||
| 1 | | 1 | ||
| 5532 MiB | | 5532 MiB | ||
− | | 167 | + | | 167'328'039 |
| 23 | | 23 | ||
| 186 | | 186 | ||
Line 263: | Line 274: | ||
| Wikicorpus | | Wikicorpus | ||
| 4492 MiB | | 4492 MiB | ||
− | | 659 | + | | 659'338 |
| 4432 MiB | | 4432 MiB | ||
− | | 157 | + | | 157'948'561 |
| 12 | | 12 | ||
− | | 1 | + | | 1'257 |
− | | 2 | + | | 2'687 |
| 2 | | 2 | ||
| 50 | | 50 | ||
Line 276: | Line 287: | ||
| 1 | | 1 | ||
| 3537 MiB | | 3537 MiB | ||
− | | 98 | + | | 98'433'194 |
| 1 | | 1 | ||
| 11 | | 11 | ||
Line 285: | Line 296: | ||
| CoPhIR | | CoPhIR | ||
| 2695 MiB | | 2695 MiB | ||
− | | 1 | + | | 1'000'000 |
| 2882 MiB | | 2882 MiB | ||
− | | 101 | + | | 101'638'857 |
| 10 | | 10 | ||
| 42 | | 42 | ||
Line 298: | Line 309: | ||
| 1 | | 1 | ||
| 2410 MiB | | 2410 MiB | ||
− | | 104 | + | | 104'845'819 |
| 3 | | 3 | ||
| 6 | | 6 | ||
Line 309: | Line 320: | ||
| 1 | | 1 | ||
| 2462 MiB | | 2462 MiB | ||
− | | 102 | + | | 102'901'519 |
| 2 | | 2 | ||
| 7 | | 7 | ||
Line 320: | Line 331: | ||
| 1 | | 1 | ||
| 1303 MiB | | 1303 MiB | ||
− | | 32 | + | | 32'298'989 |
| 2 | | 2 | ||
| 74 | | 74 | ||
Line 331: | Line 342: | ||
| 1 | | 1 | ||
| 850 MiB | | 850 MiB | ||
− | | 44 | + | | 44'821'506 |
| 4 | | 4 | ||
| 3 | | 3 | ||
Line 342: | Line 353: | ||
| 1 | | 1 | ||
| 918 MiB | | 918 MiB | ||
− | | 46 | + | | 46'401'941 |
| 3 | | 3 | ||
| 23 | | 23 | ||
Line 351: | Line 362: | ||
| Twitter | | Twitter | ||
| 736 MiB | | 736 MiB | ||
− | | 1 | + | | 1'177'495 |
| 767 MiB | | 767 MiB | ||
− | | 15 | + | | 15'309'015 |
| 0 | | 0 | ||
| 8 | | 8 | ||
Line 362: | Line 373: | ||
| Organizations | | Organizations | ||
| 733 MiB | | 733 MiB | ||
− | | 1 | + | | 1'019'132 |
| 724 MiB | | 724 MiB | ||
− | | 33 | + | | 33'112'392 |
| 3 | | 3 | ||
| 38 | | 38 | ||
Line 375: | Line 386: | ||
| 1 | | 1 | ||
| 944 MiB | | 944 MiB | ||
− | | 36 | + | | 36'878'181 |
| 4 | | 4 | ||
| 35 | | 35 | ||
Line 384: | Line 395: | ||
| Feeds | | Feeds | ||
| 692 MiB | | 692 MiB | ||
− | | 444 | + | | 444'014 |
| 604 MiB | | 604 MiB | ||
− | | 5 | + | | 5'933'713 |
| 0 | | 0 | ||
| 8 | | 8 | ||
Line 397: | Line 408: | ||
| 1 | | 1 | ||
| 407 MiB | | 407 MiB | ||
− | | 21 | + | | 21'602'141 |
| 5 | | 5 | ||
| 55 | | 55 | ||
Line 408: | Line 419: | ||
| 38 | | 38 | ||
| 273 MiB | | 273 MiB | ||
− | | 14 | + | | 14'512'851 |
| 1 | | 1 | ||
| 111 | | 111 | ||
Line 419: | Line 430: | ||
| 1 | | 1 | ||
| 195 MiB | | 195 MiB | ||
− | | 10 | + | | 10'401'847 |
| 5 | | 5 | ||
| 66 | | 66 | ||
Line 428: | Line 439: | ||
| ZDNET | | ZDNET | ||
| 130 MiB | | 130 MiB | ||
− | | 95 | + | | 95'663 |
| 133 MiB | | 133 MiB | ||
− | | 3 | + | | 3'060'186 |
| 21 | | 21 | ||
| 40 | | 40 | ||
Line 441: | Line 452: | ||
| 1 | | 1 | ||
| 171 MiB | | 171 MiB | ||
− | | 8 | + | | 8'592'666 |
| 0 | | 0 | ||
| 10 | | 10 | ||
Line 452: | Line 463: | ||
| 1 | | 1 | ||
| 130 MiB | | 130 MiB | ||
− | | 3 | + | | 3'221'926 |
| 2 | | 2 | ||
| 74 | | 74 | ||
Line 463: | Line 474: | ||
| 1 | | 1 | ||
| 86 MiB | | 86 MiB | ||
− | | 3 | + | | 3'832'028 |
| 1 | | 1 | ||
| 58 | | 58 | ||
Line 474: | Line 485: | ||
| 1 | | 1 | ||
| 93 MiB | | 93 MiB | ||
− | | 4 | + | | 4'842'638 |
| 4 | | 4 | ||
| 3 | | 3 | ||
Line 485: | Line 496: | ||
| 1 | | 1 | ||
| 92 MiB | | 92 MiB | ||
− | | 3 | + | | 3'829'513 |
| 1 | | 1 | ||
| 250 | | 250 | ||
Line 494: | Line 505: | ||
| DBLP2 | | DBLP2 | ||
| 80 MiB | | 80 MiB | ||
− | | 170 | + | | 170'843 |
| 102 MiB | | 102 MiB | ||
− | | 4 | + | | 4'044'649 |
| 4 | | 4 | ||
| 35 | | 35 | ||
Line 507: | Line 518: | ||
| 3 | | 3 | ||
| 39 MiB | | 39 MiB | ||
− | | 2 | + | | 2'070'157 |
| 7 | | 7 | ||
| 104 | | 104 | ||
Line 518: | Line 529: | ||
| 1 | | 1 | ||
| 68 MiB | | 68 MiB | ||
− | | 3 | + | | 3'784'285 |
| 0 | | 0 | ||
| 60 | | 60 | ||
Line 529: | Line 540: | ||
| 6 | | 6 | ||
| 66 MiB | | 66 MiB | ||
− | | 3 | + | | 3'468'606 |
| 1 | | 1 | ||
| 28 | | 28 | ||
Line 540: | Line 551: | ||
| 1 | | 1 | ||
| 45 MiB | | 45 MiB | ||
− | | 1 | + | | 1'619'443 |
| 3 | | 3 | ||
| 21 | | 21 | ||
Line 549: | Line 560: | ||
| HCIBIB2 | | HCIBIB2 | ||
| 32 MiB | | 32 MiB | ||
− | | 26 | + | | 26'390 |
| 33 MiB | | 33 MiB | ||
− | | 617 | + | | 617'023 |
| 1 | | 1 | ||
| 39 | | 39 | ||
Line 562: | Line 573: | ||
| 1 | | 1 | ||
| 25 MiB | | 25 MiB | ||
− | | 845 | + | | 845'805 |
| 2 | | 2 | ||
| 61 | | 61 | ||
Line 573: | Line 584: | ||
| 1 | | 1 | ||
| 19 MiB | | 19 MiB | ||
− | | 868 | + | | 868'980 |
| 6 | | 6 | ||
| 7 | | 7 | ||
Line 584: | Line 595: | ||
| 1 | | 1 | ||
| 18 MiB | | 18 MiB | ||
− | | 917 | + | | 917'833 |
| 3 | | 3 | ||
| 27 | | 27 | ||
Line 595: | Line 606: | ||
| 1 | | 1 | ||
| 13 MiB | | 13 MiB | ||
− | | 324 | + | | 324'274 |
| 2 | | 2 | ||
| 74 | | 74 | ||
Line 606: | Line 617: | ||
| 1 | | 1 | ||
| 9854 KiB | | 9854 KiB | ||
− | | 327 | + | | 327'170 |
| 0 | | 0 | ||
| 59 | | 59 | ||
Line 617: | Line 628: | ||
| 1 | | 1 | ||
| 7106 KiB | | 7106 KiB | ||
− | | 363 | + | | 363'560 |
| 7 | | 7 | ||
| 4 | | 4 | ||
Line 628: | Line 639: | ||
| 1 | | 1 | ||
| 4088 KiB | | 4088 KiB | ||
− | | 201 | + | | 201'798 |
| 7 | | 7 | ||
| 33 | | 33 | ||
Line 639: | Line 650: | ||
| 17 | | 17 | ||
| 2942 KiB | | 2942 KiB | ||
− | | 171 | + | | 171'400 |
| 8 | | 8 | ||
| 179 | | 179 | ||
Line 648: | Line 659: | ||
| BibDBPub | | BibDBPub | ||
| 2292 KiB | | 2292 KiB | ||
− | | 3 | + | | 3'465 |
| 2359 KiB | | 2359 KiB | ||
− | | 80 | + | | 80'178 |
| 1 | | 1 | ||
| 54 | | 54 | ||
Line 661: | Line 672: | ||
| 1 | | 1 | ||
| 1560 KiB | | 1560 KiB | ||
− | | 77 | + | | 77'315 |
| 16 | | 16 | ||
| 23 | | 23 | ||
Line 672: | Line 683: | ||
| 1 | | 1 | ||
| 1334 KiB | | 1334 KiB | ||
− | | 33 | + | | 33'056 |
| 2 | | 2 | ||
| 74 | | 74 | ||
Line 679: | Line 690: | ||
| 13 | | 13 | ||
|} | |} | ||
+ | |||
+ | This is the meaning of the attributes: | ||
+ | |||
+ | * ''FileSize'' is the original size of the input documents | ||
+ | * ''#Files'' indicates the number of stored XML documents | ||
+ | * ''DbSize'' is the size of the resulting database (excluding the [[Indexes#Value Indexes|value index structures]]) | ||
+ | * ''#Nodes'' represents the number of XML nodes (elements, attributes, texts, etc.) stored in the database | ||
+ | * ''#Attr'' indicates the maximum number of attributes stored for a single element | ||
+ | * ''#ENames'' and ''#ANames'' reflect the number of distinct element and attribute names | ||
+ | * ''#URIs'' represents the number of distinct namespace URIs | ||
+ | * ''Height'' indicates the maximum level depth of the stored nodes | ||
== Sources == | == Sources == | ||
− | + | {| class="wikitable sortable" | |
− | + | ! Instances | |
− | + | ! Source | |
− | + | |- | |
− | + | | AirBase | |
− | + | | http://air-climate.eionet.europa.eu/databases/airbase/airbasexml | |
− | + | |- | |
− | + | | Alfred | |
− | + | | http://alfred.med.yale.edu/alfred/alfredWithDescription.zip | |
− | + | |- | |
− | + | | BibDBPub | |
− | + | | http://inex.is.informatik.uni-duisburg.de/2005/ | |
− | + | |- | |
− | + | | CoPhIR | |
− | + | | http://cophir.isti.cnr.it/ | |
− | + | |- | |
− | + | | DBLP | |
− | + | | http://dblp.uni-trier.de/xml | |
− | + | |- | |
− | + | | DBLP2 | |
− | + | | http://inex.is.informatik.uni-duisburg.de/2005/ | |
− | + | |- | |
− | + | | DDI | |
− | + | | http://tools.ddialliance.org/ | |
− | + | |- | |
− | + | | EnWikiMeta | |
− | + | | http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-meta-current.xml.bz2 | |
− | + | |- | |
− | + | | EnWikipedia | |
− | + | | http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2 | |
− | + | |- | |
− | + | | EnWikiRDF | |
− | + | | http://www.xml-benchmark.org/ generated with xmlgen | |
− | + | |- | |
− | + | | EnWiktionary | |
− | + | | http://dumps.wikimedia.org/enwiktionary/latest/enwiktionary-latest-pages-meta-history.xml.7z | |
− | + | |- | |
− | + | | EURLex | |
− | + | | http://www.epsiplatform.eu/ | |
− | + | |- | |
− | + | | Factbook | |
− | + | | http://www.cs.washington.edu/research/xmldatasets/www/repository.html | |
− | + | |- | |
− | + | | Freebase | |
− | + | | http://download.freebase.com/wex | |
− | + | |- | |
− | + | | FreeDB | |
− | + | | http://www.xmldatabases.org/radio/xmlDatabases/projects/FreeDBtoXML | |
− | + | |- | |
− | + | | Freshmeat | |
− | + | | http://freshmeat.net/articles/freshmeat-xml-rpc-api-available | |
− | + | |- | |
− | + | | Genome1 | |
− | + | | ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/XML/ds_ch1.xml.gz | |
− | + | |- | |
+ | | HCIBIB2 | ||
+ | | http://inex.is.informatik.uni-duisburg.de/2005/ | ||
+ | |- | ||
+ | | Inex2009 | ||
+ | | http://www.mpi-inf.mpg.de/departments/d5/software/inex | ||
+ | |- | ||
+ | | IntAct | ||
+ | | ftp://ftp.ebi.ac.uk/pub/databases/intact/current/index.html | ||
+ | |- | ||
+ | | InterPro | ||
+ | | ftp://ftp.bio.net/biomirror/interpro/match_complete.xml.gz | ||
+ | |- | ||
+ | | iProClass | ||
+ | | ftp://ftp.pir.georgetown.edu/pir_databases/iproclass/iproclass.xml.gz | ||
+ | |- | ||
+ | | JMNEdict | ||
+ | | ftp://ftp.monash.edu.au/pub/nihongo/enamdict_doc.html | ||
+ | |- | ||
+ | | KanjiDic2 | ||
+ | | http://www.csse.monash.edu.au/~jwb/kanjidic2 | ||
+ | |- | ||
+ | | MedLine | ||
+ | | http://www.nlm.nih.gov/bsd | ||
+ | |- | ||
+ | | MeSH | ||
+ | | http://www.nlm.nih.gov/mesh/xmlmesh.html | ||
+ | |- | ||
+ | | MovieDB | ||
+ | | http://eagereyes.org/InfoVisContest2007Data.html | ||
+ | |- | ||
+ | | MusicXML | ||
+ | | http://www.recordare.com/xml/samples.html | ||
+ | |- | ||
+ | | Nasa | ||
+ | | http://www.cs.washington.edu/research/xmldatasets/www/repository.html | ||
+ | |- | ||
+ | | NewYorkTimes | ||
+ | | http://www.nytimes.com/ref/membercenter/nytarchive.html | ||
+ | |- | ||
+ | | OpenStreetMap | ||
+ | | http://dump.wiki.openstreetmap.org/osmwiki-latest-files.tar.gz | ||
+ | |- | ||
+ | | Organizations | ||
+ | | http://www.data.gov/raw/1358 | ||
+ | |- | ||
+ | | RuWikiHist | ||
+ | | http://dumps.wikimedia.org/ruwiki/latest/ruwiki-latest-pages-meta-history.xml.7z | ||
+ | |- | ||
+ | | SDMX | ||
+ | | http://www.metadatatechnology.com/ | ||
+ | |- | ||
+ | | Shakespeare | ||
+ | | http://www.cafeconleche.org/examples/shakespeare | ||
+ | |- | ||
+ | | SwissProt | ||
+ | | ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase | ||
+ | |- | ||
+ | | Thesaurus | ||
+ | | http://www.drze.de/BELIT/thesaurus | ||
+ | |- | ||
+ | | Treebank | ||
+ | | http://www.cs.washington.edu/research/xmldatasets | ||
+ | |- | ||
+ | | TreeOfLife | ||
+ | | http://tolweb.org/data/tolskeletaldump.xml | ||
+ | |- | ||
+ | | TrEMBL | ||
+ | | ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase | ||
+ | |- | ||
+ | | Wikicorpus | ||
+ | | http://www-connex.lip6.fr/~denoyer/wikipediaXML | ||
+ | |- | ||
+ | | XMark | ||
+ | | http://www.xml-benchmark.org/ generated with xmlgen | ||
+ | |- | ||
+ | | ZDNET | ||
+ | | http://inex.is.informatik.uni-duisburg.de/2005/ | ||
+ | |- | ||
+ | | ZhWikiHist | ||
+ | | http://dumps.wikimedia.org/zhwiki/latest/zhwiki-latest-pages-meta-history.xml.7z | ||
+ | |- | ||
+ | | LibraryUKN | ||
+ | | generated from university library data | ||
+ | |- | ||
+ | | MediaUKN | ||
+ | | generated from university library data | ||
+ | |- | ||
+ | | DeepFS | ||
+ | | generated from filesystem structure | ||
+ | |- | ||
+ | | University | ||
+ | | generated from student test data | ||
+ | |- | ||
+ | | Feeds | ||
+ | | compiled from news feeds | ||
+ | |- | ||
+ | | Twitter | ||
+ | | compiled from Twitter feeds | ||
+ | |} |
Latest revision as of 11:03, 1 March 2016
This article is part of the Advanced User's Guide. It lists statistics on various database instances that have been created with BaseX, with value and full-text indexes turned off. The URLs to the original sources, if available or public, are listed below.
Databases in BaseX are light-weight. If a database limit is reached, you can distribute your documents across multiple database instances and access all of them with a single XQuery expression.
Databases[edit]
Instances | FileSize | #Files | DbSize | #Nodes | #Attr | #ENames | #ANames | #URIs | Height |
---|---|---|---|---|---|---|---|---|---|
Limits | 512 GiB (2^39 Bytes) | 536'870'912 (2^29) | no limit | 2'147'483'648 (2^31) | no limit | 32768 (2^15) | 32768 (2^15) | 256 (2^8) | no limit |
RuWikiHist | 421 GiB | 1 | 416 GiB | 324'848'508 | 3 | 21 | 6 | 2 | 6 |
ZhWikiHist | 126 GiB | 1 | 120 GiB | 179'199'662 | 3 | 21 | 6 | 2 | 6 |
EnWiktionary | 79 GiB | 1 | 75 GiB | 134'380'393 | 3 | 21 | 6 | 2 | 6 |
XMark | 55 GiB | 1 | 64 GiB | 1'615'071'348 | 2 | 74 | 9 | 0 | 13 |
EnWikiMeta | 54 GiB | 1 | 52 GiB | 401'456'348 | 3 | 21 | 6 | 2 | 6 |
MedLine | 38 GiB | 379 | 36 GiB | 1'623'764'254 | 2 | 84 | 6 | 0 | 9 |
iProClass | 36 GiB | 1 | 37 GiB | 1'631'218'984 | 3 | 245 | 4 | 2 | 9 |
Inex2009 | 31 GiB | 2'666'500 | 34 GiB | 1'336'110'639 | 15 | 28'034 | 451 | 1 | 37 |
CoPhIR | 29 GiB | 10'000'000 | 31 GiB | 1'104'623'376 | 10 | 42 | 42 | 0 | 8 |
EnWikipedia | 26 GiB | 1 | 25 GiB | 198'546'747 | 3 | 24 | 21 | 2 | 6 |
XMark | 22 GiB | 1 | 26 GiB | 645'997'965 | 2 | 74 | 9 | 0 | 13 |
InterPro | 14 GiB | 1 | 19 GiB | 860'304'235 | 5 | 7 | 15 | 0 | 4 |
Genome1 | 13 GiB | 1 | 13 GiB | 432'628'105 | 12 | 26 | 101 | 2 | 6 |
NewYorkTimes | 12 GiB | 1'855'659 | 13 GiB | 280'407'005 | 5 | 41 | 33 | 0 | 6 |
TrEMBL | 11 GiB | 1 | 14 GiB | 589'650'535 | 8 | 47 | 30 | 2 | 7 |
XMark | 11 GiB | 1 | 13 GiB | 323'083'409 | 2 | 74 | 9 | 0 | 13 |
IntAct | 7973 MiB | 25'624 | 6717 MiB | 297'478'392 | 7 | 64 | 22 | 2 | 14 |
Freebase | 7366 MiB | 1 | 10 GiB | 443'627'994 | 8 | 61 | 283 | 1 | 93 |
SDMX | 6356 MiB | 1 | 8028 MiB | 395'871'872 | 2 | 22 | 6 | 3 | 7 |
OpenStreetMap | 5312 MiB | 1 | 5171 MiB | 6'910'669 | 3 | 19 | 5 | 2 | 6 |
SwissProt | 4604 MiB | 1 | 5422 MiB | 241'274'406 | 8 | 70 | 39 | 2 | 7 |
EURLex | 4815 MiB | 1 | 5532 MiB | 167'328'039 | 23 | 186 | 46 | 1 | 12 |
Wikicorpus | 4492 MiB | 659'338 | 4432 MiB | 157'948'561 | 12 | 1'257 | 2'687 | 2 | 50 |
EnWikiRDF | 3679 MiB | 1 | 3537 MiB | 98'433'194 | 1 | 11 | 2 | 11 | 4 |
CoPhIR | 2695 MiB | 1'000'000 | 2882 MiB | 101'638'857 | 10 | 42 | 42 | 0 | 8 |
MeSH | 2091 MiB | 1 | 2410 MiB | 104'845'819 | 3 | 6 | 5 | 2 | 5 |
FreeDB | 1723 MiB | 1 | 2462 MiB | 102'901'519 | 2 | 7 | 3 | 0 | 4 |
XMark | 1134 MiB | 1 | 1303 MiB | 32'298'989 | 2 | 74 | 9 | 0 | 13 |
DeepFS | 810 MiB | 1 | 850 MiB | 44'821'506 | 4 | 3 | 6 | 0 | 24 |
LibraryUKN | 760 MiB | 1 | 918 MiB | 46'401'941 | 3 | 23 | 3 | 0 | 5 |
Twitter | 736 MiB | 1'177'495 | 767 MiB | 15'309'015 | 0 | 8 | 0 | 0 | 3 |
Organizations | 733 MiB | 1'019'132 | 724 MiB | 33'112'392 | 3 | 38 | 9 | 0 | 7 |
DBLP | 694 MiB | 1 | 944 MiB | 36'878'181 | 4 | 35 | 6 | 0 | 7 |
Feeds | 692 MiB | 444'014 | 604 MiB | 5'933'713 | 0 | 8 | 0 | 0 | 3 |
MedLineSupp | 477 MiB | 1 | 407 MiB | 21'602'141 | 5 | 55 | 7 | 0 | 9 |
AirBase | 449 MiB | 38 | 273 MiB | 14'512'851 | 1 | 111 | 5 | 0 | 11 |
MedLineDesc | 260 MiB | 1 | 195 MiB | 10'401'847 | 5 | 66 | 8 | 0 | 9 |
ZDNET | 130 MiB | 95'663 | 133 MiB | 3'060'186 | 21 | 40 | 90 | 0 | 13 |
JMNEdict | 124 MiB | 1 | 171 MiB | 8'592'666 | 0 | 10 | 0 | 0 | 5 |
XMark | 111 MiB | 1 | 130 MiB | 3'221'926 | 2 | 74 | 9 | 0 | 13 |
Freshmeat | 105 MiB | 1 | 86 MiB | 3'832'028 | 1 | 58 | 1 | 0 | 6 |
DeepFS | 83 MiB | 1 | 93 MiB | 4'842'638 | 4 | 3 | 6 | 0 | 21 |
Treebank | 82 MiB | 1 | 92 MiB | 3'829'513 | 1 | 250 | 1 | 0 | 37 |
DBLP2 | 80 MiB | 170'843 | 102 MiB | 4'044'649 | 4 | 35 | 6 | 0 | 6 |
DDI | 76 MiB | 3 | 39 MiB | 2'070'157 | 7 | 104 | 16 | 21 | 11 |
Alfred | 75 MiB | 1 | 68 MiB | 3'784'285 | 0 | 60 | 0 | 0 | 6 |
University | 56 MiB | 6 | 66 MiB | 3'468'606 | 1 | 28 | 4 | 0 | 5 |
MediaUKN | 38 MiB | 1 | 45 MiB | 1'619'443 | 3 | 21 | 3 | 0 | 5 |
HCIBIB2 | 32 MiB | 26'390 | 33 MiB | 617'023 | 1 | 39 | 1 | 0 | 4 |
Nasa | 24 MiB | 1 | 25 MiB | 845'805 | 2 | 61 | 8 | 1 | 9 |
MovieDB | 16 MiB | 1 | 19 MiB | 868'980 | 6 | 7 | 8 | 0 | 4 |
KanjiDic2 | 13 MiB | 1 | 18 MiB | 917'833 | 3 | 27 | 10 | 0 | 6 |
XMark | 11 MiB | 1 | 13 MiB | 324'274 | 2 | 74 | 9 | 0 | 13 |
Shakespeare | 7711 KiB | 1 | 9854 KiB | 327'170 | 0 | 59 | 0 | 0 | 9 |
TreeOfLife | 5425 KiB | 1 | 7106 KiB | 363'560 | 7 | 4 | 7 | 0 | 243 |
Thesaurus | 4288 KiB | 1 | 4088 KiB | 201'798 | 7 | 33 | 9 | 0 | 7 |
MusicXML | 3155 KiB | 17 | 2942 KiB | 171'400 | 8 | 179 | 56 | 0 | 8 |
BibDBPub | 2292 KiB | 3'465 | 2359 KiB | 80'178 | 1 | 54 | 1 | 0 | 4 |
Factbook | 1743 KiB | 1 | 1560 KiB | 77'315 | 16 | 23 | 32 | 0 | 6 |
XMark | 1134 KiB | 1 | 1334 KiB | 33'056 | 2 | 74 | 9 | 0 | 13 |
This is the meaning of the attributes:
- FileSize is the original size of the input documents
- #Files indicates the number of stored XML documents
- DbSize is the size of the resulting database (excluding the value index structures)
- #Nodes represents the number of XML nodes (elements, attributes, texts, etc.) stored in the database
- #Attr indicates the maximum number of attributes stored for a single element
- #ENames and #ANames reflect the number of distinct element and attribute names
- #URIs represents the number of distinct namespace URIs
- Height indicates the maximum level depth of the stored nodes