Difference between revisions of "Statistics"
Line 11: | Line 11: | ||
== Sources == | == Sources == | ||
− | <table><tr> <td width=120><p> <b>Instances</b> </p></td> <td><p> <b>Source</b> </p></td> </tr> <tr> <td><p> AirBase </p></td> <td><p> http://air-climate.eionet.europa.eu/databases/airbase/airbasexml </p></td> </tr> <tr> <td><p> Alfred </p></td> <td><p> http://alfred.med.yale.edu/alfred/alfredWithDescription.zip </p></td> </tr> <tr> <td><p> BibDBPub </p></td> <td><p> http://inex.is.informatik.uni-duisburg.de/2005/ </p></td> </tr> <tr> <td><p> CoPhIR </p></td> <td><p> http://cophir.isti.cnr.it/ </p></td> </tr> <tr> <td><p> DBLP </p></td> <td><p> http://dblp.uni-trier.de/xml </p></td> </tr> <tr> <td><p> DBLP2 </p></td> <td><p> http://inex.is.informatik.uni-duisburg.de/2005/ </p></td> </tr> <tr> <td><p> DDI </p></td> <td><p> http://tools.ddialliance.org/ </p></td> </tr> <tr> <td><p> EnWikiMeta </p></td> <td><p> http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-meta-current.xml.bz2 </p></td> </tr> <tr> <td><p> EnWikipedia </p></td> <td><p> http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2 </p></td> </tr> <tr> <td><p> EnWikiRDF </p></td> <td><p> http://www.xml-benchmark.org/ generated with xmlgen </p></td> </tr> <tr> <td><p> EnWiktionary </p></td> <td><p> http://dumps.wikimedia.org/enwiktionary/latest/enwiktionary-latest-pages-meta-history.xml.7z </p></td> </tr> <tr> <td><p> EURLex </p></td> <td><p> http://www.epsiplatform.eu/ </p></td> </tr> <tr> <td><p> Factbook </p></td> <td><p> http://www.cs.washington.edu/research/xmldatasets/www/repository.html </p></td> </tr> <tr> <td><p> Freebase </p></td> <td><p> http://download.freebase.com/wex </p></td> </tr> <tr> <td><p> FreeDB </p></td> <td><p> http://www.xmldatabases.org/radio/xmlDatabases/projects/FreeDBtoXML </p></td> </tr> <tr> <td><p> Freshmeat </p></td> <td><p> http://freshmeat.net/articles/freshmeat-xml-rpc-api-available </p></td> </tr> <tr> <td><p> Genome1 </p></td> <td><p> ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/XML/ds_ch1.xml.gz </p></td> </tr> <tr> <td><p> HCIBIB2 
</p></td> <td><p> http://inex.is.informatik.uni-duisburg.de/2005/ </p></td> </tr> <tr> <td><p> Inex2009 </p></td> <td><p> http://www.mpi-inf.mpg.de/departments/d5/software/inex </p></td> </tr> <tr> <td><p> IntAct </p></td> <td><p> ftp://ftp.ebi.ac.uk/pub/databases/intact/current/index.html </p></td> </tr> <tr> <td><p> InterPro </p></td> <td><p> ftp://ftp.bio.net/biomirror/interpro/match_complete.xml.gz </p></td> </tr> <tr> <td><p> iProClass </p></td> <td><p> ftp://ftp.pir.georgetown.edu/pir_databases/iproclass/iproclass.xml.gz </p></td> </tr> <tr> <td><p> JMNEdict </p></td> <td><p> ftp://ftp.monash.edu.au/pub/nihongo/enamdict_doc.html </p></td> </tr> <tr> <td><p> KanjiDic2 </p></td> <td><p> http://www.csse.monash.edu.au/~jwb/kanjidic2 </p></td> </tr> <tr> <td><p> MedLine </p></td> <td><p> http://www.nlm.nih.gov/bsd </p></td> </tr> <tr> <td><p> MeSH </p></td> <td><p> http://www.nlm.nih.gov/mesh/xmlmesh.html </p></td> </tr> <tr> <td><p> MovieDB </p></td> <td><p> | + | <table><tr> <td width=120><p> <b>Instances</b> </p></td> <td><p> <b>Source</b> </p></td> </tr> <tr> <td><p> AirBase </p></td> <td><p> http://air-climate.eionet.europa.eu/databases/airbase/airbasexml </p></td> </tr> <tr> <td><p> Alfred </p></td> <td><p> http://alfred.med.yale.edu/alfred/alfredWithDescription.zip </p></td> </tr> <tr> <td><p> BibDBPub </p></td> <td><p> http://inex.is.informatik.uni-duisburg.de/2005/ </p></td> </tr> <tr> <td><p> CoPhIR </p></td> <td><p> http://cophir.isti.cnr.it/ </p></td> </tr> <tr> <td><p> DBLP </p></td> <td><p> http://dblp.uni-trier.de/xml </p></td> </tr> <tr> <td><p> DBLP2 </p></td> <td><p> http://inex.is.informatik.uni-duisburg.de/2005/ </p></td> </tr> <tr> <td><p> DDI </p></td> <td><p> http://tools.ddialliance.org/ </p></td> </tr> <tr> <td><p> EnWikiMeta </p></td> <td><p> http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-meta-current.xml.bz2 </p></td> </tr> <tr> <td><p> EnWikipedia </p></td> <td><p> 
http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2 </p></td> </tr> <tr> <td><p> EnWikiRDF </p></td> <td><p> http://www.xml-benchmark.org/ generated with xmlgen </p></td> </tr> <tr> <td><p> EnWiktionary </p></td> <td><p> http://dumps.wikimedia.org/enwiktionary/latest/enwiktionary-latest-pages-meta-history.xml.7z </p></td> </tr> <tr> <td><p> EURLex </p></td> <td><p> http://www.epsiplatform.eu/ </p></td> </tr> <tr> <td><p> Factbook </p></td> <td><p> http://www.cs.washington.edu/research/xmldatasets/www/repository.html </p></td> </tr> <tr> <td><p> Freebase </p></td> <td><p> http://download.freebase.com/wex </p></td> </tr> <tr> <td><p> FreeDB </p></td> <td><p> http://www.xmldatabases.org/radio/xmlDatabases/projects/FreeDBtoXML </p></td> </tr> <tr> <td><p> Freshmeat </p></td> <td><p> http://freshmeat.net/articles/freshmeat-xml-rpc-api-available </p></td> </tr> <tr> <td><p> Genome1 </p></td> <td><p> ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/XML/ds_ch1.xml.gz </p></td> </tr> <tr> <td><p> HCIBIB2 </p></td> <td><p> http://inex.is.informatik.uni-duisburg.de/2005/ </p></td> </tr> <tr> <td><p> Inex2009 </p></td> <td><p> http://www.mpi-inf.mpg.de/departments/d5/software/inex </p></td> </tr> <tr> <td><p> IntAct </p></td> <td><p> ftp://ftp.ebi.ac.uk/pub/databases/intact/current/index.html </p></td> </tr> <tr> <td><p> InterPro </p></td> <td><p> ftp://ftp.bio.net/biomirror/interpro/match_complete.xml.gz </p></td> </tr> <tr> <td><p> iProClass </p></td> <td><p> ftp://ftp.pir.georgetown.edu/pir_databases/iproclass/iproclass.xml.gz </p></td> </tr> <tr> <td><p> JMNEdict </p></td> <td><p> ftp://ftp.monash.edu.au/pub/nihongo/enamdict_doc.html </p></td> </tr> <tr> <td><p> KanjiDic2 </p></td> <td><p> http://www.csse.monash.edu.au/~jwb/kanjidic2 </p></td> </tr> <tr> <td><p> MedLine </p></td> <td><p> http://www.nlm.nih.gov/bsd </p></td> </tr> <tr> <td><p> MeSH </p></td> <td><p> http://www.nlm.nih.gov/mesh/xmlmesh.html </p></td> </tr> <tr> <td><p> MovieDB 
</p></td> <td><p> http://infovis%202007%20contest:%20IMDB%20Data/ </p></td> </tr> <tr> <td><p> MusicXML </p></td> <td><p> http://www.recordare.com/xml/samples.html </p></td> </tr> <tr> <td><p> Nasa </p></td> <td><p> http://www.cs.washington.edu/research/xmldatasets/www/repository.html </p></td> </tr> <tr> <td><p> NewYorkTimes </p></td> <td><p> http://www.nytimes.com/ref/membercenter/nytarchive.html </p></td> </tr> <tr> <td><p> OpenStreetMap </p></td> <td><p> http://dump.wiki.openstreetmap.org/osmwiki-latest-files.tar.gz </p></td> </tr> <tr> <td><p> Organizations </p></td> <td><p> http://www.data.gov/raw/1358 </p></td> </tr> <tr> <td><p> RuWikiHist </p></td> <td><p> http://dumps.wikimedia.org/ruwiki/latest/ruwiki-latest-pages-meta-history.xml.7z </p></td> </tr> <tr> <td><p> SDMX </p></td> <td><p> http://www.metadatatechnology.com/ </p></td> </tr> <tr> <td><p> Shakespeare </p></td> <td><p> http://www.cafeconleche.org/examples/shakespeare </p></td> </tr> <tr> <td><p> SwissProt </p></td> <td><p> ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase </p></td> </tr> <tr> <td><p> Thesaurus </p></td> <td><p> http://www.drze.de/BELIT/thesaurus </p></td> </tr> <tr> <td><p> Treebank </p></td> <td><p> http://www.cs.washington.edu/research/xmldatasets </p></td> </tr> <tr> <td><p> TreeOfLife </p></td> <td><p> http://tolweb.org/data/tolskeletaldump.xml </p></td> </tr> <tr> <td><p> TrEMBL </p></td> <td><p> ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase </p></td> </tr> <tr> <td><p> Wikicorpus </p></td> <td><p> http://www-connex.lip6.fr/~denoyer/wikipediaXML </p></td> </tr> <tr> <td><p> XMark </p></td> <td><p> http://www.xml-benchmark.org/ generated with xmlgen </p></td> </tr> <tr> <td><p> ZDNET </p></td> <td><p> http://inex.is.informatik.uni-duisburg.de/2005/ </p></td> </tr> <tr> <td><p> ZhWikiHist </p></td> <td><p> http://dumps.wikimedia.org/zhwiki/latest/zhwiki-latest-pages-meta-history.xml.7z </p></td> </tr> <tr> <td></td> <td></td> 
</tr> <tr> <td><p> LibraryUKN </p></td> <td><p> generated from university library data </p></td> </tr> <tr> <td><p> MediaUKN </p></td> <td><p> generated from university library data </p></td> </tr> <tr> <td><p> DeepFS </p></td> <td><p> generated from filesystem structure </p></td> </tr> <tr> <td><p> University </p></td> <td><p> generated from students test data </p></td> </tr> <tr> <td><p> Feeds </p></td> <td><p> compiled from news feeds </p></td> </tr> <tr> <td><p> Twitter </p></td> <td><p> compiled from Twitter feeds </p></td> </tr></table> |
Revision as of 14:30, 14 January 2011
The following table lists statistics on various XML instances that have been created with BaseX and, if available or public, links to the source documents.
- The database size does not include any indexes.
- #nodes represents the number of XML nodes which have been created in the database
- #atr indicates the maximum number of attributes of a single element; #eln, #atn, and #uri represent the number of distinct element names, attribute names, and namespaces
Databases
Instances | file size | db size | #nodes | #atr | #eln | #atn | #uri | height | #docs |
RuWikiHist | 421 GiB | 416 GiB | 324,848,508 | 3 | 21 | 6 | 2 | 6 | 1 |
ZhWikiHist | 126 GiB | 120 GiB | 179,199,662 | 3 | 21 | 6 | 2 | 6 | 1 |
EnWiktionary | 79 GiB | 75 GiB | 134,380,393 | 3 | 21 | 6 | 2 | 6 | 1 |
XMark | 55 GiB | 64 GiB | 1,615,071,348 | 2 | 74 | 9 | 0 | 13 | 1 |
EnWikiMeta | 54 GiB | 52 GiB | 401,456,348 | 3 | 21 | 6 | 2 | 6 | 1 |
MedLine | 38 GiB | 36 GiB | 1,623,764,254 | 2 | 84 | 6 | 0 | 9 | 379 |
iProClass | 36 GiB | 37 GiB | 1,631,218,984 | 3 | 245 | 4 | 2 | 9 | 1 |
Inex2009 | 31 GiB | 34 GiB | 1,336,110,639 | 15 | 28,034 | 451 | 1 | 37 | 2,666,500 |
CoPhIR | 29 GiB | 31 GiB | 1,104,623,376 | 10 | 42 | 42 | 0 | 8 | 10,000,000 |
EnWikipedia | 26 GiB | 25 GiB | 198,546,747 | 3 | 24 | 21 | 2 | 6 | 1 |
XMark | 22 GiB | 26 GiB | 645,997,965 | 2 | 74 | 9 | 0 | 13 | 1 |
InterPro | 14 GiB | 19 GiB | 860,304,235 | 5 | 7 | 15 | 0 | 4 | 1 |
Genome1 | 13 GiB | 13 GiB | 432,628,105 | 12 | 26 | 101 | 2 | 6 | 1 |
NewYorkTimes | 12 GiB | 13 GiB | 280,407,005 | 5 | 41 | 33 | 0 | 6 | 1,855,659 |
TrEMBL | 11 GiB | 14 GiB | 589,650,535 | 8 | 47 | 30 | 2 | 7 | 1 |
XMark | 11 GiB | 13 GiB | 323,083,409 | 2 | 74 | 9 | 0 | 13 | 1 |
IntAct | 7973 MiB | 6717 MiB | 297,478,392 | 7 | 64 | 22 | 2 | 14 | 25,624 |
Freebase | 7366 MiB | 10 GiB | 443,627,994 | 8 | 61 | 283 | 1 | 93 | 1 |
SDMX | 6356 MiB | 8028 MiB | 395,871,872 | 2 | 22 | 6 | 3 | 7 | 1 |
OpenStreetMap | 5312 MiB | 5171 MiB | 6,910,669 | 3 | 19 | 5 | 2 | 6 | 1 |
SwissProt | 4604 MiB | 5422 MiB | 241,274,406 | 8 | 70 | 39 | 2 | 7 | 1 |
EURLex | 4815 MiB | 5532 MiB | 167,328,039 | 23 | 186 | 46 | 1 | 12 | 1 |
Wikicorpus | 4492 MiB | 4432 MiB | 157,948,561 | 12 | 1,257 | 2,687 | 2 | 50 | 659,338 |
EnWikiRDF | 3679 MiB | 3537 MiB | 98,433,194 | 1 | 11 | 2 | 11 | 4 | 1 |
CoPhIR | 2695 MiB | 2882 MiB | 101,638,857 | 10 | 42 | 42 | 0 | 8 | 1,000,000 |
MeSH | 2091 MiB | 2410 MiB | 104,845,819 | 3 | 6 | 5 | 2 | 5 | 1 |
FreeDB | 1723 MiB | 2462 MiB | 102,901,519 | 2 | 7 | 3 | 0 | 4 | 1 |
XMark | 1134 MiB | 1303 MiB | 32,298,989 | 2 | 74 | 9 | 0 | 13 | 1 |
DeepFS | 810 MiB | 850 MiB | 44,821,506 | 4 | 3 | 6 | 0 | 24 | 1 |
LibraryUKN | 760 MiB | 918 MiB | 46,401,941 | 3 | 23 | 3 | 0 | 5 | 1 |
Twitter | 736 MiB | 767 MiB | 15,309,015 | 0 | 8 | 0 | 0 | 3 | 1,177,495 |
Organizations | 733 MiB | 724 MiB | 33,112,392 | 3 | 38 | 9 | 0 | 7 | 1,019,132 |
DBLP | 694 MiB | 944 MiB | 36,878,181 | 4 | 35 | 6 | 0 | 7 | 1 |
Feeds | 692 MiB | 604 MiB | 5,933,713 | 0 | 8 | 0 | 0 | 3 | 444,014 |
MedLineSupp | 477 MiB | 407 MiB | 21,602,141 | 5 | 55 | 7 | 0 | 9 | 1 |
AirBase | 449 MiB | 273 MiB | 14,512,851 | 1 | 111 | 5 | 0 | 11 | 38 |
MedLineDesc | 260 MiB | 195 MiB | 10,401,847 | 5 | 66 | 8 | 0 | 9 | 1 |
ZDNET | 130 MiB | 133 MiB | 3,060,186 | 21 | 40 | 90 | 0 | 13 | 95,663 |
JMNEdict | 124 MiB | 171 MiB | 8,592,666 | 0 | 10 | 0 | 0 | 5 | 1 |
XMark | 111 MiB | 130 MiB | 3,221,926 | 2 | 74 | 9 | 0 | 13 | 1 |
Freshmeat | 105 MiB | 86 MiB | 3,832,028 | 1 | 58 | 1 | 0 | 6 | 1 |
DeepFS | 83 MiB | 93 MiB | 4,842,638 | 4 | 3 | 6 | 0 | 21 | 1 |
Treebank | 82 MiB | 92 MiB | 3,829,513 | 1 | 250 | 1 | 0 | 37 | 1 |
DBLP2 | 80 MiB | 102 MiB | 4,044,649 | 4 | 35 | 6 | 0 | 6 | 170,843 |
DDI | 76 MiB | 39 MiB | 2,070,157 | 7 | 104 | 16 | 21 | 11 | 3 |
Alfred | 75 MiB | 68 MiB | 3,784,285 | 0 | 60 | 0 | 0 | 6 | 1 |
University | 56 MiB | 66 MiB | 3,468,606 | 1 | 28 | 4 | 0 | 5 | 6 |
MediaUKN | 38 MiB | 45 MiB | 1,619,443 | 3 | 21 | 3 | 0 | 5 | 1 |
HCIBIB2 | 32 MiB | 33 MiB | 617,023 | 1 | 39 | 1 | 0 | 4 | 26,390 |
Nasa | 24 MiB | 25 MiB | 845,805 | 2 | 61 | 8 | 1 | 9 | 1 |
MovieDB | 16 MiB | 19 MiB | 868,980 | 6 | 7 | 8 | 0 | 4 | 1 |
KanjiDic2 | 13 MiB | 18 MiB | 917,833 | 3 | 27 | 10 | 0 | 6 | 1 |
XMark | 11 MiB | 13 MiB | 324,274 | 2 | 74 | 9 | 0 | 13 | 1 |
Shakespeare | 7711 KiB | 9854 KiB | 327,170 | 0 | 59 | 0 | 0 | 9 | 1 |
TreeOfLife | 5425 KiB | 7106 KiB | 363,560 | 7 | 4 | 7 | 0 | 243 | 1 |
Thesaurus | 4288 KiB | 4088 KiB | 201,798 | 7 | 33 | 9 | 0 | 7 | 1 |
MusicXML | 3155 KiB | 2942 KiB | 171,400 | 8 | 179 | 56 | 0 | 8 | 17 |
BibDBPub | 2292 KiB | 2359 KiB | 80,178 | 1 | 54 | 1 | 0 | 4 | 3,465 |
Factbook | 1743 KiB | 1560 KiB | 77,315 | 16 | 23 | 32 | 0 | 6 | 1 |
XMark | 1134 KiB | 1334 KiB | 33,056 | 2 | 74 | 9 | 0 | 13 | 1 |