X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=xapian-dump.cc;h=0364481f1d99adae86272b64fa3cbe6a444d53e8;hp=6d1fc1be6150863155b52b81123b1c6b905de5ed;hb=dceb501e44b5279df6df76b19688c156256b75c2;hpb=11f99eb8eac3206f200a40a536ff3657ba577594 diff --git a/xapian-dump.cc b/xapian-dump.cc index 6d1fc1be..0364481f 100644 --- a/xapian-dump.cc +++ b/xapian-dump.cc @@ -1,4 +1,5 @@ -/* +/* xapian-dump: Create a textual dump of a Xapian database. + * * Copyright © 2009 Carl Worth * * This program is free software: you can redistribute it and/or modify @@ -17,16 +18,131 @@ * Author: Carl Worth */ +/* Currently the dumped data includes: + * + * All document IDs + * + * And for each document ID: + * + * Document data + * All document terms + * All document values + */ + #include +#include +#include #include +using namespace std; + +vector UNSERIALIZE; + +static void +print_document_terms (Xapian::Document doc) +{ + Xapian::TermIterator i; + + printf (" Terms:\n"); + + for (i = doc.termlist_begin (); i != doc.termlist_end (); i++) + cout << "\t" << *i << endl; +} + +static int +vector_int_contains (vector v, int i) +{ + vector::iterator result; + + result = find (v.begin(), v.end(), i); + + return result != v.end(); +} + +static void +print_document_values (Xapian::Document doc) +{ + Xapian::ValueIterator i; + int value_no, value_int; + double value_float; + + printf (" Values:\n"); + + for (i = doc.values_begin (); i != doc.values_end (); i++) { + value_no = i.get_valueno(); + + cout << "\t" << i.get_valueno() << ": "; + + if (vector_int_contains (UNSERIALIZE, value_no)) { + value_float = Xapian::sortable_unserialise (*i); + value_int = value_float; + if (value_int == value_float) + cout << value_int; + else + cout << value_float; + } else { + cout << *i; + } + + cout << endl; + } +} + +static void +print_document (Xapian::Database db, Xapian::docid id) +{ + Xapian::Document doc; + + printf ("Document %u:\n", id); + + doc = db.get_document (id); + + printf (" Data:\n"); + cout << "\t" << doc.get_data () << endl; + + print_document_terms (doc); + + print_document_values (doc); +} + int main (int argc, char *argv[]) { + const char *database_path; + int i; + if (argc < 2) { - fprintf (stderr, "Usage: %s \n", + fprintf (stderr, "Usage: %s [value_nos...]\n", argv[0]); + fprintf (stderr, "Dumps data from the given database.\n"); + fprintf (stderr, "The values corresponding to any value numbers given on the command line\n"); + fprintf (stderr, "will be unserialized to an before being printed.\n"); + exit (1); + } + + database_path = argv[1]; + + UNSERIALIZE = vector (); + + for (i = 2; i < argc; i++) + UNSERIALIZE.push_back (atoi (argv[i])); + + try { + + Xapian::Database db; + Xapian::PostingIterator i; + Xapian::docid doc_id; + + db = Xapian::Database (database_path); + for (i = db.postlist_begin (""); i != db.postlist_end (""); i++) { + doc_id = *i; + + print_document (db, doc_id); + } + + } catch (const Xapian::Error &error) { + cerr << "A Xapian exception occurred: " << error.get_msg () << endl; exit (1); }