git.lyx.org Git - features.git/commitdiff
Re-write of the BibTeX representation. The main change is that we now have
author Richard Heck <rgheck@comcast.net>
Thu, 16 Aug 2007 01:59:20 +0000 (01:59 +0000)
committer Richard Heck <rgheck@comcast.net>
Thu, 16 Aug 2007 01:59:20 +0000 (01:59 +0000)
a structure representing field->value instead of just a single string with
all the data. The data structures are defined in src/Biblio_typedefs.h, and
the main changes are to the parser code in src/insets/InsetBibtex.cpp.

-src/Biblio_typedefs.h
 Contains typedefs for new representation. Separating them out limits how much
 gets #include'd by other files, and also resolves a circularity problem with
 Buffer.h.

-src/Biblio.{h,cpp}
 Signature changes and massive simplifications to routines that report
 BibTeX data, since we now have an articulate representation.

-src/insets/InsetBibtex.{h,cpp}
 Re-write the parser code so we store a key->value map of the BibTeX data
 rather than just one long string. This is the main work.

-src/frontends/controllers/ControlCitation.{h,cpp}
-src/insets/InsetBibitem.{h,cpp}
-src/insets/InsetCitation.cpp
 Adaptations and simplifications.

-src/insets/Inset.h
-src/Buffer.{h,cpp}
-src/insets/InsetInclude.{h,cpp}
 Signature changes.

-src/Makefile.am
-development/scons/scons_manifest.py
 Add src/Biblio_typedefs.h

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@19598 a592a061-630c-0410-9148-cb99ea01b6c8

17 files changed:
development/scons/scons_manifest.py
src/Biblio.cpp
src/Biblio.h
src/Biblio_typedefs.h [new file with mode: 0644]
src/Buffer.cpp
src/Buffer.h
src/Makefile.am
src/frontends/controllers/ControlCitation.cpp
src/frontends/controllers/ControlCitation.h
src/insets/Inset.h
src/insets/InsetBibitem.cpp
src/insets/InsetBibitem.h
src/insets/InsetBibtex.cpp
src/insets/InsetBibtex.h
src/insets/InsetCitation.cpp
src/insets/InsetInclude.cpp
src/insets/InsetInclude.h

diff --git a/development/scons/scons_manifest.py b/development/scons/scons_manifest.py
index b45ac7ef616b9f69e31c0209efedb196815ec98b..739713fb37af6d3e03dfb1ed75e9f3492d4be7ba 100644 (file)
@@ -34,6 +34,7 @@ src_header_files = Split('''
     ASpell_local.h
     Author.h
     Biblio.h
+    Biblio_typedefs.h
     Bidi.h
     Box.h
     BranchList.h
diff --git a/src/Biblio.cpp b/src/Biblio.cpp
index c3c53998ee654ffe9715dbb280ee4b4f840ddff0..e314afb6d669cdc6d36a98a90a694635b42520f6 100644 (file)
@@ -49,6 +49,22 @@ using support::trim;
 
 namespace biblio {
 
+       
+BibTeXInfo::BibTeXInfo(): isBibTeX(true)
+{}
+
+       
+BibTeXInfo::BibTeXInfo(bool isBibTeX): isBibTeX(isBibTeX)
+{};
+
+
+bool BibTeXInfo::hasKey(docstring const & key) 
+{
+       const_iterator it = find(key);
+       return it == end();
+}
+
+
 namespace {
 
        vector<string> const init_possible_cite_commands()
@@ -109,8 +125,6 @@ namespace {
 } // namespace anon
 
 
-const docstring TheBibliographyRef(from_ascii("TheBibliographyRef"));
-
 string const asValidLatexCommand(string const & input,
                                  CiteEngine const engine)
 {
@@ -182,89 +196,58 @@ docstring const familyName(docstring const & name)
 }
 
 
-docstring const getAbbreviatedAuthor(InfoMap const & map, string const & key)
+docstring const getAbbreviatedAuthor(BibKeyList const & map, string const & key)
 {
        BOOST_ASSERT(!map.empty());
-
-       InfoMap::const_iterator it = map.find(key);
+       BibKeyList::const_iterator it = map.find(key);
        if (it == map.end())
                return docstring();
-       docstring const & data = it->second;
-
-// Is the entry a BibTeX one or one from lyx-layout "bibliography"?
-       docstring::size_type const pos = data.find(TheBibliographyRef);
-       if (pos != docstring::npos) {
-               if (pos <= 2) {
-                       return docstring();
-               }
-
-               docstring const opt = trim(data.substr(0, pos - 1));
-               if (opt.empty())
-                       return docstring();
-
-               docstring authors;
-               split(opt, authors, '(');
-               return authors;
-       }
-
-       docstring author = parseBibTeX(data, "author");
-
-       if (author.empty())
-               author = parseBibTeX(data, "editor");
-
+       BibTeXInfo const & data = it->second;
+       if (!data.isBibTeX) 
+               return docstring();
+       docstring author = getValueForKey(data, "author");
        if (author.empty()) {
-               author = parseBibTeX(data, "key");
-               if (author.empty())
-               // FIXME UNICODE
-                       return from_utf8(key);
-               return author;
+               author = getValueForKey(data, "editor");
+               if (author.empty()) {
+                       author = getValueForKey(data, "key");
+                       if (author.empty())
+                               // FIXME UNICODE
+                               return from_utf8(key);
+                       else 
+                               return author; //this is the key
+               }
        }
 
+       //OK, we've got some names. Let's format them.
+       //try to split the author list on " and "
        vector<docstring> const authors = getVectorFromString(author, from_ascii(" and "));
-       if (authors.empty())
-               return author;
-
+       
        if (authors.size() == 2)
                return bformat(_("%1$s and %2$s"),
-                                                                       familyName(authors[0]), familyName(authors[1]));
-
-       if (authors.size() > 2)
+                                                                        familyName(authors[0]), familyName(authors[1]));
+       else if (authors.size() > 2)
                return bformat(_("%1$s et al."), familyName(authors[0]));
-
-       return familyName(authors[0]);
+       else  
+               return familyName(authors[0]);
 }
 
 
-docstring const getYear(InfoMap const & map, string const & key)
+docstring const getYear(BibKeyList const & map, string const & key)
 {
        BOOST_ASSERT(!map.empty());
-
-       InfoMap::const_iterator it = map.find(key);
+       BibKeyList::const_iterator it = map.find(key);
        if (it == map.end())
                return docstring();
-       docstring const & data = it->second;
-
-// Is the entry a BibTeX one or one from lyx-layout "bibliography"?
-       docstring::size_type const pos = data.find(TheBibliographyRef);
-       if (pos != docstring::npos) {
-               if (pos <= 2) {
-                       return docstring();
-               }
-
-               docstring const opt =
-                                       trim(data.substr(0, pos - 1));
-               if (opt.empty())
-                       return docstring();
-
-               docstring authors;
-               docstring const tmp = split(opt, authors, '(');
-               docstring year;
-               split(tmp, year, ')');
-               return year;
-
-       }
-
-       docstring year = parseBibTeX(data, "year");
+       BibTeXInfo const & data = it->second;
+       if (!data.isBibTeX) 
+               return docstring();
+       docstring year = getValueForKey(data, "year");
        if (year.empty())
                year = _("No year");
 
@@ -284,11 +267,11 @@ class compareNoCase: public std::binary_function<string, string, bool>
 } // namespace anon
 
 
-vector<string> const getKeys(InfoMap const & map)
+vector<string> const getKeys(BibKeyList const & map)
 {
        vector<string> bibkeys;
-       InfoMap::const_iterator it  = map.begin();
-       InfoMap::const_iterator end = map.end();
+       BibKeyList::const_iterator it  = map.begin();
+       BibKeyList::const_iterator end = map.end();
        for (; it != end; ++it) {
                bibkeys.push_back(it->first);
        }
@@ -298,72 +281,67 @@ vector<string> const getKeys(InfoMap const & map)
 }
 
 
-docstring const getInfo(InfoMap const & map, string const & key)
+docstring const getInfo(BibKeyList const & map, string const & key)
 {
        BOOST_ASSERT(!map.empty());
-
-       InfoMap::const_iterator it = map.find(key);
+       BibKeyList::const_iterator it = map.find(key);
        if (it == map.end())
                return docstring();
-       docstring const & data = it->second;
-
-// is the entry a BibTeX one or one from lyx-layout "bibliography"?
-       docstring::size_type const pos = data.find(TheBibliographyRef);
-       if (pos != docstring::npos) {
-               docstring::size_type const pos2 = pos + TheBibliographyRef.size();
-               docstring const info = trim(data.substr(pos2));
-               return info;
+       BibTeXInfo const & data = it->second;
+       if (!data.isBibTeX) {
+               BibTeXInfo::const_iterator it3 = data.find(from_ascii("ref"));
+               return it3->second;
        }
-
-// Search for all possible "required" keys
-       docstring author = parseBibTeX(data, "author");
+       //FIXME
+       //This could be made alot better using the biblio::TheEntryType
+       //field to customize the output based upon entry type.
+       
+       //Search for all possible "required" fields
+       docstring author = getValueForKey(data, "author");
        if (author.empty())
-               author = parseBibTeX(data, "editor");
-
-       docstring year      = parseBibTeX(data, "year");
-       docstring title     = parseBibTeX(data, "title");
-       docstring booktitle = parseBibTeX(data, "booktitle");
-       docstring chapter   = parseBibTeX(data, "chapter");
-       docstring number    = parseBibTeX(data, "number");
-       docstring volume    = parseBibTeX(data, "volume");
-       docstring pages     = parseBibTeX(data, "pages");
-       docstring annote    = parseBibTeX(data, "annote");
-       docstring media     = parseBibTeX(data, "journal");
-       if (media.empty())
-               media = parseBibTeX(data, "publisher");
-       if (media.empty())
-               media = parseBibTeX(data, "school");
-       if (media.empty())
-               media = parseBibTeX(data, "institution");
-
-       odocstringstream result;
-       if (!author.empty())
-               result << author << ", ";
-       if (!title.empty())
-               result << title;
-       if (!booktitle.empty())
-               result << ", in " << booktitle;
-       if (!chapter.empty())
-               result << ", Ch. " << chapter;
-       if (!media.empty())
-               result << ", " << media;
-       if (!volume.empty())
-               result << ", vol. " << volume;
-       if (!number.empty())
-               result << ", no. " << number;
-       if (!pages.empty())
-               result << ", pp. " << pages;
-       if (!year.empty())
-               result << ", " << year;
-       if (!annote.empty())
-               result << "\n\n" << annote;
-
-       docstring const result_str = rtrim(result.str());
-       if (!result_str.empty())
-               return result_str;
-
-// This should never happen (or at least be very unusual!)
-       return data;
+               author = getValueForKey(data, "editor");
+       docstring year      = getValueForKey(data, "year");
+       docstring title     = getValueForKey(data, "title");
+       docstring docLoc    = getValueForKey(data, "pages");
+       if (docLoc.empty()) {
+               docLoc = getValueForKey(data, "chapter");
+               if (!docLoc.empty())
+                       docLoc = from_ascii("Ch. ") + docLoc;
+       }       else 
+               docLoc = from_ascii("pp. ") + docLoc;
+               docstring media     = getValueForKey(data, "journal");
+               if (media.empty()) {
+                       media = getValueForKey(data, "publisher");
+                       if (media.empty()) {
+                               media = getValueForKey(data, "school");
+                               if (media.empty())
+                                       media = getValueForKey(data, "institution");
+                       }
+               }
+               docstring volume = getValueForKey(data, "volume");
+               odocstringstream result;
+               if (!author.empty())
+                       result << author << ", ";
+               if (!title.empty())
+                       result << title;
+               if (!media.empty())
+                       result << ", " << media;
+               if (!year.empty())
+                       result << ", " << year;
+               if (!docLoc.empty())
+                       result << ", " << docLoc;
+               docstring const result_str = rtrim(result.str());
+               if (!result_str.empty())
+                       return result_str;
+       // This should never happen (or at least be very unusual!)
+       return docstring();
 }
 
 
@@ -400,37 +378,33 @@ class RegexMatch : public std::unary_function<string, bool>
        public:
 // re and icase are used to construct an instance of boost::RegEx.
 // if icase is true, then matching is insensitive to case
-               RegexMatch(InfoMap const & m, string const & re, bool icase)
+               RegexMatch(BibKeyList const & m, string const & re, bool icase)
                : map_(m), regex_(re, icase) {}
 
                bool operator()(string const & key) const {
-// the data searched is the key + its associated BibTeX/biblio
-// fields
-                       string data = key;
-                       InfoMap::const_iterator info = map_.find(key);
-                       if (info != map_.end())
-       // FIXME UNICODE
-                               data += ' ' + to_utf8(info->second);
-
-// Attempts to find a match for the current RE
-// somewhere in data.
+                       //FIXME This should search the monolith.
+                       BibKeyList::const_iterator info = map_.find(key);
+                       if (info == map_.end())
+                               return false;
+                       BibTeXInfo const kvm = info->second;
+                       string const data = key + ' ' + to_utf8(kvm.allData);
+                       
                        return boost::regex_search(data, regex_);
                }
        private:
-               InfoMap const map_;
+               BibKeyList const map_;
                mutable boost::regex regex_;
 };
 
 } // namespace anon
 
 
-vector<string>::const_iterator searchKeys(InfoMap const & theMap,
+vector<string>::const_iterator searchKeys(BibKeyList const & theMap,
                vector<string> const & keys,
                string const & search_expr,
                vector<string>::const_iterator start,
-               Search type,
-               Direction dir,
-       bool caseSensitive)
+               Search type, Direction dir,     bool caseSensitive)
 {
        // Preliminary checks
        if (start < keys.begin() || start >= keys.end())
@@ -471,144 +445,17 @@ vector<string>::const_iterator searchKeys(InfoMap const & theMap,
 }
 
 
-docstring const parseBibTeX(docstring data, string const & findkey)
+docstring const getValueForKey(BibTeXInfo const & data, string const & findkey)
 {
-       // at first we delete all characters right of '%' and
-       // replace tabs through a space and remove leading spaces
-       // we read the data line by line so that the \n are
-       // ignored, too.
-       docstring data_;
-       int Entries = 0;
-       docstring dummy = token(data,'\n', Entries);
-       while (!dummy.empty()) {
-               // no tabs
-               dummy = subst(dummy, '\t', ' ');
-               // no leading spaces
-               dummy = ltrim(dummy);
-               // ignore lines with a beginning '%' or ignore all right of %
-               docstring::size_type const idx =
-                                       dummy.empty() ? docstring::npos : dummy.find('%');
-               if (idx != docstring::npos)
-                       // Check if this is really a comment or just "\%"
-                       if (idx == 0 || dummy[idx - 1] != '\\')
-                               dummy.erase(idx, docstring::npos);
-                       else
-                               //  This is "\%", so just erase the '\'
-                               dummy.erase(idx - 1, 1);
-               // do we have a new token or a new line of
-               // the same one? In the first case we ignore
-               // the \n and in the second we replace it
-               // with a space
-               if (!dummy.empty()) {
-                       if (!contains(dummy, '='))
-                               data_ += ' ' + dummy;
-                       else
-                               data_ += dummy;
-               }
-               dummy = token(data, '\n', ++Entries);
-       } //end while
-
-       // replace double commas with "" for easy scanning
-       data = subst(data_, from_ascii(",,"), from_ascii("\"\""));
-
-       // unlikely!
-       if (data.empty())
-               return docstring();
-
-       // now get only the important line of the bibtex entry.
-       // all entries are devided by ',' except the last one.
-       data += ',';
-       // now we have same behaviour for all entries because the last one
-       // is "blah ... }"
-       Entries = 0;
-       bool found = false;
-       // parsing of title and booktitle is different from the
-       // others, because booktitle contains title
-       do {
-               dummy = token(data, ',', Entries++);
-               if (!dummy.empty()) {
-                       found = contains(ascii_lowercase(dummy), from_ascii(findkey));
-                       if (findkey == "title" &&
-                                                               contains(ascii_lowercase(dummy), from_ascii("booktitle")))
-                               found = false;
-               }
-       } while (!found && !dummy.empty());
-       if (dummy.empty())
-               // no such keyword
+       docstring key = from_ascii(findkey);
+       BibTeXInfo::const_iterator it = data.find(key);
+       if (it == data.end())
                return docstring();
-
-       // we are not sure, if we get all, because "key= "blah, blah" is
-       // allowed.
-       // Therefore we read all until the next "=" character, which follows a
-       // new keyword
-       docstring keyvalue = dummy;
-       dummy = token(data, ',', Entries++);
-       while (!contains(dummy, '=') && !dummy.empty()) {
-               keyvalue += ',' + dummy;
-               dummy = token(data, ',', Entries++);
-       }
-
-       // replace double "" with originals ,, (two commas)
-       // leaving us with the all-important line
-       data = subst(keyvalue, from_ascii("\"\""), from_ascii(",,"));
-
-       // Clean-up.
-       // 1. Spaces
-       data = rtrim(data);
-       // 2. if there is no opening '{' then a closing '{' is probably cruft.
-       if (!contains(data, '{'))
-               data = rtrim(data, "}");
-       // happens, when last keyword
-               docstring::size_type const idx =
-                       !data.empty() ? data.find('=') : docstring::npos;
-
-               if (idx == docstring::npos)
-                       return docstring();
-
-               data = trim(data.substr(idx));
-
-       // a valid entry?
-       if (data.length() < 2 || data[0] != '=')
-               return docstring();
-       else {
-               // delete '=' and the following spaces
-               data = ltrim(data, " =");
-               if (data.length() < 2) {
-                       // not long enough to find delimiters
-                       return data;
-               } else {
-                       docstring::size_type keypos = 1;
-                       char_type enclosing;
-                       if (data[0] == '{') {
-                               enclosing = '}';
-                       } else if (data[0] == '"') {
-                               enclosing = '"';
-                       } else {
-                               // no {} and no "", pure data but with a
-                               // possible ',' at the end
-                               return rtrim(data, ",");
-                       }
-                       docstring tmp = data.substr(keypos);
-                       while (tmp.find('{') != docstring::npos &&
-                                       tmp.find('}') != docstring::npos &&
-                                       tmp.find('{') < tmp.find('}') &&
-                                       tmp.find('{') < tmp.find(enclosing)) {
-                               keypos += tmp.find('{') + 1;
-                               tmp = data.substr(keypos);
-                               keypos += tmp.find('}') + 1;
-                               tmp = data.substr(keypos);
-                       }
-                       if (tmp.find(enclosing) == docstring::npos)
-                               return data;
-                       else {
-                               keypos += tmp.find(enclosing);
-                               return data.substr(1, keypos - 1);
-                       }
-               }
-       }
+       //FIXME ?? return it->second??
+       BibTeXInfo & data2 = const_cast<BibTeXInfo &>(data);
+       return data2[key];
 }
 
-
 namespace {
 
 
@@ -723,7 +570,7 @@ vector<CiteStyle> const getCiteStyles(CiteEngine const engine)
 
 vector<docstring> const
        getNumericalStrings(string const & key,
-       InfoMap const & map, vector<CiteStyle> const & styles)
+       BibKeyList const & map, vector<CiteStyle> const & styles)
 {
        if (map.empty())
                return vector<docstring>();
@@ -777,7 +624,7 @@ vector<docstring> const
 
 vector<docstring> const
                getAuthorYearStrings(string const & key,
-                       InfoMap const & map, vector<CiteStyle> const & styles)
+                       BibKeyList const & map, vector<CiteStyle> const & styles)
 {
        if (map.empty())
                return vector<docstring>();
@@ -835,7 +682,7 @@ vector<docstring> const
 
 
 void fillWithBibKeys(Buffer const * const buf, 
-                     vector<pair<string, docstring> > & keys)
+                     BibKeyList & keys)
 {      
        /// if this is a child document and the parent is already loaded
        /// use the parent's list instead  [ale990412]
diff --git a/src/Biblio.h b/src/Biblio.h
index a6fe6c6acd7fa6c1b05b8cb5959dd3359d704f6e..4e8e39f13bc654259012f9661b5ecdc738337336 100644 (file)
@@ -13,8 +13,8 @@
 #ifndef BIBLIO_H
 #define BIBLIO_H
 
+#include "Biblio_typedefs.h"
 #include "Buffer.h"
-#include "support/docstring.h"
 
 #include <vector>
 
@@ -22,8 +22,6 @@ namespace lyx {
        
 namespace biblio {
        
-       extern const docstring TheBibliographyRef;
-
        enum CiteEngine {
                ENGINE_BASIC,
                ENGINE_NATBIB_AUTHORYEAR,
@@ -59,8 +57,7 @@ namespace biblio {
 /** Fills keys with BibTeX information derived from the various
  *  in this document or its master document.
  */
-       void fillWithBibKeys(Buffer const * const buf, 
-               std::vector<std::pair<std::string, docstring> > & keys);
+       void fillWithBibKeys(Buffer const * const buf, BibKeyList & keys);
 
 /** Each citation engine recognizes only a subset of all possible
        *  citation commands. Given a latex command \c input, this function
@@ -69,49 +66,43 @@ namespace biblio {
        std::string const asValidLatexCommand(std::string const & input,
                                              CiteEngine const engine);
 
-/// First entry is the bibliography key, second the data
-       typedef std::map<std::string, docstring> InfoMap;
-
 /// Returns a vector of bibliography keys
-       std::vector<std::string> const getKeys(InfoMap const &);
+       std::vector<std::string> const getKeys(BibKeyList const &);
 
 /** Returns the BibTeX data associated with a given key.
        Empty if no info exists. */
-       docstring const getInfo(InfoMap const &, std::string const & key);
+       docstring const getInfo(BibKeyList const &, std::string const & key);
 
 /// return the year from the bibtex data record
-       docstring const getYear(InfoMap const & map, std::string const & key);
+       docstring const getYear(BibKeyList const & map, std::string const & key);
 
 /// return the short form of an authorlist
-       docstring const getAbbreviatedAuthor(InfoMap const & map, std::string const & key);
+       docstring const getAbbreviatedAuthor(BibKeyList const & map, std::string const & key);
 
-// return only the family name
+/// return only the family name
        docstring const familyName(docstring const & name);
 
 /** Search a BibTeX info field for the given key and return the
        associated field. */
-       docstring const parseBibTeX(docstring data, std::string const & findkey);
+       docstring const getValueForKey(BibTeXInfo const & data, std::string const & findkey);
 
 /** Returns an iterator to the first key that meets the search
        criterion, or end() if unsuccessful.
 
     User supplies :
-       the InfoMap of bibkeys info,
+       the BibKeyList of bibliography info,
        the vector of keys to be searched,
        the search criterion,
        an iterator defining the starting point of the search,
        an enum defining a Simple or Regex search,
        an enum defining the search direction.
  */
-
        std::vector<std::string>::const_iterator
-                       searchKeys(InfoMap const & map,
+                       searchKeys(BibKeyList const & map,
                                   std::vector<std::string> const & keys_to_search,
                                   docstring const & search_expression,
                                   std::vector<std::string>::const_iterator start,
-        Search,
-       Direction,
- bool caseSensitive=false);
+                                  Search, Direction, bool caseSensitive=false);
 
 
        class CitationStyle {
@@ -145,12 +136,12 @@ namespace biblio {
 
    User supplies :
        the key,
-       the InfoMap of bibkeys info,
+       the BibKeyList of bibkeys info,
        the available citation styles
  */
        std::vector<docstring> const
                        getNumericalStrings(std::string const & key,
-                                           InfoMap const & map,
+                                           BibKeyList const & map,
                                            std::vector<CiteStyle> const & styles);
 
 /**
@@ -162,12 +153,12 @@ namespace biblio {
 
    User supplies :
        the key,
-       the InfoMap of bibkeys info,
+       the BibKeyList of bibkeys info,
        the available citation styles
  */
        std::vector<docstring> const
                        getAuthorYearStrings(std::string const & key,
-                                            InfoMap const & map,
+                                            BibKeyList const & map,
                                             std::vector<CiteStyle> const & styles);
 
 } // namespace biblio
diff --git a/src/Biblio_typedefs.h b/src/Biblio_typedefs.h
new file mode 100644 (file)
index 0000000..18d345a
--- /dev/null
@@ -0,0 +1,51 @@
+// -*- C++ -*-
+/**
+ * \file Biblio_typedefs.h
+ * This file is part of LyX, the document processor.
+ * Licence details can be found in the file COPYING.
+ *
+ * \author Richard Heck
+ *
+ * Full author contact details are available in file CREDITS.
+ */
+
+#ifndef BIBLIO_TYPEDEFS_H
+#define BIBLIO_TYPEDEFS_H
+
+#include "support/docstring.h"
+#include <map>
+
+namespace lyx {
+namespace biblio {
+
+/// Class to represent information about a BibTeX or
+/// bibliography entry.
+/// The keys are BibTeX fields, and the values are the
+/// associated field values.
+/// \param isBibTeX false if this is from an InsetBibitem
+/// \param allData the entire BibTeX entry, more or less
+/// \param entryType the BibTeX entry type
+class BibTeXInfo : public std::map<docstring, docstring> {
+       public:
+               BibTeXInfo();
+               BibTeXInfo(bool isBibTeX);
+               bool hasKey(docstring const & key);
+               bool isBibTeX;
+               docstring allData;
+               docstring entryType;
+};
+
+/*
+class BibKeyList : public std::set<std::string, BibTeXInfo> {
+ public:
+       std::set<string> keys;
+}
+
+*/
+
+/// First entry is the bibliography key, second the data
+typedef std::map<std::string, BibTeXInfo> BibKeyList;
+       
+}
+}
+#endif
diff --git a/src/Buffer.cpp b/src/Buffer.cpp
index ae0dccaf02eb10fe9553417bb0919a564075cc5d..ed969f83034bb6ecb5320d2f591a3bf0d12bfd0f 100644 (file)
@@ -13,6 +13,7 @@
 #include "Buffer.h"
 
 #include "Author.h"
+#include "Biblio.h"
 #include "BranchList.h"
 #include "buffer_funcs.h"
 #include "BufferList.h"
@@ -1359,7 +1360,7 @@ void Buffer::getLabelList(vector<docstring> & list) const
 
 
 // This is also a buffer property (ale)
-void Buffer::fillWithBibKeys(vector<pair<string, docstring> > & keys)
+void Buffer::fillWithBibKeys(biblio::BibKeyList & keys)
        const
 {
        biblio::fillWithBibKeys(this, keys);
@@ -1730,10 +1731,10 @@ void Buffer::changeRefsIfUnique(docstring const & from, docstring const & to,
        vector<docstring> labels;
 
        if (code == Inset::CITE_CODE) {
-               vector<pair<string, docstring> > keys;
+               biblio::BibKeyList keys;
                fillWithBibKeys(keys);
-               vector<pair<string, docstring> >::const_iterator bit  = keys.begin();
-               vector<pair<string, docstring> >::const_iterator bend = keys.end();
+               biblio::BibKeyList::const_iterator bit  = keys.begin();
+               biblio::BibKeyList::const_iterator bend = keys.end();
 
                for (; bit != bend; ++bit)
                        // FIXME UNICODE
diff --git a/src/Buffer.h b/src/Buffer.h
index 218f472744e20343b7142b41d71941d4b8277f19..b611fd0b311c21457be66a355428a6cdadf23eca 100644 (file)
@@ -12,6 +12,7 @@
 #ifndef BUFFER_H
 #define BUFFER_H
 
+#include "Biblio_typedefs.h"
 #include "DocIterator.h"
 
 #include "support/FileName.h"
@@ -302,7 +303,7 @@ public:
        void validate(LaTeXFeatures &) const;
 
        /// return all bibkeys from buffer and its childs
-       void fillWithBibKeys(std::vector<std::pair<std::string, docstring> > & keys) const;
+       void fillWithBibKeys(biblio::BibKeyList & keys) const;
        /// Update the cache with all bibfiles in use (including bibfiles
        /// of loaded child documents).
        void updateBibfilesCache();
diff --git a/src/Makefile.am b/src/Makefile.am
index 7daedf1db3a9b9b376428fc1b2489b54cfdd8185..511ff8afbe0379069d914d9e425f6917bed10203 100644 (file)
@@ -78,6 +78,7 @@ endif
 liblyxcore_la_SOURCES = \
        Author.cpp \
        Author.h \
+       Biblio_typedefs.h \
        Biblio.h \
        Biblio.cpp \
        Bidi.cpp \
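diff --git a/src/frontends/controllers/ControlCitation.cpp b/src/frontends/controllers/ControlCitation.cpp
index 8a3a7134ce710b1f330ab2327957489db8e81034..bd716a95dc5f848136933e8c3cefac967f654f02 100644 (file)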
@@ -48,12 +48,8 @@ bool ControlCitation::initialiseParams(string const & data)
 
        bool use_styles = engine != biblio::ENGINE_BASIC;
 
-       vector<pair<string, docstring> > blist;
-       kernel().buffer().fillWithBibKeys(blist);
-       bibkeysInfo_.clear();
-       for (size_t i = 0; i < blist.size(); ++i)
-               bibkeysInfo_[blist[i].first] = blist[i].second;
-
+       kernel().buffer().fillWithBibKeys(bibkeysInfo_);
+       
        if (citeStyles_.empty())
                citeStyles_ = biblio::getCiteStyles(engine);
        else {
@@ -137,23 +133,20 @@ vector<string> ControlCitation::searchKeys(
                // it is treated as a simple string by boost::regex.
                expr = escape_special_chars(expr);
 
-       boost::regex reg_exp(to_utf8(expr), case_sensitive?
+       boost::regex reg_exp(to_utf8(expr), case_sensitive ?
                boost::regex_constants::normal : boost::regex_constants::icase);
 
        vector<string>::const_iterator it = keys_to_search.begin();
        vector<string>::const_iterator end = keys_to_search.end();
        for (; it != end; ++it ) {
-               biblio::InfoMap::const_iterator info = bibkeysInfo_.find(*it);
+               biblio::BibKeyList::const_iterator info = bibkeysInfo_.find(*it);
                if (info == bibkeysInfo_.end())
                        continue;
-
-               string data = *it;
-               // FIXME UNICODE
-               data += ' ' + to_utf8(info->second);
-
+               
+               biblio::BibTeXInfo const kvm = info->second;
+               string const data = *it + ' ' + to_utf8(kvm.allData);
+               
                try {
-                       // Attempts to find a match for the current RE
-                       // somewhere in data.
                        if (boost::regex_search(data, reg_exp))
                                foundKeys.push_back(*it);
                }
index 4bf16fde4e456942b936b4ddbeeca928115e0cb8..93219dc535a29c2f0d49000596de19fa09473a03 100644 (file)
@@ -62,7 +62,7 @@ public:
        }
 private:
        /// The info associated with each key
-       biblio::InfoMap bibkeysInfo_;
+       biblio::BibKeyList bibkeysInfo_;
 
        ///
        static std::vector<biblio::CiteStyle> citeStyles_;
index bae6669ee6a44406e3371ebc90471bde1195d4c6..438dfb54d61afa9e41b46f34e4887e3bbf2a71e7 100644 (file)
@@ -15,6 +15,7 @@
 #ifndef INSETBASE_H
 #define INSETBASE_H
 
+#include "Biblio_typedefs.h"
 #include "Changes.h"
 #include "Dimension.h"
 
@@ -440,8 +441,7 @@ public:
        virtual void addToToc(TocList &, Buffer const &, ParConstIterator const &) const {}
        /// Fill keys with BibTeX information
        virtual void fillWithBibKeys(Buffer const &,
-               std::vector<std::pair<std::string, docstring> > &,
-               InsetIterator const &) const { return; }
+               biblio::BibKeyList &, InsetIterator const &) const { return; }
        /// Update the counters of this inset and of its contents
        virtual void updateLabels(Buffer const &, ParIterator const &) {}
 
index 166bcdd5a84c27d0e0ab0c9e1887b0198b958673..8c9410b2b1fc725148940959acae7baf3b584d89 100644 (file)
@@ -187,16 +187,16 @@ docstring const bibitemWidest(Buffer const & buffer)
 
 
 void InsetBibitem::fillWithBibKeys(Buffer const & buf,
-       std::vector<std::pair<std::string, docstring> > & keys,
-       InsetIterator const & it) const
+       biblio::BibKeyList & keys, InsetIterator const & it) const
 {
        string const key = to_utf8(getParam("key"));
-       docstring const label = getParam("label");
+       biblio::BibTeXInfo keyvalmap;
+       keyvalmap[from_ascii("label")] = getParam("label");
        DocIterator doc_it(it); 
        doc_it.forwardPos();
-       docstring const ref = doc_it.paragraph().asString(buf, false);
-       docstring const info = label + biblio::TheBibliographyRef + ref;
-       keys.push_back(std::pair<string, docstring>(key, info));
+       keyvalmap [from_ascii("ref")] = doc_it.paragraph().asString(buf, false);
+       keyvalmap.isBibTeX = false;
+       keys[key] = keyvalmap;
 }
 
 } // namespace lyx
index 98db70e68d569f40858c8a47308ed1c4bdc03792..68edfca092dab8fabe100c966803b694a5590895 100644 (file)
@@ -14,6 +14,7 @@
 
 
 #include "InsetCommand.h"
+#include "Biblio_typedefs.h"
 
 
 namespace lyx {
@@ -45,8 +46,7 @@ public:
        int plaintext(Buffer const &, odocstream &, OutputParams const &) const;
        ///
        virtual void fillWithBibKeys(Buffer const &,
-               std::vector<std::pair<std::string, docstring> > &,
-               InsetIterator const &) const;
+               biblio::BibKeyList &, InsetIterator const &) const;
 
 protected:
        ///
index 7ba0a2814b59b45d475ff0ffffd0850776f514f7..4c612bd4955c54acc2d03f6443e3b8362ec6c2e4 100644 (file)
@@ -4,6 +4,7 @@
  * Licence details can be found in the file COPYING.
  *
  * \author Alejandro Aguilar Sierra
+ * \author Richard Heck (BibTeX parser improvements)
  *
  * Full author contact details are available in file CREDITS.
  */
@@ -414,14 +415,14 @@ namespace {
                // read value
                bool legalChar = true;
                while (ifs && !isSpace(ch) && 
-                          delimChars.find(ch) == docstring::npos &&
-                          (legalChar = illegalChars.find(ch) == docstring::npos)
-                          ) {
-                       if (chCase == makeLowerCase) {
+                                                delimChars.find(ch) == docstring::npos &&
+                                                (legalChar = (illegalChars.find(ch) == docstring::npos))
+                                       ) 
+               {
+                       if (chCase == makeLowerCase)
                                val += lowercase(ch);
-                       } else {
+                       else
                                val += ch;
-                       }
                        ifs.get(ch);
                }
                
@@ -478,17 +479,40 @@ namespace {
                                        return false;
 
                        } else if (ch == '"' || ch == '{') {
+                               // set end delimiter
+                               char_type delim = ch == '"' ? '"': '}';
 
-                               // read delimited text - set end delimiter
-                               char_type delim = ch == '"'? '"': '}';
-
-                               // inside this delimited text braces must match.
-                               // Thus we can have a closing delimiter only
-                               // when nestLevel == 0
+                               //Skip whitespace
+                               do {
+                                       ifs.get(ch);
+                               } while (ifs && isSpace(ch));
+                               
+                               if (!ifs)
+                                       return false;
+                               
+                               //We now have the first non-whitespace character
+                               //We'll collapse adjacent whitespace.
+                               bool lastWasWhiteSpace = false;
+                               
+                               // inside this delimited text braces must match.
+                               // Thus we can have a closing delimiter only
+                               // when nestLevel == 0
                                int nestLevel = 0;
-
-                               ifs.get(ch);
                                while (ifs && (nestLevel > 0 || ch != delim)) {
+                                       if (isSpace(ch)) {
+                                               lastWasWhiteSpace = true;
+                                               ifs.get(ch);
+                                               continue;
+                                       }
+                                       //We output the space only after we stop getting 
+                                       //whitespace so as not to output any whitespace
+                                       //at the end of the value.
+                                       if (lastWasWhiteSpace) {
+                                               lastWasWhiteSpace = false;
+                                               val += ' ';
+                                       }
+                                       
                                        val += ch;
 
                                        // update nesting level
@@ -556,8 +580,7 @@ namespace {
 
 // This method returns a comma separated list of Bibtex entries
 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
-               std::vector<std::pair<string, docstring> > & keys,
-               InsetIterator const & /*di*/) const
+               biblio::BibKeyList & keys, InsetIterator const & /*di*/) const
 {
        vector<FileName> const files = getFiles(buffer);
        for (vector<FileName>::const_iterator it = files.begin();
@@ -573,15 +596,6 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
                // - it accepts more characters in keys or value names than
                //   bibtex does.
                //
-               // TODOS:
-               // - the entries are split into name = value pairs by the
-               //   parser. These have to be merged again because of the
-               //   way lyx treats the entries ( pair<...>(...) ). The citation
-               //   mechanism in lyx should be changed such that it can use
-               //   the split entries.
-               // - messages on parsing errors can be generated.
-               //
-
                // Officially bibtex does only support ASCII, but in practice
                // you can use the encoding of the main document as long as
                // some elements like keys and names are pure ASCII. Therefore
@@ -589,9 +603,10 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
                // We don't restrict keys to ASCII in LyX, since our own
                // InsetBibitem can generate non-ASCII keys, and nonstandard
                // 8bit clean bibtex forks exist.
+               
                idocfstream ifs(it->toFilesystemEncoding().c_str(),
-                               std::ios_base::in,
-                               buffer.params().encoding().iconvName());
+                       std::ios_base::in,
+                       buffer.params().encoding().iconvName());
 
                char_type ch;
                VarMap strings;
@@ -660,25 +675,31 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
 
                        } else {
 
-                               // Citation entry. Read the key and all name = value pairs
+                               // Citation entry. Try to read the key.
                                docstring key;
-                               docstring fields;
-                               docstring name;
-                               docstring value;
-                               docstring commaNewline;
 
                                if (!readTypeOrKey(key, ifs, from_ascii(","), 
                                                   from_ascii("}"), keepCase) || !ifs)
                                        continue;
 
-                               // now we have a key, so we will add an entry
-                               // (even if it's empty, as bibtex does)
+                               /////////////////////////////////////////////
+                               // now we have a key, so we will add an entry 
+                               // (even if it's empty, as bibtex does)
                                //
-                               // all items must be separated by a comma. If
-                               // it is missing the scanning of this entry is
-                               // stopped and the next is searched.
+                               // we now read the field = value pairs.
+                               // all items must be separated by a comma. If
+                               // it is missing the scanning of this entry is
+                               // stopped and the next is searched.
+                               docstring fields;
+                               docstring name;
+                               docstring value;
+                               docstring commaNewline;
+                               docstring data;
+                               biblio::BibTeXInfo keyvalmap;
+                               keyvalmap.entryType = entryType;
+                               
                                bool readNext = removeWSAndComma(ifs);
-
                                while (ifs && readNext) {
 
                                        // read field name
@@ -699,27 +720,18 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
                                        if (!readValue(value, ifs, strings))
                                                break;
 
-                                       // append field to the total entry string.
-                                       //
-                                       // TODO: Here is where the fields can be put in
-                                       //       a more intelligent structure that preserves
-                                       //           the already known parts.
-                                       fields += commaNewline;
-                                       fields += name + from_ascii(" = {") + value + '}';
-
-                                       if (!commaNewline.length())
-                                               commaNewline = from_ascii(",\n");
+                                       keyvalmap[name] = value;
+                                       data += "\n\n" + value;
 
                                        readNext = removeWSAndComma(ifs);
                                }
 
                                // add the new entry
-                               keys.push_back(pair<string, docstring>(
-                               to_utf8(key), fields));
+                               keyvalmap.allData = data;
+                               keyvalmap.isBibTeX = true;
+                               keys[to_utf8(key)] = keyvalmap;
                        }
-
                } //< searching '@'
-
        } //< for loop over files
 }
 
index 82defd4fd72c3380c178708ebdf43ad08afec513..fcbf0b716c3773ef649e915af3ef9bb11df0aac6 100644 (file)
@@ -12,9 +12,9 @@
 #ifndef INSET_BIBTEX_H
 #define INSET_BIBTEX_H
 
-
-#include <vector>
+#include <map>
 #include "InsetCommand.h"
+#include "Biblio_typedefs.h"
 
 #include "support/FileName.h"
 
@@ -39,8 +39,7 @@ public:
        int latex(Buffer const &, odocstream &, OutputParams const &) const;
        ///
        virtual void fillWithBibKeys(Buffer const &,
-               std::vector<std::pair<std::string, docstring> > &,
-               InsetIterator const &) const;
+               biblio::BibKeyList &, InsetIterator const &) const;
        ///
        std::vector<support::FileName> const getFiles(Buffer const &) const;
        ///
index f0812e5b8fce41817158b19d244c026bf56ed8d6..6541e3ceb679d6a59b69695546705a1a5a128d76 100644 (file)
@@ -65,13 +65,13 @@ docstring const getNatbibLabel(Buffer const & buffer,
                return docstring();
 
        // Cache the labels
-       typedef std::map<Buffer const *, biblio::InfoMap> CachedMap;
+       typedef std::map<Buffer const *, biblio::BibKeyList> CachedMap;
        static CachedMap cached_keys;
 
        // and cache the timestamp of the bibliography files.
        static std::map<FileName, time_t> bibfileStatus;
 
-       biblio::InfoMap infomap;
+       biblio::BibKeyList keylist;
 
        vector<FileName> const & bibfilesCache = buffer.getBibfilesCache();
        // compare the cached timestamps with the actual ones.
@@ -97,22 +97,13 @@ docstring const getNatbibLabel(Buffer const & buffer,
 
        // build the keylist only if the bibfiles have been changed
        if (cached_keys[&buffer].empty() || bibfileStatus.empty() || changed) {
-               typedef vector<std::pair<string, docstring> > InfoType;
-               InfoType bibkeys;
-               buffer.fillWithBibKeys(bibkeys);
-
-               InfoType::const_iterator bit  = bibkeys.begin();
-               InfoType::const_iterator bend = bibkeys.end();
-
-               for (; bit != bend; ++bit)
-                       infomap[bit->first] = bit->second;
-
-               cached_keys[&buffer] = infomap;
+               buffer.fillWithBibKeys(keylist);
+               cached_keys[&buffer] = keylist;
        } else
                // use the cached keys
-               infomap = cached_keys[&buffer];
+               keylist = cached_keys[&buffer];
 
-       if (infomap.empty())
+       if (keylist.empty())
                return docstring();
 
        // the natbib citation-styles
@@ -175,8 +166,8 @@ docstring const getNatbibLabel(Buffer const & buffer,
        vector<string>::const_iterator end = keys.end();
        for (; it != end; ++it) {
                // get the bibdata corresponding to the key
-               docstring const author(biblio::getAbbreviatedAuthor(infomap, *it));
-               docstring const year(biblio::getYear(infomap, *it));
+               docstring const author(biblio::getAbbreviatedAuthor(keylist, *it));
+               docstring const year(biblio::getYear(keylist, *it));
 
                // Something isn't right. Fail safely.
                if (author.empty() || year.empty())
index 6037a956d368cd467e034995592b01a36fd8aab9..08461aadc60069ce37f889c8aaad5e165770def3 100644 (file)
@@ -727,12 +727,12 @@ void InsetInclude::getLabelList(Buffer const & buffer,
 
 
 void InsetInclude::fillWithBibKeys(Buffer const & buffer,
-               std::vector<std::pair<string, docstring> > & keys,
-               InsetIterator const & /*di*/) const
+               biblio::BibKeyList & keys, InsetIterator const & /*di*/) const
 {
        if (loadIfNeeded(buffer, params_)) {
                string const included_file = includedFilename(buffer, params_).absFilename();
                Buffer * tmp = theBufferList().getBuffer(included_file);
+               //FIXME This is kind of a dirty hack and should be made reasonable.
                tmp->setParentName("");
                tmp->fillWithBibKeys(keys);
                tmp->setParentName(parentFilename(buffer));
index 2b71890a83cdae9870c0fca20ecac201bdf5a648..dbdaa340f3c1424fb640d9f22832ee63f72b81da 100644 (file)
@@ -12,6 +12,7 @@
 #ifndef INSET_INCLUDE_H
 #define INSET_INCLUDE_H
 
+#include "Biblio_typedefs.h"
 #include "Inset.h"
 #include "InsetCommandParams.h"
 #include "RenderButton.h"
@@ -58,10 +59,11 @@ public:
        /** Fills \c keys
         *  \param buffer the Buffer containing this inset.
         *  \param keys the list of bibkeys in the child buffer.
+        *  \param it not used here
         */
-       virtual void fillWithBibKeys(Buffer const &,
-               std::vector<std::pair<std::string, docstring> > &,
-               InsetIterator const & /*di*/) const;
+       virtual void fillWithBibKeys(Buffer const & buffer,
+               biblio::BibKeyList & keys, InsetIterator const & it) const;
+       
        /** Update the cache with all bibfiles in use of the child buffer
         *  (including bibfiles of grandchild documents).
         *  Does nothing if the child document is not loaded to prevent