Re-write of the BibTeX representation. The main change is that we now have

author Richard Heck <rgheck@comcast.net>

Thu, 16 Aug 2007 01:59:20 +0000 (01:59 +0000)

committer Richard Heck <rgheck@comcast.net>

Thu, 16 Aug 2007 01:59:20 +0000 (01:59 +0000)
author Richard Heck <rgheck@comcast.net>
Thu, 16 Aug 2007 01:59:20 +0000 (01:59 +0000)
committer Richard Heck <rgheck@comcast.net>
Thu, 16 Aug 2007 01:59:20 +0000 (01:59 +0000)
diff --git a/development/scons/scons_manifest.py b/development/scons/scons_manifest.py

index b45ac7ef616b9f69e31c0209efedb196815ec98b..739713fb37af6d3e03dfb1ed75e9f3492d4be7ba 100644 (file)
--- a/development/scons/scons_manifest.py
+++ b/development/scons/scons_manifest.py
@@ -34,6 +34,7 @@ src_header_files = Split('''
      ASpell_local.h
      Author.h
      Biblio.h
+    Biblio_typedefs.h
      Bidi.h
      Box.h
      BranchList.h
diff --git a/src/Biblio.cpp b/src/Biblio.cpp

index c3c53998ee654ffe9715dbb280ee4b4f840ddff0..e314afb6d669cdc6d36a98a90a694635b42520f6 100644 (file)
--- a/src/Biblio.cpp
+++ b/src/Biblio.cpp
@@ -49,6 +49,22 @@ using support::trim;
  
  namespace biblio {
  
+       
+BibTeXInfo::BibTeXInfo(): isBibTeX(true)
+{}  // a default-constructed entry is flagged as a BibTeX entry
+
+// Lets the caller pick the isBibTeX flag instead of defaulting it to true.
+BibTeXInfo::BibTeXInfo(bool isBibTeX): isBibTeX(isBibTeX)
+{}
+
+
+bool BibTeXInfo::hasKey(docstring const & key)
+{
+       // find() yields end() only when key is absent, so presence is "!=".
+       return find(key) != end();
+}
+
+
  namespace {
  
         vector<string> const init_possible_cite_commands()
@@ -109,8 +125,6 @@ namespace {
  } // namespace anon
  
  
-const docstring TheBibliographyRef(from_ascii("TheBibliographyRef"));
-
  string const asValidLatexCommand(string const & input,
                                   CiteEngine const engine)
  {
@@ -182,89 +196,58 @@ docstring const familyName(docstring const & name)
  }
  
  
-docstring const getAbbreviatedAuthor(InfoMap const & map, string const & key)
+docstring const getAbbreviatedAuthor(BibKeyList const & map, string const & key)
  {
         BOOST_ASSERT(!map.empty());
-
-       InfoMap::const_iterator it = map.find(key);
+       BibKeyList::const_iterator it = map.find(key);
         if (it == map.end())
                 return docstring();
-       docstring const & data = it->second;
-
-// Is the entry a BibTeX one or one from lyx-layout "bibliography"?
-       docstring::size_type const pos = data.find(TheBibliographyRef);
-       if (pos != docstring::npos) {
-               if (pos <= 2) {
-                       return docstring();
-               }
-
-               docstring const opt = trim(data.substr(0, pos - 1));
-               if (opt.empty())
-                       return docstring();
-
-               docstring authors;
-               split(opt, authors, '(');
-               return authors;
-       }
-
-       docstring author = parseBibTeX(data, "author");
-
-       if (author.empty())
-               author = parseBibTeX(data, "editor");
-
+       BibTeXInfo const & data = it->second;
+ 
+       if (!data.isBibTeX) 
+               return docstring();
+ 
+       docstring author = getValueForKey(data, "author");
+ 
         if (author.empty()) {
-               author = parseBibTeX(data, "key");
-               if (author.empty())
-               // FIXME UNICODE
-                       return from_utf8(key);
-               return author;
+               author = getValueForKey(data, "editor");
+               if (author.empty()) {
+                       author = getValueForKey(data, "key");
+                       if (author.empty())
+                               // FIXME UNICODE
+                               return from_utf8(key);
+                       else 
+                               return author; //this is the key
+               }
         }
  
+       //OK, we've got some names. Let's format them.
+       //try to split the author list on " and "
         vector<docstring> const authors = getVectorFromString(author, from_ascii(" and "));
-       if (authors.empty())
-               return author;
-
+       
         if (authors.size() == 2)
                 return bformat(_("%1$s and %2$s"),
-                                                                       familyName(authors[0]), familyName(authors[1]));
-
-       if (authors.size() > 2)
+                                                                        familyName(authors[0]), familyName(authors[1]));
+       else if (authors.size() > 2)
                 return bformat(_("%1$s et al."), familyName(authors[0]));
-
-       return familyName(authors[0]);
+       else  
+               return familyName(authors[0]);
  }
  
  
-docstring const getYear(InfoMap const & map, string const & key)
+docstring const getYear(BibKeyList const & map, string const & key)
  {
         BOOST_ASSERT(!map.empty());
-
-       InfoMap::const_iterator it = map.find(key);
+ 
+       BibKeyList::const_iterator it = map.find(key);
         if (it == map.end())
                 return docstring();
-       docstring const & data = it->second;
-
-// Is the entry a BibTeX one or one from lyx-layout "bibliography"?
-       docstring::size_type const pos = data.find(TheBibliographyRef);
-       if (pos != docstring::npos) {
-               if (pos <= 2) {
-                       return docstring();
-               }
-
-               docstring const opt =
-                                       trim(data.substr(0, pos - 1));
-               if (opt.empty())
-                       return docstring();
-
-               docstring authors;
-               docstring const tmp = split(opt, authors, '(');
-               docstring year;
-               split(tmp, year, ')');
-               return year;
-
-       }
-
-       docstring year = parseBibTeX(data, "year");
+       BibTeXInfo const & data = it->second;
+ 
+       if (!data.isBibTeX) 
+               return docstring();
+ 
+       docstring year = getValueForKey(data, "year");
         if (year.empty())
                 year = _("No year");
  
@@ -284,11 +267,11 @@ class compareNoCase: public std::binary_function<string, string, bool>
  } // namespace anon
  
  
-vector<string> const getKeys(InfoMap const & map)
+vector<string> const getKeys(BibKeyList const & map)
  {
         vector<string> bibkeys;
-       InfoMap::const_iterator it  = map.begin();
-       InfoMap::const_iterator end = map.end();
+       BibKeyList::const_iterator it  = map.begin();
+       BibKeyList::const_iterator end = map.end();
         for (; it != end; ++it) {
                 bibkeys.push_back(it->first);
         }
@@ -298,72 +281,67 @@ vector<string> const getKeys(InfoMap const & map)
  }
  
  
-docstring const getInfo(InfoMap const & map, string const & key)
+docstring const getInfo(BibKeyList const & map, string const & key)
  {
         BOOST_ASSERT(!map.empty());
-
-       InfoMap::const_iterator it = map.find(key);
+       // Summarise the entry stored under key; unknown keys give an empty string.
+       BibKeyList::const_iterator it = map.find(key);
         if (it == map.end())
                 return docstring();
-       docstring const & data = it->second;
-
-// is the entry a BibTeX one or one from lyx-layout "bibliography"?
-       docstring::size_type const pos = data.find(TheBibliographyRef);
-       if (pos != docstring::npos) {
-               docstring::size_type const pos2 = pos + TheBibliographyRef.size();
-               docstring const info = trim(data.substr(pos2));
-               return info;
+       BibTeXInfo const & data = it->second;
+       // Non-BibTeX entries are described by their "ref" field (empty if missing).
+       if (!data.isBibTeX) {
+               BibTeXInfo::const_iterator it3 = data.find(from_ascii("ref"));
+               return it3 == data.end() ? docstring() : it3->second;
         }
-
-// Search for all possible "required" keys
-       docstring author = parseBibTeX(data, "author");
+
+       //FIXME
+       //This could be made a lot better using the biblio::TheEntryType
+       //field to customize the output based upon entry type.
+
+       //Search for all possible "required" fields
+       docstring author = getValueForKey(data, "author");
         if (author.empty())
-               author = parseBibTeX(data, "editor");
-
-       docstring year      = parseBibTeX(data, "year");
-       docstring title     = parseBibTeX(data, "title");
-       docstring booktitle = parseBibTeX(data, "booktitle");
-       docstring chapter   = parseBibTeX(data, "chapter");
-       docstring number    = parseBibTeX(data, "number");
-       docstring volume    = parseBibTeX(data, "volume");
-       docstring pages     = parseBibTeX(data, "pages");
-       docstring annote    = parseBibTeX(data, "annote");
-       docstring media     = parseBibTeX(data, "journal");
-       if (media.empty())
-               media = parseBibTeX(data, "publisher");
-       if (media.empty())
-               media = parseBibTeX(data, "school");
-       if (media.empty())
-               media = parseBibTeX(data, "institution");
-
-       odocstringstream result;
-       if (!author.empty())
-               result << author << ", ";
-       if (!title.empty())
-               result << title;
-       if (!booktitle.empty())
-               result << ", in " << booktitle;
-       if (!chapter.empty())
-               result << ", Ch. " << chapter;
-       if (!media.empty())
-               result << ", " << media;
-       if (!volume.empty())
-               result << ", vol. " << volume;
-       if (!number.empty())
-               result << ", no. " << number;
-       if (!pages.empty())
-               result << ", pp. " << pages;
-       if (!year.empty())
-               result << ", " << year;
-       if (!annote.empty())
-               result << "\n\n" << annote;
-
-       docstring const result_str = rtrim(result.str());
-       if (!result_str.empty())
-               return result_str;
-
-// This should never happen (or at least be very unusual!)
-       return data;
+               author = getValueForKey(data, "editor");
+
+       docstring year      = getValueForKey(data, "year");
+       docstring title     = getValueForKey(data, "title");
+       docstring docLoc    = getValueForKey(data, "pages");
+       if (docLoc.empty()) {
+               docLoc = getValueForKey(data, "chapter");
+               if (!docLoc.empty())
+                       docLoc = from_ascii("Ch. ") + docLoc;
+       } else
+               docLoc = from_ascii("pp. ") + docLoc;
+       docstring media     = getValueForKey(data, "journal");
+       if (media.empty()) {
+               media = getValueForKey(data, "publisher");
+               if (media.empty()) {
+                       media = getValueForKey(data, "school");
+                       if (media.empty())
+                               media = getValueForKey(data, "institution");
+               }
+       }
+       docstring volume = getValueForKey(data, "volume");
+
+       odocstringstream result;
+       if (!author.empty())
+               result << author << ", ";
+       if (!title.empty())
+               result << title;
+       if (!media.empty())
+               result << ", " << media;
+       if (!year.empty())
+               result << ", " << year;
+       if (!docLoc.empty())
+               result << ", " << docLoc;
+
+       docstring const result_str = rtrim(result.str());
+       if (!result_str.empty())
+               return result_str;
+
+       // This should never happen (or at least be very unusual!)
+       return docstring();
  }
  
  
@@ -400,37 +378,33 @@ class RegexMatch : public std::unary_function<string, bool>
         public:
  // re and icase are used to construct an instance of boost::RegEx.
  // if icase is true, then matching is insensitive to case
-               RegexMatch(InfoMap const & m, string const & re, bool icase)
+               RegexMatch(BibKeyList const & m, string const & re, bool icase)
                 : map_(m), regex_(re, icase) {}
  
                 bool operator()(string const & key) const {
-// the data searched is the key + its associated BibTeX/biblio
-// fields
-                       string data = key;
-                       InfoMap::const_iterator info = map_.find(key);
-                       if (info != map_.end())
-       // FIXME UNICODE
-                               data += ' ' + to_utf8(info->second);
-
-// Attempts to find a match for the current RE
-// somewhere in data.
+                       //FIXME This should search the monolith.
+                       BibKeyList::const_iterator info = map_.find(key);
+                       if (info == map_.end())
+                               return false;
+ 
+                       BibTeXInfo const kvm = info->second;
+                       string const data = key + ' ' + to_utf8(kvm.allData);
+                       
                         return boost::regex_search(data, regex_);
                 }
         private:
-               InfoMap const map_;
+               BibKeyList const map_;
                 mutable boost::regex regex_;
  };
  
  } // namespace anon
  
  
-vector<string>::const_iterator searchKeys(InfoMap const & theMap,
+vector<string>::const_iterator searchKeys(BibKeyList const & theMap,
                 vector<string> const & keys,
                 string const & search_expr,
                 vector<string>::const_iterator start,
-               Search type,
-               Direction dir,
-       bool caseSensitive)
+               Search type, Direction dir,     bool caseSensitive)
  {
         // Preliminary checks
         if (start < keys.begin() || start >= keys.end())
@@ -471,144 +445,17 @@ vector<string>::const_iterator searchKeys(InfoMap const & theMap,
  }
  
  
-docstring const parseBibTeX(docstring data, string const & findkey)
+docstring const getValueForKey(BibTeXInfo const & data, string const & findkey)
  {
-       // at first we delete all characters right of '%' and
-       // replace tabs through a space and remove leading spaces
-       // we read the data line by line so that the \n are
-       // ignored, too.
-       docstring data_;
-       int Entries = 0;
-       docstring dummy = token(data,'\n', Entries);
-       while (!dummy.empty()) {
-               // no tabs
-               dummy = subst(dummy, '\t', ' ');
-               // no leading spaces
-               dummy = ltrim(dummy);
-               // ignore lines with a beginning '%' or ignore all right of %
-               docstring::size_type const idx =
-                                       dummy.empty() ? docstring::npos : dummy.find('%');
-               if (idx != docstring::npos)
-                       // Check if this is really a comment or just "\%"
-                       if (idx == 0 || dummy[idx - 1] != '\\')
-                               dummy.erase(idx, docstring::npos);
-                       else
-                               //  This is "\%", so just erase the '\'
-                               dummy.erase(idx - 1, 1);
-               // do we have a new token or a new line of
-               // the same one? In the first case we ignore
-               // the \n and in the second we replace it
-               // with a space
-               if (!dummy.empty()) {
-                       if (!contains(dummy, '='))
-                               data_ += ' ' + dummy;
-                       else
-                               data_ += dummy;
-               }
-               dummy = token(data, '\n', ++Entries);
-       } //end while
-
-       // replace double commas with "" for easy scanning
-       data = subst(data_, from_ascii(",,"), from_ascii("\"\""));
-
-       // unlikely!
-       if (data.empty())
-               return docstring();
-
-       // now get only the important line of the bibtex entry.
-       // all entries are devided by ',' except the last one.
-       data += ',';
-       // now we have same behaviour for all entries because the last one
-       // is "blah ... }"
-       Entries = 0;
-       bool found = false;
-       // parsing of title and booktitle is different from the
-       // others, because booktitle contains title
-       do {
-               dummy = token(data, ',', Entries++);
-               if (!dummy.empty()) {
-                       found = contains(ascii_lowercase(dummy), from_ascii(findkey));
-                       if (findkey == "title" &&
-                                                               contains(ascii_lowercase(dummy), from_ascii("booktitle")))
-                               found = false;
-               }
-       } while (!found && !dummy.empty());
-       if (dummy.empty())
-               // no such keyword
+       // Look up the field named findkey; an absent field yields an empty string.
+       BibTeXInfo::const_iterator it = data.find(from_ascii(findkey));
+       if (it == data.end())
                 return docstring();
-
-       // we are not sure, if we get all, because "key= "blah, blah" is
-       // allowed.
-       // Therefore we read all until the next "=" character, which follows a
-       // new keyword
-       docstring keyvalue = dummy;
-       dummy = token(data, ',', Entries++);
-       while (!contains(dummy, '=') && !dummy.empty()) {
-               keyvalue += ',' + dummy;
-               dummy = token(data, ',', Entries++);
-       }
-
-       // replace double "" with originals ,, (two commas)
-       // leaving us with the all-important line
-       data = subst(keyvalue, from_ascii("\"\""), from_ascii(",,"));
-
-       // Clean-up.
-       // 1. Spaces
-       data = rtrim(data);
-       // 2. if there is no opening '{' then a closing '{' is probably cruft.
-       if (!contains(data, '{'))
-               data = rtrim(data, "}");
-       // happens, when last keyword
-               docstring::size_type const idx =
-                       !data.empty() ? data.find('=') : docstring::npos;
-
-               if (idx == docstring::npos)
-                       return docstring();
-
-               data = trim(data.substr(idx));
-
-       // a valid entry?
-       if (data.length() < 2 || data[0] != '=')
-               return docstring();
-       else {
-               // delete '=' and the following spaces
-               data = ltrim(data, " =");
-               if (data.length() < 2) {
-                       // not long enough to find delimiters
-                       return data;
-               } else {
-                       docstring::size_type keypos = 1;
-                       char_type enclosing;
-                       if (data[0] == '{') {
-                               enclosing = '}';
-                       } else if (data[0] == '"') {
-                               enclosing = '"';
-                       } else {
-                               // no {} and no "", pure data but with a
-                               // possible ',' at the end
-                               return rtrim(data, ",");
-                       }
-                       docstring tmp = data.substr(keypos);
-                       while (tmp.find('{') != docstring::npos &&
-                                       tmp.find('}') != docstring::npos &&
-                                       tmp.find('{') < tmp.find('}') &&
-                                       tmp.find('{') < tmp.find(enclosing)) {
-                               keypos += tmp.find('{') + 1;
-                               tmp = data.substr(keypos);
-                               keypos += tmp.find('}') + 1;
-                               tmp = data.substr(keypos);
-                       }
-                       if (tmp.find(enclosing) == docstring::npos)
-                               return data;
-                       else {
-                               keypos += tmp.find(enclosing);
-                               return data.substr(1, keypos - 1);
-                       }
-               }
-       }
+       // The iterator already points at the entry, so neither a const_cast
+       // nor operator[] (a second lookup on a const map) is needed.
+       return it->second;
  }
  
-
  namespace {
  
  
@@ -723,7 +570,7 @@ vector<CiteStyle> const getCiteStyles(CiteEngine const engine)
  
  vector<docstring> const
         getNumericalStrings(string const & key,
-       InfoMap const & map, vector<CiteStyle> const & styles)
+       BibKeyList const & map, vector<CiteStyle> const & styles)
  {
         if (map.empty())
                 return vector<docstring>();
@@ -777,7 +624,7 @@ vector<docstring> const
  
  vector<docstring> const
                 getAuthorYearStrings(string const & key,
-                       InfoMap const & map, vector<CiteStyle> const & styles)
+                       BibKeyList const & map, vector<CiteStyle> const & styles)
  {
         if (map.empty())
                 return vector<docstring>();
@@ -835,7 +682,7 @@ vector<docstring> const
  
  
  void fillWithBibKeys(Buffer const * const buf, 
-                     vector<pair<string, docstring> > & keys)
+                     BibKeyList & keys)
  {      
         /// if this is a child document and the parent is already loaded
         /// use the parent's list instead  [ale990412]
diff --git a/src/Biblio.h b/src/Biblio.h

index a6fe6c6acd7fa6c1b05b8cb5959dd3359d704f6e..4e8e39f13bc654259012f9661b5ecdc738337336 100644 (file)
--- a/src/Biblio.h
+++ b/src/Biblio.h
@@ -13,8 +13,8 @@
  #ifndef BIBLIO_H
  #define BIBLIO_H
  
+#include "Biblio_typedefs.h"
  #include "Buffer.h"
-#include "support/docstring.h"
  
  #include <vector>
  
@@ -22,8 +22,6 @@ namespace lyx {
         
  namespace biblio {
         
-       extern const docstring TheBibliographyRef;
-
         enum CiteEngine {
                 ENGINE_BASIC,
                 ENGINE_NATBIB_AUTHORYEAR,
@@ -59,8 +57,7 @@ namespace biblio {
  /** Fills keys with BibTeX information derived from the various
   *  in this document or its master document.
   */
-       void fillWithBibKeys(Buffer const * const buf, 
-               std::vector<std::pair<std::string, docstring> > & keys);
+       void fillWithBibKeys(Buffer const * const buf, BibKeyList & keys);
  
  /** Each citation engine recognizes only a subset of all possible
         *  citation commands. Given a latex command \c input, this function
@@ -69,49 +66,43 @@ namespace biblio {
         std::string const asValidLatexCommand(std::string const & input,
                                               CiteEngine const engine);
  
-/// First entry is the bibliography key, second the data
-       typedef std::map<std::string, docstring> InfoMap;
-
  /// Returns a vector of bibliography keys
-       std::vector<std::string> const getKeys(InfoMap const &);
+       std::vector<std::string> const getKeys(BibKeyList const &);
  
  /** Returns the BibTeX data associated with a given key.
         Empty if no info exists. */
-       docstring const getInfo(InfoMap const &, std::string const & key);
+       docstring const getInfo(BibKeyList const &, std::string const & key);
  
  /// return the year from the bibtex data record
-       docstring const getYear(InfoMap const & map, std::string const & key);
+       docstring const getYear(BibKeyList const & map, std::string const & key);
  
  /// return the short form of an authorlist
-       docstring const getAbbreviatedAuthor(InfoMap const & map, std::string const & key);
+       docstring const getAbbreviatedAuthor(BibKeyList const & map, std::string const & key);
  
-// return only the family name
+/// return only the family name
         docstring const familyName(docstring const & name);
  
  /** Search a BibTeX info field for the given key and return the
         associated field. */
-       docstring const parseBibTeX(docstring data, std::string const & findkey);
+       docstring const getValueForKey(BibTeXInfo const & data, std::string const & findkey);
  
  /** Returns an iterator to the first key that meets the search
         criterion, or end() if unsuccessful.
  
      User supplies :
-       the InfoMap of bibkeys info,
+       the BibKeyList of bibliography info,
         the vector of keys to be searched,
         the search criterion,
         an iterator defining the starting point of the search,
         an enum defining a Simple or Regex search,
         an enum defining the search direction.
   */
-
         std::vector<std::string>::const_iterator
-                       searchKeys(InfoMap const & map,
+                       searchKeys(BibKeyList const & map,
                                    std::vector<std::string> const & keys_to_search,
                                    docstring const & search_expression,
                                    std::vector<std::string>::const_iterator start,
-        Search,
-       Direction,
- bool caseSensitive=false);
+                                  Search, Direction, bool caseSensitive=false);
  
  
         class CitationStyle {
@@ -145,12 +136,12 @@ namespace biblio {
  
     User supplies :
         the key,
-       the InfoMap of bibkeys info,
+       the BibKeyList of bibkeys info,
         the available citation styles
   */
         std::vector<docstring> const
                         getNumericalStrings(std::string const & key,
-                                           InfoMap const & map,
+                                           BibKeyList const & map,
                                             std::vector<CiteStyle> const & styles);
  
  /**
@@ -162,12 +153,12 @@ namespace biblio {
  
     User supplies :
         the key,
-       the InfoMap of bibkeys info,
+       the BibKeyList of bibkeys info,
         the available citation styles
   */
         std::vector<docstring> const
                         getAuthorYearStrings(std::string const & key,
-                                            InfoMap const & map,
+                                            BibKeyList const & map,
                                              std::vector<CiteStyle> const & styles);
  
  } // namespace biblio
diff --git a/src/Biblio_typedefs.h b/src/Biblio_typedefs.h

new file mode 100644 (file)

index 0000000..18d345a
--- /dev/null
+++ b/src/Biblio_typedefs.h
@@ -0,0 +1,51 @@
+// -*- C++ -*-
+/**
+ * \file Biblio_typedefs.h
+ * This file is part of LyX, the document processor.
+ * Licence details can be found in the file COPYING.
+ *
+ * \author Richard Heck
+ *
+ * Full author contact details are available in file CREDITS.
+ */
+
+#ifndef BIBLIO_TYPEDEFS_H
+#define BIBLIO_TYPEDEFS_H
+
+#include "support/docstring.h"
+#include <map>
+
+namespace lyx {
+namespace biblio {
+
+/// Class to represent information about a BibTeX or
+/// bibliography entry.
+/// The keys are BibTeX fields, and the values are the
+/// associated field values.
+/// Member isBibTeX: false if this is from an InsetBibitem
+/// Member allData: the entire BibTeX entry, more or less
+/// Member entryType: the BibTeX entry type
+class BibTeXInfo : public std::map<docstring, docstring> {
+       public:
+               BibTeXInfo();              // sets isBibTeX to true
+               BibTeXInfo(bool isBibTeX); // stores the given flag in isBibTeX
+               bool hasKey(docstring const & key); // defined in Biblio.cpp
+               bool isBibTeX;       // false if this is from an InsetBibitem
+               docstring allData;   // the entire BibTeX entry, more or less
+               docstring entryType; // the BibTeX entry type
+};
+
+/*
+class BibKeyList : public std::set<std::string, BibTeXInfo> {
+ public:
+       std::set<string> keys;
+}
+
+*/
+
+/// First entry is the bibliography key, second the data
+typedef std::map<std::string, BibTeXInfo> BibKeyList;
+       
+}
+}
+#endif
diff --git a/src/Buffer.cpp b/src/Buffer.cpp

index ae0dccaf02eb10fe9553417bb0919a564075cc5d..ed969f83034bb6ecb5320d2f591a3bf0d12bfd0f 100644 (file)
--- a/src/Buffer.cpp
+++ b/src/Buffer.cpp
@@ -13,6 +13,7 @@
  #include "Buffer.h"
  
  #include "Author.h"
+#include "Biblio.h"
  #include "BranchList.h"
  #include "buffer_funcs.h"
  #include "BufferList.h"
@@ -1359,7 +1360,7 @@ void Buffer::getLabelList(vector<docstring> & list) const
  
  
  // This is also a buffer property (ale)
-void Buffer::fillWithBibKeys(vector<pair<string, docstring> > & keys)
+void Buffer::fillWithBibKeys(biblio::BibKeyList & keys)
         const
  {
         biblio::fillWithBibKeys(this, keys);
@@ -1730,10 +1731,10 @@ void Buffer::changeRefsIfUnique(docstring const & from, docstring const & to,
         vector<docstring> labels;
  
         if (code == Inset::CITE_CODE) {
-               vector<pair<string, docstring> > keys;
+               biblio::BibKeyList keys;
                 fillWithBibKeys(keys);
-               vector<pair<string, docstring> >::const_iterator bit  = keys.begin();
-               vector<pair<string, docstring> >::const_iterator bend = keys.end();
+               biblio::BibKeyList::const_iterator bit  = keys.begin();
+               biblio::BibKeyList::const_iterator bend = keys.end();
  
                 for (; bit != bend; ++bit)
                         // FIXME UNICODE
diff --git a/src/Buffer.h b/src/Buffer.h

index 218f472744e20343b7142b41d71941d4b8277f19..b611fd0b311c21457be66a355428a6cdadf23eca 100644 (file)
--- a/src/Buffer.h
+++ b/src/Buffer.h
@@ -12,6 +12,7 @@
  #ifndef BUFFER_H
  #define BUFFER_H
  
+#include "Biblio_typedefs.h"
  #include "DocIterator.h"
  
  #include "support/FileName.h"
@@ -302,7 +303,7 @@ public:
         void validate(LaTeXFeatures &) const;
  
         /// return all bibkeys from buffer and its childs
-       void fillWithBibKeys(std::vector<std::pair<std::string, docstring> > & keys) const;
+       void fillWithBibKeys(biblio::BibKeyList & keys) const;
         /// Update the cache with all bibfiles in use (including bibfiles
         /// of loaded child documents).
         void updateBibfilesCache();
diff --git a/src/Makefile.am b/src/Makefile.am

index 7daedf1db3a9b9b376428fc1b2489b54cfdd8185..511ff8afbe0379069d914d9e425f6917bed10203 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -78,6 +78,7 @@ endif
  liblyxcore_la_SOURCES = \
         Author.cpp \
         Author.h \
+       Biblio_typedefs.h \
         Biblio.h \
         Biblio.cpp \
         Bidi.cpp \
diff --git a/src/frontends/controllers/ControlCitation.cpp b/src/frontends/controllers/ControlCitation.cpp

index 8a3a7134ce710b1f330ab2327957489db8e81034..bd716a95dc5f848136933e8c3cefac967f654f02 100644 (file)
--- a/src/frontends/controllers/ControlCitation.cpp
+++ b/src/frontends/controllers/ControlCitation.cpp
@@ -48,12 +48,8 @@ bool ControlCitation::initialiseParams(string const & data)
  
         bool use_styles = engine != biblio::ENGINE_BASIC;
  
-       vector<pair<string, docstring> > blist;
-       kernel().buffer().fillWithBibKeys(blist);
-       bibkeysInfo_.clear();
-       for (size_t i = 0; i < blist.size(); ++i)
-               bibkeysInfo_[blist[i].first] = blist[i].second;
-
+       kernel().buffer().fillWithBibKeys(bibkeysInfo_);
+       
         if (citeStyles_.empty())
                 citeStyles_ = biblio::getCiteStyles(engine);
         else {
@@ -137,23 +133,20 @@ vector<string> ControlCitation::searchKeys(
                 // it is treated as a simple string by boost::regex.
                 expr = escape_special_chars(expr);
  
-       boost::regex reg_exp(to_utf8(expr), case_sensitive?
+       boost::regex reg_exp(to_utf8(expr), case_sensitive ?
                 boost::regex_constants::normal : boost::regex_constants::icase);
  
         vector<string>::const_iterator it = keys_to_search.begin();
         vector<string>::const_iterator end = keys_to_search.end();
         for (; it != end; ++it ) {
-               biblio::InfoMap::const_iterator info = bibkeysInfo_.find(*it);
+               biblio::BibKeyList::const_iterator info = bibkeysInfo_.find(*it);
                 if (info == bibkeysInfo_.end())
                         continue;
-
-               string data = *it;
-               // FIXME UNICODE
-               data += ' ' + to_utf8(info->second);
-
+               
+               biblio::BibTeXInfo const kvm = info->second;
+               string const data = *it + ' ' + to_utf8(kvm.allData);
+               
                 try {
-                       // Attempts to find a match for the current RE
-                       // somewhere in data.
                         if (boost::regex_search(data, reg_exp))
                                 foundKeys.push_back(*it);
                 }
diff --git a/src/frontends/controllers/ControlCitation.h b/src/frontends/controllers/ControlCitation.h

index 4bf16fde4e456942b936b4ddbeeca928115e0cb8..93219dc535a29c2f0d49000596de19fa09473a03 100644 (file)
--- a/src/frontends/controllers/ControlCitation.h
+++ b/src/frontends/controllers/ControlCitation.h
@@ -62,7 +62,7 @@ public:
         }
  private:
         /// The info associated with each key
-       biblio::InfoMap bibkeysInfo_;
+       biblio::BibKeyList bibkeysInfo_;
  
         ///
         static std::vector<biblio::CiteStyle> citeStyles_;
diff --git a/src/insets/Inset.h b/src/insets/Inset.h

index bae6669ee6a44406e3371ebc90471bde1195d4c6..438dfb54d61afa9e41b46f34e4887e3bbf2a71e7 100644 (file)
--- a/src/insets/Inset.h
+++ b/src/insets/Inset.h
@@ -15,6 +15,7 @@
  #ifndef INSETBASE_H
  #define INSETBASE_H
  
+#include "Biblio_typedefs.h"
  #include "Changes.h"
  #include "Dimension.h"
  
@@ -440,8 +441,7 @@ public:
         virtual void addToToc(TocList &, Buffer const &, ParConstIterator const &) const {}
         /// Fill keys with BibTeX information
         virtual void fillWithBibKeys(Buffer const &,
-               std::vector<std::pair<std::string, docstring> > &,
-               InsetIterator const &) const { return; }
+               biblio::BibKeyList &, InsetIterator const &) const { return; }
         /// Update the counters of this inset and of its contents
         virtual void updateLabels(Buffer const &, ParIterator const &) {}
  
diff --git a/src/insets/InsetBibitem.cpp b/src/insets/InsetBibitem.cpp

index 166bcdd5a84c27d0e0ab0c9e1887b0198b958673..8c9410b2b1fc725148940959acae7baf3b584d89 100644 (file)
--- a/src/insets/InsetBibitem.cpp
+++ b/src/insets/InsetBibitem.cpp
@@ -187,16 +187,24 @@ docstring const bibitemWidest(Buffer const & buffer)
  
  
+/// Store this bibitem in \c keys under its "key" parameter. The entry
+/// carries the "label" parameter and, as "ref", the text of the paragraph
+/// found after advancing one position from \c it; its isBibTeX flag is
+/// false, since the data does not come from a BibTeX database.
  void InsetBibitem::fillWithBibKeys(Buffer const & buf,
-       std::vector<std::pair<std::string, docstring> > & keys,
-       InsetIterator const & it) const
+       biblio::BibKeyList & keys, InsetIterator const & it) const
  {
         string const key = to_utf8(getParam("key"));
         docstring const label = getParam("label");
         DocIterator doc_it(it); 
         doc_it.forwardPos();
         docstring const ref = doc_it.paragraph().asString(buf, false);
-       docstring const info = label + biblio::TheBibliographyRef + ref;
-       keys.push_back(std::pair<string, docstring>(key, info));
+       biblio::BibTeXInfo keyvalmap(false);
+       keyvalmap[from_ascii("label")] = label;
+       keyvalmap[from_ascii("ref")] = ref;
+       // ControlCitation::searchKeys() matches its regex against allData,
+       // so fill it here; otherwise a bibitem can only be found by its key.
+       keyvalmap.allData = label + ' ' + ref;
+       keys[key] = keyvalmap;
  }
  
  } // namespace lyx
diff --git a/src/insets/InsetBibitem.h b/src/insets/InsetBibitem.h

index 98db70e68d569f40858c8a47308ed1c4bdc03792..68edfca092dab8fabe100c966803b694a5590895 100644 (file)
--- a/src/insets/InsetBibitem.h
+++ b/src/insets/InsetBibitem.h
@@ -14,6 +14,7 @@
  
  
  #include "InsetCommand.h"
+#include "Biblio_typedefs.h"
  
  
  namespace lyx {
@@ -45,8 +46,7 @@ public:
         int plaintext(Buffer const &, odocstream &, OutputParams const &) const;
         ///
         virtual void fillWithBibKeys(Buffer const &,
-               std::vector<std::pair<std::string, docstring> > &,
-               InsetIterator const &) const;
+               biblio::BibKeyList &, InsetIterator const &) const;
  
  protected:
         ///
diff --git a/src/insets/InsetBibtex.cpp b/src/insets/InsetBibtex.cpp

index 7ba0a2814b59b45d475ff0ffffd0850776f514f7..4c612bd4955c54acc2d03f6443e3b8362ec6c2e4 100644 (file)
--- a/src/insets/InsetBibtex.cpp
+++ b/src/insets/InsetBibtex.cpp
@@ -4,6 +4,7 @@
   * Licence details can be found in the file COPYING.
   *
   * \author Alejandro Aguilar Sierra
+ * \author Richard Heck (BibTeX parser improvements)
   *
   * Full author contact details are available in file CREDITS.
   */
@@ -414,14 +415,14 @@ namespace {
                 // read value
                 bool legalChar = true;
                 while (ifs && !isSpace(ch) && 
-                          delimChars.find(ch) == docstring::npos &&
-                          (legalChar = illegalChars.find(ch) == docstring::npos)
-                          ) {
-                       if (chCase == makeLowerCase) {
+                                                delimChars.find(ch) == docstring::npos &&
+                                                (legalChar = (illegalChars.find(ch) == docstring::npos))
+                                       ) 
+               {
+                       if (chCase == makeLowerCase)
                                 val += lowercase(ch);
-                       } else {
+                       else
                                 val += ch;
-                       }
                         ifs.get(ch);
                 }
                 
@@ -478,17 +479,40 @@ namespace {
                                         return false;
  
                         } else if (ch == '"' || ch == '{') {
+                               // set end delimiter
+                               char_type delim = ch == '"' ? '"': '}';
  
-                               // read delimited text - set end delimiter
-                               char_type delim = ch == '"'? '"': '}';
-
-                               // inside this delimited text braces must match.
-                               // Thus we can have a closing delimiter only
-                               // when nestLevel == 0
+                               //Skip whitespace
+                               do {
+                                       ifs.get(ch);
+                               } while (ifs && isSpace(ch));
+                               
+                               if (!ifs)
+                                       return false;
+                               
+                               //We now have the first non-whitespace character
+                               //We'll collapse adjacent whitespace.
+                               bool lastWasWhiteSpace = false;
+                               
+                               // inside this delimited text braces must match.
+                               // Thus we can have a closing delimiter only
+                               // when nestLevel == 0
                                 int nestLevel = 0;
-
-                               ifs.get(ch);
+ 
                                 while (ifs && (nestLevel > 0 || ch != delim)) {
+                                       if (isSpace(ch)) {
+                                               lastWasWhiteSpace = true;
+                                               ifs.get(ch);
+                                               continue;
+                                       }
+                                       //We output the space only after we stop getting 
+                                       //whitespace so as not to output any whitespace
+                                       //at the end of the value.
+                                       if (lastWasWhiteSpace) {
+                                               lastWasWhiteSpace = false;
+                                               val += ' ';
+                                       }
+                                       
                                         val += ch;
  
                                         // update nesting level
@@ -556,8 +580,7 @@ namespace {
  
  // This method returns a comma separated list of Bibtex entries
  void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
-               std::vector<std::pair<string, docstring> > & keys,
-               InsetIterator const & /*di*/) const
+               biblio::BibKeyList & keys, InsetIterator const & /*di*/) const
  {
         vector<FileName> const files = getFiles(buffer);
         for (vector<FileName>::const_iterator it = files.begin();
@@ -573,15 +596,6 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
                 // - it accepts more characters in keys or value names than
                 //   bibtex does.
                 //
-               // TODOS:
-               // - the entries are split into name = value pairs by the
-               //   parser. These have to be merged again because of the
-               //   way lyx treats the entries ( pair<...>(...) ). The citation
-               //   mechanism in lyx should be changed such that it can use
-               //   the split entries.
-               // - messages on parsing errors can be generated.
-               //
-
                 // Officially bibtex does only support ASCII, but in practice
                 // you can use the encoding of the main document as long as
                 // some elements like keys and names are pure ASCII. Therefore
@@ -589,9 +603,10 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
                 // We don't restrict keys to ASCII in LyX, since our own
                 // InsetBibitem can generate non-ASCII keys, and nonstandard
                 // 8bit clean bibtex forks exist.
+               
                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
-                               std::ios_base::in,
-                               buffer.params().encoding().iconvName());
+                       std::ios_base::in,
+                       buffer.params().encoding().iconvName());
  
                 char_type ch;
                 VarMap strings;
@@ -660,25 +675,31 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
  
                         } else {
  
-                               // Citation entry. Read the key and all name = value pairs
+                               // Citation entry. Try to read the key.
                                 docstring key;
-                               docstring fields;
-                               docstring name;
-                               docstring value;
-                               docstring commaNewline;
  
                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
                                                    from_ascii("}"), keepCase) || !ifs)
                                         continue;
  
-                               // now we have a key, so we will add an entry
-                               // (even if it's empty, as bibtex does)
+                               /////////////////////////////////////////////
+                               // now we have a key, so we will add an entry 
+                               // (even if it's empty, as bibtex does)
                                 //
-                               // all items must be separated by a comma. If
-                               // it is missing the scanning of this entry is
-                               // stopped and the next is searched.
+                               // we now read the field = value pairs.
+                               // all items must be separated by a comma. If
+                               // it is missing the scanning of this entry is
+                               // stopped and the next is searched.
+                               docstring fields;
+                               docstring name;
+                               docstring value;
+                               docstring commaNewline;
+                               docstring data;
+                               biblio::BibTeXInfo keyvalmap;
+                               keyvalmap.entryType = entryType;
+                               
                                 bool readNext = removeWSAndComma(ifs);
-
+ 
                                 while (ifs && readNext) {
  
                                         // read field name
@@ -699,27 +720,18 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
                                         if (!readValue(value, ifs, strings))
                                                 break;
  
-                                       // append field to the total entry string.
-                                       //
-                                       // TODO: Here is where the fields can be put in
-                                       //       a more intelligent structure that preserves
-                                       //           the already known parts.
-                                       fields += commaNewline;
-                                       fields += name + from_ascii(" = {") + value + '}';
-
-                                       if (!commaNewline.length())
-                                               commaNewline = from_ascii(",\n");
+                                       keyvalmap[name] = value;
+                                       data += "\n\n" + value;
  
                                         readNext = removeWSAndComma(ifs);
                                 }
  
                                 // add the new entry
-                               keys.push_back(pair<string, docstring>(
-                               to_utf8(key), fields));
+                               keyvalmap.allData = data;
+                               keyvalmap.isBibTeX = true;
+                               keys[to_utf8(key)] = keyvalmap;
                         }
-
                 } //< searching '@'
-
         } //< for loop over files
  }
  
diff --git a/src/insets/InsetBibtex.h b/src/insets/InsetBibtex.h

index 82defd4fd72c3380c178708ebdf43ad08afec513..fcbf0b716c3773ef649e915af3ef9bb11df0aac6 100644 (file)
--- a/src/insets/InsetBibtex.h
+++ b/src/insets/InsetBibtex.h
@@ -12,9 +12,9 @@
  #ifndef INSET_BIBTEX_H
  #define INSET_BIBTEX_H
  
-
-#include <vector>
+#include <map>
  #include "InsetCommand.h"
+#include "Biblio_typedefs.h"
  
  #include "support/FileName.h"
  
@@ -39,8 +39,7 @@ public:
         int latex(Buffer const &, odocstream &, OutputParams const &) const;
         ///
         virtual void fillWithBibKeys(Buffer const &,
-               std::vector<std::pair<std::string, docstring> > &,
-               InsetIterator const &) const;
+               biblio::BibKeyList &, InsetIterator const &) const;
         ///
         std::vector<support::FileName> const getFiles(Buffer const &) const;
         ///
diff --git a/src/insets/InsetCitation.cpp b/src/insets/InsetCitation.cpp

index f0812e5b8fce41817158b19d244c026bf56ed8d6..6541e3ceb679d6a59b69695546705a1a5a128d76 100644 (file)
--- a/src/insets/InsetCitation.cpp
+++ b/src/insets/InsetCitation.cpp
@@ -65,13 +65,13 @@ docstring const getNatbibLabel(Buffer const & buffer,
                 return docstring();
  
         // Cache the labels
-       typedef std::map<Buffer const *, biblio::InfoMap> CachedMap;
+       typedef std::map<Buffer const *, biblio::BibKeyList> CachedMap;
         static CachedMap cached_keys;
  
         // and cache the timestamp of the bibliography files.
         static std::map<FileName, time_t> bibfileStatus;
  
-       biblio::InfoMap infomap;
+       biblio::BibKeyList keylist;
  
         vector<FileName> const & bibfilesCache = buffer.getBibfilesCache();
         // compare the cached timestamps with the actual ones.
@@ -97,22 +97,13 @@ docstring const getNatbibLabel(Buffer const & buffer,
  
         // build the keylist only if the bibfiles have been changed
         if (cached_keys[&buffer].empty() || bibfileStatus.empty() || changed) {
-               typedef vector<std::pair<string, docstring> > InfoType;
-               InfoType bibkeys;
-               buffer.fillWithBibKeys(bibkeys);
-
-               InfoType::const_iterator bit  = bibkeys.begin();
-               InfoType::const_iterator bend = bibkeys.end();
-
-               for (; bit != bend; ++bit)
-                       infomap[bit->first] = bit->second;
-
-               cached_keys[&buffer] = infomap;
+               buffer.fillWithBibKeys(keylist);
+               cached_keys[&buffer] = keylist;
         } else
                 // use the cached keys
-               infomap = cached_keys[&buffer];
+               keylist = cached_keys[&buffer];
  
-       if (infomap.empty())
+       if (keylist.empty())
                 return docstring();
  
         // the natbib citation-styles
@@ -175,8 +166,8 @@ docstring const getNatbibLabel(Buffer const & buffer,
         vector<string>::const_iterator end = keys.end();
         for (; it != end; ++it) {
                 // get the bibdata corresponding to the key
-               docstring const author(biblio::getAbbreviatedAuthor(infomap, *it));
-               docstring const year(biblio::getYear(infomap, *it));
+               docstring const author(biblio::getAbbreviatedAuthor(keylist, *it));
+               docstring const year(biblio::getYear(keylist, *it));
  
                 // Something isn't right. Fail safely.
                 if (author.empty() || year.empty())
diff --git a/src/insets/InsetInclude.cpp b/src/insets/InsetInclude.cpp

index 6037a956d368cd467e034995592b01a36fd8aab9..08461aadc60069ce37f889c8aaad5e165770def3 100644 (file)
--- a/src/insets/InsetInclude.cpp
+++ b/src/insets/InsetInclude.cpp
@@ -727,12 +727,12 @@ void InsetInclude::getLabelList(Buffer const & buffer,
  
  
  void InsetInclude::fillWithBibKeys(Buffer const & buffer,
-               std::vector<std::pair<string, docstring> > & keys,
-               InsetIterator const & /*di*/) const
+               biblio::BibKeyList & keys, InsetIterator const & /*di*/) const
  {
         if (loadIfNeeded(buffer, params_)) {
                 string const included_file = includedFilename(buffer, params_).absFilename();
                 Buffer * tmp = theBufferList().getBuffer(included_file);
+               //FIXME This is kind of a dirty hack and should be made reasonable.
                 tmp->setParentName("");
                 tmp->fillWithBibKeys(keys);
                 tmp->setParentName(parentFilename(buffer));
diff --git a/src/insets/InsetInclude.h b/src/insets/InsetInclude.h

index 2b71890a83cdae9870c0fca20ecac201bdf5a648..dbdaa340f3c1424fb640d9f22832ee63f72b81da 100644 (file)
--- a/src/insets/InsetInclude.h
+++ b/src/insets/InsetInclude.h
@@ -12,6 +12,7 @@
  #ifndef INSET_INCLUDE_H
  #define INSET_INCLUDE_H
  
+#include "Biblio_typedefs.h"
  #include "Inset.h"
  #include "InsetCommandParams.h"
  #include "RenderButton.h"
@@ -58,10 +59,11 @@ public:
         /** Fills \c keys
          *  \param buffer the Buffer containing this inset.
          *  \param keys the list of bibkeys in the child buffer.
+        *  \param it not used here
          */
-       virtual void fillWithBibKeys(Buffer const &,
-               std::vector<std::pair<std::string, docstring> > &,
-               InsetIterator const & /*di*/) const;
+       virtual void fillWithBibKeys(Buffer const & buffer,
+               biblio::BibKeyList & keys, InsetIterator const & it) const;
+       
         /** Update the cache with all bibfiles in use of the child buffer
          *  (including bibfiles of grandchild documents).
          *  Does nothing if the child document is not loaded to prevent
author	Richard Heck <rgheck@comcast.net>
	Thu, 16 Aug 2007 01:59:20 +0000 (01:59 +0000)
committer	Richard Heck <rgheck@comcast.net>
	Thu, 16 Aug 2007 01:59:20 +0000 (01:59 +0000)
development/scons/scons_manifest.py		patch \| blob \| history
src/Biblio.cpp		patch \| blob \| history
src/Biblio.h		patch \| blob \| history
src/Biblio_typedefs.h	[new file with mode: 0644]	patch \| blob
src/Buffer.cpp		patch \| blob \| history
src/Buffer.h		patch \| blob \| history
src/Makefile.am		patch \| blob \| history
src/frontends/controllers/ControlCitation.cpp		patch \| blob \| history
src/frontends/controllers/ControlCitation.h		patch \| blob \| history
src/insets/Inset.h		patch \| blob \| history
src/insets/InsetBibitem.cpp		patch \| blob \| history
src/insets/InsetBibitem.h		patch \| blob \| history
src/insets/InsetBibtex.cpp		patch \| blob \| history
src/insets/InsetBibtex.h		patch \| blob \| history
src/insets/InsetCitation.cpp		patch \| blob \| history
src/insets/InsetInclude.cpp		patch \| blob \| history
src/insets/InsetInclude.h		patch \| blob \| history