// Logo Search packages:
// Sourcecode: highlight version File versions  Download package
//
// languagedefinition.h

/***************************************************************************
                          languagedefinition.h  -  description
                             -------------------
    begin                : Wed Nov 28 2001
    copyright            : (C) 2001-2008 by Andre Simon
    email                : andre.simon1@gmx.de
 ***************************************************************************/


/*
This file is part of Highlight.

Highlight is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Highlight is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
*/


#ifndef LANGUAGEDEFINITION_H
#define LANGUAGEDEFINITION_H

#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "configurationreader.h"
#include "platform_fs.h"
#include "enums.h"
#include "re/Pattern.h"
#include "re/Matcher.h"

namespace highlight
{

00047       class RegexElement;

      /** Maps keywords to their corresponding class IDs. */
      typedef std::map<std::string, int> KeywordMap;

      /**\brief Contains specific data of the programming language being processed.

         The load() method will only read a new language definition if the given
         file path is not equal to the path of the current language definition.

      * @author Andre  Simon
      */

00060       class LanguageDefinition
      {

            public:

                  LanguageDefinition();

                  ~LanguageDefinition();

                  /**\return Symbol string, containg all known symbols with the referencing state ids*/
00070                   const string &getSymbolString() const { return symbolString; }

                  /**\return Symbol string, containg all known symbols with the referencing state ids*/
00073                   const string &getFailedRegex() const { return failedRegex; }

                  /** \return Prefix of raw strings */
00076                   unsigned char getRawStringPrefix() const { return rawStringPrefix; }

                  /** \return Continuation Character */
00079                   unsigned char getContinuationChar() const { return continuationChar; }

                  /** \return true if syntax highlighting is enabled*/
00082                   bool getSyntaxHighlight() const { return !disableHighlighting;}

                  /** \return True if language is case sensitive */
00085                   bool isIgnoreCase() const { return ignoreCase;}

                  /** \param s String
                       \return class id of keyword, 0 if s is not a keyword */
                  int isKeyword ( const string &s ) ;

                  /** Load new language definition
                      \param langDefPath Path of language definition
                      \param clear Test if former data should be deleted
                      \return True if successfull  */
                  bool load ( const string& langDefPath, bool clear=true );

                  /** \return True if multi line comments may be nested */
00098                   bool allowNestedMLComments() const { return allowNestedComments; }

                  /** \return True if highlighting is disabled */
00101                   bool highlightingDisabled() const  { return disableHighlighting; }

                  /** \return True the next load() call will load a new language definition
                      \param  langDefPath Path to language definition  */
00105                   bool needsReload ( const string &langDefPath ) const { return currentPath!=langDefPath; }

                  /** \return True if current language may be reformatted (c, c++, c#, java) */
00108                   bool enableReformatting() const { return reformatCode;}

                  /** \return True if escape sequences are allowed outsde of strings */
00111                   bool allowExtEscSeq() const { return allowExtEscape; }

                  /** \return keywords*/
00114                   const KeywordMap& getKeywords() const { return keywords; }

                  /** \return keyword classes*/
00117                   const vector<string>& getKeywordClasses() const { return keywordClasses;}

                  /** \return regular expressions */
00120                   const vector<RegexElement*>& getRegexElements() const {return regex;};

                  /** \return description of the programming language */
00123                   const string & getDescription () const {return langDesc;}

                  /**
                       \param stateID state id
                       \return true,  if no closing delimiter exists (open and close delimiters are equal)
                   */
00129                   bool delimiterIsDistinct ( int stateID )
                  {
                        return delimiterDistinct[stateID];
                  }

                  /**  Pairs of open/close tokens have a unique ID to test if two tokens act as delimiters
                       \param token delimiter token
                       \return token ID
                   */
00138                   int getDelimiterPairID ( const string& token )
                  {
                        return delimiterPair[token];
                  }

            private:

                  static const string REGEX_IDENTIFIER;
                  static const string REGEX_NUMBER;

                  // string containing symbols and their IDs of the programming language
                  string symbolString;

                  // path to laoded language definition
                  string currentPath;

                  // Language description
                  string langDesc;

                  string failedRegex;

                  KeywordMap keywords;

                  vector <string> keywordClasses;

                  vector <RegexElement*> regex;

                  KeywordMap delimiterPrefixes;

                  // saves if delimiter pair consists of the same delimiter symbol
                  map <int, bool> delimiterDistinct;

                  map <string, int> delimiterPair;

                  // keywords are not case sensitive if set
                  bool ignoreCase,

                  // highlighting is disabled
                  disableHighlighting,

                  // Escape sequences are allowed outrside of strings
                  allowExtEscape,

                  // switch to enable VHDL workarounds
//   vhdlMode,

                  // allow nested multi line comment blocks
                  allowNestedComments,

                  // single line comments have to start in coloumn 1 if set
                  fullLineComment,

                  // code formatting is enabled if set
                  reformatCode;

                  // character which is prefix of raw string (c#)
                  unsigned char rawStringPrefix,

                  //character which continues curreent style on next line
                  continuationChar;

                  /* reset members */
                  void reset();

                  // add a symbol sequence to the symbolStream
                  void addSimpleSymbol ( stringstream& symbolStream, State state,
                                         const string& paramValue );

                  void addSymbol ( stringstream& symbolStream,
                                   State stateBegin,
                                   State stateEnd,
                                   bool isDelimiter,
                                   const string& paramValue,
                                   unsigned int classID=0 );

                  // add a delimiter symbol sequence to the symbolStream
                  void addDelimiterSymbol ( stringstream& symbolStream,
                                            State stateBegin, State stateEnd,
                                            const string& paramValue,
                                            unsigned int classID=0 );

                  bool getFlag ( string& paramValue );

                  unsigned char getSymbol ( const string& paramValue );

                  // generate a unique class ID of the class name
                  unsigned int generateNewKWClass ( const string& newClassName );

                  // add keywords to the given class
                  void addKeywords ( const string &kwList,State stateBegin, State stateEnd, int classID );

                  struct RegexDef extractRegex ( const string &paramValue );

                  Pattern * reDefPattern;

      };


      /**\brief Association of a regex with a state description

        A RegexElement associates a regular expression with the state information
        (opening and closing state, pattern, keyword class and keyword group id)
      */
00241       class RegexElement
      {
            public:
                  RegexElement() :open ( STANDARD ), end ( STANDARD ), rePattern ( NULL ), kwClass ( 0 ),capturingGroup ( -1 )
                  {
                  }

                  RegexElement ( State oState, State eState, Pattern *re, unsigned int cID=0, int group=-1 ) :
                              open ( oState ), end ( eState ), rePattern ( re ), kwClass ( cID ), capturingGroup ( group )
                  {
                        // cerr << "new re element "<<  rePattern->getPattern() <<" open: "<<open<<" end "<<end<<"\n";
                  }

                  ~RegexElement() { if ( rePattern ) delete rePattern; }

00256                   State open, ///< opening state
                  end;  ///< closing state
00258                   Pattern *rePattern;          ///< regex pattern
00259                   unsigned int kwClass;        ///< keyword class
00260                   int capturingGroup;          ///< capturing group ID
      };

      /**\brief Association of a regex and its relevant capturing group
      */
00265       struct RegexDef
      {
            RegexDef() :capturingGroup ( -1 ) {}
00268             string reString;     ///< regex string
00269             int capturingGroup;  ///< capturing group which should be recognized as token
      };

}
#endif

// Generated by  Doxygen 1.6.0   Back to index