// Logo Search packages:
// Sourcecode: highlight version File versions  Download package
//
// codegenerator.h

/***************************************************************************
                          codegenerator.h  -  description
                             -------------------
    begin                : Die Jul 9 2002
    copyright            : (C) 2002-2009 by Andre Simon
    email                : andre.simon1@gmx.de
 ***************************************************************************/


/*
This file is part of Highlight.

Highlight is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Highlight is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
*/


#ifndef CODEPARSER_H
#define CODEPARSER_H

#include <cstddef>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "languagedefinition.h"
#include "documentstyle.h"
#include "ctagsreader.h"
#include "astyle/astyle.h"
#include "preformatter.h"
#include "enums.h"
#include "stringtools.h"

/// The highlight namespace contains all classes and data structures needed for parsing input data.

namespace highlight
{


      /** \brief Regular Expession Information

          This class associates a processing state with a keyword class and the length of the matched token.

      * @author Andre Simon
      */
00056       class ReGroup
      {
            public:

                  /// Constructor
00061                   ReGroup() : length ( 0 ), state ( STANDARD ), kwClass ( 0 )
                  {
                  }

                  /// Constructor
00066                   ReGroup ( State s, unsigned int l , unsigned int c ) :
                              length ( l ), state ( s ), kwClass ( c )
                  {
                  }

                  /// Copy Constructor
00072                   ReGroup ( const ReGroup& other )
                  {
                        length = other.length;
                        state = other.state;
                        kwClass = other.kwClass;
                  }

                  /// Operator overloading
00080                   ReGroup& operator= ( const ReGroup & other )
                  {
                        length = other.length;
                        state = other.state;
                        kwClass = other.kwClass;
                        return *this;
                  }

                  ~ReGroup()
                  {
                  }

00092                   unsigned int length;    ///< length of the token
00093                   State state;            ///< state of the matched token (keyword, string, etc)
00094                   unsigned int kwClass;   ///< keyword class if state is keyword
      };


      /** \brief Base class for parsing. Works like a finite state machine.

          The virtual class provides source code parsing functionality, based on
          information stored in language definitions.<br>
          The derived classes have to define the output format.<br>
          The colour information is stored in a DocumentStyle instance.<br>
          Codegenerator is a singleton class.<br>
          Use getInstance for a singleton class instance. Then call the init* methods
          and loadLanguage to initialize the parser. Call generate* methods to get results.

      * @author Andre Simon
      */

00111       class CodeGenerator
      {

            public:

                  virtual ~CodeGenerator();

                  /**
                    Get appropriate Codegenerator instance (should be used with auto_ptr)
                    \param type Output file type (HTML, XHTML, RTF, LATEX, TEX, ANSI, XTERM256)
                    \return CodeGenerator
                  */
                  static CodeGenerator* getInstance ( OutputType type );

                  /**
                    Delete CodeGenerator instance (this is intended for SWIG integration only,
                    in normal C++ code the result of getInstance() should be saved in an auto_ptr)
                    \param CodeGenerator* CodeGenerator instance
                  */
00130                   static void deleteInstance ( CodeGenerator* inst ) {if ( inst ) delete inst;}

                  /**
                   Define colour theme information; needs to be called before using a generate* method.
                   Call this method before loadLanguage().
                   \param themePath Path of style description file
                   \return true if successfull
                  */
                  virtual bool initTheme ( const string& themePath );

                  /** initialize source code indentation and reformatting scheme;
                      needs to be called before using a generate* method
                      \param indentScheme Name of indentation scheme
                      \return true if successfull
                   */
                  bool initIndentationScheme ( const string&indentScheme );

                  /**
                   Load ctags meta information; needs to be called before using a generate* method
                   \param ctagsPath Path of tags file
                   \return true if successfull
                  */
                  bool initTagInformation ( const string& ctagsPath );

                  /** \param langDefPath Absolute path to language definition, may be used multiple times for a generator instance
                      \return  LOAD_FAILED: failure,
                               LOAD_NEW:    Reload necessary,
                               LOAD_NONE:   no reload necessary
                  */
                  LoadResult loadLanguage ( const string& langDefPath );

                  /**
                   Generate output file from input file
                   \param inFileName Path of input file (if empty use stdin)
                   \param outFileName Path of output file (if empty use stdout)
                   \return ParseError
                  */
                  ParseError generateFile ( const string &inFileName, const string &outFileName );

                  /**
                   Generate output string from input string
                   \param input input code string
                   \return formatted output code
                  */
                  string generateString ( const string &input );

                  /**
                   Generate output string from input file
                   \param inFileName file path
                   \return formatted output code
                  */
                  string generateStringFromFile ( const string &inFileName );

                  /** Print style definitions to external file or stdout
                    \param outFile Path of external style definition; print to stdout if empty
                     \return true if successfull
                    */
                  bool printExternalStyle ( const string &outFile );

                  /** Print index file with all input file names
                     \param fileList List of output file names
                     \param outPath Output path
                      \return true if successfull
                   */
                  virtual bool printIndexFile ( const vector<string> & fileList,
                                                const string &outPath );


                  /** define the preformatting parameters. Preformatting takes place before
                      the optional astyle reformatting and indenting is performed (defined by initIndentationScheme)
                     \param lineWrappingStyle wrapping style (WRAP_DISABLED, WRAP_SIMPLE, WRAP_DEFAULT)
                     \param lineLength max line length
                     \param numberSpaces number of spaces which replace a tab
                  */
                  void setPreformatting ( WrapMode lineWrappingStyle, unsigned int lineLength,int numberSpaces );

                  /** \deprecated
                      \return True if document style was found */
                  bool styleFound();

                  /** \return True if reformatting of current input is disabled */
                  bool formattingDisabled();

                  /** \return True if reformatting of current input is possible */
                  bool formattingIsPossible();

                  /** \deprecated
                      \param langDefPath Absolute path to language definition; use loadLanguage instead
                      \return  LOAD_FAILED: failure,
                               LOAD_NEW:    Reload necessary,
                               LOAD_NONE:   no reload necessary
                  */
00222                   LoadResult initLanguage ( const string& langDefPath ) { return loadLanguage ( langDefPath );}

                  /** \return Language definition*/
                  const LanguageDefinition &getLanguage();

                  /** output line numbers
                     \param flag true if line numbers should be printed
                     \param startCnt line number starting count
                  */
                  void setPrintLineNumbers ( bool flag, unsigned int startCnt=1 );

                  /** \return line number flag */
                  bool getPrintLineNumbers();

                  /** output line numbers filled with zeroes
                      \param  flag true if zeroes should be printed
                  */
                  void setPrintZeroes ( bool flag );

                  /** \return print zeroes flag */
                  bool getPrintZeroes();

                  /** omit document header and footer
                     \param  flag true if output should be fragmented
                  */
                  void setFragmentCode ( bool flag );

                  /** \return fragment flag */
                  bool getFragmentCode();

                  /** define line number width
                     \param  w width
                  */
                  void setLineNumberWidth ( int w );

                  /** \return line number width */
                  int getLineNumberWidth();

                  /** check if input is binary or text
                     \param  flag true if input should be checked
                  */
                  void setValidateInput ( bool flag );

                  /** \return input validation flag */
                  bool getValidateInput();

                  /** \return style path */
                  const string& getStyleName();

                  /** use this font as base font
                  \param s the font name, e.g. "Courier New"
                   */
                  void setBaseFont ( const string& s );

                  /** \return base font */
                  const string getBaseFont() const ;

                  /** use this size as base font size
                  \param s the font size, e.g. "12"
                   */
                  void setBaseFontSize ( const string& s );

                  /** \return base font size*/
                  const string& getBaseFontSize();

                  /** tell parser the include style definition in output
                      \param flag true if style should be included
                   */
                  void setIncludeStyle ( bool flag );

                  /** Set style input path
                      \param path path to style input file
                    */
                  void setStyleInputPath ( const string& path );

                  /** Set style output path
                    \param path path to style output file
                  */
                  void setStyleOutputPath ( const string& path );

                  /** Set encoding (output encoding must match input file)
                    \param encodingName encoding name
                  */
                  void setEncoding ( const string& encodingName );

                  /** \return style input file path */
                  const string&  getStyleInputPath();

                  /** \return style output file path */
                  const string&  getStyleOutputPath();

                  /** \param title Document title */
                  void setTitle ( const string & title );

                  /** \return Document title */
                  string getTitle();

                  /** \param cnt maximum number of input lines to be processed */
                  void setMaxInputLineCnt ( unsigned int cnt );

                  /** \return true if chosen document style has white background colour */
                  bool hasWhiteBGColour();

                  /** \param keyCase Keyword case */
                  void setKeyWordCase ( StringTools::KeywordCase keyCase );

                  /** \param lineNo number of line that should be marked
                      \param helpTxt additional help text */
                  void addMarkedLine ( int lineNo, string &helpTxt );


                  /** set HTML output anchor flag
                   */
00335                   virtual void setHTMLAttachAnchors ( bool )  {};

                  /** set HTML output ordered list flag
                   */
00339                   virtual void setHTMLOrderedList ( bool )  {};

                  /** set HTML output inline CSS flag
                   */
00343                   virtual void setHTMLInlineCSS ( bool )  {};

                  /** set HTML output enclose pre tag flag
                   */
00347                   virtual void setHTMLEnclosePreTag ( bool )  {};

                  /** set HTML output anchor prefix
                   */
00351                   virtual void setHTMLAnchorPrefix ( const string& )  {};

                  /** set HTML output class name
                   */
00355                   virtual void setHTMLClassName ( const string& )  {};

                  /** set LaTeX replace quotes flag
                   */
00359                   virtual void setLATEXReplaceQuotes ( bool )  {};

                  /** set LaTeX no Babel shorthands flag
                   */
00363                   virtual void setLATEXNoShorthands ( bool )  {};

                  /** set LaTeX pretty Symbols flag
                   */
00367                   virtual void setLATEXPrettySymbols ( bool )  {};

                  /** set RTF page size
                   */
00371                   virtual void setRTFPageSize ( const string& )  {};

                  /** set RTF output character styles flag
                   */
00375                   virtual void setRTFCharStyles ( bool )  {};

                  /** set SVG page size
                   */
00379                   virtual void setSVGSize ( const string&, const string& )  {};


            protected:

                  static const unsigned int NUMBER_BUILTIN_STATES;  ///< number of token states (without keyword group IDs)

                  /** \param type Output type */
                  CodeGenerator ( highlight::OutputType type );
                  CodeGenerator() {};

                  /** \param c Character to be masked
                      \return Escape sequence of output format */
                  virtual string maskCharacter ( unsigned char c ) = 0;

                  /** \param ss destination stream
                      \param s string */
                  void maskString ( ostream& ss, const string &s ) ;

                  /** \param s Symbol string
                      \param searchPos Position where search starts
                      \return Found state (integer value)  */
                  State getState ( const string &s, unsigned int searchPos );

                  /** Get current line number
                    \return line number  */
                  unsigned int getLineNumber();

00407                   vector <string> styleTagOpen,   ///< list of format delimiters (open new format descriptions)
                  styleTagClose;   ///< list of format delimiters (close format descriptions)

                  /** Description of document colour style*/
00411                   DocumentStyle docStyle;

                  /** Language definition*/
00414                   LanguageDefinition langInfo;

                  /** CTags meta information */
00417                   CTagsReader metaInfo;

                  /** Tag for inserting line feeds*/
00420                   string newLineTag;

                  /** String that represents a white space in output */
00423                   string spacer;

                  /** file input*/
00426                   istream *in;

                  /** file output*/
00429                   ostream *out;

00431                   string maskWsBegin,  ///< open whitespace mask
                  maskWsEnd;    ///< close whitespace mask

00434                   string styleCommentOpen,  ///< open comment delimiter
                  styleCommentClose; ///< close comment delimiter

                  /** Encoding name */
00438                   string encoding;

                  /** document title */
00441                   string docTitle;

00443                   string inFile,   ///< input file name
                  outFile; ///< output file name

                  /** Test if maskWsBegin and maskWsEnd should be applied */
00447                   bool maskWs;

                  /** Test if whitespace sould always be separated from enclosing tokens */
00450                   bool excludeWs;

                  /** Test if header and footer should be omitted */
00453                   bool fragmentOutput;

                  /** Test if line numbers should be printed */
00456                   bool showLineNumbers;

                  /** Test if leading spyce of line number should be filled with zeroes*/
00459                   bool lineNumberFillZeroes;

                  /** The base font to use */
00462                   string baseFont ;

                  /** The base font size to use */
00465                   string baseFontSize ;

                  /** Current line of input file*/
00468                   string line;

                  /** Current line number */
00471                   unsigned int lineNumber;

                  /**output line number count start */
00474                   int lineNumberOffset;

                  /** Current state*/
00477                   State currentState;

                  /** keyword class id, used to apply the corresponding keyword style*/
00480                   unsigned int currentKeywordClass;

                  /** Processes origin state */
                  void processRootState();

                  /** \return line break sequence */
                  virtual string getNewLine();

                  /**
                     \param s current state
                     \param kwClassID keyword class (has to be set when s=KEYWORD)
                     \return Index of style tag corresponding to the states
                  */
                  unsigned int getStyleID ( State s, unsigned int kwClassID = 0 );

                  /** \return line index */
                  unsigned int getLineIndex();

                  /** print all remaining white space*/
                  void flushWs();

                  /** \return Content of user defined input style */
                  string readUserStyleDef();

                  /** \return Style definition of the chosen output format */
00505                   virtual string  getStyleDefinition() {return "";};

                  /** \return true id encoding is defined */
00508                   bool encodingDefined() {return StringTools::change_case ( encoding ) !="none";}

                  /** contains white space, which will be printed after a closing tag */
00511                   string wsBuffer;

                  /** Flag to test if style definition should be included in output document */
00514                   bool includeStyleDef;

                  /** map which saves all lines that should be highlghted */
00517                   map <int, string> markLines;

                  /** Class for line wrapping and tab replacement*/
00520                   PreFormatter preFormatter;

            private:

                  CodeGenerator ( const CodeGenerator& ) {}

                  CodeGenerator& operator= ( CodeGenerator& ) { return *this;}

                  /** Insert line number at the beginning of current output line */
                  virtual void insertLineNumber ( bool insertNewLine=true );

                  /** Prints document footer
                      @return footer */
                  virtual string getFooter() = 0;

                  /** Prints document body*/
                  virtual void printBody() = 0;

                  /** Prints document header
                      @return header
                  */
                  virtual string getHeader() = 0;

                  /** return matching open tags of the given state */
                  virtual string getMatchingOpenTag ( unsigned int ) = 0;

                  /** return matching close tags of the given state */
                  virtual string getMatchingCloseTag ( unsigned int ) = 0;

                  /** return open tag to include ctags meta information
                     \param info tag information of current token
                     \return opening tag
                   */
00553                   virtual string getMetaInfoOpenTag ( const TagInfo& info ) {return "";}

                  /** return close tag of meta information
                     \return closing tag
                   */
00558                   virtual string getMetaInfoCloseTag() {return "";}

                  /** open a new tag, set current state to s*/
                  void openTag ( State s );

                  /** close opened tag, clear current state */
                  void closeTag ( State s );

                  void closeTag ( unsigned int styleID );

                  void openTag ( unsigned int styleID );

                  /// path to style definition file
00571                   string themePath;

                  /// contains current position in line
00574                   unsigned int lineIndex;

                  /// width of line numbering coloumn
00577                   unsigned int lineNumberWidth;

                  /**maximum count of input lines to be processed*/
00580                   unsigned int maxLineCnt;

                  /**last character of the last line*/
00583                   unsigned char terminatingChar;

                  /** Class for reformatting */
00586                   astyle::ASFormatter *formatter;

                  /** Flag to test if formatting is enabled with current input document*/
00589                   bool formattingEnabled;

                  /** Flag to test if formatting is possible with current input document*/
00592                   bool formattingPossible;

                  /** Flag to test if input should be validated (binary or text) */
00595                   bool validateInput;

                  /** Flag to test if ctags information is available */
00598                   bool tagsEnabled;

                  /** flag which determines keyword output (unchangeed, uppercase, lowercase)*/
00601                   StringTools::KeywordCase keywordCase;

                  /** contains the current token*/
00604                   string token;

00606                   string styleInputPath,   ///< style input file path
                  styleOutputPath;  ///< style output file path

                  /** Resets parser to origin state, call this after every file conversion */
                  void reset();

                  /** read new line from in stream */
                  bool readNewLine ( string &newLine );

                  /** return next character from in stream */
                  unsigned char getInputChar();

                  OutputType outputType;

                  /** return new state */
                  State getCurrentState ();

                  /* Methods that represent a parsing state */
                  bool processKeywordState ( State myState );  ///< process keywords
                  bool processNumberState() ;               ///< process numbers
                  bool processMultiLineCommentState();      ///< process multi line comments
                  bool processSingleLineCommentState();     ///< process single line comments
                  bool processStringState ( State oldState );  ///< process strings
                  bool processEscapeCharState();            ///< process escape characters
                  bool processDirectiveState();             ///< process directives
                  bool processTagState();                   ///< process tags
                  bool processSymbolState();                ///< process symbols
                  void processWsState();                    ///< process whitespace

                  /** print escaped token and clears it
                     \param addMetaInfo set true if token may have meta information
                     \param flushWhiteSpace set true if white space should be flushed
                     \param tcase keyword case
                  */
                  void printMaskedToken ( bool addMetaInfo = false, bool flushWhiteSpace = true,
                                          StringTools::KeywordCase tcase = StringTools::CASE_UNCHANGED );

                  /** close Keyword tag of corresponding style ID */
                  void closeKWTag ( unsigned int styleID );

                  /** open Keyword tag of corresponding style ID */
                  void openKWTag ( unsigned int styleID );

                  /** look for special commands in comments
                      \return true if command was found
                  */
                  bool checkSpecialCmd();

                  /** association of matched regexes and the corresponding keyword class ids*/
00655                   map <int, ReGroup> regexGroups;

                  /** test for regular expressions
                      \param line current input line*/
                  void matchRegex ( const string &line );

                  /** \return true if input is no binary stream */
                  bool validateInputStream();

      };

}

#endif

// Generated by  Doxygen 1.6.0   Back to index