------------------------------------------------------------------------------ -- -- -- GNATPP COMPONENTS -- -- -- -- Pp -- -- -- -- S p e c -- -- -- -- Copyright (C) 2001-2019, AdaCore -- -- -- -- GNATPP is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free -- -- Software Foundation; either version 3, or ( at your option) any later -- -- version. GNATCHECK is distributed in the hope that it will be useful, -- -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -- -- Public License for more details. You should have received a copy of the -- -- GNU General Public License distributed with GNAT; see file COPYING3. If -- -- not, go to http://www.gnu.org/licenses for a complete copy of the -- -- license. -- -- -- -- GNATPP is maintained by AdaCore (http://www.adacore.com) -- -- -- ------------------------------------------------------------------------------ pragma Ada_2012; with Ada.Containers.Bounded_Vectors; with Ada.Containers.Indefinite_Vectors; with System.WCh_Con; with Namet; with GNAT.OS_Lib; use GNAT.OS_Lib; with ASIS_UL.Vectors; with ASIS_UL.Char_Vectors; use ASIS_UL.Char_Vectors; use ASIS_UL.Char_Vectors.Char_Vectors; with Pp.Buffers; use Pp.Buffers; use Pp.Buffers.Marker_Vectors; with Pp.Scanner; use Pp.Scanner.Token_Vectors; package Pp.Formatting is type PP_Casing is -- Defines the casing of identifiers and keywords generated by gnatpp (Lower_Case, -- All letters are lowercase Upper_Case, -- All letters are uppercase Mixed, -- For both defining and usage occurrences of identifiers The first letter -- and each letter which immediately follows the underscore are uppercase, -- and all the other letters are lowercase As_Declared); -- All the usage occurrences of identifiers have the same casing as -- defining occurrences, defining occurrences have the same casing as -- the corresponding defining occurrences in the argument source. subtype Lower_Upper_PP_Casing is PP_Casing with Predicate => Lower_Upper_PP_Casing in Lower_Case | Upper_Case; subtype Lower_Upper_Mixed_PP_Casing is PP_Casing with Predicate => Lower_Upper_Mixed_PP_Casing in Lower_Case | Upper_Case | Mixed; Line_Len_Limit : constant Natural := 256; type Formatting_Options is record PP_Indentation : Positive range 1 .. 9 := 3; -- Indentation level PP_Cont_Line_Indentation : Positive range 1 .. 9 := 2; -- Indentation for continuation lines PP_Attribute_Casing : Lower_Upper_Mixed_PP_Casing := Mixed; PP_Keyword_Casing : Lower_Upper_PP_Casing := Lower_Case; PP_Pragma_Casing : Lower_Upper_Mixed_PP_Casing := Mixed; -- Specifies the casing of pragma names and identifiers specific to -- pragmas PP_Name_Casing : PP_Casing := As_Declared; -- Defines the casing for both defining and usage occurrences of the -- names PP_Enum_Literal_Casing : PP_Casing := As_Declared; -- Defines the casing for both defining and usage occurrences of the -- enumeration literals. PP_Type_Casing : PP_Casing := As_Declared; -- Defines the casing for both defining and usage occurrences of the -- type (and subtype???) names. PP_Nnumbers_Casing : PP_Casing := As_Declared; -- Defines the casing for both defining and usage occurrences of the -- named numbers names. Use_Predefined_Casing : Boolean := True; -- This flag specifies if for the predefined names should be used the -- same casing as given in RM95 Use_Dictionary : Boolean := False; -- This flag specifies if the exception dictionary should be used for -- defining the name casing Dictionary_File_Names : String_Vector; -- Names of dictionary files specified by -D switches. Nonempty if and -- only if Use_Dictionary is True. (So why have Use_Dictionary???) Format_Comments : Boolean := True; -- If this flag is set OFF, all the comments (comment lines and -- end-of-line comments) are moved into the result unchanged (no -- indentation or long line splitting is performed). GNAT_Comment_Inden : Boolean := True; -- Comment lines are indented in GNAT style. The difference with -- Standard_Comment_Indent is that comment lines preceding if and -- case statements alternatives and 'begin/ keywords are indented as -- the corresponding alternatives or keywords, but not as enclosing -- statements. Standard_Comment_Indent : Boolean := False; -- Comment lines are indented as the corresponding code lines. GNAT_Comment_Start : Boolean := False; -- The comment (if non-empty) should have at least two space characters -- after '--' Reformat_Comment_Block : Boolean := False; -- For sequences of comment lines (separated by space lines or empty -- comment lines (lines containing only two minuses) the attempt should -- be made to reformat the text of the comment in a word processor style -- - that is, to put as many words in the line as possible, using only -- one space as a separator. Preserve_Special_Comments : Boolean := False; -- Do not change the special comment lines. A comment line is considered -- as a special comments if it has a special character just after '--'. -- See ??? for the details of the definition of a special character No_Tab_In_Comments : Boolean := False; -- Remove HT and VT from the content of the comments. If this flag is -- set ON, all the VT characters are removed from the comment text and -- replaced with spaces to get to the nearest Tab stop (the Tab step is -- supposed to be equal to 8), and after that the comment line may be -- further reformatted to get the indentation and maximum line length -- rules. As for now, reformattable comment blocks can not contain HT -- characters, and VT are removed from reformattable blocks as a part -- of reformatting. Comments_Only : Boolean := False; End_Labels : Boolean := True; -- Do set end/exit labels even if missed in the argument source; -- Not used. Add_FF : Boolean := False; -- Add Form Feed after a pragma Page. Compact_Layout : Boolean := True; -- Use compact layout for records and named statements; End_Id : Boolean := True; -- Change "end;" to "end Some_Name;". Separate_Line_For_IS : Boolean := True; -- Use a separate sine for IS in subprogram body in case if we need more -- than one line for subprogram specification Separate_Line_For_THEN_and_LOOP : Boolean := False; -- Use a separate line for THEN in if statements and LOOP in FOR and -- WHILE loops. No_Separate_Line_For_THEN_and_LOOP : Boolean := False; -- Do not use a separate line for THEN in if statements and LOOP in FOR -- and WHILE loops. -- -- If both Separate_Line_For_THEN_and_LOOP and -- No_Separate_Line_For_THEN_and_LOOP flags are off, the layout of -- THEN and LOOP keywords are defined by other formatting rules Separate_Line_For_Label : Boolean := False; -- Use a separate line for statement label(s). Separate_Line_For_USE : Boolean := False; -- Use a separate line for each USE clause that is a part of a context -- clause, applied to both type and package use clauses. Separate_Line_For_Stmt_Name : Boolean := False; -- Use a separate line for a loop or block name and do not use an extra -- indentation level for a statement itself. This overrides the layout -- of the named statements that is specified by -l(1|2|3) option. Split_Line_Before_Op : Boolean := False; RM_Style_Spacing : Boolean := False; -- Follow Ada Reference Manual style when placing spaces before -- delimiters: - no space before '(' - no space between a statement -- name and colon. - what else? Add_Empty_Lines : Boolean := True; -- Add empty lines (if needed to separate compound statements, bodies -- and return statements) Insert_Blank_Lines : Boolean := False; -- Insert blank lines at certain places (between bodies, for example) Preserve_Blank_Lines : Boolean := False; -- Don't squeeze multiple blank lines down to one Max_Line_Length : Natural range 32 .. Line_Len_Limit := 79; Align_Colons_In_Decl : Boolean := True; Align_Asign_In_Decl : Boolean := True; Align_Asign_In_Stmts : Boolean := True; Align_Arrows : Boolean := True; Align_Ats : Boolean := True; Case_Threshold : Natural := 10; -- Starting from this number an extra indentation level is not used for -- variants in record variant part and case statement alternatives in -- case statements, the value 0 means that the extra level is used for -- any number of variants and case alternatives Par_Specs_Threshold : Natural := Natural'Last; -- If the length of parameter specification list is greater than this -- number, each parameter specification is placed on a separate line -- (for functions the threshold is this value minus 1). The default is -- huge, which effectively disables this feature. Par_Associations_Threshold : Natural := Natural'Last; -- If the length of parameter association list is greater than this -- number, and the list contains at least one named association, then -- each parameter association is placed on a separate line. Decimal_Grouping : Natural := 0; -- Number of characters between underscores added to numeric literals -- with no base specified. E.g. "123_456". Zero means don't add -- underscores. Based_Grouping : Natural := 0; -- Same as Decimal_Grouping, but used when a base (including base 10) -- has been specified. E.g. "16#DEAD_BEEF#". Pp_Off_String, Pp_On_String : String_Access := null; -- Comment strings that cause pretty printing to be turned off and -- on. The initial lead "--" is not included, but initial leading -- blanks, if any are included. Output_Encoding : System.WCh_Con.WC_Encoding_Method := System.WCh_Con.WCEM_Brackets; -- Encoding method for output of wide characters. Defaults to the input -- method. Is_PP : Boolean := False; -- True if this is gnatpp; False for xml2gnat. There are some formatting -- options that don't quite work in xml2gnat, which is why this is -- needed. end record; -- Formatting_Options function Comment_Filling_Enabled (Options : Formatting_Options) return Boolean is (Options.Format_Comments and Options.Reformat_Comment_Block); function Alignment_Enabled (Options : Formatting_Options) return Boolean is (Options.Align_Colons_In_Decl or else Options.Align_Asign_In_Decl or else Options.Align_Asign_In_Stmts or else Options.Align_Arrows or else Options.Align_Ats); -- The old gnatpp had the ability to individually enable different kinds of -- alignment; the new gnatpp does not. Instead, we align if ANY alignment -- option is enabled; if all alignment is turned off, we don't align. Token_Mismatch : exception; -- Raised by Tree_To_Ada if it detects a bug in itself that causes the -- output tokens to not match the input properly. ---------------- pragma Style_Checks ("M82"); generic type Ada_Tree_Base is private; -- For now, we make this generic, so we can pass in the ASIS-based -- Ada_Tree_Base type, or the libadalang-based Ada_Node type. with procedure Error_Message (Message : String); -- Should print the message and raise an exception to abort processing. -- The exception is different between ASIS and LAL. with function Is_Null (Tree : Ada_Tree_Base) return Boolean is <>; with function T_Img (Tree : Ada_Tree_Base) return String is <>; package Generic_Lines_Data is ------------------- -- Line Breaking -- ------------------- type Nesting_Level is new Natural; type Line_Break is record Mark : Marker; -- Marks the (potential) line break in the buffer. For a hard line -- break, there is an NL character at that position. For a soft one, -- there is initially nothing in the buffer; an NL will be inserted -- at Mark if the line break becomes enabled. -- -- The reason for inserting NL characters is so we can call Get_Tokens -- on the buffer. The reason for not doing so for soft line breaks -- is that it's not necessary (there will always be something to -- prevent two tokens running together), and it makes the line -- length calculation simpler. Hard : Boolean; -- True for a hard line break, False for a soft one Affects_Comments : Boolean; -- True if the indentation of this Line_Break should affect the -- indentation of surrounding comments. For example, True for '$' but -- False for '%" (see type Ada_Template). Enabled : Boolean; -- True if this line break will appear in the final output Level : Nesting_Level := 1000; -- Nesting level of [...] (continuation-line indentation, mainly for -- soft line breaks). Indentation : Natural := 1000; -- Indentation level of this line break Length : Natural := Natural'Last; -- Number of characters in line, not counting NL. Calculated by -- Split_Lines. Valid only for enabled line breaks. -- For debugging: -- ???? Kind : Ada_Tree_Kind; Template : Namet.Name_Id; UID : Modular := 123_456_789; end record; -- Line_Break type Line_Break_Index is new Positive; type Line_Break_Array is array (Line_Break_Index range <>) of Line_Break; package Line_Break_Vectors is new ASIS_UL.Vectors (Line_Break_Index, Line_Break, Line_Break_Array); subtype Line_Break_Vector is Line_Break_Vectors.Vector; use Line_Break_Vectors; -- use all type Line_Break_Vector; ------------------------ -- Tabs and Alignment -- ------------------------ -- We use "tabs" to implement alignment. For example, if the input is: -- X : Integer := 123; -- Long_Ident : Boolean := False; -- Y : constant Long_Type_Name := Something; -- we're going to align the ":" and ":=" in the output, like this: -- X : Integer := 123; -- Long_Ident : Boolean := False; -- Y : constant Long_Type_Name := Something; -- -- A "tab" appears before each ":" and ":=" in the above. This information -- is recorded in Tabs, below. The position of the tab in the buffer -- is indicated by Mark, which gets automatically updated as unrelated -- passes update Out_Buf. Finally, Insert_Alignment calculates the Col -- and Num_Blanks for each tab, and then inserts blanks accordingly. -- -- A tab always occurs at the start of a token. type Tab_Index_In_Line is range 1 .. 9; -- We probably never have more than a few tabs in a given construct, so 9 -- should be plenty, and it allows us to use a single digit in the -- templates, as in "^2". type Tab_Rec is record Parent, Tree : Ada_Tree_Base; -- Tree is the tree whose template generated this tab, and Parent is its -- parent. Tree is used to ensure that the relevant tabs within a single -- line all come from the same tree; other tabs in the line are ignored. -- Parent is used across lines to ensure that all lines within a -- paragraph to be aligned together all come from the same parent tree. Token : Namet.Name_Id := Namet.Name_Find (""); -- This is some text associated with the Tab. Usually, it is the text of -- the token that follows the Tab in the template. Mark : Marker; -- Position in the buffer of the tab Index_In_Line : Tab_Index_In_Line := Tab_Index_In_Line'Last; Col : Positive := Positive'Last; -- Column number of the tab Num_Blanks : Natural := 0; -- Number of blanks this tab should expand into Is_Fake : Boolean; -- True if this is a "fake tab", which means that it doesn't actually -- insert any blanks (Num_Blanks = 0). See Append_Tab for more -- explanation. Is_Insertion_Point : Boolean; -- False for "^", true for "&". Normally, "^" means insert blanks at the -- point of the "^" to align things. However, if there is a preceding -- (and matching) "&", then the blanks are inserted at the "insertion -- point" indicated by "&". This feature provides for -- right-justification. -- See Tree_To_Ada.Insert_Alignment.Calculate_Num_Blanks.Process_Line in -- pp-formatting.adb for more information. end record; type Tab_Index is new Positive; type Tab_Array is array (Tab_Index range <>) of Tab_Rec; package Tab_Vectors is new ASIS_UL.Vectors (Tab_Index, Tab_Rec, Tab_Array); subtype Tab_Vector is Tab_Vectors.Vector; use Tab_Vectors; -- use all type Tab_Vector; package Tab_In_Line_Vectors is new Ada.Containers.Bounded_Vectors (Tab_Index_In_Line, Tab_Index); use Tab_In_Line_Vectors; subtype Tab_In_Line_Vector is Tab_In_Line_Vectors .Vector (Capacity => Ada.Containers.Count_Type (Tab_Index_In_Line'Last)); type Tab_In_Line_Vector_Index is new Positive; package Tab_In_Line_Vector_Vectors is new Ada.Containers.Indefinite_Vectors (Tab_In_Line_Vector_Index, Tab_In_Line_Vector); -- We use Indefinite_Vectors rather than Vectors because otherwise we get -- "discriminant check failed" at a-cobove.ads:371. I'm not sure whether -- that's a compiler bug. use Tab_In_Line_Vector_Vectors; type Lines_Data_Rec is record Out_Buf : Buffer; -- Buffer containing the text that we will eventually output as the -- final result. We first fill this with initially formatted text by -- walking the tree, and then we modify it repeatedly in multiple -- passes. Cur_Indentation : Natural := 0; Next_Line_Break_Unique_Id : Modular := 1; -- Used to set Line_Break.UID for debugging. -- Each line break is represented by a Line_Break appended onto the -- Line_Breaks vector. Hard line breaks are initially enabled. Soft -- line breaks are initially disabled, and will be enabled if -- necessary to make lines short enough. All_Line_Breaks : Line_Break_Vector; -- All line breaks in the whole input file. Built in two passes. Temp_Line_Breaks : Line_Break_Vector; -- Used by Insert_Comments_And_Blank_Lines to add new line breaks to -- All_Line_Breaks; they are appended to Temp_Line_Breaks, which is -- then merged with All_Line_Breaks when done. This is for efficiency -- and to keep the tables in source-location order. Enabled_Line_Breaks : Line_Break_Vector; -- All enabled line breaks Syntax_Line_Breaks : Line_Break_Vector; -- All (enabled) nonblank hard line breaks. These are called -- "Syntax_..." because they are determined by the syntax (e.g. we -- always put a line break after a statement). -- ???Perhaps make the above tables contain Line_Break_Indexes -- instead of Line_Breaks. Can we use an index into a single table -- instead of UID? Tabs : Tab_Vector; -- All of the tabs in the whole input file, in increasing order Src_Tokens, -- from original source file (Src_Buf) Out_Tokens : -- from Out_Buf Scanner.Token_Vector; Out_Buf_Line_Ends : aliased Marker_Vector; ------------------------------------- -- Support for -pp-off and --pp-on -- ------------------------------------- Pp_Off_On_Delimiters : Scanner.Pp_Off_On_Delimiters_Rec; -- Debugging: Check_Whitespace : Boolean := True; -- Used during the Subtree_To_Ada phase. True except within comments and -- literals. Check for two blanks in a row. end record; -- Lines_Data_Rec procedure Collect_Enabled_Line_Breaks (Lines_Data : in out Lines_Data_Rec; Syntax_Also : Boolean); -- Collect all the enabled line breaks, and (if Syntax_Also is True) also -- the syntax line breaks. function Next_Enabled (Line_Breaks : Line_Break_Vector; F : Line_Break_Index) return Line_Break_Index; -- Next currently-enabled line break after F. Thus, F..Next_Enabled(F) is a -- line. function Is_Empty_Line (Out_Buf : Buffer; Line_Breaks : Line_Break_Vector; F, L : Line_Break_Index) return Boolean; -- True if F..L forms an empty line (or would, if both were enabled). ---------------- function Good_Column (PP_Indentation : Positive; Indentation : Natural) return Natural is ((Indentation / PP_Indentation) * PP_Indentation); -- Make sure indentation is a multiple of PP_Indentation; otherwise style -- checking complains "(style) bad column". procedure Do_Comments_Only (Lines_Data : in out Lines_Data_Rec; Src_Buf : in out Buffer; Options : Formatting_Options); -- Implement the --comments-only switch. This skips most of the usual -- pretty-printing passes, and just formats comments. procedure Post_Tree_Phases (Lines_Data : in out Lines_Data_Rec; Source_File_Name : String; Src_Buf : in out Buffer; Options : Formatting_Options); -- The first pretty-printing pass walks the tree and produces text, -- along with various tables. This performs the remaining passes, which -- do not make use of the tree. ---------------- -- Debugging: function Line_Text (Out_Buf : Buffer; Line_Breaks : Line_Break_Vector; F, L : Line_Break_Index) return W_Str; -- F and L are the first and last index forming a line; returns the text of -- the line, not including any new-lines. function Tab_Image (Out_Buf : Buffer; Tabs : Tab_Vector; X : Tab_Index) return String; procedure Put_Line_Breaks (Out_Buf : Buffer; Line_Breaks : Line_Break_Vector); -- ???This doesn't work unless Line_Breaks is All_Line_Breaks, because of -- various global variables! procedure Put_Line_Break (Out_Buf : Buffer; Break : Line_Break); procedure Put_Buf_With_Marks (Lines_Data : Lines_Data_Rec); procedure Format_Debug_Output (Lines_Data : Lines_Data_Rec; Message : String); Simulate_Token_Mismatch : Boolean renames Debug.Debug_Flag_8; Disable_Final_Check : Boolean renames Debug.Debug_Flag_7; function Enable_Token_Mismatch return Boolean is ((Assert_Enabled or Debug.Debug_Flag_5) and not Simulate_Token_Mismatch and not Debug.Debug_Flag_6); end Generic_Lines_Data; pragma Style_Checks ("M79"); ---------------------------------------------------------------- -- -- Formatting uses the following major passes. Convert_Tree_To_Ada has a -- version based on ASIS and a version based on libadalang. (???Software -- present tense!) Split_Lines through Final_Check are done by -- Post_Tree_Phases above. -- -- Convert_Tree_To_Ada -- Walks the Ada_Tree, using Ada_Templates to convert the tree into -- text form in Out_Buf. Out_Buf is further modified by subsequent -- passes. Builds the Line_Break table for use by Split_Lines and -- Insert_NLs_And_Indentation. Builds the Tabs table for use by -- Insert_Alignment. -- -- Subsequent passes work on the text in Out_Buf, and not the -- Ada_Tree. Therefore, if they need any syntactic/structural -- information, it must be encoded in other data structures, such as the -- Line_Breaks and Tabs tables. -- -- Split_Lines (first time) -- Determine which soft line breaks should be enabled. -- -- Insert_Comments_And_Blank_Lines -- Step through the source tokens and Out_Buf tokens. Copy comment and -- blank line tokens into Out_Buf as they are encountered. -- -- Split_Lines (again) -- We do this again because inserted end-of-line comments can cause -- lines to be too long. We don't want to split the line just before the -- comment; we want to split at some auspicious soft line break(s). -- -- Insert_NLs_And_Indentation -- Insert newline characters and leading blanks for each soft line break -- that was enabled by Split_Lines. -- -- Insert_Alignment -- Walk the Tabs table to calculate how many blanks (if any) should be -- inserted for each Tab. Then insert those blanks in Out_Buf. -- -- Keyword_Casing -- Convert reserved words to the appropriate case as specified by -- command-line options. -- -- Insert_Form_Feeds -- Implement the -ff switch, by inserting FF characters after -- "pragma Page;". -- -- Copy_Pp_Off_Regions -- Regions where pretty printing should be turned off have been -- formatted as usual. This phase undoes all that formatting by copying -- text from Src_Buf to Out_Buf. -- -- Final_Check -- Go through the source tokens and Out_Buf tokens (the latter now -- containing comments and blank lines), and make sure they (mostly) -- match. If there is any mismatch besides a small set of allowed ones, -- raise an exception. This pass makes no changes, so it serves no -- useful purpose unless there is a bug in some previous pass; the -- purpose is to prevent gnatpp from damaging the user's source code. -- The algorithm in this pass is quite similar to the one in -- Insert_Comments_And_Blank_Lines. -- -- Write_Out_Buf -- Write Out_Buf to the appropriate file (or Current_Output). -- -- Each pass expects to be entered with Out_Buf's 'point' at the beginning, -- and returns with Out_Buf's 'point' STILL at the beginning. Thus, passes -- that step through Out_Buf need to call Reset(Out_Buf) before returning. -- ---------------------------------------------------------------- end Pp.Formatting;