In This Topic
    Get information about characters & fonts
    In This Topic

    Introduction

    The following example demonstrates how to group the words on a page of a PDF document by the font they use, and how to get information about each of these fonts.

    It also briefly covers how to get examples of words that use each font, as well as how to get the character count of a specific word and the location and width of each of its characters.

    Get information about words in a PDF document (C#)
    Copy Code
    /// <summary>
    /// Loads a sample PDF, groups the words of its first page by font name, and writes a
    /// report (word count, font size and example words per font, followed by a
    /// character-by-character breakdown of the first word) into a new PDF document.
    /// </summary>
    /// <remarks>
    /// The report is saved even when the first page contains no words; in that case only
    /// the title and the header line are written.
    /// </remarks>
    public static void GetCharactersAndFonts()
    {
      Console.WriteLine( "=== ANALYZE CHARACTERS AND FONTS ===" );
      var fileName = "Two Page Text Only - from libre office.pdf";

      // Vertical layout of the report. Each font group uses four lines inside a block of fontBlockHeight.
      const int fontBlockTop = 105;
      const int fontBlockHeight = 70;
      const int lineHeight = 15;
      const int minimumDetailsTop = 260;

      // Loads a document.
      using( var pdfDoc = PdfDocument.Load( TextsSample.TextsSampleResourcesDirectory + fileName ) )
      {
        var outputFileName = "GetCharactersAndFonts.pdf";
        var outputPath = TextsSample.TextsSampleOutputDirectory + outputFileName;

        // Creates a PdfDocument.
        using( var pdfoutput = PdfDocument.Create( outputPath ) )
        {
          // Gets the first Page.
          var page = pdfoutput.Pages.First();

          // Adds a title.
          var titleTextStyle = TextStyle.WithFont( pdfoutput.Fonts.GetStandardFont( StandardFontType.Helvetica ), 15d );
          page.AddParagraph( "Get Characters and Fonts", titleTextStyle, new ParagraphStyle( ParagraphHorizontalAlignment.Center ) );

          // Adds red Courier text at a specific Y position.
          var redTextStyle = TextStyle.WithFontAndColor( pdfoutput.Fonts.GetStandardFont( StandardFontType.Courier ), 12d, Brushes.Red );
          page.AddParagraph( $"Printing characters and fonts for 1st page of: {fileName}", 60, redTextStyle );

          // Gets the Words from the first Page of loaded document.
          var words = pdfDoc.Pages[ 0 ].Words;

          // Groups Words by their Font's Name; Words without a Font are grouped under "No font".
          var wordsByFont = words.GroupBy( w => w.TextStyle.Font?.Name ?? "No font" );

          var textStyle = TextStyle.WithFont( pdfoutput.Fonts.GetStandardFont( StandardFontType.Courier ), 12d );
          var groupCount = 0;
          foreach( var group in wordsByFont )
          {
            var blockTop = fontBlockTop + ( groupCount * fontBlockHeight );

            // Adds text for the Font at a specific Y position.
            page.AddParagraph( $"Font Name: {group.Key}", blockTop, textStyle );
            page.AddParagraph( $"  Number of words: {group.Count()}", blockTop + lineHeight, textStyle );

            // The font size reported for the group is the one of its first Word.
            var firstWord = group.First();

            // Adds text for the Font size at a specific Y position.
            page.AddParagraph( $"  Font size: {firstWord.TextStyle.FontSize:F1} points", blockTop + ( 2 * lineHeight ), textStyle );

            // Adds text for example of words using this font at a specific Y position.
            page.AddParagraph( $"  Example of words: {string.Join( ", ", group.Take( 3 ).Select( w => $"'{w.Text}'" ) )}", blockTop + ( 3 * lineHeight ), textStyle );
            groupCount++;
          }

          // Gives a detailed analysis of a specific Word.
          if( words.Count > 0 )
          {
            // Starts the section below the last font block so that it is not drawn over the
            // font groups when more than two fonts are found. With two fonts or fewer this
            // evaluates to the original fixed position (260).
            var detailsTop = Math.Max( minimumDetailsTop, fontBlockTop + ( groupCount * fontBlockHeight ) + lineHeight );

            // Gets the first Word of the loaded document.
            var exampleWord = words[ 0 ];

            // Adds text detailing which Word is analysed and gives its character count, all at a specific Y position.
            page.AddParagraph( $"Detailed analysis of word '{exampleWord.Text}':", detailsTop, redTextStyle );
            page.AddParagraph( $"  Number of characters: {exampleWord.Characters.Count}", detailsTop + lineHeight, textStyle );

            for( int i = 0; i < exampleWord.Characters.Count; i++ )
            {
              var character = exampleWord.Characters[ i ];

              // Adds text to detail all the Word's characters, one line per character.
              page.AddParagraph( $"    Character {i + 1}: '{character.Value}'; Location: ({character.Bounds.Left:F0}, {character.Bounds.Top:F0}); Width: {character.Bounds.Width:F2} points",
                                 detailsTop + ( ( i + 2 ) * lineHeight ),
                                 textStyle );
            }
          }

          // Saves the output document. This is done outside of the check above so that the
          // report is still written when the page has no words.
          pdfoutput.Save();
          Console.WriteLine( $"Info exported to path: {outputPath}" );
        }
      }
    }

     

    See Also