The following example demonstrates how to get information about the words on the first page of a PDF document, grouped by the font they use.
It also briefly covers how to get examples of words that use each font, as well as how to get a character count and per-character details for a specific word.
| Get information about words in a PDF document (C#) |
Copy Code |
|---|---|
/// <summary>
/// Loads a sample PDF, analyzes the words on its first page, and writes the findings
/// as text into a newly created PDF document.
/// </summary>
/// <remarks>
/// For each font name found, the report lists the number of words using it, the font size
/// of the first such word, and up to three example words. When the page contains at least
/// one word, a per-character breakdown (value, location, width) of the first word follows.
/// The output document is saved whether or not any word was found.
/// </remarks>
public static void GetCharactersAndFonts()
{
  // Vertical layout of the report, expressed as the Y values passed to AddParagraph.
  const int FontSectionTop = 105;
  const int FontSectionHeight = 70;
  const int LineHeight = 15;
  const int MinimumDetailTop = 260;

  Console.WriteLine( "=== ANALYZE CHARACTERS AND FONTS ===" );

  var fileName = "Two Page Text Only - from libre office.pdf";

  // Loads a document.
  using( var pdfDoc = PdfDocument.Load( TextsSample.TextsSampleResourcesDirectory + fileName ) )
  {
    var outputFileName = "GetCharactersAndFonts.pdf";
    var outputPath = TextsSample.TextsSampleOutputDirectory + outputFileName;

    // Creates a PdfDocument.
    using( var pdfoutput = PdfDocument.Create( outputPath ) )
    {
      // Gets the first Page.
      var page = pdfoutput.Pages.First();

      // Adds a title.
      var titleTextStyle = TextStyle.WithFont( pdfoutput.Fonts.GetStandardFont( StandardFontType.Helvetica ), 15d );
      page.AddParagraph( "Get Characters and Fonts", titleTextStyle, new ParagraphStyle( ParagraphHorizontalAlignment.Center ) );

      // Adds red Courier text at a specific Y position.
      var redTextStyle = TextStyle.WithFontAndColor( pdfoutput.Fonts.GetStandardFont( StandardFontType.Courier ), 12d, Brushes.Red );
      page.AddParagraph( $"Printing characters and fonts for 1st page of: {fileName}", 60, redTextStyle );

      // Gets the Words from the first Page of loaded document.
      var words = pdfDoc.Pages[ 0 ].Words;

      // Groups Words by their Font's Name; words without a font fall under "No font".
      var wordsByFont = words.GroupBy( w => w.TextStyle.Font?.Name ?? "No font" );

      var textStyle = TextStyle.WithFont( pdfoutput.Fonts.GetStandardFont( StandardFontType.Courier ), 12d );

      // Each font gets its own four-line section, stacked top to bottom.
      var fontSectionCount = 0;
      foreach( var group in wordsByFont )
      {
        var sectionTop = FontSectionTop + ( fontSectionCount * FontSectionHeight );

        // Adds text for the Font name and its word count.
        page.AddParagraph( $"Font Name: {group.Key}", sectionTop, textStyle );
        page.AddParagraph( $" Number of words: {group.Count()}", sectionTop + LineHeight, textStyle );

        // Adds text for the Font size, taken from the first Word of the group.
        var firstWord = group.First();
        page.AddParagraph( $" Font size: {firstWord.TextStyle.FontSize:F1} points", sectionTop + ( 2 * LineHeight ), textStyle );

        // Adds text for up to three example words using this font.
        var examples = string.Join( ", ", group.Take( 3 ).Select( w => $"'{w.Text}'" ) );
        page.AddParagraph( $" Example of words: {examples}", sectionTop + ( 3 * LineHeight ), textStyle );

        fontSectionCount++;
      }

      // Gives a detailed analysis of a specific Word.
      if( words.Count > 0 )
      {
        // Starts below the last font section so that three or more fonts do not get
        // overprinted; with two or fewer fonts this stays at the original position.
        var detailTop = Math.Max( MinimumDetailTop, FontSectionTop + ( fontSectionCount * FontSectionHeight ) );

        // Gets the first Word of the loaded document.
        var exampleWord = words[ 0 ];

        // Adds text detailing which Word is analysed and gives its character count.
        page.AddParagraph( $"Detailed analysis of word '{exampleWord.Text}':", detailTop, redTextStyle );
        page.AddParagraph( $" Number of characters: {exampleWord.Characters.Count}", detailTop + LineHeight, textStyle );

        for( int i = 0; i < exampleWord.Characters.Count; i++ )
        {
          var character = exampleWord.Characters[ i ];

          // Adds one line per character, below the two header lines.
          page.AddParagraph( $" Character {i + 1}: '{character.Value}'; Location: ({character.Bounds.Left:F0}, {character.Bounds.Top:F0}); Width: {character.Bounds.Width:F2} points", detailTop + ( ( i + 2 ) * LineHeight ), textStyle );
        }
      }

      // Saves the output document, even when the page had no words to analyse.
      pdfoutput.Save();
      Console.WriteLine( $"Info exported to path: {outputFileName}" );
    }
  }
}