The following example demonstrates how to get information about specific words in a PDF document.
| Get information about words in a PDF document (C#) | Copy Code |
|---|---|
/// <summary>
/// Loads an existing PDF document, reads the words found on its first page and
/// writes a short report (text, location and size of up to five words) into a
/// newly created PDF document named "GetWords.pdf".
/// </summary>
public static void GetWords()
{
    Console.WriteLine("=== GetWords ===");

    var fileName = "Two Page Text Only - from libre office.pdf";

    // Open the document whose words will be inspected.
    using (var sourceDocument = PdfDocument.Load(TextsSample.TextsSampleResourcesDirectory + fileName))
    {
        var outputFileName = "GetWords.pdf";
        var outputPath = TextsSample.TextsSampleOutputDirectory + outputFileName;

        // Create the document that will receive the report.
        using (var outputDocument = PdfDocument.Create(outputPath))
        {
            // The report is written on the first page of the output document.
            var firstPage = outputDocument.Pages.First();

            // Centered title in 15pt Helvetica.
            var headingStyle = TextStyle.WithFont(
                outputDocument.Fonts.GetStandardFont(StandardFontType.Helvetica),
                15d);
            firstPage.AddParagraph(
                "Get Words",
                headingStyle,
                new ParagraphStyle(ParagraphHorizontalAlignment.Center));

            // Red 12pt Courier caption placed at Y = 40.
            var captionStyle = TextStyle.WithFontAndColor(
                outputDocument.Fonts.GetStandardFont(StandardFontType.Courier),
                12d,
                Brushes.Red);
            firstPage.AddParagraph($"Printing 5 words from: {fileName}", 40, captionStyle);

            // Words come from the first page of the loaded (source) document.
            var pageWords = sourceDocument.Pages[0].Words;

            // Plain 12pt Courier for the per-word lines.
            var wordTextStyle = TextStyle.WithFont(
                outputDocument.Fonts.GetStandardFont(StandardFontType.Courier),
                12d);

            // Report at most five words; fewer when the page holds fewer.
            var reportedCount = Math.Min(5, pageWords.Count);
            for (int index = 0; index < reportedCount; index++)
            {
                var entry = pageWords[index];

                // One line per word, starting at Y = 90 and spaced 15 units apart.
                var lineY = 90 + (index * 15);
                firstPage.AddParagraph(
                    $"Word {index + 1}: '{entry.Text}'; Location: ({entry.Bounds.Left:F0}, {entry.Bounds.Top:F0}); Size: {entry.Bounds.Width:F0} x {entry.Bounds.Height:F0};",
                    lineY,
                    wordTextStyle);
            }

            // Persist the report document.
            outputDocument.Save();
            Console.WriteLine($"Info exported to path: {outputFileName}");
        }
    }
}