Hướng dẫn HTML to RTF converter

tags result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*div([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*tr([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*p([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase); // Remove remaining tags like , links, images, // comments etc - anything thats enclosed inside < > result = System.Text.RegularExpressions.Regex.Replace(result, @"<[^>]*>", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase); // replace special characters: result = System.Text.RegularExpressions.Regex.Replace(result, @" ", " ", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"•", " * ", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"‹", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"›", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"™", "(tm)", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"⁄", "/", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"<", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @">", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"©", "(c)", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"®", "(r)", 
System.Text.RegularExpressions.RegexOptions.IgnoreCase); // Remove all others. More can be added, see // http://hotwired.lycos.com/webmonkey/reference/special_characters/ result = System.Text.RegularExpressions.Regex.Replace(result, @"&(.{2,6});", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase); // make line breaking consistent result = result.Replace("\n", "\r"); // Remove extra line breaks and tabs: // replace over 2 breaks with 2 and over 4 tabs with 4. // Prepare first to remove any whitespaces inbetween // the escaped characters and remove redundant tabs inbetween linebreaks result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)( )+(\r)", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, "(\t)( )+(\t)", "\t\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, "(\t)( )+(\r)", "\t\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)( )+(\t)", "\r\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase); // Remove redundant tabs result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)(\t)+(\r)", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase); // Remove multible tabs followind a linebreak with just one tab result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)(\t)+", "\r\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase); // Initial replacement target string for linebreaks string breaks = "\r\r\r"; // Initial replacement target string for tabs string tabs = "\t\t\t\t\t"; for (int index = 0; index < result.Length; index++) { result = result.Replace(breaks, "\r\r"); result = result.Replace(tabs, "\t\t\t\t"); breaks = breaks + "\r"; tabs = tabs + "\t"; } // Thats it. return result; }

How do I convert a file to RTF?

To save a file as RTF:

Open the Word document.

From the File button, choose Save As from the menu.

Rename the file if desired.

Click on the Save As type choice button.

Scroll down the list of file types to Rich Text Format (*.rtf).

Click Save.

Is RTF format still used?

Microsoft discontinued the development of RTF in 2008, but it's still widely supported by apps on almost every operating system.

Can PDF be converted to RTF?

Open your PDF. Click Tools > Export. Select RTF for your preferred file format. Export, save, and share.

What is an RTF file used for?

What is Rich Text Format (RTF)? Rich Text Format (RTF) is a file format that lets you exchange text files between different word processors in different operating systems (OSes). For example, you can create a file in Microsoft Word and then open it in another word processor, such as Apple Pages or Google Docs.

You can also convert HTML into many other file formats. Please see the complete list below.

HTML TO DOC Converter (Microsoft Word Document)

HTML TO DOCM Converter (Microsoft Word Macro-Enabled Document)

HTML TO DOCX Converter (Microsoft Word Open XML Document)

HTML TO DOT Converter (Microsoft Word Document Template)

HTML TO DOTM Converter (Microsoft Word Macro-Enabled Template)

HTML TO DOTX Converter (Word Open XML Document Template)

HTML TO ODT Converter (Open Document Text)

HTML TO OTT Converter (Open Document Template)

HTML TO TXT Converter (Plain Text File Format)

HTML TO MD Converter (Markdown)

HTML TO TIFF Converter (Tagged Image File Format)

HTML TO TIF Converter (Tagged Image File Format)

HTML TO JPG Converter (Joint Photographic Expert Group Image File)

HTML TO JPEG Converter (JPEG Image)

HTML TO PNG Converter (Portable Network Graphic)

HTML TO GIF Converter (Graphical Interchange Format File)

HTML TO BMP Converter (Bitmap File Format)

HTML TO ICO Converter (Microsoft Icon File)

HTML TO PSD Converter (Adobe Photoshop Document)

HTML TO WMF Converter (Windows Metafile)

HTML TO EMF Converter (Enhanced Metafile Format)

Convert HTML TO DCM (DICOM Image)

Convert HTML TO DICOM (Digital Imaging and Communications in Medicine)

Convert HTML TO WEBP (Raster Web Image File Format)

Convert HTML TO SVG (Scalable Vector Graphics File)

Convert HTML TO JP2 (JPEG 2000 Core Image File)

Convert HTML TO EMZ (Enhanced Windows Metafile Compressed)

Convert HTML TO WMZ (Windows Metafile Compressed)

Convert HTML TO SVGZ (Compressed Scalable Vector Graphics File)

Convert HTML TO TGA (Targa Graphic)

Convert HTML TO PSB (Adobe Photoshop Image File)

Convert HTML TO PPT (PowerPoint Presentation)

Convert HTML TO PPS (Microsoft PowerPoint Slide Show)

Convert HTML TO PPTX (PowerPoint Open XML Presentation)

Convert HTML TO PPSX (PowerPoint Open XML Slide Show)

Convert HTML TO ODP (OpenDocument Presentation File Format)

Convert HTML TO OTP (OpenDocument Presentation Template)

Convert HTML TO POTX (Microsoft PowerPoint Open XML Template)

Convert HTML TO POT (PowerPoint Template)

Convert HTML TO POTM (Microsoft PowerPoint Macro-Enabled Template)

Convert HTML TO PPTM (Microsoft PowerPoint Macro-Enabled Presentation)

Convert HTML TO PPSM (Microsoft PowerPoint Macro-Enabled Slide Show)

HTML TO FODP Conversion (OpenDocument Flat XML Presentation)

HTML TO HTML Conversion (Hyper Text Markup Language)

HTML TO HTM Conversion (Hypertext Markup Language File)

HTML TO MHT Conversion (MIME Encapsulation of Aggregate HTML)

HTML TO MHTML Conversion (MIME Encapsulation of Aggregate HTML)

HTML TO XLS Conversion (Microsoft Excel Binary File Format)

HTML TO XLSX Conversion (Microsoft Excel Open XML Spreadsheet)

HTML TO XLSM Conversion (Microsoft Excel Macro-Enabled Spreadsheet)

HTML TO XLSB Conversion (Microsoft Excel Binary Spreadsheet File)

HTML TO ODS Conversion (Open Document Spreadsheet)

HTML TO XLTX Conversion (Microsoft Excel Open XML Template)

HTML TO XLT Conversion (Microsoft Excel Template)

HTML TO XLTM Conversion (Microsoft Excel Macro-Enabled Template)

HTML TO TSV Conversion (Tab Separated Values File)

HTML TO XLAM Conversion (Microsoft Excel Macro-Enabled Add-In)

HTML TO CSV Conversion (Comma Separated Values File)

HTML TO FODS Conversion (OpenDocument Flat XML Spreadsheet)

HTML TO SXC Conversion (StarOffice Calc Spreadsheet)

HTML TO PDF Conversion (Portable Document)

HTML TO EPUB Conversion (Digital E-Book File Format)

HTML TO XPS Conversion (Open XML Paper Specification)

HTML TO TEX Conversion (LaTeX Source Document)

It is not perfect of course, but here is the code I use to convert HTML to plain text.

(I was not the original author; I adapted it from code found on the web.)

public static string ConvertHtmlToText(string source) {

            string result;

            // Remove HTML Development formatting
            // Replace line breaks with space
            // because browsers inserts space
            result = source.Replace("\r", " ");
            // Replace line breaks with space
            // because browsers inserts space
            result = result.Replace("\n", " ");
            // Remove step-formatting
            result = result.Replace("\t", string.Empty);
            // Remove repeating spaces because browsers ignore them
            result = System.Text.RegularExpressions.Regex.Replace(result,
                                                                  @"( )+", " ");

            // Remove the header (prepare first by clearing attributes)
            result = System.Text.RegularExpressions.Regex.Replace(result,
                     @"<( )*head([^>])*>", "",
                     System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            result = System.Text.RegularExpressions.Regex.Replace(result,
                     @"(<( )*(/)( )*head( )*>)", "",
                     System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            result = System.Text.RegularExpressions.Regex.Replace(result,
                     "().*()", string.Empty,
                     System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            // remove all scripts (prepare first by clearing attributes)
            result = System.Text.RegularExpressions.Regex.Replace(result,
                     @"<( )*script([^>])*>", "",
                     System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            //result = System.Text.RegularExpressions.Regex.Replace(result, 
            //         @"()])*()",
            //         string.Empty, 
            //         System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            result = System.Text.RegularExpressions.Regex.Replace(result,
                     @"()", string.Empty,
                     System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            // remove all styles (prepare first by clearing attributes)
            result = System.Text.RegularExpressions.Regex.Replace(result,
                     @"<( )*style([^>])*>", "",
                     System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            result = System.Text.RegularExpressions.Regex.Replace(result,
                     "()", string.Empty,
                     System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            // insert tabs in spaces of 
tags result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*td([^>])*>", "\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase); // insert line breaks in places of
and
  • tags result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*br( )*>", "\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase); result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*li( )*>", "\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase); // insert line paragraphs (double line breaks) in place // if

    ,

    and