在之前的文章里,有介绍如何将Html转成PDF的,详见:https://blog.terrynow.com/2021/08/10/itextpdf-5-x-convert-html-to-pdf-support-css-and-chinese/
本次又遇到一个新需求:Html里面有图片 <img src="xxx" />,其中xxx是形如 http://192.168.1.1/xxx.jpg 的链接,结果发现这种链接方式的图片无法显示出来。所以想办法把src里面的图片转成base64的格式,例如 <img src="data:image/png;base64,xxxxx" />。不过默认情况下,iTextPDF还是无法识别并显示这种格式的图片。
实现
这里,我们需要做一些特别的处理,让iTextPDF能识别这样的img格式。
新建ImageTagProcessor.java
import com.itextpdf.text.Chunk; import com.itextpdf.text.Element; import com.itextpdf.text.Image; import com.itextpdf.text.pdf.codec.Base64; import com.itextpdf.tool.xml.NoCustomContextException; import com.itextpdf.tool.xml.Tag; import com.itextpdf.tool.xml.WorkerContext; import com.itextpdf.tool.xml.exceptions.RuntimeWorkerException; import com.itextpdf.tool.xml.html.HTML; import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext; import java.util.ArrayList; import java.util.List; import java.util.Map; /** * @author Terry E-mail: yaoxinghuo at 126 dot com * @date 2021/9/22 17:53 * @description */ public class ImageTagProcessor extends com.itextpdf.tool.xml.html.Image { @Override public List<Element> end(final WorkerContext ctx, final Tag tag, final List<Element> currentContent) { final Map<String, String> attributes = tag.getAttributes(); String src = attributes.get(HTML.Attribute.SRC); List<Element> elements = new ArrayList<>(1); if (null != src && src.length() > 0) { Image img = null; if (src.startsWith("data:image/")) { final String base64Data = src.substring(src.indexOf(",") + 1); try { img = Image.getInstance(Base64.decode(base64Data)); } catch (Exception ignored) { } if (img != null) { try { final HtmlPipelineContext htmlPipelineContext = getHtmlPipelineContext(ctx); elements.add(getCssAppliers().apply(new Chunk((com.itextpdf.text.Image) getCssAppliers().apply(img, tag, htmlPipelineContext), 0, 0, true), tag, htmlPipelineContext)); } catch (NoCustomContextException e) { throw new RuntimeWorkerException(e); } } } if (img == null) { elements = super.end(ctx, tag, currentContent); } } return elements; } }
新建AsianFontProvider.java 实现中文支持,解决中文乱码或者不显示的问题
import com.itextpdf.text.BaseColor; import com.itextpdf.text.Font; import com.itextpdf.text.pdf.BaseFont; import com.itextpdf.tool.xml.XMLWorkerFontProvider; /** * @author Terry E-mail: yaoxinghuo at 126 dot com * @date 2021/9/22 19:55 * @description */ public class AsianFontProvider extends XMLWorkerFontProvider { private BaseFont baseFont; private String pdfFontBold; public AsianFontProvider(String pdfFontBasic, String pdfFontBold) { this.baseFont = createBaseFont(pdfFontBasic); this.pdfFontBold = pdfFontBold; } @Override public Font getFont(String face, String encode, boolean b, float size, int style, BaseColor baseColor) { BaseFont bfChinese; // if (style == Font.BOLD) { // bfChinese = createBaseFont("/Users/Terry/Downloads/WeiRuanYaQiHei-Bold/MicrosoftYaqiHeiBold-2.ttf"); // } else { // bfChinese = createBaseFont("/Users/Terry/Downloads/WeiRuanYaHeiTi/WeiRuanYaHei-1.ttf"); // } if (style == Font.BOLD) { bfChinese = createBaseFont(pdfFontBold); } else { bfChinese = baseFont; } // System.out.println("get font called, s:" + face + ",s1:" + encode); // BaseFont bfChinese = BaseFont.createFont("/Users/Terry/Downloads/STSong.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED); // BaseFont bfChinese = BaseFont.createFont("STSongStd-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED); return new Font(bfChinese, size, style, baseColor); } private static BaseFont createBaseFont(String fontDir) { try { return BaseFont.createFont(fontDir, BaseFont.IDENTITY_H, BaseFont.EMBEDDED); } catch (Exception e) { e.printStackTrace(); return null; } } }
生成PDF的工具方法:
/**
 * Converts an HTML document to PDF, with support for inline (Base64) images and Chinese text.
 *
 * <p>The {@code <img>} processor is replaced by {@code ImageTagProcessor} and fonts are
 * supplied by {@code AsianFontProvider}. The page size is US Letter and the HTML is read
 * as UTF-8.
 *
 * @param pdfFontBasic path to the regular-weight Chinese TTF font
 * @param pdfFontBold  path to the bold Chinese TTF font
 * @param htmlFile     the source HTML document
 * @return the generated PDF as bytes; replace the {@code ByteArrayOutputStream} with a
 *         {@code FileOutputStream} to write straight to a file instead
 * @throws Exception if the PDF writer cannot be created or the HTML cannot be parsed
 */
public static byte[] convertHtmlToPDF(String pdfFontBasic, String pdfFontBold, InputStream htmlFile) throws Exception {
    Document document = new com.itextpdf.text.Document(PageSize.LETTER);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try {
        PdfWriter pdfWriter = PdfWriter.getInstance(document, bos);
        document.open();

        // Swap the stock <img> processor for the one that understands data: URIs.
        TagProcessorFactory tagProcessorFactory = Tags.getHtmlTagProcessorFactory();
        tagProcessorFactory.removeProcessor(HTML.Tag.IMG);
        tagProcessorFactory.addProcessor(new ImageTagProcessor(), HTML.Tag.IMG);

        CssFilesImpl cssFiles = new CssFilesImpl();
        cssFiles.add(XMLWorkerHelper.getInstance().getDefaultCSS());
        StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver(cssFiles);

        HtmlPipelineContext hpc = new HtmlPipelineContext(
                new CssAppliersImpl(new AsianFontProvider(pdfFontBasic, pdfFontBold)));
        hpc.setAcceptUnknown(true).autoBookmark(true).setTagFactory(tagProcessorFactory);

        // Pipeline order: CSS resolution -> HTML processing -> PDF output.
        HtmlPipeline htmlPipeline = new HtmlPipeline(hpc, new PdfWriterPipeline(document, pdfWriter));
        CssResolverPipeline pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);
        XMLWorker worker = new XMLWorker(pipeline, true);

        Charset charset = StandardCharsets.UTF_8;
        XMLParser parser = new XMLParser(true, worker, charset);
        parser.parse(htmlFile, charset);
    } finally {
        // Closing the document also finishes and closes the attached writer, so the
        // buffer holds a complete PDF only after this call.
        document.close();
    }
    return bos.toByteArray();
}
文章评论