module embaked.embaked;

import std.algorithm;
import std.array;
import std.ascii;
import std.base64;
import std.file;
import std.path;
import std.range;
import std.regex;
import std.string;
import std.traits;

import css.parser;
import html;


/// Bit flags selecting which transformations `embake` applies to a document.
enum Options {
    BakeImages      = 1 << 0,   // replace <img src> file references with embedded data
    BakeStyles      = 1 << 1,   // inline <style> / <link rel="stylesheet"> rules into style attributes
    BakeContentID   = 1 << 2,   // bake images but replace them and any inline images with a generated content-id
                                // (only consulted while BakeImages is also set)
    StripClasses    = 1 << 3,   // strip css classes from all elements (only consulted while BakeStyles is also set)
    Default         = BakeImages | BakeStyles | StripClasses
}


/// Selector matching every element that carries a `class` attribute.
private __gshared Selector allWithClassAttr_ = Selector.parse("[class]");
/// Splits a `data:` URI into mime type (1), encoding (2) and payload (3).
private __gshared auto pattern = ctRegex!(`data:([^;]+);([^,]+),(.*)`, "gis");


/// One piece of content extracted from a document when `Options.BakeContentID` is used.
struct EmbeddedContent {
    const(char)[] id;        // hex MD5 of `content`; referenced from the document as `cid:<id>`
    const(char)[] mime;      // mime type, e.g. "image/png"
    const(char)[] encoding;  // encoding of `content` ("base64" for files loaded from disk)
    const(char)[] content;   // encoded payload
}


/// Result of the string-returning `embake` overloads.
struct EmbakeResult {
    const(char)[] html;          // processed markup
    EmbeddedContent[] content;   // content extracted for content-id references
}


/// Processes `source` and returns the resulting markup, resolving referenced
/// files through `defaultResolver` with `paths` as search directories.
EmbakeResult embake(const(char)[] source, Options options, const(char)[][] paths) {
    return embake(source, options, (const(char)[] uri, const(char)[] name) => defaultResolver(uri, name, paths));
}


/// Processes `source`, writes the resulting markup into `app` and returns the
/// extracted content. Files are resolved through `defaultResolver` with `paths`.
EmbeddedContent[] embake(Appender)(const(char)[] source, Options options, ref Appender app, const(char)[][] paths) {
    return embake(source, options, app, (const(char)[] uri, const(char)[] name) => defaultResolver(uri, name, paths));
}


/// Processes `doc` in place and returns the extracted content. Files are
/// resolved through `defaultResolver` with `paths`.
EmbeddedContent[] embake(ref Document doc, Options options, const(char)[][] paths) {
    return embake(doc, options, (const(char)[] uri, const(char)[] name) => defaultResolver(uri, name, paths));
}


/// Processes `source` and returns the resulting markup together with the
/// extracted content. `resolve(uri, fileName)` maps a reference to a file path.
EmbakeResult embake(Resolver)(const(char)[] source, Options options, Resolver resolve) if (isSomeFunction!Resolver) {
    auto app = appender!(const(char)[]);
    auto content = embake(source, options, app, resolve);
    return EmbakeResult(app.data, content);
}


/// Parses `source` into a document, processes it, and writes the inner HTML of
/// the document root into `app`. Returns the extracted content.
EmbeddedContent[] embake(Appender, Resolver)(const(char)[] source, Options options, ref Appender app, Resolver resolve) if (isSomeFunction!Resolver) {
    auto doc = createDocument(source);
    auto content = embake(doc, options, resolve);
    doc.root.innerHTML(app);
    return content;
}


/**
 * Processes `doc` in place according to `options`.
 *
 * With `BakeImages`, every `<img>` whose `src` can be loaded through `resolve`
 * and whose extension maps to a known mime type is rewritten: either to a
 * base64 `data:` URI, or (with `BakeContentID`) to a `cid:` reference whose
 * payload is returned to the caller. With `BakeContentID`, existing `data:`
 * URIs are converted to `cid:` references as well.
 *
 * With `BakeStyles`, rules from `<style>` elements and linked stylesheets are
 * merged into the `style` attribute of matching elements and the source
 * elements are removed. A `<style>` carrying an `ignore-inline` attribute is
 * left in place (only the marker attribute is removed). With `StripClasses`,
 * `class` attributes are removed afterwards.
 *
 * Params:
 *   doc     = document to modify
 *   options = combination of `Options` flags
 *   resolve = callable `(uri, fileName)` returning the path of the file to load
 *
 * Returns: content extracted for `cid:` references, without duplicate ids;
 *          empty unless both `BakeImages` and `BakeContentID` are set.
 */
EmbeddedContent[] embake(Resolver)(ref Document doc, Options options, Resolver resolve) if (isSomeFunction!Resolver) {
    EmbeddedContent[] content;

    if ((options & Options.BakeImages) != 0) {
        foreach (img; doc.elementsByTagName("img")) {
            auto src = img.attr("src");
            if (src.empty)
                continue;

            if (options & Options.BakeContentID) {
                EmbeddedContent image;

                if (src.indexOf("data:") == 0) {
                    // already inline: take the pieces straight from the data URI
                    auto matches = src.matchFirst(pattern);
                    if (matches.empty)
                        continue;

                    image.mime = matches[1];
                    image.encoding = matches[2];
                    image.content = matches[3];
                } else {
                    image.mime = extensionToMimeType(extension(src));
                    if (image.mime.empty)
                        continue;

                    auto data = loadFile(src, resolve, true);
                    if (data.empty)
                        continue;

                    image.encoding = "base64";
                    image.content = mimeEncode(data);
                }

                // the id is derived from the encoded payload, so identical images share one id
                image.id = generateCID(cast(const(ubyte)[])image.content);
                img.attr("src", "cid:" ~ image.id);

                // return each distinct payload only once
                if (!content.canFind!(c => c.id == image.id))
                    content ~= image;
            } else {
                // already inline, nothing to do
                if (src.indexOf("data:") == 0)
                    continue;

                auto source = loadFile(src, resolve, true);
                if (source.empty)
                    continue;

                auto mime = extensionToMimeType(extension(src));
                if (mime.empty)
                    continue;

                img.attr("src", format("data:%s;base64,%s", mime, mimeEncode(source)));
            }
        }
    }

    if ((options & Options.BakeStyles) != 0) {
        Style[] styles;
        styles.reserve(128);

        // nodes whose rules were consumed and which are removed afterwards
        NodeWrapper!Node[] useless;
        auto handler = CSSHandler(styles);

        foreach (style; doc.elementsByTagName("style")) {
            if (!style.hasAttr("ignore-inline")) {
                parseCSS(style.text, handler);
                useless ~= style;
            } else {
                style.removeAttr("ignore-inline");
            }
        }

        foreach (link; doc.elementsByTagName("link")) {
            auto rel = link.attr("rel");
            if (rel.toLower != "stylesheet")
                continue;

            auto href = link.attr("href");
            if (href.empty)
                continue;

            auto source = loadFile(href, resolve, false);
            if (source.length) {
                parseCSS(cast(char[])source, handler);
                useless ~= link;
            }
        }
        // iterate in reverse order to avoid double destruction
        foreach(node; useless.retro)
            node.destroy;

        // Highest specificity first, ties broken by latest declaration first.
        // Each rule is prepended to the existing style attribute below, so the
        // rules handled first end up last in the attribute.
        styles.sort!((ref a, ref b) => (a.selector.specificity() != b.selector.specificity()) ? a.selector.specificity() > b.selector.specificity() : a.index > b.index);

        foreach (style; styles) {
            foreach (element; doc.querySelectorAll(style.selector)) {
                // fully qualified: std.algorithm also provides a `strip`
                HTMLString curr = std.string.strip(element.attr("style"));
                // skip rules whose property text is already contained in the attribute
                if (curr.empty || (curr.length < style.properties.length) || (curr.indexOf(style.properties) == -1))
                    element.attr("style", style.properties ~ curr);
            }
        }

        if (options & Options.StripClasses)
            foreach(element; doc.querySelectorAll(allWithClassAttr_)) {
                element.removeAttr("class");
            }
    }

    return content;
}


/**
 * Default file resolver: locates `fileName` on disk.
 *
 * Returns `fileName` unchanged when it exists as given. Otherwise one leading
 * '/' is dropped and the name is looked up under each entry of `paths`, in
 * order. `uri` is not used.
 *
 * Returns: the first existing path, or null when nothing was found.
 */
const(char)[] defaultResolver(const(char)[] uri, const(char)[] fileName, const(char)[][] paths) {
    if (fileName.empty)
        return null;

    if (exists(fileName))
        return fileName;

    // make the name relative so that it can be joined onto the search paths
    if (fileName[0] == '/')
        fileName = fileName[1..$];

    foreach(path; paths) {
        auto name = buildNormalizedPath(path, fileName);
        if (exists(name))
            return name;
    }

    return null;
}


/// A single selector together with the serialized declarations of its rule block.
private struct Style {
    Selector selector;             // parsed selector
    const(char)[] selectorSource;  // selector text as it appeared in the stylesheet
    const(char)[] properties;      // declarations serialized as "name:value;name:value;"
    size_t index;                  // position in the collected list (declaration order)
}


/// Handler handed to `parseCSS`; collects one `Style` per selector of every
/// non-empty rule block into the array given at construction.
/// NOTE(review): the callback names are presumably dictated by css.parser - confirm.
private struct CSSHandler {
    this(ref Style[] styles) {
        styles_ = &styles;
    }

    void onSelector(const(char)[] data) {
        selectors_ ~= data;
    }

    void onSelectorEnd() {
    }

    void onBlockEnd() {
        if (!app_.data.empty) {
            // copy the declarations out, the appender buffer is reused for the next block
            auto style = app_.data.dup;
            app_.clear;

            foreach(selector; selectors_) {
                *styles_ ~= Style(Selector.parse(selector), selector, style, styles_.length);
            }
        }
        selectors_.length = 0;
    }

    void onPropertyName(const(char)[] data) {
        prop_ = data;
        value_.length = 0;
    }

    void onPropertyValue(const(char)[] data) {
        // a value may arrive in several pieces
        value_ ~= data;
    }

    void onPropertyValueEnd() {
        app_.put(prop_);
        app_.put(':');
        app_.put(value_);
        app_.put(';');

        prop_.length = 0;
        value_.length = 0;
    }

    void onComment(const(char)[] data) {
    }

private:
    Appender!(char[]) app_;        // declarations of the block currently being parsed

    Style[]* styles_;              // destination array owned by the caller
    const(char)[][] selectors_;    // selectors of the block currently being parsed
    const(char)[] prop_;
    const(char)[] value_;
}


/// Returns `x` without a leading UTF-8 byte order mark (EF BB BF), if present.
private const(char)[] stripUTFbyteOrderMarker(const(char)[] x) {
    if (x.length >= 3 && (x[0] == 0xef) && (x[1] == 0xbb) && (x[2] == 0xbf))
        return x[3..$];
    return x;
}


/**
 * Loads the file referenced by `uri`.
 *
 * For URIs containing "://", the path component (from the first '/' after the
 * authority up to, but excluding, a '?' query string) is used as file name;
 * otherwise the whole `uri` is. `resolve(uri, fileName)` then supplies the
 * path that is actually read.
 *
 * Params:
 *   uri     = reference as found in the document
 *   resolve = callable `(uri, fileName)` returning a file path
 *   binary  = when false, a leading UTF-8 byte order mark is stripped
 *
 * Returns: the file contents, or null when the URI has no path component or
 *          the resolved file does not exist.
 */
private const(ubyte)[] loadFile(Resolver)(const(char)[] uri, Resolver resolve, bool binary) {
    auto fileName = uri;
    auto protocolLength = uri.indexOf("://");
    if (protocolLength != -1) {
        auto start = uri.indexOf('/', protocolLength + 3);
        if (start == -1)
            return null;

        // Search forward from the start of the path. `lastIndexOf(c, idx)` only
        // looks at uri[0 .. idx], i.e. the scheme/host part, and therefore never
        // found the query string.
        auto end = uri.indexOf('?', start);
        if (end == -1)
            end = uri.length;
        fileName = uri[start..end];
    }

    auto resolved = resolve(uri, fileName);
    if (exists(resolved)) {
        if (!binary) {
            return cast(ubyte[])((cast(const(char)[])read(resolved)).stripUTFbyteOrderMarker);
        } else {
            return cast(ubyte[])read(resolved);
        }
    }
    return null;
}


/// Maps a file extension (including the leading dot, any letter case) to an
/// image mime type. Returns null for unknown extensions.
private const(char)[] extensionToMimeType(const(char)[] ext) {
    switch(ext.toLower()) {
        case ".jpg":
        case ".jpeg":
            return "image/jpeg";
        case ".png":
            return "image/png";
        case ".gif":
            return "image/gif";
        case ".tga":
            return "image/targa";
        case ".tif":
        case ".tiff":
            return "image/tiff";
        default:
            break;
    }
    return null;
}


/// Base64-encodes `input` in chunks of 57 bytes (76 encoded characters),
/// terminating every chunk with CRLF.
private const(char)[] mimeEncode(const(ubyte)[] input) {
    auto mime = appender!(char[]);
    foreach (ref encoded; Base64.encoder(chunks(cast(ubyte[])input, 57))) {
        mime.put(encoded);
        mime.put("\r\n");
    }
    return mime.data();
}


/// Returns the MD5 digest of `content` as a lower-case hexadecimal string.
private const(char)[] generateCID(const(ubyte)[] content) {
    import std.digest.md;
    return md5Of(content).toHexString!(Order.increasing, LetterCase.lower)();
}