1 module embaked.embaked;
2 
3 import std.algorithm;
4 import std.array;
5 import std.ascii;
6 import std.base64;
7 import std.file;
8 import std.path;
9 import std.range;
10 import std.regex;
11 import std.string;
12 import std.traits;
13 
14 import css.parser;
15 import html;
16 
17 
// Bit flags selecting which transformations embake applies; combine them with `|`.
// In this file BakeContentID is only consulted while BakeImages is being processed,
// and StripClasses is only consulted while BakeStyles is being processed.
enum Options {
	// rewrite the src of <img> elements so that the image data travels with the document
	BakeImages		= 1 << 0,
	// merge rules from <style> elements and stylesheet <link>s into the style attribute of matching elements
	BakeStyles		= 1 << 1,
	BakeContentID	= 1 << 2,	// bake images but replace them and any inline images with a generated content-id
	StripClasses	= 1 << 3, // strip css classes from all elements
	// images as data URIs, styles inlined, class attributes removed
	Default = BakeImages | BakeStyles | StripClasses
}
25 
26 
// Pre-parsed selector matching every element that has a class attribute; used by embake to strip classes.
private __gshared Selector allWithClassAttr_ = Selector.parse("[class]");
// Splits a data: URI into three captures: (1) the text before the first ';' (stored as mime),
// (2) the text between that ';' and the first ',' (stored as encoding), (3) everything after the ',' (the payload).
private __gshared auto pattern = ctRegex!(`data:([^;]+);([^,]+),(.*)`, "gis");
29 
30 
// One image extracted by embake when Options.BakeContentID is set.
struct EmbeddedContent {
	// hex MD5 of `content`; the corresponding <img> src is rewritten to "cid:" followed by this id
	const(char)[] id;
	// MIME type, taken from the data: URI or derived from the file extension
	const(char)[] mime;
	// encoding label: the one found in the data: URI, or "base64" for images loaded from file
	const(char)[] encoding;
	// the encoded payload (data: URI payload as-is, or line-wrapped base64 for files)
	const(char)[] content;
}
37 
38 
// Result of the string-returning embake overloads.
struct EmbakeResult {
	// markup serialized from the processed document
	const(char)[] html;
	// embedded content collected while baking images (see EmbeddedContent)
	EmbeddedContent[] content;
}
43 
44 
/// Bakes `source` and returns the resulting markup plus any extracted content.
/// File references are located with defaultResolver using `paths` as search directories.
EmbakeResult embake(const(char)[] source, Options options, const(char)[][] paths) {
	// adapt defaultResolver to the two-argument resolver shape expected by the generic overload
	const(char)[] resolveInPaths(const(char)[] uri, const(char)[] name) {
		return defaultResolver(uri, name, paths);
	}

	return embake(source, options, &resolveInPaths);
}
48 
49 
/// Bakes `source`, writes the resulting markup into `app` and returns any extracted content.
/// File references are located with defaultResolver using `paths` as search directories.
EmbeddedContent[] embake(Appender)(const(char)[] source, Options options, ref Appender app, const(char)[][] paths) {
	// adapt defaultResolver to the two-argument resolver shape expected by the generic overload
	const(char)[] resolveInPaths(const(char)[] uri, const(char)[] name) {
		return defaultResolver(uri, name, paths);
	}

	return embake(source, options, app, &resolveInPaths);
}
53 
54 
/// Bakes an already-parsed document in place and returns any extracted content.
/// File references are located with defaultResolver using `paths` as search directories.
EmbeddedContent[] embake(ref Document doc, Options options, const(char)[][] paths) {
	// adapt defaultResolver to the two-argument resolver shape expected by the generic overload
	const(char)[] resolveInPaths(const(char)[] uri, const(char)[] name) {
		return defaultResolver(uri, name, paths);
	}

	return embake(doc, options, &resolveInPaths);
}
58 
59 
/// Bakes `source` using a caller-supplied resolver and returns the markup together with
/// the extracted content. The markup is accumulated in a local appender.
EmbakeResult embake(Resolver)(const(char)[] source, Options options, Resolver resolve) if (isSomeFunction!Resolver) {
	EmbakeResult result;
	auto sink = appender!(const(char)[]);

	result.content = embake(source, options, sink, resolve);
	// read the appender only after baking has filled it
	result.html = sink.data;
	return result;
}
65 
66 
/// Parses `source`, bakes the resulting document with the given resolver, serializes the
/// document root's inner HTML into `app` and returns the extracted content.
EmbeddedContent[] embake(Appender, Resolver)(const(char)[] source, Options options, ref Appender app, Resolver resolve) if (isSomeFunction!Resolver) {
	auto parsed = createDocument(source);
	auto extracted = embake(parsed, options, resolve);

	// serialize only after all in-place modifications are done
	parsed.root.innerHTML(app);

	return extracted;
}
73 
74 
/// Bakes external resources referenced by `doc` into the document itself (modifies `doc` in place).
///
/// With Options.BakeImages every <img> that has a non-empty src is processed:
///  - if Options.BakeContentID is also set, the image payload (taken from an existing data: URI or
///    loaded from file and base64-encoded) is collected into the returned array and the src is
///    rewritten to "cid:<id>"; images with identical ids are collected only once;
///  - otherwise file-based images are rewritten to base64 data: URIs and existing data: URIs are left alone.
/// Images whose extension has no known MIME type, or whose file cannot be loaded, are skipped.
///
/// With Options.BakeStyles the rules of all <style> elements (except those carrying an
/// `ignore-inline` attribute, which is merely removed) and of all loadable <link rel="stylesheet">
/// files are merged into the style attribute of every matching element; the consumed <style> and
/// <link> nodes are destroyed. If Options.StripClasses is set as well, all class attributes are removed.
///
/// Params:
///   doc     = document to transform
///   options = combination of Options flags
///   resolve = callable invoked as resolve(uri, fileName) that returns the path of the file to read
/// Returns: the content collected for content-id references; empty unless both BakeImages and
///          BakeContentID are set.
EmbeddedContent[] embake(Resolver)(ref Document doc, Options options, Resolver resolve) if (isSomeFunction!Resolver) {
	EmbeddedContent[] content;

	if ((options & Options.BakeImages) != 0) {
		foreach (img; doc.elementsByTagName("img")) {
			auto src = img.attr("src");
			if (src.empty)
				continue;

			const isDataURI = src.startsWith("data:");

			if (options & Options.BakeContentID) {
				EmbeddedContent image;

				if (isDataURI) {
					// already inline: take the parts of the data: URI apart
					auto matches = src.matchFirst(pattern);
					if (matches.empty)
						continue;

					image.mime = matches[1];
					image.encoding = matches[2];
					image.content = matches[3];
				} else {
					image.mime = extensionToMimeType(extension(src));
					if (image.mime.empty)
						continue;

					auto data = loadFile(src, resolve, true);
					if (data.empty)
						continue;

					image.encoding = "base64";
					image.content = mimeEncode(data);
				}

				// the id is derived from the encoded payload, so identical images share one id
				image.id = generateCID(cast(const(ubyte)[])image.content);
				img.attr("src", "cid:" ~ image.id);

				if (!content.canFind!(c => c.id == image.id))
					content ~= image;
			} else {
				// nothing to do for images that are inline already
				if (isDataURI)
					continue;

				// check the extension first so files of unknown type are never read
				auto mime = extensionToMimeType(extension(src));
				if (mime.empty)
					continue;

				auto source = loadFile(src, resolve, true);
				if (source.empty)
					continue;

				img.attr("src", format("data:%s;base64,%s", mime, mimeEncode(source)));
			}
		}
	}

	if ((options & Options.BakeStyles) != 0) {
		Style[] styles;
		styles.reserve(128);

		// <style>/<link> nodes whose rules were consumed and that can be dropped from the document
		NodeWrapper!Node[] useless;
		auto handler = CSSHandler(styles);

		foreach (style; doc.elementsByTagName("style")) {
			if (!style.hasAttr("ignore-inline")) {
				parseCSS(style.text, handler);
				useless ~= style;
			} else {
				// keep the element as a regular stylesheet, only drop the marker attribute
				style.removeAttr("ignore-inline");
			}
		}

		foreach (link; doc.elementsByTagName("link")) {
			auto rel = link.attr("rel");
			if (rel.toLower != "stylesheet")
				continue;

			auto href = link.attr("href");
			if (href.empty)
				continue;

			auto source = loadFile(href, resolve, false);
			if (source.length) {
				parseCSS(cast(char[])source, handler);
				useless ~= link;
			}
		}
		// iterate in reverse order to avoid double destruction
		foreach(node; useless.retro)
			node.destroy;

		// Highest specificity first, later rules first among equals. Each rule is prepended below,
		// so rules handled earlier end up further back in the attribute.
		styles.sort!((ref a, ref b) => (a.selector.specificity() != b.selector.specificity()) ? a.selector.specificity() > b.selector.specificity() : a.index > b.index);

		foreach (style; styles) {
			foreach (element; doc.querySelectorAll(style.selector)) {
				// fully qualified so the string version of strip is used
				HTMLString curr = std.string.strip(element.attr("style"));
				// skip rules whose exact property text is already present in the attribute
				if (curr.empty || (curr.length < style.properties.length) || (curr.indexOf(style.properties) == -1))
					element.attr("style", style.properties ~ curr);
			}
		}

		if (options & Options.StripClasses)
			foreach(element; doc.querySelectorAll(allWithClassAttr_)) {
				element.removeAttr("class");
			}
	}

	return content;
}
191 
192 
/// Default resolver: maps a referenced file name to an existing file on disk.
///
/// `fileName` is tried as given first; otherwise a single leading '/' is dropped and the name is
/// looked up relative to each entry of `paths`, in order. Only regular files are accepted, so a
/// name that happens to denote a directory (e.g. "/") is never returned.
///
/// Params:
///   uri      = the original reference (not used by this resolver)
///   fileName = file part of the reference
///   paths    = directories to search
/// Returns: the path of the first matching file, or null when nothing matches.
const(char)[] defaultResolver(const(char)[] uri, const(char)[] fileName, const(char)[][] paths) {
	if (fileName.empty)
		return null;

	// isFile throws for missing entries, hence the exists() guard
	if (exists(fileName) && isFile(fileName))
		return fileName;

	if (fileName[0] == '/')
		fileName = fileName[1..$];

	// a bare "/" would otherwise resolve to the search directory itself
	if (fileName.empty)
		return null;

	foreach(path; paths) {
		auto name = buildNormalizedPath(path, fileName);
		if (exists(name) && isFile(name))
			return name;
	}

	return null;
}
211 
212 
// One CSS rule (a single selector plus the declarations of its block) collected by CSSHandler.
private struct Style {
	// parsed form of selectorSource, used for matching and for specificity ordering
	Selector selector;
	// selector text as delivered by the CSS parser
	const(char)[] selectorSource;
	// declarations of the block, concatenated as "name:value;" pairs
	const(char)[] properties;
	// length of the style list when this rule was appended; tie-breaker when sorting by specificity
	size_t index;
}
219 
220 
// Callback sink handed to parseCSS. It gathers the selectors and declarations of each block
// and, when the block ends, appends one Style per selector to the caller's style list.
private struct CSSHandler {
	// `styles` is the list that receives the collected rules; only its address is stored.
	this(ref Style[] styles) {
		styles_ = &styles;
	}

	// Remembers a selector of the block currently being parsed.
	void onSelector(const(char)[] data) {
		selectors_ ~= data;
	}

	void onSelectorEnd() {
	}

	// Emits the rules of the finished block (if it had any declarations) and resets the selector list.
	void onBlockEnd() {
		auto declarations = app_.data;
		if (declarations.length != 0) {
			// copy the text before the buffer is reused for the next block
			auto properties = declarations.dup;
			app_.clear;

			foreach (text; selectors_)
				*styles_ ~= Style(Selector.parse(text), text, properties, styles_.length);
		}

		selectors_.length = 0;
	}

	// Starts a new declaration: stores the property name and discards any previous value.
	void onPropertyName(const(char)[] data) {
		value_.length = 0;
		prop_ = data;
	}

	// Appends a piece of the current property's value.
	void onPropertyValue(const(char)[] data) {
		value_ ~= data;
	}

	// Writes the finished declaration as "name:value;" into the block buffer.
	void onPropertyValueEnd() {
		app_.put(prop_);
		app_.put(':');
		app_.put(value_);
		app_.put(';');

		value_.length = 0;
		prop_.length = 0;
	}

	void onComment(const(char)[] data) {
	}

private:
	// declarations of the block currently being parsed
	Appender!(char[]) app_;

	// destination list owned by the caller
	Style[]* styles_;
	// selectors of the block currently being parsed
	const(char)[][] selectors_;
	// name and value of the declaration currently being parsed
	const(char)[] prop_;
	const(char)[] value_;
}
275 
276 
// Returns `x` without a leading UTF-8 byte order mark (bytes EF BB BF); other input is returned unchanged.
private const(char)[] stripUTFbyteOrderMarker(const(char)[] x) {
	// U+FEFF encodes to exactly the three BOM bytes in UTF-8
	enum bom = "\uFEFF";

	const hasBom = x.length >= bom.length && x[0 .. bom.length] == bom;
	return hasBom ? x[bom.length .. $] : x;
}
282 
283 
// Loads the file a URI refers to.
//
// For URIs with a scheme ("xxx://host/path?query") the path component, without the query string,
// is used as file name; any other URI is used as file name unchanged. The name is handed to
// `resolve(uri, fileName)` and the file it returns is read.
//
// Params:
//   uri     = reference as found in the document
//   resolve = callable mapping (uri, fileName) to a path on disk
//   binary  = when false, a leading UTF-8 byte order mark is removed from the data
// Returns: the file contents, or null when the URI has no path or the resolved path is not an existing file.
private const(ubyte)[] loadFile(Resolver)(const(char)[] uri, Resolver resolve, bool binary) {
	auto fileName = uri;
	auto protocolLength = uri.indexOf("://");
	if (protocolLength != -1) {
		// first '/' after the host part starts the path
		auto start = uri.indexOf('/', protocolLength + 3);
		if (start == -1)
			return null;

		// Search forward from the path start. lastIndexOf with a start index only looks at
		// uri[0 .. startIdx], which would be the scheme/host part.
		auto end = uri.indexOf('?', start);
		if (end == -1)
			end = uri.length;
		fileName = uri[start..end];
	}

	auto resolved = resolve(uri, fileName);
	// directories exist too, but cannot be read
	if (!exists(resolved) || !isFile(resolved))
		return null;

	auto data = read(resolved);
	if (binary)
		return cast(const(ubyte)[])data;

	return cast(const(ubyte)[])((cast(const(char)[])data).stripUTFbyteOrderMarker);
}
308 
309 
// Maps a file extension (including the leading dot, any letter case) to the MIME type of a
// supported image format. Returns null for unknown extensions.
private const(char)[] extensionToMimeType(const(char)[] ext) {
	switch(ext.toLower()) {
		case ".jpg":
		case ".jpeg":
			return "image/jpeg";
		case ".png":
			return "image/png";
		case ".gif":
			return "image/gif";
		case ".tga":
			return "image/targa";
		case ".tif":
		case ".tiff":
			return "image/tiff";
		case ".bmp":
			return "image/bmp";
		case ".webp":
			return "image/webp";
		default:
			return null;
	}
}
328 
329 
// Base64-encodes `input` in MIME fashion: the data is split into 57-byte pieces, each piece
// becomes one line of at most 76 characters, and every line is terminated by CRLF.
private const(char)[] mimeEncode(const(ubyte)[] input) {
	enum bytesPerLine = 57;

	auto output = appender!(char[]);
	foreach (piece; chunks(input, bytesPerLine)) {
		output.put(Base64.encode(piece));
		output.put("\r\n");
	}
	return output.data();
}
338 
339 
// Derives a content id from `content`: the MD5 digest rendered as lower-case hex, bytes in increasing order.
private const(char)[] generateCID(const(ubyte)[] content) {
	import std.digest.md;

	auto digest = md5Of(content);
	// the digest is passed as a slice, selecting the toHexString overload that returns a string
	return toHexString!(Order.increasing, LetterCase.lower)(digest[]);
}