|
1 | 1 | /* |
2 | | - * Copyright 2004-2025 the original author or authors. |
| 2 | + * Copyright 2004-2026 the original author or authors. |
3 | 3 | * |
4 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | 5 | * you may not use this file except in compliance with the License. |
|
17 | 17 |
|
18 | 18 | import java.io.IOException; |
19 | 19 |
|
20 | | -import com.opensymphony.module.sitemesh.Page; |
21 | | -import com.opensymphony.module.sitemesh.html.HTMLProcessor; |
22 | | -import com.opensymphony.module.sitemesh.html.State; |
23 | | -import com.opensymphony.module.sitemesh.html.StateTransitionRule; |
24 | | -import com.opensymphony.module.sitemesh.html.rules.BodyTagRule; |
25 | | -import com.opensymphony.module.sitemesh.html.rules.ContentBlockExtractingRule; |
26 | | -import com.opensymphony.module.sitemesh.html.rules.FramesetRule; |
27 | | -import com.opensymphony.module.sitemesh.html.rules.HeadExtractingRule; |
28 | | -import com.opensymphony.module.sitemesh.html.rules.HtmlAttributesRule; |
29 | | -import com.opensymphony.module.sitemesh.html.rules.MSOfficeDocumentPropertiesRule; |
30 | | -import com.opensymphony.module.sitemesh.html.rules.MetaTagRule; |
31 | | -import com.opensymphony.module.sitemesh.html.rules.PageBuilder; |
32 | | -import com.opensymphony.module.sitemesh.html.rules.ParameterExtractingRule; |
33 | | -import com.opensymphony.module.sitemesh.html.rules.TitleExtractingRule; |
34 | | -import com.opensymphony.module.sitemesh.html.util.CharArray; |
| 20 | +import com.opensymphony.module.sitemesh.SitemeshBuffer; |
35 | 21 | import com.opensymphony.module.sitemesh.parser.HTMLPageParser; |
36 | 22 | import com.opensymphony.module.sitemesh.parser.TokenizedHTMLPage; |
37 | 23 | import com.opensymphony.sitemesh.Content; |
38 | 24 |
|
39 | 25 | public class GrailsHTMLPageParser extends HTMLPageParser { |
40 | 26 |
|
41 | | - @Override |
42 | | - public Page parse(char[] data) throws IOException { |
43 | | - CharArray head = new CharArray(64); |
44 | | - CharArray body = new CharArray(4096); |
45 | | - TokenizedHTMLPage page = new TokenizedHTMLPage(data, body, head); |
46 | | - HTMLProcessor processor = new HTMLProcessor(data, body); |
47 | | - State html = processor.defaultState(); |
48 | | - |
49 | | - // Core rules for SiteMesh to be functional. |
50 | | - html.addRule(new HeadExtractingRule(head)); // contents of <head> |
51 | | - html.addRule(new BodyTagRule(page, body)); // contents of <body> |
52 | | - html.addRule(new TitleExtractingRule(page)); // the <title> |
53 | | - html.addRule(new FramesetRule(page)); // if the page is a frameset |
54 | | - |
55 | | - // Additional rules - designed to be tweaked. |
56 | | - addUserDefinedRules(html, page); |
57 | | - |
58 | | - processor.process(); |
59 | | - return page; |
60 | | - } |
61 | | - |
62 | 27 | public Content parseContent(char[] data) throws IOException { |
63 | 28 | return new TokenizedHTMLPage2Content((TokenizedHTMLPage) parse(data)); |
64 | 29 | } |
65 | 30 |
|
66 | | - @Override |
67 | | - protected void addUserDefinedRules(State html, PageBuilder page) { |
68 | | - // Ensure that while in <xml> tag, none of the other rules kick in. |
69 | | - // For example <xml><book><title>hello</title></book></xml> should not affect the title of the page. |
70 | | - State xml = new State(); |
71 | | - html.addRule(new StateTransitionRule("xml", xml)); |
72 | | - |
73 | | - // Useful properties |
74 | | - html.addRule(new HtmlAttributesRule(page)); // attributes in <html> element |
75 | | - html.addRule(new MetaTagRule(page)); // all <meta> tags |
76 | | - html.addRule(new ParameterExtractingRule(page)); // <parameter> blocks |
77 | | - html.addRule(new ContentBlockExtractingRule(page)); // <content> blocks |
78 | | - |
79 | | - // Capture properties written to documents by MS Office (author, version, company, etc). |
80 | | - // Note: These properties are from the xml state, not the html state. |
81 | | - xml.addRule(new MSOfficeDocumentPropertiesRule(page)); |
| 31 | + public Content parseContent(SitemeshBuffer buffer) throws IOException { |
| 32 | + return new TokenizedHTMLPage2Content((TokenizedHTMLPage) parse(buffer)); |
82 | 33 | } |
83 | 34 |
|
84 | 35 | } |
0 commit comments