作業部屋の使い方を試しています。
(empty log message)
@@ -191,7 +191,12 @@ | ||
191 | 191 | </not> |
192 | 192 | </and> |
193 | 193 | </condition> |
194 | - <property name="javac.fork" value="${jdkBug6558476}"/> | |
194 | + <condition else="false" property="javac.fork"> | |
195 | + <or> | |
196 | + <istrue value="${jdkBug6558476}"/> | |
197 | + <istrue value="${javac.external.vm}"/> | |
198 | + </or> | |
199 | + </condition> | |
195 | 200 | <property name="jar.index" value="false"/> |
196 | 201 | <property name="jar.index.metainf" value="${jar.index}"/> |
197 | 202 | <property name="copylibs.rebase" value="true"/> |
@@ -217,6 +222,7 @@ | ||
217 | 222 | <condition else="" property="testng.debug.mode" value="-mixed"> |
218 | 223 | <istrue value="${junit+testng.available}"/> |
219 | 224 | </condition> |
225 | + <property name="java.failonerror" value="true"/> | |
220 | 226 | </target> |
221 | 227 | <target name="-post-init"> |
222 | 228 | <!-- Empty placeholder for easier customization. --> |
@@ -693,7 +699,7 @@ | ||
693 | 699 | <sequential> |
694 | 700 | <property environment="env"/> |
695 | 701 | <resolve name="profiler.current.path" value="${profiler.info.pathvar}"/> |
696 | - <java classname="@{classname}" dir="${profiler.info.dir}" fork="true" jvm="${profiler.info.jvm}"> | |
702 | + <java classname="@{classname}" dir="${profiler.info.dir}" failonerror="${java.failonerror}" fork="true" jvm="${profiler.info.jvm}"> | |
697 | 703 | <jvmarg line="${endorsed.classpath.cmd.line.arg}"/> |
698 | 704 | <jvmarg value="${profiler.info.jvmargs.agent}"/> |
699 | 705 | <jvmarg line="${profiler.info.jvmargs}"/> |
@@ -768,7 +774,7 @@ | ||
768 | 774 | <attribute default="${debug.classpath}" name="classpath"/> |
769 | 775 | <element name="customize" optional="true"/> |
770 | 776 | <sequential> |
771 | - <java classname="@{classname}" dir="${work.dir}" fork="true"> | |
777 | + <java classname="@{classname}" dir="${work.dir}" failonerror="${java.failonerror}" fork="true"> | |
772 | 778 | <jvmarg line="${endorsed.classpath.cmd.line.arg}"/> |
773 | 779 | <jvmarg line="${debug-args-line}"/> |
774 | 780 | <jvmarg value="-Xrunjdwp:transport=${debug-transport},address=${jpda.address}"/> |
@@ -795,7 +801,7 @@ | ||
795 | 801 | <attribute default="jvm" name="jvm"/> |
796 | 802 | <element name="customize" optional="true"/> |
797 | 803 | <sequential> |
798 | - <java classname="@{classname}" dir="${work.dir}" fork="true"> | |
804 | + <java classname="@{classname}" dir="${work.dir}" failonerror="${java.failonerror}" fork="true"> | |
799 | 805 | <jvmarg line="${endorsed.classpath.cmd.line.arg}"/> |
800 | 806 | <jvmarg value="-Dfile.encoding=${runtime.encoding}"/> |
801 | 807 | <redirector errorencoding="${runtime.encoding}" inputencoding="${runtime.encoding}" outputencoding="${runtime.encoding}"/> |
@@ -1,264 +0,0 @@ | ||
1 | -/* | |
2 | - * Copyright (C) 2014 kgto. | |
3 | - * | |
4 | - * This library is free software; you can redistribute it and/or | |
5 | - * modify it under the terms of the GNU Lesser General Public | |
6 | - * License as published by the Free Software Foundation; either | |
7 | - * version 2.1 of the License, or (at your option) any later version. | |
8 | - * | |
9 | - * This library is distributed in the hope that it will be useful, | |
10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | - * Lesser General Public License for more details. | |
13 | - * | |
14 | - * You should have received a copy of the GNU Lesser General Public | |
15 | - * License along with this library; if not, write to the Free Software | |
16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | - * MA 02110-1301 USA | |
18 | - */ | |
19 | -/* | |
20 | - * $Id$ | |
21 | - */ | |
22 | - | |
23 | -package webScraping.core; | |
24 | - | |
25 | -import java.io.File; | |
26 | -import java.io.FileInputStream; | |
27 | -import java.io.FileNotFoundException; | |
28 | -import java.io.IOException; | |
29 | -import java.util.logging.FileHandler; | |
30 | -import java.util.logging.Formatter; | |
31 | -import java.util.logging.Handler; | |
32 | -import java.util.logging.Level; | |
33 | -import java.util.logging.LogManager; | |
34 | -import java.util.logging.LogRecord; | |
35 | -import java.util.logging.Logger; | |
36 | -import javax.swing.text.MutableAttributeSet; | |
37 | -import javax.swing.text.html.HTML; | |
38 | - | |
39 | -/** | |
40 | - * Debug logging support. | |
41 | - * Placing a configuration file (Debug.prop) in the current directory controls the debug log output. | |
42 | - * @author kgto | |
43 | - */ | |
44 | -public class DebugProcess { | |
45 | - // Configuration file name | |
46 | - protected static final String configurationFilename = "Debug.prop"; | |
47 | - // Logger (registered under the name "WebScraping") | |
48 | - protected static final Logger logger = Logger.getLogger("WebScraping"); | |
49 | - // Default level used for every debug log record written by this class | |
50 | - protected static final Level loggerlevel = Level.FINEST; | |
51 | - | |
52 | - | |
53 | - /** | |
54 | - * Log output setup. | |
55 | - * Loads the log configuration file if it exists; when the resulting logger level is Level.ALL, | |
56 | - * adds an appending file handler, then applies the output format to the file handlers. | |
57 | - */ | |
58 | - public static void debuglog_set() { | |
59 | - try { | |
60 | - initLogConfiguration(); | |
61 | - | |
62 | - if(Level.ALL.equals(logger.getLevel())) { | |
63 | - //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2)); | |
64 | - logger.addHandler(new FileHandler("WebScraping%g.log", true)); | |
65 | - } | |
66 | - setFomatter(); | |
67 | - | |
68 | - } catch (IOException | SecurityException ex) { | |
69 | - Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
70 | - } | |
71 | - } | |
72 | - | |
73 | - /** | |
74 | - * Log output teardown (the method body is currently empty). | |
75 | - */ | |
76 | - public static void debuglog_unset() { | |
77 | - } | |
78 | - | |
79 | - | |
80 | - /** | |
81 | - * Debug output (HTML parsing - tag & attributes). | |
82 | - * Outputs the parse state of an HTML tag and its attributes. | |
83 | - * Format: 9 : x : tag-name [attribute-name]count = attribute-value<br> | |
84 | - * Legend: 9 = nesting level (count value), x = one of F (tag start) / E (tag end) / S (standalone tag)<br> | |
85 | - * @param tag tag | |
86 | - * @param attr attributes (may be null, in which case only the tag is printed) | |
87 | - * @param methodname name of the parent method that called this method | |
88 | - * @param count nesting level of the HTML tag | |
89 | - */ | |
90 | - public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr, | |
91 | - String methodname, int count) { | |
92 | - | |
93 | - // Log level check: skip when the logger has no level set or its level is above loggerlevel | |
94 | - if(logger.getLevel() == null) { | |
95 | - return; | |
96 | - } | |
97 | - if(logger.getLevel().intValue() > loggerlevel.intValue()) { | |
98 | - return; | |
99 | - } | |
100 | - | |
101 | - // Build the message; kbn stays ' ' when methodname is none of the three known handlers | |
102 | - char kbn = ' '; | |
103 | - if("handleStartTag".equals(methodname)) { | |
104 | - kbn = 'F'; | |
105 | - } | |
106 | - if("handleEndTag".equals(methodname)) { | |
107 | - kbn = 'E'; | |
108 | - } | |
109 | - if("handleSimpleTag".equals(methodname)) { | |
110 | - kbn = 'S'; | |
111 | - } | |
112 | - | |
113 | - StringBuilder strBuf = new StringBuilder(80); | |
114 | - strBuf.append(count).append(" : "); | |
115 | - strBuf.append(kbn).append(" : "); | |
116 | - strBuf.append(tag.toString()); | |
117 | - // Attribute information | |
118 | - if(attr != null) { | |
119 | - if(attr.getAttributeCount() != 0) { | |
120 | - AttributeData handleAttrData = new AttributeData(); | |
121 | - handleAttrData.add(tag, attr); | |
122 | - for(int i = 0; i < handleAttrData.size; i++) { | |
123 | - strBuf.append(" ["); | |
124 | - strBuf.append(handleAttrData.getattrname(i)); | |
125 | - strBuf.append("]"); | |
126 | - strBuf.append(handleAttrData.getcount(i)); | |
127 | - strBuf.append(" = "); | |
128 | - strBuf.append(handleAttrData.getattrvalue(i)); | |
129 | - } | |
130 | - } | |
131 | - } | |
132 | - | |
133 | - logger.log(loggerlevel, strBuf.toString()); | |
134 | - } | |
135 | - | |
136 | - /** | |
137 | - * Debug output (message). | |
138 | - * Outputs the arbitrary message passed as the argument. | |
139 | - * @param str message | |
140 | - * @param methodname name of the parent method that called this method (not used by this overload) | |
141 | - */ | |
142 | - public static void htmlinfo(String str, String methodname) { | |
143 | - logger.log(loggerlevel, str); | |
144 | - } | |
145 | - | |
146 | - public static void htmlinfo(String str) { | |
147 | - logger.log(loggerlevel, str); | |
148 | - } | |
149 | - | |
150 | - /** | |
151 | - * Debug output (HTML parsing - body text). | |
152 | - * Outputs the content of the body text. | |
153 | - * @param data body text (character data inside the HTML) | |
154 | - * @param methodname name of the parent method that called this method (not used by this overload) | |
155 | - */ | |
156 | - public static void htmlinfo(char[] data, String methodname) { | |
157 | - String dat = new String(data); | |
158 | - logger.log(loggerlevel, dat); | |
159 | - } | |
160 | - | |
161 | - public static void htmlinfo(char[] data) { | |
162 | - String dat = new String(data); | |
163 | - logger.log(loggerlevel, dat); | |
164 | - } | |
165 | - | |
166 | - /** | |
167 | - * Debug output (search key). | |
168 | - * Outputs the content of the search key (SearchData). | |
169 | - * @param skey search key whose tag, id and class values are logged | |
170 | - */ | |
171 | - public static void searchDatainfo(SearchData skey) { | |
172 | - | |
173 | - StringBuilder strBuf = new StringBuilder(30); | |
174 | - strBuf.append("SearchData KEY tag["); | |
175 | - strBuf.append(skey.getHtmltag()); | |
176 | - strBuf.append("] ID["); | |
177 | - strBuf.append(skey.getHtmlid()); | |
178 | - strBuf.append("] CLASS["); | |
179 | - strBuf.append(skey.getHtmlclass()); | |
180 | - strBuf.append("]\n"); | |
181 | - | |
182 | - logger.log(loggerlevel, strBuf.toString()); | |
183 | - } | |
184 | - | |
185 | - /** | |
186 | - * Log configuration file check. | |
187 | - * Checks whether the configuration file exists and, if it does, applies its content. | |
188 | - */ | |
189 | - private static void initLogConfiguration() { | |
190 | - | |
191 | - File file = new File(configurationFilename); | |
192 | - try { | |
193 | - if(file.exists()) { | |
194 | - FileInputStream inputStream = new FileInputStream(file); | |
195 | - // Read the configuration file (NOTE(review): the stream is not closed in this method) | |
196 | - LogManager.getLogManager().readConfiguration(inputStream); | |
197 | - } | |
198 | - | |
199 | - } catch (FileNotFoundException ex) { | |
200 | - Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
201 | - } catch (IOException ex) { | |
202 | - Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
203 | - } | |
204 | - } | |
205 | - | |
206 | - /** | |
207 | - * Log output formatter setup. | |
208 | - * Sets the format used when writing the log to a file (applies HtmlFormatter to every FileHandler of the logger). | |
209 | - */ | |
210 | - private static void setFomatter() { | |
211 | - Handler[] handlers = logger.getHandlers(); | |
212 | - for(int i = 0 ; i < handlers.length ; i++) { | |
213 | - if(handlers[i] instanceof java.util.logging.FileHandler) { | |
214 | - handlers[i].setFormatter(new HtmlFormatter()); | |
215 | - } | |
216 | - } | |
217 | - } | |
218 | - | |
219 | -} | |
220 | - | |
221 | -/** | |
222 | - * Log output formatter. | |
223 | - * @author kgto | |
224 | - */ | |
225 | -class HtmlFormatter extends Formatter { | |
226 | - /** | |
227 | - * Builds the output string for a log record. | |
228 | - * Output format:<br> | |
229 | - * YYYY-MM-DD HH:MM:SS log-level<method-name>message ("N/A" is printed when the method name is null) | |
230 | - */ | |
231 | - @Override | |
232 | - public synchronized String format(final LogRecord aRecord) { | |
233 | - | |
234 | - final StringBuffer message = new StringBuffer(100); | |
235 | - | |
236 | - long millis = aRecord.getMillis(); | |
237 | - String time = String.format("%tF %<tT", millis); | |
238 | - | |
239 | - message.append(time); | |
240 | - message.append(' '); | |
241 | - | |
242 | - message.append(aRecord.getLevel()); | |
243 | - message.append('<'); | |
244 | - String methodName = aRecord.getSourceMethodName(); | |
245 | - message.append(methodName != null ? methodName : "N/A"); | |
246 | - message.append('>'); | |
247 | - | |
248 | - message.append(formatMessage(aRecord)); | |
249 | - message.append('\n'); | |
250 | - | |
251 | - // For a record carrying an exception, also output its description and stack trace | |
252 | - Throwable throwable = aRecord.getThrown(); | |
253 | - if (throwable != null) { | |
254 | - message.append(throwable.toString()); | |
255 | - message.append('\n'); | |
256 | - for (StackTraceElement trace : throwable.getStackTrace()) { | |
257 | - message.append('\t'); | |
258 | - message.append(trace.toString()); | |
259 | - message.append('\n'); | |
260 | - } | |
261 | - } | |
262 | - return message.toString(); | |
263 | - } | |
264 | -} |
@@ -1,164 +0,0 @@ | ||
1 | -/* | |
2 | - * Copyright (C) 2014 kgto. | |
3 | - * | |
4 | - * This library is free software; you can redistribute it and/or | |
5 | - * modify it under the terms of the GNU Lesser General Public | |
6 | - * License as published by the Free Software Foundation; either | |
7 | - * version 2.1 of the License, or (at your option) any later version. | |
8 | - * | |
9 | - * This library is distributed in the hope that it will be useful, | |
10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | - * Lesser General Public License for more details. | |
13 | - * | |
14 | - * You should have received a copy of the GNU Lesser General Public | |
15 | - * License along with this library; if not, write to the Free Software | |
16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | - * MA 02110-1301 USA | |
18 | - */ | |
19 | -/* | |
20 | - * $Id$ | |
21 | - */ | |
22 | - | |
23 | -package webScraping.core; | |
24 | - | |
25 | -import java.util.ArrayList; | |
26 | -import java.util.Enumeration; | |
27 | -import javax.swing.text.MutableAttributeSet; | |
28 | -import javax.swing.text.html.HTML; | |
29 | - | |
30 | -/** | |
31 | - * Holds the attribute information of HTML tags. | |
32 | - * @author kgto | |
33 | - */ | |
34 | -public class AttributeData { | |
35 | - | |
36 | - public AttributeData() { | |
37 | - AttrList = new ArrayList(); | |
38 | - size = 0; | |
39 | - } | |
40 | - | |
41 | - /** | |
42 | - * Adds attribute information: one entry per attribute name, all sharing the next count for the tag. | |
43 | - * @param tag tag the attributes belong to | |
44 | - * @param attr attribute set whose names and values are stored as strings | |
45 | - */ | |
46 | - public void add(HTML.Tag tag, MutableAttributeSet attr) { | |
47 | - | |
48 | - int tagcount = tagcnt(tag); | |
49 | - ++tagcount; | |
50 | - | |
51 | - Enumeration e = attr.getAttributeNames(); | |
52 | - while(e.hasMoreElements()) { | |
53 | - Object obj = e.nextElement(); | |
54 | - | |
55 | - AttrData a = new AttrData(); | |
56 | - a.tag = tag; | |
57 | - a.count = tagcount; | |
58 | - a.attrname = obj.toString(); | |
59 | - a.attrvalue = attr.getAttribute(obj).toString(); | |
60 | - | |
61 | - AttrList.add(a); | |
62 | - size = AttrList.size(); | |
63 | - } | |
64 | - | |
65 | - } | |
66 | - | |
67 | - /** | |
68 | - * Searches the stored attribute information. | |
69 | - * @param tag tag to match (compared by reference) | |
70 | - * @param attrname attribute name that must match exactly | |
71 | - * @param attrvalue prefix that the stored attribute value must start with | |
72 | - * @return true if at least one stored entry matches, otherwise false | |
73 | - */ | |
74 | - public boolean search(HTML.Tag tag, String attrname, String attrvalue) { | |
75 | - boolean ret = false; | |
76 | - for (Object AttrList1 : AttrList) { | |
77 | - AttrData a = (AttrData)AttrList1; | |
78 | - if(a.tag == tag) { | |
79 | - //if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) { | |
80 | - if(a.attrname.equals(attrname) && a.attrvalue.startsWith(attrvalue)) { | |
81 | - ret = true; | |
82 | - } | |
83 | - } | |
84 | - } | |
85 | - return ret; | |
86 | - } | |
87 | - | |
88 | - public boolean searchId(HTML.Tag tag, String attrvalue) { | |
89 | - return search(tag, "id", attrvalue); | |
90 | - } | |
91 | - | |
92 | - public boolean searchClass(HTML.Tag tag, String attrvalue) { | |
93 | - return search(tag, "class", attrvalue); | |
94 | - } | |
95 | - | |
96 | - /** | |
97 | - * Gets the values of an attribute. | |
98 | - * @param tag tag to match (compared by reference) | |
99 | - * @param attrname attribute name that must match exactly | |
100 | - * @return list of the values stored for that tag and attribute name (empty when none match) | |
101 | - */ | |
102 | - public ArrayList getvale(HTML.Tag tag, String attrname) { | |
103 | - ArrayList ret = new ArrayList(); | |
104 | - for (Object AttrList1 : AttrList) { | |
105 | - AttrData a = (AttrData)AttrList1; | |
106 | - if(a.tag == tag) { | |
107 | - if(a.attrname.equals(attrname)) { | |
108 | - ret.add(a.attrvalue); | |
109 | - } | |
110 | - } | |
111 | - } | |
112 | - return ret; | |
113 | - } | |
114 | - | |
115 | - /** | |
116 | - * Returns the latest count for the TAG passed as the argument. | |
117 | - * @param tag tag to look up (compared by reference) | |
118 | - * @return highest count stored for the tag, or 0 when no entry exists for it | |
119 | - */ | |
120 | - private int tagcnt(HTML.Tag tag) { | |
121 | - int wkcnt = 0; | |
122 | - for (Object AttrList1 : AttrList) { | |
123 | - AttrData a = (AttrData)AttrList1; | |
124 | - if(a.tag == tag) { | |
125 | - if(wkcnt < a.count) { | |
126 | - wkcnt = a.count; | |
127 | - } | |
128 | - } | |
129 | - } | |
130 | - return wkcnt; | |
131 | - } | |
132 | - | |
133 | - // Accessors returning the fields of the AttrList entry at index i | |
134 | - public HTML.Tag gettag(int i) { | |
135 | - AttrData a = (AttrData)AttrList.get(i); | |
136 | - return a.tag; | |
137 | - } | |
138 | - | |
139 | - public int getcount(int i) { | |
140 | - AttrData a = (AttrData)AttrList.get(i); | |
141 | - return a.count; | |
142 | - } | |
143 | - | |
144 | - public String getattrname(int i) { | |
145 | - AttrData a = (AttrData)AttrList.get(i); | |
146 | - return a.attrname; | |
147 | - } | |
148 | - | |
149 | - public String getattrvalue(int i) { | |
150 | - AttrData a = (AttrData)AttrList.get(i); | |
151 | - return a.attrvalue; | |
152 | - } | |
153 | - | |
154 | - // Fields | |
155 | - public class AttrData { | |
156 | - public HTML.Tag tag; | |
157 | - public int count; | |
158 | - public String attrname; | |
159 | - public String attrvalue; | |
160 | - } | |
161 | - public ArrayList AttrList; | |
162 | - public int size; // size of AttrList (refreshed by add) | |
163 | - | |
164 | -} |
@@ -1,222 +0,0 @@ | ||
1 | -/* | |
2 | - * Copyright (C) 2014 kgto. | |
3 | - * | |
4 | - * This library is free software; you can redistribute it and/or | |
5 | - * modify it under the terms of the GNU Lesser General Public | |
6 | - * License as published by the Free Software Foundation; either | |
7 | - * version 2.1 of the License, or (at your option) any later version. | |
8 | - * | |
9 | - * This library is distributed in the hope that it will be useful, | |
10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | - * Lesser General Public License for more details. | |
13 | - * | |
14 | - * You should have received a copy of the GNU Lesser General Public | |
15 | - * License along with this library; if not, write to the Free Software | |
16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | - * MA 02110-1301 USA | |
18 | - */ | |
19 | -/* | |
20 | - * $Id$ | |
21 | - */ | |
22 | - | |
23 | -package webScraping.core; | |
24 | - | |
25 | -import java.util.ArrayList; | |
26 | -import java.util.HashMap; | |
27 | -import javax.swing.text.MutableAttributeSet; | |
28 | -import javax.swing.text.html.HTML; | |
29 | -import javax.swing.text.html.HTMLEditorKit; | |
30 | - | |
31 | -/** | |
32 | - * HTML parser component. | |
33 | - * @author kgto | |
34 | - */ | |
35 | -class HtmlParserCallback extends HTMLEditorKit.ParserCallback { | |
36 | - /* ---------------------------------------------------------------------- * | |
37 | - * Fields | |
38 | - * ---------------------------------------------------------------------- */ | |
39 | - // Nesting level per tag | |
40 | - HashMap<HTML.Tag,Integer> tagMap = new HashMap<>(); | |
41 | - | |
42 | - // Search key information | |
43 | - String keytag; | |
44 | - String keyid; | |
45 | - String keyclass; | |
46 | - | |
47 | - // State saved when the search key matches (bufCount == 0 means no match is in progress) | |
48 | - int bufCount = 0; | |
49 | - HTML.Tag bufTag = null; | |
50 | - // Work buffer for the text collected while a search key match is in progress | |
51 | - StringBuilder bufText; | |
52 | - | |
53 | - // List of the data collected for search key matches | |
54 | - ArrayList sData; | |
55 | - | |
56 | - // Attribute data | |
57 | - AttributeData attrdata; | |
58 | - | |
59 | - /* ---------------------------------------------------------------------- * | |
60 | - * Constructor | |
61 | - * ---------------------------------------------------------------------- */ | |
62 | - protected HtmlParserCallback(SearchData skey) { | |
63 | - | |
64 | - // Unpack the key information | |
65 | - keytag = skey.getHtmltag(); | |
66 | - keyid = skey.getHtmlid(); | |
67 | - keyclass = skey.getHtmlclass(); | |
68 | - | |
69 | - sData = new ArrayList(); | |
70 | - } | |
71 | - | |
72 | - /* ---------------------------------------------------------------------- * | |
73 | - * Getter | |
74 | - * ---------------------------------------------------------------------- */ | |
75 | - ArrayList getrtnData() { | |
76 | - return this.sData; | |
77 | - } | |
78 | - | |
79 | - /* ---------------------------------------------------------------------- * | |
80 | - * Methods | |
81 | - * ---------------------------------------------------------------------- */ | |
82 | - @Override | |
83 | - public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
84 | - // Keep the nesting level per tag | |
85 | - int count = 1; | |
86 | - if(tagMap.containsKey(tag)) { | |
87 | - count = tagMap.get(tag); | |
88 | - count++; | |
89 | - } | |
90 | - tagMap.put(tag, count); | |
91 | - | |
92 | - // Analyze the attributes | |
93 | - AttributeData handleStartattrdata = new AttributeData(); | |
94 | - handleStartattrdata.add(tag, attr); | |
95 | - | |
96 | - DebugProcess.htmlinfo(tag, attr, "handleStartTag", count); | |
97 | - | |
98 | - if(bufCount == 0) { | |
99 | - if(tag.toString().equals(keytag)) { | |
100 | - //if(serachAttribute(attr)) { | |
101 | - if(serachAttribute(tag, handleStartattrdata)) { | |
102 | - bufCount = count; | |
103 | - bufTag = tag; | |
104 | - attrdata = new AttributeData(); | |
105 | - bufText = new StringBuilder(); | |
106 | - } | |
107 | - } | |
108 | - } | |
109 | - if(bufCount > 0) { | |
110 | - attrdata.add(tag, attr); | |
111 | - } | |
112 | - } | |
113 | - | |
114 | - @Override | |
115 | - public void handleEndTag(HTML.Tag tag, int pos){ | |
116 | - // Get the nesting level per tag | |
117 | - int count = 0; | |
118 | - if(tagMap.containsKey(tag)) { | |
119 | - count = tagMap.get(tag); | |
120 | - } | |
121 | - | |
122 | - DebugProcess.htmlinfo(tag, null, "handleEndTag", count); | |
123 | - | |
124 | - if(tag.equals(bufTag) && count <= bufCount) { | |
125 | - | |
126 | - // Store the accumulated match text in the list | |
127 | - sData.add(bufText.toString()); | |
128 | - | |
129 | - // Clear the saved search key match state | |
130 | - bufCount = 0; | |
131 | - bufTag = null; | |
132 | - bufText = null; | |
133 | - } | |
134 | - | |
135 | - // Decrement the nesting level per tag | |
136 | - tagMap.put(tag, --count); | |
137 | - } | |
138 | - | |
139 | - @Override | |
140 | - public void handleText(char[] data, int pos){ | |
141 | - | |
142 | - DebugProcess.htmlinfo(data, "handleText"); | |
143 | - | |
144 | - String splitchar = "\t"; | |
145 | - //Remove control characters (codes up to 0x1f, and 0x7f) | |
146 | - // and 0xa0 | |
147 | - StringBuilder buf = new StringBuilder(); | |
148 | - for(int i = 0; i < data.length; i++) { | |
149 | - if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) { | |
150 | - buf.append(data[i]); | |
151 | - } | |
152 | - } | |
153 | - if(bufCount > 0) { | |
154 | - if(bufText.length() > 0) { | |
155 | - bufText.append(splitchar); | |
156 | - } | |
157 | - bufText.append(buf.toString()); | |
158 | - } | |
159 | - } | |
160 | - | |
161 | - @Override | |
162 | - public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
163 | - if(bufCount > 0) { | |
164 | - attrdata.add(tag, attr); | |
165 | - } | |
166 | - DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0); | |
167 | - } | |
168 | - | |
169 | - /** | |
170 | - * Compares the ID/CLASS values in the page with the search key (exact string comparison). | |
171 | - * @param attr MutableAttributeSet of the page | |
172 | - * @return boolean true when it matches the search key | |
173 | - */ | |
174 | - boolean serachAttribute(MutableAttributeSet attr) { | |
175 | - String currentID = (String)attr.getAttribute(HTML.Attribute.ID); | |
176 | - String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS); | |
177 | - | |
178 | - if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
179 | - if(keyid.equals(currentID) && keyclass.equals(currentClass)) { | |
180 | - return true; | |
181 | - } | |
182 | - } | |
183 | - | |
184 | - if(keyid.isEmpty() == false) { | |
185 | - if(keyid.equals(currentID)) { | |
186 | - return true; | |
187 | - } | |
188 | - } | |
189 | - | |
190 | - if(keyclass.isEmpty() == false) { | |
191 | - if(keyclass.equals(currentClass)) { | |
192 | - return true; | |
193 | - } | |
194 | - } | |
195 | - | |
196 | - return false; | |
197 | - } | |
198 | - | |
199 | - /** | |
200 | - * Compares the ID/CLASS values in the page with the search key. | |
201 | - * @param tag tag whose stored attributes are checked | |
202 | - * @param attrdata attribute data queried through searchId / searchClass | |
203 | - * @return boolean true when it matches the search key | |
204 | - */ | |
205 | - boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) { | |
206 | - // When keys are given for both ID and CLASS | |
207 | - if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
208 | - if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) { | |
209 | - return true; | |
210 | - } | |
211 | - } | |
212 | - // Check the ID key | |
213 | - if(keyid.isEmpty() == false) { | |
214 | - return attrdata.searchId(tag, keyid); | |
215 | - } | |
216 | - // Check the CLASS key | |
217 | - if(keyclass.isEmpty() == false) { | |
218 | - return attrdata.searchClass(tag, keyclass); | |
219 | - } | |
220 | - return false; | |
221 | - } | |
222 | -} |
@@ -1,200 +0,0 @@ | ||
1 | -/* | |
2 | - * Copyright (C) 2014 kgto. | |
3 | - * | |
4 | - * This library is free software; you can redistribute it and/or | |
5 | - * modify it under the terms of the GNU Lesser General Public | |
6 | - * License as published by the Free Software Foundation; either | |
7 | - * version 2.1 of the License, or (at your option) any later version. | |
8 | - * | |
9 | - * This library is distributed in the hope that it will be useful, | |
10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | - * Lesser General Public License for more details. | |
13 | - * | |
14 | - * You should have received a copy of the GNU Lesser General Public | |
15 | - * License along with this library; if not, write to the Free Software | |
16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | - * MA 02110-1301 USA | |
18 | - */ | |
19 | -/* | |
20 | - * $Id$ | |
21 | - */ | |
22 | - | |
23 | -package webScraping.core; | |
24 | - | |
25 | -import java.util.ArrayList; | |
26 | - | |
27 | -/** | |
28 | - * Tag search data. | |
29 | - * @author kgto | |
30 | - */ | |
31 | -public class SearchData { | |
32 | - /* ---------------------------------------------------------------------- * | |
33 | - * Fields | |
34 | - * ---------------------------------------------------------------------- */ | |
35 | - private String item; | |
36 | - private String htmltag; | |
37 | - private String htmlid; | |
38 | - private String htmlclass; | |
39 | - private String around; | |
40 | - private String regexp; | |
41 | - | |
42 | - /* ---------------------------------------------------------------------- * | |
43 | - * Static members | |
44 | - * ---------------------------------------------------------------------- */ | |
45 | - public static class Context { | |
46 | - public Class columnClass; | |
47 | - public String columnName; | |
48 | - public String columnNameJp; | |
49 | - | |
50 | - public Context(Class columnClass, String columnName, String columnNameJp) { | |
51 | - this.columnClass = columnClass; | |
52 | - this.columnName = columnName; | |
53 | - this.columnNameJp = columnNameJp; | |
54 | - } | |
55 | - } | |
56 | - | |
57 | - public static final Context[] context = { | |
58 | - /* 0 */ new Context(String.class , "item" , "項目名"), | |
59 | - /* 1 */ new Context(String.class , "htmltag" , "タグ"), | |
60 | - /* 2 */ new Context(String.class , "htmlid" , "ID"), | |
61 | - /* 3 */ new Context(String.class , "htmlclass" , "クラス"), | |
62 | - /* 4 */ new Context(String.class , "around" , "位置"), | |
63 | - /* 5 */ new Context(String.class , "regexp" , "抽出条件") | |
64 | - }; | |
65 | - | |
66 | - /* ---------------------------------------------------------------------- */ | |
67 | - private static ArrayList<SearchData> slist = new ArrayList<>(); | |
68 | - | |
69 | - public static void addSearchData( | |
70 | - String item, String htmltag, String htmlid, | |
71 | - String htmlclass, String around, String regexp) { | |
72 | - SearchData sdat = new SearchData(); | |
73 | - sdat.setitem(item); | |
74 | - sdat.setHtmltag(htmltag); | |
75 | - sdat.setHtmlid(htmlid); | |
76 | - sdat.setHtmlclass(htmlclass); | |
77 | - sdat.setaround(around); | |
78 | - sdat.setregexp(regexp); | |
79 | - | |
80 | - slist.add(sdat); | |
81 | - } | |
82 | - | |
83 | - public static void add(SearchData sdat) { | |
84 | - slist.add(sdat); | |
85 | - } | |
86 | - | |
87 | - public static SearchData get(int i) { | |
88 | - return slist.get(i); | |
89 | - } | |
90 | - | |
91 | - public static int size() { | |
92 | - return slist.size(); | |
93 | - } | |
94 | - | |
95 | - public static SearchData remove(int index) { | |
96 | - return slist.remove(index); | |
97 | - } | |
98 | - | |
99 | - public static void clear() { | |
100 | - slist.clear(); | |
101 | - } | |
102 | - | |
103 | - /* ---------------------------------------------------------------------- * | |
104 | - * Constructors | |
105 | - * ---------------------------------------------------------------------- */ | |
106 | - public SearchData() { | |
107 | - initialize(); | |
108 | - } | |
109 | - | |
110 | - public SearchData(SearchData dat) { | |
111 | - this.item = dat.getitem(); | |
112 | - this.htmltag = dat.getHtmltag(); | |
113 | - this.htmlid = dat.getHtmlid(); | |
114 | - this.htmlclass = dat.getHtmlclass(); | |
115 | - this.around = dat.getaround(); | |
116 | - this.regexp = dat.getregexp(); | |
117 | - } | |
118 | - | |
119 | - /* ---------------------------------------------------------------------- * | |
120 | - * Setter | |
121 | - * ---------------------------------------------------------------------- */ | |
122 | - public void setitem(String item) { | |
123 | - this.item = item; | |
124 | - } | |
125 | - | |
126 | - public void setHtmltag(String htmltag) { | |
127 | - this.htmltag = htmltag; | |
128 | - } | |
129 | - | |
130 | - public void setHtmlid(String htmlid) { | |
131 | - this.htmlid = htmlid; | |
132 | - } | |
133 | - | |
134 | - public void setHtmlclass(String htmlclass) { | |
135 | - this.htmlclass = htmlclass; | |
136 | - } | |
137 | - | |
138 | - public void setaround(String around) { | |
139 | - this.around = around; | |
140 | - } | |
141 | - | |
142 | - public void setregexp(String regexp) { | |
143 | - this.regexp = regexp; | |
144 | - } | |
145 | - | |
146 | - /* ---------------------------------------------------------------------- * | |
147 | - * Getter | |
148 | - * ---------------------------------------------------------------------- */ | |
149 | - public String getitem() { | |
150 | - return item; | |
151 | - } | |
152 | - | |
153 | - public String getHtmltag() { | |
154 | - return htmltag; | |
155 | - } | |
156 | - | |
157 | - public String getHtmlid() { | |
158 | - return htmlid; | |
159 | - } | |
160 | - | |
161 | - public String getHtmlclass() { | |
162 | - return htmlclass; | |
163 | - } | |
164 | - | |
165 | - public String getaround() { | |
166 | - return around; | |
167 | - } | |
168 | - | |
169 | - public String getregexp() { | |
170 | - return regexp; | |
171 | - } | |
172 | - | |
173 | - /* ---------------------------------------------------------------------- * | |
174 | - * Methods | |
175 | - * ---------------------------------------------------------------------- */ | |
176 | - /** | |
177 | - * Initializes the data (sets every field to the empty string). | |
178 | - */ | |
179 | - public final void initialize() { | |
180 | - this.item = ""; | |
181 | - this.htmltag = ""; | |
182 | - this.htmlid = ""; | |
183 | - this.htmlclass = ""; | |
184 | - this.around = ""; | |
185 | - this.regexp = ""; | |
186 | - } | |
187 | - | |
188 | - public Object[] getObjData() { | |
189 | - Object[] obj = { | |
190 | - /* 0 */ getitem(), // item name | |
191 | - /* 1 */ getHtmltag(), // tag | |
192 | - /* 2 */ getHtmlid(), // ID | |
193 | - /* 3 */ getHtmlclass(), // class | |
194 | - /* 4 */ getaround(), // position | |
195 | - /* 5 */ getregexp() // extraction condition | |
196 | - }; | |
197 | - return obj; | |
198 | - } | |
199 | - | |
200 | -} |
@@ -1,273 +0,0 @@ | ||
1 | -/* | |
2 | - * Copyright (C) 2014 kgto. | |
3 | - * | |
4 | - * This library is free software; you can redistribute it and/or | |
5 | - * modify it under the terms of the GNU Lesser General Public | |
6 | - * License as published by the Free Software Foundation; either | |
7 | - * version 2.1 of the License, or (at your option) any later version. | |
8 | - * | |
9 | - * This library is distributed in the hope that it will be useful, | |
10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | - * Lesser General Public License for more details. | |
13 | - * | |
14 | - * You should have received a copy of the GNU Lesser General Public | |
15 | - * License along with this library; if not, write to the Free Software | |
16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | - * MA 02110-1301 USA | |
18 | - */ | |
19 | -/* | |
20 | - * $Id$ | |
21 | - */ | |
22 | - | |
23 | -package webScraping.core; | |
24 | - | |
25 | -import java.io.*; | |
26 | -import java.net.*; | |
27 | -import java.util.ArrayList; | |
28 | -import java.util.logging.Level; | |
29 | -import java.util.logging.Logger; | |
30 | -import java.util.regex.Matcher; | |
31 | -import java.util.regex.Pattern; | |
32 | -import javax.swing.text.html.parser.ParserDelegator; | |
33 | - | |
34 | -/** | |
35 | - * HTMLパーサ. | |
36 | - * @author kgto | |
37 | - */ | |
38 | -public class HtmlParser { | |
39 | - /* ---------------------------------------------------------------------- * | |
40 | - * フィールド | |
41 | - * ---------------------------------------------------------------------- */ | |
42 | - URL url; | |
43 | - String pageData; | |
44 | - ArrayList sData; | |
45 | - | |
46 | - // 作業ワーク | |
47 | - private String htmltag; | |
48 | - private String htmlid; | |
49 | - private String htmlclass; | |
50 | - | |
51 | - /* ---------------------------------------------------------------------- * | |
52 | - * コンストラクタ | |
53 | - * ---------------------------------------------------------------------- */ | |
54 | - public HtmlParser(URL UrlAdress) { | |
55 | - DebugProcess.debuglog_set(); | |
56 | - this.url = UrlAdress; | |
57 | - getPageData(); | |
58 | - } | |
59 | - | |
60 | - public HtmlParser(String UrlAdress) { | |
61 | - DebugProcess.debuglog_set(); | |
62 | - try { | |
63 | - url = new URL(UrlAdress); | |
64 | - getPageData(); | |
65 | - | |
66 | - } catch (MalformedURLException ex) { | |
67 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
68 | - } | |
69 | - } | |
70 | - | |
71 | - public HtmlParser() { | |
72 | - DebugProcess.debuglog_set(); | |
73 | - url = null; | |
74 | - } | |
75 | - | |
76 | - /* ---------------------------------------------------------------------- * | |
77 | - * Getter | |
78 | - * ---------------------------------------------------------------------- */ | |
79 | - public String getStringPageData() { | |
80 | - return pageData; | |
81 | - } | |
82 | - | |
83 | - /* ---------------------------------------------------------------------- * | |
84 | - * Setter | |
85 | - * ---------------------------------------------------------------------- */ | |
86 | - public void seturl(URL UrlAdress) { | |
87 | - this.url = UrlAdress; | |
88 | - getPageData(); | |
89 | - } | |
90 | - | |
91 | - /* ---------------------------------------------------------------------- * | |
92 | - * メソッド | |
93 | - * ---------------------------------------------------------------------- */ | |
94 | - public void seturl(String UrlAdress) { | |
95 | - try { | |
96 | - url = new URL(UrlAdress); | |
97 | - getPageData(); | |
98 | - | |
99 | - } catch (MalformedURLException ex) { | |
100 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
101 | - } | |
102 | - } | |
103 | - | |
104 | - /** | |
105 | - * HTMLページ内検索. | |
106 | - * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、 | |
107 | - * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を | |
108 | - * 行った結果を返す。<br> | |
109 | - * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br> | |
110 | - * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br> | |
111 | - * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。 | |
112 | - * @param skey 検索キーデータ(SearchData) | |
113 | - * @return String 検索キーに一致するデータの文字列 | |
114 | - */ | |
115 | - public String search(SearchData skey) { | |
116 | - | |
117 | - // htmlページ内を検索 | |
118 | - if(isHtmlkeyEq(skey) == false) { | |
119 | - searchPageData(skey); | |
120 | - } | |
121 | - /* | |
122 | - around 出現位置指定 入力有り:指定された位置の情報のみ返す。 | |
123 | - 入力無し:取得した全ての情報を返す。 | |
124 | - */ | |
125 | - String regexp = skey.getregexp(); | |
126 | - if(skey.getaround().length() > 0) { | |
127 | - int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換 | |
128 | - if(wkAround < sData.size()) { | |
129 | - String str = (String)sData.get(wkAround); | |
130 | - String rtn = RegularExpression(str, regexp); | |
131 | - return rtn; | |
132 | - } | |
133 | - } else { | |
134 | - StringBuilder strbuf = new StringBuilder(); | |
135 | - for (Object sData1 : sData) { | |
136 | - String str = (String)sData1; | |
137 | - String rtn = RegularExpression(str, regexp); | |
138 | - if(strbuf.length() > 0) { | |
139 | - strbuf.append("\t"); | |
140 | - } | |
141 | - strbuf.append(rtn); | |
142 | - } | |
143 | - return strbuf.toString(); | |
144 | - } | |
145 | - return null; | |
146 | - } | |
147 | - | |
148 | - /** | |
149 | - * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する. | |
150 | - * @param skey HTMLタグ/ID/CLASSが格納された検索キー | |
151 | - * @return boolean HTMLタグ/ID/CLASS値が一致する時、true | |
152 | - */ | |
153 | - boolean isHtmlkeyEq(SearchData skey) { | |
154 | - | |
155 | - String stag = skey.getHtmltag(); | |
156 | - String sid = skey.getHtmlid(); | |
157 | - String sclass = skey.getHtmlclass(); | |
158 | - | |
159 | - boolean rtn = true; | |
160 | - | |
161 | - // htmltag | |
162 | - if(htmltag == null) { | |
163 | - rtn = false; | |
164 | - } else { | |
165 | - if(htmltag.equals(stag) == false) { | |
166 | - rtn = false; | |
167 | - } | |
168 | - } | |
169 | - | |
170 | - // htmlid | |
171 | - if(htmlid == null) { | |
172 | - rtn = false; | |
173 | - } else { | |
174 | - if(htmlid.equals(sid) == false) { | |
175 | - rtn = false; | |
176 | - } | |
177 | - } | |
178 | - | |
179 | - // htmlclass | |
180 | - if(htmlclass == null) { | |
181 | - rtn = false; | |
182 | - } else { | |
183 | - if(htmlclass.equals(sclass) == false) { | |
184 | - rtn = false; | |
185 | - } | |
186 | - } | |
187 | - | |
188 | - if(!rtn) { | |
189 | - htmltag = stag; | |
190 | - htmlid = sid; | |
191 | - htmlclass = sclass; | |
192 | - } | |
193 | - | |
194 | - return rtn; | |
195 | - } | |
196 | - | |
197 | - /** | |
198 | - * 正規表現検索. | |
199 | - * @param strdata | |
200 | - * @param regexp | |
201 | - * @return | |
202 | - */ | |
203 | - String RegularExpression(String strdata, String regexp) { | |
204 | - String expdata = null; | |
205 | - | |
206 | - //regexpのチェック | |
207 | - if(regexp.isEmpty()) { | |
208 | - expdata = strdata; | |
209 | - return expdata; | |
210 | - } | |
211 | - | |
212 | - //正規表現検索 | |
213 | - Pattern ptn = Pattern.compile(regexp); | |
214 | - Matcher matchdata = ptn.matcher(strdata); | |
215 | - if (matchdata.find()) { | |
216 | - if(matchdata.groupCount() >= 1) { | |
217 | - expdata = matchdata.group(1); | |
218 | - } | |
219 | - } | |
220 | - return expdata; | |
221 | - } | |
222 | - | |
223 | - /** | |
224 | - * インターネット接続. | |
225 | - */ | |
226 | - private void getPageData() { | |
227 | - HttpURLConnection con = null; | |
228 | - try { | |
229 | - con = (HttpURLConnection)url.openConnection(); | |
230 | - con.setRequestMethod("GET"); | |
231 | - BufferedReader reader = new BufferedReader( | |
232 | - new InputStreamReader(con.getInputStream(), "utf-8")); | |
233 | - String wkline; | |
234 | - StringBuilder sb = new StringBuilder(); | |
235 | - while((wkline = reader.readLine()) != null) { | |
236 | - sb.append(wkline).append("\n"); | |
237 | - } | |
238 | - pageData = sb.toString(); | |
239 | - | |
240 | - } catch(FileNotFoundException ex) { | |
241 | - pageData = null; | |
242 | - } catch (IOException ex) { | |
243 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
244 | - } finally { | |
245 | - if(con != null) { | |
246 | - con.disconnect(); | |
247 | - } | |
248 | - } | |
249 | - } | |
250 | - | |
251 | - /** | |
252 | - * HTMLパーサ. | |
253 | - * @param skey | |
254 | - */ | |
255 | - private void searchPageData(SearchData skey) { | |
256 | - | |
257 | - DebugProcess.searchDatainfo(skey); | |
258 | - | |
259 | - Reader reader; | |
260 | - try { | |
261 | - reader = new BufferedReader(new StringReader(pageData)); | |
262 | - HtmlParserCallback cb = new HtmlParserCallback(skey); | |
263 | - ParserDelegator pd = new ParserDelegator(); | |
264 | - pd.parse(reader, cb, true); | |
265 | - reader.close(); | |
266 | - | |
267 | - sData = cb.getrtnData(); | |
268 | - | |
269 | - } catch (IOException ex) { | |
270 | - Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
271 | - } | |
272 | - } | |
273 | -} |
@@ -1,568 +0,0 @@ | ||
1 | -/* | |
2 | - * Copyright (C) 2014 kgto. | |
3 | - * | |
4 | - * This library is free software; you can redistribute it and/or | |
5 | - * modify it under the terms of the GNU Lesser General Public | |
6 | - * License as published by the Free Software Foundation; either | |
7 | - * version 2.1 of the License, or (at your option) any later version. | |
8 | - * | |
9 | - * This library is distributed in the hope that it will be useful, | |
10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | - * Lesser General Public License for more details. | |
13 | - * | |
14 | - * You should have received a copy of the GNU Lesser General Public | |
15 | - * License along with this library; if not, write to the Free Software | |
16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | - * MA 02110-1301 USA | |
18 | - */ | |
19 | -/* | |
20 | - * $Id$ | |
21 | - */ | |
22 | -package webScraping.utility; | |
23 | - | |
24 | -import webScraping.core.HtmlParser; | |
25 | -import webScraping.core.SearchData; | |
26 | -import java.awt.Desktop; | |
27 | -import java.io.File; | |
28 | -import java.io.IOException; | |
29 | -import java.net.URI; | |
30 | -import java.net.URISyntaxException; | |
31 | -import java.util.logging.Level; | |
32 | -import java.util.logging.Logger; | |
33 | -import javax.swing.JFileChooser; | |
34 | -import javax.swing.filechooser.FileFilter; | |
35 | -import javax.swing.filechooser.FileNameExtensionFilter; | |
36 | -import javax.swing.table.DefaultTableModel; | |
37 | - | |
38 | -/** | |
39 | - * HTMLページ上の特定の項目を検索し、その項目内容の値を取得する. | |
40 | - * @author kgto | |
41 | - */ | |
42 | -public class HtmlSearch extends javax.swing.JFrame { | |
43 | - private final ScrapingXml xmlwriter = new ScrapingXml(); | |
44 | - | |
45 | - SearchDataTableModel sdatatblmodel; | |
46 | - | |
47 | - /** | |
48 | - * Creates new form Frame1 | |
49 | - */ | |
50 | - public HtmlSearch() { | |
51 | - sdatatblmodel = new SearchDataTableModel(); | |
52 | - | |
53 | - initComponents(); | |
54 | - | |
55 | - // カレントディレクトリ取得 | |
56 | - String dir = System.getProperty("user.dir"); | |
57 | - File file = new java.io.File(dir + "\\data"); | |
58 | - jFileChooser1.setCurrentDirectory(file); | |
59 | - | |
60 | - FileFilter filter1 = new FileNameExtensionFilter("XMLファイル", "xml"); | |
61 | - FileFilter filter2 = new FileNameExtensionFilter("TEXTファイル", "txt"); | |
62 | - jFileChooser1.addChoosableFileFilter(filter1); | |
63 | - jFileChooser1.addChoosableFileFilter(filter2); | |
64 | - jFileChooser1.setFileFilter(filter1); | |
65 | - | |
66 | - } | |
67 | - | |
68 | - /** | |
69 | - * This method is called from within the constructor to initialize the form. | |
70 | - * WARNING: Do NOT modify this code. The content of this method is always | |
71 | - * regenerated by the Form Editor. | |
72 | - */ | |
73 | - @SuppressWarnings("unchecked") | |
74 | - // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents | |
75 | - private void initComponents() { | |
76 | - | |
77 | - jFileChooser1 = new javax.swing.JFileChooser(); | |
78 | - jRadioButton1 = new javax.swing.JRadioButton(); | |
79 | - jLabel1 = new javax.swing.JLabel(); | |
80 | - jTxtUrl = new javax.swing.JTextField(); | |
81 | - jBtnSearch = new javax.swing.JButton(); | |
82 | - jTabbedPane1 = new javax.swing.JTabbedPane(); | |
83 | - jPanelTab1 = new javax.swing.JPanel(); | |
84 | - jScrollPane1 = new javax.swing.JScrollPane(); | |
85 | - jTable1 = new javax.swing.JTable(); | |
86 | - jBtnRowIns = new javax.swing.JButton(); | |
87 | - jBtnRowDel = new javax.swing.JButton(); | |
88 | - jBtnRowCpy = new javax.swing.JButton(); | |
89 | - jPanelTab2 = new javax.swing.JPanel(); | |
90 | - jScrollPaneLabel = new javax.swing.JScrollPane(); | |
91 | - jTxtLabel = new javax.swing.JTextArea(); | |
92 | - jScrollPane404msg = new javax.swing.JScrollPane(); | |
93 | - jTxt404msg = new javax.swing.JTextArea(); | |
94 | - jPanelRtn = new javax.swing.JPanel(); | |
95 | - jScrollPaneRtn = new javax.swing.JScrollPane(); | |
96 | - jTxtRtn = new javax.swing.JTextArea(); | |
97 | - jMenuBar1 = new javax.swing.JMenuBar(); | |
98 | - jMenu1 = new javax.swing.JMenu(); | |
99 | - jMenuLoad = new javax.swing.JMenuItem(); | |
100 | - jMenuSave = new javax.swing.JMenuItem(); | |
101 | - jMenu3 = new javax.swing.JMenu(); | |
102 | - jMenuItem1 = new javax.swing.JMenuItem(); | |
103 | - jMenu2 = new javax.swing.JMenu(); | |
104 | - | |
105 | - jFileChooser1.setCurrentDirectory(null); | |
106 | - jFileChooser1.setDialogTitle(""); | |
107 | - | |
108 | - jRadioButton1.setText("jRadioButton1"); | |
109 | - | |
110 | - setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE); | |
111 | - setTitle("タグ検索"); | |
112 | - | |
113 | - jLabel1.setText(" URL:"); | |
114 | - | |
115 | - jBtnSearch.setText("検索"); | |
116 | - jBtnSearch.addActionListener(new java.awt.event.ActionListener() { | |
117 | - public void actionPerformed(java.awt.event.ActionEvent evt) { | |
118 | - jBtnSearchActionPerformed(evt); | |
119 | - } | |
120 | - }); | |
121 | - | |
122 | - jPanelTab1.setBorder(javax.swing.BorderFactory.createTitledBorder("検索情報")); | |
123 | - | |
124 | - jTable1.setModel(sdatatblmodel); | |
125 | - jTable1.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION); | |
126 | - jTable1.getTableHeader().setReorderingAllowed(false); | |
127 | - jScrollPane1.setViewportView(jTable1); | |
128 | - | |
129 | - jBtnRowIns.setText("行挿入"); | |
130 | - jBtnRowIns.addActionListener(new java.awt.event.ActionListener() { | |
131 | - public void actionPerformed(java.awt.event.ActionEvent evt) { | |
132 | - jBtnRowInsActionPerformed(evt); | |
133 | - } | |
134 | - }); | |
135 | - | |
136 | - jBtnRowDel.setText("行削除"); | |
137 | - jBtnRowDel.addActionListener(new java.awt.event.ActionListener() { | |
138 | - public void actionPerformed(java.awt.event.ActionEvent evt) { | |
139 | - jBtnRowDelActionPerformed(evt); | |
140 | - } | |
141 | - }); | |
142 | - | |
143 | - jBtnRowCpy.setText("行コピー"); | |
144 | - jBtnRowCpy.addActionListener(new java.awt.event.ActionListener() { | |
145 | - public void actionPerformed(java.awt.event.ActionEvent evt) { | |
146 | - jBtnRowCpyActionPerformed(evt); | |
147 | - } | |
148 | - }); | |
149 | - | |
150 | - javax.swing.GroupLayout jPanelTab1Layout = new javax.swing.GroupLayout(jPanelTab1); | |
151 | - jPanelTab1.setLayout(jPanelTab1Layout); | |
152 | - jPanelTab1Layout.setHorizontalGroup( | |
153 | - jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
154 | - .addGroup(jPanelTab1Layout.createSequentialGroup() | |
155 | - .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE) | |
156 | - .addComponent(jBtnRowCpy) | |
157 | - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
158 | - .addComponent(jBtnRowDel) | |
159 | - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
160 | - .addComponent(jBtnRowIns)) | |
161 | - .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 0, Short.MAX_VALUE) | |
162 | - ); | |
163 | - jPanelTab1Layout.setVerticalGroup( | |
164 | - jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
165 | - .addGroup(jPanelTab1Layout.createSequentialGroup() | |
166 | - .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 173, Short.MAX_VALUE) | |
167 | - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
168 | - .addGroup(jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE) | |
169 | - .addComponent(jBtnRowDel) | |
170 | - .addComponent(jBtnRowIns) | |
171 | - .addComponent(jBtnRowCpy))) | |
172 | - ); | |
173 | - | |
174 | - jTabbedPane1.addTab("キー設定", jPanelTab1); | |
175 | - | |
176 | - jPanelTab2.setBorder(javax.swing.BorderFactory.createTitledBorder("メッセージ")); | |
177 | - | |
178 | - jScrollPaneLabel.setHorizontalScrollBarPolicy(javax.swing.ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER); | |
179 | - jScrollPaneLabel.setVerticalScrollBarPolicy(javax.swing.ScrollPaneConstants.VERTICAL_SCROLLBAR_NEVER); | |
180 | - | |
181 | - jTxtLabel.setEditable(false); | |
182 | - jTxtLabel.setBackground(java.awt.Color.lightGray); | |
183 | - jTxtLabel.setColumns(20); | |
184 | - jTxtLabel.setFont(new java.awt.Font("MS UI Gothic", 0, 12)); // NOI18N | |
185 | - jTxtLabel.setLineWrap(true); | |
186 | - jTxtLabel.setRows(2); | |
187 | - jTxtLabel.setText("取得ページに以下のメッセージが含まれていた場合、対象データが取得出来なかったと通知します。"); | |
188 | - jTxtLabel.setAutoscrolls(false); | |
189 | - jTxtLabel.setBorder(null); | |
190 | - jTxtLabel.setCursor(new java.awt.Cursor(java.awt.Cursor.DEFAULT_CURSOR)); | |
191 | - jTxtLabel.setFocusable(false); | |
192 | - jTxtLabel.setHighlighter(null); | |
193 | - jTxtLabel.setKeymap(null); | |
194 | - jTxtLabel.setOpaque(false); | |
195 | - jTxtLabel.setRequestFocusEnabled(false); | |
196 | - jTxtLabel.setVerifyInputWhenFocusTarget(false); | |
197 | - jScrollPaneLabel.setViewportView(jTxtLabel); | |
198 | - | |
199 | - jTxt404msg.setColumns(20); | |
200 | - jTxt404msg.setRows(3); | |
201 | - jTxt404msg.setText("一致する銘柄は見つかりませんでした\n"); | |
202 | - jScrollPane404msg.setViewportView(jTxt404msg); | |
203 | - | |
204 | - javax.swing.GroupLayout jPanelTab2Layout = new javax.swing.GroupLayout(jPanelTab2); | |
205 | - jPanelTab2.setLayout(jPanelTab2Layout); | |
206 | - jPanelTab2Layout.setHorizontalGroup( | |
207 | - jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
208 | - .addComponent(jScrollPane404msg) | |
209 | - .addGroup(javax.swing.GroupLayout.Alignment.TRAILING, jPanelTab2Layout.createSequentialGroup() | |
210 | - .addContainerGap() | |
211 | - .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.DEFAULT_SIZE, 359, Short.MAX_VALUE) | |
212 | - .addContainerGap()) | |
213 | - ); | |
214 | - jPanelTab2Layout.setVerticalGroup( | |
215 | - jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
216 | - .addGroup(jPanelTab2Layout.createSequentialGroup() | |
217 | - .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.PREFERRED_SIZE, 38, javax.swing.GroupLayout.PREFERRED_SIZE) | |
218 | - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) | |
219 | - .addComponent(jScrollPane404msg)) | |
220 | - ); | |
221 | - | |
222 | - jTabbedPane1.addTab("結果無し判定", jPanelTab2); | |
223 | - | |
224 | - jPanelRtn.setBorder(javax.swing.BorderFactory.createTitledBorder("検索結果")); | |
225 | - | |
226 | - jTxtRtn.setColumns(20); | |
227 | - jTxtRtn.setRows(5); | |
228 | - jScrollPaneRtn.setViewportView(jTxtRtn); | |
229 | - | |
230 | - javax.swing.GroupLayout jPanelRtnLayout = new javax.swing.GroupLayout(jPanelRtn); | |
231 | - jPanelRtn.setLayout(jPanelRtnLayout); | |
232 | - jPanelRtnLayout.setHorizontalGroup( | |
233 | - jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
234 | - .addComponent(jScrollPaneRtn) | |
235 | - ); | |
236 | - jPanelRtnLayout.setVerticalGroup( | |
237 | - jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
238 | - .addComponent(jScrollPaneRtn, javax.swing.GroupLayout.DEFAULT_SIZE, 163, Short.MAX_VALUE) | |
239 | - ); | |
240 | - | |
241 | - jMenu1.setText("ファイル"); | |
242 | - | |
243 | - jMenuLoad.setText("LOAD"); | |
244 | - jMenuLoad.addActionListener(new java.awt.event.ActionListener() { | |
245 | - public void actionPerformed(java.awt.event.ActionEvent evt) { | |
246 | - jMenuLoadActionPerformed(evt); | |
247 | - } | |
248 | - }); | |
249 | - jMenu1.add(jMenuLoad); | |
250 | - | |
251 | - jMenuSave.setText("SAVE"); | |
252 | - jMenuSave.addActionListener(new java.awt.event.ActionListener() { | |
253 | - public void actionPerformed(java.awt.event.ActionEvent evt) { | |
254 | - jMenuSaveActionPerformed(evt); | |
255 | - } | |
256 | - }); | |
257 | - jMenu1.add(jMenuSave); | |
258 | - | |
259 | - jMenuBar1.add(jMenu1); | |
260 | - | |
261 | - jMenu3.setText("ツール"); | |
262 | - | |
263 | - jMenuItem1.setText("ブラウザで表示"); | |
264 | - jMenuItem1.addActionListener(new java.awt.event.ActionListener() { | |
265 | - public void actionPerformed(java.awt.event.ActionEvent evt) { | |
266 | - jMenuItem1ActionPerformed(evt); | |
267 | - } | |
268 | - }); | |
269 | - jMenu3.add(jMenuItem1); | |
270 | - | |
271 | - jMenuBar1.add(jMenu3); | |
272 | - | |
273 | - jMenu2.setText("検索"); | |
274 | - jMenu2.addMouseListener(new java.awt.event.MouseAdapter() { | |
275 | - public void mouseClicked(java.awt.event.MouseEvent evt) { | |
276 | - jMenu2MouseClicked(evt); | |
277 | - } | |
278 | - }); | |
279 | - jMenuBar1.add(jMenu2); | |
280 | - | |
281 | - setJMenuBar(jMenuBar1); | |
282 | - | |
283 | - javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane()); | |
284 | - getContentPane().setLayout(layout); | |
285 | - layout.setHorizontalGroup( | |
286 | - layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
287 | - .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE) | |
288 | - .addGroup(layout.createSequentialGroup() | |
289 | - .addComponent(jLabel1) | |
290 | - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
291 | - .addComponent(jTxtUrl) | |
292 | - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
293 | - .addComponent(jBtnSearch)) | |
294 | - .addComponent(jTabbedPane1) | |
295 | - ); | |
296 | - layout.setVerticalGroup( | |
297 | - layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) | |
298 | - .addGroup(layout.createSequentialGroup() | |
299 | - .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE) | |
300 | - .addComponent(jLabel1) | |
301 | - .addComponent(jTxtUrl, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE) | |
302 | - .addComponent(jBtnSearch)) | |
303 | - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
304 | - .addComponent(jTabbedPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 250, javax.swing.GroupLayout.PREFERRED_SIZE) | |
305 | - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) | |
306 | - .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE) | |
307 | - .addContainerGap()) | |
308 | - ); | |
309 | - | |
310 | - pack(); | |
311 | - }// </editor-fold>//GEN-END:initComponents | |
312 | - | |
313 | - private void jBtnRowInsActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowInsActionPerformed | |
314 | - int SelectedRow = jTable1.getSelectedRow(); | |
315 | - SearchData sdata = new SearchData(); | |
316 | - if(SelectedRow >= 0) { | |
317 | - sdatatblmodel.insertRow(SelectedRow, sdata); | |
318 | - } else { | |
319 | - sdatatblmodel.addRow(sdata); | |
320 | - } | |
321 | - }//GEN-LAST:event_jBtnRowInsActionPerformed | |
322 | - | |
323 | - private void jBtnRowDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowDelActionPerformed | |
324 | - int SelectedRow = jTable1.getSelectedRow(); | |
325 | - if(!(SelectedRow < 0)) { | |
326 | - sdatatblmodel.removeRow(SelectedRow); | |
327 | - } | |
328 | - }//GEN-LAST:event_jBtnRowDelActionPerformed | |
329 | - | |
330 | - private void jMenuLoadActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuLoadActionPerformed | |
331 | - jFileChooser1.setDialogTitle("読込"); | |
332 | - int selected = jFileChooser1.showOpenDialog(this); | |
333 | - if (selected == JFileChooser.APPROVE_OPTION) { | |
334 | - File file = jFileChooser1.getSelectedFile(); | |
335 | - xmlwriter.load(file); | |
336 | - jTxtUrl.setText(xmlwriter.getTestUrl()); | |
337 | - xmlwriter.getSdata(); | |
338 | - sdatatblmodel.setRowCount(0); | |
339 | - for(int i = 0; i < SearchData.size(); i++) { | |
340 | - SearchData sdata = SearchData.get(i); | |
341 | - sdatatblmodel.addRow(sdata); | |
342 | - } | |
343 | - } | |
344 | - }//GEN-LAST:event_jMenuLoadActionPerformed | |
345 | - | |
346 | - private void jMenuSaveActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuSaveActionPerformed | |
347 | - jFileChooser1.setDialogTitle("保存"); | |
348 | - int selected = jFileChooser1.showSaveDialog(this); | |
349 | - if (selected == JFileChooser.APPROVE_OPTION) { | |
350 | - File file = jFileChooser1.getSelectedFile(); | |
351 | - xmlwriter.setTestUrl(jTxtUrl.getText()); | |
352 | - | |
353 | - SearchData.clear(); | |
354 | - for(int row = 0; row < sdatatblmodel.getRowCount(); row++) { | |
355 | - SearchData sdata = sdatatblmodel.getSearchData(row); | |
356 | - SearchData.add(sdata); | |
357 | - } | |
358 | - xmlwriter.setSdata(); | |
359 | - xmlwriter.save(file); | |
360 | - } | |
361 | - }//GEN-LAST:event_jMenuSaveActionPerformed | |
362 | - | |
363 | - private void jBtnRowCpyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowCpyActionPerformed | |
364 | - int SelectedRow = jTable1.getSelectedRow(); | |
365 | - if(SelectedRow >= 0) { | |
366 | - SearchData sdata = sdatatblmodel.getSearchData(SelectedRow); | |
367 | - sdatatblmodel.insertRow(SelectedRow, sdata); | |
368 | - } | |
369 | - }//GEN-LAST:event_jBtnRowCpyActionPerformed | |
370 | - | |
371 | - private void jMenuItem1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuItem1ActionPerformed | |
372 | - Desktop desktop = Desktop.getDesktop(); | |
373 | - String uriString = jTxtUrl.getText(); | |
374 | - try { | |
375 | - URI uri = new URI(uriString); | |
376 | - desktop.browse(uri); | |
377 | - | |
378 | - } catch (URISyntaxException | IOException ex) { | |
379 | - Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex); | |
380 | - } | |
381 | - }//GEN-LAST:event_jMenuItem1ActionPerformed | |
382 | - | |
383 | - private void jMenu2MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jMenu2MouseClicked | |
384 | - Search_execution(); | |
385 | - }//GEN-LAST:event_jMenu2MouseClicked | |
386 | - | |
387 | - private void jBtnSearchActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnSearchActionPerformed | |
388 | - Search_execution(); | |
389 | - }//GEN-LAST:event_jBtnSearchActionPerformed | |
390 | - | |
391 | - /** | |
392 | - * 検索実行. | |
393 | - */ | |
394 | - void Search_execution() { | |
395 | - jTxtRtn.setText(null); | |
396 | - HtmlParser par = new HtmlParser(jTxtUrl.getText()); | |
397 | - | |
398 | - // データ無し(404)判定 | |
399 | - String strdata = par.getStringPageData(); | |
400 | - if(strdata == null) { | |
401 | - jTxtRtn.append("読込みページがありません"); | |
402 | - return; | |
403 | - } | |
404 | - String text = jTxt404msg.getText(); | |
405 | - String[] strsearch = text.split("\n"); | |
406 | - for(String strsearch1 : strsearch) { | |
407 | - if(strdata.contains(strsearch1)) { | |
408 | - jTxtRtn.append(strsearch1); | |
409 | - return; | |
410 | - } | |
411 | - } | |
412 | - | |
413 | - // 検索結果 | |
414 | - for(int row = 0; row < sdatatblmodel.getRowCount(); row++) { | |
415 | - SearchData sdata = sdatatblmodel.getSearchData(row); | |
416 | - String ans = sdata.getitem(); | |
417 | - String rtn = par.search(sdata); | |
418 | - jTxtRtn.append(ans + "\t" + rtn + "\n"); | |
419 | - } | |
420 | - | |
421 | - jTxtRtn.setCaretPosition(0); | |
422 | - } | |
423 | - | |
424 | - /** | |
425 | - * @param args the command line arguments | |
426 | - */ | |
427 | - public static void main(String args[]) { | |
428 | - /* Set the Nimbus look and feel */ | |
429 | - //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) "> | |
430 | - /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel. | |
431 | - * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html | |
432 | - */ | |
433 | - try { | |
434 | - for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) { | |
435 | - if ("Nimbus".equals(info.getName())) { | |
436 | - javax.swing.UIManager.setLookAndFeel(info.getClassName()); | |
437 | - break; | |
438 | - } | |
439 | - } | |
440 | - } catch (ClassNotFoundException | |
441 | - | InstantiationException | |
442 | - | IllegalAccessException | |
443 | - | javax.swing.UnsupportedLookAndFeelException ex) { | |
444 | - java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); | |
445 | - } | |
446 | - //</editor-fold> | |
447 | - | |
448 | - /* Create and display the form */ | |
449 | - java.awt.EventQueue.invokeLater(new Runnable() { | |
450 | - @Override | |
451 | - public void run() { | |
452 | - new HtmlSearch().setVisible(true); | |
453 | - } | |
454 | - }); | |
455 | - } | |
456 | - | |
457 | - // Variables declaration - do not modify//GEN-BEGIN:variables | |
458 | - private javax.swing.JButton jBtnRowCpy; | |
459 | - private javax.swing.JButton jBtnRowDel; | |
460 | - private javax.swing.JButton jBtnRowIns; | |
461 | - private javax.swing.JButton jBtnSearch; | |
462 | - private javax.swing.JFileChooser jFileChooser1; | |
463 | - private javax.swing.JLabel jLabel1; | |
464 | - private javax.swing.JMenu jMenu1; | |
465 | - private javax.swing.JMenu jMenu2; | |
466 | - private javax.swing.JMenu jMenu3; | |
467 | - private javax.swing.JMenuBar jMenuBar1; | |
468 | - private javax.swing.JMenuItem jMenuItem1; | |
469 | - private javax.swing.JMenuItem jMenuLoad; | |
470 | - private javax.swing.JMenuItem jMenuSave; | |
471 | - private javax.swing.JPanel jPanelRtn; | |
472 | - private javax.swing.JPanel jPanelTab1; | |
473 | - private javax.swing.JPanel jPanelTab2; | |
474 | - private javax.swing.JRadioButton jRadioButton1; | |
475 | - private javax.swing.JScrollPane jScrollPane1; | |
476 | - private javax.swing.JScrollPane jScrollPane404msg; | |
477 | - private javax.swing.JScrollPane jScrollPaneLabel; | |
478 | - private javax.swing.JScrollPane jScrollPaneRtn; | |
479 | - private javax.swing.JTabbedPane jTabbedPane1; | |
480 | - private javax.swing.JTable jTable1; | |
481 | - private javax.swing.JTextArea jTxt404msg; | |
482 | - private javax.swing.JTextArea jTxtLabel; | |
483 | - private javax.swing.JTextArea jTxtRtn; | |
484 | - private javax.swing.JTextField jTxtUrl; | |
485 | - // End of variables declaration//GEN-END:variables | |
486 | -} | |
487 | - | |
488 | -class SearchDataTableModel extends DefaultTableModel { | |
489 | - /* ---------------------------------------------------------------------- * | |
490 | - * データ属性 | |
491 | - * ---------------------------------------------------------------------- */ | |
492 | - public String[] columnName = { | |
493 | - /* 0 */ "項目名", | |
494 | - /* 1 */ "タグ", | |
495 | - /* 2 */ "ID", | |
496 | - /* 3 */ "クラス", | |
497 | - /* 4 */ "位置", | |
498 | - /* 5 */ "抽出条件" | |
499 | - }; | |
500 | - | |
501 | - public Class[] columnClass = { | |
502 | - /* 0 */ String.class, | |
503 | - /* 1 */ String.class, | |
504 | - /* 2 */ String.class, | |
505 | - /* 3 */ String.class, | |
506 | - /* 4 */ String.class, | |
507 | - /* 5 */ String.class | |
508 | - }; | |
509 | - | |
510 | - int column_item = 0; | |
511 | - int column_htmltag = 1; | |
512 | - int column_htmlid = 2; | |
513 | - int column_htmlclass = 3; | |
514 | - int column_around = 4; | |
515 | - int column_regexp = 5; | |
516 | - | |
517 | - /* ---------------------------------------------------------------------- * | |
518 | - * 処理 | |
519 | - * ---------------------------------------------------------------------- */ | |
520 | - @Override | |
521 | - public String getColumnName(int modelIndex) { | |
522 | - return columnName[modelIndex]; | |
523 | - } | |
524 | - | |
525 | - @Override | |
526 | - public Class<?> getColumnClass(int modelIndex) { | |
527 | - return columnClass[modelIndex]; | |
528 | - } | |
529 | - | |
530 | - @Override | |
531 | - public int getColumnCount() { | |
532 | - return columnName.length; | |
533 | - } | |
534 | - | |
535 | - /* ---------------------------------------------------------------------- */ | |
536 | - | |
537 | - public SearchData getSearchData(int row) { | |
538 | - SearchData sdata = new SearchData(); | |
539 | - sdata.setitem(String.valueOf(getValueAt(row, column_item))); | |
540 | - sdata.setHtmltag(String.valueOf(getValueAt(row, column_htmltag))); | |
541 | - sdata.setHtmlid(String.valueOf(getValueAt(row, column_htmlid))); | |
542 | - sdata.setHtmlclass(String.valueOf(getValueAt(row, column_htmlclass))); | |
543 | - sdata.setaround(String.valueOf(getValueAt(row, column_around))); | |
544 | - sdata.setregexp(String.valueOf(getValueAt(row, column_regexp))); | |
545 | - return sdata; | |
546 | - } | |
547 | - | |
548 | - public void addRow(SearchData sdata) { | |
549 | - addRow(getObjdata(sdata)); | |
550 | - } | |
551 | - | |
552 | - public void insertRow(int row, SearchData sdata) { | |
553 | - insertRow(row, getObjdata(sdata)); | |
554 | - } | |
555 | - | |
556 | - private Object[] getObjdata(SearchData sdata) { | |
557 | - Object[] obj = new Object[] { | |
558 | - sdata.getitem(), | |
559 | - sdata.getHtmltag(), | |
560 | - sdata.getHtmlid(), | |
561 | - sdata.getHtmlclass(), | |
562 | - sdata.getaround(), | |
563 | - sdata.getregexp() | |
564 | - }; | |
565 | - return obj; | |
566 | - } | |
567 | - | |
568 | -} | |
\ No newline at end of file |
@@ -1,142 +0,0 @@ | ||
1 | -/* | |
2 | - * Copyright (C) 2014-2015 kgto. | |
3 | - * | |
4 | - * This library is free software; you can redistribute it and/or | |
5 | - * modify it under the terms of the GNU Lesser General Public | |
6 | - * License as published by the Free Software Foundation; either | |
7 | - * version 2.1 of the License, or (at your option) any later version. | |
8 | - * | |
9 | - * This library is distributed in the hope that it will be useful, | |
10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | - * Lesser General Public License for more details. | |
13 | - * | |
14 | - * You should have received a copy of the GNU Lesser General Public | |
15 | - * License along with this library; if not, write to the Free Software | |
16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | - * MA 02110-1301 USA | |
18 | - */ | |
19 | -/* | |
20 | - * $Id$ | |
21 | - */ | |
22 | - | |
23 | -package webScraping.utility; | |
24 | - | |
25 | -import java.io.File; | |
26 | -import java.io.FileNotFoundException; | |
27 | -import java.io.FileOutputStream; | |
28 | -import java.io.IOException; | |
29 | -import java.util.logging.Level; | |
30 | -import java.util.logging.Logger; | |
31 | - | |
32 | -import javax.xml.parsers.DocumentBuilder; | |
33 | -import javax.xml.parsers.DocumentBuilderFactory; | |
34 | -import javax.xml.parsers.ParserConfigurationException; | |
35 | -import javax.xml.transform.Transformer; | |
36 | -import javax.xml.transform.TransformerConfigurationException; | |
37 | -import javax.xml.transform.TransformerException; | |
38 | -import javax.xml.transform.TransformerFactory; | |
39 | -import javax.xml.transform.dom.DOMSource; | |
40 | -import javax.xml.transform.stream.StreamResult; | |
41 | - | |
42 | -import org.w3c.dom.DOMImplementation; | |
43 | -import org.w3c.dom.Document; | |
44 | -import org.w3c.dom.Element; | |
45 | -import org.w3c.dom.Node; | |
46 | -import org.w3c.dom.NodeList; | |
47 | -import org.xml.sax.SAXException; | |
48 | - | |
49 | -public class LibraryXml { | |
50 | - | |
51 | - String xmlrootname = "xmlcontainer"; | |
52 | - | |
53 | - DocumentBuilder builder; | |
54 | - public Document readdoc, writedoc; | |
55 | - Element xmlroot; | |
56 | - | |
57 | - /* ---------------------------------------------------------------------- * | |
58 | - * Constructor: creates the DocumentBuilder; a configuration failure is only logged | |
59 | - * ---------------------------------------------------------------------- */ | |
60 | - public LibraryXml() { | |
61 | - try { | |
62 | - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
63 | - builder = factory.newDocumentBuilder(); | |
64 | - | |
65 | - } catch (ParserConfigurationException ex) { | |
66 | - Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex); | |
67 | - } | |
68 | - } | |
69 | - | |
70 | - /* ---------------------------------------------------------------------- * | |
71 | - * Methods | |
72 | - * ---------------------------------------------------------------------- */ | |
73 | - /* Write side: build the output document and serialize it to a file */ | |
74 | - public Element getwriteRoot(String elementName) { | |
75 | - mainElement(); | |
76 | - Element element = writedoc.createElement(elementName); | |
77 | - xmlroot.appendChild(element); | |
78 | - return element; | |
79 | - } | |
80 | - | |
81 | - private void mainElement() { | |
82 | - if(writedoc == null) { | |
83 | - DOMImplementation domImpl = builder.getDOMImplementation(); | |
84 | - writedoc = domImpl.createDocument("", xmlrootname, null); | |
85 | - xmlroot = writedoc.getDocumentElement(); | |
86 | - } | |
87 | - } | |
88 | - | |
89 | - /** | |
90 | - * Writes the current output document (writedoc) as indented XML, then discards it. | |
91 | - * @param file destination file; transformer and I/O failures are logged, not rethrown | |
92 | - */ | |
93 | - public void write(File file) { | |
94 | - try (FileOutputStream os = new FileOutputStream(file)) { | |
95 | - TransformerFactory transFactory = TransformerFactory.newInstance(); | |
96 | - Transformer transformer = transFactory.newTransformer(); | |
97 | - | |
98 | - transformer.setOutputProperty("indent", "yes"); // enable indentation / line breaks | |
99 | - transformer.setOutputProperty("method", "xml"); | |
100 | - | |
101 | - DOMSource source = new DOMSource(writedoc); | |
102 | - StreamResult result = new StreamResult(os); | |
103 | - transformer.transform(source, result); | |
104 | - | |
105 | - // discard the document that was just written so the next write starts fresh | |
106 | - writedoc = null; | |
107 | - | |
108 | - } catch (TransformerConfigurationException ex) { | |
109 | - Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex); | |
110 | - } catch (FileNotFoundException | TransformerException ex) { | |
111 | - Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex); | |
112 | - } catch (IOException ex) { | |
113 | - Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex); | |
114 | - } | |
115 | - } | |
116 | - | |
117 | - /* ---------------------------------------------------------------------- */ | |
118 | - /* Read side: parse a file and look up elements in it */ | |
119 | - | |
120 | - public Element getreadRoot(String elementName) { | |
121 | - NodeList nodelist = xmlroot.getElementsByTagName(elementName); | |
122 | - Node node = nodelist.item(0); | |
123 | - return (node.getNodeType() == Node.ELEMENT_NODE ? (Element)node : null); | |
124 | - } | |
125 | - | |
126 | - /** | |
127 | - * Parses an XML file into readdoc and points xmlroot at its document element. | |
128 | - * @param file file to parse; SAX and I/O failures are logged, not rethrown | |
129 | - */ | |
130 | - public void read(File file) { | |
131 | - try { | |
132 | - readdoc = builder.parse(file); | |
133 | - xmlroot = readdoc.getDocumentElement(); | |
134 | - | |
135 | - } catch (SAXException | IOException ex) { | |
136 | - Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex); | |
137 | - } | |
138 | - } | |
139 | - | |
140 | - /* ---------------------------------------------------------------------- */ | |
141 | - | |
142 | -} |
@@ -1,198 +0,0 @@ | ||
1 | -/* | |
2 | - * Copyright (C) 2014-2015 kgto. | |
3 | - * | |
4 | - * This library is free software; you can redistribute it and/or | |
5 | - * modify it under the terms of the GNU Lesser General Public | |
6 | - * License as published by the Free Software Foundation; either | |
7 | - * version 2.1 of the License, or (at your option) any later version. | |
8 | - * | |
9 | - * This library is distributed in the hope that it will be useful, | |
10 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | - * Lesser General Public License for more details. | |
13 | - * | |
14 | - * You should have received a copy of the GNU Lesser General Public | |
15 | - * License along with this library; if not, write to the Free Software | |
16 | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | - * MA 02110-1301 USA | |
18 | - */ | |
19 | -/* | |
20 | - * $Id$ | |
21 | - */ | |
22 | - | |
23 | -package webScraping.utility; | |
24 | - | |
25 | -import webScraping.core.SearchData; | |
26 | -import java.io.File; | |
27 | -import java.util.ArrayList; | |
28 | -import org.w3c.dom.Element; | |
29 | -import org.w3c.dom.Node; | |
30 | -import org.w3c.dom.NodeList; | |
31 | - | |
32 | -public class ScrapingXml { | |
33 | - /* ---------------------------------------------------------------------- * | |
34 | - * Fields | |
35 | - * ---------------------------------------------------------------------- */ | |
36 | - String rootnameScraping = "webscraping"; | |
37 | - | |
38 | - private String testUrl; | |
39 | - private SearchData[] sdata; | |
40 | - | |
41 | - public LibraryXml xlib = new LibraryXml(); | |
42 | - public Element root; | |
43 | - | |
44 | - /* ---------------------------------------------------------------------- * | |
45 | - * Constructor | |
46 | - * ---------------------------------------------------------------------- */ | |
47 | - public ScrapingXml() { | |
48 | - } | |
49 | - | |
50 | - /* ---------------------------------------------------------------------- * | |
51 | - * Setters (setSdata copies the entries of the static SearchData list into sdata) | |
52 | - * ---------------------------------------------------------------------- */ | |
53 | - public void setTestUrl(String testUrl) { | |
54 | - this.testUrl = testUrl; | |
55 | - } | |
56 | - | |
57 | - public void setSdata() { | |
58 | - this.sdata = new SearchData[SearchData.size()]; | |
59 | - for(int i = 0; i < SearchData.size(); i++) { | |
60 | - this.sdata[i] = SearchData.get(i); | |
61 | - } | |
62 | - } | |
63 | - | |
64 | - /* ---------------------------------------------------------------------- * | |
65 | - * Getters (getSdata clears the static SearchData list and refills it from sdata) | |
66 | - * ---------------------------------------------------------------------- */ | |
67 | - public String getTestUrl() { | |
68 | - return testUrl; | |
69 | - } | |
70 | - | |
71 | - public void getSdata() { | |
72 | - SearchData.clear(); | |
73 | - for(SearchData sdata1 : sdata) { | |
74 | - SearchData.add(sdata1); | |
75 | - } | |
76 | - } | |
77 | - | |
78 | - /* ---------------------------------------------------------------------- * | |
79 | - * Methods | |
80 | - * ---------------------------------------------------------------------- */ | |
81 | - public void save(File file) { | |
82 | - | |
83 | - elementset(); | |
84 | - | |
85 | - xlib.write(file); | |
86 | - } | |
87 | - | |
88 | - public void elementset() { | |
89 | - root = xlib.getwriteRoot(rootnameScraping); | |
90 | - elementsetUrl(); | |
91 | - elementsetSearchdata(); | |
92 | - System.out.println("elementset XmlScraping"); | |
93 | - } | |
94 | - | |
95 | - private void elementsetUrl() { | |
96 | - Element url = xlib.writedoc.createElement("url"); | |
97 | - url.appendChild(xlib.writedoc.createTextNode(testUrl)); | |
98 | - root.appendChild(url); | |
99 | - } | |
100 | - | |
101 | - private void elementsetSearchdata() { | |
102 | - int count = 0; | |
103 | - for(SearchData sdat : sdata) { | |
104 | - Element cslist = xlib.writedoc.createElement("searchlist"); | |
105 | - cslist.setAttribute("listNo", String.valueOf(++count)); | |
106 | - | |
107 | - addChild(cslist, "item" , sdat.getitem()); | |
108 | - addChild(cslist, "htmltag" , sdat.getHtmltag()); | |
109 | - addChild(cslist, "htmlid" , sdat.getHtmlid()); | |
110 | - addChild(cslist, "htmlclass", sdat.getHtmlclass()); | |
111 | - addChild(cslist, "around" , sdat.getaround()); | |
112 | - addChild(cslist, "regexp" , sdat.getregexp()); | |
113 | - | |
114 | - root.appendChild(cslist); | |
115 | - } | |
116 | - } | |
117 | - | |
118 | - private void addChild(Element cslist, String keyword, String data) { | |
119 | - if(!data.isEmpty()) { | |
120 | - Element element = xlib.writedoc.createElement(keyword); | |
121 | - element.appendChild(xlib.writedoc.createTextNode(data)); | |
122 | - cslist.appendChild(element); | |
123 | - } | |
124 | - } | |
125 | - | |
126 | - /* ---------------------------------------------------------------------- */ | |
127 | - | |
128 | - void load(File file) { | |
129 | - xlib.read(file); | |
130 | - elementget(); | |
131 | - } | |
132 | - | |
133 | - public void elementget() { | |
134 | - root = xlib.getreadRoot(rootnameScraping); | |
135 | - elementgetUrl(); | |
136 | - elementgetSearchdata(); | |
137 | - } | |
138 | - | |
139 | - private void elementgetUrl() { | |
140 | - NodeList nodelist = root.getElementsByTagName("url"); | |
141 | - Node node = nodelist.item(0); | |
142 | - testUrl = node.getFirstChild().getNodeValue(); | |
143 | - } | |
144 | - | |
145 | - private void elementgetSearchdata() { | |
146 | - ArrayList<SearchData> slist = new ArrayList<>(); | |
147 | - | |
148 | - NodeList nodelist = root.getElementsByTagName("searchlist"); | |
149 | - for(int i = 0; i < nodelist.getLength(); i++) { | |
150 | - Node childnode = nodelist.item(i); | |
151 | - | |
152 | - boolean sdatflg = false; | |
153 | - SearchData sdat = new SearchData(); | |
154 | - for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) { | |
155 | - if(child.getNodeType() == Node.ELEMENT_NODE) { | |
156 | - String tag = child.getNodeName(); | |
157 | - String rtn = ""; | |
158 | - if(child.getFirstChild() != null) { | |
159 | - rtn = child.getFirstChild().getNodeValue(); | |
160 | - } | |
161 | - switch (tag) { | |
162 | - case "item" : | |
163 | - sdat.setitem(rtn); | |
164 | - sdatflg = true; | |
165 | - break; | |
166 | - case "htmltag" : | |
167 | - sdat.setHtmltag(rtn); | |
168 | - sdatflg = true; | |
169 | - break; | |
170 | - case "htmlid" : | |
171 | - sdat.setHtmlid(rtn); | |
172 | - sdatflg = true; | |
173 | - break; | |
174 | - case "htmlclass" : | |
175 | - sdat.setHtmlclass(rtn); | |
176 | - sdatflg = true; | |
177 | - break; | |
178 | - case "around" : | |
179 | - sdat.setaround(rtn); | |
180 | - sdatflg = true; | |
181 | - break; | |
182 | - case "regexp" : | |
183 | - sdat.setregexp(rtn); | |
184 | - sdatflg = true; | |
185 | - break; | |
186 | - } | |
187 | - } | |
188 | - } | |
189 | - if(sdatflg) slist.add(sdat); | |
190 | - } | |
191 | - // convert the collected list into the sdata array | |
192 | - sdata = new SearchData[slist.size()]; | |
193 | - for(int i = 0; i < slist.size(); i++) { | |
194 | - sdata[i] = slist.get(i); | |
195 | - } | |
196 | - } | |
197 | - | |
198 | -} |
@@ -0,0 +1,264 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2014 kgto. | |
3 | + * | |
4 | + * This library is free software; you can redistribute it and/or | |
5 | + * modify it under the terms of the GNU Lesser General Public | |
6 | + * License as published by the Free Software Foundation; either | |
7 | + * version 2.1 of the License, or (at your option) any later version. | |
8 | + * | |
9 | + * This library is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | + * Lesser General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU Lesser General Public | |
15 | + * License along with this library; if not, write to the Free Software | |
16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | + * MA 02110-1301 USA | |
18 | + */ | |
19 | +/* | |
20 | + * $Id: DebugProcess.java 106 2014-12-10 13:45:01Z tuna_p $ | |
21 | + */ | |
22 | + | |
23 | +package webScraping.core; | |
24 | + | |
25 | +import java.io.File; | |
26 | +import java.io.FileInputStream; | |
27 | +import java.io.FileNotFoundException; | |
28 | +import java.io.IOException; | |
29 | +import java.util.logging.FileHandler; | |
30 | +import java.util.logging.Formatter; | |
31 | +import java.util.logging.Handler; | |
32 | +import java.util.logging.Level; | |
33 | +import java.util.logging.LogManager; | |
34 | +import java.util.logging.LogRecord; | |
35 | +import java.util.logging.Logger; | |
36 | +import javax.swing.text.MutableAttributeSet; | |
37 | +import javax.swing.text.html.HTML; | |
38 | + | |
39 | +/** | |
40 | + * Debug information. | |
41 | + * Debug-log output is controlled by placing a configuration file (Debug.prop) in the current directory. | |
42 | + * @author kgto | |
43 | + */ | |
44 | +public class DebugProcess { | |
45 | + // configuration file name | |
46 | + protected static final String configurationFilename = "Debug.prop"; | |
47 | + // shared logger, registered under the name WebScraping | |
48 | + protected static final Logger logger = Logger.getLogger("WebScraping"); | |
49 | + // level at which every debug message in this class is logged | |
50 | + protected static final Level loggerlevel = Level.FINEST; | |
51 | + | |
52 | + | |
53 | + /** | |
54 | + * Configures log output. | |
55 | + * Loads the log configuration file if it exists; when the resulting logger level is Level.ALL, | |
56 | + * a FileHandler is added. HtmlFormatter is then applied to the logger's FileHandlers. | |
57 | + */ | |
58 | + public static void debuglog_set() { | |
59 | + try { | |
60 | + initLogConfiguration(); | |
61 | + | |
62 | + if(Level.ALL.equals(logger.getLevel())) { | |
63 | + //logger.addHandler(new FileHandler("WebScraping%g.log", 100000, 2)); | |
64 | + logger.addHandler(new FileHandler("WebScraping%g.log", true)); | |
65 | + } | |
66 | + setFomatter(); | |
67 | + | |
68 | + } catch (IOException | SecurityException ex) { | |
69 | + Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
70 | + } | |
71 | + } | |
72 | + | |
73 | + /** | |
74 | + * Removes the log output configuration (currently a no-op: the body is empty). | |
75 | + */ | |
76 | + public static void debuglog_unset() { | |
77 | + } | |
78 | + | |
79 | + | |
80 | + /** | |
81 | + * Debug output (HTML parsing: tag and attributes). | |
82 | + * Logs the parse state of an HTML tag and its attributes. | |
83 | + * Format: 9 : x : tagName [attrName]attrCount = attrValue<br> | |
84 | + * Legend: 9 = nesting level (the count value), x = one character: F (start tag) / E (end tag) / S (simple tag)<br> | |
85 | + * @param tag the tag | |
86 | + * @param attr the attributes; may be null, in which case no attribute details are logged | |
87 | + * @param methodname name of the parent method that called this method | |
88 | + * @param count nesting level of the HTML tag | |
89 | + */ | |
90 | + public static void htmlinfo(HTML.Tag tag, MutableAttributeSet attr, | |
91 | + String methodname, int count) { | |
92 | + | |
93 | + // log level check: skip when no level is set or the level is coarser than loggerlevel | |
94 | + if(logger.getLevel() == null) { | |
95 | + return; | |
96 | + } | |
97 | + if(logger.getLevel().intValue() > loggerlevel.intValue()) { | |
98 | + return; | |
99 | + } | |
100 | + | |
101 | + // build the message; kbn stays blank when methodname is none of the three handlers | |
102 | + char kbn = ' '; | |
103 | + if("handleStartTag".equals(methodname)) { | |
104 | + kbn = 'F'; | |
105 | + } | |
106 | + if("handleEndTag".equals(methodname)) { | |
107 | + kbn = 'E'; | |
108 | + } | |
109 | + if("handleSimpleTag".equals(methodname)) { | |
110 | + kbn = 'S'; | |
111 | + } | |
112 | + | |
113 | + StringBuilder strBuf = new StringBuilder(80); | |
114 | + strBuf.append(count).append(" : "); | |
115 | + strBuf.append(kbn).append(" : "); | |
116 | + strBuf.append(tag.toString()); | |
117 | + // attribute details | |
118 | + if(attr != null) { | |
119 | + if(attr.getAttributeCount() != 0) { | |
120 | + AttributeData handleAttrData = new AttributeData(); | |
121 | + handleAttrData.add(tag, attr); | |
122 | + for(int i = 0; i < handleAttrData.size; i++) { | |
123 | + strBuf.append(" ["); | |
124 | + strBuf.append(handleAttrData.getattrname(i)); | |
125 | + strBuf.append("]"); | |
126 | + strBuf.append(handleAttrData.getcount(i)); | |
127 | + strBuf.append(" = "); | |
128 | + strBuf.append(handleAttrData.getattrvalue(i)); | |
129 | + } | |
130 | + } | |
131 | + } | |
132 | + | |
133 | + logger.log(loggerlevel, strBuf.toString()); | |
134 | + } | |
135 | + | |
136 | + /** | |
137 | + * Debug output (message). | |
138 | + * Logs the arbitrary message passed as an argument. | |
139 | + * @param str the message | |
140 | + * @param methodname name of the parent method that called this method (not used in the body) | |
141 | + */ | |
142 | + public static void htmlinfo(String str, String methodname) { | |
143 | + logger.log(loggerlevel, str); | |
144 | + } | |
145 | + | |
146 | + public static void htmlinfo(String str) { | |
147 | + logger.log(loggerlevel, str); | |
148 | + } | |
149 | + | |
150 | + /** | |
151 | + * Debug output (HTML parsing: body text). | |
152 | + * Logs the content of the body text. | |
153 | + * @param data body text (characters found inside the HTML) | |
154 | + * @param methodname name of the parent method that called this method (not used in the body) | |
155 | + */ | |
156 | + public static void htmlinfo(char[] data, String methodname) { | |
157 | + String dat = new String(data); | |
158 | + logger.log(loggerlevel, dat); | |
159 | + } | |
160 | + | |
161 | + public static void htmlinfo(char[] data) { | |
162 | + String dat = new String(data); | |
163 | + logger.log(loggerlevel, dat); | |
164 | + } | |
165 | + | |
166 | + /** | |
167 | + * Debug output (search key). | |
168 | + * Logs the tag, id and class values of the search key (SearchData). | |
169 | + * @param skey search key to log | |
170 | + */ | |
171 | + public static void searchDatainfo(SearchData skey) { | |
172 | + | |
173 | + StringBuilder strBuf = new StringBuilder(30); | |
174 | + strBuf.append("SearchData KEY tag["); | |
175 | + strBuf.append(skey.getHtmltag()); | |
176 | + strBuf.append("] ID["); | |
177 | + strBuf.append(skey.getHtmlid()); | |
178 | + strBuf.append("] CLASS["); | |
179 | + strBuf.append(skey.getHtmlclass()); | |
180 | + strBuf.append("]\n"); | |
181 | + | |
182 | + logger.log(loggerlevel, strBuf.toString()); | |
183 | + } | |
184 | + | |
185 | + /** | |
186 | + * Log configuration file check. | |
187 | + * If the configuration file exists, its contents are loaded into the LogManager; failures are only logged. | |
188 | + */ | |
189 | + private static void initLogConfiguration() { | |
190 | + | |
191 | + File file = new File(configurationFilename); | |
192 | + try { | |
193 | + if(file.exists()) { | |
194 | + FileInputStream inputStream = new FileInputStream(file); | |
195 | + // load the configuration file. NOTE(review): inputStream is never closed in this method | |
196 | + LogManager.getLogManager().readConfiguration(inputStream); | |
197 | + } | |
198 | + | |
199 | + } catch (FileNotFoundException ex) { | |
200 | + Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
201 | + } catch (IOException ex) { | |
202 | + Logger.getLogger(DebugProcess.class.getName()).log(Level.SEVERE, null, ex); | |
203 | + } | |
204 | + } | |
205 | + | |
206 | + /** | |
207 | + * Log formatter setup. | |
208 | + * Installs HtmlFormatter on every FileHandler attached to the logger. | |
209 | + */ | |
210 | + private static void setFomatter() { | |
211 | + Handler[] handlers = logger.getHandlers(); | |
212 | + for(int i = 0 ; i < handlers.length ; i++) { | |
213 | + if(handlers[i] instanceof java.util.logging.FileHandler) { | |
214 | + handlers[i].setFormatter(new HtmlFormatter()); | |
215 | + } | |
216 | + } | |
217 | + } | |
218 | + | |
219 | +} | |
220 | + | |
221 | +/** | |
222 | + * Log output formatter. | |
223 | + * @author kgto | |
224 | + */ | |
225 | +class HtmlFormatter extends Formatter { | |
226 | + /** | |
227 | + * Builds the output string for a log record. | |
228 | + * Output format:<br> | |
229 | + * YYYY-MM-DD HH:MM:SS level<methodName>message, followed by the throwable and its stack trace if one is attached | |
230 | + */ | |
231 | + @Override | |
232 | + public synchronized String format(final LogRecord aRecord) { | |
233 | + | |
234 | + final StringBuffer message = new StringBuffer(100); | |
235 | + | |
236 | + long millis = aRecord.getMillis(); | |
237 | + String time = String.format("%tF %<tT", millis); | |
238 | + | |
239 | + message.append(time); | |
240 | + message.append(' '); | |
241 | + | |
242 | + message.append(aRecord.getLevel()); | |
243 | + message.append('<'); | |
244 | + String methodName = aRecord.getSourceMethodName(); | |
245 | + message.append(methodName != null ? methodName : "N/A"); | |
246 | + message.append('>'); | |
247 | + | |
248 | + message.append(formatMessage(aRecord)); | |
249 | + message.append('\n'); | |
250 | + | |
251 | + // when a throwable is attached, append its description and one line per stack frame | |
252 | + Throwable throwable = aRecord.getThrown(); | |
253 | + if (throwable != null) { | |
254 | + message.append(throwable.toString()); | |
255 | + message.append('\n'); | |
256 | + for (StackTraceElement trace : throwable.getStackTrace()) { | |
257 | + message.append('\t'); | |
258 | + message.append(trace.toString()); | |
259 | + message.append('\n'); | |
260 | + } | |
261 | + } | |
262 | + return message.toString(); | |
263 | + } | |
264 | +} |
@@ -0,0 +1,164 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2014 kgto. | |
3 | + * | |
4 | + * This library is free software; you can redistribute it and/or | |
5 | + * modify it under the terms of the GNU Lesser General Public | |
6 | + * License as published by the Free Software Foundation; either | |
7 | + * version 2.1 of the License, or (at your option) any later version. | |
8 | + * | |
9 | + * This library is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | + * Lesser General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU Lesser General Public | |
15 | + * License along with this library; if not, write to the Free Software | |
16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | + * MA 02110-1301 USA | |
18 | + */ | |
19 | +/* | |
20 | + * $Id: AttributeData.java 132 2015-03-31 02:44:57Z tuna_p $ | |
21 | + */ | |
22 | + | |
23 | +package webScraping.core; | |
24 | + | |
25 | +import java.util.ArrayList; | |
26 | +import java.util.Enumeration; | |
27 | +import javax.swing.text.MutableAttributeSet; | |
28 | +import javax.swing.text.html.HTML; | |
29 | + | |
30 | +/** | |
31 | + * Holds attribute information for HTML tags. | |
32 | + * @author kgto | |
33 | + */ | |
34 | +public class AttributeData { | |
35 | + | |
36 | + public AttributeData() { | |
37 | + AttrList = new ArrayList(); | |
38 | + size = 0; | |
39 | + } | |
40 | + | |
41 | + /** | |
42 | + * Adds one entry per attribute of the given tag; all of them share the tag's next occurrence count. | |
43 | + * @param tag tag the attributes belong to | |
44 | + * @param attr attribute set whose names and values are recorded | |
45 | + */ | |
46 | + public void add(HTML.Tag tag, MutableAttributeSet attr) { | |
47 | + | |
48 | + int tagcount = tagcnt(tag); | |
49 | + ++tagcount; | |
50 | + | |
51 | + Enumeration e = attr.getAttributeNames(); | |
52 | + while(e.hasMoreElements()) { | |
53 | + Object obj = e.nextElement(); | |
54 | + | |
55 | + AttrData a = new AttrData(); | |
56 | + a.tag = tag; | |
57 | + a.count = tagcount; | |
58 | + a.attrname = obj.toString(); | |
59 | + a.attrvalue = attr.getAttribute(obj).toString(); | |
60 | + | |
61 | + AttrList.add(a); | |
62 | + size = AttrList.size(); | |
63 | + } | |
64 | + | |
65 | + } | |
66 | + | |
67 | + /** | |
68 | + * Searches the stored attribute information. | |
69 | + * @param tag tag to match (compared by identity) | |
70 | + * @param attrname attribute name to match exactly | |
71 | + * @param attrvalue prefix that the stored attribute value must start with | |
72 | + * @return true if at least one stored entry matches | |
73 | + */ | |
74 | + public boolean search(HTML.Tag tag, String attrname, String attrvalue) { | |
75 | + boolean ret = false; | |
76 | + for (Object AttrList1 : AttrList) { | |
77 | + AttrData a = (AttrData)AttrList1; | |
78 | + if(a.tag == tag) { | |
79 | + //if(a.attrname.equals(attrname) && a.attrvalue.equals(attrvalue)) { | |
80 | + if(a.attrname.equals(attrname) && a.attrvalue.startsWith(attrvalue)) { | |
81 | + ret = true; | |
82 | + } | |
83 | + } | |
84 | + } | |
85 | + return ret; | |
86 | + } | |
87 | + | |
88 | + public boolean searchId(HTML.Tag tag, String attrvalue) { | |
89 | + return search(tag, "id", attrvalue); | |
90 | + } | |
91 | + | |
92 | + public boolean searchClass(HTML.Tag tag, String attrvalue) { | |
93 | + return search(tag, "class", attrvalue); | |
94 | + } | |
95 | + | |
96 | + /** | |
97 | + * Returns the values of an attribute. | |
98 | + * @param tag tag to match (compared by identity) | |
99 | + * @param attrname attribute name to match exactly | |
100 | + * @return list of the stored values for that tag and attribute name; empty when none match | |
101 | + */ | |
102 | + public ArrayList getvale(HTML.Tag tag, String attrname) { | |
103 | + ArrayList ret = new ArrayList(); | |
104 | + for (Object AttrList1 : AttrList) { | |
105 | + AttrData a = (AttrData)AttrList1; | |
106 | + if(a.tag == tag) { | |
107 | + if(a.attrname.equals(attrname)) { | |
108 | + ret.add(a.attrvalue); | |
109 | + } | |
110 | + } | |
111 | + } | |
112 | + return ret; | |
113 | + } | |
114 | + | |
115 | + /** | |
116 | + * Returns the highest count recorded so far for the given tag. | |
117 | + * @param tag tag to look up (compared by identity) | |
118 | + * @return the largest stored count for the tag, or 0 when the tag has no entries | |
119 | + */ | |
120 | + private int tagcnt(HTML.Tag tag) { | |
121 | + int wkcnt = 0; | |
122 | + for (Object AttrList1 : AttrList) { | |
123 | + AttrData a = (AttrData)AttrList1; | |
124 | + if(a.tag == tag) { | |
125 | + if(wkcnt < a.count) { | |
126 | + wkcnt = a.count; | |
127 | + } | |
128 | + } | |
129 | + } | |
130 | + return wkcnt; | |
131 | + } | |
132 | + | |
133 | + // accessors returning one field of the AttrList entry at index i | |
134 | + public HTML.Tag gettag(int i) { | |
135 | + AttrData a = (AttrData)AttrList.get(i); | |
136 | + return a.tag; | |
137 | + } | |
138 | + | |
139 | + public int getcount(int i) { | |
140 | + AttrData a = (AttrData)AttrList.get(i); | |
141 | + return a.count; | |
142 | + } | |
143 | + | |
144 | + public String getattrname(int i) { | |
145 | + AttrData a = (AttrData)AttrList.get(i); | |
146 | + return a.attrname; | |
147 | + } | |
148 | + | |
149 | + public String getattrvalue(int i) { | |
150 | + AttrData a = (AttrData)AttrList.get(i); | |
151 | + return a.attrvalue; | |
152 | + } | |
153 | + | |
154 | + // entry type and fields | |
155 | + public class AttrData { | |
156 | + public HTML.Tag tag; | |
157 | + public int count; | |
158 | + public String attrname; | |
159 | + public String attrvalue; | |
160 | + } | |
161 | + public ArrayList AttrList; | |
162 | + public int size; // size of AttrList, refreshed after each entry is added | |
163 | + | |
164 | +} |
@@ -0,0 +1,222 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2014 kgto. | |
3 | + * | |
4 | + * This library is free software; you can redistribute it and/or | |
5 | + * modify it under the terms of the GNU Lesser General Public | |
6 | + * License as published by the Free Software Foundation; either | |
7 | + * version 2.1 of the License, or (at your option) any later version. | |
8 | + * | |
9 | + * This library is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | + * Lesser General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU Lesser General Public | |
15 | + * License along with this library; if not, write to the Free Software | |
16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | + * MA 02110-1301 USA | |
18 | + */ | |
19 | +/* | |
20 | + * $Id: HtmlParserCallback.java 132 2015-03-31 02:44:57Z tuna_p $ | |
21 | + */ | |
22 | + | |
23 | +package webScraping.core; | |
24 | + | |
25 | +import java.util.ArrayList; | |
26 | +import java.util.HashMap; | |
27 | +import javax.swing.text.MutableAttributeSet; | |
28 | +import javax.swing.text.html.HTML; | |
29 | +import javax.swing.text.html.HTMLEditorKit; | |
30 | + | |
31 | +/** | |
32 | + * HTML parser component. | |
33 | + * @author kgto | |
34 | + */ | |
35 | +class HtmlParserCallback extends HTMLEditorKit.ParserCallback { | |
36 | + /* ---------------------------------------------------------------------- * | |
37 | + * Fields | |
38 | + * ---------------------------------------------------------------------- */ | |
39 | + // nesting depth per tag | |
40 | + HashMap<HTML.Tag,Integer> tagMap = new HashMap<>(); | |
41 | + | |
42 | + // search key information | |
43 | + String keytag; | |
44 | + String keyid; | |
45 | + String keyclass; | |
46 | + | |
47 | + // state saved when a tag matches the search key (bufCount == 0 means no match is open) | |
48 | + int bufCount = 0; | |
49 | + HTML.Tag bufTag = null; | |
50 | + // work buffer that collects text while a match is open | |
51 | + StringBuilder bufText; | |
52 | + | |
53 | + // list of the text collected for each match of the search key | |
54 | + ArrayList sData; | |
55 | + | |
56 | + // attribute data | |
57 | + AttributeData attrdata; | |
58 | + | |
59 | + /* ---------------------------------------------------------------------- * | |
60 | + * Constructor | |
61 | + * ---------------------------------------------------------------------- */ | |
62 | + protected HtmlParserCallback(SearchData skey) { | |
63 | + | |
64 | + // unpack the key information | |
65 | + keytag = skey.getHtmltag(); | |
66 | + keyid = skey.getHtmlid(); | |
67 | + keyclass = skey.getHtmlclass(); | |
68 | + | |
69 | + sData = new ArrayList(); | |
70 | + } | |
71 | + | |
72 | + /* ---------------------------------------------------------------------- * | |
73 | + * Getter | |
74 | + * ---------------------------------------------------------------------- */ | |
75 | + ArrayList getrtnData() { | |
76 | + return this.sData; | |
77 | + } | |
78 | + | |
79 | + /* ---------------------------------------------------------------------- * | |
80 | + * Methods | |
81 | + * ---------------------------------------------------------------------- */ | |
82 | + @Override | |
83 | + public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
84 | + // track the nesting depth per tag | |
85 | + int count = 1; | |
86 | + if(tagMap.containsKey(tag)) { | |
87 | + count = tagMap.get(tag); | |
88 | + count++; | |
89 | + } | |
90 | + tagMap.put(tag, count); | |
91 | + | |
92 | + // analyze the attributes of this tag | |
93 | + AttributeData handleStartattrdata = new AttributeData(); | |
94 | + handleStartattrdata.add(tag, attr); | |
95 | + | |
96 | + DebugProcess.htmlinfo(tag, attr, "handleStartTag", count); | |
97 | + | |
98 | + if(bufCount == 0) { | |
99 | + if(tag.toString().equals(keytag)) { | |
100 | + //if(serachAttribute(attr)) { | |
101 | + if(serachAttribute(tag, handleStartattrdata)) { | |
102 | + bufCount = count; | |
103 | + bufTag = tag; | |
104 | + attrdata = new AttributeData(); | |
105 | + bufText = new StringBuilder(); | |
106 | + } | |
107 | + } | |
108 | + } | |
109 | + if(bufCount > 0) { | |
110 | + attrdata.add(tag, attr); | |
111 | + } | |
112 | + } | |
113 | + | |
114 | + @Override | |
115 | + public void handleEndTag(HTML.Tag tag, int pos){ | |
116 | + // look up the nesting depth for this tag | |
117 | + int count = 0; | |
118 | + if(tagMap.containsKey(tag)) { | |
119 | + count = tagMap.get(tag); | |
120 | + } | |
121 | + | |
122 | + DebugProcess.htmlinfo(tag, null, "handleEndTag", count); | |
123 | + | |
124 | + if(tag.equals(bufTag) && count <= bufCount) { | |
125 | + | |
126 | + // store the accumulated match text in the list | |
127 | + sData.add(bufText.toString()); | |
128 | + | |
129 | + // clear the saved state of the match against the search key | |
130 | + bufCount = 0; | |
131 | + bufTag = null; | |
132 | + bufText = null; | |
133 | + } | |
134 | + | |
135 | + // decrement the nesting depth for this tag | |
136 | + tagMap.put(tag, --count); | |
137 | + } | |
138 | + | |
139 | + @Override | |
140 | + public void handleText(char[] data, int pos){ | |
141 | + | |
142 | + DebugProcess.htmlinfo(data, "handleText"); | |
143 | + | |
144 | + String splitchar = "\t"; | |
145 | + // strip control characters (anything up to 0x1f, plus 0x7f) | |
146 | + // and 0xa0 | |
147 | + StringBuilder buf = new StringBuilder(); | |
148 | + for(int i = 0; i < data.length; i++) { | |
149 | + if(data[i] > 0x1f && data[i] != 0x7f && data[i] != 0xa0) { | |
150 | + buf.append(data[i]); | |
151 | + } | |
152 | + } | |
153 | + if(bufCount > 0) { | |
154 | + if(bufText.length() > 0) { | |
155 | + bufText.append(splitchar); | |
156 | + } | |
157 | + bufText.append(buf.toString()); | |
158 | + } | |
159 | + } | |
160 | + | |
161 | + @Override | |
162 | + public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){ | |
163 | + if(bufCount > 0) { | |
164 | + attrdata.add(tag, attr); | |
165 | + } | |
166 | + DebugProcess.htmlinfo(tag, attr, "handleSimpleTag", 0); | |
167 | + } | |
168 | + | |
169 | + /** | |
170 | + * Compares the ID/CLASS values found in the page with the search key (exact match). | |
171 | + * @param attr the MutableAttributeSet from the page | |
172 | + * @return boolean true when the non-empty search key values match | |
173 | + */ | |
174 | + boolean serachAttribute(MutableAttributeSet attr) { | |
175 | + String currentID = (String)attr.getAttribute(HTML.Attribute.ID); | |
176 | + String currentClass = (String)attr.getAttribute(HTML.Attribute.CLASS); | |
177 | + | |
178 | + if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
179 | + if(keyid.equals(currentID) && keyclass.equals(currentClass)) { | |
180 | + return true; | |
181 | + } | |
182 | + } | |
183 | + | |
184 | + if(keyid.isEmpty() == false) { | |
185 | + if(keyid.equals(currentID)) { | |
186 | + return true; | |
187 | + } | |
188 | + } | |
189 | + | |
190 | + if(keyclass.isEmpty() == false) { | |
191 | + if(keyclass.equals(currentClass)) { | |
192 | + return true; | |
193 | + } | |
194 | + } | |
195 | + | |
196 | + return false; | |
197 | + } | |
198 | + | |
199 | + /** | |
200 | + * Compares the ID/CLASS values found in the page with the search key, using AttributeData lookups. | |
201 | + * @param tag tag whose attributes are checked | |
202 | + * @param attrdata attribute data to search with searchId / searchClass | |
203 | + * @return boolean true when the search key matches; false when both keyid and keyclass are empty | |
204 | + */ | |
205 | + boolean serachAttribute(HTML.Tag tag, AttributeData attrdata) { | |
206 | + // both an ID key and a CLASS key were entered | |
207 | + if(keyid.isEmpty() == false && keyclass.isEmpty() == false) { | |
208 | + if(attrdata.searchId(tag, keyid) && attrdata.searchClass(tag, keyclass)) { | |
209 | + return true; | |
210 | + } | |
211 | + } | |
212 | + // ID key check (also reached when the combined check above did not match) | |
213 | + if(keyid.isEmpty() == false) { | |
214 | + return attrdata.searchId(tag, keyid); | |
215 | + } | |
216 | + // CLASS key check | |
217 | + if(keyclass.isEmpty() == false) { | |
218 | + return attrdata.searchClass(tag, keyclass); | |
219 | + } | |
220 | + return false; | |
221 | + } | |
222 | +} |
@@ -0,0 +1,200 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2014 kgto. | |
3 | + * | |
4 | + * This library is free software; you can redistribute it and/or | |
5 | + * modify it under the terms of the GNU Lesser General Public | |
6 | + * License as published by the Free Software Foundation; either | |
7 | + * version 2.1 of the License, or (at your option) any later version. | |
8 | + * | |
9 | + * This library is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | + * Lesser General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU Lesser General Public | |
15 | + * License along with this library; if not, write to the Free Software | |
16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | + * MA 02110-1301 USA | |
18 | + */ | |
19 | +/* | |
20 | + * $Id: SearchData.java 132 2015-03-31 02:44:57Z tuna_p $ | |
21 | + */ | |
22 | + | |
23 | +package webScraping.core; | |
24 | + | |
25 | +import java.util.ArrayList; | |
26 | + | |
/**
 * Tag search data.
 * <p>
 * One instance holds a single search condition (item name, HTML tag, ID, class,
 * position and extraction pattern). The class additionally keeps a static,
 * process-wide list of conditions that is accessed through the static methods.
 * All values are kept as non-null strings; {@code null} passed to a setter is
 * stored as the empty string.
 *
 * @author kgto
 */
public class SearchData {
    /* ---------------------------------------------------------------------- *
     * Fields
     * ---------------------------------------------------------------------- */
    private String item;
    private String htmltag;
    private String htmlid;
    private String htmlclass;
    private String around;
    private String regexp;

    /* ---------------------------------------------------------------------- *
     * Static part
     * ---------------------------------------------------------------------- */
    /**
     * Description of one column: value class, internal name and display name.
     */
    public static class Context {
        public Class<?> columnClass;
        public String columnName;
        public String columnNameJp;

        public Context(Class<?> columnClass, String columnName, String columnNameJp) {
            this.columnClass = columnClass;
            this.columnName = columnName;
            this.columnNameJp = columnNameJp;
        }
    }

    /** Column descriptions, in the same order as the array returned by {@link #getObjData()}. */
    public static final Context[] context = {
        /* 0 */ new Context(String.class, "item", "項目名"),
        /* 1 */ new Context(String.class, "htmltag", "タグ"),
        /* 2 */ new Context(String.class, "htmlid", "ID"),
        /* 3 */ new Context(String.class, "htmlclass", "クラス"),
        /* 4 */ new Context(String.class, "around", "位置"),
        /* 5 */ new Context(String.class, "regexp", "抽出条件")
    };

    /* ---------------------------------------------------------------------- */
    /** Shared list of search conditions. */
    private static final ArrayList<SearchData> slist = new ArrayList<>();

    /**
     * Builds a search condition from the given values and appends it to the shared list.
     */
    public static void addSearchData(
            String item, String htmltag, String htmlid,
            String htmlclass, String around, String regexp) {
        SearchData sdat = new SearchData();
        sdat.setitem(item);
        sdat.setHtmltag(htmltag);
        sdat.setHtmlid(htmlid);
        sdat.setHtmlclass(htmlclass);
        sdat.setaround(around);
        sdat.setregexp(regexp);

        slist.add(sdat);
    }

    /** Appends an existing condition to the shared list. */
    public static void add(SearchData sdat) {
        slist.add(sdat);
    }

    /** Returns the condition at index {@code i} of the shared list. */
    public static SearchData get(int i) {
        return slist.get(i);
    }

    /** Returns the number of conditions in the shared list. */
    public static int size() {
        return slist.size();
    }

    /** Removes and returns the condition at {@code index} of the shared list. */
    public static SearchData remove(int index) {
        return slist.remove(index);
    }

    /** Empties the shared list. */
    public static void clear() {
        slist.clear();
    }

    /** Maps {@code null} to the empty string so getters never return {@code null}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /* ---------------------------------------------------------------------- *
     * Constructors
     * ---------------------------------------------------------------------- */
    /** Creates a condition with every value set to the empty string. */
    public SearchData() {
        initialize();
    }

    /**
     * Copy constructor.
     *
     * @param dat condition to copy; must not be {@code null}
     */
    public SearchData(SearchData dat) {
        this.item = nullToEmpty(dat.getitem());
        this.htmltag = nullToEmpty(dat.getHtmltag());
        this.htmlid = nullToEmpty(dat.getHtmlid());
        this.htmlclass = nullToEmpty(dat.getHtmlclass());
        this.around = nullToEmpty(dat.getaround());
        this.regexp = nullToEmpty(dat.getregexp());
    }

    /* ---------------------------------------------------------------------- *
     * Setters ({@code null} is stored as the empty string)
     * ---------------------------------------------------------------------- */
    public void setitem(String item) {
        this.item = nullToEmpty(item);
    }

    public void setHtmltag(String htmltag) {
        this.htmltag = nullToEmpty(htmltag);
    }

    public void setHtmlid(String htmlid) {
        this.htmlid = nullToEmpty(htmlid);
    }

    public void setHtmlclass(String htmlclass) {
        this.htmlclass = nullToEmpty(htmlclass);
    }

    public void setaround(String around) {
        this.around = nullToEmpty(around);
    }

    public void setregexp(String regexp) {
        this.regexp = nullToEmpty(regexp);
    }

    /* ---------------------------------------------------------------------- *
     * Getters
     * ---------------------------------------------------------------------- */
    public String getitem() {
        return item;
    }

    public String getHtmltag() {
        return htmltag;
    }

    public String getHtmlid() {
        return htmlid;
    }

    public String getHtmlclass() {
        return htmlclass;
    }

    public String getaround() {
        return around;
    }

    public String getregexp() {
        return regexp;
    }

    /* ---------------------------------------------------------------------- *
     * Methods
     * ---------------------------------------------------------------------- */
    /**
     * Resets every value to the empty string.
     */
    public final void initialize() {
        this.item = "";
        this.htmltag = "";
        this.htmlid = "";
        this.htmlclass = "";
        this.around = "";
        this.regexp = "";
    }

    /**
     * Returns the values as an array, in the column order of {@link #context}.
     *
     * @return item, tag, ID, class, position, extraction pattern
     */
    public Object[] getObjData() {
        return new Object[] {
            /* 0 */ getitem(),      // item name
            /* 1 */ getHtmltag(),   // tag
            /* 2 */ getHtmlid(),    // ID
            /* 3 */ getHtmlclass(), // class
            /* 4 */ getaround(),    // position
            /* 5 */ getregexp()     // extraction pattern
        };
    }

}
@@ -0,0 +1,273 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2014 kgto. | |
3 | + * | |
4 | + * This library is free software; you can redistribute it and/or | |
5 | + * modify it under the terms of the GNU Lesser General Public | |
6 | + * License as published by the Free Software Foundation; either | |
7 | + * version 2.1 of the License, or (at your option) any later version. | |
8 | + * | |
9 | + * This library is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | + * Lesser General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU Lesser General Public | |
15 | + * License along with this library; if not, write to the Free Software | |
16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | + * MA 02110-1301 USA | |
18 | + */ | |
19 | +/* | |
20 | + * $Id: HtmlParser.java 132 2015-03-31 02:44:57Z tuna_p $ | |
21 | + */ | |
22 | + | |
23 | +package webScraping.core; | |
24 | + | |
25 | +import java.io.*; | |
26 | +import java.net.*; | |
27 | +import java.util.ArrayList; | |
28 | +import java.util.logging.Level; | |
29 | +import java.util.logging.Logger; | |
30 | +import java.util.regex.Matcher; | |
31 | +import java.util.regex.Pattern; | |
32 | +import javax.swing.text.html.parser.ParserDelegator; | |
33 | + | |
34 | +/** | |
35 | + * HTMLパーサ. | |
36 | + * @author kgto | |
37 | + */ | |
38 | +public class HtmlParser { | |
39 | + /* ---------------------------------------------------------------------- * | |
40 | + * フィールド | |
41 | + * ---------------------------------------------------------------------- */ | |
42 | + URL url; | |
43 | + String pageData; | |
44 | + ArrayList sData; | |
45 | + | |
46 | + // 作業ワーク | |
47 | + private String htmltag; | |
48 | + private String htmlid; | |
49 | + private String htmlclass; | |
50 | + | |
51 | + /* ---------------------------------------------------------------------- * | |
52 | + * コンストラクタ | |
53 | + * ---------------------------------------------------------------------- */ | |
54 | + public HtmlParser(URL UrlAdress) { | |
55 | + DebugProcess.debuglog_set(); | |
56 | + this.url = UrlAdress; | |
57 | + getPageData(); | |
58 | + } | |
59 | + | |
60 | + public HtmlParser(String UrlAdress) { | |
61 | + DebugProcess.debuglog_set(); | |
62 | + try { | |
63 | + url = new URL(UrlAdress); | |
64 | + getPageData(); | |
65 | + | |
66 | + } catch (MalformedURLException ex) { | |
67 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
68 | + } | |
69 | + } | |
70 | + | |
71 | + public HtmlParser() { | |
72 | + DebugProcess.debuglog_set(); | |
73 | + url = null; | |
74 | + } | |
75 | + | |
76 | + /* ---------------------------------------------------------------------- * | |
77 | + * Getter | |
78 | + * ---------------------------------------------------------------------- */ | |
79 | + public String getStringPageData() { | |
80 | + return pageData; | |
81 | + } | |
82 | + | |
83 | + /* ---------------------------------------------------------------------- * | |
84 | + * Setter | |
85 | + * ---------------------------------------------------------------------- */ | |
86 | + public void seturl(URL UrlAdress) { | |
87 | + this.url = UrlAdress; | |
88 | + getPageData(); | |
89 | + } | |
90 | + | |
91 | + /* ---------------------------------------------------------------------- * | |
92 | + * メソッド | |
93 | + * ---------------------------------------------------------------------- */ | |
94 | + public void seturl(String UrlAdress) { | |
95 | + try { | |
96 | + url = new URL(UrlAdress); | |
97 | + getPageData(); | |
98 | + | |
99 | + } catch (MalformedURLException ex) { | |
100 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
101 | + } | |
102 | + } | |
103 | + | |
104 | + /** | |
105 | + * HTMLページ内検索. | |
106 | + * 検索キーとして渡されたタグ,ID,クラスから、対象となるタグを探し出し、 | |
107 | + * around(タグ位置)として指定された箇所の文字列をregexp(正規表現)で指定された整形を | |
108 | + * 行った結果を返す。<br> | |
109 | + * aroundの初期値:0 検索キーとして未指定(未入力)の場合、最初(0)の文字列。<br> | |
110 | + * regexpが指定(入力)ありの場合、正規表現にて整形を行う。<br> | |
111 | + * 渡された検索キーに一致するタグが存在しなかった場合、NULLを返す。 | |
112 | + * @param skey 検索キーデータ(SearchData) | |
113 | + * @return String 検索キーに一致するデータの文字列 | |
114 | + */ | |
115 | + public String search(SearchData skey) { | |
116 | + | |
117 | + // htmlページ内を検索 | |
118 | + if(isHtmlkeyEq(skey) == false) { | |
119 | + searchPageData(skey); | |
120 | + } | |
121 | + /* | |
122 | + around 出現位置指定 入力有り:指定された位置の情報のみ返す。 | |
123 | + 入力無し:取得した全ての情報を返す。 | |
124 | + */ | |
125 | + String regexp = skey.getregexp(); | |
126 | + if(skey.getaround().length() > 0) { | |
127 | + int wkAround = Integer.parseInt(skey.getaround()); // 検索位置を数値変換 | |
128 | + if(wkAround < sData.size()) { | |
129 | + String str = (String)sData.get(wkAround); | |
130 | + String rtn = RegularExpression(str, regexp); | |
131 | + return rtn; | |
132 | + } | |
133 | + } else { | |
134 | + StringBuilder strbuf = new StringBuilder(); | |
135 | + for (Object sData1 : sData) { | |
136 | + String str = (String)sData1; | |
137 | + String rtn = RegularExpression(str, regexp); | |
138 | + if(strbuf.length() > 0) { | |
139 | + strbuf.append("\t"); | |
140 | + } | |
141 | + strbuf.append(rtn); | |
142 | + } | |
143 | + return strbuf.toString(); | |
144 | + } | |
145 | + return null; | |
146 | + } | |
147 | + | |
148 | + /** | |
149 | + * 直近のHTMLタグ/ID/CLASS値と引数の値を比較する. | |
150 | + * @param skey HTMLタグ/ID/CLASSが格納された検索キー | |
151 | + * @return boolean HTMLタグ/ID/CLASS値が一致する時、true | |
152 | + */ | |
153 | + boolean isHtmlkeyEq(SearchData skey) { | |
154 | + | |
155 | + String stag = skey.getHtmltag(); | |
156 | + String sid = skey.getHtmlid(); | |
157 | + String sclass = skey.getHtmlclass(); | |
158 | + | |
159 | + boolean rtn = true; | |
160 | + | |
161 | + // htmltag | |
162 | + if(htmltag == null) { | |
163 | + rtn = false; | |
164 | + } else { | |
165 | + if(htmltag.equals(stag) == false) { | |
166 | + rtn = false; | |
167 | + } | |
168 | + } | |
169 | + | |
170 | + // htmlid | |
171 | + if(htmlid == null) { | |
172 | + rtn = false; | |
173 | + } else { | |
174 | + if(htmlid.equals(sid) == false) { | |
175 | + rtn = false; | |
176 | + } | |
177 | + } | |
178 | + | |
179 | + // htmlclass | |
180 | + if(htmlclass == null) { | |
181 | + rtn = false; | |
182 | + } else { | |
183 | + if(htmlclass.equals(sclass) == false) { | |
184 | + rtn = false; | |
185 | + } | |
186 | + } | |
187 | + | |
188 | + if(!rtn) { | |
189 | + htmltag = stag; | |
190 | + htmlid = sid; | |
191 | + htmlclass = sclass; | |
192 | + } | |
193 | + | |
194 | + return rtn; | |
195 | + } | |
196 | + | |
197 | + /** | |
198 | + * 正規表現検索. | |
199 | + * @param strdata | |
200 | + * @param regexp | |
201 | + * @return | |
202 | + */ | |
203 | + String RegularExpression(String strdata, String regexp) { | |
204 | + String expdata = null; | |
205 | + | |
206 | + //regexpのチェック | |
207 | + if(regexp.isEmpty()) { | |
208 | + expdata = strdata; | |
209 | + return expdata; | |
210 | + } | |
211 | + | |
212 | + //正規表現検索 | |
213 | + Pattern ptn = Pattern.compile(regexp); | |
214 | + Matcher matchdata = ptn.matcher(strdata); | |
215 | + if (matchdata.find()) { | |
216 | + if(matchdata.groupCount() >= 1) { | |
217 | + expdata = matchdata.group(1); | |
218 | + } | |
219 | + } | |
220 | + return expdata; | |
221 | + } | |
222 | + | |
223 | + /** | |
224 | + * インターネット接続. | |
225 | + */ | |
226 | + private void getPageData() { | |
227 | + HttpURLConnection con = null; | |
228 | + try { | |
229 | + con = (HttpURLConnection)url.openConnection(); | |
230 | + con.setRequestMethod("GET"); | |
231 | + BufferedReader reader = new BufferedReader( | |
232 | + new InputStreamReader(con.getInputStream(), "utf-8")); | |
233 | + String wkline; | |
234 | + StringBuilder sb = new StringBuilder(); | |
235 | + while((wkline = reader.readLine()) != null) { | |
236 | + sb.append(wkline).append("\n"); | |
237 | + } | |
238 | + pageData = sb.toString(); | |
239 | + | |
240 | + } catch(FileNotFoundException ex) { | |
241 | + pageData = null; | |
242 | + } catch (IOException ex) { | |
243 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
244 | + } finally { | |
245 | + if(con != null) { | |
246 | + con.disconnect(); | |
247 | + } | |
248 | + } | |
249 | + } | |
250 | + | |
251 | + /** | |
252 | + * HTMLパーサ. | |
253 | + * @param skey | |
254 | + */ | |
255 | + private void searchPageData(SearchData skey) { | |
256 | + | |
257 | + DebugProcess.searchDatainfo(skey); | |
258 | + | |
259 | + Reader reader; | |
260 | + try { | |
261 | + reader = new BufferedReader(new StringReader(pageData)); | |
262 | + HtmlParserCallback cb = new HtmlParserCallback(skey); | |
263 | + ParserDelegator pd = new ParserDelegator(); | |
264 | + pd.parse(reader, cb, true); | |
265 | + reader.close(); | |
266 | + | |
267 | + sData = cb.getrtnData(); | |
268 | + | |
269 | + } catch (IOException ex) { | |
270 | + Logger.getLogger(HtmlParser.class.getName()).log(Level.SEVERE, null, ex); | |
271 | + } | |
272 | + } | |
273 | +} |
@@ -0,0 +1,71 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2016 kgto. | |
3 | + * | |
4 | + * This library is free software; you can redistribute it and/or | |
5 | + * modify it under the terms of the GNU Lesser General Public | |
6 | + * License as published by the Free Software Foundation; either | |
7 | + * version 2.1 of the License, or (at your option) any later version. | |
8 | + * | |
9 | + * This library is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | + * Lesser General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU Lesser General Public | |
15 | + * License along with this library; if not, write to the Free Software | |
16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | + * MA 02110-1301 USA | |
18 | + */ | |
19 | +/* | |
20 | + * $Id: Scraping.java 139 2016-05-17 09:16:40Z tuna_p $ | |
21 | + */ | |
22 | +package webScraping.core; | |
23 | + | |
24 | +import java.net.URL; | |
25 | +import webScraping.core.HtmlParser; | |
26 | +import webScraping.core.SearchData; | |
27 | + | |
28 | +/** | |
29 | + * | |
30 | + * @author kgto | |
31 | + */ | |
32 | +public class Scraping { | |
33 | + | |
34 | + public Scraping() { | |
35 | + } | |
36 | + | |
37 | + /** | |
38 | + * HTML解析. | |
39 | + * @param url | |
40 | + * @return | |
41 | + */ | |
42 | + public String[] getResult(URL url) { | |
43 | + | |
44 | + HtmlParser par = new HtmlParser(url); | |
45 | + | |
46 | + String[] result = new String[SearchData.size()]; | |
47 | + for(int i = 0; i < SearchData.size(); i++) { | |
48 | + result[i] = par.search(SearchData.get(i)); | |
49 | + } | |
50 | + | |
51 | + if(!resultCheck(result)) { | |
52 | + return null; | |
53 | + } | |
54 | + return result; | |
55 | + } | |
56 | + | |
57 | + /** | |
58 | + * 結果文字列チェック. | |
59 | + * @param result | |
60 | + * @return 文字列配列に1文字でも入力有り(null/SPACE以外)の時、true | |
61 | + */ | |
62 | + boolean resultCheck(String[] result) { | |
63 | + for (String result1 : result) { | |
64 | + if (result1 != null && result1.trim().length() > 0) { | |
65 | + return true; | |
66 | + } | |
67 | + } | |
68 | + return false; | |
69 | + } | |
70 | + | |
71 | +} |
@@ -0,0 +1,153 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2014-2015 kgto. | |
3 | + * | |
4 | + * This library is free software; you can redistribute it and/or | |
5 | + * modify it under the terms of the GNU Lesser General Public | |
6 | + * License as published by the Free Software Foundation; either | |
7 | + * version 2.1 of the License, or (at your option) any later version. | |
8 | + * | |
9 | + * This library is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | + * Lesser General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU Lesser General Public | |
15 | + * License along with this library; if not, write to the Free Software | |
16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | + * MA 02110-1301 USA | |
18 | + */ | |
19 | +/* | |
20 | + * $Id: LibraryXml.java 138 2016-05-17 06:40:29Z tuna_p $ | |
21 | + */ | |
22 | + | |
23 | +package webScraping.utility; | |
24 | + | |
25 | +import java.io.File; | |
26 | +import java.io.FileNotFoundException; | |
27 | +import java.io.FileOutputStream; | |
28 | +import java.io.IOException; | |
29 | +import java.io.InputStream; | |
30 | +import java.util.logging.Level; | |
31 | +import java.util.logging.Logger; | |
32 | + | |
33 | +import javax.xml.parsers.DocumentBuilder; | |
34 | +import javax.xml.parsers.DocumentBuilderFactory; | |
35 | +import javax.xml.parsers.ParserConfigurationException; | |
36 | +import javax.xml.transform.Transformer; | |
37 | +import javax.xml.transform.TransformerConfigurationException; | |
38 | +import javax.xml.transform.TransformerException; | |
39 | +import javax.xml.transform.TransformerFactory; | |
40 | +import javax.xml.transform.dom.DOMSource; | |
41 | +import javax.xml.transform.stream.StreamResult; | |
42 | + | |
43 | +import org.w3c.dom.DOMImplementation; | |
44 | +import org.w3c.dom.Document; | |
45 | +import org.w3c.dom.Element; | |
46 | +import org.w3c.dom.Node; | |
47 | +import org.w3c.dom.NodeList; | |
48 | +import org.xml.sax.SAXException; | |
49 | + | |
/**
 * Small DOM helper that builds an XML document for writing and parses an XML
 * document for reading. Both documents use a container root element whose
 * children are the per-feature root elements.
 */
public class LibraryXml {

    String xmlrootname = "xmlcontainer";

    DocumentBuilder builder;
    public Document readdoc, writedoc;
    Element xmlroot;

    /* ---------------------------------------------------------------------- *
     * Constructor
     * ---------------------------------------------------------------------- */
    /**
     * Creates the document builder. A configuration failure is logged and
     * leaves {@code builder} unset.
     */
    public LibraryXml() {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            builder = factory.newDocumentBuilder();

        } catch (ParserConfigurationException ex) {
            Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /* ---------------------------------------------------------------------- *
     * Methods
     * ---------------------------------------------------------------------- */
    /* Write side */

    /**
     * Creates a new element directly below the root of the document being
     * written; the document is created on first use.
     *
     * @param elementName name of the element to create
     * @return the new element, already attached to the write document
     */
    public Element getwriteRoot(String elementName) {
        mainElement();
        Element element = writedoc.createElement(elementName);
        // Attach to the write document's own root: xmlroot may point into the
        // read document if read() was called in the meantime, and appending
        // across documents fails with WRONG_DOCUMENT_ERR.
        writedoc.getDocumentElement().appendChild(element);
        return element;
    }

    /** Creates the write document with its container root if it does not exist yet. */
    private void mainElement() {
        if (writedoc == null) {
            DOMImplementation domImpl = builder.getDOMImplementation();
            writedoc = domImpl.createDocument("", xmlrootname, null);
            xmlroot = writedoc.getDocumentElement();
        }
    }

    /**
     * Writes the XML document to a file (indented) and, on success, discards
     * the written document. Failures are logged.
     *
     * @param file destination file
     */
    public void write(File file) {
        try (FileOutputStream os = new FileOutputStream(file)) {
            TransformerFactory transFactory = TransformerFactory.newInstance();
            Transformer transformer = transFactory.newTransformer();

            transformer.setOutputProperty("indent", "yes"); // line breaks / indentation
            transformer.setOutputProperty("method", "xml");

            DOMSource source = new DOMSource(writedoc);
            StreamResult result = new StreamResult(os);
            transformer.transform(source, result);

            // Clear the XML that has been written.
            writedoc = null;

        } catch (TransformerException | IOException ex) {
            // Covers TransformerConfigurationException and FileNotFoundException too.
            Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /* ---------------------------------------------------------------------- */
    /* Read side */

    /**
     * Returns the first element with the given name below the current root.
     *
     * @param elementName name of the element to look up
     * @return the element, or {@code null} when there is no current root or no
     *         such element
     */
    public Element getreadRoot(String elementName) {
        if (xmlroot == null) {
            return null;
        }
        // NodeList.item returns null when the list is empty.
        Node node = xmlroot.getElementsByTagName(elementName).item(0);
        if (node == null) {
            return null;
        }
        return (node.getNodeType() == Node.ELEMENT_NODE ? (Element) node : null);
    }

    /**
     * Reads an XML file and makes its document element the current root.
     * Failures are logged.
     *
     * @param file file to parse
     */
    public void read(File file) {
        try {
            readdoc = builder.parse(file);
            xmlroot = readdoc.getDocumentElement();

        } catch (SAXException | IOException ex) {
            Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Reads XML from a stream and makes its document element the current root.
     * Failures are logged.
     *
     * @param is stream to parse
     */
    public void read(InputStream is) {
        try {
            readdoc = builder.parse(is);
            xmlroot = readdoc.getDocumentElement();

        } catch (SAXException | IOException ex) {
            Logger.getLogger(LibraryXml.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /* ---------------------------------------------------------------------- */

}
@@ -0,0 +1,209 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2014-2015 kgto. | |
3 | + * | |
4 | + * This library is free software; you can redistribute it and/or | |
5 | + * modify it under the terms of the GNU Lesser General Public | |
6 | + * License as published by the Free Software Foundation; either | |
7 | + * version 2.1 of the License, or (at your option) any later version. | |
8 | + * | |
9 | + * This library is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | + * Lesser General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU Lesser General Public | |
15 | + * License along with this library; if not, write to the Free Software | |
16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | + * MA 02110-1301 USA | |
18 | + */ | |
19 | +/* | |
20 | + * $Id: ScrapingXml.java 138 2016-05-17 06:40:29Z tuna_p $ | |
21 | + */ | |
22 | + | |
23 | +package webScraping.utility; | |
24 | + | |
25 | +import webScraping.core.SearchData; | |
26 | +import java.io.File; | |
27 | +import java.io.InputStream; | |
28 | +import java.util.ArrayList; | |
29 | +import org.w3c.dom.Element; | |
30 | +import org.w3c.dom.Node; | |
31 | +import org.w3c.dom.NodeList; | |
32 | + | |
33 | +public class ScrapingXml { | |
34 | + /* ---------------------------------------------------------------------- * | |
35 | + * フィールド | |
36 | + * ---------------------------------------------------------------------- */ | |
37 | + String rootnameScraping = "webscraping"; | |
38 | + | |
39 | + private String testUrl; | |
40 | + private SearchData[] sdata; | |
41 | + | |
42 | + public LibraryXml xlib = new LibraryXml(); | |
43 | + public Element root; | |
44 | + | |
45 | + /* ---------------------------------------------------------------------- * | |
46 | + * コンストラクタ | |
47 | + * ---------------------------------------------------------------------- */ | |
48 | + public ScrapingXml() { | |
49 | + } | |
50 | + | |
51 | + /* ---------------------------------------------------------------------- * | |
52 | + * Setter | |
53 | + * ---------------------------------------------------------------------- */ | |
54 | + public void setTestUrl(String testUrl) { | |
55 | + this.testUrl = testUrl; | |
56 | + } | |
57 | + | |
58 | + public void setSdata() { | |
59 | + this.sdata = new SearchData[SearchData.size()]; | |
60 | + for(int i = 0; i < SearchData.size(); i++) { | |
61 | + this.sdata[i] = SearchData.get(i); | |
62 | + } | |
63 | + } | |
64 | + | |
65 | + /* ---------------------------------------------------------------------- * | |
66 | + * Getter | |
67 | + * ---------------------------------------------------------------------- */ | |
68 | + public String getTestUrl() { | |
69 | + return testUrl; | |
70 | + } | |
71 | + | |
72 | + public void getSdata() { | |
73 | + SearchData.clear(); | |
74 | + for(SearchData sdata1 : sdata) { | |
75 | + SearchData.add(sdata1); | |
76 | + } | |
77 | + } | |
78 | + | |
79 | + /* ---------------------------------------------------------------------- * | |
80 | + * メソッド | |
81 | + * ---------------------------------------------------------------------- */ | |
82 | + public void save(File file) { | |
83 | + | |
84 | + elementset(); | |
85 | + | |
86 | + xlib.write(file); | |
87 | + } | |
88 | + | |
89 | + public void elementset() { | |
90 | + root = xlib.getwriteRoot(rootnameScraping); | |
91 | + elementsetUrl(); | |
92 | + elementsetSearchdata(); | |
93 | + System.out.println("elementset XmlScraping"); | |
94 | + } | |
95 | + | |
96 | + private void elementsetUrl() { | |
97 | + if(testUrl == null) return; | |
98 | + if(testUrl.isEmpty()) return; | |
99 | + | |
100 | + Element url = xlib.writedoc.createElement("url"); | |
101 | + url.appendChild(xlib.writedoc.createTextNode(testUrl)); | |
102 | + root.appendChild(url); | |
103 | + } | |
104 | + | |
105 | + private void elementsetSearchdata() { | |
106 | + int count = 0; | |
107 | + for(SearchData sdat : sdata) { | |
108 | + Element cslist = xlib.writedoc.createElement("searchlist"); | |
109 | + cslist.setAttribute("listNo", String.valueOf(++count)); | |
110 | + | |
111 | + addChild(cslist, "item" , sdat.getitem()); | |
112 | + addChild(cslist, "htmltag" , sdat.getHtmltag()); | |
113 | + addChild(cslist, "htmlid" , sdat.getHtmlid()); | |
114 | + addChild(cslist, "htmlclass", sdat.getHtmlclass()); | |
115 | + addChild(cslist, "around" , sdat.getaround()); | |
116 | + addChild(cslist, "regexp" , sdat.getregexp()); | |
117 | + | |
118 | + root.appendChild(cslist); | |
119 | + } | |
120 | + } | |
121 | + | |
122 | + private void addChild(Element cslist, String keyword, String data) { | |
123 | + if(!data.isEmpty()) { | |
124 | + Element element = xlib.writedoc.createElement(keyword); | |
125 | + element.appendChild(xlib.writedoc.createTextNode(data)); | |
126 | + cslist.appendChild(element); | |
127 | + } | |
128 | + } | |
129 | + | |
130 | + /* ---------------------------------------------------------------------- */ | |
131 | + | |
132 | + public void load(File file) { | |
133 | + xlib.read(file); | |
134 | + elementget(); | |
135 | + } | |
136 | + | |
137 | + public void load(InputStream is) { | |
138 | + xlib.read(is); | |
139 | + elementget(); | |
140 | + } | |
141 | + | |
142 | + public void elementget() { | |
143 | + root = xlib.getreadRoot(rootnameScraping); | |
144 | + elementgetUrl(); | |
145 | + elementgetSearchdata(); | |
146 | + } | |
147 | + | |
148 | + private void elementgetUrl() { | |
149 | + NodeList nodelist = root.getElementsByTagName("url"); | |
150 | + if(nodelist.getLength() > 0) { | |
151 | + Node node = nodelist.item(0); | |
152 | + testUrl = node.getFirstChild().getNodeValue(); | |
153 | + } | |
154 | + } | |
155 | + | |
156 | + private void elementgetSearchdata() { | |
157 | + ArrayList<SearchData> slist = new ArrayList<>(); | |
158 | + | |
159 | + NodeList nodelist = root.getElementsByTagName("searchlist"); | |
160 | + for(int i = 0; i < nodelist.getLength(); i++) { | |
161 | + Node childnode = nodelist.item(i); | |
162 | + | |
163 | + boolean sdatflg = false; | |
164 | + SearchData sdat = new SearchData(); | |
165 | + for (Node child = childnode.getFirstChild(); child != null; child = child.getNextSibling()) { | |
166 | + if(child.getNodeType() == Node.ELEMENT_NODE) { | |
167 | + String tag = child.getNodeName(); | |
168 | + String rtn = ""; | |
169 | + if(child.getFirstChild() != null) { | |
170 | + rtn = child.getFirstChild().getNodeValue(); | |
171 | + } | |
172 | + switch (tag) { | |
173 | + case "item" : | |
174 | + sdat.setitem(rtn); | |
175 | + sdatflg = true; | |
176 | + break; | |
177 | + case "htmltag" : | |
178 | + sdat.setHtmltag(rtn); | |
179 | + sdatflg = true; | |
180 | + break; | |
181 | + case "htmlid" : | |
182 | + sdat.setHtmlid(rtn); | |
183 | + sdatflg = true; | |
184 | + break; | |
185 | + case "htmlclass" : | |
186 | + sdat.setHtmlclass(rtn); | |
187 | + sdatflg = true; | |
188 | + break; | |
189 | + case "around" : | |
190 | + sdat.setaround(rtn); | |
191 | + sdatflg = true; | |
192 | + break; | |
193 | + case "regexp" : | |
194 | + sdat.setregexp(rtn); | |
195 | + sdatflg = true; | |
196 | + break; | |
197 | + } | |
198 | + } | |
199 | + } | |
200 | + if(sdatflg) slist.add(sdat); | |
201 | + } | |
202 | + // 配列化 | |
203 | + sdata = new SearchData[slist.size()]; | |
204 | + for(int i = 0; i < slist.size(); i++) { | |
205 | + sdata[i] = slist.get(i); | |
206 | + } | |
207 | + } | |
208 | + | |
209 | +} |
@@ -0,0 +1,611 @@ | ||
1 | +/* | |
2 | + * Copyright (C) 2014 kgto. | |
3 | + * | |
4 | + * This library is free software; you can redistribute it and/or | |
5 | + * modify it under the terms of the GNU Lesser General Public | |
6 | + * License as published by the Free Software Foundation; either | |
7 | + * version 2.1 of the License, or (at your option) any later version. | |
8 | + * | |
9 | + * This library is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | + * Lesser General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU Lesser General Public | |
15 | + * License along with this library; if not, write to the Free Software | |
16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
17 | + * MA 02110-1301 USA | |
18 | + */ | |
19 | +/* | |
20 | + * $Id: HtmlSearch.java 139 2016-05-17 09:16:40Z tuna_p $ | |
21 | + */ | |
22 | +package webScraping.utility; | |
23 | + | |
24 | +import webScraping.core.Scraping; | |
25 | +import webScraping.core.HtmlParser; | |
26 | +import java.awt.Desktop; | |
27 | +import java.io.File; | |
28 | +import java.io.IOException; | |
29 | +import java.net.MalformedURLException; | |
30 | +import java.net.URI; | |
31 | +import java.net.URISyntaxException; | |
32 | +import java.net.URL; | |
33 | +import java.util.logging.Level; | |
34 | +import java.util.logging.Logger; | |
35 | +import javax.swing.JFileChooser; | |
36 | +import javax.swing.filechooser.FileFilter; | |
37 | +import javax.swing.filechooser.FileNameExtensionFilter; | |
38 | +import javax.swing.table.DefaultTableModel; | |
39 | +import webScraping.core.SearchData; | |
40 | + | |
41 | +/** | |
42 | + * HTMLページ上の特定の項目を検索し、その項目内容の値を取得する. | |
43 | + * @author kgto | |
44 | + */ | |
45 | +public class HtmlSearch extends javax.swing.JFrame { | |
46 | + private final ScrapingXml xmlwriter = new ScrapingXml(); | |
47 | + | |
48 | + SearchDataTableModel sdatatblmodel; | |
49 | + | |
50 | + /** | |
51 | + * Creates new form Frame1 | |
52 | + */ | |
53 | + public HtmlSearch() { | |
54 | + sdatatblmodel = new SearchDataTableModel(); | |
55 | + | |
56 | + initComponents(); | |
57 | + | |
58 | + // カレントディレクトリ取得 | |
59 | + String dir = System.getProperty("user.dir"); | |
60 | + File file = new java.io.File(dir + "\\data"); | |
61 | + jFileChooser1.setCurrentDirectory(file); | |
62 | + | |
63 | + FileFilter filter1 = new FileNameExtensionFilter("XMLファイル", "xml"); | |
64 | + FileFilter filter2 = new FileNameExtensionFilter("TEXTファイル", "txt"); | |
65 | + jFileChooser1.addChoosableFileFilter(filter1); | |
66 | + jFileChooser1.addChoosableFileFilter(filter2); | |
67 | + jFileChooser1.setFileFilter(filter1); | |
68 | + | |
69 | + } | |
70 | + | |
/**
 * This method is called from within the constructor to initialize the form.
 * WARNING: Do NOT modify this code. The content of this method is always
 * regenerated by the Form Editor.
 *
 * <p>Layout summary: a URL row (label, text field, search button) at the
 * top, a tabbed pane below it (tab 1: search-condition table with row
 * buttons, tab 2: "no result" message list), and the result text area
 * underneath. The menu bar holds a file menu (load/save), a tools menu
 * (open in browser) and a search menu.
 */
@SuppressWarnings("unchecked")
// <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
private void initComponents() {

    // Instantiate every Swing component used by the form.
    jFileChooser1 = new javax.swing.JFileChooser();
    jRadioButton1 = new javax.swing.JRadioButton();
    jLabel1 = new javax.swing.JLabel();
    jTxtUrl = new javax.swing.JTextField();
    jBtnSearch = new javax.swing.JButton();
    jTabbedPane1 = new javax.swing.JTabbedPane();
    jPanelTab1 = new javax.swing.JPanel();
    jScrollPane1 = new javax.swing.JScrollPane();
    jTable1 = new javax.swing.JTable();
    jBtnRowIns = new javax.swing.JButton();
    jBtnRowDel = new javax.swing.JButton();
    jBtnRowCpy = new javax.swing.JButton();
    jPanelTab2 = new javax.swing.JPanel();
    jScrollPaneLabel = new javax.swing.JScrollPane();
    jTxtLabel = new javax.swing.JTextArea();
    jScrollPane404msg = new javax.swing.JScrollPane();
    jTxt404msg = new javax.swing.JTextArea();
    jPanelRtn = new javax.swing.JPanel();
    jScrollPaneRtn = new javax.swing.JScrollPane();
    jTxtRtn = new javax.swing.JTextArea();
    jMenuBar1 = new javax.swing.JMenuBar();
    jMenu1 = new javax.swing.JMenu();
    jMenuLoad = new javax.swing.JMenuItem();
    jMenuSave = new javax.swing.JMenuItem();
    jMenu3 = new javax.swing.JMenu();
    jMenuItem1 = new javax.swing.JMenuItem();
    jMenu2 = new javax.swing.JMenu();

    // File chooser defaults; the constructor sets the real start directory afterwards.
    jFileChooser1.setCurrentDirectory(null);
    jFileChooser1.setDialogTitle("");

    // NOTE(review): jRadioButton1 is created and captioned here but never added to any layout in this method.
    jRadioButton1.setText("jRadioButton1");

    setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
    setTitle("タグ検索");

    // URL row: label + search button (the text field needs no extra setup).
    jLabel1.setText(" URL:");

    jBtnSearch.setText("検索");
    jBtnSearch.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnSearchActionPerformed(evt);
        }
    });

    // Tab 1: search-condition table with insert / delete / copy row buttons.
    jPanelTab1.setBorder(javax.swing.BorderFactory.createTitledBorder("検索情報"));

    jTable1.setModel(sdatatblmodel);
    jTable1.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION);
    jTable1.getTableHeader().setReorderingAllowed(false);
    jScrollPane1.setViewportView(jTable1);

    jBtnRowIns.setText("行挿入");
    jBtnRowIns.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnRowInsActionPerformed(evt);
        }
    });

    jBtnRowDel.setText("行削除");
    jBtnRowDel.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnRowDelActionPerformed(evt);
        }
    });

    jBtnRowCpy.setText("行コピー");
    jBtnRowCpy.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jBtnRowCpyActionPerformed(evt);
        }
    });

    // Tab 1 layout: table on top, the three row buttons in one row below it.
    javax.swing.GroupLayout jPanelTab1Layout = new javax.swing.GroupLayout(jPanelTab1);
    jPanelTab1.setLayout(jPanelTab1Layout);
    jPanelTab1Layout.setHorizontalGroup(
        jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(jPanelTab1Layout.createSequentialGroup()
            .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
            .addComponent(jBtnRowCpy)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jBtnRowDel)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jBtnRowIns))
        .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 0, Short.MAX_VALUE)
    );
    jPanelTab1Layout.setVerticalGroup(
        jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(jPanelTab1Layout.createSequentialGroup()
            .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 173, Short.MAX_VALUE)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addGroup(jPanelTab1Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
                .addComponent(jBtnRowDel)
                .addComponent(jBtnRowIns)
                .addComponent(jBtnRowCpy)))
    );

    jTabbedPane1.addTab("キー設定", jPanelTab1);

    // Tab 2: read-only explanatory text plus the editable "no result" message list.
    jPanelTab2.setBorder(javax.swing.BorderFactory.createTitledBorder("メッセージ"));

    jScrollPaneLabel.setHorizontalScrollBarPolicy(javax.swing.ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER);
    jScrollPaneLabel.setVerticalScrollBarPolicy(javax.swing.ScrollPaneConstants.VERTICAL_SCROLLBAR_NEVER);

    // jTxtLabel is a text area configured to act as a static, wrapping label:
    // not editable, not focusable, no highlighter, no keymap.
    jTxtLabel.setEditable(false);
    jTxtLabel.setBackground(java.awt.Color.lightGray);
    jTxtLabel.setColumns(20);
    jTxtLabel.setFont(new java.awt.Font("MS UI Gothic", 0, 12)); // NOI18N
    jTxtLabel.setLineWrap(true);
    jTxtLabel.setRows(2);
    jTxtLabel.setText("取得ページに以下のメッセージが含まれていた場合、対象データが取得出来なかったと通知します。");
    jTxtLabel.setAutoscrolls(false);
    jTxtLabel.setBorder(null);
    jTxtLabel.setCursor(new java.awt.Cursor(java.awt.Cursor.DEFAULT_CURSOR));
    jTxtLabel.setFocusable(false);
    jTxtLabel.setHighlighter(null);
    jTxtLabel.setKeymap(null);
    jTxtLabel.setOpaque(false);
    jTxtLabel.setRequestFocusEnabled(false);
    jTxtLabel.setVerifyInputWhenFocusTarget(false);
    jScrollPaneLabel.setViewportView(jTxtLabel);

    // Default "no result" marker text; Search_execution_old() splits this area's text on newlines.
    jTxt404msg.setColumns(20);
    jTxt404msg.setRows(3);
    jTxt404msg.setText("一致する銘柄は見つかりませんでした\n");
    jScrollPane404msg.setViewportView(jTxt404msg);

    // Tab 2 layout: explanatory label (fixed 38 px high) above the message list.
    javax.swing.GroupLayout jPanelTab2Layout = new javax.swing.GroupLayout(jPanelTab2);
    jPanelTab2.setLayout(jPanelTab2Layout);
    jPanelTab2Layout.setHorizontalGroup(
        jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addComponent(jScrollPane404msg)
        .addGroup(javax.swing.GroupLayout.Alignment.TRAILING, jPanelTab2Layout.createSequentialGroup()
            .addContainerGap()
            .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.DEFAULT_SIZE, 359, Short.MAX_VALUE)
            .addContainerGap())
    );
    jPanelTab2Layout.setVerticalGroup(
        jPanelTab2Layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(jPanelTab2Layout.createSequentialGroup()
            .addComponent(jScrollPaneLabel, javax.swing.GroupLayout.PREFERRED_SIZE, 38, javax.swing.GroupLayout.PREFERRED_SIZE)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
            .addComponent(jScrollPane404msg))
    );

    jTabbedPane1.addTab("結果無し判定", jPanelTab2);

    // Result panel: a single scrollable text area.
    jPanelRtn.setBorder(javax.swing.BorderFactory.createTitledBorder("検索結果"));

    jTxtRtn.setColumns(20);
    jTxtRtn.setRows(5);
    jScrollPaneRtn.setViewportView(jTxtRtn);

    javax.swing.GroupLayout jPanelRtnLayout = new javax.swing.GroupLayout(jPanelRtn);
    jPanelRtn.setLayout(jPanelRtnLayout);
    jPanelRtnLayout.setHorizontalGroup(
        jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addComponent(jScrollPaneRtn)
    );
    jPanelRtnLayout.setVerticalGroup(
        jPanelRtnLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addComponent(jScrollPaneRtn, javax.swing.GroupLayout.DEFAULT_SIZE, 163, Short.MAX_VALUE)
    );

    // Menu bar: file menu with LOAD / SAVE.
    jMenu1.setText("ファイル");

    jMenuLoad.setText("LOAD");
    jMenuLoad.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jMenuLoadActionPerformed(evt);
        }
    });
    jMenu1.add(jMenuLoad);

    jMenuSave.setText("SAVE");
    jMenuSave.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jMenuSaveActionPerformed(evt);
        }
    });
    jMenu1.add(jMenuSave);

    jMenuBar1.add(jMenu1);

    // Tools menu: open the current URL in the system browser.
    jMenu3.setText("ツール");

    jMenuItem1.setText("ブラウザで表示");
    jMenuItem1.addActionListener(new java.awt.event.ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent evt) {
            jMenuItem1ActionPerformed(evt);
        }
    });
    jMenu3.add(jMenuItem1);

    jMenuBar1.add(jMenu3);

    // Search menu: has no items, so it reacts to a mouse click on the menu itself.
    jMenu2.setText("検索");
    jMenu2.addMouseListener(new java.awt.event.MouseAdapter() {
        public void mouseClicked(java.awt.event.MouseEvent evt) {
            jMenu2MouseClicked(evt);
        }
    });
    jMenuBar1.add(jMenu2);

    setJMenuBar(jMenuBar1);

    // Frame layout: URL row, then the tabbed pane (fixed 250 px high), then the result panel.
    javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
    getContentPane().setLayout(layout);
    layout.setHorizontalGroup(
        layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
        .addGroup(layout.createSequentialGroup()
            .addComponent(jLabel1)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jTxtUrl)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jBtnSearch))
        .addComponent(jTabbedPane1)
    );
    layout.setVerticalGroup(
        layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
        .addGroup(layout.createSequentialGroup()
            .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
                .addComponent(jLabel1)
                .addComponent(jTxtUrl, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
                .addComponent(jBtnSearch))
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jTabbedPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 250, javax.swing.GroupLayout.PREFERRED_SIZE)
            .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
            .addComponent(jPanelRtn, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
            .addContainerGap())
    );

    pack();
}// </editor-fold>//GEN-END:initComponents
315 | + | |
316 | + private void jBtnRowInsActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowInsActionPerformed | |
317 | + int SelectedRow = jTable1.getSelectedRow(); | |
318 | + SearchData sdata = new SearchData(); | |
319 | + if(SelectedRow >= 0) { | |
320 | + sdatatblmodel.insertRow(SelectedRow, sdata); | |
321 | + } else { | |
322 | + sdatatblmodel.addRow(sdata); | |
323 | + } | |
324 | + }//GEN-LAST:event_jBtnRowInsActionPerformed | |
325 | + | |
326 | + private void jBtnRowDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowDelActionPerformed | |
327 | + int SelectedRow = jTable1.getSelectedRow(); | |
328 | + if(!(SelectedRow < 0)) { | |
329 | + sdatatblmodel.removeRow(SelectedRow); | |
330 | + } | |
331 | + }//GEN-LAST:event_jBtnRowDelActionPerformed | |
332 | + | |
333 | + private void jMenuLoadActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuLoadActionPerformed | |
334 | + jFileChooser1.setDialogTitle("読込"); | |
335 | + int selected = jFileChooser1.showOpenDialog(this); | |
336 | + if (selected == JFileChooser.APPROVE_OPTION) { | |
337 | + File file = jFileChooser1.getSelectedFile(); | |
338 | + xmlwriter.load(file); | |
339 | + jTxtUrl.setText(xmlwriter.getTestUrl()); | |
340 | + xmlwriter.getSdata(); | |
341 | + sdatatblmodel.setRowCount(0); | |
342 | + for(int i = 0; i < SearchData.size(); i++) { | |
343 | + SearchData sdata = SearchData.get(i); | |
344 | + sdatatblmodel.addRow(sdata); | |
345 | + } | |
346 | + } | |
347 | + }//GEN-LAST:event_jMenuLoadActionPerformed | |
348 | + | |
349 | + private void jMenuSaveActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuSaveActionPerformed | |
350 | + jFileChooser1.setDialogTitle("保存"); | |
351 | + int selected = jFileChooser1.showSaveDialog(this); | |
352 | + if (selected == JFileChooser.APPROVE_OPTION) { | |
353 | + File file = jFileChooser1.getSelectedFile(); | |
354 | + xmlwriter.setTestUrl(jTxtUrl.getText()); | |
355 | + | |
356 | + SearchData.clear(); | |
357 | + for(int row = 0; row < sdatatblmodel.getRowCount(); row++) { | |
358 | + SearchData sdata = sdatatblmodel.getSearchData(row); | |
359 | + SearchData.add(sdata); | |
360 | + } | |
361 | + xmlwriter.setSdata(); | |
362 | + xmlwriter.save(file); | |
363 | + } | |
364 | + }//GEN-LAST:event_jMenuSaveActionPerformed | |
365 | + | |
366 | + private void jBtnRowCpyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnRowCpyActionPerformed | |
367 | + int SelectedRow = jTable1.getSelectedRow(); | |
368 | + if(SelectedRow >= 0) { | |
369 | + SearchData sdata = sdatatblmodel.getSearchData(SelectedRow); | |
370 | + sdatatblmodel.insertRow(SelectedRow, sdata); | |
371 | + } | |
372 | + }//GEN-LAST:event_jBtnRowCpyActionPerformed | |
373 | + | |
374 | + private void jMenuItem1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jMenuItem1ActionPerformed | |
375 | + Desktop desktop = Desktop.getDesktop(); | |
376 | + String uriString = jTxtUrl.getText(); | |
377 | + try { | |
378 | + URI uri = new URI(uriString); | |
379 | + desktop.browse(uri); | |
380 | + | |
381 | + } catch (URISyntaxException | IOException ex) { | |
382 | + Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex); | |
383 | + } | |
384 | + }//GEN-LAST:event_jMenuItem1ActionPerformed | |
385 | + | |
386 | + private void jMenu2MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jMenu2MouseClicked | |
387 | + Search_execution(); | |
388 | + }//GEN-LAST:event_jMenu2MouseClicked | |
389 | + | |
390 | + private void jBtnSearchActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jBtnSearchActionPerformed | |
391 | + Search_execution(); | |
392 | + }//GEN-LAST:event_jBtnSearchActionPerformed | |
393 | + | |
394 | + /** | |
395 | + * 検索実行. | |
396 | + */ | |
397 | + void Search_execution_old() { | |
398 | + jTxtRtn.setText(null); | |
399 | + HtmlParser par = new HtmlParser(jTxtUrl.getText()); | |
400 | + | |
401 | + // データ無し(404)判定 | |
402 | + String strdata = par.getStringPageData(); | |
403 | + if(strdata == null) { | |
404 | + jTxtRtn.append("読込みページがありません"); | |
405 | + return; | |
406 | + } | |
407 | + String text = jTxt404msg.getText(); | |
408 | + String[] strsearch = text.split("\n"); | |
409 | + for(String strsearch1 : strsearch) { | |
410 | + if(strdata.contains(strsearch1)) { | |
411 | + jTxtRtn.append(strsearch1); | |
412 | + return; | |
413 | + } | |
414 | + } | |
415 | + | |
416 | + // 検索結果 | |
417 | + for(int row = 0; row < sdatatblmodel.getRowCount(); row++) { | |
418 | + SearchData sdata = sdatatblmodel.getSearchData(row); | |
419 | + String ans = sdata.getitem(); | |
420 | + String rtn = par.search(sdata); | |
421 | + jTxtRtn.append(ans + "\t" + rtn + "\n"); | |
422 | + } | |
423 | + | |
424 | + jTxtRtn.setCaretPosition(0); | |
425 | + } | |
426 | + | |
427 | + /** | |
428 | + * 検索実行. | |
429 | + */ | |
430 | + void Search_execution() { | |
431 | + jTxtRtn.setText(null); | |
432 | + Scraping scrap = new Scraping(); | |
433 | + | |
434 | + // URL生成 | |
435 | + URL url = null; | |
436 | + try { | |
437 | + url = new URL(jTxtUrl.getText()); | |
438 | + } catch (MalformedURLException ex) { | |
439 | + Logger.getLogger(HtmlSearch.class.getName()).log(Level.SEVERE, null, ex); | |
440 | + } | |
441 | + | |
442 | + // SearchData生成 | |
443 | + SearchData.clear(); | |
444 | + for(int row = 0; row < sdatatblmodel.getRowCount(); row++) { | |
445 | + SearchData.add(sdatatblmodel.getSearchData(row)); | |
446 | + } | |
447 | + | |
448 | + // HTML検索 | |
449 | + String[] result = scrap.getResult(url); | |
450 | + | |
451 | + // 検索結果 | |
452 | + if(result == null) { | |
453 | + jTxtRtn.append("Data not find"); | |
454 | + return; | |
455 | + } | |
456 | + | |
457 | + // 結果表示 | |
458 | + for(int i = 0; i < SearchData.size(); i++) { | |
459 | + String ans = SearchData.get(i).getitem(); | |
460 | + String rtn = result[i]; | |
461 | + jTxtRtn.append(ans + "\t" + rtn + "\n"); | |
462 | + } | |
463 | + | |
464 | + jTxtRtn.setCaretPosition(0); | |
465 | + } | |
466 | + | |
467 | + /** | |
468 | + * @param args the command line arguments | |
469 | + */ | |
470 | + public static void main(String args[]) { | |
471 | + /* Set the Nimbus look and feel */ | |
472 | + //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) "> | |
473 | + /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel. | |
474 | + * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html | |
475 | + */ | |
476 | + try { | |
477 | + for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) { | |
478 | + if ("Nimbus".equals(info.getName())) { | |
479 | + javax.swing.UIManager.setLookAndFeel(info.getClassName()); | |
480 | + break; | |
481 | + } | |
482 | + } | |
483 | + } catch (ClassNotFoundException | |
484 | + | InstantiationException | |
485 | + | IllegalAccessException | |
486 | + | javax.swing.UnsupportedLookAndFeelException ex) { | |
487 | + java.util.logging.Logger.getLogger(HtmlSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); | |
488 | + } | |
489 | + //</editor-fold> | |
490 | + | |
491 | + /* Create and display the form */ | |
492 | + java.awt.EventQueue.invokeLater(new Runnable() { | |
493 | + @Override | |
494 | + public void run() { | |
495 | + new HtmlSearch().setVisible(true); | |
496 | + } | |
497 | + }); | |
498 | + } | |
499 | + | |
500 | + // Variables declaration - do not modify//GEN-BEGIN:variables | |
501 | + private javax.swing.JButton jBtnRowCpy; | |
502 | + private javax.swing.JButton jBtnRowDel; | |
503 | + private javax.swing.JButton jBtnRowIns; | |
504 | + private javax.swing.JButton jBtnSearch; | |
505 | + private javax.swing.JFileChooser jFileChooser1; | |
506 | + private javax.swing.JLabel jLabel1; | |
507 | + private javax.swing.JMenu jMenu1; | |
508 | + private javax.swing.JMenu jMenu2; | |
509 | + private javax.swing.JMenu jMenu3; | |
510 | + private javax.swing.JMenuBar jMenuBar1; | |
511 | + private javax.swing.JMenuItem jMenuItem1; | |
512 | + private javax.swing.JMenuItem jMenuLoad; | |
513 | + private javax.swing.JMenuItem jMenuSave; | |
514 | + private javax.swing.JPanel jPanelRtn; | |
515 | + private javax.swing.JPanel jPanelTab1; | |
516 | + private javax.swing.JPanel jPanelTab2; | |
517 | + private javax.swing.JRadioButton jRadioButton1; | |
518 | + private javax.swing.JScrollPane jScrollPane1; | |
519 | + private javax.swing.JScrollPane jScrollPane404msg; | |
520 | + private javax.swing.JScrollPane jScrollPaneLabel; | |
521 | + private javax.swing.JScrollPane jScrollPaneRtn; | |
522 | + private javax.swing.JTabbedPane jTabbedPane1; | |
523 | + private javax.swing.JTable jTable1; | |
524 | + private javax.swing.JTextArea jTxt404msg; | |
525 | + private javax.swing.JTextArea jTxtLabel; | |
526 | + private javax.swing.JTextArea jTxtRtn; | |
527 | + private javax.swing.JTextField jTxtUrl; | |
528 | + // End of variables declaration//GEN-END:variables | |
529 | +} | |
530 | + | |
531 | +class SearchDataTableModel extends DefaultTableModel { | |
532 | + /* ---------------------------------------------------------------------- * | |
533 | + * データ属性 | |
534 | + * ---------------------------------------------------------------------- */ | |
535 | + public String[] columnName = { | |
536 | + /* 0 */ "項目名", | |
537 | + /* 1 */ "タグ", | |
538 | + /* 2 */ "ID", | |
539 | + /* 3 */ "クラス", | |
540 | + /* 4 */ "位置", | |
541 | + /* 5 */ "抽出条件" | |
542 | + }; | |
543 | + | |
544 | + public Class[] columnClass = { | |
545 | + /* 0 */ String.class, | |
546 | + /* 1 */ String.class, | |
547 | + /* 2 */ String.class, | |
548 | + /* 3 */ String.class, | |
549 | + /* 4 */ String.class, | |
550 | + /* 5 */ String.class | |
551 | + }; | |
552 | + | |
553 | + int column_item = 0; | |
554 | + int column_htmltag = 1; | |
555 | + int column_htmlid = 2; | |
556 | + int column_htmlclass = 3; | |
557 | + int column_around = 4; | |
558 | + int column_regexp = 5; | |
559 | + | |
560 | + /* ---------------------------------------------------------------------- * | |
561 | + * 処理 | |
562 | + * ---------------------------------------------------------------------- */ | |
563 | + @Override | |
564 | + public String getColumnName(int modelIndex) { | |
565 | + return columnName[modelIndex]; | |
566 | + } | |
567 | + | |
568 | + @Override | |
569 | + public Class<?> getColumnClass(int modelIndex) { | |
570 | + return columnClass[modelIndex]; | |
571 | + } | |
572 | + | |
573 | + @Override | |
574 | + public int getColumnCount() { | |
575 | + return columnName.length; | |
576 | + } | |
577 | + | |
578 | + /* ---------------------------------------------------------------------- */ | |
579 | + | |
580 | + public SearchData getSearchData(int row) { | |
581 | + SearchData sdata = new SearchData(); | |
582 | + sdata.setitem(String.valueOf(getValueAt(row, column_item))); | |
583 | + sdata.setHtmltag(String.valueOf(getValueAt(row, column_htmltag))); | |
584 | + sdata.setHtmlid(String.valueOf(getValueAt(row, column_htmlid))); | |
585 | + sdata.setHtmlclass(String.valueOf(getValueAt(row, column_htmlclass))); | |
586 | + sdata.setaround(String.valueOf(getValueAt(row, column_around))); | |
587 | + sdata.setregexp(String.valueOf(getValueAt(row, column_regexp))); | |
588 | + return sdata; | |
589 | + } | |
590 | + | |
591 | + public void addRow(SearchData sdata) { | |
592 | + addRow(getObjdata(sdata)); | |
593 | + } | |
594 | + | |
595 | + public void insertRow(int row, SearchData sdata) { | |
596 | + insertRow(row, getObjdata(sdata)); | |
597 | + } | |
598 | + | |
599 | + private Object[] getObjdata(SearchData sdata) { | |
600 | + Object[] obj = new Object[] { | |
601 | + sdata.getitem(), | |
602 | + sdata.getHtmltag(), | |
603 | + sdata.getHtmlid(), | |
604 | + sdata.getHtmlclass(), | |
605 | + sdata.getaround(), | |
606 | + sdata.getregexp() | |
607 | + }; | |
608 | + return obj; | |
609 | + } | |
610 | + | |
611 | +} | |
\ No newline at end of file |