svnno****@sourc*****
svnno****@sourc*****
2009年 8月 11日 (火) 06:45:12 JST
Revision: 2549 http://sourceforge.jp/projects/kita/svn/view?view=rev&revision=2549 Author: nogu Date: 2009-08-11 06:45:12 +0900 (Tue, 11 Aug 2009) Log Message: ----------- add parser.{h,cpp} Modified Paths: -------------- kita/branches/KITA-KDE4/kita/src/libkita/CMakeLists.txt kita/branches/KITA-KDE4/kita/src/libkita/datinfo.cpp kita/branches/KITA-KDE4/kita/src/libkita/kita_misc.cpp kita/branches/KITA-KDE4/kita/src/libkita/kita_misc.h Added Paths: ----------- kita/branches/KITA-KDE4/kita/src/libkita/parser.cpp kita/branches/KITA-KDE4/kita/src/libkita/parser.h Modified: kita/branches/KITA-KDE4/kita/src/libkita/CMakeLists.txt =================================================================== --- kita/branches/KITA-KDE4/kita/src/libkita/CMakeLists.txt 2009-08-10 21:12:13 UTC (rev 2548) +++ kita/branches/KITA-KDE4/kita/src/libkita/CMakeLists.txt 2009-08-10 21:45:12 UTC (rev 2549) @@ -20,6 +20,7 @@ k2ch.cpp jbbs.cpp machibbs.cpp + parser.cpp postdata.cpp thread.cpp thread.h Modified: kita/branches/KITA-KDE4/kita/src/libkita/datinfo.cpp =================================================================== --- kita/branches/KITA-KDE4/kita/src/libkita/datinfo.cpp 2009-08-10 21:12:13 UTC (rev 2548) +++ kita/branches/KITA-KDE4/kita/src/libkita/datinfo.cpp 2009-08-10 21:45:12 UTC (rev 2549) @@ -24,6 +24,7 @@ #include "globalconfig.h" #include "kita_misc.h" #include "kita-utf8.h" +#include "parser.h" #include "thread.h" using namespace Kita; @@ -383,10 +384,10 @@ if (!parseDat(num)) return QString(); QString titleHTML; - createTitleHTML(m_resDatVec[ num ], titleHTML); + Parser::createTitleHTML(m_resDatVec[ num ], titleHTML); QString retStr; - DatToText(titleHTML, retStr); + Parser::DatToText(titleHTML, retStr); return retStr; } @@ -398,7 +399,7 @@ if (!parseDat(num)) return QString(); QString retStr; - DatToText(m_resDatVec[ num ].bodyHTML, retStr); + Parser::DatToText(m_resDatVec[ num ].bodyHTML, retStr); return retStr; } @@ -451,7 +452,7 @@ return HTML_BROKEN; } else { - 
createTitleHTML(resdat, titleHTML); + Parser::createTitleHTML(resdat, titleHTML); bodyHTML = resdat.bodyHTML; return HTML_NORMAL; @@ -908,7 +909,7 @@ // qDebug("parseDat %d",num); QString subject; - parseResDat(m_resDatVec[ num ], subject); + Parser::parseResDat(m_resDatVec[ num ], subject); if (num == 1 && !subject.isEmpty()) m_thread->setThreadName(subject); if (m_resDatVec[ num ].broken) Modified: kita/branches/KITA-KDE4/kita/src/libkita/kita_misc.cpp =================================================================== --- kita/branches/KITA-KDE4/kita/src/libkita/kita_misc.cpp 2009-08-10 21:12:13 UTC (rev 2548) +++ kita/branches/KITA-KDE4/kita/src/libkita/kita_misc.cpp 2009-08-10 21:45:12 UTC (rev 2549) @@ -10,26 +10,17 @@ #include "kita_misc.h" -#include <QtCore/QDir> #include <QtCore/QRegExp> +#include <QtGui/QFont> #include <kurl.h> #include "boardmanager.h" -#include "datinfo.h" /* struct RESDAT is defined. */ #include "datmanager.h" -#include "globalconfig.h" -#include "kita-utf8.h" #include "kita-utf16.h" -static const int KITA_RESDIGIT = 4; - using namespace Kita; -static QString m_weekstr[ 7 ]; -static QString m_colonstr; -static QString m_colonnamestr; - /* fro convertURL */ static int m_prevConvMode; static QString m_prevConvUrl; @@ -40,121 +31,6 @@ static QString m_machiSubject; static QString m_machiLine; -/* conversion of DAT */ - - -/* get plain text from raw data */ -/* - This function replaces "<br>" to "\n", removes HTML tags and - replaces special chars. 
-*/ -void Kita::DatToText( - - /* input */ - const QString &rawData, - - /* output */ - QString& text -) -{ - text.clear(); - - unsigned int startPos, pos; - const QChar *chpt = rawData.unicode(); - unsigned int length = rawData.length(); - - for (unsigned int i = startPos = 0 ; i < length ; i++) { - - switch (chpt[ i ].unicode()) { - - case '<': - - /* " <br> " */ - if (chpt[ i + 1 ] == 'b' && chpt[ i + 2 ] == 'r' && chpt[ i + 3 ] == '>') { - - unsigned int i2 = i - startPos; - if (i > 0 && chpt[ i - 1 ] == ' ') i2--; /* remove space before <br> */ - text += rawData.mid(startPos, i2) + '\n'; - startPos = i + 4; - if (chpt[ startPos ] == ' ') startPos++; /* remove space after <br> */ - i = startPos - 1; - } - - /*----------------------------------------*/ - - /* remove HTML tags <[^>]*> */ - else { - - if (i - startPos) text += rawData.mid(startPos, i - startPos); - while (chpt[ i ] != '>' && i < length) i++; - startPos = i + 1; - } - - break; - - - /*----------------------------------*/ - - case '&': - - /* special char */ - { - QString tmpstr; - tmpstr = parseSpecialChar(chpt + i, pos); - - if (!tmpstr.isEmpty()) { - text += rawData.mid(startPos, i - startPos) + tmpstr; - startPos = i + pos; - i = startPos - 1; - } - } - - break; - } - } - - text += rawData.mid(startPos); -} - - -/* parsing function for special char (such as ♥ */ - -/* For example, if cdat = "&", then - - pos (= length of cdat) = 5, - retstr = "&". 
*/ -QString Kita::parseSpecialChar( - - /* input */ - const QChar *cdat, - - /* output */ - unsigned int& pos) -{ - QString retstr; - - if ((pos = isEqual(cdat , ">"))) retstr = '>'; - else if ((pos = isEqual(cdat , "<"))) retstr = '<'; - else if ((pos = isEqual(cdat , " "))) retstr = ' '; - else if ((pos = isEqual(cdat , "&"))) retstr = '&'; - else if ((pos = isEqual(cdat , """))) retstr = '"'; - - else if ((pos = isEqual(cdat , "♥"))) - retstr = QString::fromUtf8(KITAUTF8_HEART); - - else if ((pos = isEqual(cdat , "♦"))) - retstr = QString::fromUtf8(KITAUTF8_DIA); - - else if ((pos = isEqual(cdat , "♣"))) - retstr = QString::fromUtf8(KITAUTF8_CLUB); - - else if ((pos = isEqual(cdat , "♠"))) - retstr = QString::fromUtf8(KITAUTF8_SPADE); - - return retstr; -} - - /*------------------------------------------------------------*/ /*------------------------------------------------------------*/ @@ -381,19 +257,7 @@ } -/* if cdat == str, return str.length() */ -int Kita::isEqual(const QChar *cdat, const QString& str) -{ - int i = 0; - const int size = str.size(); - while (i < size && str.at(i) != '\0') { - if (*cdat != str.at(i)) return 0; - cdat++;i++; - } - return i; -} - /* convert strings to positive number. */ /* if cdat is not number, return -1. 
*/ @@ -618,645 +482,6 @@ /*-------------------------------------------------*/ /*-------------------------------------------------*/ -static void parseName(const QString& rawStr, RESDAT& resdat); -static void parseDateId(const QString& rawStr, RESDAT& resdat); -static void parseBody(const QString &rawStr, RESDAT& resdat); - -static bool parseLink(const QChar *cdat, const unsigned int length, - QString& linkstr, QString& linkurl, unsigned int& pos); -static bool parseResAnchor(const QChar *cdat, const unsigned int length, - QString& linkstr, int* refNum, unsigned int& pos); -static bool createResAnchor(const QString &rawStr, RESDAT& resdat, - const QChar *chpt, unsigned int &i, unsigned int &index); - -/* Main Parser */ - -/* - struct RESDAT is defined in datinfo.h. - This function is called from DatToHtml() and DatInfo::parseDat() - - input: - - resdat.num ... number - resdat.linestr ... raw line strings - - output: - - resdat.* - subject -*/ -bool Kita::parseResDat(RESDAT& resdat, QString& subject) -{ - if (resdat.parsed) return true; - - resdat.parsed = true; - resdat.broken = false; - resdat.anclist.clear(); - - /* search the staring positions of each section to split raw data. 
*/ - const QChar *chpt = resdat.linestr.unicode(); - unsigned int length = resdat.linestr.length(); - unsigned int section = 0; - unsigned int sectionPos[ 5 ]; - for (unsigned int i = 0 ; i < length ; i++) { - - /* sections are splitted by "<>" */ - if (chpt[ i ] == '<' && chpt[ i + 1 ] == '>') { - section++; - - - if (section >= 5) { - resdat.broken = true; - return true; - } - - sectionPos[ section ] = i + 2; - i++; - } - } - - /* broken data */ - if (section != 4) { - resdat.broken = true; - return true; - } - - // qDebug("[%d] %d %d %d %d",section, sectionPos[1],sectionPos[2],sectionPos[3],sectionPos[4]); - - /* name */ - length = sectionPos[ 1 ] - 2 ; - parseName(resdat.linestr.mid(0, length), resdat); - - /* mail */ - length = sectionPos[ 2 ] - 2 - sectionPos[ 1 ]; - DatToText(resdat.linestr.mid(sectionPos[ 1 ], length), resdat.address); - - /* date, ID, host */ - length = sectionPos[ 3 ] - 2 - sectionPos[ 2 ]; - parseDateId(resdat.linestr.mid(sectionPos[ 2 ], length), resdat); - - /* body */ - length = sectionPos[ 4 ] - 2 - sectionPos[ 3 ]; - parseBody(resdat.linestr.mid(sectionPos[ 3 ], length), resdat); - - /* subject */ - subject = resdat.linestr.mid(sectionPos[ 4 ]); - - return true; -} - - -/* parse name */ - -/* output: - - resdat.name - resdat.nameHTML - -*/ -void parseName(const QString& rawStr, RESDAT& resdat) -{ - unsigned int i = 0, pos; - int refNum[ 2 ]; - QString linkurl, linkstr; - - DatToText(rawStr, resdat.name); - - const QChar * chpt = resdat.name.unicode(); - unsigned int length = resdat.name.length(); - resdat.nameHTML.clear(); - - /* anchor */ - while (parseResAnchor(chpt + i, length - i, linkstr, refNum, pos)) { - - linkurl = QString("#%1").arg(refNum[ 0 ]); - if (refNum[ 1 ]) linkurl += QString("-%1").arg(refNum[ 1 ]); - - resdat.nameHTML += "<a href=\"" + linkurl + "\">"; - resdat.nameHTML += linkstr; - resdat.nameHTML += "</a>"; - - ANCNUM anctmp; - if (refNum[ 1 ] < refNum[ 0 ]) refNum[ 1 ] = refNum[ 0 ]; - anctmp.from = refNum[ 0 
]; - anctmp.to = refNum[ 1 ]; - resdat.anclist += anctmp; - - i += pos; - } - - /* non-digits strings */ - if (i < length) { - - resdat.nameHTML += "<span class=\"name_noaddr\">"; - resdat.nameHTML += resdat.name.mid(i); - resdat.nameHTML += "</span>"; - } - -} - - -/* parse date, ID, host */ - -/* output : - - resdat.dateTime - resdat.date - resdat.id - resdat.host - -*/ -void parseDateId(const QString& rawStr, RESDAT& resdat) -{ - resdat.date = rawStr; - resdat.id.clear(); - resdat.host.clear(); - resdat.be.clear(); - resdat.bepointmark.clear(); - - const QChar *chpt = rawStr.unicode(); - unsigned int pos = 0, startpos = 0; - unsigned int length = rawStr.length(); - - while (chpt[ pos ] != '\0' && - !(chpt[ pos ] == 'I' && chpt[ pos + 1 ] == 'D') && - !(chpt[ pos ] == 'B' && chpt[ pos + 1 ] == 'E')) { - pos++; - } - resdat.date = rawStr.left(pos); - - /* id */ - if (chpt[ pos ] == 'I' && chpt[ pos + 1 ] == 'D') { - pos += 3; - startpos = pos; - while (chpt[ pos ] != ' ' && pos++ < length) {}; - resdat.id = rawStr.mid(startpos, pos - startpos); - pos++; - } - - // qDebug("date %s, ID %s", (const char*)resdat.date.local8Bit(), resdat.id.ascii()); - - if (pos >= length) return ; - - /* be */ - if (chpt[ pos ] == 'B' && chpt[ pos + 1 ] == 'E') { - pos += 3; - startpos = pos; - while (chpt[ pos ] != '-' && pos++ < length) {}; - resdat.be = rawStr.mid(startpos, pos - startpos); - pos++; - if (pos < length && chpt[ pos ] == '#') { - startpos = pos; - while (chpt[ pos ] == '#' && pos++ < length) {}; - resdat.bepointmark = rawStr.mid(startpos, pos - startpos); - } - } - - if (pos >= length) return ; - - /* host */ - if (chpt[ pos ] == 'H' && chpt[ pos + 1 ] == 'O') { - pos += 5; - startpos = pos; - while (chpt[ pos ] != ' ' && pos++ < length) {}; - resdat.host = rawStr.mid(startpos, pos - startpos); - pos++; - // qDebug("host %s", resdat.host.ascii()); - } -} - - - -/* parse body */ - -/* output : - - resdat.bodyHTML - -*/ -void parseBody(const QString &rawStr, RESDAT& 
resdat) -{ - resdat.bodyHTML.clear(); - - unsigned int startPos; - QString linkstr, linkurl; - const QChar *chpt = rawStr.unicode(); - unsigned int length = rawStr.length(); - - bool ancChain = false; - - /* ancChain is chain for anchor. For examle, if anchor ">2" - appeared, ancChain is set to true. Moreover, if next strings - are "=5", anchor for 5 is also set. Thus, we can obtain anchors - for strings ">2=5" as follows: - - <a href="#2">>2</a><a href="#5">=5</a> - */ - - int offset = 0; - if (chpt[ 0 ] == ' ') offset = 1; /* remove one space after <> */ - for (unsigned int i = startPos = offset ; i < length ; i++) { - - switch (chpt[ i ].unicode()) { - - case '<': - - /* " <br> " */ - if (chpt[ i + 1 ] == 'b' && chpt[ i + 2 ] == 'r' && chpt[ i + 3 ] == '>') { - - /* reset anchor chain */ - ancChain = false; - - unsigned int i2 = i - startPos; - if (i > 0 && chpt[ i - 1 ] == ' ') i2--; /* remove space before <br> */ - resdat.bodyHTML += rawStr.mid(startPos, i2); - - resdat.bodyHTML += "<br>"; - - startPos = i + 4; - if (chpt[ startPos ] == ' ') startPos++; /* remove space after <br> */ - i = startPos - 1; - } - - /*----------------------------------------*/ - - /* remove HTML tags <[^>]*> */ - else { - - if (i - startPos) resdat.bodyHTML += rawStr.mid(startPos, i - startPos); - while (chpt[ i ] != '>' && i < length) i++; - startPos = i + 1; - } - - break; - - /*----------------------------------------*/ - - case 'h': /* "http://" or "ttp://" or "tp:" */ - case 't': - { - unsigned int pos = 0; - if (parseLink(chpt + i, length - i, linkstr, linkurl, pos)) { - resdat.bodyHTML += rawStr.mid(startPos, i - startPos); - resdat.bodyHTML += "<a href=\"" + linkurl + "\">"; - resdat.bodyHTML += linkstr; - resdat.bodyHTML += "</a>"; - - startPos = i + pos; - i = startPos - 1; - } - } - - break; - - /*----------------------------------*/ - - case '&': - - /* > */ - if (chpt[ i + 1 ] == 'g' && chpt[ i + 2 ] == 't' && chpt[ i + 3 ] == ';') - ancChain = createResAnchor(rawStr, 
resdat, chpt, i, startPos); - - break; - - /*----------------------------------------*/ - - /* unicode '>' */ - case UTF16_BRACKET: - - ancChain = createResAnchor(rawStr, resdat, chpt, i, startPos); - break; - - /*----------------------------------*/ - - default: - - if (ancChain) ancChain = createResAnchor(rawStr, resdat, chpt, i, startPos); - } - } - - resdat.bodyHTML += rawStr.mid(startPos); -} - - - -/* parsing function for link */ - -/* For example, - - cdat = "ttp://foo.com", - - then - - linkstr = "ttp://foo.com", - linkurl = "http://foo.com", - pos (= length of cdat) = 13, - - and return true. - */ -bool parseLink( - - /* input */ - const QChar *cdat, const unsigned int length, - - /* output */ - QString& linkstr, QString& linkurl, unsigned int& pos -) -{ - - /*-----------------------------*/ - - linkstr.clear(); - linkurl.clear(); - - QString retlinkstr; - QString prefix; - QString scheme; - - if (isEqual(cdat , "http://")) { - prefix = "http://"; - scheme = "http://"; - } else if (isEqual(cdat , "ttp://")) { - prefix = "ttp://"; - scheme = "http://"; - } else if (isEqual(cdat , "tp://")) { - prefix = "tp://"; - scheme = "http://"; - } else if (isEqual(cdat , "https://")) { - prefix = "https://"; - scheme = "https://"; - } else if (isEqual(cdat , "ttps://")) { - prefix = "ttps://"; - scheme = "https://"; - } else if (isEqual(cdat , "tps://")) { - prefix = "tps://"; - scheme = "https://"; - } else { - return false; - } - - pos = prefix.length(); - while (cdat[ pos ] >= '!' && cdat[ pos ] <= '~' && - cdat[ pos ] != ' ' && cdat[ pos ] != '<' && cdat[ pos ] != '>' - && pos < length) { - retlinkstr += cdat[ pos++ ]; - } - if (pos > length) return false; - - if (!retlinkstr.isEmpty()) DatToText(retlinkstr, linkstr); - - linkurl = scheme + linkstr; - linkstr = prefix + linkstr; - - return true; -} - - - -/* parsing function for anchor (>>digits) */ - -/* This function parses res anchor. 
- - For example, if cdat = ">12-20", then - - linkstr = ">12-20", - refNum[0] = 12, - refNum[1] = 20, - pos (= length of cdat) = 9, - ret = true; - -*/ -bool parseResAnchor( - - /* input */ - const QChar *cdat, const unsigned int length, - - /* output */ - QString& linkstr, int* refNum, unsigned int& pos) -{ - - struct LocalFunc { - static bool isHYPHEN(unsigned short c) - { - - /* UTF-16 */ - if (c == '-' - || (c >= 0x2010 && c <= 0x2015) - || (c == 0x2212) - || (c == 0xFF0D) /* UTF8: 0xEFBC8D */ - ) { - return true; - } - - return false; - } - }; - - bool ret = false; - - if (length == 0) return false; - - linkstr.clear(); - refNum[ 0 ] = 0; - refNum[ 1 ] = 0; - pos = 0; - - /* check '>' twice */ - for (int i = 0; i < 2; i++) { - - if (cdat[ pos ].unicode() == UTF16_BRACKET) { - linkstr += cdat[ pos ]; - pos++; - } else if (cdat[ pos ] == '&' && cdat[ pos + 1 ] == 'g' /* > */ - && cdat[ pos + 2 ] == 't' && cdat[ pos + 3 ] == ';') { - linkstr += '>'; - pos += 4; - } - - } - - /* check ',' */ - if (!pos) { - if (cdat[ pos ] == ',' || cdat[ pos ].unicode() == UTF16_COMMA) { - linkstr += ','; - pos ++; - } - } - - /* check '=' */ - if (!pos) { - if (cdat[ pos ] == '=' || cdat[ pos ].unicode() == UTF16_EQ) { - linkstr += '='; - pos ++; - } - } - - /* check digits */ - int hyphen = 0; - - for (int i = 0 ; i < KITA_RESDIGIT + 1 && pos < length ; i++, pos++) { - - unsigned short c = cdat[ pos ].unicode(); - - if ((c < UTF16_0 || c > UTF16_9) - && (c < '0' || c > '9') - && (!LocalFunc::isHYPHEN(c) - || (i == 0 && LocalFunc::isHYPHEN(c)) - || (hyphen && LocalFunc::isHYPHEN(c))) - ) break; - - linkstr += cdat[ pos ]; - - if (LocalFunc::isHYPHEN(c)) { - hyphen = 1; - i = -1; - } else { - if (c >= UTF16_0) c = '0' + cdat[ pos ].unicode() - UTF16_0; - refNum[ hyphen ] *= 10; - refNum[ hyphen ] += c - '0'; - } - - ret = true; - } - - return ret; -} - - - -/* create res anchor */ -/* This function is called from parseBody internally. - See also parseBody. 
*/ -bool createResAnchor(const QString &rawStr, RESDAT& resdat, - const QChar *chpt, unsigned int &i, unsigned int &startPos) -{ - QString linkstr, linkurl; - int refNum[ 2 ]; - unsigned int pos; - unsigned int length = rawStr.length(); - - /* parse anchor */ - if (!parseResAnchor(chpt + i, length - i, linkstr, refNum, pos)) { - - i += pos - 1; - return false; - } - - /* create anchor */ - resdat.bodyHTML += rawStr.mid(startPos, i - startPos); - linkurl = QString("#%1").arg(refNum[ 0 ]); - if (refNum[ 1 ]) linkurl += QString("-%1").arg(refNum[ 1 ]); - - resdat.bodyHTML += "<a href=\"" + linkurl + "\">"; - resdat.bodyHTML += linkstr; - resdat.bodyHTML += "</a>"; - - /* add anchor to ancList */ - ANCNUM anctmp; - if (refNum[ 1 ] < refNum[ 0 ]) refNum[ 1 ] = refNum[ 0 ]; - anctmp.from = refNum[ 0 ]; - anctmp.to = refNum[ 1 ]; - resdat.anclist += anctmp; - - startPos = i + pos; - i = startPos - 1; - - return true; -} - - -/* create HTML of title. - - struct RESDAT resdat should be parsed by parseResDat before calling this function. 
- - output: titleHTML - -*/ -void Kita::createTitleHTML(RESDAT& resdat, QString& titleHTML) -{ - titleHTML.clear(); - if (!resdat.parsed) return ; - - bool showMailAddress = GlobalConfig::showMailAddress(); - bool useTableTag = GlobalConfig::useStyleSheet(); - - if (m_colonstr.isEmpty()) { - m_colonstr = QString::fromUtf8(KITAUTF8_COLON); - m_colonnamestr = QString::fromUtf8(KITAUTF8_NAME); - } - - if (useTableTag) titleHTML += "<table class=\"res_title\"><tr>"; - - /* res number */ - if (useTableTag) titleHTML += "<td class=\"res_title_number\">"; - titleHTML += "<a href=\"#write" + QString::number(resdat.num) + "\">"; - titleHTML += QString::number(resdat.num); - titleHTML += "</a> "; - - - /* name & mail address */ - if (useTableTag) titleHTML += "<td class=\"res_title_name\">"; - titleHTML += "<b>" + m_colonnamestr; - - /* show name with mail address */ - if (showMailAddress) { - - titleHTML += resdat.nameHTML; - if (!resdat.address.isEmpty()) titleHTML += " [" + resdat.address + ']'; - - } else { /* don't show mail address */ - - if (resdat.address.isEmpty()) { - - titleHTML += "<span class=\"name_noaddr\">"; - titleHTML += resdat.name; - titleHTML += "</span>"; - - } else { - - titleHTML += "<a href=\"mailto:" + resdat.address + "\""; - titleHTML += " title=\"" + resdat.address + "\">"; - titleHTML += resdat.name; - titleHTML += "</a>"; - } - } - - titleHTML += "</b> "; - - /* date */ - if (useTableTag) titleHTML += "<td class=\"res_title_date\">"; - titleHTML += m_colonstr + resdat.date; - if (useTableTag) titleHTML += "</td>"; - - /* ID */ - if (!resdat.id.isEmpty()) { - - if (useTableTag) titleHTML += "<td class=\"res_title_id\">"; - if (resdat.id.count("???") >= 1) titleHTML += " ID:" + resdat.id; - else titleHTML += " <a href=\"#idpop" + resdat.id + "\">ID</a>" + ":" + resdat.id; - if (useTableTag) titleHTML += "</td>"; - } - - /* BE */ - if (!resdat.be.isEmpty()) { - - if (useTableTag) titleHTML += "<td class=\"res_title_be\">"; - titleHTML += " <a 
href=\"#bepop" + resdat.be + "\">?" + resdat.bepointmark + "</a>"; - if (useTableTag) titleHTML += "</td>"; - } - - /* host */ - if (!resdat.host.isEmpty()) { - - if (useTableTag) titleHTML += "<td class=\"res_title_host\">"; - titleHTML += " HOST:" + resdat.host; - if (useTableTag) titleHTML += "</td>"; - } - - if (useTableTag) titleHTML += "</tr></table>"; -} - - QString Kita::getCategory(const QString& line) { QRegExp regexp("<BR><BR><B>(.*)</B><BR>"); Modified: kita/branches/KITA-KDE4/kita/src/libkita/kita_misc.h =================================================================== --- kita/branches/KITA-KDE4/kita/src/libkita/kita_misc.h 2009-08-10 21:12:13 UTC (rev 2548) +++ kita/branches/KITA-KDE4/kita/src/libkita/kita_misc.h 2009-08-10 21:45:12 UTC (rev 2549) @@ -33,14 +33,7 @@ /** * @author Hideki Ikemoto */ - /*------------------------------*/ - /* conversion of DAT */ - void DatToText(const QString &rawData, QString& text); - QString parseSpecialChar(const QChar *cdat, unsigned int& pos); - - - /*------------------------------*/ /* conversion of URL */ KDE_EXPORT KUrl getDatUrl(const KUrl& url , QString& refstr); KDE_EXPORT KUrl getDatUrl(const KUrl& url); @@ -57,7 +50,6 @@ /*------------------------------*/ /* utilities */ KDE_EXPORT uint datToSince(const KUrl& datUrl); - int isEqual(const QChar *cdat, const QString& str); KDE_EXPORT int stringToPositiveNum(const QChar *cdat, const unsigned int length); KDE_EXPORT QString getCategory(const QString& line); KDE_EXPORT bool isBoardUrl(const QString& url); @@ -78,12 +70,6 @@ /* for Flash CGI/Mini Thread */ QString ParseFlashCGIOneLine(const QString& line); - - - /* for 2ch */ - bool parseResDat(RESDAT& resdat, QString& subject); - - void createTitleHTML(RESDAT& resdat, QString& titletext); } #endif Added: kita/branches/KITA-KDE4/kita/src/libkita/parser.cpp =================================================================== --- kita/branches/KITA-KDE4/kita/src/libkita/parser.cpp (rev 0) +++ 
kita/branches/KITA-KDE4/kita/src/libkita/parser.cpp 2009-08-10 21:45:12 UTC (rev 2549) @@ -0,0 +1,752 @@ +/*************************************************************************** + * Copyright (C) 2009 by Kita Developers * + * ikemo****@users***** * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + ***************************************************************************/ +#include "parser.h" + +#include <QtCore/QString> + +#include "datinfo.h" +#include "globalconfig.h" +#include "kita-utf16.h" +#include "kita-utf8.h" + +using namespace Kita; +using namespace Kita::Parser; + +static const int KITA_RESDIGIT = 4; + +/* if cdat == str, return str.length() */ +static int isEqual(const QChar *cdat, const QString& str) +{ + int i = 0; + const int size = str.size(); + while (i < size && str.at(i) != '\0') { + if (*cdat != str.at(i)) return 0; + cdat++;i++; + } + return i; +} + +/* parsing function for special char (such as ♥ */ + +/* For example, if cdat = "&", then + + pos (= length of cdat) = 5, + retstr = "&". 
*/ +static QString parseSpecialChar( + + /* input */ + const QChar *cdat, + + /* output */ + unsigned int& pos) +{ + QString retstr; + + if ((pos = isEqual(cdat , ">"))) retstr = '>'; + else if ((pos = isEqual(cdat , "<"))) retstr = '<'; + else if ((pos = isEqual(cdat , " "))) retstr = ' '; + else if ((pos = isEqual(cdat , "&"))) retstr = '&'; + else if ((pos = isEqual(cdat , """))) retstr = '"'; + + else if ((pos = isEqual(cdat , "♥"))) + retstr = QString::fromUtf8(KITAUTF8_HEART); + + else if ((pos = isEqual(cdat , "♦"))) + retstr = QString::fromUtf8(KITAUTF8_DIA); + + else if ((pos = isEqual(cdat , "♣"))) + retstr = QString::fromUtf8(KITAUTF8_CLUB); + + else if ((pos = isEqual(cdat , "♠"))) + retstr = QString::fromUtf8(KITAUTF8_SPADE); + + return retstr; +} + +/* get plain text from raw data */ +/* + This function replaces "<br>" to "\n", removes HTML tags and + replaces special chars. +*/ +void Kita::Parser::DatToText( + + /* input */ + const QString &rawData, + + /* output */ + QString& text +) +{ + text.clear(); + + unsigned int startPos, pos; + const QChar *chpt = rawData.unicode(); + unsigned int length = rawData.length(); + + for (unsigned int i = startPos = 0 ; i < length ; i++) { + + switch (chpt[ i ].unicode()) { + + case '<': + + /* " <br> " */ + if (chpt[ i + 1 ] == 'b' && chpt[ i + 2 ] == 'r' && chpt[ i + 3 ] == '>') { + + unsigned int i2 = i - startPos; + if (i > 0 && chpt[ i - 1 ] == ' ') i2--; /* remove space before <br> */ + text += rawData.mid(startPos, i2) + '\n'; + startPos = i + 4; + if (chpt[ startPos ] == ' ') startPos++; /* remove space after <br> */ + i = startPos - 1; + } + + /*----------------------------------------*/ + + /* remove HTML tags <[^>]*> */ + else { + + if (i - startPos) text += rawData.mid(startPos, i - startPos); + while (chpt[ i ] != '>' && i < length) i++; + startPos = i + 1; + } + + break; + + /*----------------------------------*/ + + case '&': + + /* special char */ + { + QString tmpstr; + tmpstr = 
parseSpecialChar(chpt + i, pos); + + if (!tmpstr.isEmpty()) { + text += rawData.mid(startPos, i - startPos) + tmpstr; + startPos = i + pos; + i = startPos - 1; + } + } + + break; + } + } + + text += rawData.mid(startPos); +} + +/* parsing function for anchor (>>digits) */ + +/* This function parses res anchor. + + For example, if cdat = ">12-20", then + + linkstr = ">12-20", + refNum[0] = 12, + refNum[1] = 20, + pos (= length of cdat) = 9, + ret = true; + +*/ +static bool parseResAnchor( + + /* input */ + const QChar *cdat, const unsigned int length, + + /* output */ + QString& linkstr, int* refNum, unsigned int& pos) +{ + + struct LocalFunc { + static bool isHYPHEN(unsigned short c) + { + + /* UTF-16 */ + if (c == '-' + || (c >= 0x2010 && c <= 0x2015) + || (c == 0x2212) + || (c == 0xFF0D) /* UTF8: 0xEFBC8D */ + ) { + return true; + } + + return false; + } + }; + + bool ret = false; + + if (length == 0) return false; + + linkstr.clear(); + refNum[ 0 ] = 0; + refNum[ 1 ] = 0; + pos = 0; + + /* check '>' twice */ + for (int i = 0; i < 2; i++) { + + if (cdat[ pos ].unicode() == UTF16_BRACKET) { + linkstr += cdat[ pos ]; + pos++; + } else if (cdat[ pos ] == '&' && cdat[ pos + 1 ] == 'g' /* > */ + && cdat[ pos + 2 ] == 't' && cdat[ pos + 3 ] == ';') { + linkstr += '>'; + pos += 4; + } + + } + + /* check ',' */ + if (!pos) { + if (cdat[ pos ] == ',' || cdat[ pos ].unicode() == UTF16_COMMA) { + linkstr += ','; + pos ++; + } + } + + /* check '=' */ + if (!pos) { + if (cdat[ pos ] == '=' || cdat[ pos ].unicode() == UTF16_EQ) { + linkstr += '='; + pos ++; + } + } + + /* check digits */ + int hyphen = 0; + + for (int i = 0 ; i < KITA_RESDIGIT + 1 && pos < length ; i++, pos++) { + + unsigned short c = cdat[ pos ].unicode(); + + if ((c < UTF16_0 || c > UTF16_9) + && (c < '0' || c > '9') + && (!LocalFunc::isHYPHEN(c) + || (i == 0 && LocalFunc::isHYPHEN(c)) + || (hyphen && LocalFunc::isHYPHEN(c))) + ) break; + + linkstr += cdat[ pos ]; + + if (LocalFunc::isHYPHEN(c)) { + hyphen = 
1; + i = -1; + } else { + if (c >= UTF16_0) c = '0' + cdat[ pos ].unicode() - UTF16_0; + refNum[ hyphen ] *= 10; + refNum[ hyphen ] += c - '0'; + } + + ret = true; + } + + return ret; +} + +/* parse name */ + +/* output: + + resdat.name + resdat.nameHTML + +*/ +static void parseName(const QString& rawStr, RESDAT& resdat) +{ + unsigned int i = 0, pos; + int refNum[ 2 ]; + QString linkurl, linkstr; + + DatToText(rawStr, resdat.name); + + const QChar * chpt = resdat.name.unicode(); + unsigned int length = resdat.name.length(); + resdat.nameHTML.clear(); + + /* anchor */ + while (parseResAnchor(chpt + i, length - i, linkstr, refNum, pos)) { + + linkurl = QString("#%1").arg(refNum[ 0 ]); + if (refNum[ 1 ]) linkurl += QString("-%1").arg(refNum[ 1 ]); + + resdat.nameHTML += "<a href=\"" + linkurl + "\">"; + resdat.nameHTML += linkstr; + resdat.nameHTML += "</a>"; + + ANCNUM anctmp; + if (refNum[ 1 ] < refNum[ 0 ]) refNum[ 1 ] = refNum[ 0 ]; + anctmp.from = refNum[ 0 ]; + anctmp.to = refNum[ 1 ]; + resdat.anclist += anctmp; + + i += pos; + } + + /* non-digits strings */ + if (i < length) { + + resdat.nameHTML += "<span class=\"name_noaddr\">"; + resdat.nameHTML += resdat.name.mid(i); + resdat.nameHTML += "</span>"; + } + +} + +/* parse date, ID, host */ + +/* output : + + resdat.dateTime + resdat.date + resdat.id + resdat.host + +*/ +static void parseDateId(const QString& rawStr, RESDAT& resdat) +{ + resdat.date = rawStr; + resdat.id.clear(); + resdat.host.clear(); + resdat.be.clear(); + resdat.bepointmark.clear(); + + const QChar *chpt = rawStr.unicode(); + unsigned int pos = 0, startpos = 0; + unsigned int length = rawStr.length(); + + while (chpt[ pos ] != '\0' && + !(chpt[ pos ] == 'I' && chpt[ pos + 1 ] == 'D') && + !(chpt[ pos ] == 'B' && chpt[ pos + 1 ] == 'E')) { + pos++; + } + resdat.date = rawStr.left(pos); + + /* id */ + if (chpt[ pos ] == 'I' && chpt[ pos + 1 ] == 'D') { + pos += 3; + startpos = pos; + while (chpt[ pos ] != ' ' && pos++ < length) {}; + 
resdat.id = rawStr.mid(startpos, pos - startpos); + pos++; + } + + // qDebug("date %s, ID %s", (const char*)resdat.date.local8Bit(), resdat.id.ascii()); + + if (pos >= length) return ; + + /* be */ + if (chpt[ pos ] == 'B' && chpt[ pos + 1 ] == 'E') { + pos += 3; + startpos = pos; + while (chpt[ pos ] != '-' && pos++ < length) {}; + resdat.be = rawStr.mid(startpos, pos - startpos); + pos++; + if (pos < length && chpt[ pos ] == '#') { + startpos = pos; + while (chpt[ pos ] == '#' && pos++ < length) {}; + resdat.bepointmark = rawStr.mid(startpos, pos - startpos); + } + } + + if (pos >= length) return ; + + /* host */ + if (chpt[ pos ] == 'H' && chpt[ pos + 1 ] == 'O') { + pos += 5; + startpos = pos; + while (chpt[ pos ] != ' ' && pos++ < length) {}; + resdat.host = rawStr.mid(startpos, pos - startpos); + pos++; + // qDebug("host %s", resdat.host.ascii()); + } +} + +/* parsing function for link: succeeds only when cdat begins with one of the six prefixes tested below (http, ttp, tp and their https forms) */ + +/* For example, + + cdat = "ttp://foo.com", + + then + + linkstr = "ttp://foo.com", + linkurl = "http://foo.com", + pos (= number of characters consumed) = 13, + + and return true. linkstr keeps the prefix as written in the text, linkurl always gets the full scheme, and false is returned when no known prefix matches. + */ +static bool parseLink( + + /* input */ + const QChar *cdat, const unsigned int length, + + /* output */ + QString& linkstr, QString& linkurl, unsigned int& pos +) +{ + + /*-----------------------------*/ + + linkstr.clear(); + linkurl.clear(); + + QString retlinkstr; + QString prefix; + QString scheme; + + if (isEqual(cdat , "http://")) { + prefix = "http://"; + scheme = "http://"; + } else if (isEqual(cdat , "ttp://")) { + prefix = "ttp://"; + scheme = "http://"; + } else if (isEqual(cdat , "tp://")) { + prefix = "tp://"; + scheme = "http://"; + } else if (isEqual(cdat , "https://")) { + prefix = "https://"; + scheme = "https://"; + } else if (isEqual(cdat , "ttps://")) { + prefix = "ttps://"; + scheme = "https://"; + } else if (isEqual(cdat , "tps://")) { + prefix = "tps://"; + scheme = "https://"; + } else { + return false; + } + + pos = prefix.length(); + while (cdat[ pos ] >= '!' 
&& cdat[ pos ] <= '~' && + cdat[ pos ] != ' ' && cdat[ pos ] != '<' && cdat[ pos ] != '>' + && pos < length) { /* NOTE(review): cdat[ pos ] is read before pos < length is tested */ + retlinkstr += cdat[ pos++ ]; + } + if (pos > length) return false; /* NOTE(review): the loop stops at pos == length, so this can only fire when length is shorter than the prefix - confirm */ + + if (!retlinkstr.isEmpty()) DatToText(retlinkstr, linkstr); + + linkurl = scheme + linkstr; + linkstr = prefix + linkstr; + + return true; +} + +/* create res anchor: writes an <a href> element for the anchor into resdat.bodyHTML and appends its number range to resdat.anclist */ +/* This function is called from parseBody internally. + See also parseBody. Returns true when an anchor was written; when parseResAnchor fails only i is advanced and false is returned. */ +static bool createResAnchor(const QString &rawStr, RESDAT& resdat, + const QChar *chpt, unsigned int &i, unsigned int &startPos) +{ + QString linkstr, linkurl; + int refNum[ 2 ]; + unsigned int pos; + unsigned int length = rawStr.length(); + + /* parse anchor */ + if (!parseResAnchor(chpt + i, length - i, linkstr, refNum, pos)) { + + i += pos - 1; + return false; + } + + /* create anchor */ + resdat.bodyHTML += rawStr.mid(startPos, i - startPos); + linkurl = QString("#%1").arg(refNum[ 0 ]); + if (refNum[ 1 ]) linkurl += QString("-%1").arg(refNum[ 1 ]); + + resdat.bodyHTML += "<a href=\"" + linkurl + "\">"; + resdat.bodyHTML += linkstr; + resdat.bodyHTML += "</a>"; + + /* add anchor to anclist; "to" is raised to "from" when it is smaller (e.g. no range end was given) */ + ANCNUM anctmp; + if (refNum[ 1 ] < refNum[ 0 ]) refNum[ 1 ] = refNum[ 0 ]; + anctmp.from = refNum[ 0 ]; + anctmp.to = refNum[ 1 ]; + resdat.anclist += anctmp; + + startPos = i + pos; + i = startPos - 1; + + return true; +} + +/* parse body: keeps <br>, drops every other tag, and wraps links and res anchors in <a> elements */ + +/* output : + + resdat.bodyHTML + +*/ +static void parseBody(const QString &rawStr, RESDAT& resdat) +{ + resdat.bodyHTML.clear(); + + unsigned int startPos; + QString linkstr, linkurl; + const QChar *chpt = rawStr.unicode(); + unsigned int length = rawStr.length(); + + bool ancChain = false; + + /* ancChain is the chain flag for anchors. For example, if anchor ">2" + appeared, ancChain is set to true. Moreover, if the next string + is "=5", an anchor for 5 is also set. 
Thus, we can obtain anchors + for the string ">2=5" as follows: + + <a href="#2">>2</a><a href="#5">=5</a> + */ + + int offset = 0; + if (chpt[ 0 ] == ' ') offset = 1; /* remove one space after <> */ + for (unsigned int i = startPos = offset ; i < length ; i++) { + + switch (chpt[ i ].unicode()) { + + case '<': + + /* " <br> " */ + if (chpt[ i + 1 ] == 'b' && chpt[ i + 2 ] == 'r' && chpt[ i + 3 ] == '>') { + + /* reset anchor chain */ + ancChain = false; + + unsigned int i2 = i - startPos; + if (i > 0 && chpt[ i - 1 ] == ' ') i2--; /* remove space before <br>; NOTE(review): if i == startPos and the previous char is a space (e.g. right after the space skipped following a <br>), i2 is 0 here and wraps around (unsigned) - confirm */ + resdat.bodyHTML += rawStr.mid(startPos, i2); + + resdat.bodyHTML += "<br>"; + + startPos = i + 4; + if (chpt[ startPos ] == ' ') startPos++; /* remove space after <br> */ + i = startPos - 1; + } + + /*----------------------------------------*/ + + /* remove HTML tags <[^>]*> */ + else { + + if (i - startPos) resdat.bodyHTML += rawStr.mid(startPos, i - startPos); + while (chpt[ i ] != '>' && i < length) i++; + startPos = i + 1; + } + + break; + + /*----------------------------------------*/ + + case 'h': /* "http://" or "ttp://" or "tp://" (and the https forms); parseLink decides */ + case 't': + { + unsigned int pos = 0; + if (parseLink(chpt + i, length - i, linkstr, linkurl, pos)) { + resdat.bodyHTML += rawStr.mid(startPos, i - startPos); + resdat.bodyHTML += "<a href=\"" + linkurl + "\">"; + resdat.bodyHTML += linkstr; + resdat.bodyHTML += "</a>"; + + startPos = i + pos; + i = startPos - 1; + } + } + + break; + + /*----------------------------------*/ + + case '&': + + /* "&gt;" (escaped '>') */ + if (chpt[ i + 1 ] == 'g' && chpt[ i + 2 ] == 't' && chpt[ i + 3 ] == ';') + ancChain = createResAnchor(rawStr, resdat, chpt, i, startPos); + + break; + + /*----------------------------------------*/ + + /* unicode '>' */ + case UTF16_BRACKET: + + ancChain = createResAnchor(rawStr, resdat, chpt, i, startPos); + break; + + /*----------------------------------*/ + + default: + + if (ancChain) ancChain = createResAnchor(rawStr, resdat, chpt, i, startPos); + } + } + + 
resdat.bodyHTML += rawStr.mid(startPos); +} + +/* Main Parser: splits resdat.linestr at each "<>" into name, mail, date/ID, body and subject. Always returns true; a malformed line is reported through resdat.broken. */ + +/* + struct RESDAT is defined in datinfo.h. + This function is called from DatToHtml() and DatInfo::parseDat() + + input: + + resdat.num ... number + resdat.linestr ... raw line string + + output: + + resdat.* + subject +*/ +bool Kita::Parser::parseResDat(RESDAT& resdat, QString& subject) +{ + if (resdat.parsed) return true; + + resdat.parsed = true; + resdat.broken = false; + resdat.anclist.clear(); + + /* search the starting positions of each section to split raw data. */ + const QChar *chpt = resdat.linestr.unicode(); + unsigned int length = resdat.linestr.length(); + unsigned int section = 0; + unsigned int sectionPos[ 5 ]; + for (unsigned int i = 0 ; i < length ; i++) { + + /* sections are separated by "<>" */ + if (chpt[ i ] == '<' && chpt[ i + 1 ] == '>') { + section++; + + if (section >= 5) { + resdat.broken = true; + return true; + } + + sectionPos[ section ] = i + 2; + i++; + } + } + + /* broken data: a well-formed line has exactly four "<>" separators */ + if (section != 4) { + resdat.broken = true; + return true; + } + + // qDebug("[%d] %d %d %d %d",section, sectionPos[1],sectionPos[2],sectionPos[3],sectionPos[4]); + + /* name */ + length = sectionPos[ 1 ] - 2 ; + parseName(resdat.linestr.mid(0, length), resdat); + + /* mail */ + length = sectionPos[ 2 ] - 2 - sectionPos[ 1 ]; + DatToText(resdat.linestr.mid(sectionPos[ 1 ], length), resdat.address); + + /* date, ID, host */ + length = sectionPos[ 3 ] - 2 - sectionPos[ 2 ]; + parseDateId(resdat.linestr.mid(sectionPos[ 2 ], length), resdat); + + /* body */ + length = sectionPos[ 4 ] - 2 - sectionPos[ 3 ]; + parseBody(resdat.linestr.mid(sectionPos[ 3 ], length), resdat); + + /* subject */ + subject = resdat.linestr.mid(sectionPos[ 4 ]); + + return true; +} + +/* create HTML of title. + + struct RESDAT resdat should be parsed by parseResDat before calling this function; if it is not, titleHTML is only cleared. 
+ + output: titleHTML + +*/ +void Kita::Parser::createTitleHTML(RESDAT& resdat, QString& titleHTML) +{ + titleHTML.clear(); + if (!resdat.parsed) return ; + + bool showMailAddress = GlobalConfig::showMailAddress(); + bool useTableTag = GlobalConfig::useStyleSheet(); + + if (useTableTag) titleHTML += "<table class=\"res_title\"><tr>"; + + /* res number; NOTE(review): the number cell and the name cell below are opened with <td> but no </td> is written for them - confirm intended */ + if (useTableTag) titleHTML += "<td class=\"res_title_number\">"; + titleHTML += "<a href=\"#write" + QString::number(resdat.num) + "\">"; + titleHTML += QString::number(resdat.num); + titleHTML += "</a> "; + + /* name & mail address */ + if (useTableTag) titleHTML += "<td class=\"res_title_name\">"; + titleHTML += "<b>" + QString::fromUtf8(KITAUTF8_NAME); + + /* show name with mail address */ + if (showMailAddress) { + + titleHTML += resdat.nameHTML; + if (!resdat.address.isEmpty()) titleHTML += " [" + resdat.address + ']'; + + } else { /* don't show mail address: when one exists it goes into a mailto link around the name instead */ + + if (resdat.address.isEmpty()) { + + titleHTML += "<span class=\"name_noaddr\">"; + titleHTML += resdat.name; + titleHTML += "</span>"; + + } else { + + titleHTML += "<a href=\"mailto:" + resdat.address + "\""; + titleHTML += " title=\"" + resdat.address + "\">"; + titleHTML += resdat.name; + titleHTML += "</a>"; + } + } + + titleHTML += "</b> "; + + /* date */ + if (useTableTag) titleHTML += "<td class=\"res_title_date\">"; + titleHTML += QString::fromUtf8(KITAUTF8_COLON) + resdat.date; + if (useTableTag) titleHTML += "</td>"; + + /* ID (written without the popup link when the id contains "???") */ + if (!resdat.id.isEmpty()) { + + if (useTableTag) titleHTML += "<td class=\"res_title_id\">"; + if (resdat.id.count("???") >= 1) titleHTML += " ID:" + resdat.id; + else titleHTML += " <a href=\"#idpop" + resdat.id + "\">ID</a>" + ":" + resdat.id; + if (useTableTag) titleHTML += "</td>"; + } + + /* BE */ + if (!resdat.be.isEmpty()) { + + if (useTableTag) titleHTML += "<td class=\"res_title_be\">"; + titleHTML += " <a href=\"#bepop" + resdat.be + "\">?" 
+ resdat.bepointmark + "</a>"; + if (useTableTag) titleHTML += "</td>"; + } + + /* host */ + if (!resdat.host.isEmpty()) { + + if (useTableTag) titleHTML += "<td class=\"res_title_host\">"; + titleHTML += " HOST:" + resdat.host; + if (useTableTag) titleHTML += "</td>"; + } + + if (useTableTag) titleHTML += "</tr></table>"; +} Added: kita/branches/KITA-KDE4/kita/src/libkita/parser.h =================================================================== --- kita/branches/KITA-KDE4/kita/src/libkita/parser.h (rev 0) +++ kita/branches/KITA-KDE4/kita/src/libkita/parser.h 2009-08-10 21:45:12 UTC (rev 2549) @@ -0,0 +1,25 @@ +/*************************************************************************** + * Copyright (C) 2009 by Kita Developers * + * ikemo****@users***** * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + ***************************************************************************/ +#ifndef PARSER_H +#define PARSER_H + +class QString; + +class RESDAT; /* NOTE(review): the comment in parser.cpp calls RESDAT a struct - confirm the class-key matches datinfo.h */ + +namespace Kita { + namespace Parser { + bool parseResDat(RESDAT& resdat, QString& subject); /* parse resdat.linestr into the resdat fields and subject */ + void DatToText(const QString &rawData, QString& text); + void createTitleHTML(RESDAT& resdat, QString& titleHTML); /* build the title HTML of an already parsed resdat */ + } +} + +#endif