Contratto collettivo aziendale ordinario
Così come spiegato nella trattazione del CCAL, anche in questo caso non si potranno rimuovere del tutto i limiti quantitativi imposti dal legislatore, ma esclusivamente prevederne una rimodulazione, lasciando il vincolo imposto dalla norma.
'
;
return;
}
}
// too much content
// ================
if ($R.parsingOptions._elements_too_much_content.indexOf('|'+_tag_name+'|') > -1)
{
_explored = (_explored || $R.getContent__exploreNodeAndGetStuff(_node, true));
switch (true)
{
case (_tag_name == 'h1' && (_explored._length__all_text > (65 * 2))):
case (_tag_name == 'h2' && (_explored._length__all_text > (65 * 2 * 3))):
case ((_tag_name.match(/^h(3|4|5|6)$/) != null) && (_explored._length__all_text > (65 * 2 * 5))):
case ((_tag_name.match(/^(b|i|em|strong)$/) != null) && (_explored._length__all_text > (65 * 5 * 5))):
$R.debugOutline(_node, 'clean-after', 'too-much-content');
_global__the_html = ''
+ _global__the_html.substr(0, _pos__start__before)
+ _global__the_html.substr(_pos__start__after, (_pos__end__before - _pos__start__after))
;
return;
}
}
// empty elements
// ==============
switch (true)
{
case (($R.parsingOptions._elements_self_closing.indexOf('|'+_tag_name+'|') > -1)):
case (($R.parsingOptions._elements_ignore_tag.indexOf('|'+_tag_name+'|') > -1)):
case (_tag_name == 'td'):
break;
default:
var _contents = _global__the_html.substr(_pos__start__after, (_pos__end__before - _pos__start__after));
_contents = _contents.replace(/(
)/gi, '');
_contents = _contents.replace(/(
)/gi, '');
// for rows, clear empty cells
if (_tag_name == 'tr')
{
_contents = _contents.replace(/
_contents = _contents.replace(/< \/td>/gi, '');
}
// for tables, clear empty rows
if (_tag_name == 'table')
{
_contents = _contents.replace(/
_contents = _contents.replace(/< \/tr>/gi, '');
}
var _contentsLength = $R.measureText__getTextLength(_contents);
switch (true)
{
case (_contentsLength == 0 && _tag_name == 'p'):
_global__the_html = _global__the_html.substr(0, _pos__start__before) + '
';
return;
case (_contentsLength == 0):
case ((_contentsLength < 5) && ($R.parsingOptions._elements_visible.indexOf('|'+_tag_name+'|') > -1)):
$R.debugOutline(_node, 'clean-after', 'blank');
_global__the_html = _global__the_html.substr(0, _pos__start__before);
return;
}
break;
}
// too much missing
// ================
if ($R.parsingOptions._elements_link_density.indexOf('|'+_tag_name+'|') > -1)
{
_explored = (_explored || $R.getContent__exploreNodeAndGetStuff(_node, true));
var
_contents = _global__the_html
.substr(_pos__start__after, (_pos__end__before - _pos__start__after))
.replace(/(< ([^>]+)>)/gi, ''),
_contentsLength = $R.measureText__getTextLength(_contents),
_initialLength = 0
+ _explored._length__all_text
+ (_explored._count__images_small * 10)
+ (_explored._count__images_skip * 10)
+ (_node.getElementsByTagName('iframe').length * 10)
+ (_node.getElementsByTagName('object').length * 10)
+ (_node.getElementsByTagName('embed').length * 10)
+ (_node.getElementsByTagName('button').length * 10)
+ (_node.getElementsByTagName('input').length * 10)
+ (_node.getElementsByTagName('select').length * 10)
+ (_node.getElementsByTagName('textarea').length * 10)
;
// too much missing
switch (true)
{
case (!(_contentsLength > 0)):
case (!(_initialLength > 0)):
case (!((_contentsLength / _initialLength) < 0.5)):
case (!(($R.language == 'cjk') && (_contentsLength / _initialLength) < 0.1)):
case ((_global__exploreNodeToBuildHTMLFor && ((_explored._length__plain_text / _global__exploreNodeToBuildHTMLFor._length__plain_text) > 0.25))):
case (($R.language == 'cjk') && (_global__exploreNodeToBuildHTMLFor && ((_explored._length__plain_text / _global__exploreNodeToBuildHTMLFor._length__plain_text) > 0.1))):
break;
default:
$R.debugOutline(_node, 'clean-after', 'missing-density');
_global__the_html = _global__the_html.substr(0, _pos__start__before);
return;
}
}
// return
return;
};
// actually do it
_recursive(_nodeToBuildHTMLFor);
// return html
return _global__the_html;
};
// article title marker
// ====================
// Marker strings that bracket an isolated article title at the very start of an
// html string. They are read by the hasIsolatedTitleInHTML / getIsolatedTitleInHTML /
// isolateTitleInHTML helpers, which compare against and concatenate them.
// NOTE(review): both literals below contain nothing but a raw line break, which is
// not a valid single-quoted string -- the marker markup appears to have been stripped
// from this copy of the file. Restore the values from a pristine source; TODO confirm.
$R.articleTitleMarker__start = '
';
$R.articleTitleMarker__end = '
';
// article title check function
// ============================
// Tells whether an html string opens with the article-title start marker.
//   _html   -- html string to inspect
//   returns -- true when the leading characters of _html equal
//              $R.articleTitleMarker__start, false otherwise
$R.getContent__find__hasIsolatedTitleInHTML = function (_html)
{
    var
        _marker = $R.articleTitleMarker__start,
        _head = _html.substr(0, _marker.length)
    ;

    return (_head == _marker);
};
// article title get function
// ==========================
// Extracts the title that sits between the start and end markers at the very
// beginning of an html string.
//   _html   -- html string to inspect
//   returns -- the text between the two markers; '' when _html does not open
//              with the start marker or when the end marker cannot be matched
$R.getContent__find__getIsolatedTitleInHTML = function (_html)
{
    // nothing to extract unless the html opens with the start marker
    if (!$R.getContent__find__hasIsolatedTitleInHTML(_html)) { return ''; }

    // lazily capture whatever lies between the two markers
    var _pattern = new RegExp($R.articleTitleMarker__start + '(.*?)' + $R.articleTitleMarker__end, 'i');
    var _found = _pattern.exec(_html);

    return (_found ? _found[1] : '');
};
// find title in arbitrary html
// ============================
// Searches _html for a heading that can serve as the article title. Headings are
// tried in a fixed order -- the first h1, then the first h2, then the first h3-h6 --
// and only the first occurrence of each kind is considered.
//
// A heading qualifies when (lengths measured with $R.measureText__getTextLength):
//   - its plain text is longer than 5 and shorter than 65*3
//   - the plain text preceding it is shorter than 65*3*2
//   - for h2-h6 only: it resembles _document_title (see the checks in the loop)
//
//   _html           -- html string to search
//   _document_title -- title that h2-h6 headings are compared against;
//                      anything that is not a string is treated as ''
//   returns         -- start marker + heading html + end marker + the html that
//                      follows the heading (whatever preceded the heading is
//                      dropped); _html unmodified when no heading qualifies
$R.getContent__find__isolateTitleInHTML = function (_html, _document_title)
{
    // can't just use (h1|h2|h3|etc) -- we want to try them in a certain order
    // the patterns are not global: each one is executed once, and positions
    // are taken from the match itself instead of from lastIndex
    var
        _heading_pregs = [
            /<(h1)[^>]*?>([\s\S]+?)<\/\1>/i,
            /<(h2)[^>]*?>([\s\S]+?)<\/\1>/i,
            /<(h3|h4|h5|h6)[^>]*?>([\s\S]+?)<\/\1>/i
        ],
        _secondary_headings = '|h2|h3|h4|h5|h6|',
        _title_text = (typeof _document_title == 'string' ? _document_title : ''),

        // padded with one space on each side, so whole words can be looked up as ' word '
        _search_document_title = ' ' + _title_text.replace(/<[^>]+?>/gi, '').replace(/\s+/gi, ' ') + ' ',
        _title_length = (_search_document_title.length - 2)
    ;

    // loop pregs
    // ==========
    for (var i=0, _i=_heading_pregs.length; i<_i; i++)
    {
        var _match = _heading_pregs[i].exec(_html);
        if (!_match) { continue; }

        // measurements
        var
            _heading_start_pos = _match.index,
            _heading_end_pos = (_heading_start_pos + _match[0].length),

            // matching is case-insensitive; the comparisons below expect lower case
            _heading_type = _match[1].toLowerCase(),
            _heading_text = _match[2].replace(/<\s*br[^>]*>/gi, '').replace(/[\n\r]+/gi, ''),
            _heading_text_plain = _heading_text.replace(/<[^>]+?>/gi, '').replace(/\s+/gi, ' '),
            _heading_length = $R.measureText__getTextLength(_heading_text_plain),
            _to_heading_text = _html.substr(0, _heading_start_pos),
            _to_heading_length = $R.measureText__getTextLength(_to_heading_text.replace(/<[^>]+?>/gi, '').replace(/\s+/gi, ' '))
        ;

        // too short, too long, or too far into the html -- try the next kind of heading
        // (written as negated comparisons so that a NaN length is rejected as well)
        if (!(_heading_length > 5)) { continue; }
        if (!(_heading_length < (65 * 3))) { continue; }
        if (!(_to_heading_length < (65 * 3 * 2))) { continue; }

        // h2-h6 must also resemble the document title; an h1 is taken as is
        if (_secondary_headings.indexOf('|' + _heading_type + '|') > -1)
        {
            // collect the heading's words that are present, as whole words, in the title
            var
                _heading_words = _heading_text_plain.split(' '),
                _matched_words = ''
            ;
            for (var j=0, _j=_heading_words.length; j<_j; j++)
            {
                if (_search_document_title.indexOf(' ' + _heading_words[j] + ' ') > -1)
                    { _matched_words += _heading_words[j] + ' '; }
            }

            var
                _title_pos = _search_document_title.indexOf(_heading_text_plain),

                // if it's big enough, and it's a substring of the title, it's good
                _is_substring = ((_heading_length > 20) && (_title_pos > -1)),

                // if it's slightly smaller, but is exactly at the beginning or the end of the title
                _is_at_edge = ((_heading_length > 10) && ((_title_pos == 1) || (_title_pos == (_search_document_title.length - 1 - _heading_text_plain.length)))),

                _rejected = (false
                    // heading too long? -- if not h2
                    || ((_heading_length > (_title_length * 2)) && (_heading_type != 'h2'))
                    // heading long enough?
                    || (_heading_length < Math.ceil(_title_length * 0.50))
                    // enough words matched? -- the shorter the heading, the stricter
                    || ((_heading_length < 25) && (_matched_words.length < Math.ceil(_heading_length * 0.75)))
                    || ((_heading_length < 50) && (_matched_words.length < Math.ceil(_heading_length * 0.65)))
                    || (_matched_words.length < Math.ceil(_heading_length * 0.55))
                )
            ;

            // a substring/edge match overrides every rejection rule
            if (!(_is_substring || _is_at_edge) && _rejected) { continue; }
        }

        // this is the title -- do isolation; return
        // =================
        return ''
            + $R.articleTitleMarker__start
            + _heading_text
            + $R.articleTitleMarker__end
            + _html.substr(_heading_end_pos)
        ;
    }

    // return unmodified
    return _html;
};
// Entry point for the first page: finds the article in $R.win, works out a title,
// displays the result and starts the search for a next page.
//   1. content -- $R.getContent__findInPage($R.win) supplies the html and the candidates
//   2. RTL     -- $R.makeRTL() is called when the target node's dir attribute or
//                 computed css direction is 'rtl'
//   3. title   -- tried in order: a heading inside the found html; a heading in the
//                 html built from nodes that precede the target; the document title
//   4. display, debug bookkeeping, next-page kick-off
// Takes no arguments and always returns true.
$R.getContent__find = function ()
{
// get content
// ===========
var
_found = $R.getContent__findInPage($R.win),
_targetNode = _found._targetCandidate.__node,
_$targetNode = $(_targetNode),
_aboveNodes = []
;
// RTL
// ===
switch (true)
{
case (_$targetNode.attr('dir') == 'rtl'):
case (_$targetNode.css('direction') == 'rtl'):
$R.makeRTL();
break;
}
// get html
// ========
// _firstFragmentBefore is taken from the html as found -- before any title is isolated or prepended
var
_foundHTML = _found._html,
_firstFragmentBefore = $R.getContent__nextPage__getFirstFragment(_foundHTML),
_documentTitle = ($R.document.title > '' ? $R.document.title : '')
;
// get title
// =========
// has title already?
_foundHTML = $R.getContent__find__isolateTitleInHTML(_foundHTML, _documentTitle);
$R.articleTitle = $R.getContent__find__getIsolatedTitleInHTML(_foundHTML);
$R.debugPrint('TitleSource', 'target');
// get html above?
// the "if (x); else { ... }" form used throughout runs the block only when x is falsy
if ($R.articleTitle > ''); else
{
// get html above target?
// ======================
// global vars:
// _found
// _foundHTML
// _documentTitle
// _aboveNodes
var
_prevNode = _found._targetCandidate.__node,
_prevHTML = '',
_aboveHTML = '',
_differentTargets = (_found._firstCandidate.__node != _found._targetCandidate.__node)
;
// walks backwards through previous siblings -- climbing to the parent when there
// are none -- and prepends each node's html to _aboveHTML
(function ()
{
while (true)
{
// the end?
switch (true)
{
case (_prevNode.tagName && (_prevNode.tagName.toLowerCase() == 'body')):
case (_differentTargets && (_prevNode == _found._firstCandidate.__node)):
// enough is enough
return;
}
// up or sideways?
if (_prevNode.previousSibling); else
{
_prevNode = _prevNode.parentNode;
continue;
}
// previous
_prevNode = _prevNode.previousSibling;
// outline -- element might be re-outlined, when buildHTML is invoked
if ($R.debug) { $R.debugOutline(_prevNode, 'target', 'add-above'); }
// get html; add
_prevHTML = $R.getContent__buildHTMLForNode(_prevNode, 'above-the-target');
_aboveHTML = _prevHTML + _aboveHTML;
_aboveNodes.unshift(_prevNode);
// isolate title
_aboveHTML = $R.getContent__find__isolateTitleInHTML(_aboveHTML, _documentTitle);
// finished?
// stop once the collected text exceeds 65*3*3 or a title has been isolated
switch (true)
{
case ($R.measureText__getTextLength(_aboveHTML.replace(/< [^>]+?>/gi, '').replace(/\s+/gi, ' ')) > (65 * 3 * 3)):
case ($R.getContent__find__hasIsolatedTitleInHTML(_aboveHTML)):
return;
}
}
})();
// is what we found any good?
// ==========================
// NOTE(review): the second case below is truncated in this copy of the file -- the
// text between "_aboveHTML.split(" and "]+?>/gi" is missing, so the condition
// cannot be read or parsed as written. Restore it from a pristine source; TODO confirm.
switch (true)
{
case ($R.getContent__find__hasIsolatedTitleInHTML(_aboveHTML)):
case (_differentTargets && (_aboveHTML.split(']+?>/gi, '').replace(/\s+/gi, ' ')) < (65 * 3))):
_foundHTML = _aboveHTML + _foundHTML;
break;
default:
_aboveHTML = '';
_aboveNodes = [];
break;
}
$R.articleTitle = $R.getContent__find__getIsolatedTitleInHTML(_foundHTML);
$R.debugPrint('TitleSource', 'above_HTML');
// get document title?
if ($R.articleTitle > ''); else
{
// if all else failed, get document title
// ======================================
// global vars:
// _foundHTML
// _documentTitle
(function ()
{
// return?
// =======
if (_documentTitle > ''); else { return; }
// vars
// NOTE(review): the first pattern's "[< ][<]" alternative looks mangled (compare the
// "[>][>]" alternative next to it) -- presumably meant to match ' << '; TODO confirm.
var
_doc_title_parts = [],
_doc_title_pregs =
[
/( [-][-] |( [-] )|( [>][>] )|( [< ][<] )|( [|] )|( [\/] ))/i,
/(([:] ))/i
]
;
// loop through pregs
// ==================
for (var i=0, _i=_doc_title_pregs.length; i<_i; i++)
{
// split
_doc_title_parts = _documentTitle.split(_doc_title_pregs[i]);
// break if we managed a split
if (_doc_title_parts.length > 1) { break; }
}
// sort title parts -- longer goes higher up -- i.e. towards 0
// ================
_doc_title_parts.sort(function (a, b)
{
switch (true)
{
case (a.length > b.length): return -1;
case (a.length < b.length): return 1;
default: return 0;
}
});
// set title -- first part, if more than one word; otherwise, whole
// =========
_foundHTML = ''
+ $R.articleTitleMarker__start
+ (_doc_title_parts[0].split(/\s+/i).length > 1 ? _doc_title_parts[0] : _documentTitle)
+ $R.articleTitleMarker__end
+ _foundHTML
;
})();
$R.articleTitle = $R.getContent__find__getIsolatedTitleInHTML(_foundHTML);
$R.debugPrint('TitleSource', 'document_title');
}
}
// display
// =======
$R.$pages.html('');
$R.displayPageHTML(_foundHTML, 1, $R.win.location.href);
// remember
// ========
$R.debugRemember['theTarget'] = _found._targetCandidate.__node;
$R.debugRemember['firstCandidate'] = _found._firstCandidate.__node;
// next
// ====
// firstPage fragment: from the html as found; lastPage fragment: from the final html, title included
$R.nextPage__firstFragment__firstPage = _firstFragmentBefore;
$R.nextPage__firstFragment__lastPage = $R.getContent__nextPage__getFirstFragment(_foundHTML);;
$R.nextPage__loadedPages = [$R.win.location.href];
$R.getContent__nextPage__find($R.win, _found._links);
// return
return true;
};
// Finds the article container inside _pageWindow.document.body and builds its html.
//   _pageWindow -- window whose document body is explored
//   returns     -- object with:
//                    '_html'            html built for the chosen target, post-processed
//                    '_links'           _links as reported by the exploration step
//                    '_targetCandidate' the candidate that was chosen
//                    '_firstCandidate'  the best candidate of the first pass
// The chosen target is the first-pass winner unless a second pass runs and its
// winner scores well enough against it (see the ratio checks below).
$R.getContent__findInPage = function (_pageWindow)
{
// calculations
// ============
var
_firstCandidate = false,
_secondCandidate = false,
_targetCandidate = false
;
$R.debugTimerStart('ExploreAndGetStuff');
var _stuff = $R.getContent__exploreNodeAndGetStuff(_pageWindow.document.body);
$R.debugPrint('ExploreAndGetStuff', $R.debugTimerEnd()+'ms');
$R.debugTimerStart('ProcessFirst');
var _processedCandidates = $R.getContent__processCandidates(_stuff._candidates);
_firstCandidate = _processedCandidates[0];
_targetCandidate = _firstCandidate;
$R.debugPrint('ProcessFirst', $R.debugTimerEnd()+'ms');
// debug
if ($R.debug)
{
// debug first candidates
$R.log('First 5 Main Candidates:');
// x is a property key (a string); the loose == lets it compare equal to the number 5
for (var x in _processedCandidates)
{
if (x == 5) { break; }
$R.log(_processedCandidates[x], _processedCandidates[x].__node);
}
// highlight first
$R.debugOutline(_firstCandidate.__node, 'target', 'first');
}
// in case we stop
$R.debugPrint('Target', 'first');
// do second?
// the second pass runs only when every count below is positive and the first
// candidate has more than 25 containers; any failing check skips it
switch (true)
{
case (!(_firstCandidate._count__containers > 0)):
case (!(_firstCandidate._count__candidates > 0)):
case (!(_firstCandidate._count__pieces > 0)):
case (!(_firstCandidate._count__containers > 25)):
break;
default:
$R.debugTimerStart('ProcessSecond');
var _processedCandidatesSecond = $R.getContent__processCandidatesSecond(_processedCandidates);
_secondCandidate = _processedCandidatesSecond[0];
$R.debugPrint('ProcessSecond', $R.debugTimerEnd()+'ms');
// they're the same
if (_firstCandidate.__node == _secondCandidate.__node) { break; }
// debug
if ($R.debug)
{
// log second candidates
$R.log('First 5 Second Candidates:');
for (var x in _processedCandidatesSecond)
{
if (x == 5) { break; }
$R.log(_processedCandidatesSecond[x], _processedCandidatesSecond[x].__node);
}
// highlight second
$R.debugOutline(_secondCandidate.__node, 'target', 'second');
}
// compute again
// =============
// both candidates are re-scored with _firstCandidate passed as the second argument
_firstCandidate['__points_history_final'] = $R.getContent__computePointsForCandidateThird(_firstCandidate, _firstCandidate);
_firstCandidate['__points_final'] = _firstCandidate.__points_history_final[0];
_secondCandidate['__points_history_final'] = $R.getContent__computePointsForCandidateThird(_secondCandidate, _firstCandidate);
_secondCandidate['__points_final'] = _secondCandidate.__points_history_final[0];
// log results
// ===========
if ($R.debug)
{
$R.log('The 2 Candidates:');
$R.log(_firstCandidate);
$R.log(_secondCandidate);
}
// are we selecting _second?
// =========================
// the required second/first points ratio drops as the second candidate's
// _count__lines_of_65_characters grows: above 1 (under 20 lines), above 0.9
// (over 20), above 0.75 (over 50); exactly 20 lines satisfies none of the cases
switch (true)
{
case ((_secondCandidate.__candidate_details._count__lines_of_65_characters < 20) && (_secondCandidate.__points_final / _firstCandidate.__points_final) > 1):
case ((_secondCandidate.__candidate_details._count__lines_of_65_characters > 20) && (_secondCandidate.__points_final / _firstCandidate.__points_final) > 0.9):
case ((_secondCandidate.__candidate_details._count__lines_of_65_characters > 50) && (_secondCandidate.__points_final / _firstCandidate.__points_final) > 0.75):
_targetCandidate = _secondCandidate;
$R.debugPrint('Target', 'second');
break;
}
// print points
// ============
if ($R.debug)
{
$R.debugPrint('PointsFirst', _firstCandidate['__points_history_final'][0].toFixed(2));
$R.debugPrint('PointsSecond', _secondCandidate['__points_history_final'][0].toFixed(2));
}
break;
}
// highlight target
// ================
if ($R.debug)
{
$(_targetCandidate.__node).css({
'box-shadow':
'inset 0px 0px 50px rgba(255, 255, 0, 0.95), 0px 0px 50px rgba(255, 255, 0, 0.95)'
});
}
// get html
// ========
$R.debugTimerStart('BuildHTML');
var _html = $R.getContent__buildHTMLForNode(_targetCandidate.__node, 'the-target');
// cut everything up to and including the first '>', then everything from the last '< ' on
// NOTE(review): presumably this strips the target's own opening and closing tags, and the
// space in '< ' looks like the same stray space found elsewhere in this copy -- TODO confirm.
_html = _html.substr((_html.indexOf('>')+1))
_html = _html.substr(0, _html.lastIndexOf('< '));
$R.debugPrint('BuildHTML', $R.debugTimerEnd()+'ms');
$R.debugTimerStart('BuildHTMLPregs');
// NOTE(review): the six replace() calls below are damaged in this copy -- tag text inside
// the patterns and replacement strings is missing (raw line breaks sit where it used to be)
// and stray spaces follow '<'. As written they do not parse; restore them from a pristine
// source rather than guessing at the lost text.
_html = _html.replace(/<(blockquote|div|p|td|li)([^>]*)>(\s*
)+/gi, '< $1$2>');
_html = _html.replace(/(
\s*)+< \/(blockquote|div|p|td|li)>/gi, '');
_html = _html.replace(/(
\s*)+< (blockquote|div|h\d|ol|p|table|ul|li)([^>]*)>/gi, '< $2$3>');
_html = _html.replace(/< \/(blockquote|div|h\d|ol|p|table|ul|li)>(\s*
)+/gi, '');
_html = _html.replace(/(
\s*
\s*)+/gi, '
');
_html = _html.replace(/(
\s*
\s*)+/gi, '
');
$R.debugPrint('BuildHTMLPregs', $R.debugTimerEnd()+'ms');
// return
// ======
return {
'_html': _html,
'_links': _stuff._links,
'_targetCandidate': _targetCandidate,
'_firstCandidate': _firstCandidate
};
};
// get first page fragment
// =======================
// Reduces a page's html to a short plain-text fragment: tags removed, runs of
// whitespace collapsed to one space, cut to the first 2000 characters. The
// next-page logic stores these fragments ($R.nextPage__firstFragment__*).
//   _html   -- html string
//   returns -- at most 2000 characters of plain text
$R.getContent__nextPage__getFirstFragment = function (_html)
{
    var _text = _html
        // remove all tags
        .replace(/<[^>]+?>/gi, '')
        // normalize spaces
        .replace(/\s+/gi, ' ')
    ;

    // return first 2000 characters
    return _text.substr(0, 2000);
};
// get link parts
// ==============

// substr starting with the first slash after //
//   _url    -- url string; expected to be absolute (scheme://host/...)
//   returns -- everything from the first '/' that follows the '//' to the end of
//              the string (path, query and fragment); '' when there is no such
//              slash, as in 'http://example.com'
$R.getURLPath = function (_url)
{
    var _slashPos = _url.indexOf('/', (_url.indexOf('//') + 2));

    // without this guard, substr(-1) would hand back the url's last character
    return (_slashPos > -1 ? _url.substr(_slashPos) : '');
};
// substr until the first slash after //
//   _url    -- url string; expected to be absolute (scheme://host/...)
//   returns -- everything before the first '/' that follows the '//', i.e. the
//              scheme together with the host ('http://example.com'); the whole
//              url when there is no such slash
$R.getURLDomain = function (_url)
{
    var _slashPos = _url.indexOf('/', (_url.indexOf('//') + 2));

    // without this guard, substr(0, -1) would yield '' for a url that has no path
    return (_slashPos > -1 ? _url.substr(0, _slashPos) : _url);
};
// find
// ====
// Picks, among the links of the current page, the one that leads to the next
// page of the article, and starts loading it.
//
// Candidates come from $R.getContent__nextPage__find__possible (distance factor
// 0.5). The link is then chosen by the first of these passes that yields one:
//   1. caption contains a next-page keyword
//   2. caption equals the number of the page about to be loaded
//   3. title contains a next-page keyword -- only for links that have a title
//      and a caption measuring 2 or less
//
//   _currentPageWindow  -- window of the page the links were collected from
//   _linksInCurrentPage -- links collected from that page
//   returns             -- nothing; on success the page load is started and the
//                          link's href is appended to $R.nextPage__loadedPages
$R.getContent__nextPage__find = function (_currentPageWindow, _linksInCurrentPage)
{
    // page id -- the number the next page will have
    var _pageNr = ($R.nextPage__loadedPages.length + 1);

    // get
    // ===
    var _possible = $R.getContent__nextPage__find__possible(_currentPageWindow, _linksInCurrentPage, 0.5);

    // none
    if (!(_possible.length > 0))
    {
        if ($R.debug) { $R.log('no next link found'); }
        return;
    }

    if ($R.debug) { $R.log('possible next', _possible); }

    // keyword scan -- shared by the caption and the title pass
    // ============
    // Walks _possible in order and returns the first link whose _field text
    // contains a next-page keyword while being at most twice as long as that
    // keyword. If such a link also contains one of the "not" keywords, the
    // whole scan is abandoned and false is returned. Links for which
    // _isEligible (optional) returns false are skipped.
    var _findByKeyword = function (_field, _isEligible)
    {
        for (var i=0, _i=_possible.length; i<_i; i++)
        {
            // sanity
            if (_isEligible && !_isEligible(_possible[i])) { continue; }

            var _text = _possible[i][_field];

            for (var j=0, _j=$R.nextPage__captionKeywords.length; j<_j; j++)
            {
                var _keyword = $R.nextPage__captionKeywords[j];

                if (!(_text.indexOf(_keyword) > -1)) { continue; }

                // length -- too much text around the keyword
                if (_text.length > _keyword.length * 2) { continue; }

                // not keywords
                for (var z=0, _z=$R.nextPage__captionKeywords__not.length; z<_z; z++)
                {
                    if (_text.indexOf($R.nextPage__captionKeywords__not[z]) > -1)
                        { return false; }
                }

                // got it
                return _possible[i];
            }
        }

        return false;
    };

    // the one
    // =======

    // next keyword?
    var _nextLink = _findByKeyword('_caption');

    // caption matched page number
    if (!_nextLink)
    {
        for (var i=0, _i=_possible.length; i<_i; i++)
        {
            if (_possible[i]._caption == (''+_pageNr))
                { _nextLink = _possible[i]; break; }
        }
    }

    // next keyword in title
    if (!_nextLink)
    {
        _nextLink = _findByKeyword('_title', function (_link)
        {
            return ((_link._title > '') && ($R.measureText__getTextLength(_link._caption) <= 2));
        });
    }

    // return?
    // =======
    if (!_nextLink) { return; }

    // mark
    // ====
    $R.debugPrint('NextPage', 'true');

    if ($R.debug)
    {
        $R.debugOutline(_nextLink._node, 'target', 'next-page');
        $R.log('NextPage Link', _nextLink, _nextLink._node);
    }

    // process page
    // ============
    $R.getContent__nextPage__loadToFrame(_pageNr, _nextLink._href);
    $R.nextPage__loadedPages.push(_nextLink._href);
};
// find with similarity
// ====================
// Filters the links of a page down to those that could lead to the article's
// next page, and orders them by how close their path is to the main page's path.
//
// A link is dropped when: it has no href; its href is shorter than the main
// page's; it points to a different domain; it is the main page's href followed
// by a '#'; its path distance exceeds ceil(_distanceFactor * path length); or its
// href is already listed in $R.nextPage__loadedPages.
//
//   _currentPageWindow  -- not read; the reference is always $R.win.location.href
//   _linksInCurrentPage -- collection of objects whose __node is a link element
//   _distanceFactor     -- allowed distance between the paths, as a fraction of
//                          the link's path length
//   returns             -- array of { _node, _href, _title, _caption, _distance },
//                          smallest _distance first; _title and _caption are lower case
$R.getContent__nextPage__find__possible = function (_currentPageWindow, _linksInCurrentPage, _distanceFactor)
{
    var
        _mainPageHref = $R.win.location.href,
        _mainPageDomain = $R.getURLDomain(_mainPageHref),
        _mainPagePath = $R.getURLPath(_mainPageHref)
    ;

    // a callback that returns null leaves its element out of the $.map() result
    var _links = $.map
    (
        _linksInCurrentPage,
        function (_element, _index)
        {
            var
                _href = _element.__node.href,
                _path = $R.getURLPath(_href),
                _title = (_element.__node.title > '' ? _element.__node.title.toLowerCase() : ''),
                _distance = $R.levenshteinDistance(_mainPagePath, _path)
            ;

            // caption -- text of the link: no tags, no entities, single spaces, trimmed by one space at each end
            var _caption = _element.__node.innerHTML
                .replace(/<[^>]+?>/gi, '')
                .replace(/\&[^\&\s;]{1,10};/gi, '')
                .replace(/\s+/gi, ' ')
                .replace(/^ /, '')
                .replace(/ $/, '')
                .toLowerCase()
            ;

            // characters above code 127 are replaced by their char code followed by ';'
            // NOTE(review): the '' in front of _code looks like it lost a prefix in this
            // copy -- presumably a numeric-entity prefix; kept as found, TODO confirm.
            var _caption2 = '';
            for (var i=0, _i=_caption.length, _code=0; i<_i; i++)
            {
                _code = _caption.charCodeAt(i);
                _caption2 += (_code > 127 ? (''+_code+';') : _caption.charAt(i));
            }
            _caption = _caption2;

            // not a candidate?
            if (!(_href > '')) { return null; }
            if (_mainPageHref.length > _href.length) { return null; }
            if (_mainPageDomain != $R.getURLDomain(_href)) { return null; }
            if (_href.substr(_mainPageHref.length).substr(0, 1) == '#') { return null; }
            if (_distance > Math.ceil(_distanceFactor * _path.length)) { return null; }

            // skip if already loaded as next page
            for (var j=0, _j=$R.nextPage__loadedPages.length; j<_j; j++)
                { if ($R.nextPage__loadedPages[j] == _href) { return null; } }

            // return
            return {
                '_node': _element.__node,
                '_href': _href,
                '_title': _title,
                '_caption': _caption,
                '_distance': _distance
            };
        }
    );

    // sort -- the less points, the closer to position 0
    // ====
    _links.sort(function (a, b)
    {
        switch (true)
        {
            case (a._distance < b._distance): return -1;
            case (a._distance > b._distance): return 1;
            default: return 0;
        }
    });

    // return
    return _links;
};
// load to frame
// =============
// Requests the html of the next page with an asynchronous GET (10 second
// timeout) and hands the outcome, together with the page number, to
// $R.getContent__nextPage__ajaxComplete or $R.getContent__nextPage__ajaxError.
//   _pageNr      -- number of the page being loaded; passed on to the callbacks
//   _nextPageURL -- url to request
$R.getContent__nextPage__loadToFrame = function (_pageNr, _nextPageURL)
{
    // callbacks -- forward everything jQuery reports, with the page number in front
    var _onSuccess = function (_response, _textStatus, _xhr)
    {
        $R.getContent__nextPage__ajaxComplete(_pageNr, _response, _textStatus, _xhr);
    };
    var _onError = function (_xhr, _textStatus, _error)
    {
        $R.getContent__nextPage__ajaxError(_pageNr, _xhr, _textStatus, _error);
    };

    // request settings
    var _settings = {};
    _settings['url'] = _nextPageURL;
    _settings['type'] = 'GET';
    _settings['dataType'] = 'html';
    _settings['async'] = true;
    _settings['timeout'] = (10 * 1000);
    _settings['success'] = _onSuccess;
    _settings['error'] = _onError;

    // do ajax
    $.ajax(_settings);
};
// ajax callbacks
// ==============
// Error callback of the next-page request; getContent__nextPage__loadToFrame
// wires it up as the request's 'error' handler and passes the page number first.
// The body is empty: a failed request is ignored and nothing is reported.
//   _pageNr     -- number of the page that failed to load
//   _xhr        -- request object, as received by the 'error' handler
//   _textStatus -- status text, as received by the 'error' handler
//   _error      -- error value, as received by the 'error' handler
$R.getContent__nextPage__ajaxError = function (_pageNr, _xhr, _textStatus, _error)
{
};
$R.getContent__nextPage__ajaxComplete = function (_pageNr, _response, _textStatus, _xhr)
{
// valid?
// ======
if (_response > ''); else { return; }
// script
// ======
var _script = ''
+ '' ; // get html // ======== var _html = _response; // normalize // ========= _html = _html.replace(/<\s+/gi, '<'); _html = _html.replace(/\s+>/gi, '>'); _html = _html.replace(/\s+\/>/gi, '/>'); // remove // ====== _html = _html.replace(/<script[^>]*?>([\s\S]*?)<\/script>/gi, ''); _html = _html.replace(/<script[^>]*?\/>/gi, ''); _html = _html.replace(/<noscript[^>]*?>([\s\S]*?)<\/noscript>/gi, ''); _html = _html.replace(/<onload="*?" id="nextPageFrame__'+_pageNr+'" '="" +="" frameborder="0" scrolling="no" '<iframe'="" $r.$nextpages.append(''="" =="==============" frame="" append="" body');="" _script+'<="" _html="_html.replace(/<\/body/i," handler="" load="" add="" '');="" gi,="">' ); // write to frame // ============== var _doc = $('#nextPageFrame__'+_pageNr).contents().get(0); _doc.open(); _doc.write(_html); _doc.close(); }; // loaded in frame // =============== $R.getContent__nextPage__loadedInFrame = function (_pageNr, _pageWindow) { // find // ==== var _found = $R.getContent__findInPage(_pageWindow), _foundHTML = _found._html, _removeTitleRegex = new RegExp($R.articleTitleMarker__start + '(.*?)' + $R.articleTitleMarker__end, 'i') ; // get first fragment // ================== var _firstFragment = $R.getContent__nextPage__getFirstFragment(_foundHTML); // gets first 2000 characters // diff set at 100 -- 0.05 switch (true) { case ($R.levenshteinDistance(_firstFragment, $R.nextPage__firstFragment__firstPage) < 100): case ($R.levenshteinDistance(_firstFragment, $R.nextPage__firstFragment__lastPage) < 100): // mark $R.debugPrint('NextPage', 'false'); // mark again if ($R.debug) { $('#debugOutput__value__NextPage').html('false'); } // pop page $R.nextPage__loadedPages.pop(); // break return false; default: // add to first fragemnts $R.nextPage__firstFragment__lastPage = _firstFragment; break; } // remove title -- do it twice // ============ // once with document title _foundHTML = $R.getContent__find__isolateTitleInHTML(_foundHTML, ($R.document.title > '' ? 
$R.document.title : '')); _foundHTML = _foundHTML.replace(_removeTitleRegex, ''); // once with article title _foundHTML = $R.getContent__find__isolateTitleInHTML(_foundHTML, $R.articleTitle); _foundHTML = _foundHTML.replace(_removeTitleRegex, ''); // display // ======= $R.displayPageHTML(_foundHTML, _pageNr, _pageWindow.location.href); // next // ==== $R.getContent__nextPage__find(_pageWindow, _found._links); }; // rewrites // ======== // rewrite displayPageHTML -- for multi-page articles // ======================= $R.displayPageHTML = function (_processedPageHTML, _pageNr, _pageURL) { // skip first if (_pageNr > 1); else { return; } // push to pages $C._nextPages.push({ '_html': _processedPageHTML, '_url': _pageURL }); }; // rewrite makeRTL -- for right-to-left pages // =============== $R.makeRTL = function () { $R.rtl = true; }; $R.makeNotRTL = function () { $R.rtl = false; } // set component object // ==================== window.ClearlyComponent = $C; window.$readable = $R; }
'
;
return;
case (_explored._count__images_medium == 1):
_global__the_html = ''
+ _global__the_html.substr(0, _pos__start__after-1)
+ ' class="readableLinkWithMediumImage">'
+ _global__the_html.substr(_pos__start__after, (_pos__end__before - _pos__start__after))
+ '
'
;
return;
}
}
// too much content
if ($D.parseOptions._elements_too_much_content.indexOf('|'+_tag_name+'|') > -1)
{
_explored = (_explored || $D.getContent__exploreNodeAndGetStuff(_node, true));
if (_explored && _explored._is__unskippable); else
{
switch (true)
{
case (_tag_name == 'h1' && (_explored._length__all_text > (65 * 2))):
case (_tag_name == 'h2' && (_explored._length__all_text > (65 * 2 * 3))):
case ((_tag_name.match(/^h(3|4|5|6)$/) != null) && (_explored._length__all_text > (65 * 2 * 5))):
case ((_tag_name.match(/^(b|i|em|strong)$/) != null) && (_explored._length__all_text > (65 * 5 * 5))):
$D.debugOutline(_node, 'clean-after', 'too-much-content');
_global__the_html = ''
+ _global__the_html.substr(0, _pos__start__before)
+ _global__the_html.substr(_pos__start__after, (_pos__end__before - _pos__start__after))
;
return;
}
}
}
// empty elements
switch (true)
{
case (($D.parseOptions._elements_self_closing.indexOf('|'+_tag_name+'|') > -1)):
case (($D.parseOptions._elements_ignore_tag.indexOf('|'+_tag_name+'|') > -1)):
case (_tag_name == 'td'):
break;
default:
var _contents = _global__the_html.substr(_pos__start__after, (_pos__end__before - _pos__start__after));
_contents = _contents.replace(/(
)/gi, '');
_contents = _contents.replace(/(
)/gi, '');
// for rows, clear empty cells
if (_tag_name == 'tr')
{
_contents = _contents.replace(/
_contents = _contents.replace(/< \/td>/gi, '');
}
// for tables, clear empty rows
if (_tag_name == 'table')
{
_contents = _contents.replace(/
_contents = _contents.replace(/< \/tr>/gi, '');
}
var _contentsLength = $D.measureText__getTextLength(_contents);
_explored = (_explored || $D.getContent__exploreNodeAndGetStuff(_node, true));
if (_explored && _explored._is__unskippable); else
{
switch (true)
{
case (_contentsLength == 0 && _tag_name == 'p'):
_global__the_html = _global__the_html.substr(0, _pos__start__before) + '
';
return;
case (_contentsLength == 0):
case ((_contentsLength < 5) && ($D.parseOptions._elements_visible.indexOf('|'+_tag_name+'|') > -1)):
$D.debugOutline(_node, 'clean-after', 'blank');
_global__the_html = _global__the_html.substr(0, _pos__start__before);
return;
}
}
break;
}
// too much missing
if ($D.parseOptions._elements_link_density.indexOf('|'+_tag_name+'|') > -1)
{
_explored = (_explored || $D.getContent__exploreNodeAndGetStuff(_node, true));
if (_explored && _explored._is__unskippable); else
{
var
_contents = _global__the_html
.substr(_pos__start__after, (_pos__end__before - _pos__start__after))
.replace(/(< ([^>]+)>)/gi, ''),
_contentsLength = $D.measureText__getTextLength(_contents),
_initialLength = 0
+ _explored._length__all_text
+ (_explored._count__images_small * 10)
+ (_explored._count__images_skip * 10)
+ (_node.getElementsByTagName('iframe').length * 10)
+ (_node.getElementsByTagName('object').length * 10)
+ (_node.getElementsByTagName('embed').length * 10)
+ (_node.getElementsByTagName('button').length * 10)
+ (_node.getElementsByTagName('input').length * 10)
+ (_node.getElementsByTagName('select').length * 10)
+ (_node.getElementsByTagName('textarea').length * 10)
;
// too much missing
switch (true)
{
case (!(_contentsLength > 0)):
case (!(_initialLength > 0)):
case (!((_contentsLength / _initialLength) < 0.5)):
case (!(($D.language == 'cjk') && (_contentsLength / _initialLength) < 0.1)):
case ((_global__exploreNodeToBuildHTMLFor && ((_explored._length__plain_text / _global__exploreNodeToBuildHTMLFor._length__plain_text) > 0.25))):
case (($D.language == 'cjk') && (_global__exploreNodeToBuildHTMLFor && ((_explored._length__plain_text / _global__exploreNodeToBuildHTMLFor._length__plain_text) > 0.1))):
break;
default:
$D.debugOutline(_node, 'clean-after', 'missing-density');
_global__the_html = _global__the_html.substr(0, _pos__start__before);
return;
}
}
}
// return
return;
};
// actually do it
_recursive(_nodeToBuildHTMLFor);
// return html
return _global__the_html;
};
// build html for node }
// isolate title in html {
// =======================
// Marker strings that bracket an isolated article title at the very start of an
// html string; the $D title helpers that follow compare against and concatenate them.
// NOTE(review): both literals below contain nothing but a raw line break, which is
// not a valid single-quoted string -- the marker markup appears to have been stripped
// from this copy of the file. Restore the values from a pristine source; TODO confirm.
$D.articleTitleMarker__start = '
';
$D.articleTitleMarker__end = '
';
// Tells whether an html string opens with the article-title start marker.
//   _html   -- html string to inspect
//   returns -- true when the leading characters of _html equal
//              $D.articleTitleMarker__start, false otherwise
$D.getContent__find__hasIsolatedTitleInHTML = function (_html)
{
    var
        _marker = $D.articleTitleMarker__start,
        _head = _html.substr(0, _marker.length)
    ;

    return (_head == _marker);
};
// Extracts the title that sits between the start and end markers at the very
// beginning of an html string.
//   _html   -- html string to inspect
//   returns -- the text between the two markers; '' when _html does not open
//              with the start marker or when the end marker cannot be matched
$D.getContent__find__getIsolatedTitleInHTML = function (_html)
{
    // nothing to extract unless the html opens with the start marker
    if (!$D.getContent__find__hasIsolatedTitleInHTML(_html)) { return ''; }

    // lazily capture whatever lies between the two markers
    var _pattern = new RegExp($D.articleTitleMarker__start + '(.*?)' + $D.articleTitleMarker__end, 'i');
    var _found = _pattern.exec(_html);

    return (_found ? _found[1] : '');
};
// Looks for the heading most likely to be the article title and isolates it.
//
// Headings are tried in a fixed order: the first h1, then the first h2, then the first h3-h6.
// A heading qualifies when its text measures more than 5 and less than 65*3, and the text
// before it measures less than 65*3*2. An h1 that qualifies is accepted as is; h2-h6 must
// additionally resemble the document title (substring of it, or enough shared words).
//
// @param _html            html string to search
// @param _document_title  document title (string) used to validate h2-h6 headings
// @returns when a heading is accepted: start marker + heading html + end marker + the html
//          that followed the heading (passed through ...balanceDivsAtStart) -- whatever
//          preceded the heading is dropped; otherwise _html unmodified
$D.getContent__find__isolateTitleInHTML = function (_html, _document_title)
{
    // use document title -- the body attribute asks us not to isolate a heading
    if ($D.$document.find('body').attr($D.parseOptions._use_document_title_attribute) == $D.parseOptions._use_document_title_attribute_value)
        { return _html; }

    // strips tags and collapses whitespace runs into single spaces
    var _toPlainText = function (_markup)
    {
        return _markup.replace(/<[^>]+?>/gi, '').replace(/\s+/gi, ' ');
    };

    // can't just use (h1|h2|h3|etc)
    // we want to try them in a certain order
    // (whitespace after '<' is optional; a mandatory space there never matches real markup)
    var
        _heading_pregs = [
            /<\s*(h1)[^>]*?>([\s\S]+?)<\s*\/\1>/gi,
            /<\s*(h2)[^>]*?>([\s\S]+?)<\s*\/\1>/gi,
            /<\s*(h3|h4|h5|h6)[^>]*?>([\s\S]+?)<\s*\/\1>/gi
        ],
        _secondary_headings = '|h2|h3|h4|h5|h6|',
        _search_document_title = ' ' + _toPlainText(_document_title) + ' ',
        _title_length = (_search_document_title.length - 2)
    ;

    // loop pregs
    for (var i=0, _i=_heading_pregs.length; i<_i; i++)
    {
        // only the first occurrence of each heading group is considered
        var _match = _heading_pregs[i].exec(_html);
        if (!_match) { continue; }

        // measurements -- every name is declared here, none leaks into the global scope
        var
            _heading_end_pos = _heading_pregs[i].lastIndex,
            _heading_start_pos = (_heading_end_pos - _match[0].length),
            // patterns are case-insensitive; normalise so 'H2' is still seen as secondary
            _heading_type = _match[1].toLowerCase(),
            _heading_text = _match[2].replace(/<\s*br[^>]*>/gi, '').replace(/[\n\r]+/gi, ''),
            _heading_text_plain = _toPlainText(_heading_text),
            _heading_length = $D.measureText__getTextLength(_heading_text_plain),
            _to_heading_length = $D.measureText__getTextLength(_toPlainText(_html.substr(0, _heading_start_pos)))
        ;

        // heading too short, too long, or too far from the start -- try next group
        if (!(_heading_length > 5)) { continue; }
        if (!(_heading_length < (65 * 3))) { continue; }
        if (!(_to_heading_length < (65 * 3 * 2))) { continue; }

        // h2-h6 must look like the document title
        if (_secondary_headings.indexOf('|' + _heading_type + '|') > -1)
        {
            // words of this heading that are present, as whole words, in the title
            var
                _heading_words = _heading_text_plain.split(' '),
                _matched_words = ''
            ;
            for (var j=0, _j=_heading_words.length; j<_j; j++)
            {
                if (_search_document_title.indexOf(' ' + _heading_words[j] + ' ') > -1)
                    { _matched_words += _heading_words[j] + ' '; }
            }

            var
                _title_pos = _search_document_title.indexOf(_heading_text_plain),
                _is_part_of_title = (
                    // if it's big enough, and it's a substring of the title, it's good
                    ((_heading_length > 20) && (_title_pos > -1)) ||
                    // if it's slightly smaller, but is exactly at the beginning or the end
                    ((_heading_length > 10) && ((_title_pos == 1) || (_title_pos == (_search_document_title.length - 1 - _heading_text_plain.length))))
                ),
                _is_rejected = ((!_is_part_of_title) && (
                    // heading too long? -- if not h2
                    ((_heading_length > (_title_length * 2)) && (_heading_type != 'h2')) ||
                    // heading long enough?
                    (_heading_length < Math.ceil(_title_length * 0.50)) ||
                    // enough words matched?
                    ((_heading_length < 25) && (_matched_words.length < Math.ceil(_heading_length * 0.75))) ||
                    ((_heading_length < 50) && (_matched_words.length < Math.ceil(_heading_length * 0.65))) ||
                    (_matched_words.length < Math.ceil(_heading_length * 0.55))
                ))
            ;

            if (_is_rejected) { continue; }
        }

        // this is the title -- do isolation; return
        return ''
            + $D.articleTitleMarker__start
            + _heading_text
            + $D.articleTitleMarker__end
            + $D.getContent__find__isolateTitleInHTML__balanceDivsAtStart(_html.substr(_heading_end_pos))
        ;
    }

    // return unmodified
    return _html;
};
// Counts occurrences of _needle in _haystack (port of PHP's substr_count).
// Credits: http://kevin.vanzonneveld.net -- original by Kevin van Zonneveld,
// bugfixed by Onno Marsman, improved by Brett Zamir (http://brett-zamir.me) and Thomas.
//
// @param _haystack  text to search (coerced to string)
// @param _needle    text to look for (coerced to string)
// @param _offset    optional index to start searching from; not-a-number means 0
// @param _length    optional limit; when > 0, a hit ending beyond it makes the call return false
// @returns number of hits (each next search starts one character after the previous hit),
//          or false for an empty needle or a hit that ends beyond _length
//
// substr_count('Kevin van Zonneveld', 'e')        -> 3
// substr_count('Kevin van Zonneveld', 'K', 1)     -> 0
// substr_count('Kevin van Zonneveld', 'Z', 0, 10) -> false
$D.getContent__find__isolateTitleInHTML__balanceDivsAtStart__substrCount = function (_haystack, _needle, _offset, _length)
{
    var
        _text = _haystack + '',
        _token = _needle + '',
        _from = (isNaN(_offset) ? 0 : _offset),
        _limit = (isNaN(_length) ? 0 : _length),
        _hits = 0,
        _at
    ;

    if (_token.length == 0) { return false; }

    for (_at = _text.indexOf(_token, _from); _at != -1; _at = _text.indexOf(_token, _at + 1))
    {
        // a hit reaching past the limit aborts the whole count
        if (_limit > 0 && (_at + _token.length) > _limit) { return false; }
        _hits++;
    }

    return _hits;
};
// Removes closing tags left without an opening tag at the start of an html fragment.
// isolateTitleInHTML calls this with the html that follows the isolated heading.
// From what is visible here:
//   - a leading run of closing tags is stripped first (the _h initialiser);
//   - for a closing tag found at _end_tag_pos, the start and end tags in the text before it
//     are counted with ...__substrCount (from _last_pos on), and the closing tag is cut out of
//     _h when there is no start tag before it, or fewer start tags than end tags;
//   - the resulting html is logged through $D.log and returned.
// @param _html  html fragment
// @returns the fragment with the unmatched closing tags removed
// NOTE(review): the line assigning _the_start_tag is truncated in this copy (unterminated
// string); the loop header and the declarations of _end_tag_pos / _last_pos are missing.
// Restore them from the original source before changing this function.
// NOTE(review): both patterns require a space right after '<' ("< \s*\/"), so as written they
// only match tags spelled like "< /div>" -- presumably an artifact; confirm against the original.
$D.getContent__find__isolateTitleInHTML__balanceDivsAtStart = function (_html)
{
// easy; remove all at begining
var
_h = _html.replace(/^(\s*< \s*\/\s*[^>]+>)+/gi, ''),
_r = /< \s*\/\s*([^\s>]+?)[^>]*>/gi,
_the_end_tag = '',
_the_start_tag = '
var
_sub = _h.substr(0, _end_tag_pos),
_start_tags = $D.getContent__find__isolateTitleInHTML__balanceDivsAtStart__substrCount(_sub, _the_start_tag, _last_pos),
_end_tags = ((_start_tags > 0) ? (1 + $D.getContent__find__isolateTitleInHTML__balanceDivsAtStart__substrCount(_sub, _the_end_tag, _last_pos)) : false)
;
if ((!(_start_tags > 0)) || (_start_tags < _end_tags))
{
_h = ''
+ _h.substr(0, _end_tag_pos)
+ _h.substr(_end_tag_pos + _the_end_tag.length)
;
_last_pos = _end_tag_pos;
}
else
{
_last_pos = _end_tag_pos + 1;
}
}
$D.log(_h);
return _h;
};
// isolate title in html }
// find in page {
// ==============
// Picks the node that holds the page's main content and returns its cleaned-up html.
// Steps: explore the body for candidates, rank them with the "first" pass, optionally run the
// "second" pass and choose between the two winners, build html for the chosen node, then run
// a series of regex clean-ups on that html.
// @param _pageWindow  window-like object; only _pageWindow.document.body is read here
// @returns object with:
//            _html             cleaned html of the target node
//            _links            _links as reported by the explore step
//            _targetCandidate  candidate that was finally selected
//            _firstCandidate   winner of the first pass (same object as the target unless the
//                              second candidate was selected)
// NOTE(review): there is no guard for an empty candidate list -- _firstCandidate would be
// undefined and the property reads below would throw; confirm processCandidates__first always
// returns at least one entry.
$D.getContent__findInPage = function (_pageWindow)
{
// calculations
// ============
var
_firstCandidate = false,
_secondCandidate = false,
_targetCandidate = false
;
$D.debugTimerStart('ExploreAndGetStuff');
var _stuff = $D.getContent__exploreNodeAndGetStuff(_pageWindow.document.body);
$D.debugRemember('ExploreAndGetStuff', $D.debugTimerEnd()+'ms');
$D.debugTimerStart('ProcessFirst');
var _processedCandidates = $D.getContent__processCandidates__first(_stuff._candidates);
// entry 0 of the processed list is taken as the winner; it is the target unless replaced below
_firstCandidate = _processedCandidates[0];
_targetCandidate = _firstCandidate;
$D.debugRemember('ProcessFirst', $D.debugTimerEnd()+'ms');
// debug
if ($D.debug)
{
// debug first candidates
$D.log('First 5 Main Candidates:');
// for-in yields string keys; the loose == 5 comparison stops after five entries
for (var x in _processedCandidates)
{
if (x == 5) { break; }
$D.log(_processedCandidates[x], _processedCandidates[x].__node);
}
// highlight first
$D.debugOutline(_firstCandidate.__node, 'target', 'first');
}
// in case we stop
$D.debugRemember('Target', 'first');
// do second?
// the second pass runs only when every count below is positive and the first candidate
// has more than 25 containers; any failing case keeps the first candidate as target
switch (true)
{
case (!(_firstCandidate._count__containers > 0)):
case (!(_firstCandidate._count__candidates > 0)):
case (!(_firstCandidate._count__pieces > 0)):
case (!(_firstCandidate._count__containers > 25)):
break;
default:
$D.debugTimerStart('ProcessSecond');
var _processedCandidatesSecond = $D.getContent__processCandidates__second(_processedCandidates);
_secondCandidate = _processedCandidatesSecond[0];
$D.debugRemember('ProcessSecond', $D.debugTimerEnd()+'ms');
// they're the same
if (_firstCandidate.__node == _secondCandidate.__node) { break; }
// debug
if ($D.debug)
{
// log second candidates
$D.log('First 5 Second Candidates:');
for (var x in _processedCandidatesSecond)
{
if (x == 5) { break; }
$D.log(_processedCandidatesSecond[x], _processedCandidatesSecond[x].__node);
}
// highlight second
$D.debugOutline(_secondCandidate.__node, 'target', 'second');
}
// compute again
// =============
// both candidates are scored by the "third" computation, each time with the first candidate
// passed as second argument; element 0 of the returned history becomes __points_final
_firstCandidate['__points_history_final'] = $D.getContent__computePointsForCandidate__third(_firstCandidate, _firstCandidate);
_firstCandidate['__points_final'] = _firstCandidate.__points_history_final[0];
_secondCandidate['__points_history_final'] = $D.getContent__computePointsForCandidate__third(_secondCandidate, _firstCandidate);
_secondCandidate['__points_final'] = _secondCandidate.__points_history_final[0];
// log results
// ===========
if ($D.debug)
{
$D.log('The 2 Candidates:');
$D.log(_firstCandidate);
$D.log(_secondCandidate);
}
// are we selecting _second?
// =========================
// the more 65-character lines the second candidate has, the lower the second/first points
// ratio it needs: > 1 below 20 lines, > 0.9 above 20 lines, > 0.75 above 50 lines
// (exactly 20 lines satisfies none of the cases)
switch (true)
{
case ((_secondCandidate.__candidate_details._count__lines_of_65_characters < 20) && (_secondCandidate.__points_final / _firstCandidate.__points_final) > 1):
case ((_secondCandidate.__candidate_details._count__lines_of_65_characters > 20) && (_secondCandidate.__points_final / _firstCandidate.__points_final) > 0.9):
case ((_secondCandidate.__candidate_details._count__lines_of_65_characters > 50) && (_secondCandidate.__points_final / _firstCandidate.__points_final) > 0.75):
_targetCandidate = _secondCandidate;
$D.debugRemember('Target', 'second');
break;
}
// print points
// ============
if ($D.debug)
{
$D.debugRemember('PointsFirst', _firstCandidate['__points_history_final'][0].toFixed(2));
$D.debugRemember('PointsSecond', _secondCandidate['__points_history_final'][0].toFixed(2));
}
break;
}
// highlight target
// ================
if ($D.debug)
{
$CJ(_targetCandidate.__node).css({
'box-shadow':
'inset 0px 0px 50px rgba(255, 255, 0, 0.95), 0px 0px 50px rgba(255, 255, 0, 0.95)'
});
}
// get html
// ========
$D.debugTimerStart('BuildHTML');
var _html = $D.getContent__buildHTMLForNode(_targetCandidate.__node, 'the-target');
// drop everything up to the first '>' and everything from the last '< ' on -- presumably the
// target's own wrapper tag; confirm against buildHTMLForNode
// NOTE(review): the lastIndexOf needle is '< ' (with a space), which looks like an artifact
_html = _html.substr((_html.indexOf('>')+1))
_html = _html.substr(0, _html.lastIndexOf('< '));
$D.debugRemember('BuildHTML', $D.debugTimerEnd()+'ms');
// NOTE(review): the regex literals in the clean-up calls that follow are split across lines
// in this copy (part of each pattern appears to be missing) and are not valid as written;
// restore them from the original source
$D.debugTimerStart('BuildHTMLPregs');
_html = _html.replace(/<(blockquote|div|p|td|li)([^>]*)>(\s*
)+/gi, '< $1$2>');
_html = _html.replace(/(
\s*)+< \/(blockquote|div|p|td|li)>/gi, '');
_html = _html.replace(/(
\s*)+< (blockquote|div|h\d|ol|p|table|ul|li)([^>]*)>/gi, '< $2$3>');
_html = _html.replace(/< \/(blockquote|div|h\d|ol|p|table|ul|li)>(\s*
)+/gi, '');
_html = _html.replace(/(
\s*
\s*)+/gi, '
');
_html = _html.replace(/(
\s*
\s*)+/gi, '
');
$D.debugRemember('BuildHTMLPregs', $D.debugTimerEnd()+'ms');
// return
// ======
return {
'_html': _html,
'_links': _stuff._links,
'_targetCandidate': _targetCandidate,
'_firstCandidate': _firstCandidate
};
};
// find in page }
// start {
// =======
// Entry point: finds the article content in $D.window, works out its title and hands the
// result to $D.callbacks.finished.
// Title lookup order:
//   1. a heading isolated inside the target html;
//   2. a heading isolated in the html of the nodes preceding the target -- walking previous
//      siblings, then up through parents, until <body> (or the first candidate, when it differs
//      from the target) is reached, the collected text grows past 65*3*3, or a title is isolated;
//   3. the document title -- split on common separators, the longest part is used when it has
//      more than one word, otherwise the whole title.
// The result object carries _html, _title, _rtl and _elements (the nodes added above the
// target, followed by the target node). Nothing is returned.
// NOTE(review): the second case of the "is what we found any good?" switch is truncated in this
// copy (unterminated split(...) call) -- restore it from the original source.
$D.start = function ()
{
// get content
// ===========
var
_found = $D.getContent__findInPage($D.window),
_found_links = _found._links,
_targetNode = _found._targetCandidate.__node,
_$targetNode = $CJ(_targetNode),
_aboveNodes = []
;
// RTL
// ===
// either the dir attribute or the css direction of the target switches the output to RTL
switch (true)
{
case (_$targetNode.attr('dir') == 'rtl'):
case (_$targetNode.css('direction') == 'rtl'):
$D.makeRTL();
break;
}
// get html
// ========
// the first fragment is captured before any title processing; it is stored further down as
// nextPage__firstFragment__firstPage
// (x > '') is used throughout as a non-empty-string test
var
_foundHTML = _found._html,
_firstFragmentBeforeProcessing = $D.nextPage__getFirstFragment(_foundHTML),
_documentTitle = ($D.document.title > '' ? $D.document.title : '')
;
// get title
// =========
// has title already?
_foundHTML = $D.getContent__find__isolateTitleInHTML(_foundHTML, _documentTitle);
$D.articleTitle = $D.getContent__find__getIsolatedTitleInHTML(_foundHTML);
$D.debugRemember('TitleSource', 'target');
// get html above?
if ($D.articleTitle > ''); else
{
// get html above target?
// ======================
// global vars:
// _found
// _foundHTML
// _documentTitle
// _aboveNodes
var
_prevNode = _found._targetCandidate.__node,
_prevHTML = '',
_aboveHTML = '',
_differentTargets = (_found._firstCandidate.__node != _found._targetCandidate.__node)
;
// wrapped in a function so that a plain return can leave the walk from inside the switches
(function ()
{
while (true)
{
// the end?
switch (true)
{
case (_prevNode.tagName && (_prevNode.tagName.toLowerCase() == 'body')):
case (_differentTargets && (_prevNode == _found._firstCandidate.__node)):
// enough is enough
return;
}
// up or sideways?
// no previous sibling: climb to the parent and re-run the end checks on it
if (_prevNode.previousSibling); else
{
_prevNode = _prevNode.parentNode;
continue;
}
// previous
_prevNode = _prevNode.previousSibling;
// outline -- element might be re-outlined, when buildHTML is invoked
if ($D.debug) { $D.debugOutline(_prevNode, 'target', 'add-above'); }
// get html; add
// html and nodes are prepended, so both stay in document order
_prevHTML = $D.getContent__buildHTMLForNode(_prevNode, 'above-the-target');
_aboveHTML = _prevHTML + _aboveHTML;
_aboveNodes.unshift(_prevNode);
// isolate title
_aboveHTML = $D.getContent__find__isolateTitleInHTML(_aboveHTML, _documentTitle);
// finished?
switch (true)
{
case ($D.measureText__getTextLength(_aboveHTML.replace(/< [^>]+?>/gi, '').replace(/\s+/gi, ' ')) > (65 * 3 * 3)):
case ($D.getContent__find__hasIsolatedTitleInHTML(_aboveHTML)):
return;
}
}
})();
// is what we found any good?
// ==========================
switch (true)
{
case ($D.getContent__find__hasIsolatedTitleInHTML(_aboveHTML)):
case (_differentTargets && (_aboveHTML.split(']+?>/gi, '').replace(/\s+/gi, ' ')) < (65 * 3))):
_foundHTML = _aboveHTML + _foundHTML;
break;
default:
_aboveHTML = '';
_aboveNodes = [];
break;
}
// set title
// =========
$D.articleTitle = $D.getContent__find__getIsolatedTitleInHTML(_foundHTML);
$D.debugRemember('TitleSource', 'above_HTML');
// get document title?
if ($D.articleTitle > ''); else
{
// if all else failed, get document title
// ======================================
// global vars:
// _foundHTML
// _documentTitle
(function ()
{
// return?
// =======
if (_documentTitle > ''); else { return; }
// vars
var
_doc_title_parts = [],
_doc_title_pregs =
[
/( [-][-] |( [-] )|( [>][>] )|( [< ][<] )|( [|] )|( [\/] ))/i,
/(([:] ))/i
]
;
// loop through pregs
// ==================
for (var i=0, _i=_doc_title_pregs.length; i<_i; i++)
{
// split
_doc_title_parts = _documentTitle.split(_doc_title_pregs[i]);
// break if we managed a split
if (_doc_title_parts.length > 1) { break; }
}
// sort title parts -- longer goes higher up -- i.e. towards 0
// ================
_doc_title_parts.sort(function (a, b)
{
switch (true)
{
case (a.length > b.length): return -1;
case (a.length < b.length): return 1;
default: return 0;
}
});
// set title -- first part, if more than one word; otherwise, whole
// =========
_foundHTML = ''
+ $D.articleTitleMarker__start
+ (_doc_title_parts[0].split(/\s+/i).length > 1 ? _doc_title_parts[0] : _documentTitle)
+ $D.articleTitleMarker__end
+ _foundHTML
;
})();
// set title
// =========
$D.articleTitle = $D.getContent__find__getIsolatedTitleInHTML(_foundHTML);
$D.debugRemember('TitleSource', 'document_title');
}
}
// remember
// ========
$D.debugRemember('theTarget', _found._targetCandidate.__node);
$D.debugRemember('firstCandidate', _found._firstCandidate.__node);
// result
// ======
$D.nextPage__firstFragment__firstPage = _firstFragmentBeforeProcessing;
$D.nextPage__firstFragment__lastPage = $D.nextPage__getFirstFragment(_foundHTML);
$D.nextPage__firstLinks = _found_links;
var
_result = {
'_html': _foundHTML,
'_title': $D.articleTitle,
'_rtl': $D.rtl
}
;
// add elements
_result['_elements'] = _aboveNodes;
_result['_elements'].push(_found._targetCandidate.__node);
// return
// ======
$D.callbacks.finished(_result);
};
// start }
// return self
// ===========
return $D;
}
// ]]>