From 144a1676bd87dab2625843332e002fb5d79afef7 Mon Sep 17 00:00:00 2001 From: eneerge Date: Wed, 18 Jul 2018 22:27:20 -0500 Subject: [PATCH 1/6] Added html mapping for &html control codes. Will replace characters such as ‘ and & with their plain text equivalent. --- urltitle.tcl | 390 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 389 insertions(+), 1 deletion(-) diff --git a/urltitle.tcl b/urltitle.tcl index a375f59..b793fbc 100644 --- a/urltitle.tcl +++ b/urltitle.tcl @@ -121,7 +121,395 @@ namespace eval UrlTitle { break } if {[string length $urtitle]} { - puthelp "PRIVMSG $chan :Title: $urtitle" + set html_mapping { + ‘ ' + ’ ' + ’ ' + – ' + ' ' + " " + " " + ‚ ‚ + “ “ + ” ” + „ „ + † † + ‡ ‡ + ‰ ‰ + ‹ ‹ + › › + ♠ ♠ + ♣ ♣ + ♥ ♥ + ♦ ♦ + ‾ ‾ + ← ← + ← ← + ↑ ↑ + ↑ ↑ + → → + → → + ↓ ↓ + ↓ ↓ + ↖ ↖ + ↖ ↖ + ↗ ↗ + ↗ ↗ + ↙ ↙ + ↙ ↙ + ↘ ↘ + ↘ ↘ + ▲ ▲ + ▲ ▲ + ▴ ▴ + ▴ ▴ + ▶ ▶ + ▶ ▶ + ▸ ▸ + ▸ ▸ + ► ► + ► ► + ▼ ▼ + ▼ ▼ + ▾ ▾ + ▾ ▾ + ◀ ◀ + ◀ ◀ + ◂ ◂ + ◂ ◂ + ◄ ◄ + ◄ ◄ + ™ ™ + ' ' + ™ ™ + � - + � - + ! ! + ! ! + " {"} + " {"} + " {"} + # {#} + # {#} + $ $ + $ $ + % % + % % + & & + & & + & & + ' ' + ' ' + ( ( + ( ( + ) ) + ) ) + * * + * * + + + + + + + , , + , , + - - + - - + . . + . . + / / + / / + ⁄ / + 0 - + 0 - + : : + : : + ; ; + ; ; + < < + < < + < < + = = + = = + > > + > > + > > + ? ? + ? ? 
+ @ @ + @ @ + A - + A - + [ [ + [ [ + \ \ + \ \ + ] ] + ] ] + ^ ^ + ^ ^ + _ _ + _ _ + ` ` + ` ` + a - + a - + { { + | | + } } + ~ ~ + … … + … … + – – + – – + — — + — — + ˜ - + Ÿ " " +     + ¡ ¡ + ¡ ¡ + ¢ ¢ + ¢ ¢ + £ £ + £ £ + ¤ ¤ + ¤ ¤ + ¥ ¥ + ¥ ¥ + ¦ ¦ + ¦ ¦ + &brkbar; ¦ + § § + § § + ¨ ¨ + ¨ ¨ + ¨ ¨ + © © + © © + ª ª + ª ª + « « + « « + ¬ ¬ + ¬ ¬ + ® ® + ® ® + ¯ ¯ + ¯ ¯ + &hibar; ¯ + ° ° + ° ° + ± ± + ± ± + ² ² + ² ² + ³ ³ + ³ ³ + ´ ´ + ´ ´ + µ µ + µ µ + ¶ ¶ + ¶ ¶ + · · + · · + ¸ ¸ + ¸ ¸ + ¹ ¹ + ¹ ¹ + º º + º º + » » + » » + ¼ ¼ + ¼ ¼ + ½ ½ + ½ ½ + ¾ ¾ + ¾ ¾ + ¿ ¿ + ¿ ¿ + À À + À À + Á Á + Á Á + Â Â + Â Â + Ã Ã + Ã Ã + Ä Ä + Ä Ä + Å Å + Å Å + Æ Æ + Æ Æ + Ç Ç + Ç Ç + È È + È È + É É + É É + Ê Ê + Ê Ê + Ë Ë + Ë Ë + Ì Ì + Ì Ì + Í Í + Í Í + Î Î + Î Î + Ï Ï + Ï Ï + Ð Ð + Ð Ð + Ñ Ñ + Ñ Ñ + Ò Ò + Ò Ò + Ó Ó + Ó Ó + Ô Ô + Ô Ô + Õ Õ + Õ Õ + Ö Ö + Ö Ö + × × + × × + Ø Ø + Ø Ø + Ù Ù + Ù Ù + Ú Ú + Ú Ú + Û Û + Û Û + Ü Ü + Ü Ü + Ý Ý + Ý Ý + Þ Þ + Þ Þ + ß ß + ß ß + à à + à à + á á + á á + â â + â â + ã ã + ã ã + ä ä + ä ä + å å + å å + æ æ + æ æ + ç ç + ç ç + è è + è è + é é + é é + ê ê + ê ê + ë ë + ë ë + ì ì + ì ì + í í + í í + î î + î î + ï ï + ï ï + ð ð + ð ð + ñ ñ + ñ ñ + ò ò + ò ò + ó ó + ó ó + ô ô + ô ô + õ õ + õ õ + ö ö + ö ö + ÷ ÷ + ÷ ÷ + ø ø + ø ø + ù ù + ù ù + ú ú + ú ú + û û + û û + ü ü + ü ü + ý ý + ý ý + þ þ + þ þ + ÿ ÿ + ÿ ÿ + Α Α + α α + Β Β + β β + Γ Γ + γ γ + Δ Δ + δ δ + Ε Ε + ε ε + Ζ Ζ + ζ ζ + Η Η + η η + Θ Θ + θ θ + Ι Ι + ι ι + Κ Κ + κ κ + Λ Λ + λ λ + Μ Μ + μ μ + Ν Ν + ν ν + Ξ Ξ + ξ ξ + Ο Ο + ο ο + Π Π + π π + Ρ Ρ + ρ ρ + Σ Σ + σ σ + Τ Τ + τ τ + Υ Υ + υ υ + Φ Φ + φ φ + Χ Χ + χ χ + Ψ Ψ + ψ ψ + Ω Ω + ω ω + ● ● + • • + ∞ ∞ + ∞ ∞ + } + set tit [string map $html_mapping $urtitle] + puthelp "PRIVMSG $chan :\002$tit" } break } From 562eff30bf1bb1d1e6632fa65707113cb1c72f7d Mon Sep 17 00:00:00 2001 From: eneerge Date: Thu, 19 Jul 2018 00:53:14 -0500 Subject: [PATCH 2/6] Support urls without protocol in front --- urltitle.tcl | 13 +++++++++---- 1 file changed, 9 
insertions(+), 4 deletions(-) diff --git a/urltitle.tcl b/urltitle.tcl index b793fbc..977aadf 100644 --- a/urltitle.tcl +++ b/urltitle.tcl @@ -48,7 +48,7 @@ namespace eval UrlTitle { variable fetchLimit 5 ;# How many times to process redirects before erroring # BINDS - bind pubm "-|-" {*://*} UrlTitle::handler + bind pubm "-|-" {*} UrlTitle::handler setudef flag urltitle ;# Channel flag to enable script. setudef flag logurltitle ;# Channel flag to enable logging of script. @@ -102,8 +102,7 @@ namespace eval UrlTitle { set unixtime [clock seconds] if {[channel get $chan urltitle] && ($unixtime - $delay) > $last && (![matchattr $user $ignore])} { foreach word [split $text] { - if {[string length $word] >= $length && [regexp {^(f|ht)tp(s|)://} $word] && \ - ![regexp {://([^/:]*:([^/]*@|\d+(/|$))|.*/\.)} $word]} { + if {[string length $word] >= $length && [regexp {((?:[a-zA-Z][\w-]+:(?:\/{1,3}|[a-zA-Z0-9%])|www\d{0,3}[.]|[a-zA-Z0-9\-]+[.][a-zA-Z]{2,4}\/?)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\)){0,}(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s\!()\[\]{};:\'\"\.\,<>?«»“”‘’]){0,})} $word]} { set last $unixtime # enable https if supported if {$httpsSupport} { @@ -511,7 +510,6 @@ namespace eval UrlTitle { set tit [string map $html_mapping $urtitle] puthelp "PRIVMSG $chan :\002$tit" } - break } } } @@ -570,7 +568,14 @@ namespace eval UrlTitle { variable timeout variable tdomSupport set title "" + if {[info exists url] && [string length $url]} { + if { + ([string first "http://" $url] == -1) && + ([string first "https://" $url] == -1) + } { + set url "http://$url" + } if {[catch {set http [Fetch $url -timeout $timeout]} results]} { putlog "Connection to $url failed" putlog "Error: $results" From d446cbaafd53dfbb1b9392a4fd4f5086bd04680c Mon Sep 17 00:00:00 2001 From: eneerge Date: Thu, 19 Jul 2018 21:05:09 -0500 Subject: [PATCH 3/6] Pull imgur titles when an extension is used --- urltitle.tcl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/urltitle.tcl 
b/urltitle.tcl index 977aadf..75f8f9b 100644 --- a/urltitle.tcl +++ b/urltitle.tcl @@ -576,6 +576,11 @@ namespace eval UrlTitle { } { set url "http://$url" } + + ## Some websites will display a title if an image is passed without an extension. + regsub -nocase {(\.png|\.gif|.jpeg|\.jpg)\Z} $url {} url + regsub -nocase {(i.imgur.com)} $url {imgur.com} url + if {[catch {set http [Fetch $url -timeout $timeout]} results]} { putlog "Connection to $url failed" putlog "Error: $results" From 3a2707c0b2e03768af51dec65779dc60e2526d11 Mon Sep 17 00:00:00 2001 From: eneerge Date: Thu, 19 Jul 2018 21:29:02 -0500 Subject: [PATCH 4/6] Organization and htmlparse checking --- urltitle.tcl | 787 ++++++++++++++++++++++++++------------------------- 1 file changed, 398 insertions(+), 389 deletions(-) diff --git a/urltitle.tcl b/urltitle.tcl index 75f8f9b..474774f 100644 --- a/urltitle.tcl +++ b/urltitle.tcl @@ -111,7 +111,11 @@ namespace eval UrlTitle { set urtitle [UrlTitle::parse $word] if {$htmlSupport} { set urtitle [::htmlparse::mapEscapes $urtitle] + } else { + # Fallback to a simple decoder if htmlparse not installed + set urtitle [simpleHtmlDecode $urtitle] } + # unregister https if supported if {$httpsSupport} { ::http::unregister https @@ -120,395 +124,7 @@ namespace eval UrlTitle { break } if {[string length $urtitle]} { - set html_mapping { - ‘ ' - ’ ' - ’ ' - – ' - ' ' - " " - " " - ‚ ‚ - “ “ - ” ” - „ „ - † † - ‡ ‡ - ‰ ‰ - ‹ ‹ - › › - ♠ ♠ - ♣ ♣ - ♥ ♥ - ♦ ♦ - ‾ ‾ - ← ← - ← ← - ↑ ↑ - ↑ ↑ - → → - → → - ↓ ↓ - ↓ ↓ - ↖ ↖ - ↖ ↖ - ↗ ↗ - ↗ ↗ - ↙ ↙ - ↙ ↙ - ↘ ↘ - ↘ ↘ - ▲ ▲ - ▲ ▲ - ▴ ▴ - ▴ ▴ - ▶ ▶ - ▶ ▶ - ▸ ▸ - ▸ ▸ - ► ► - ► ► - ▼ ▼ - ▼ ▼ - ▾ ▾ - ▾ ▾ - ◀ ◀ - ◀ ◀ - ◂ ◂ - ◂ ◂ - ◄ ◄ - ◄ ◄ - ™ ™ - ' ' - ™ ™ - � - - � - - ! ! - ! ! - " {"} - " {"} - " {"} - # {#} - # {#} - $ $ - $ $ - % % - % % - & & - & & - & & - ' ' - ' ' - ( ( - ( ( - ) ) - ) ) - * * - * * - + + - + + - , , - , , - - - - - - - . . - . . 
- / / - / / - ⁄ / - 0 - - 0 - - : : - : : - ; ; - ; ; - < < - < < - < < - = = - = = - > > - > > - > > - ? ? - ? ? - @ @ - @ @ - A - - A - - [ [ - [ [ - \ \ - \ \ - ] ] - ] ] - ^ ^ - ^ ^ - _ _ - _ _ - ` ` - ` ` - a - - a - - { { - | | - } } - ~ ~ - … … - … … - – – - – – - — — - — — - ˜ - - Ÿ " " -     - ¡ ¡ - ¡ ¡ - ¢ ¢ - ¢ ¢ - £ £ - £ £ - ¤ ¤ - ¤ ¤ - ¥ ¥ - ¥ ¥ - ¦ ¦ - ¦ ¦ - &brkbar; ¦ - § § - § § - ¨ ¨ - ¨ ¨ - ¨ ¨ - © © - © © - ª ª - ª ª - « « - « « - ¬ ¬ - ¬ ¬ - ® ® - ® ® - ¯ ¯ - ¯ ¯ - &hibar; ¯ - ° ° - ° ° - ± ± - ± ± - ² ² - ² ² - ³ ³ - ³ ³ - ´ ´ - ´ ´ - µ µ - µ µ - ¶ ¶ - ¶ ¶ - · · - · · - ¸ ¸ - ¸ ¸ - ¹ ¹ - ¹ ¹ - º º - º º - » » - » » - ¼ ¼ - ¼ ¼ - ½ ½ - ½ ½ - ¾ ¾ - ¾ ¾ - ¿ ¿ - ¿ ¿ - À À - À À - Á Á - Á Á - Â Â - Â Â - Ã Ã - Ã Ã - Ä Ä - Ä Ä - Å Å - Å Å - Æ Æ - Æ Æ - Ç Ç - Ç Ç - È È - È È - É É - É É - Ê Ê - Ê Ê - Ë Ë - Ë Ë - Ì Ì - Ì Ì - Í Í - Í Í - Î Î - Î Î - Ï Ï - Ï Ï - Ð Ð - Ð Ð - Ñ Ñ - Ñ Ñ - Ò Ò - Ò Ò - Ó Ó - Ó Ó - Ô Ô - Ô Ô - Õ Õ - Õ Õ - Ö Ö - Ö Ö - × × - × × - Ø Ø - Ø Ø - Ù Ù - Ù Ù - Ú Ú - Ú Ú - Û Û - Û Û - Ü Ü - Ü Ü - Ý Ý - Ý Ý - Þ Þ - Þ Þ - ß ß - ß ß - à à - à à - á á - á á - â â - â â - ã ã - ã ã - ä ä - ä ä - å å - å å - æ æ - æ æ - ç ç - ç ç - è è - è è - é é - é é - ê ê - ê ê - ë ë - ë ë - ì ì - ì ì - í í - í í - î î - î î - ï ï - ï ï - ð ð - ð ð - ñ ñ - ñ ñ - ò ò - ò ò - ó ó - ó ó - ô ô - ô ô - õ õ - õ õ - ö ö - ö ö - ÷ ÷ - ÷ ÷ - ø ø - ø ø - ù ù - ù ù - ú ú - ú ú - û û - û û - ü ü - ü ü - ý ý - ý ý - þ þ - þ þ - ÿ ÿ - ÿ ÿ - Α Α - α α - Β Β - β β - Γ Γ - γ γ - Δ Δ - δ δ - Ε Ε - ε ε - Ζ Ζ - ζ ζ - Η Η - η η - Θ Θ - θ θ - Ι Ι - ι ι - Κ Κ - κ κ - Λ Λ - λ λ - Μ Μ - μ μ - Ν Ν - ν ν - Ξ Ξ - ξ ξ - Ο Ο - ο ο - Π Π - π π - Ρ Ρ - ρ ρ - Σ Σ - σ σ - Τ Τ - τ τ - Υ Υ - υ υ - Φ Φ - φ φ - Χ Χ - χ χ - Ψ Ψ - ψ ψ - Ω Ω - ω ω - ● ● - • • - ∞ ∞ - ∞ ∞ - } - set tit [string map $html_mapping $urtitle] - puthelp "PRIVMSG $chan :\002$tit" + puthelp "PRIVMSG $chan :\002$urtitle" } } } @@ -624,6 +240,399 @@ namespace eval UrlTitle { return $title } + # Simple html decoder if 
htmlparse is not available + proc simpleHtmlDecode {text} { + set title "" + set html_mapping { + ‘ ' + ’ ' + ’ ' + – ' + ' ' + " " + " " + ‚ ‚ + “ “ + ” ” + „ „ + † † + ‡ ‡ + ‰ ‰ + ‹ ‹ + › › + ♠ ♠ + ♣ ♣ + ♥ ♥ + ♦ ♦ + ‾ ‾ + ← ← + ← ← + ↑ ↑ + ↑ ↑ + → → + → → + ↓ ↓ + ↓ ↓ + ↖ ↖ + ↖ ↖ + ↗ ↗ + ↗ ↗ + ↙ ↙ + ↙ ↙ + ↘ ↘ + ↘ ↘ + ▲ ▲ + ▲ ▲ + ▴ ▴ + ▴ ▴ + ▶ ▶ + ▶ ▶ + ▸ ▸ + ▸ ▸ + ► ► + ► ► + ▼ ▼ + ▼ ▼ + ▾ ▾ + ▾ ▾ + ◀ ◀ + ◀ ◀ + ◂ ◂ + ◂ ◂ + ◄ ◄ + ◄ ◄ + ™ ™ + ' ' + ™ ™ + � - + � - + ! ! + ! ! + " {"} + " {"} + " {"} + # {#} + # {#} + $ $ + $ $ + % % + % % + & & + & & + & & + ' ' + ' ' + ( ( + ( ( + ) ) + ) ) + * * + * * + + + + + + + , , + , , + - - + - - + . . + . . + / / + / / + ⁄ / + 0 - + 0 - + : : + : : + ; ; + ; ; + < < + < < + < < + = = + = = + > > + > > + > > + ? ? + ? ? + @ @ + @ @ + A - + A - + [ [ + [ [ + \ \ + \ \ + ] ] + ] ] + ^ ^ + ^ ^ + _ _ + _ _ + ` ` + ` ` + a - + a - + { { + | | + } } + ~ ~ + … … + … … + – – + – – + — — + — — + ˜ - + Ÿ " " +   " " + ¡ ¡ + ¡ ¡ + ¢ ¢ + ¢ ¢ + £ £ + £ £ + ¤ ¤ + ¤ ¤ + ¥ ¥ + ¥ ¥ + ¦ ¦ + ¦ ¦ + &brkbar; ¦ + § § + § § + ¨ ¨ + ¨ ¨ + ¨ ¨ + © © + © © + ª ª + ª ª + « « + « « + ¬ ¬ + ¬ ¬ + ® ® + ® ® + ¯ ¯ + ¯ ¯ + &hibar; ¯ + ° ° + ° ° + ± ± + ± ± + ² ² + ² ² + ³ ³ + ³ ³ + ´ ´ + ´ ´ + µ µ + µ µ + ¶ ¶ + ¶ ¶ + · · + · · + ¸ ¸ + ¸ ¸ + ¹ ¹ + ¹ ¹ + º º + º º + » » + » » + ¼ ¼ + ¼ ¼ + ½ ½ + ½ ½ + ¾ ¾ + ¾ ¾ + ¿ ¿ + ¿ ¿ + À À + À À + Á Á + Á Á + Â Â + Â Â + Ã Ã + Ã Ã + Ä Ä + Ä Ä + Å Å + Å Å + Æ Æ + Æ Æ + Ç Ç + Ç Ç + È È + È È + É É + É É + Ê Ê + Ê Ê + Ë Ë + Ë Ë + Ì Ì + Ì Ì + Í Í + Í Í + Î Î + Î Î + Ï Ï + Ï Ï + Ð Ð + Ð Ð + Ñ Ñ + Ñ Ñ + Ò Ò + Ò Ò + Ó Ó + Ó Ó + Ô Ô + Ô Ô + Õ Õ + Õ Õ + Ö Ö + Ö Ö + × × + × × + Ø Ø + Ø Ø + Ù Ù + Ù Ù + Ú Ú + Ú Ú + Û Û + Û Û + Ü Ü + Ü Ü + Ý Ý + Ý Ý + Þ Þ + Þ Þ + ß ß + ß ß + à à + à à + á á + á á + â â + â â + ã ã + ã ã + ä ä + ä ä + å å + å å + æ æ + æ æ + ç ç + ç ç + è è + è è + é é + é é + ê ê + ê ê + ë ë + ë ë + ì ì + ì ì + í í + í í + î î + î î + ï ï + ï ï + ð ð + ð ð + ñ ñ + ñ ñ + ò ò + ò ò + ó ó + ó ó + ô ô + ô ô + õ õ + õ 
õ + ö ö + ö ö + ÷ ÷ + ÷ ÷ + ø ø + ø ø + ù ù + ù ù + ú ú + ú ú + û û + û û + ü ü + ü ü + ý ý + ý ý + þ þ + þ þ + ÿ ÿ + ÿ ÿ + Α Α + α α + Β Β + β β + Γ Γ + γ γ + Δ Δ + δ δ + Ε Ε + ε ε + Ζ Ζ + ζ ζ + Η Η + η η + Θ Θ + θ θ + Ι Ι + ι ι + Κ Κ + κ κ + Λ Λ + λ λ + Μ Μ + μ μ + Ν Ν + ν ν + Ξ Ξ + ξ ξ + Ο Ο + ο ο + Π Π + π π + Ρ Ρ + ρ ρ + Σ Σ + σ σ + Τ Τ + τ τ + Υ Υ + υ υ + Φ Φ + φ φ + Χ Χ + χ χ + Ψ Ψ + ψ ψ + Ω Ω + ω ω + ● ● + • • + ∞ ∞ + ∞ ∞ + } + set title [string map $html_mapping $text] + return $title + } putlog "Initialized Url Title Grabber v$scriptVersion" } From f82921c7b93a01b3d037706c0f64a9c50ce5d91a Mon Sep 17 00:00:00 2001 From: eneerge Date: Thu, 19 Jul 2018 22:00:10 -0500 Subject: [PATCH 5/6] Removed unneeded imgur translation --- urltitle.tcl | 1 - 1 file changed, 1 deletion(-) diff --git a/urltitle.tcl b/urltitle.tcl index 474774f..f1f2a7d 100644 --- a/urltitle.tcl +++ b/urltitle.tcl @@ -195,7 +195,6 @@ namespace eval UrlTitle { ## Some websites will display a title if an image is passed without an extension. regsub -nocase {(\.png|\.gif|.jpeg|\.jpg)\Z} $url {} url - regsub -nocase {(i.imgur.com)} $url {imgur.com} url if {[catch {set http [Fetch $url -timeout $timeout]} results]} { putlog "Connection to $url failed" putlog "Error: $results" From e88095098d930bdf5a946c4d87aad63a730d46c0 Mon Sep 17 00:00:00 2001 From: eneerge Date: Thu, 19 Jul 2018 23:04:12 -0500 Subject: [PATCH 6/6] Fixed issue where console flag +p was not working --- urltitle.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/urltitle.tcl b/urltitle.tcl index f1f2a7d..2571c62 100644 --- a/urltitle.tcl +++ b/urltitle.tcl @@ -130,7 +130,7 @@ namespace eval UrlTitle { } } # change to return 0 if you want the pubm trigger logged additionally.. - return 1 + return 0 } # General HTTP redirect handler