parse_function == null) {
		$vars = array();
		if (!isset($p['query'])) return false;
		parse_str($p['query'], $vars);
		$q = $engine->query;
		if (isset($vars[$q]))
		{
			$terms = $vars[$q];
		}
		else
		{
			// if there are no search terms don't record this as a search engine hit.
			// chances are its just a spam bot looking for some love.
			return false;
		}
	}
	else
	{
		$func = $engine->parse_function;
		$res = $func($ref, $engine, $terms);
		if ($res === false) return false;
	}

	$encoding = fs_get_referrer_encoding($ref, $engine);
	if ($encoding != null && $terms != null && !fs_is_utf8_string($terms))
	{
		$terms = fs_convert_to_utf8($encoding, $terms);
	}

	if (!empty($terms))
	{
		return true;
	}
	return false;
}

/**
 * Returns the encoding of the search terms carried by a referrer.
 *
 * When the engine configuration has an encoding_extractor callback it is
 * invoked with ($ref, $engine) and its result is returned; otherwise the
 * engine's static keyword_encoding value is returned.
 *
 * @param string $ref    the referrer url.
 * @param object $engine engine configuration (see fs_create_engine_conf).
 * @return mixed the callback's result, or $engine->keyword_encoding.
 */
function fs_get_referrer_encoding($ref, $engine)
{
	if ($engine->encoding_extractor != null)
	{
		$extractor = $engine->encoding_extractor;
		return $extractor($ref, $engine);
	}

	return $engine->keyword_encoding;
}

/**
 * Finds the first configured search engine whose pattern occurs in the referrer.
 *
 * @param string $ref the referrer url.
 * @return object|false the matching engine configuration, or false when
 *                      no pattern is a substring of $ref.
 */
function fs_find_matching_engine($ref)
{
	$engines = fs_get_search_engines();
	foreach ($engines as $e)
	{
		if (strpos($ref, $e->pattern) !== false) return $e;
	}
	return false;
}

/**
 * Appends a matching configuration for a known search engine to $search_engines_arr.
 *
 * The id, name and logo_icon are copied from the engine registered under
 * $name in $engines_table; the remaining fields are taken from the arguments.
 * Terminates the script with die() when $name is not in $engines_table.
 *
 * @param array  $engines_table      engines keyed by name (see fs_create_search_engine).
 * @param array  $search_engines_arr list the new configuration is appended to (by reference).
 * @param string $name               name of a registered engine.
 * @param string $pattern            substring looked for in the referrer.
 * @param string $query              name of the query-string parameter holding the terms.
 * @param mixed  $parse_function     optional callback ($ref, $engine, &$terms), or null.
 * @param mixed  $keyword_encoding   static encoding of the terms, or null.
 * @param mixed  $encoding_extractor optional callback ($ref, $engine), or null.
 */
function fs_create_engine_conf($engines_table, &$search_engines_arr, $name, $pattern, $query, $parse_function, $keyword_encoding, $encoding_extractor)
{
	// isset() avoids an undefined-index notice for an unknown name before we reach die().
	$engine = isset($engines_table[$name]) ? $engines_table[$name] : null;
	if ($engine == null) die("Unknown search engine " .$name);

	$conf = new stdClass();
	$conf->id = $engine->id;
	$conf->name = $engine->name;
	$conf->logo_icon = $engine->logo_icon;
	$conf->pattern = $pattern;
	$conf->query = $query;
	$conf->parse_function = $parse_function;
	$conf->keyword_encoding = $keyword_encoding;
	$conf->encoding_extractor = $encoding_extractor;
	$search_engines_arr[] = $conf;
}

/**
 * Registers a search engine in both lookup tables.
 *
 * Ids are handed out sequentially from a function-static counter starting
 * at 1, so the id of an engine depends on the order of the calls.
 *
 * @param array  $search_engine_ht engines keyed by id (by reference).
 * @param array  $engines          engines keyed by name (by reference).
 * @param string $name             engine name.
 * @param string $logo_icon        engine logo icon.
 */
function fs_create_search_engine(&$search_engine_ht, &$engines, $name, $logo_icon)
{
	static $id = 1;

	$engine = new stdClass();
	$engine->id = $id;
	$engine->name = $name;
	$engine->logo_icon = $logo_icon;

	$search_engine_ht[$engine->id] = $engine;
	$engines[$name] = $engine;
	$id++;
}

/**
 * Returns the number of rows in the urls table, i.e. the amount of work
 * fs_recalculate_search_engine_terms() has to go through.
 *
 * @return mixed the row count, or the result of fs_db_error() when the
 *               count query returns null.
 */
function fs_recalculate_search_engine_terms_calc_max()
{
	$fsdb = &fs_get_db_conn();
	$urls = fs_urls_table();
	$count = $fsdb->get_var("SELECT COUNT(*) c FROM `$urls`");
	if ($count === null)
	{
		return fs_db_error();
	}
	return $count;
}

/**
 * Recalculates the search engine id and search terms of one chunk of urls.
 *
 * Meant to be called repeatedly with an increasing $value. When $value is 0
 * the search engine columns of every row are cleared first.
 *
 * @param mixed $value numeric offset of the first url to process.
 * @param mixed $max   unused by this function.
 * @param int   $chunk maximum number of urls to process in this call.
 * @return mixed the number of urls fetched for this chunk, an error string
 *               when $value is not numeric, or the result of fs_db_error()
 *               on a database failure.
 */
function fs_recalculate_search_engine_terms($value, $max, $chunk = 1000)
{
	require_once(FS_ABS_PATH.'/php/db-common.php');
	$fsdb = &fs_get_db_conn();
	$urls = fs_urls_table();

	// Validate BEFORE the destructive reset below: a non-numeric string can
	// compare == 0 (PHP < 8) and would otherwise wipe every row and only then
	// be rejected.
	if (!is_numeric($value)) return "value $value must be numeric";

	if ($value == 0)
	{
		if (false === $fsdb->get_results("UPDATE `$urls` SET `search_engine_id` = NULL, `search_terms` = NULL"))
		{
			return fs_db_error();
		}
	}

	// Only plain integers are interpolated into the LIMIT/OFFSET clause.
	$offset = (int)$value;
	$limit = (int)$chunk;
	$rows = $fsdb->get_results("SELECT id,url from $urls LIMIT $limit OFFSET $offset");
	if ($rows === false)
	{
		return fs_db_error();
	}

	// The result may be a non-array (e.g. null) when no rows are left.
	$total = is_array($rows) ? count($rows) : 0;
	if ($total > 0)
	{
		foreach ($rows as $r)
		{
			$id = $r->id;
			$ref = $r->url;

			// Reset both out-parameters for every row, otherwise a row that is
			// not a search engine hit could be stored with the previous row's terms.
			$engine = null;
			$terms = null;
			fs_process_search_engine_referrer($ref, $engine, $terms);

			if (is_object($engine) && $terms !== false && $terms != '')
			{
				// NOTE(review): the value is interpolated without quotes, so this
				// assumes $fsdb->escape() returns a quoted SQL literal - confirm.
				$escaped = $fsdb->escape($terms);
				$r2 = $fsdb->query("UPDATE `$urls` SET `search_engine_id`='$engine->id', `search_terms` = $escaped WHERE `id` = '$id'");
				if ($r2 === false)
				{
					return fs_db_error();
				}
			}
		}
	}

	return $total;
}

/**
 * Parser specific to google images urls.
 *
 * The search terms are not in the referrer's own query string; they are in
 * the 'q' parameter of the url stored in the referrer's 'prev' parameter.
 *
 * @param string $ref    the referrer url.
 * @param object $engine engine configuration (unused).
 * @param mixed  $terms  receives the search terms on success (by reference).
 * @return bool true when terms were found, false otherwise.
 */
function fs_google_images_parser($ref, $engine, &$terms)
{
	$p = @parse_url($ref);
	// parse_url() returns false for malformed urls, and a url may have no query at all.
	if (!$p || !isset($p['query'])) return false;

	$vars = array();
	parse_str($p['query'], $vars);
	// parse_str() yields an array for 'prev[]=...', which is not a url.
	if (!isset($vars['prev']) || !is_string($vars['prev'])) return false;

	$prev = @parse_url($vars['prev']);
	if (!$prev || !isset($prev['query'])) return false;

	$prev_vars = array();
	parse_str($prev['query'], $prev_vars);
	if (isset($prev_vars['q']))
	{
		$terms = $prev_vars['q'];
		return true;
	}

	return false;
}

/**
 * Extracts the encoding of a google url from its 'ie' parameter.
 *
 * @param string $ref    the referrer url.
 * @param object $engine engine configuration (unused).
 * @return string|null 'cp1251' or 'cp1255' for the matching windows-125x
 *                     values, null in every other case.
 */
function fs_google_encoding_extractor($ref, $engine)
{
	$p = @parse_url($ref);
	$vars = array();
	if (!isset($p['query'])) return null;
	parse_str($p['query'], $vars);
	if (!isset($vars['ie'])) return null;

	$e = $vars['ie'];
	if ($e == 'windows-1251') return 'cp1251';
	if ($e == 'windows-1255') return 'cp1255';
	return null;
}

/**
 * Builds a search terms string from a google referrer.
 *
 * Image result urls (path '/imgres') are delegated to
 * fs_google_images_parser(). For other urls the q, as_q, as_epq, as_oq and
 * as_eq parameters are combined, in that order, with fs_append_str().
 *
 * @param string $ref    the referrer url.
 * @param object $engine engine configuration.
 * @param mixed  $terms  receives the search terms (by reference); set to an
 *                       empty string before parsing starts.
 * @return bool false when the url cannot be parsed or has no query string
 *              (or when the images parser fails), true otherwise.
 */
function fs_google_term_parser($ref, $engine, &$terms)
{
	// This function is far from perfect.
	// A perfect function is way too hard to implement and is not worth the effort.

	// == Google query parameters ==
	// q=                  all these words (AND, implicit)
	// as_q=               advanced search "all these words" (AND, implicit)
	// as_epq=             advanced search "this exact wording or phrase" (quoted)
	// as_oq=              advanced search "one or more of these words" (OR separated)
	// as_eq=              except query, prepend a minus to each word.
	// hl=en               search language domain, hl=en will search english sites.
	// num=10              number of results to show in the result page
	// lr=                 unknown
	// as_filetype         unknown
	// ft=i                unknown
	// as_sitesearch=SITE  site search, site:SITE
	// as_qdr=all
	// as_rights=
	// as_occt=any
	// cr=&as_nlo
	// as_nhi=
	// safe=images

	$terms = "";
	$up = @parse_url($ref);
	if (isset($up['path']) && $up['path'] == '/imgres')
	{
		return fs_google_images_parser($ref, $engine, $terms);
	}

	if (!$up || !isset($up['query'])) return false;

	$p = array();
	parse_str($up['query'], $p);

	$t = '';
	if (!empty($p['q']))      $t = fs_append_str($t, $p['q']);
	if (!empty($p['as_q']))   $t = fs_append_str($t, str_replace("+", " ", $p['as_q']));
	if (!empty($p['as_epq'])) $t = fs_append_str($t, '"'.$p['as_epq'].'"');
	if (!empty($p['as_oq']))
	{
		// A proper "a OR b" rendering is harder to implement and this parameter
		// is almost never used, so the raw value is appended as is:
		// $t = fs_append_str($t, fs_implode(" OR ", explode("+",str_replace(" ","+",$p['as_oq']))));
		// Appended (not assigned) so the terms collected above are not discarded.
		$t = fs_append_str($t, stripcslashes($p['as_oq']));
	}
	if (!empty($p['as_eq']))
	{
		// The leading minus belongs to the first excluded word, not to the
		// whole accumulated string: "foo -bar -baz" rather than "-foo bar -baz".
		$excluded = "-".fs_implode(" -", explode("+", str_replace(" ", "+", $p['as_eq'])));
		$t = fs_append_str($t, $excluded);
	}

	$terms = $t;
	return true;
}

/**
 * Extract the encoding of a search.walla.co.il url from its 'e' parameter.
 *
 * @param string $ref    the referrer url.
 * @param object $engine engine configuration (unused).
 * @return string|null 'cp1255' when e=hew, null otherwise.
 */
function fs_walla_encoding_extractor($ref, $engine)
{
	$p = @parse_url($ref);
	$vars = array();
	if (!isset($p['query'])) return null;
	parse_str($p['query'], $vars);
	if (!isset($vars['e'])) return null;

	$e = $vars['e'];
	if ($e == 'hew') return 'cp1255';
	if ($e == 'utf') return null; // utf8 is the default FireStats encoding, nothing to do.
	return null;
}
?>