1:2-3, 4) $q = strtolower($q); $regex = array( "/(\d)\"(\d)/" => "$1:$2", #12"5 -> 12:5 for some reason this happens fairly often "/([^\d\s\-:,])(\d)/" => "$1 $2", #add a space before numbers "/(\d)[ab]([\-:, ]|$)/" => "$1$2", #12:1b -> 12:1; however, a search for 23a will find only 23 "/(\d)([^\d\s\-:,])/" => "$1 $2", #add a space after numbers "/([a-z])-/" => "$1 -", #to handle james-1peter "/([^\d\s\-:,])[\-:,]/" => "$1 ", #remove punctuation not involving numbers "/[^\w\s\-:,'\"]/" => "", #remove all other characters "/\s{2,}/" => " ", #remove extra spaces ); $q = preg_replace(array_keys($regex), array_values($regex), $q); return $q; } function make_book_refs($q) { #check for the existence of any books in the string $abbreviations = get_simple_book_abbrevs(); #get the abbreviations stored in a file $letters = get_letters(); $letter = "A"; $bookrefs = array(); foreach ($abbreviations as $book => $abbrevs) { foreach ($abbrevs as $abbrev) { while (preg_match("/\b$abbrev\b/", $q, $matches)) { #one match at a time; use word boundaries so "1 time" != 1 tim $q = preg_replace("/\b($abbrev)\b/", "#<$letter#", $q, 1); $bookrefs[$letter] = array( "book" => $book, "q" => $matches[0], #the original match ); $count = count(array_keys($bookrefs)); if ($count >= count($letters)) { #if over 26 books, we need to extend the array $letters = extend_letters($letters); } $letter = $letters[$count]; #get what the next letter will be } } } return array($q, $bookrefs); #return marked up string, one with books changed to #"; #print "--nrmlu:$normalizedunit;
"; #print "---ref:$ref;
"; #print "---normalizedref:$normalizedref;
"; $count = count(array_keys($verserefs)); if ($count >= count($letters)) { #if over 26 books, we need to extend the array $letters = extend_letters($letters); } $letter = $letters[$count]; #get what the next letter will be if (strlen($normalizedref) > 0) { $verserefs[$letter] = array(); $verseunits = explode(",", $normalizedref); #treat each comma-separated unit individually foreach ($verseunits as $verseunit) { if (!preg_match("/\d/", $verseunit)) continue; #we want 0 to stay in the loop list($chapter, $explicitch, $objverseunit) = objectify_reference($chapter, $explicitch, $verseunit); if ($objverseunit) { array_push($verserefs[$letter], $objverseunit); } else { #nothing } } $verseunits = array(); #reset it for the next time if (!$verserefs[$letter]) { #if there weren't any objverseunits, we don't need it unset($verserefs[$letter]); continue; } $normalizedunit = preg_replace("/(?:^|[^\w,'])$ref(?:[^\w,']|$)/", "#>$letter#", $normalizedunit, 1); #replace with a #->-letter-# #don't use \b because it doesn't work if you enter "Psalm 1," array_push($verserefs[$letter], $ref); #always the last so we can pop it later } else { #nothing; it's a number sequence but not a ref } } $q = preg_replace("/$unit/", "$normalizedunit", $q, 1); } $q = preg_replace(array("/# +/", "/ +#/"), array("#", "#"), $q); #get rid of extra spaces #print_r($verserefs); #$q = strtr($q, "<>", "[]"); #debug #print "\n$q"; #exit; return array($q, $verserefs); } function normalize_query_unit($unit) { #handle some nonstandard queries that nonetheless make sense $regex = array( "/(\d+?) *?- *?end/" => "$1-999", #4:1-end = 4:1-999; 4-end = 4-999 "/ch(?:apter|ap)?s? *?(\d+)/" => "$1:", #gen ch 6 -> gen 6: ;; gen chap 6 -> gen 6 "/(\d) *?v(?:erse|v|er)?s? *?(\d)/" => "$1:$2", #gen 6 vs 8 -> gen 6:8; gen 6 vv 8-10 -> gen 6:8-10 "/v(?:erse|v|er)?s? *?(\d)/" => "$1", # gen ch 6 vs 8 (which becomes gen 6: vs 8) -> gen 6:8 "/(\d) *?(?:to|through|thru) *?(\d)/" => "$1-$2", #gen 6 vs 7 to 9 -> gen 6:7-9 "/(\d):? *?and *?(\d)/" => "$1,$2", #gen 6 vs 7 and 9 -> gen 6:7,9 "/(\d) *?f{1,2}\b/" => "$1-999", #gen 6:8ff -> gen 6:8-999; gen 6ff -> gen 6-999 "/:{2,}/" => ":", "/^:$/" => "", #"john :" => john ); return preg_replace(array_keys($regex), array_values($regex), $unit); } function normalize_reference($ref) { #make refs easier to parse automatically #trim is already done above $regex = array( "/[^\d ,\-:]/" => "", #just in case any slipped through "/[^\d:\-]+$/" => "", #don't need to do a beginning one because $ref will always be sent starting with [\d:] "/ *([\-:,]) */" => "$1", "/(\d) (\d+? ?(?:[^:]|$))/" => "$1.$2", #rom 8 28 -> rom 8:28; people use this syntax fairly frequently; change to dot for next transformations "/(\d) (\d+? ?:)/" => "$1,$2", #rom 8 2:7 -> rom 8,2:7 "/(:\d+?)[ .](\d)/" => "$1,$2", #rom 8:28 7 -> rom 8:28,7 "/(:\d+?-\d+?)[ .](\d)/" => "$1,$2", #rom 8:1-10 11-20 -> 8:1-10,11-20 "/,{2,}/" => ",", #get rid of double punctuation marks "/[.:]{2,}/" => ":", "/-{2,}/" => "-", "/\s/" => "", ); $ref = preg_replace(array_keys($regex), array_values($regex), $ref); if (strstr($ref, "000") || preg_match("/\d{4,}/", $ref)) { #these will never be references return ""; } $ref = strtr($ref, ".", ":"); #change any remaining periods from transformations above into colons return $ref; } function cleanup_reference_unit($unit) { $regex = array( "/(^|\D):/" => "$1", "/:-/" => ":1-", "/(-\d+?):$/" => "$1:999", #2:3-4: -> 2:3-4:999 #"/(:\d+?):(\D|$)/" => "$1$2", #4:7: -> 4:7 "/:(\D)/" => ":1-999$1", #4: -> 4:1-999; originally it was (\D|$) but that made eccl. 2:21. fail "/(\d+?)-(\D|$)/" => "$1-949", #4:2- -> 4:2-949; "949" because mark 16:1 - luke 2:5 needs to work, and we need to know that it was open-ended "/[^\d]+$/" => "", # cleanup; originally there was a : in the [], but we don't want ending colons ); return preg_replace(array_keys($regex), array_values($regex), $unit); } function objectify_reference($chapter, $explicitch, $unit) { $return = array(); $original = $unit; $unit = cleanup_reference_unit($unit); #print "cleanup:$unit
\n"; if (substr_count($unit, "-") > 1) { #if there's more than one range, only use the outside values preg_match("/(.+?)-+?(.+)-(.+)/", $unit, $matches); #we want $2 to be greedy, so we don't use ? if (strstr($matches[2], ":") && !strstr($matches[3], ":")) { #if a ch is indicated in the discarded match, use that ch preg_match_all("/(\d+):/", $matches[2], $temp); #get all the chapters (eg, 23:6-24:7) $temp = array_pop($temp[1]); #use the last indicated chapter (eg, 24) $matches[3] = "$temp:$matches[3]"; #change the last match to include the chapter we just found unset ($temp); #cleanup } $unit = "$matches[1]-$matches[3]"; #turn it into a simple pairing save_error("discarded-reference", $matches[2]); unset ($matches); #print "u:$unit\n"; } if (preg_match("/^(\d+?):(\d+?)-(\d+?):(\d+?)$/", $unit, $ranges)) { #2:3-4:5 $case = "dd"; #d = double, or "1:2"; s = single, or "3"; we don't use this anywhere, but it makes things easier to understand array_shift($ranges); #ranges[0] includes the whole unit, which we don't care about list($beginc, $beginv, $endc, $endv) = $ranges; if (($endc < $beginc) || ($endv < $beginv && $endc == $beginc)) { #5:6-4:7; 2:4-2:3 #invert them list($endc, $endv, $beginc, $beginv) = $ranges; } $isrange = 1; #is talking about more than one verse $explicitch = 1; } elseif (preg_match("/^(\d+?):(\d+?)-(\d+)$/", $unit, $ranges)) { #2:4-5 $case = "ds"; $isrange = 1; list($beginc, $beginv, $endv) = array_slice($ranges, 1, 3); #ignore ranges[0] $endc = $beginc; if ($endv < $beginv || $endc < $beginc) { #if it looks backwards list($beginc, $beginv, $endc, $endv, $isrange) = check_reference_range_error($beginc, $beginv, $endc, $endv, $isrange, $original); #original for use in error msg } $explicitch = 1; } elseif (preg_match("/^(\d+?):(\d+)$/", $unit, $ranges)) { #2:4 $case = "d"; list($beginc, $beginv) = array_slice($ranges, 1, 2); #again, ignore ranges[0] $isrange = 0; $explicitch = 1; } elseif (preg_match("/^(\d+?)-(\d+):(\d+)$/", $unit, $ranges)) { #3-4:5 $case = "sd"; $isrange = 1; if ($explicitch) { #[2:]3-4:5; the 2 is implied from a prev unit (2:1,3-4:5) list($beginc, $beginv, $endc, $endv) = array($chapter, $ranges[1], $ranges[2], $ranges[3]); } else { #3:1-4:5; otherwise we assume the single is a chapter ref list($beginc, $beginv, $endc, $endv) = array($ranges[1], 1, $ranges[2], $ranges[3]); } if (($endc < $beginc) || ($endv < $beginv && $endc <= $beginc)) { #oops list($beginc, $beginv, $endc, $endv) = double_reference_range_error($beginc, $beginv, $endc, $endv, $original); $isrange = 0; } $explicitch = 1; } elseif (preg_match("/^(\d+)-(\d+)$/", $unit, $ranges)) { #4-5 $case = "ss"; $isrange = 1; if ($explicitch) { #[1:]4-5 list($beginc, $beginv, $endc, $endv) = array($chapter, $ranges[1], $chapter, $ranges[2]); } else { #4:1-5:999; no explicitch list($beginc, $beginv, $endc, $endv) = array($ranges[1], 1, $ranges[2], 999); } if (($endv < $beginv && $endc <= $beginc) || $endc < $beginc) { list($beginc, $beginv, $endc, $endv, $isrange) = check_reference_range_error($beginc, $beginv, $endc, $endv, $isrange, $original); } } elseif (preg_match("/^(\d+)$/", $unit, $ranges)) { #4 $case = "s"; if ($explicitch) { #[1:]4 $isrange = 0; list($beginc, $beginv) = array($chapter, $ranges[1]); } else { #4:1-999; otherwise we assume they want the whole chapter $isrange = 1; list($beginc, $beginv, $endc, $endv) = array($ranges[1], 1, $ranges[1], 999); } } else { #we don't know what to do with it save_error("discarded-reference", $original); return array($chapter, $explicitch, array()); } if ($beginc < 1) $beginc = 1; #just in case anything was 0; could conceivably lead to a false isrange (1:0-1:1)... if ($beginv < 1) $beginv = 1; #but nothing bad happens if ($isrange) { if ($endc < 1) $endc = 1; if ($endv < 1) $endv = 1; $return = array( "isrange" => 1, "beginc" => $beginc, "beginv" => $beginv, "endc" => $endc, "endv" => $endv, ); $chapter = $endc; } else { $return = array( "beginc" => $beginc, "beginv" => $beginv, "endc" => $beginc, "endv" => $beginv, ); $chapter = $beginc; } #print "$case-$explicitch-$chapter\n"; return array($chapter, $explicitch, $return); } function check_reference_range_error($beginc, $beginv, $endc, $endv, $isrange, $unit) { #possible errors if ($endv > 900 && $endc < $beginc) { #5:6--4 became 5:6-4:999; display one verse and set error $endc = $beginc; $endv = $beginv; $isrange = 0; save_error("end-before-begin", $unit); } elseif ($endv >= $endc) { #[2:]4-3 -> 2:4-3:999; assume they meant to use the chapter; no error $endc = $endv; $endv = 999; #use the isrange we were sent, ie, 1 } else { #[2:]4-1 -> ??? 2:4-2:4; we don't even want to try to guess, because we'd probably guess wrong $endc = $beginc; $endv = $beginv; $isrange = 0; #display one verse and set error save_error("end-before-begin", $unit); } return array($beginc, $beginv, $endc, $endv, $isrange); } function double_reference_range_error($beginc, $beginv, $endc, $endv, $unit) { #if there's a problem, don't try to guess save_error("end-before-begin", $unit); return array($beginc, $beginv, $beginc, $beginv); #return only the beginning verses } function objectify_q($q, $bookrefs, $verserefs) { #make the actual objectq and objectqdesc $objectq = array(); $objectqdesc = array(); $qarray = preg_split("/#+/", $q); if (strstr($q, '"')) $alreadyquotes = 0; if (preg_match("/#<[A-Z]+?#-#<[A-Z]+?#/", $q)) $checkbookspan = 1; #if someone types heb - james elseif (preg_match("/#<[A-Z]+?#-$/", $q)) $checkbookspan = 1; #if someone types heb - else $checkbookspan = 0; foreach ($qarray as $qunit) { if (!$qunit || !preg_match("/[^\s\"]/", $qunit)) continue; if ($qunit{0} == "<") { #we already suspect it's a book because the first char is a < preg_match("/^<([A-Z]+)/", $qunit, $matches); #put the letter into matches[1] array_push($objectq, $bookrefs[$matches[1]]); $objectqdesc["book"] = (isset($objectqdesc["book"])) ? $objectqdesc["book"]++ : 1; if (isset($alreadyquotes) && strstr($qunit, '"')) $alreadyquotes += substr_count($qunit, '"'); } elseif ($qunit{0} == ">") { #if it's a ref preg_match("/^>([A-Z]+)/", $qunit, $matches); #put the letter into matches[1]; $refq = array_pop($verserefs[$matches[1]]); #the ref is always at the end array_push($objectq, array( "q" => $refq, "ref" => $verserefs[$matches[1]], #a book number. the letter in the query references the array )); $objectqdesc["ref"] = (isset($objectqdesc["ref"])) ? $objectqdesc["ref"]++ : 1; if (isset($alreadyquotes) && strstr($qunit, '"')) $alreadyquotes += substr_count($qunit, '"'); } else { #otherwise it could be a sequence of words if (isset($alreadyquotes) && strstr($qunit, '"') && $alreadyquotes % 2 == 0) { #do phrase matching #if there's a mismatch of quotes, eg "Mark 7 "hello there", don't do phrase matching $matches = preg_split('/"/', $qunit, -1, PREG_SPLIT_NO_EMPTY); } else $matches = explode(" ", $qunit); foreach($matches as $match) { if (strlen($match) == 0 || $match == " ") continue; array_push($objectq, array( "q" => $match, "word" => 1, )); } $objectqdesc["word"] = (isset($objectqdesc["word"])) ? $objectqdesc["word"] + count($matches) : count($matches); } } if ($checkbookspan) list($objectq, $objectqdesc) = check_book_span($objectq, $objectqdesc); return array($objectq, $objectqdesc); } function check_book_span($objectq, $objectqdesc) { #if someone enters a book rand (mk - jn) without chapter numbers, assume 1-end foreach ($objectq as $i => $array) { if (($array["q"] != "-") || ($i == 0) || (!isset($objectq[$i-1]["book"]))) continue; $objectq[$i]["ref"][0] = array( "isrange" => 1, "beginc" => 1, "beginv" => 1, "endc" => 949, #949 indicates we should look for the end of the range in the next book "endv" => 949, ); unset($objectq[$i]["word"]); #this key gets looked for later $objectqdesc["word"]--; } if ($objectqdesc["word"] == 0) unset($objectqdesc["word"]); #if there aren't any left at the end, we don't need it return array($objectq, $objectqdesc); } function correct_known_misspells($q) { #if there are often misspelled words that should be silently corrected, they go here $misspells = get_known_misspells_from_file(); foreach ($misspells as $key => $value) { #always check on boundaries if (preg_match("/\b$key\b/", $q)) $q = preg_replace("/\b$key\b/", "$value", $q); } return $q; } function get_known_misspells_from_file() { #file is of format misspell\tcorrected #any comments could be put in another \t after global $spellfile; $file = fopen($spellfile, "r") or die ("No spelling file"); $misspells = array(); while (!feof($file)) { $line = rtrim(fgets($file, 1024)); $words = explode("\t", $line); if (!$words[1]) continue; $misspells[$words[0]] = $words[1]; } return $misspells; } function identify_special_searches($q) { #give special error messages when certain queries are entered global $specialsearchfile; $specials = array(); $file = fopen($specialsearchfile, "r") or die ("No spelling file"); while (!feof($file)) { $line = rtrim(fgets($file, 1024)); $words = explode("\t", $line); if (preg_match("/\b$words[0]/", $q)) $specials[$words[1]]++; } if ($specials) { global $specialsearches; $specialsearches = $specials; } return 1; } #also requires save_errors. Here is a blank function. # function save_errors($arg1, $arg2) { # return 1; # } ?>