1 /* 2 ------------------------------------------------------------------- 3 4 Copyright (C) 2014, Edwin van Leeuwen 5 6 This file is part of todod todo list manager. 7 8 Todod is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or 11 (at your option) any later version. 12 13 Todod is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with Todod. If not, see <http://www.gnu.org/licenses/>. 20 21 ------------------------------------------------------------------- 22 */ 23 24 module todod.search; 25 26 import std.algorithm; 27 import std.math; 28 import std.string; 29 30 version( unittest ) { 31 import std.stdio; 32 } 33 34 /** 35 Match word to word and return a weight based on match 36 37 Weight is between 1.0 (exact match) and 0.0 (no match) 38 */ 39 double matchWordToWord( string searchString, string compareWord ) { 40 double scalar = levenshteinDistance( searchString, compareWord ); 41 double scalarLowercase = levenshteinDistance( 42 searchString.toLower, compareWord.toLower ); 43 size_t maxWeight = max( searchString.length, compareWord.length); 44 // Weigh to be between 0 and 1.0. 45 return pow(1.0 - (scalar+scalarLowercase)/(2.0*maxWeight),4); 46 } 47 48 // Simple comparisons 49 unittest { 50 double w1 = matchWordToWord( "bla", "bla" ); 51 assert( w1 == 1.0 ); 52 double w2 = matchWordToWord( "Bla", "bla" ); 53 assert( w2 < w1 ); 54 double w3 = matchWordToWord( "vla", "bla" ); 55 assert( w3 < w2 ); 56 } 57 58 // Lower limit >= 0 59 unittest { 60 double w = matchWordToWord( "abcdefghijklmnopqrstuvwxyz", 61 "123456789012345678901234567890" ); 62 assert( w == 0.0 ); 63 w = matchWordToWord( 64 "123456789012345678901234567890", "abcdefghijklmnopqrstuvwxyz" ); 65 assert( w == 0.0 ); 66 } 67 68 /// Break sentence into words 69 string[] byWord( string sentence ) { 70 return sentence.chomp.chomp(" ").split( " " ); 71 } 72 73 unittest { 74 // Trailing space 75 assert( ("a b ").byWord.length == 2 ); 76 } 77 78 /// Search for term in sentence and return a weight based on match 79 /// 80 /// Split sentence into words and apply matchWordToWord 81 double weightTermSentence( string searchString, string compareSentence ) { 82 double sum = 0.0; 83 foreach( word; compareSentence.byWord ) { 84 sum += matchWordToWord( searchString, word ); 85 } 86 return sum; 87 } 88 89 // Test search in sentence 90 unittest { 91 assert( matchWordToWord( "bla", "bla" ) == 92 weightTermSentence( "bla", "How is bla" ) ); 93 assert( matchWordToWord( "bla", "bla" )+matchWordToWord( "bla", "Bla" ) == 94 weightTermSentence( "bla", "Bla is bla" ) ); 95 96 assert( weightTermSentence( "abc", "def fgh" ) == 0.0 ); 97 } 98 99 /// Search for term(s) in sentence and return a weight based on match 100 double weightSearchSentence( string searchString, string compareSentence ) { 101 double sum = 0.0; 102 foreach( term; searchString.byWord ) { 103 sum += weightTermSentence( term, compareSentence ); 104 } 105 return sum; 106 } 107 108 // Multiple search terms 109 unittest { 110 assert( matchWordToWord( "bla", "bla" )+matchWordToWord( "is", "is" ) == 111 weightSearchSentence( "bla is", "How is bla" ) ); 112 } 113 114 // Long sentences 115 unittest { 116 auto w1 = weightSearchSentence( "line plusnet", "line rental plusnet" ); 117 auto w2 = weightSearchSentence( "line plusnet", "This is a strange long sentence that should not match very well but will match very well due to the longness of it" ); 118 assert( w1 > 10*w2 ); 119 }