1 /*
2 	 -------------------------------------------------------------------
3 
4 	 Copyright (C) 2014, Edwin van Leeuwen
5 
6 	 This file is part of todod todo list manager.
7 
8 	 Todod is free software; you can redistribute it and/or modify
9 	 it under the terms of the GNU General Public License as published by
10 	 the Free Software Foundation; either version 3 of the License, or
11 	 (at your option) any later version.
12 
13 	 Todod is distributed in the hope that it will be useful,
14 	 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 	 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 	 GNU General Public License for more details.
17 
18 	 You should have received a copy of the GNU General Public License
19 	 along with Todod. If not, see <http://www.gnu.org/licenses/>.
20 
21 	 -------------------------------------------------------------------
22 	 */
23 
24 module todod.search;
25 
26 import std.algorithm;
27 import std.math;
28 import std.string;
29 
30 version( unittest ) {
31 	import std.stdio;
32 }
33 
34 /** 
35 	Match word to word and return a weight based on match
36 
37 	Weight is between 1.0 (exact match) and 0.0 (no match)
38 	*/
39 double matchWordToWord( string searchString, string compareWord ) {
40 	double scalar = levenshteinDistance( searchString, compareWord );
41 	double scalarLowercase = levenshteinDistance( 
42 			searchString.toLower, compareWord.toLower );
43 	size_t maxWeight = max( searchString.length, compareWord.length);
44 	// Weigh to be between 0 and 1.0. 
45 	return pow(1.0 - (scalar+scalarLowercase)/(2.0*maxWeight),4);
46 }
47 
48 // Simple comparisons
49 unittest {
50 	double w1 = matchWordToWord( "bla", "bla" );
51 	assert( w1 == 1.0 );
52 	double w2 = matchWordToWord( "Bla", "bla" );
53 	assert( w2 < w1 );
54 	double w3 = matchWordToWord( "vla", "bla" );
55 	assert( w3 < w2 );
56 }
57 
58 // Lower limit >= 0
59 unittest {
60 	double w = matchWordToWord( "abcdefghijklmnopqrstuvwxyz", 
61 			"123456789012345678901234567890" );
62 	assert( w == 0.0 );
63 	w = matchWordToWord( 
64 			"123456789012345678901234567890", "abcdefghijklmnopqrstuvwxyz" );
65 	assert( w == 0.0 );
66 }
67 
68 /// Break sentence into words
69 string[] byWord( string sentence ) {
70 	return sentence.chomp.chomp(" ").split( " " );
71 }
72 
73 unittest {
74 	// Trailing space
75 	assert( ("a b ").byWord.length == 2 );
76 }
77 	
78 /// Search for term in sentence and return a weight based on match
79 ///
80 /// Split sentence into words and apply matchWordToWord
81 double weightTermSentence( string searchString, string compareSentence ) {
82 	double sum = 0.0;
83 	foreach( word; compareSentence.byWord ) {
84 		sum += matchWordToWord( searchString, word );
85 	}
86 	return sum;
87 }
88 
89 // Test search in sentence
90 unittest {
91 	assert( matchWordToWord( "bla", "bla" ) ==
92 			weightTermSentence( "bla", "How is bla" ) );
93 	assert( matchWordToWord( "bla", "bla" )+matchWordToWord( "bla", "Bla" ) ==
94 			weightTermSentence( "bla", "Bla is bla" ) );
95 
96 	assert( weightTermSentence( "abc", "def fgh" ) == 0.0 );
97 }
98 
99 /// Search for term(s) in sentence and return a weight based on match
100 double weightSearchSentence( string searchString, string compareSentence ) {
101 	double sum = 0.0;
102 	foreach( term; searchString.byWord ) {
103 		sum += weightTermSentence( term, compareSentence );
104 	}
105 	return sum;
106 }
107 
108 // Multiple search terms
109 unittest {
110 	assert( matchWordToWord( "bla", "bla" )+matchWordToWord( "is", "is" ) ==
111 			weightSearchSentence( "bla is", "How is bla" ) );
112 }
113 
114 // Long sentences
115 unittest {
116 	auto w1 = weightSearchSentence( "line plusnet", "line rental plusnet" );
117 	auto w2 = weightSearchSentence( "line plusnet", "This is a strange long sentence that should not match very well but will match very well due to the longness of it" );
118 	assert( w1 > 10*w2 );
119 }