Changeset 236
- Timestamp:
- 12/31/07 00:44:41 (1 year ago)
- Files:
-
- lex/trunk/jsp/clause.jsp (modified) (6 diffs)
- lex/trunk/jsp/navclause.jsp (modified) (1 diff)
- lex/trunk/src/com/qwirx/lex/hebrew/HebrewConverter.java (modified) (3 diffs)
- lex/trunk/src/com/qwirx/lex/Search.java (modified) (11 diffs)
- lex/trunk/test/com/qwirx/lex/SearchTest.java (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
lex/trunk/jsp/clause.jsp
r211 r236 1 <% String pageTitle = "Text Browser"; %> 2 <%@ include file="header2.jsp" %> 1 3 <%@ page import="java.util.*" %> 2 4 <%@ page import="java.util.regex.*" %> … … 11 13 <%@ page import="net.didion.jwnl.data.POS" %> 12 14 <%@ page import="net.didion.jwnl.data.Synset" %> 13 <html> 14 <head> 15 <title>Lex: Text Browser</title> 15 16 16 <script type="text/javascript"><!-- 17 17 … … 49 49 50 50 //--></script> 51 <link rel="stylesheet" href="style.css" /> 52 </head> 51 53 52 <style type="text/css"> 54 TABLE.tree TD { 53 TABLE.tree TD 54 { 55 55 text-align: center; 56 56 } 57 div.topmenu a.clause_jsp <%@ include file="hilite.inc" %>58 57 </style> 59 <body onLoad="enableEditButton()"> 60 61 <%@ include file="header.jsp" %> 58 62 59 <%@ include file="auth.jsp" %> 60 63 61 <% 64 62 … … 446 444 if (type.equals("word")) 447 445 { 448 String lexeme = HebrewConverter.wordToHtml(word,446 String lexeme = HebrewConverter.wordTranslitToHtml(word, 449 447 generator); 450 448 String part_of_speech = (String) … … 1278 1276 %> 1279 1277 </p> 1278 1279 <script type="text/javascript"><!-- 1280 enableEditButton(); 1281 //--></script> 1280 1282 1281 1283 <p>Linked logical structure: … … 1293 1295 { 1294 1296 MatchedObject word = sci.next().const_iterator().next(); 1295 value_text += HebrewConverter.wordToHtml(word, generator);1297 value_text += HebrewConverter.wordTranslitToHtml(word, generator); 1296 1298 if (sci.hasNext()) 1297 1299 { lex/trunk/jsp/navclause.jsp
r222 r236 322 322 word_iter.next().const_iterator().next(); 323 323 324 lexemes += HebrewConverter.wordToHtml(word, generator);324 lexemes += HebrewConverter.wordTranslitToHtml(word, generator); 325 325 326 326 if (word_iter.hasNext()) lex/trunk/src/com/qwirx/lex/hebrew/HebrewConverter.java
r220 r236 40 40 { 41 41 char c = input.charAt(i); 42 if (c >= 0x80) 43 { 44 output.append("&#" + (int)c + ";"); 42 if (c == '<') 43 { 44 output.append("&lt;"); 45 } 46 else if (c == '>') 47 { 48 output.append("&gt;"); 49 } 50 else if (c == '&') 51 { 52 output.append("&amp;"); 45 53 } 46 54 else … … 383 391 } 384 392 393 static class Hebrewator implements MorphemeHandler 394 { 395 private MatchedObject m_Word; 396 private StringBuffer m_Output; 397 398 public Hebrewator(MatchedObject word, StringBuffer output) 399 { 400 m_Word = word; 401 m_Output = output; 402 } 403 404 public void convert(String surface, 405 boolean lastMorpheme, String desc, 406 String morphNode) 407 { 408 String raw = m_Word.getEMdFValue(surface).getString(); 409 String hebrew = HebrewConverter.toHebrew(raw); 410 m_Output.append(hebrew); 411 } 412 } 413 385 414 public static String wordToHtml(MatchedObject word, EmdrosDatabase emdros) 386 415 throws IOException, DatabaseException, SAXException 387 416 { 388 return wordToHtml(word, new HebrewMorphemeGenerator(emdros));389 } 390 391 public static String wordToHtml(MatchedObject word,417 return wordTranslitToHtml(word, new HebrewMorphemeGenerator(emdros)); 418 } 419 420 public static String wordTranslitToHtml(MatchedObject word, 392 421 HebrewMorphemeGenerator generator) 393 422 { … … 397 426 return toHtml(out.toString()); 398 427 } 428 429 public static String wordHebrewToHtml(MatchedObject word, 430 HebrewMorphemeGenerator generator) 431 { 432 StringBuffer out = new StringBuffer(); 433 Hebrewator xlit = new Hebrewator(word, out); 434 generator.parse(word, xlit, false); 435 return toHtml(out.toString()); 436 } 399 437 } lex/trunk/src/com/qwirx/lex/Search.java
r232 r236 6 6 import java.util.Iterator; 7 7 import java.util.List; 8 import java.util.Set; 8 9 9 10 import jemdros.FlatSheaf; … … 16 17 import jemdros.SheafConstIterator; 17 18 import jemdros.Straw; 19 import jemdros.StrawConstIterator; 18 20 19 21 import org.xml.sax.SAXException; … … 26 28 public class Search 27 29 { 28 private String m_Query; 30 private int m_ResultCount = 0; 31 private int m_MaxResults = 100; 29 32 private EmdrosDatabase m_Emdros; 30 33 31 public Search(String query, EmdrosDatabase emdros) 32 { 33 m_Query = query; 34 public Search(EmdrosDatabase emdros) 35 { 34 36 m_Emdros = emdros; 35 37 } 36 38 37 39 private static class ResultBase 38 40 { … … 43 45 } 44 46 45 public List<SearchResult> run() 47 public void setMaxResults(int limit) 48 { 49 m_MaxResults = limit; 50 } 51 52 public List<SearchResult> basic(String query) 46 53 throws DatabaseException, IOException, SAXException, SQLException 47 54 { 55 return advanced("[word " + 56 "lexeme = '"+query+"' OR " + 57 "lexeme = '"+query+"/' OR " + 58 "lexeme = '"+query+"[']"); 59 } 60 61 private void addToMonadSet(Sheaf sheaf, SetOfMonads set) 62 { 63 SheafConstIterator shci = sheaf.const_iterator(); 64 65 while (shci.hasNext()) 66 { 67 Straw straw = shci.next(); 68 StrawConstIterator swci = straw.const_iterator(); 69 70 while (swci.hasNext()) 71 { 72 MatchedObject object = swci.next(); 73 set.unionWith(object.getMonads()); 74 } 75 } 76 } 77 78 public List<SearchResult> advanced(String query) 79 throws DatabaseException, IOException, SAXException, SQLException 80 { 48 81 HebrewMorphemeGenerator generator = 49 82 new HebrewMorphemeGenerator(m_Emdros); … … 52 85 ( 53 86 "SELECT ALL OBJECTS IN " + 54 m_Emdros.getVisibleMonad String() + " " +87 m_Emdros.getVisibleMonads().toString() + " " + 55 88 "WHERE [verse GET book, chapter, verse, verse_label " + 56 " [clause "+ 57 " [word NORETRIEVE " + 58 " lexeme = '"+m_Query+"' OR " + 59 " lexeme = '"+m_Query+"/' OR " + 60 " lexeme = '"+m_Query+"['" + 61 " ]" + 62 
" ]" + 63 " ]" 89 " [clause " + query + "]]" 64 90 ); 65 91 66 92 List<ResultBase> resultBases = new ArrayList<ResultBase>(); 67 93 SetOfMonads powerSet = new SetOfMonads(); 68 94 SetOfMonads matchSet = new SetOfMonads(); 69 95 SheafConstIterator sci = sheaf.const_iterator(); 96 97 m_ResultCount = 0; 98 70 99 while (sci.hasNext()) 71 100 { … … 78 107 while (clause_iter.hasNext()) 79 108 { 109 m_ResultCount++; 110 if (m_ResultCount > m_MaxResults) continue; // just count them 111 80 112 MatchedObject clause = 81 113 clause_iter.next().const_iterator().next(); … … 92 124 resultBases.add(base); 93 125 powerSet.unionWith(base.monads); 126 addToMonadSet(clause.getSheaf(), matchSet); 94 127 } 95 128 } … … 119 152 FlatStraw fs = fsci.next(); 120 153 FlatStrawConstIterator fwci = fs.const_iterator(); 121 154 122 155 while (fwci.hasNext()) 123 156 { … … 136 169 { 137 170 ResultBase base = i.next(); 138 String lexemes = ""; 171 String original = ""; 172 String translit = ""; 139 173 140 174 for (Iterator<MatchedObject> j = base.words.iterator(); j.hasNext();) … … 142 176 MatchedObject word = j.next(); 143 177 144 if (word.getEMdFValue("lexeme").toString().equals(m_Query)) 178 boolean isMatch = SetOfMonads.overlap(word.getMonads(), 179 matchSet); 180 181 if (isMatch) 145 182 { 146 lexemes += "<strong>"; 183 original += "<strong>"; 184 translit += "<strong>"; 147 185 } 148 186 149 lexemes += HebrewConverter.wordToHtml(word, generator); 150 151 if (word.getEMdFValue("lexeme").toString().equals(m_Query)) 187 original += HebrewConverter.wordHebrewToHtml (word, generator); 188 translit += HebrewConverter.wordTranslitToHtml(word, generator); 189 190 if (isMatch) 152 191 { 153 lexemes += "</strong>"; 192 original += "</strong>"; 193 translit += "</strong>"; 154 194 } 155 195 156 lexemes += " "; 196 original += " "; 197 translit += " "; 157 198 } 158 199 159 200 SearchResult result = new SearchResult(base.location, 160 lexemes, base.url); 201 "<div class=\"hebrew\">" + original + 
"</div>\n" + 202 "<div class=\"translit\">" + translit + "</div>\n", 203 base.url); 161 204 results.add(result); 162 205 } … … 164 207 return results; 165 208 } 209 210 public int getResultCount() { return m_ResultCount; } 166 211 167 212 public static class SearchResult lex/trunk/test/com/qwirx/lex/SearchTest.java
r232 r236 1 1 package com.qwirx.lex; 2 2 3 import java.util.ArrayList; 3 4 import java.util.Iterator; 4 5 import java.util.List; … … 27 28 public void setUp() throws Exception 28 29 { 29 m_Emdros = Lex.getEmdrosDatabase(" test", "test");30 m_Emdros = Lex.getEmdrosDatabase("chris", "test"); 30 31 } 31 32 … … 35 36 } 36 37 37 private void assertSearchResultsMatch(String query) throws Exception 38 { 39 List<SearchResult> actualResults = new Search(query, m_Emdros).run(); 38 private void assertSearchResultsMatch(String query) 39 throws Exception 40 { 41 Search search = new Search(m_Emdros); 42 assertSearchResultsMatch(query, search, 100); 43 } 44 45 private void assertSearchResultsMatch(String query, int limit) 46 throws Exception 47 { 48 Search search = new Search(m_Emdros); 49 search.setMaxResults(limit); 50 assertSearchResultsMatch(query, search, limit); 51 } 52 53 private void assertSearchResultsMatch(String query, Search search, 54 int limit) 55 throws Exception 56 { 57 List<SearchResult> actualResults = search.basic(query); 40 58 Iterator<SearchResult> actualIterator = actualResults.iterator(); 41 59 … … 43 61 new HebrewMorphemeGenerator(m_Emdros); 44 62 45 String getFeatures = "GET " + 63 Sheaf sheaf = m_Emdros.getSheaf 64 ( 65 "SELECT ALL OBJECTS IN " + 66 m_Emdros.getVisibleMonads().toString() + " " + 67 "WHERE [clause "+ 68 " [word " + 69 " lexeme = '"+query+"' OR " + 70 " lexeme = '"+query+"/' OR " + 71 " lexeme = '"+query+"[' " + 72 " ]" + 73 " ]" 74 ); 75 76 List<Integer> clauses = new ArrayList<Integer>(); 77 78 int count = 0; 79 80 SheafConstIterator sci = sheaf.const_iterator(); 81 while (sci.hasNext()) 82 { 83 count++; 84 if (count > limit) continue; 85 86 Straw straw = sci.next(); 87 MatchedObject clause = straw.const_iterator().next(); 88 clauses.add(new Integer(clause.getID_D())); 89 } 90 91 String mql = "SELECT ALL OBJECTS IN " + 92 m_Emdros.getVisibleMonads().toString() + " " + 93 "WHERE " + 94 "[verse GET book, chapter, verse, verse_label " + 95 
" [clause "; 96 97 if (clauses.size() == 0) 98 { 99 mql += "self = -1"; // never matches 100 } 101 else 102 { 103 for (Iterator<Integer> i = clauses.iterator(); i.hasNext();) 104 { 105 Integer clauseId = i.next(); 106 mql += "self = " + clauseId.toString(); 107 if (i.hasNext()) 108 { 109 mql += " OR "; 110 } 111 } 112 } 113 114 mql += "[word GET " + 46 115 "lexeme, " + 47 116 "phrase_dependent_part_of_speech, " + … … 51 120 "graphical_verbal_ending, " + 52 121 "graphical_nominal_ending, " + 53 "graphical_pron_suffix"; 54 55 Sheaf sheaf = m_Emdros.getSheaf 56 ( 57 "SELECT ALL OBJECTS IN " + 58 m_Emdros.getVisibleMonadString() + " " + 59 "WHERE [verse GET book, chapter, verse, verse_label " + 60 " [clause "+ 61 " [" + 62 " [word FIRST " + 63 " lexeme = '"+query+"' OR " + 64 " lexeme = '"+query+"/' OR " + 65 " lexeme = '"+query+"[' " + getFeatures + "] " + 66 " [word " + getFeatures + "]* " + 67 " [word LAST " + getFeatures + "] " + 68 " ]" + 69 " OR" + 70 " [" + 71 " [word FIRST " + getFeatures + "] " + 72 " [word " + getFeatures + "]* " + 73 " [word " + 74 " lexeme = '"+query+"' OR " + 75 " lexeme = '"+query+"/' OR " + 76 " lexeme = '"+query+"[' " + getFeatures + "] " + 77 " [word " + getFeatures + "]* " + 78 " [word LAST " + getFeatures + "] " + 79 " ]" + 80 " OR" + 81 " [" + 82 " [word FIRST " + getFeatures + "] " + 83 " [word " + getFeatures + "]* " + 84 " [word LAST " + 85 " lexeme = '"+query+"' OR " + 86 " lexeme = '"+query+"/' OR " + 87 " lexeme = '"+query+"[' " + getFeatures + "] " + 88 " ]" + 89 " ]"+ 90 " ]"); 91 92 SheafConstIterator sci = sheaf.const_iterator(); 122 "graphical_pron_suffix]]]"; 123 124 sheaf = m_Emdros.getSheaf(mql); 125 sci = sheaf.const_iterator(); 126 93 127 while (sci.hasNext()) 94 128 { … … 104 138 clause_iter.next().const_iterator().next(); 105 139 106 String lexemes = ""; 140 String original = ""; 141 String translit = ""; 107 142 108 S trawConstIterator word_iter =109 clause.getSheaf().const_iterator() .next().const_iterator();143 
SheafConstIterator word_iter = 144 clause.getSheaf().const_iterator(); 110 145 111 146 while (word_iter.hasNext()) 112 147 { 113 MatchedObject word = word_iter.next(); 114 115 if (word.getEMdFValue("lexeme").toString().equals(query)) 148 MatchedObject word = 149 word_iter.next().const_iterator().next(); 150 151 String lexeme = word.getEMdFValue("lexeme").getString(); 152 boolean isMatch = lexeme.equals(query) || 153 lexeme.equals(query + "/") || 154 lexeme.equals(query + "["); 155 156 if (isMatch) 116 157 { 117 lexemes += "<strong>"; 158 original += "<strong>"; 159 translit += "<strong>"; 118 160 } 119 161 120 lexemes += HebrewConverter.wordToHtml(word, generator); 121 122 if (word.getEMdFValue("lexeme").toString().equals(query)) 162 original += HebrewConverter.wordHebrewToHtml (word, generator); 163 translit += HebrewConverter.wordTranslitToHtml(word, generator); 164 165 if (isMatch) 123 166 { 124 lexemes += "</strong>"; 167 original += "</strong>"; 168 translit += "</strong>"; 125 169 } 126 170 127 lexemes += " "; 171 original += " "; 172 translit += " "; 128 173 } 129 174 … … 133 178 assertEquals(verse.getEMdFValue("verse_label").getString(), 134 179 actual.getLocation()); 135 assertEquals(lexemes, actual.getDescription()); 136 assertEquals("clause.jsp?book=" + 180 assertEquals(actual.getLocation(), 181 "<div class=\"hebrew\">" + original + "</div>\n" + 182 "<div class=\"translit\">" + translit + "</div>\n", 183 actual.getDescription()); 184 assertEquals(actual.getLocation(), 185 "clause.jsp?book=" + 137 186 m_Emdros.getEnumConstNameFromValue("book_name_e", 138 187 verse.getEMdFValue("book").getInt()) + … … 143 192 ); 144 193 } 145 } 146 147 assertFalse(actualIterator.hasNext()); 194 195 assertEquals(count, search.getResultCount()); 196 } 197 198 assertFalse("Found " + (actualResults.size() - count + 1) + 199 " more results than expected", actualIterator.hasNext()); 148 200 } 149 201 150 202 public void testSearchCode() throws Exception 151 203 { 152 
assertSearchResultsMatch("CMJM"); // noun 153 assertSearchResultsMatch("BR"); // verb 154 assertSearchResultsMatch("W"); // conjunction 155 assertSearchResultsMatch("foo"); // no match 204 assertSearchResultsMatch("CMJM", 1); // noun 205 assertSearchResultsMatch("CMJM"); // noun 206 assertSearchResultsMatch("BR"); // verb 207 assertSearchResultsMatch("W", 0); // conjunction 208 assertSearchResultsMatch("W", 1); // conjunction 209 assertSearchResultsMatch("W", 10); // conjunction 210 assertSearchResultsMatch("W", 100); // conjunction 211 assertSearchResultsMatch("foo", 0); // no match 212 assertSearchResultsMatch("foo", 1); // no match 213 assertSearchResultsMatch("foo"); // no match 156 214 } 157 215
