How to search Multiple Sites using Lucene Search engine API?
- by Wael Salman
Hope that someone can help me as soon as possible :-)
I would like to know how to search multiple sites using Lucene. (All sites are in one index.)
I have succeeded in searching one website and in indexing multiple sites; however, I am not able to search all of the websites.
Consider this method that I have:
/// <summary>
/// Runs the query in <c>mstrQuery</c> against the Lucene index directory for
/// <c>mstrURL</c> and fills <c>mobjDTResults</c> with one row per hit on the
/// current page (title, path, score, highlighted sample and score explanation).
/// Also updates the paging/statistics fields <c>mintTotal</c>, <c>mintStartAt</c>,
/// <c>mintFromItem</c>, <c>mintToItem</c> and <c>mTsDuration</c>.
/// </summary>
/// <remarks>
/// The searcher is closed even when parsing, file access or highlighting throws;
/// the exception itself is not swallowed and still reaches the caller.
/// </remarks>
private void PerformSearch()
{
    DateTime start = DateTime.Now;

    //Create the Searcher object
    string strIndexDir = Server.MapPath("index") + @"\" + mstrURL;
    IndexSearcher objSearcher = new IndexSearcher(strIndexDir);
    try
    {
        //Parse the query, "text" is the default field to search
        Query objQuery = QueryParser.Parse(mstrQuery, "text", new StandardAnalyzer());

        //Create the result DataTable
        mobjDTResults.Columns.Add("title", typeof(string));
        mobjDTResults.Columns.Add("path", typeof(string));
        mobjDTResults.Columns.Add("score", typeof(string));
        mobjDTResults.Columns.Add("sample", typeof(string));
        mobjDTResults.Columns.Add("explain", typeof(string));

        //Perform search and get hit count
        Hits objHits = objSearcher.Search(objQuery);
        mintTotal = objHits.Length();

        //Create Highlighter
        QueryHighlightExtractor highlighter = new QueryHighlightExtractor(objQuery, new StandardAnalyzer(), "<B>", "</B>");

        //Initialize "Start At" variable
        mintStartAt = GetStartAt();

        //How many items we should show?
        int intResultsCt = GetSmallerOf(mintTotal, mintMaxResults + mintStartAt);

        //URLs starting with "www" are read from the local cache folder and use the
        //stored title; anything else is read from the indexed path + filename.
        //StartsWith (unlike Substring(0,3)) does not throw when the URL is shorter
        //than three characters, and the check is loop-invariant so it is done once.
        bool blnIsWebSite = mstrURL.StartsWith("www", StringComparison.Ordinal);

        //Loop through results and display
        for (int intCt = mintStartAt; intCt < intResultsCt; intCt++)
        {
            //Get the document from results index
            Document doc = objHits.Doc(intCt);

            //Get the document's ID and set the cache location
            string strID = doc.Get("id");
            string strLocation;
            if (blnIsWebSite)
            {
                strLocation = Server.MapPath("cache") +
                    @"\" + mstrURL + @"\" + strID + ".htm";
            }
            else
            {
                strLocation = doc.Get("path") + doc.Get("filename");
            }

            //Load the HTML page from cache
            string strPlainText;
            using (StreamReader sr = new StreamReader(strLocation, System.Text.Encoding.Default))
            {
                strPlainText = ParseHTML(sr.ReadToEnd());
            }

            //Add result to results datagrid
            DataRow row = mobjDTResults.NewRow();
            row["title"] = blnIsWebSite ? doc.Get("title") : doc.Get("filename");
            row["path"] = doc.Get("path");
            row["score"] = String.Format("{0:f}", (objHits.Score(intCt) * 100)) + "%";
            row["sample"] = highlighter.GetBestFragments(strPlainText, 200, 2, "...");

            //Explain() expects the Lucene document number, not the position of the
            //hit in the result list, so translate the hit index first.
            Explanation objExplain = objSearcher.Explain(objQuery, objHits.Id(intCt));
            row["explain"] = objExplain.ToHtml();

            mobjDTResults.Rows.Add(row);
        }
    }
    finally
    {
        //Always release the index, including when an exception is thrown above
        objSearcher.Close();
    }

    //Finalize results information
    mTsDuration = DateTime.Now - start;
    mintFromItem = mintStartAt + 1;
    mintToItem = GetSmallerOf(mintStartAt + mintMaxResults, mintTotal);
}
As you can see, I use the site URL 'mstrURL' when I create the searcher object:
string strIndexDir = Server.MapPath("index") + @"\" + mstrURL;
How can I do the same when I want to search multiple sites?
Actually I am using the code from http://www.keylimetie.com/blog/2005/8/4/lucenenet/