/// <summary>
/// Console entry point and helpers for building a Lucene index on disk and
/// recording progress through <c>ReportStatus</c>.
/// </summary>
// NOTE(review): in this view the brace-only lines and several short statements
// are not visible, so comments below describe only what the visible lines show.
public class IndexBuilder
// Directory the index and its "index.info" side file are written to.
// ProcessArgs overwrites this default with a command-line value.
static string indexDirectoryPath = "C:\\code\\ProofOfConcept\\";
// -1 is the "not supplied" sentinel: Main throws while this is still -1.
static int searchDateId = -1;
// Field instances taken from SpyFuResourcesIndexConfig.Fields; Main adds them
// once to a single Document that is then reused for every indexed item.
static List<IFieldable> fields = SpyFuResourcesIndexConfig.Fields;
/// <summary>
/// Builds the index from the list returned by <c>GlossaryTermModels.GetList()</c>:
/// writes index.info, adds one document per glossary term, reports progress,
/// and finally records the index size back into index.info.
/// </summary>
/// <param name="args">Command-line arguments (see DisplayHelp for the accepted flags).</param>
/// <exception cref="Exception">Thrown when searchDateId is still -1.</exception>
public static void Main(string[] args)
// Captured so the elapsed build time can be printed after all documents are added.
DateTime start = DateTime.Now;
// NOTE(review): no call to ProcessArgs(args) is visible before this check;
// presumably the arguments are parsed first, otherwise this always throws — confirm.
if (searchDateId == -1) throw new Exception("Please specify a search date id with -sd ########");
SpyFu.Standard.AppSettingsParsed.CentralConfig.Load();
ReportStatus("Initialization", "Started");
if (!Directory.Exists(indexDirectoryPath)) Directory.CreateDirectory(indexDirectoryPath);
// TrimEnd + "\\index.info" normalizes the path whether or not the directory
// was given with a trailing backslash.
new IndexInfo(searchDateId, IndexType.DomainName, Region.US).SaveToFile(indexDirectoryPath.TrimEnd(new[] { '\\' }) + "\\index.info");
HelperMethods.AddEveryoneFilePermissions(indexDirectoryPath.TrimEnd(new[] { '\\' }) + "\\index.info");
// The third argument (true) is Lucene.Net's "create" flag; presumably this
// replaces any index already present in the directory — confirm against the
// Lucene.Net version in use.
IndexWriter indexWriter = new IndexWriter(Lucene.Net.Store.FSDirectory.Open(new DirectoryInfo(indexDirectoryPath)), SpyFuResourcesIndexConfig.Analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
// One Document is created and reused for every term; only field values change.
Document doc = new Document();
foreach (IFieldable field in IndexBuilder.fields) doc.Add(field);
var glossaryTermList = GlossaryTermModels.GetList();
// Used as the denominator of the progress percentage below.
int totalDocs = glossaryTermList.Count();
foreach (var glossaryTerm in glossaryTermList)
// Iterates SpyFuResourcesIndexConfig.Fields directly; the static `fields`
// member above was initialized from the same property.
foreach (IFieldable field in SpyFuResourcesIndexConfig.Fields)
// Looks up the per-field delegate by name and applies it to the term.
// NOTE(review): there is no ContainsKey guard here (CreateIndex has one);
// if luceneToCustomMapping is a Dictionary, a missing name would throw — confirm.
string val = SpyFuResourcesIndexConfig.luceneToCustomMapping[field.Name](glossaryTerm);
doc.GetField(field.Name).SetValue(val);
indexWriter.AddDocument(doc);
// Progress is printed and reported every 100000 documents.
// NOTE(review): the declaration of docsAdded is not visible in this view.
if (++docsAdded % 100000 == 0)
Console.WriteLine(docsAdded);
// The (double) cast forces floating-point division before rounding to a whole percent.
ReportStatus("Adding Documents", "In Progress", (int)Math.Round(100 * (docsAdded / (double)totalDocs)));
ReportStatus("Adding Documents", "Completed", 100);
// NOTE(review): `ex` implies an enclosing catch clause that is not visible here;
// the next three lines appear to be the error path — confirm.
Console.WriteLine(ex.Message);
ReportStatus("", "Error");
Console.WriteLine("Docs Added: {0}", docsAdded);
Console.WriteLine("All documents have been added to the index (took {0})\nOptimizing...", DateTime.Now - start);
// NOTE(review): no optimize/close call on indexWriter is visible between the
// "Started" and "Completed" reports — confirm it exists in the full file.
ReportStatus("Optimizing", "Started");
ReportStatus("Optimizing", "Completed");
// Reload index.info, record the index size reported by GetIndexSize(), and save it back.
IndexInfo indexInfo = IndexInfo.LoadFromFile(indexDirectoryPath.TrimEnd(new[] { '\\' }) + "\\index.info");
indexInfo.Size = new System.IO.DirectoryInfo(indexDirectoryPath).GetIndexSize();
indexInfo.SaveToFile(indexDirectoryPath.TrimEnd(new[] { '\\' }) + "\\index.info");
/// <summary>
/// Creates a keyword-to-domain index: opens an IndexWriter, writes index.info,
/// tunes the writer's buffering settings, and fills a reusable Document with
/// per-field values (string or numeric) and optional boosts.
/// </summary>
/// <param name="searchDateId">Search date id stored in the index.info file.</param>
/// <param name="analyzer">Analyzer handed to the IndexWriter.</param>
// NOTE(review): `SpyFuResourcesIndexConfig.Analyzer` is used as a parameter type
// here but as a value in Main — confirm the intended type.
public void CreateIndex(int searchDateId, SpyFuResourcesIndexConfig.Analyzer analyzer)
// Neither timestamp is read in the lines visible in this view.
var startTime = DateTime.Now;
var lastTime = DateTime.Now;
// NOTE(review): `IndexDirectoryPath` (capital I) is not declared in the visible
// code; the static field above is `indexDirectoryPath` — confirm which is meant.
var _idx = new IndexWriter(FSDirectory.Open(new DirectoryInfo(IndexDirectoryPath)), analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
// Region is US when the configured database name equals "honeybadger"
// (compared after lower-casing), otherwise UK.
new IndexInfo(searchDateId, IndexType.KeywordToDomain, KeywordToDomainIndexConfig.Db.DbName.ToLower().Equals("honeybadger") ? Region.US : Region.UK).SaveToFile(IndexDirectoryPath.TrimEnd('\\') + "\\index.info");
HelperMethods.AddEveryoneFilePermissions(IndexDirectoryPath.TrimEnd('\\') + "\\index.info");
// Print the writer's current settings before two of them are overridden below.
Console.WriteLine("MaxBufferedDocs:" + _idx.GetMaxBufferedDocs());
Console.WriteLine("MaxMergeDocs:" + _idx.MaxMergeDocs);
Console.WriteLine("MergeFactor:" + _idx.MergeFactor);
_idx.MaxMergeDocs = int.MaxValue;
_idx.SetMaxBufferedDocs(1000);
// One Document is created up front and reused; only field values change.
Document doc = new Document();
// NOTE(review): the document is populated from DomainToKeywordIndexConfig.Fields,
// but the value loop below iterates KeywordToDomainIndexConfig.Fields. If the two
// field sets differ by name, doc.GetField(field.Name) could return null — confirm.
foreach (IFieldable field in DomainToKeywordIndexConfig.Fields) doc.Add(field);
#region Setting Field Values
foreach (IFieldable field in KeywordToDomainIndexConfig.Fields)
// Fields with a custom mapping get their value from the mapping delegate.
// NOTE(review): `prevDomain`, `fieldValue`, `dr` and `num` are not declared in
// the visible lines.
if (KeywordToDomainIndexConfig.luceneToCustomMapping.ContainsKey(field.Name))
fieldValue = KeywordToDomainIndexConfig.luceneToCustomMapping[field.Name](prevDomain);
// Presumably the else-branch: resolve the source column name (falling back to
// the Lucene field name) and read it from `dr` — the `else`/`try` lines are not visible.
string sqlFieldName = KeywordToDomainIndexConfig.LuceneToSqlMapping.ContainsKey(field.Name) ? KeywordToDomainIndexConfig.LuceneToSqlMapping[field.Name] : field.Name;
fieldValue = dr[sqlFieldName].ToString();
// A failed lookup that raises IndexOutOfRangeException yields an empty value
// instead of aborting the build.
catch (IndexOutOfRangeException)
fieldValue = string.Empty;
// String-valued path: set the value, then apply a custom boost if one is configured.
doc.GetField(field.Name).SetValue(fieldValue);
if (KeywordToDomainIndexConfig.customFieldWeighters.ContainsKey(field.Name))
doc.GetField(field.Name).Boost = KeywordToDomainIndexConfig.customFieldWeighters[field.Name](dr);
// Numeric path: the field is cast to NumericField and given a double value.
// NOTE(review): the statement executed when TryParse fails is not visible here,
// nor is the condition that selects this path over the string path above.
if (!double.TryParse(fieldValue, out num))
((NumericField)doc.GetFieldable(field.Name)).SetDoubleValue(num);
if (KeywordToDomainIndexConfig.customFieldWeighters.ContainsKey(field.Name))
((NumericField)doc.GetFieldable(field.Name)).Boost = KeywordToDomainIndexConfig.customFieldWeighters[field.Name](dr);
// NOTE(review): `ex` implies an enclosing catch clause that is not visible here.
Console.WriteLine(ex.Message);
Console.WriteLine("Docs Added: {0}", docsAdded);
/// <summary>
/// Prints the accepted command-line flags (-sd, -cs, -p, -wf) to the console.
/// </summary>
private static void DisplayHelp()
Console.WriteLine("\nAccepted arguments:\n");
Console.WriteLine("-sd [########] : search date id (REQUIRED)");
// NOTE(review): this help text embeds what looks like a real connection string,
// including a user name and password. Consider removing the credentials from
// source and rotating them.
Console.WriteLine("-cs [db connection string] (OPTIONAL - default is \"SERVER=honeybadger;User=ds;PASSWORD=honv4Rm1nt;DATABASE=SearchFu\")");
// NOTE(review): -p and -wf are described as REQUIRED, but the only check visible
// in Main is for the search date id — confirm the wording.
Console.WriteLine("-p [target index directory] (REQUIRED)");
Console.WriteLine("-wf [file path to a newline-separated text file containing words] (REQUIRED)");
Console.WriteLine("\nLast value entered will be used, except for word file paths, which will keep track of,");
Console.WriteLine("and utilize, multiple entries.");
/// <summary>
/// Walks the argument list and stores each recognized flag's value in the
/// corresponding static member of IndexBuilder.
/// </summary>
/// <param name="args">Command-line arguments as passed to Main.</param>
private static void ProcessArgs(string[] args)
for (int i = 0; i < args.Length; i++)
// Flags are matched case-insensitively by lower-casing the argument.
// NOTE(review): the case labels are not visible in this view; presumably they
// are -sd, -cs, -p and -wf in this order, matching DisplayHelp — confirm.
switch (args[i].ToLower())
// Each assignment consumes the following argument via ++i.
// NOTE(review): there is no bounds check, so a flag given as the last argument
// would index past the end of args; Int32.Parse also throws on non-numeric input.
IndexBuilder.searchDateId = Int32.Parse(args[++i]);
// NOTE(review): `connectionString` and `wordsFilePaths` are not declared in the
// visible part of the class.
IndexBuilder.connectionString = args[++i];
IndexBuilder.indexDirectoryPath = args[++i];
// Add (rather than assign) keeps every word-file path that was supplied.
IndexBuilder.wordsFilePaths.Add(args[++i]);
/// <summary>
/// Records a status row by executing the "Lucene_Insert_Status" stored procedure.
/// </summary>
/// <param name="task">Name of the build phase being reported.</param>
/// <param name="status">Status text for that phase.</param>
/// <param name="percent">Optional completion percentage; omitted from the call when null.</param>
private static void ReportStatus(string task, string status, int? percent = null)
// NOTE(review): `task` and `status` are not referenced in the visible lines;
// presumably they are added as parameters in entries not shown here — confirm.
var parameters = new Dictionary<string, object>()
// The index type is hard-coded to "DomainName" for every report.
{ "@IndexType", "DomainName" },
{ "@SearchDateId", searchDateId }
// The percentage is passed as a string (percent.ToString()), not as an int.
if (percent != null) parameters.Add("@StatusPct", percent.ToString());
SpyFu.Standard.AppSettingsParsed.DatabaseSettings.DbUser.ExecNonQueryProc("Lucene_Insert_Status", parameters);