// Code:
// This program parses an XML dictionary file and prints a formatted result.
//
// NOTE: The required XML dictionary (16 MB) will be downloaded to this
// machine if it is not found! It will be stored in: ~/.config/latin/
//
// The goals of this project:
//
// 1. < 100 lines of code
// 2. Simple & elegant coding
// 3. Fast & efficient execution.
//
// "Do one thing,
// and do it well."
//
// —Linux Credo
//
// Compile with:
// $ g++ -O -Wall lat.cpp -o lat
//
// Run with:
// $ lat amo sum totus
//
// Where 'amo', 'sum', and 'totus' are the words to be searched
//
// Gather online possibilities and pipe output into 'less'
// ('latc' script required for this functionality!!!):
//
// $ lat $(latc quam totus amor)
//
// Where 'quam', 'totus', and 'amor' are your search terms
//
// For testing. Completely clear terminal to not confuse with other text.
// $ reset; g++ -O -Wall lat.cpp -o lat; sleep 2; lat amo sum totus | less
//
#include<sys/types.h>
#include<unistd.h>
#include<pwd.h>
#include<cstdlib>
#include<fstream>
#include<iostream>
#include<regex>
#include<string>
#include<utility>
#include<vector>
using namespace std;
// Wraps `arg` in single quotes for /bin/sh, escaping embedded single quotes,
// so paths containing spaces or shell metacharacters survive system().
static std::string shellQuote(const std::string& arg)
{
    std::string quoted = "'";
    for (const char c : arg)
    {
        if (c == '\'')
            quoted += "'\\''"; // close quote, escaped quote, reopen quote
        else
            quoted += c;
    }
    quoted += "'";
    return quoted;
}

// Returns the current user's home directory: the passwd entry first, then
// $HOME as a fallback. Returns an empty string when neither is available.
static std::string findHomeDir()
{
    // getpwuid() returns a null pointer when the uid has no passwd entry.
    const struct passwd* pw = getpwuid(getuid());
    if (pw != nullptr && pw->pw_dir != nullptr)
        return pw->pw_dir;
    const char* envHome = std::getenv("HOME");
    return envHome != nullptr ? envHome : "";
}

// True when `path` can be opened for reading and holds at least one byte.
// An empty file counts as missing: the download pipeline's output redirect
// creates the file even when wget itself fails.
static bool isUsableFile(const std::string& path)
{
    std::ifstream probe(path);
    return probe.is_open() && probe.peek() != std::ifstream::traits_type::eof();
}

// Scans `dict` from the beginning for the first line containing key="word"
// and returns the raw XML of that entry, or an empty string if there is none.
// Lines are concatenated without a separator.
static std::string findEntry(std::istream& dict, const std::string& word)
{
    const std::string keyStart = "key=\"" + word + "\"";
    const std::string keyEnd = "</entry>";

    // Rewind; clear() first because a previous search may have hit EOF.
    dict.clear();
    dict.seekg(0, std::ios::beg);

    std::string line;
    while (std::getline(dict, line))
    {
        const std::string::size_type keyPos = line.find(keyStart);
        if (keyPos == std::string::npos)
            continue;

        // The entry may close on the same line it opens on; stop there
        // instead of running on into the following entry.
        const std::string::size_type endPos = line.find(keyEnd, keyPos);
        if (endPos != std::string::npos)
            return line.substr(0, endPos);

        std::string entry = line;
        // NOTE(review): the line holding </entry> is dropped entirely, so any
        // text before the closing tag on that line is lost. Confirm against
        // the dictionary's actual layout before changing this.
        while (std::getline(dict, line) && line.find(keyEnd) == std::string::npos)
            entry += line;
        return entry;
    }
    return "";
}

// Turns one raw XML entry into readable text by applying an ordered list of
// regex substitutions. Order matters: later rules tidy up what earlier rules
// leave behind. The regexes are compiled once, on first use.
static std::string prettify(std::string text)
{
    static const std::vector<std::pair<std::regex, std::string>> rules = [] {
        // { pattern, replacement }
        static const char* const table[][2] = {
            {"<orth>", "["},
            {"</orth>", ","},
            {"</gen>", "."},
            // NOTE(review): ".*" is greedy and the text contains no newlines,
            // so this can span up to the last matching <etym> tag. Possibly
            // "[^>]*" was intended; left as is.
            {"<sense id.*><etym lang=\"la\" opt=\"n\">", "["},
            {"<etym lang=\"la\" opt=\"n\">", "["},
            {"</etym>, <trans opt=\"n\">|</etym>\\.—", "]\n\n • "},
            {"(</etym>\\. —</sense>|</etym>\\.)", "]"},
            {"</etym>\\. </sense>", ""},
            {"(\\.|</usg>) ?— ?</sense>", "."},
            {"<sense[^>]*>", "\n\n"},
            {"<[^>]*>", ""},
            {" — ", "\n\n • "},
            // Was "\\. ?+—": a quantifier applied to a quantifier is not valid
            // ECMAScript regex grammar. " *" keeps the zero-or-more-spaces
            // meaning in portable form.
            {"\\. *—", ".\n\n • "},
            {" +", " "},
            // NOTE(review): the leading '.' is unescaped, so the character
            // before the dash is consumed too. Left as is; confirm intent.
            {". ?—", "\n\n"},
            {" ,", ","},
            {" \\.", "."},
            {" :", ":"},
            {"‘ ", "‘"},
            {" ’", "’"},
            {"^ ", ""},
            // Replacement strings are not regexes: a backslash there is
            // emitted literally, so these must be plain "(" and ")".
            {"\\( ", "("},
            {" \\)", ")"},
        };

        std::vector<std::pair<std::regex, std::string>> compiled;
        compiled.reserve(sizeof(table) / sizeof(table[0]));
        for (const auto& row : table)
            compiled.emplace_back(std::regex(row[0]), std::string(row[1]));
        return compiled;
    }();

    for (const auto& rule : rules)
        text = std::regex_replace(text, rule.first, rule.second);
    return text;
}

// Looks up every command-line argument in the Perseus XML dictionary stored
// under ~/.config/latin/ and prints a formatted definition for each.
//
// Exit codes:
//   0  all words processed, or the dictionary was just downloaded
//   1  no search term given
//   2  the dictionary could not be downloaded or opened
//   3  the home directory could not be determined
int main(int argc, char* argv[])
{
    // No search term entered. Bye!
    if (argc < 2)
        return 1;

    const std::string homeDir = findHomeDir();
    if (homeDir.empty())
    {
        std::cerr << "Could not determine the home directory!\n\nExiting...\n\n";
        return 3;
    }

    const std::string dictDir = homeDir + "/.config/latin/";
    const std::string XMLfile = dictDir + "Perseus_text_1999.04.0060.xml";
    const std::string XMLfileDlURL = "http://www.perseus.tufts.edu/hopper/dltext?doc=Perseus:text:1999.04.0060";

    // Download dictionary if not found
    if (!isUsableFile(XMLfile))
    {
        // Flush so the notice appears before wget's own output.
        std::cout << "\nNote: The XML dictionary file " << XMLfile << " has not been found.\n\nDownloading and preparing XML file...\n\n" << std::flush;

        // Every path and the URL are quoted: the URL contains '?', and the
        // home directory may contain spaces.
        const std::string dlCmd = "mkdir -p " + shellQuote(dictDir)
            + " && cd " + shellQuote(dictDir)
            + " && wget -O- " + shellQuote(XMLfileDlURL)
            + " | tr -d '\\r' > " + shellQuote(XMLfile);
        const int status = std::system(dlCmd.c_str());

        // Probe the file again. The pipeline's exit status is that of tr, so
        // a wget failure only shows up as an empty output file.
        if (status != 0 || !isUsableFile(XMLfile))
        {
            std::cout << "Could not download or find file!\n\nExiting...\n\n";
            return 2;
        }
        std::cout << "Finished downloading!\n\nRestart program to use new dictionary.\n\n";
        return 0;
    }

    std::ifstream myFile(XMLfile);
    if (!myFile)
    {
        std::cout << "Could not download or find file!\n\nExiting...\n\n";
        return 2;
    }

    // Go through all given keys from command line parameters
    for (int keyNum = 1; keyNum < argc; ++keyNum)
    {
        const std::string word = argv[keyNum];
        std::string text = findEntry(myFile, word);

        // Don't waste time—go to next iteration!
        if (text.empty())
        {
            std::cout << "Search key '" << word << "' not found.\n" << std::endl;
            continue;
        }

        /* User may want to define an entire paragraph of words at one time,
           so format and flush right after each key to allow the first
           results to be shown instantly, even when piped into a pager. */
        // Give lots of space to easily distinguish between definitions
        std::cout << prettify(std::move(text)) << "\n\n\n" << std::flush;
    }
    return 0;
}