//: C04:HTMLStripper2.cpp {RunByHand} // From "Thinking in C++, Volume 2", by Bruce Eckel & Chuck Allison. // (c) 1995-2004 MindView, Inc. All Rights Reserved. // See source code use permissions stated in the file 'License.txt', // distributed with the code package available at www.MindView.net. //{L} ../C03/ReplaceAll // Filter to remove html tags and markers. #include #include #include #include #include #include #include #include "../C03/ReplaceAll.h" #include "../require.h" using namespace std; string& stripHTMLTags(string& s) throw(runtime_error) { size_t leftPos; while((leftPos = s.find('<')) != string::npos) { size_t rightPos = s.find('>', leftPos+1); if(rightPos == string::npos) { ostringstream msg; msg << "Incomplete HTML tag starting in position " << leftPos; throw runtime_error(msg.str()); } s.erase(leftPos, rightPos - leftPos + 1); } // Remove all special HTML characters replaceAll(s, "<", "<"); replaceAll(s, ">", ">"); replaceAll(s, "&", "&"); replaceAll(s, " ", " "); // Etc... return s; } int main(int argc, char* argv[]) { requireArgs(argc, 1, "usage: HTMLStripper2 InputFile"); ifstream in(argv[1]); assure(in, argv[1]); // Read entire file into string; then strip ostringstream ss; ss << in.rdbuf(); try { string s = ss.str(); cout << stripHTMLTags(s) << endl; return EXIT_SUCCESS; } catch(runtime_error& x) { cout << x.what() << endl; return EXIT_FAILURE; } } ///:~