//: C01:HTMLStripper.cpp // From Thinking in C++, 2nd Edition // Available at http://www.BruceEckel.com // (c) Bruce Eckel 2000 // Copyright notice in Copyright.txt // Filter to remove html tags and markers #include "../require.h" #include #include #include using namespace std; string replaceAll(string s, string f, string r) { unsigned int found = s.find(f); while(found != string::npos) { s.replace(found, f.length(), r); found = s.find(f); } return s; } string stripHTMLTags(string s) { while(true) { unsigned int left = s.find('<'); unsigned int right = s.find('>'); if(left==string::npos || right==string::npos) break; s = s.erase(left, right - left + 1); } s = replaceAll(s, "<", "<"); s = replaceAll(s, ">", ">"); s = replaceAll(s, "&", "&"); s = replaceAll(s, " ", " "); // Etc... return s; } int main(int argc, char* argv[]) { requireArgs(argc, 1, "usage: HTMLStripper InputFile"); ifstream in(argv[1]); assure(in, argv[1]); const int sz = 4096; char buf[sz]; while(in.getline(buf, sz)) { string s(buf); cout << stripHTMLTags(s) << endl; } } ///:~