Monday, March 03, 2003

Using library algorithms -- cont'd (K&M Chapter 6)

Testing for simple palindromes:


#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

using std::cin;             using std::cout;
using std::endl;            using std::equal;
using std::string;
using std::isspace;

// Report whether `s' reads the same forwards and backwards.
// The comparison is exact: case, spaces and punctuation all count.
// An empty string is considered a palindrome.
bool is_palindrome(const string& s)
{
	string::size_type lo = 0;
	string::size_type hi = s.size();

	// walk inward from both ends; the first mismatch settles the question
	while (lo < hi) {
		--hi;
		if (s[lo] != s[hi])
			return false;
		++lo;
	}
	return true;
}

// Read whitespace-separated words from standard input and report, one
// line per word, whether each of them is a palindrome.
int main()
{
        string word;
        while (cin >> word) {
                // pick the message first, then emit a single output statement
                const char* verdict = is_palindrome(word)
                        ? " is a palindrome"
                        : " is not a palindrome";
                cout << word << verdict << endl;
        }
        return 0;
}



Finding URLs


#include <algorithm>
#include <cctype>
#include <string>
#include <vector>

#include "urls.h"

using std::find; using std::find_if;
using std::isalnum; using std::isalpha;
using std::isdigit; using std::search;
using std::string; using std::vector;

// Forward declarations for the helpers used by find_urls(); their
// definitions follow it in this file.

// true when the given character cannot appear in a URL
bool not_url_char(char);

// given a range [b, e), returns the position of the first character in it
// for which not_url_char() is true, or `e' if there is none
string::const_iterator
url_end(string::const_iterator, string::const_iterator);

// given a range [b, e), returns the position where the protocol-name of
// the first URL in the range starts, or `e' if the range holds no URL
string::const_iterator
url_beg(string::const_iterator, string::const_iterator);

// Collect every URL that occurs in `s', in order of appearance.
// A URL is whatever url_beg() and url_end() delimit: a protocol-name,
// the separator `://', and the run of URL characters that follows.
vector<string>
find_urls(const string& s)
{
	typedef string::const_iterator iter;

	vector<string> urls;
	iter cursor = s.begin();
	const iter finish = s.end();

	// scan left to right, restarting after each URL that was found
	while (cursor != finish) {

		// locate the start of the next URL, if there is one
		const iter start = url_beg(cursor, finish);
		if (start == finish)
			break;          // nothing more to find in the input

		// the URL extends up to the first non-URL character
		const iter stop = url_end(start, finish);
		urls.push_back(string(start, stop));

		// continue looking right after the URL just recorded
		cursor = stop;
	}
	return urls;
}

// Return the position just past the URL that starts at `b': the first
// character in [b, e) that cannot be part of a URL, or `e' if every
// remaining character can.
string::const_iterator
url_end(string::const_iterator b, string::const_iterator e)
{
	// step over characters for as long as they are valid in a URL
	while (b != e && !not_url_char(*b))
		++b;
	return b;
}

// Return true when `c' cannot be part of a URL, i.e. when it is neither
// alphanumeric nor one of the punctuation characters listed in `url_ch'.
bool not_url_char(char c)
{
	// characters, in addition to alphanumerics, that can appear in a URL
	static const string url_ch = "~;/?:@=&$-_.+!*'(),";

	// the <cctype> classifiers are only defined for EOF and for values
	// representable as unsigned char; a plain `char' holding a byte
	// above 0x7f may be negative, so convert before classifying
	const unsigned char uc = static_cast<unsigned char>(c);

	// see whether `c' can appear in a URL and return the negative
	return !(isalnum(uc) ||
	         find(url_ch.begin(), url_ch.end(), c) != url_ch.end());
}

// Find the beginning of the first URL in [b, e).
//
// A URL is recognised by the separator `://' preceded by one or more
// letters (the protocol-name) and followed by at least one character
// that may appear in a URL.  Returns the position of the first letter
// of the protocol-name, or `e' if the range contains no URL.
string::const_iterator
url_beg(string::const_iterator b, string::const_iterator e)
{
	static const string sep = "://";

	typedef string::const_iterator iter;

	// `i' marks where the separator was found
	iter i = b;

	while ((i = search(i, e, sep.begin(), sep.end())) != e) {
		// make sure the separator isn't at the beginning or
		// end of the line
		if (i != b && i + sep.size() != e) {

			// `beg' marks the beginning of the protocol-name;
			// convert to unsigned char before classifying, because
			// isalpha is undefined for negative `char' values
			iter beg = i;
			while (beg != b &&
			       isalpha(static_cast<unsigned char>(beg[-1])))
				--beg;

			// is there at least one appropriate character
			// before and after the separator?
			if (beg != i && !not_url_char(i[sep.size()]))
				return beg;
		}

		// the separator we found wasn't part of a URL;
		// advance `i' past this separator
		i += sep.size();
	}
	return e;
}



Finding URLs (main() program).


#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <vector>

#include "urls.h"

using std::cout;
using std::cin;
using std::endl;
using std::find_if;
using std::getline;
using std::string;
using std::vector;


// Read standard input line by line and print every URL found on each
// line, one URL per output line.
int main() {
        string line;
        while (getline(cin, line)) {
                const vector<string> urls = find_urls(line);

                // emit the URLs of this line in the order they were found
                for (vector<string>::size_type k = 0; k != urls.size(); ++k)
                        cout << urls[k] << endl;
        }
        return 0;
}


Further demonstration of more algorithm functions


#include	<algorithm>
#include	<cctype>
#include	<fstream>
#include	<iostream>
#include	<iterator>
#include	<stdexcept>
#include	<string>
#include	<vector>

using std::vector;
using std::string;
using std::ifstream;
using std::endl;
using std::cerr;
using std::cout;
using std::invalid_argument;
using std::transform;

// Return true when `str' contains at least one uppercase letter
// (anywhere in the string, not just in first position); false for a
// string without uppercase letters, including the empty string.
bool
capitalized(const string &str)
{
	for (string::const_iterator it = str.begin(); it != str.end(); ++it) {
		// isupper is only defined for EOF and unsigned char values;
		// a plain `char' above 0x7f may be negative, so convert first
		if (std::isupper(static_cast<unsigned char>(*it)))
			return true;
	}
	return false;
}

// Return true when the last character of `str' is a lowercase 'x'.
// An empty string has no last character and therefore yields false.
bool
ends_in_x(const string &str)
{
	// guard first: dereferencing end() - 1 of an empty string is undefined
	return !str.empty() && *(str.end() - 1) == 'x';
}

// Return a copy of `str' in which every uppercase letter has been
// replaced by its lowercase counterpart; all other characters are
// copied unchanged.
string
uncapitalize(const string &str)
{
	string	ret;
	ret.reserve(str.size());	// the result has the same length as the input

	for (string::const_iterator it = str.begin(); it != str.end(); ++it) {
		// tolower is only defined for EOF and unsigned char values, so
		// convert on the way in; it returns an int, so narrow back to
		// char explicitly on the way out
		ret += static_cast<char>(
			std::tolower(static_cast<unsigned char>(*it)));
	}
	return ret;
}

// Append every whitespace-separated word of the file named `file' to
// `words'.  Words already present in `words' are left in place.
//
// Throws invalid_argument when the file cannot be opened for reading.
void
read_words(const char *file, vector<string> &words)
{
	ifstream source(file);

	// refuse to go on if the stream could not be opened
	if (source.fail())
		throw invalid_argument("Invalid file \"" + string(file) + "\"");

	// extraction with >> skips whitespace and fails at end of input
	for (string token; source >> token; )
		words.push_back(token);
}

// Write every element of `words' to standard output, one per line,
// in the order in which the vector holds them.
void
show_words(const vector<string> &words)
{
	for (vector<string>::size_type pos = 0; pos < words.size(); ++pos)
		cout << words[pos] << endl;
}

// Demonstrates several library algorithms on two word lists:
// the system dictionary and a file named "custom" in the current
// directory.  The lists are concatenated, words ending in 'x' are
// dropped, and the remaining words are printed in lowercase.
//
// Returns -1 (after printing the reason to cerr) when either word
// list cannot be opened, and 0 otherwise.
int
main()
{
	vector<string> standard;
	vector<string> custom;

	try {
		read_words("/usr/share/dict/words", standard);
		read_words("custom", custom);
	} catch (const invalid_argument &e) {
		// catch by const reference: no copy of the exception object
		// is made and a more derived exception would not be sliced
		cerr << e.what() << endl;
		return -1;
	}

	// Create a new vector which consists of all the strings
	// in 'standard' and all the strings in 'custom'.
	//
	vector<string> all(standard);

	std::copy(custom.begin(), custom.end(), std::back_inserter(all));
	// or equivalently...
	//       all.insert(all.end(), custom.begin(), custom.end());

	// Remove all those strings from 'all' that end in the letter 'x'.
	// remove_if only moves the survivors to the front and returns the
	// new logical end; erase() then actually shrinks the vector.
	//
	vector<string>::iterator zap;
	zap = std::remove_if(all.begin(), all.end(), ends_in_x);
	all.erase(zap, all.end());

	// Create a new vector 'uncapitalized' that will contain all those
	// strings in 'all' that contain no capitalized letters.
	// Note that vector 'all' is unmodified.
	//
	vector<string> uncapitalized;
	std::remove_copy_if(all.begin(), all.end(),
			std::back_inserter(uncapitalized), capitalized);

	// Finally create a new vector 'lowercase' which contains all the
	// strings of 'all', but lowercased.
	//
	vector<string> lowercase;
	transform(all.begin(), all.end(), std::back_inserter(lowercase),
			uncapitalize);

	show_words(lowercase);
	return 0;
}


Last modified: Tue Mar 4 02:03:09 2003