# charsim.py
# Written by Todd Wareham for CS 2500
"""
Given the names of two text files as command-line arguments, computes and
 prints the number of distinct characters in each file and a measure of
 their similarity relative to distinct character content (the percentage
 of all distinct characters occurring in either file that occur in both).
"""

import sys

def _distinct_chars(path):
    """Return the set of distinct characters found in the file at *path*.

    The file is opened in text mode ("r") and read in one go.  The ``with``
    block guarantees the handle is closed even if reading raises.
    """
    with open(path, "r") as handle:
        return set(handle.read())


def _similarity_percent(chars1, chars2):
    """Return the percentage of distinct characters shared by both sets.

    The value is ``|chars1 & chars2| / |chars1 | chars2| * 100``.  When both
    sets are empty the ratio is undefined (0 / 0); two empty files have
    identical character content, so 100.0 is returned instead of raising
    ZeroDivisionError.
    """
    union_size = len(chars1 | chars2)
    if union_size == 0:
        return 100.0
    # "* 1.0" forces float division under Python 2 integer semantics.
    return ((len(chars1 & chars2) * 1.0) / union_size) * 100.0


def main(argv=None):
    """Run the character-similarity report.

    *argv* is the full argument vector (program name followed by exactly two
    file names); it defaults to ``sys.argv``.

    Prints the distinct-character count of each file and their similarity
    percentage.  Returns 0 on success, or 1 after printing a usage message
    when the number of arguments is wrong.  Errors from opening or reading
    the files propagate to the caller.
    """
    if argv is None:
        argv = sys.argv

    # Each print() call takes a single pre-formatted string so that it emits
    # the same text whether ``print`` is a statement (Python 2) or a
    # function (Python 3).
    if len(argv) != 3:
        print("usage:  %s  file1, file2" % argv[0])
        return 1

    s1 = _distinct_chars(argv[1])
    s2 = _distinct_chars(argv[2])

    print("%d distinct characters in file #1" % len(s1))
    print("%d distinct characters in file #2" % len(s2))
    print("File character similarity:  %s %%" % _similarity_percent(s1, s2))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

