Letter | Frequency (%) |
---|---|
A | 8.167% |
B | 1.492% |
C | 2.782% |
D | 4.253% |
E | 12.702% |
F | 2.228% |
G | 2.015% |
H | 6.094% |
I | 6.966% |
J | 0.153% |
K | 0.772% |
L | 4.025% |
M | 2.406% |
N | 6.749% |
O | 7.507% |
P | 1.929% |
Q | 0.095% |
R | 5.987% |
S | 6.327% |
T | 9.056% |
U | 2.758% |
V | 0.978% |
W | 2.360% |
X | 0.150% |
Y | 1.974% |
Z | 0.074% |
Relative letter frequencies (%)
Digraphs
th, er, on, an, re, he, in, ed, nd, ha, at, en, es, of, or, nt, ea, ti, to, it, st, io, le, is, ou, ar, as, de, rt, ve
Trigraphs
the, and, tha, ent, ion, tio, for, nde, has, nce, edt, tis, oft, sth, men
Code
01.
/**
02.
* Prints out frequencies of input characters (in percent)
03.
* @param source input file
04.
* @param encoding encoding of the file
05.
*/
06.
public
static
void
count(File source, String encoding)
throws
UnsupportedEncodingException, IOException{
07.
BufferedReader reader =
new
BufferedReader(
new
InputStreamReader(
new
FileInputStream(source), encoding));
08.
09.
TreeMap<Character, Integer> occurences =
new
TreeMap<Character, Integer>();
10.
11.
String s =
null
;
12.
int
counter =
0
;
13.
while
((s = reader.readLine())!=
null
){
14.
for
(
int
i =
0
; i < s.length(); i++){
15.
counter++;
16.
Character curr = (Character) s.charAt(i);
17.
if
(occurences.get(curr) ==
null
){
18.
occurences.put(curr,
new
Integer(
1
));
19.
}
else
{
20.
occurences.put(curr, occurences.get(curr).intValue() +
1
);
21.
}
22.
}
23.
}
24.
25.
for
(Character ch : occurences.keySet()){
26.
System.out.println(ch.toString() +
": "
+ (occurences.get(ch).intValue()/(
double
)counter *
100
));
27.
}
28.
}
Sources
- LEWAND, Robert. Cryptological mathematics. [s.l.] : The Mathematical Association of America, 2000. 199 p. ISBN 0-88385-719-7.