1 package net.sf.statcvs.reports;
2
3 import java.util.Iterator;
4 import java.util.List;
5 import java.util.TreeMap;
6 import java.util.regex.Matcher;
7 import java.util.regex.Pattern;
8
9 import net.sf.statcvs.Messages;
10 import net.sf.statcvs.model.Commit;
11 import net.sf.statcvs.model.Repository;
12 import net.sf.statcvs.output.ConfigurationOptions;
13 import net.sf.statcvs.output.ReportConfig;
14 import net.sf.statcvs.reportmodel.IntegerColumn;
15 import net.sf.statcvs.reportmodel.SimpleTextColumn;
16 import net.sf.statcvs.reportmodel.Table;
17 import net.sf.statcvs.util.IntegerMap;
18
19 /**
20 * Table report which A list of word frequencies in commit messages.
21 *
22 * @author Benoit Xhenseval
23 * @version $Id: CloudCommitTableReport.java,v 1.5 2009/05/08 13:18:48 benoitx Exp $
24 */
25 public class CloudCommitTableReport implements TableReport {
26 private static final String ENGLISH_EXCLUSIONS = "\\d+|an|the|me|my|we|you|he|she|it|are|is|am|will|shall|should|would|had|have|has|was|were|be|been|this|that|there|"
27 + "|who|when|how|where|which|already|after|by|on|or|so|some|commit|also|got|get|do|don't|from|all|but|yet|to|in|does|doesn't"
28 + "out|of|for|if|yes|no|not|may|might|can|could|at|as|with|without|some|more|lot|lots|than|then|adding|added|work|they|used|still|show|must|into|same";
29 private Table table = null;
30 private final ReportConfig config;
31 private final Repository content;
32 private final IntegerMap cloudMap = new IntegerMap();
33 private Pattern excluded;
34
35 /**
36 * Creates a table report containing the top 10 authors and their
37 * LOC contributions
38 * @param content the version control source data
39 */
40 public CloudCommitTableReport(final ReportConfig config) {
41 content = config.getRepository();
42 this.config = config;
43 }
44
45 /**
46 * @see net.sf.statcvs.reports.TableReport#calculate()
47 */
48 public void calculate() {
49 if (this.table != null) {
50 return;
51 }
52 if (excluded == null) {
53 excluded = Pattern.compile(ConfigurationOptions.getConfigStringProperty("cloud.exclusionRegExp", ENGLISH_EXCLUSIONS), Pattern.CASE_INSENSITIVE);
54 }
55 final String summary = Messages.getString("CLOUD_TABLE_TITLE");
56 table = new Table(summary);
57 final SimpleTextColumn wordColumn = new SimpleTextColumn(Messages.getString("CLOUD_WORD_COL"));
58 final IntegerColumn frequencyColumn = new IntegerColumn(Messages.getString("CLOUD_COUNT_COL"));
59 frequencyColumn.setShowPercentages(true);
60 table.addColumn(wordColumn);
61 table.addColumn(frequencyColumn);
62 table.setKeysInFirstColumn(true);
63
64 calculate(content.getCommits());
65 int lines = 0;
66 final Integer minFrequency = ConfigurationOptions.getConfigIntegerProperty("cloud.minFrequency", new Integer(5));
67 final Integer maxNumbers = ConfigurationOptions.getConfigIntegerProperty("cloud.maxWordNumberInTable", new Integer(50));
68 final Iterator it = cloudMap.iteratorSortedByValueReverse();
69 double maxFreq = -1;
70 while (it.hasNext()) {
71 final String word = (String) it.next();
72 final int frequency = cloudMap.get(word);
73
74 if (maxFreq < 0) {
75 maxFreq = Math.log(frequency);
76 }
77
78 if (frequency < minFrequency.intValue()) {
79 break;
80 }
81
82
83
84 wordColumn.addValue(word);
85 frequencyColumn.addValue(frequency);
86 lines++;
87 if (lines >= maxNumbers.intValue()) {
88 break;
89 }
90 }
91
92 }
93
94 private void calculate(final List commits) {
95 final Iterator it = commits.iterator();
96 final Integer minSize = ConfigurationOptions.getConfigIntegerProperty("cloud.minLengthForWord", new Integer(4));
97 while (it.hasNext()) {
98 final Commit commit = (Commit) it.next();
99 if (commit.getAuthor() == null || !this.config.isDeveloper(commit.getAuthor())) {
100 continue;
101 }
102
103 final String comment = commit.getComment();
104
105 if (comment != null && comment.length() > minSize.intValue()) {
106 final String[] split = comment.split("\\W+");
107 for (int i = 0; i < split.length; i++) {
108 final String word = split[i];
109 if (word != null && word.length() >= minSize.intValue()) {
110
111 tryToAdd(word.toLowerCase());
112 }
113 }
114 }
115 }
116
117 }
118
119 private void tryToAdd(final String word) {
120 final Matcher m = excluded.matcher(word);
121 if (m.matches()) {
122 return;
123 }
124 mergeIfRequired(word, "ed", 1);
125 mergeIfRequired(word, "ing", 3);
126 mergeIfRequired(word, "es", 1);
127 mergeIfRequired(word, "s", 1);
128
129
130
131 }
132
133 private void mergeIfRequired(final String word, final String suffix, final int toRemove) {
134
135 cloudMap.addInt(word, 1);
136
137 if (word.endsWith(suffix)) {
138 final String chopped = word.substring(0, word.length() - toRemove);
139 if (cloudMap.contains(chopped)) {
140 cloudMap.addInt(chopped, cloudMap.get(word));
141 cloudMap.remove(word);
142 }
143 }
144 }
145
146 /**
147 * @see net.sf.statcvs.reports.TableReport#getTable()
148 */
149 public Table getTable() {
150 return table;
151 }
152
153 public String getRawContent() {
154 final StringBuffer buffer = new StringBuffer();
155 buffer.append("<p>");
156
157 int lines = 0;
158 final Iterator it = cloudMap.iteratorSortedByValueReverse();
159 final Integer minFrequency = ConfigurationOptions.getConfigIntegerProperty("cloud.minFrequency", new Integer(5));
160 final Integer maxNumbers = ConfigurationOptions.getConfigIntegerProperty("cloud.maxWordNumberInCloud", new Integer(100));
161 double maxFreq = -1;
162 final TreeMap tm = new TreeMap();
163 while (it.hasNext()) {
164 final String word = (String) it.next();
165 final int frequency = cloudMap.get(word);
166
167 if (maxFreq < 0) {
168 maxFreq = Math.log(frequency);
169 }
170
171 if (frequency < minFrequency.intValue()) {
172 break;
173 }
174
175 final long fontSize = Math.round(Math.min(-2 + Math.log(frequency) * 10 / maxFreq, 8));
176
177
178
179 final StringBuffer buffer1 = new StringBuffer();
180 buffer1.append("<font size=\"").append(fontSize).append("\">").append(word).append(" </font> ");
181 tm.put(word, buffer1.toString());
182
183 lines++;
184 if (lines >= maxNumbers.intValue()) {
185 break;
186 }
187 }
188
189 final Iterator it2 = tm.values().iterator();
190 while (it2.hasNext()) {
191 buffer.append(it2.next());
192 }
193
194 buffer.append("</p>");
195 return buffer.toString();
196 }
197 }