In this blog post I describe how to build an attractive word cloud that updates in real time using vega.js.
Vega is a visualization grammar, a declarative format for creating, saving, and sharing interactive visualization designs.
With Vega, you can describe the visual appearance and interactive behavior of a visualization in a JSON format, and generate views using HTML5 Canvas or SVG. Read more
This word cloud was written for the Twitter analytics dashboard displayed at WSO2ConAsia 2016 and for WSO2's US Election 2016 project, while I was an intern at +WSO2.
The word cloud is customizable and colorful, which makes it well suited to marketing-oriented analytics dashboards.
The main library used is vega.js (a JavaScript library that runs on top of d3.js).
![]() |
| WordCloud displayed @ wso2ConAsia 2016 |
- First, the sentences or words must be processed so that only the important words, the ones that should be displayed in the word cloud, remain. To do this, first remove all links (URLs) and unnecessary characters, including numbers, using regular expressions, and then remove the common stop words we use in day-to-day language. You can do this processing in the back end. Below is some basic Java code that does this; you can adapt it wherever it is needed.
/**
* Created by dinali on 3/8/16.
*/
import java.util.*;
/**
 * Cleans free text (for example tweets) so that only words worth showing in a
 * word cloud remain: hashtags, URLs, @mentions, digits and punctuation are
 * removed, the text is lower-cased, and stop words are dropped.
 */
public class Test {
    /** Lower-case stop words (common English words plus project-specific names) that are never kept. */
    public static String[] stopWord = {"i","me","my","myself","we","us","our","just","ours","ourselves","you","your","yours","yourself","yourselves","he","him","his","himself","she","her","hers","herself","it","its","itself","they","them","their","theirs","themselves","what","which","who","whom","whose","this","that","these","those","am","is","are","was","were","be","been","being","have","has","had","having","do","does","did","doing","will","would","should","can","could","ought","i'm","you're","he's","she's","it's","we're","they're","i've","you've","we've","they've","i'd","you'd","he'd","she'd","we'd","they'd","i'll","you'll","he'll","she'll","we'll","they'll","isn't","aren't","wasn't","weren't","hasn't","haven't","hadn't","doesn't","don't","didn't","won't","wouldn't","shan't","shouldn't","can't","cannot","couldn't","mustn't","let's","that's","who's","what's","here's","there's","when's","where's","why's","how's","a","an","the","and","but","if","or","because","as","until","while","of","at","by","for","with","about","against","between","into","through","during","before","after","above","below","to","from","up","upon","down","in","out","on","off","over","under","again","further","then","once","here","there","when","where","why","how","all","any","both","each","few","more","most","other","some","such","no","nor","not","only","own","same","so","than","too","very","say","says","said","shall","trump","donaldtrump","hillary","clinton","hillaryclinton","ted","cruz","tedcruz","rick","santorum","ricksantorum","marco","rubio","marcorubio","mike","huckabee","mikehuckabee","martin","omalley","martinomalley","carly","fiorina","carlyfiorina","rand","paul","randpaul","john","kasich","johnkasich","ben","carson","bencarson","lindsley","graham","lindsleygraham","scott","walker","scottwalker","jim","gilmore","jimgilmore","jeb","bush","jebbush","http","https","chris","christie","chrischristie","pataki","george","georgepataki","election","election2016"};

    /** Mutable list view of the stop words; entries added at runtime are honoured by {@link #processString}. */
    public static List<String> stringList = new ArrayList<String>(Arrays.asList(stopWord));

    /**
     * Every word kept by {@link #processString}, accumulated across all calls.
     * It is never cleared here, so long-running callers should clear it themselves.
     */
    public static ArrayList<String> newList = new ArrayList<String>();

    /** Matches http(s)/ftp/file URLs so they can be blanked out. */
    private static final String URL_REGEX =
            "(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]";

    /** Runs the cleaner on a sample sentence and prints the result. */
    public static void main(String args[]) {
        String testString = "2,500-year-old seal belonging to a woman -- \"exceptional\" in that era -- found in Jerusalem http://cnn.it/1OY6DZw ";
        String output = processString(testString);
        System.out.println(output);
    }

    /**
     * Removes noise and stop words from {@code longstr}.
     *
     * <p>Contractions such as "don't" or "it's" are compared against the stop
     * list with their apostrophe intact, so the apostrophe entries of the list
     * actually match. Words that are not stop words are still split at
     * apostrophes ("dinali's" yields "dinali" and "s").
     *
     * <p>Side effect: every kept word is appended to {@link #newList}.
     *
     * @param longstr raw text; {@code null} is treated as empty text
     * @return a single blank followed by each kept word, every word preceded by
     *         one blank (so a non-empty result starts with two blanks and an
     *         empty result is {@code " "})
     */
    public static String processString(String longstr) {
        StringBuilder kept = new StringBuilder(" ");
        if (longstr == null) {
            return kept.toString();
        }
        // Snapshot of the (publicly mutable) list: O(1) lookups instead of a list scan per token.
        Set<String> stopSet = new HashSet<String>(stringList);
        StringTokenizer tokens = new StringTokenizer(normalize(longstr, true));
        while (tokens.hasMoreTokens()) {
            String token = trimApostrophes(tokens.nextToken());
            if (token.length() == 0 || stopSet.contains(token)) {
                continue;
            }
            // Not a stop word as a whole: split at inner apostrophes and filter each part.
            StringTokenizer parts = new StringTokenizer(token, "'");
            while (parts.hasMoreTokens()) {
                String part = parts.nextToken();
                if (!stopSet.contains(part)) {
                    newList.add(part);
                    kept.append(' ').append(part);
                }
            }
        }
        return kept.toString();
    }

    /**
     * Blanks out hashtags, URLs and @mentions, lower-cases the text and replaces
     * every character outside {@code a-z} with a blank.
     *
     * @param source raw text; {@code null} yields an empty string
     * @return the cleaned text, containing only {@code a-z} and blanks
     */
    public static String getString(String source) {
        if (source == null) {
            return "";
        }
        return normalize(source, false);
    }

    /** Shared cleaning pipeline; optionally keeps apostrophes so contractions stay intact. */
    private static String normalize(String source, boolean keepApostrophes) {
        String text = source.replaceAll("#[A-Za-z0-9]*", " ");
        text = text.replaceAll(URL_REGEX, " ");
        text = text.replaceAll("@[A-Za-z0-9]*", " ");
        // Locale.ROOT: under e.g. a Turkish default locale "I" would lower-case to a dotless i.
        text = text.toLowerCase(Locale.ROOT);
        if (keepApostrophes) {
            // Treat the typographic apostrophe like the ASCII one.
            text = text.replace('\u2019', '\'');
            return text.replaceAll("[^a-z']", " ");
        }
        return text.replaceAll("[^a-z]", " ");
    }

    /** Strips leading and trailing apostrophes (quote marks around a word). */
    private static String trimApostrophes(String token) {
        int start = 0;
        int end = token.length();
        while (start < end && token.charAt(start) == '\'') {
            start++;
        }
        while (end > start && token.charAt(end - 1) == '\'') {
            end--;
        }
        return token.substring(start, end);
    }
}
- If you are dealing with big data, it is wasteful to keep all the needed words in a single string (it requires a lot of memory). Instead, you can use CountMinSketch from the stream-lib library, a compact data structure that keeps an approximate count for each word, to get the count of a specific word after processing the string.
- When working in real time, the processed data can be stored in a MySQL database; using the setInterval() function you can fetch the data every 5 minutes and pass the resulting string to the word cloud function.
/*************************************************************************************************************
* Common WordCloud
***************************************************************************************************************/
// Server address (presumably the DAS analytics server, going by the name).
// Not referenced in the visible part of this file.
var dasIp="52.77.25.83";
// Global stop-word pattern; getPersonDataCloud() passes it to updatePersonText(),
// which places it in the "stopwords" option of the Vega countpattern transform.
var stopWords ="(a|b| ef)";
// Not referenced in the visible part of this file.
var textData="";
// Base64 encoding of "admin:user@das".
// NOTE(review): these are hard-coded credentials delivered to every browser that
// loads this script; anyone can decode them. Confirm whether they are real and
// move authentication to the server side if so.
var authenticateString = window.btoa("admin:user@das");
// Alternative server addresses: "10.100.4.185", "52.77.25.83"
/**
 * Draws the word cloud for one person immediately and then redraws it every
 * five minutes.
 *
 * The stop words applied are the file-level `stopWords` read by
 * getPersonDataCloud(); the local `stopWords` this function used to declare
 * was never passed on and has been removed together with the unused `i`/`j`.
 *
 * @param {string} cloudDiv id (without "#") of the element that hosts the cloud
 * @param {string} Pname    value sent to the server as the `Choose` field
 * @param {string} color    first colour of the palette used for the words
 * @returns {number} the setInterval handle; pass it to clearInterval() to stop
 *                   the periodic refresh
 */
function drawPersonWordCloud(cloudDiv, Pname, color) {
    var REFRESH_INTERVAL_MS = 5 * 60 * 1000;

    getPersonDataCloud(cloudDiv, Pname, color);
    return setInterval(function () {
        getPersonDataCloud(cloudDiv, Pname, color);
    }, REFRESH_INTERVAL_MS);
}
/**
 * Builds the Vega specification object for a word cloud sized to its container.
 *
 * @param {string} new_text  text whose words are counted and laid out
 * @param {string} stopWords pattern handed to the countpattern transform as "stopwords"
 * @param {string} cloudDiv  id (without "#") of the container element; its
 *                           jQuery width/height become the size of the cloud
 * @param {string} color     first colour of the three-colour palette
 * @returns {Object} the specification object
 */
function updatePersonText(new_text, stopWords, cloudDiv, color) {
    var container = $("#" + cloudDiv);
    var cloudWidth = container.width();
    var cloudHeight = container.height();
    var palette = [color, "#6d4c41", "#000000"];

    // Count words of three or more characters, upper-cased, minus the stop words.
    var countWords = {
        "type": "countpattern",
        "field": "data",
        "case": "upper",
        "pattern": "[\\w']{3,}",
        "stopwords": stopWords
    };

    // Pick one of three rotation angles at random for each word.
    var pickAngle = {
        "type": "formula", "field": "angle",
        "expr": "[-45, 0, 45][~~(random() * 3)]"
    };

    // Font weight: 600 for the word 'VEGA', 300 for everything else.
    var pickWeight = {
        "type": "formula", "field": "weight",
        "expr": "if(datum.text=='VEGA', 600, 300)"
    };

    // Lay the words out; the font size is driven by each word's count.
    var layoutCloud = {
        "type": "wordcloud",
        "size": [cloudWidth, cloudHeight],
        "text": {"field": "text"},
        "rotate": {"field": "angle"},
        "font": {"value": "Verdana"},
        "fontSize": {"field": "count"},
        "fontWeight": {"field": "weight"},
        "fontScale": [20, 60]
    };

    var wordTable = {
        "name": "table",
        "values": [new_text],
        "transform": [countWords, pickAngle, pickWeight, layoutCloud]
    };

    var colorScale = {
        "name": "color",
        "type": "ordinal",
        "range": palette
    };

    // Text marks positioned from the layout_* fields; hovering halves the opacity.
    var wordMarks = {
        "type": "text",
        "from": {"data": "table"},
        "properties": {
            "enter": {
                "x": {"field": "layout_x"},
                "y": {"field": "layout_y"},
                "angle": {"field": "layout_rotate"},
                "font": {"field": "layout_font"},
                "fontSize": {"field": "layout_fontSize"},
                "fontStyle": {"field": "layout_fontStyle"},
                "fontWeight": {"field": "layout_fontWeight"},
                "text": {"field": "text"},
                "align": {"value": "center"},
                "baseline": {"value": "alphabetic"},
                "fill": {"scale": "color", "field": "text"}
            },
            "update": {
                "fillOpacity": {"value": 1}
            },
            "hover": {
                "fillOpacity": {"value": 0.5}
            }
        }
    };

    return {
        "width": cloudWidth,
        "height": cloudHeight,
        "padding": {"top": 0, "bottom": 0, "left": 0, "right": 0},
        "data": [wordTable],
        "scales": [colorScale],
        "marks": [wordMarks]
    };
}
/*
 * Fetches the word counts for one person from the database through the
 * Jaggery endpoint js/candidateCloud.jag and renders them as a word cloud.
 *
 * cloudDiv - id (without "#") of the element that hosts the cloud
 * Pname    - value sent to the server as the `Choose` field of the JSON body
 * color    - first colour of the palette, forwarded to updatePersonText()
 *
 * Uses the file-level `stopWords` pattern. Returns nothing; the cloud is drawn
 * asynchronously when the request succeeds, and failures are logged.
 */
function getPersonDataCloud(cloudDiv, Pname, color) {
    var cloudDivID = "#" + cloudDiv;
    var Candidates = { Choose: Pname };
    $.ajax({
        url: "js/candidateCloud.jag",
        dataType: "json",
        contentType: 'application/json',
        data: JSON.stringify(Candidates),
        type: "POST",
        success: function (data) {
            var TextData = JSON.stringify(data);
            /**
             * The data should be in the form of "string1:count1;string2:count2;...."
             */
            var res = TextData.split(";");
            var longstr = "";
            // The first and last fragments are skipped, presumably because after
            // JSON.stringify they carry the JSON wrapper text rather than a clean
            // "word:count" pair.
            // NOTE(review): this also discards whatever word shares the first
            // fragment with the wrapper; confirm against the real payload.
            for (var i = 1; i < res.length - 1; i++) {
                var pair = res[i].split(":");
                // Same numeric coercion the loop comparison applied before;
                // a missing or non-numeric count repeats the word zero times.
                var count = Number(pair[1]);
                // Repeat each word `count` times so the countpattern transform
                // in the spec reproduces the stored frequency.
                for (var j = 0; j < count; j++) {
                    longstr += pair[0] + " ";
                }
            }
            // Declared locally: the original assigned to an undeclared name,
            // creating an implicit global (an error in strict mode).
            var new_cloud = updatePersonText(longstr, stopWords, cloudDiv, color);
            var viewUpdateFunction = (function (chart) {
                this.view = chart({el: cloudDivID}).update();
            }).bind(this);
            vg.parse.spec(new_cloud, viewUpdateFunction);
        },
        error: function (jqXHR, textStatus, errorThrown) {
            // Keep the previously drawn cloud, but do not fail silently.
            if (window.console && console.error) {
                console.error("Word cloud data request failed: " + textStatus, errorThrown);
            }
        }
    });
}
- You can use Jaggery to get the data from the database. Here is a sample Jaggery file, js/candidateCloud.jag, that reads the data.
<%
//A jaggary code to get mysql table column data
var Para = request.getContent();
var db = new Database(""jdbc:mysql://localhost:3306/","DataBaseName","userName","pw");
var result = db.query("SELECT "+Para["Choose"]+" FROM tableName where id=1");
var data = [];
data.push(result);
db.close();
%>
Instead of MySQL table data, you can also plug in the needed set of words directly and get a word cloud.

No comments:
Post a Comment