Search Results

Search found 17651 results on 707 pages for 'unix domain sockets'.

Page 574/707 | < Previous Page | 570 571 572 573 574 575 576 577 578 579 580 581  | Next Page >

  • Using R to Analyze G1GC Log Files

    - by user12620111
    Using R to Analyze G1GC Log Files body, td { font-family: sans-serif; background-color: white; font-size: 12px; margin: 8px; } tt, code, pre { font-family: 'DejaVu Sans Mono', 'Droid Sans Mono', 'Lucida Console', Consolas, Monaco, monospace; } h1 { font-size:2.2em; } h2 { font-size:1.8em; } h3 { font-size:1.4em; } h4 { font-size:1.0em; } h5 { font-size:0.9em; } h6 { font-size:0.8em; } a:visited { color: rgb(50%, 0%, 50%); } pre { margin-top: 0; max-width: 95%; border: 1px solid #ccc; white-space: pre-wrap; } pre code { display: block; padding: 0.5em; } code.r, code.cpp { background-color: #F8F8F8; } table, td, th { border: none; } blockquote { color:#666666; margin:0; padding-left: 1em; border-left: 0.5em #EEE solid; } hr { height: 0px; border-bottom: none; border-top-width: thin; border-top-style: dotted; border-top-color: #999999; } @media print { * { background: transparent !important; color: black !important; filter:none !important; -ms-filter: none !important; } body { font-size:12pt; max-width:100%; } a, a:visited { text-decoration: underline; } hr { visibility: hidden; page-break-before: always; } pre, blockquote { padding-right: 1em; page-break-inside: avoid; } tr, img { page-break-inside: avoid; } img { max-width: 100% !important; } @page :left { margin: 15mm 20mm 15mm 10mm; } @page :right { margin: 15mm 10mm 15mm 20mm; } p, h2, h3 { orphans: 3; widows: 3; } h2, h3 { page-break-after: avoid; } } pre .operator, pre .paren { color: rgb(104, 118, 135) } pre .literal { color: rgb(88, 72, 246) } pre .number { color: rgb(0, 0, 205); } pre .comment { color: rgb(76, 136, 107); } pre .keyword { color: rgb(0, 0, 255); } pre .identifier { color: rgb(0, 0, 0); } pre .string { color: rgb(3, 106, 7); } var hljs=new function(){function m(p){return p.replace(/&/gm,"&").replace(/"}while(y.length||w.length){var v=u().splice(0,1)[0];z+=m(x.substr(q,v.offset-q));q=v.offset;if(v.event=="start"){z+=t(v.node);s.push(v.node)}else{if(v.event=="stop"){var p,r=s.length;do{r--;p=s[r];z+=("")}while(p!=v.node);s.splice(r,1);while(r'+M[0]+""}else{r+=M[0]}O=P.lR.lastIndex;M=P.lR.exec(L)}return r+L.substr(O,L.length-O)}function J(L,M){if(M.sL&&e[M.sL]){var r=d(M.sL,L);x+=r.keyword_count;return r.value}else{return F(L,M)}}function I(M,r){var L=M.cN?'':"";if(M.rB){y+=L;M.buffer=""}else{if(M.eB){y+=m(r)+L;M.buffer=""}else{y+=L;M.buffer=r}}D.push(M);A+=M.r}function G(N,M,Q){var R=D[D.length-1];if(Q){y+=J(R.buffer+N,R);return false}var P=q(M,R);if(P){y+=J(R.buffer+N,R);I(P,M);return P.rB}var L=v(D.length-1,M);if(L){var O=R.cN?"":"";if(R.rE){y+=J(R.buffer+N,R)+O}else{if(R.eE){y+=J(R.buffer+N,R)+O+m(M)}else{y+=J(R.buffer+N+M,R)+O}}while(L1){O=D[D.length-2].cN?"":"";y+=O;L--;D.length--}var r=D[D.length-1];D.length--;D[D.length-1].buffer="";if(r.starts){I(r.starts,"")}return R.rE}if(w(M,R)){throw"Illegal"}}var E=e[B];var D=[E.dM];var A=0;var x=0;var y="";try{var s,u=0;E.dM.buffer="";do{s=p(C,u);var t=G(s[0],s[1],s[2]);u+=s[0].length;if(!t){u+=s[1].length}}while(!s[2]);if(D.length1){throw"Illegal"}return{r:A,keyword_count:x,value:y}}catch(H){if(H=="Illegal"){return{r:0,keyword_count:0,value:m(C)}}else{throw H}}}function g(t){var p={keyword_count:0,r:0,value:m(t)};var r=p;for(var q in e){if(!e.hasOwnProperty(q)){continue}var s=d(q,t);s.language=q;if(s.keyword_count+s.rr.keyword_count+r.r){r=s}if(s.keyword_count+s.rp.keyword_count+p.r){r=p;p=s}}if(r.language){p.second_best=r}return p}function i(r,q,p){if(q){r=r.replace(/^((]+|\t)+)/gm,function(t,w,v,u){return w.replace(/\t/g,q)})}if(p){r=r.replace(/\n/g,"")}return r}function n(t,w,r){var x=h(t,r);var v=a(t);var y,s;if(v){y=d(v,x)}else{return}var q=c(t);if(q.length){s=document.createElement("pre");s.innerHTML=y.value;y.value=k(q,c(s),x)}y.value=i(y.value,w,r);var u=t.className;if(!u.match("(\\s|^)(language-)?"+v+"(\\s|$)")){u=u?(u+" "+v):v}if(/MSIE [678]/.test(navigator.userAgent)&&t.tagName=="CODE"&&t.parentNode.tagName=="PRE"){s=t.parentNode;var p=document.createElement("div");p.innerHTML=""+y.value+"";t=p.firstChild.firstChild;p.firstChild.cN=s.cN;s.parentNode.replaceChild(p.firstChild,s)}else{t.innerHTML=y.value}t.className=u;t.result={language:v,kw:y.keyword_count,re:y.r};if(y.second_best){t.second_best={language:y.second_best.language,kw:y.second_best.keyword_count,re:y.second_best.r}}}function o(){if(o.called){return}o.called=true;var r=document.getElementsByTagName("pre");for(var p=0;p|=||=||=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~";this.ER="(?![\\s\\S])";this.BE={b:"\\\\.",r:0};this.ASM={cN:"string",b:"'",e:"'",i:"\\n",c:[this.BE],r:0};this.QSM={cN:"string",b:'"',e:'"',i:"\\n",c:[this.BE],r:0};this.CLCM={cN:"comment",b:"//",e:"$"};this.CBLCLM={cN:"comment",b:"/\\*",e:"\\*/"};this.HCM={cN:"comment",b:"#",e:"$"};this.NM={cN:"number",b:this.NR,r:0};this.CNM={cN:"number",b:this.CNR,r:0};this.BNM={cN:"number",b:this.BNR,r:0};this.inherit=function(r,s){var p={};for(var q in r){p[q]=r[q]}if(s){for(var q in s){p[q]=s[q]}}return p}}();hljs.LANGUAGES.cpp=function(){var a={keyword:{"false":1,"int":1,"float":1,"while":1,"private":1,"char":1,"catch":1,"export":1,virtual:1,operator:2,sizeof:2,dynamic_cast:2,typedef:2,const_cast:2,"const":1,struct:1,"for":1,static_cast:2,union:1,namespace:1,unsigned:1,"long":1,"throw":1,"volatile":2,"static":1,"protected":1,bool:1,template:1,mutable:1,"if":1,"public":1,friend:2,"do":1,"return":1,"goto":1,auto:1,"void":2,"enum":1,"else":1,"break":1,"new":1,extern:1,using:1,"true":1,"class":1,asm:1,"case":1,typeid:1,"short":1,reinterpret_cast:2,"default":1,"double":1,register:1,explicit:1,signed:1,typename:1,"try":1,"this":1,"switch":1,"continue":1,wchar_t:1,inline:1,"delete":1,alignof:1,char16_t:1,char32_t:1,constexpr:1,decltype:1,noexcept:1,nullptr:1,static_assert:1,thread_local:1,restrict:1,_Bool:1,complex:1},built_in:{std:1,string:1,cin:1,cout:1,cerr:1,clog:1,stringstream:1,istringstream:1,ostringstream:1,auto_ptr:1,deque:1,list:1,queue:1,stack:1,vector:1,map:1,set:1,bitset:1,multiset:1,multimap:1,unordered_set:1,unordered_map:1,unordered_multiset:1,unordered_multimap:1,array:1,shared_ptr:1}};return{dM:{k:a,i:"",k:a,r:10,c:["self"]}]}}}();hljs.LANGUAGES.r={dM:{c:[hljs.HCM,{cN:"number",b:"\\b0[xX][0-9a-fA-F]+[Li]?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+(?:[eE][+\\-]?\\d*)?L\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+\\.(?!\\d)(?:i\\b)?",e:hljs.IMMEDIATE_RE,r:1},{cN:"number",b:"\\b\\d+(?:\\.\\d*)?(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\.\\d+(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"keyword",b:"(?:tryCatch|library|setGeneric|setGroupGeneric)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\.",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\d+(?![\\w.])",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\b(?:function)",e:hljs.IMMEDIATE_RE,r:2},{cN:"keyword",b:"(?:if|in|break|next|repeat|else|for|return|switch|while|try|stop|warning|require|attach|detach|source|setMethod|setClass)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"literal",b:"(?:NA|NA_integer_|NA_real_|NA_character_|NA_complex_)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"literal",b:"(?:NULL|TRUE|FALSE|T|F|Inf|NaN)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"identifier",b:"[a-zA-Z.][a-zA-Z0-9._]*\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"|=||   Using R to Analyze G1GC Log Files   Using R to Analyze G1GC Log Files Introduction Working in Oracle Platform Integration gives an engineer opportunities to work on a wide array of technologies. My team’s goal is to make Oracle applications run best on the Solaris/SPARC platform. When looking for bottlenecks in a modern applications, one needs to be aware of not only how the CPUs and operating system are executing, but also network, storage, and in some cases, the Java Virtual Machine. I was recently presented with about 1.5 GB of Java Garbage First Garbage Collector log file data. If you’re not familiar with the subject, you might want to review Garbage First Garbage Collector Tuning by Monica Beckwith. The customer had been running Java HotSpot 1.6.0_31 to host a web application server. I was told that the Solaris/SPARC server was running a Java process launched using a commmand line that included the following flags: -d64 -Xms9g -Xmx9g -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:InitiatingHeapOccupancyPercent=80 -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+PrintGC -XX:+PrintGCTimeStamps -XX:+PrintHeapAtGC -XX:+PrintGCDateStamps -XX:+PrintFlagsFinal -XX:+DisableExplicitGC -XX:+UnlockExperimentalVMOptions -XX:ParallelGCThreads=8 Several sources on the internet indicate that if I were to print out the 1.5 GB of log files, it would require enough paper to fill the bed of a pick up truck. Of course, it would be fruitless to try to scan the log files by hand. Tools will be required to summarize the contents of the log files. Others have encountered large Java garbage collection log files. There are existing tools to analyze the log files: IBM’s GC toolkit The chewiebug GCViewer gchisto HPjmeter Instead of using one of the other tools listed, I decide to parse the log files with standard Unix tools, and analyze the data with R. Data Cleansing The log files arrived in two different formats. I guess that the difference is that one set of log files was generated using a more verbose option, maybe -XX:+PrintHeapAtGC, and the other set of log files was generated without that option. Format 1 In some of the log files, the log files with the less verbose format, a single trace, i.e. the report of a singe garbage collection event, looks like this: {Heap before GC invocations=12280 (full 61): garbage-first heap total 9437184K, used 7499918K [0xfffffffd00000000, 0xffffffff40000000, 0xffffffff40000000) region size 4096K, 1 young (4096K), 0 survivors (0K) compacting perm gen total 262144K, used 144077K [0xffffffff40000000, 0xffffffff50000000, 0xffffffff50000000) the space 262144K, 54% used [0xffffffff40000000, 0xffffffff48cb3758, 0xffffffff48cb3800, 0xffffffff50000000) No shared spaces configured. 2014-05-14T07:24:00.988-0700: 60586.353: [GC pause (young) 7324M->7320M(9216M), 0.1567265 secs] Heap after GC invocations=12281 (full 61): garbage-first heap total 9437184K, used 7496533K [0xfffffffd00000000, 0xffffffff40000000, 0xffffffff40000000) region size 4096K, 0 young (0K), 0 survivors (0K) compacting perm gen total 262144K, used 144077K [0xffffffff40000000, 0xffffffff50000000, 0xffffffff50000000) the space 262144K, 54% used [0xffffffff40000000, 0xffffffff48cb3758, 0xffffffff48cb3800, 0xffffffff50000000) No shared spaces configured. } A simple grep can be used to extract a summary: $ grep "\[ GC pause (young" g1gc.log 2014-05-13T13:24:35.091-0700: 3.109: [GC pause (young) 20M->5029K(9216M), 0.0146328 secs] 2014-05-13T13:24:35.440-0700: 3.459: [GC pause (young) 9125K->6077K(9216M), 0.0086723 secs] 2014-05-13T13:24:37.581-0700: 5.599: [GC pause (young) 25M->8470K(9216M), 0.0203820 secs] 2014-05-13T13:24:42.686-0700: 10.704: [GC pause (young) 44M->15M(9216M), 0.0288848 secs] 2014-05-13T13:24:48.941-0700: 16.958: [GC pause (young) 51M->20M(9216M), 0.0491244 secs] 2014-05-13T13:24:56.049-0700: 24.066: [GC pause (young) 92M->26M(9216M), 0.0525368 secs] 2014-05-13T13:25:34.368-0700: 62.383: [GC pause (young) 602M->68M(9216M), 0.1721173 secs] But that format wasn't easily read into R, so I needed to be a bit more tricky. I used the following Unix command to create a summary file that was easy for R to read. $ echo "SecondsSinceLaunch BeforeSize AfterSize TotalSize RealTime" $ grep "\[GC pause (young" g1gc.log | grep -v mark | sed -e 's/[A-SU-z\(\),]/ /g' -e 's/->/ /' -e 's/: / /g' | more SecondsSinceLaunch BeforeSize AfterSize TotalSize RealTime 2014-05-13T13:24:35.091-0700 3.109 20 5029 9216 0.0146328 2014-05-13T13:24:35.440-0700 3.459 9125 6077 9216 0.0086723 2014-05-13T13:24:37.581-0700 5.599 25 8470 9216 0.0203820 2014-05-13T13:24:42.686-0700 10.704 44 15 9216 0.0288848 2014-05-13T13:24:48.941-0700 16.958 51 20 9216 0.0491244 2014-05-13T13:24:56.049-0700 24.066 92 26 9216 0.0525368 2014-05-13T13:25:34.368-0700 62.383 602 68 9216 0.1721173 Format 2 In some of the log files, the log files with the more verbose format, a single trace, i.e. the report of a singe garbage collection event, was more complicated than Format 1. Here is a text file with an example of a single G1GC trace in the second format. As you can see, it is quite complicated. It is nice that there is so much information available, but the level of detail can be overwhelming. I wrote this awk script (download) to summarize each trace on a single line. #!/usr/bin/env awk -f BEGIN { printf("SecondsSinceLaunch IncrementalCount FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize\n") } ###################### # Save count data from lines that are at the start of each G1GC trace. # Each trace starts out like this: # {Heap before GC invocations=14 (full 0): # garbage-first heap total 9437184K, used 325496K [0xfffffffd00000000, 0xffffffff40000000, 0xffffffff40000000) ###################### /{Heap.*full/{ gsub ( "\\)" , "" ); nf=split($0,a,"="); split(a[2],b," "); getline; if ( match($0, "first") ) { G1GC=1; IncrementalCount=b[1]; FullCount=substr( b[3], 1, length(b[3])-1 ); } else { G1GC=0; } } ###################### # Pull out time stamps that are in lines with this format: # 2014-05-12T14:02:06.025-0700: 94.312: [GC pause (young), 0.08870154 secs] ###################### /GC pause/ { DateTime=$1; SecondsSinceLaunch=substr($2, 1, length($2)-1); } ###################### # Heap sizes are in lines that look like this: # [ 4842M->4838M(9216M)] ###################### /\[ .*]$/ { gsub ( "\\[" , "" ); gsub ( "\ \]" , "" ); gsub ( "->" , " " ); gsub ( "\\( " , " " ); gsub ( "\ \)" , " " ); split($0,a," "); if ( split(a[1],b,"M") > 1 ) {BeforeSize=b[1]*1024;} if ( split(a[1],b,"K") > 1 ) {BeforeSize=b[1];} if ( split(a[2],b,"M") > 1 ) {AfterSize=b[1]*1024;} if ( split(a[2],b,"K") > 1 ) {AfterSize=b[1];} if ( split(a[3],b,"M") > 1 ) {TotalSize=b[1]*1024;} if ( split(a[3],b,"K") > 1 ) {TotalSize=b[1];} } ###################### # Emit an output line when you find input that looks like this: # [Times: user=1.41 sys=0.08, real=0.24 secs] ###################### /\[Times/ { if (G1GC==1) { gsub ( "," , "" ); split($2,a,"="); UserTime=a[2]; split($3,a,"="); SysTime=a[2]; split($4,a,"="); RealTime=a[2]; print DateTime,SecondsSinceLaunch,IncrementalCount,FullCount,UserTime,SysTime,RealTime,BeforeSize,AfterSize,TotalSize; G1GC=0; } } The resulting summary is about 25X smaller that the original file, but still difficult for a human to digest. SecondsSinceLaunch IncrementalCount FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize ... 2014-05-12T18:36:34.669-0700: 3985.744 561 0 0.57 0.06 0.16 1724416 1720320 9437184 2014-05-12T18:36:34.839-0700: 3985.914 562 0 0.51 0.06 0.19 1724416 1720320 9437184 2014-05-12T18:36:35.069-0700: 3986.144 563 0 0.60 0.04 0.27 1724416 1721344 9437184 2014-05-12T18:36:35.354-0700: 3986.429 564 0 0.33 0.04 0.09 1725440 1722368 9437184 2014-05-12T18:36:35.545-0700: 3986.620 565 0 0.58 0.04 0.17 1726464 1722368 9437184 2014-05-12T18:36:35.726-0700: 3986.801 566 0 0.43 0.05 0.12 1726464 1722368 9437184 2014-05-12T18:36:35.856-0700: 3986.930 567 0 0.30 0.04 0.07 1726464 1723392 9437184 2014-05-12T18:36:35.947-0700: 3987.023 568 0 0.61 0.04 0.26 1727488 1723392 9437184 2014-05-12T18:36:36.228-0700: 3987.302 569 0 0.46 0.04 0.16 1731584 1724416 9437184 Reading the Data into R Once the GC log data had been cleansed, either by processing the first format with the shell script, or by processing the second format with the awk script, it was easy to read the data into R. g1gc.df = read.csv("summary.txt", row.names = NULL, stringsAsFactors=FALSE,sep="") str(g1gc.df) ## 'data.frame': 8307 obs. of 10 variables: ## $ row.names : chr "2014-05-12T14:00:32.868-0700:" "2014-05-12T14:00:33.179-0700:" "2014-05-12T14:00:33.677-0700:" "2014-05-12T14:00:35.538-0700:" ... ## $ SecondsSinceLaunch: num 1.16 1.47 1.97 3.83 6.1 ... ## $ IncrementalCount : int 0 1 2 3 4 5 6 7 8 9 ... ## $ FullCount : int 0 0 0 0 0 0 0 0 0 0 ... ## $ UserTime : num 0.11 0.05 0.04 0.21 0.08 0.26 0.31 0.33 0.34 0.56 ... ## $ SysTime : num 0.04 0.01 0.01 0.05 0.01 0.06 0.07 0.06 0.07 0.09 ... ## $ RealTime : num 0.02 0.02 0.01 0.04 0.02 0.04 0.05 0.04 0.04 0.06 ... ## $ BeforeSize : int 8192 5496 5768 22528 24576 43008 34816 53248 55296 93184 ... ## $ AfterSize : int 1400 1672 2557 4907 7072 14336 16384 18432 19456 21504 ... ## $ TotalSize : int 9437184 9437184 9437184 9437184 9437184 9437184 9437184 9437184 9437184 9437184 ... head(g1gc.df) ## row.names SecondsSinceLaunch IncrementalCount ## 1 2014-05-12T14:00:32.868-0700: 1.161 0 ## 2 2014-05-12T14:00:33.179-0700: 1.472 1 ## 3 2014-05-12T14:00:33.677-0700: 1.969 2 ## 4 2014-05-12T14:00:35.538-0700: 3.830 3 ## 5 2014-05-12T14:00:37.811-0700: 6.103 4 ## 6 2014-05-12T14:00:41.428-0700: 9.720 5 ## FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize ## 1 0 0.11 0.04 0.02 8192 1400 9437184 ## 2 0 0.05 0.01 0.02 5496 1672 9437184 ## 3 0 0.04 0.01 0.01 5768 2557 9437184 ## 4 0 0.21 0.05 0.04 22528 4907 9437184 ## 5 0 0.08 0.01 0.02 24576 7072 9437184 ## 6 0 0.26 0.06 0.04 43008 14336 9437184 Basic Statistics Once the data has been read into R, simple statistics are very easy to generate. All of the numbers from high school statistics are available via simple commands. For example, generate a summary of every column: summary(g1gc.df) ## row.names SecondsSinceLaunch IncrementalCount FullCount ## Length:8307 Min. : 1 Min. : 0 Min. : 0.0 ## Class :character 1st Qu.: 9977 1st Qu.:2048 1st Qu.: 0.0 ## Mode :character Median :12855 Median :4136 Median : 12.0 ## Mean :12527 Mean :4156 Mean : 31.6 ## 3rd Qu.:15758 3rd Qu.:6262 3rd Qu.: 61.0 ## Max. :55484 Max. :8391 Max. :113.0 ## UserTime SysTime RealTime BeforeSize ## Min. :0.040 Min. :0.0000 Min. : 0.0 Min. : 5476 ## 1st Qu.:0.470 1st Qu.:0.0300 1st Qu.: 0.1 1st Qu.:5137920 ## Median :0.620 Median :0.0300 Median : 0.1 Median :6574080 ## Mean :0.751 Mean :0.0355 Mean : 0.3 Mean :5841855 ## 3rd Qu.:0.920 3rd Qu.:0.0400 3rd Qu.: 0.2 3rd Qu.:7084032 ## Max. :3.370 Max. :1.5600 Max. :488.1 Max. :8696832 ## AfterSize TotalSize ## Min. : 1380 Min. :9437184 ## 1st Qu.:5002752 1st Qu.:9437184 ## Median :6559744 Median :9437184 ## Mean :5785454 Mean :9437184 ## 3rd Qu.:7054336 3rd Qu.:9437184 ## Max. :8482816 Max. :9437184 Q: What is the total amount of User CPU time spent in garbage collection? sum(g1gc.df$UserTime) ## [1] 6236 As you can see, less than two hours of CPU time was spent in garbage collection. Is that too much? To find the percentage of time spent in garbage collection, divide the number above by total_elapsed_time*CPU_count. In this case, there are a lot of CPU’s and it turns out the the overall amount of CPU time spent in garbage collection isn’t a problem when viewed in isolation. When calculating rates, i.e. events per unit time, you need to ask yourself if the rate is homogenous across the time period in the log file. Does the log file include spikes of high activity that should be separately analyzed? Averaging in data from nights and weekends with data from business hours may alias problems. If you have a reason to suspect that the garbage collection rates include peaks and valleys that need independent analysis, see the “Time Series” section, below. Q: How much garbage is collected on each pass? The amount of heap space that is recovered per GC pass is surprisingly low: At least one collection didn’t recover any data. (“Min.=0”) 25% of the passes recovered 3MB or less. (“1st Qu.=3072”) Half of the GC passes recovered 4MB or less. (“Median=4096”) The average amount recovered was 56MB. (“Mean=56390”) 75% of the passes recovered 36MB or less. (“3rd Qu.=36860”) At least one pass recovered 2GB. (“Max.=2121000”) g1gc.df$Delta = g1gc.df$BeforeSize - g1gc.df$AfterSize summary(g1gc.df$Delta) ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 0 3070 4100 56400 36900 2120000 Q: What is the maximum User CPU time for a single collection? The worst garbage collection (“Max.”) is many standard deviations away from the mean. The data appears to be right skewed. summary(g1gc.df$UserTime) ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 0.040 0.470 0.620 0.751 0.920 3.370 sd(g1gc.df$UserTime) ## [1] 0.3966 Basic Graphics Once the data is in R, it is trivial to plot the data with formats including dot plots, line charts, bar charts (simple, stacked, grouped), pie charts, boxplots, scatter plots histograms, and kernel density plots. Histogram of User CPU Time per Collection I don't think that this graph requires any explanation. hist(g1gc.df$UserTime, main="User CPU Time per Collection", xlab="Seconds", ylab="Frequency") Box plot to identify outliers When the initial data is viewed with a box plot, you can see the one crazy outlier in the real time per GC. Save this data point for future analysis and drop the outlier so that it’s not throwing off our statistics. Now the box plot shows many outliers, which will be examined later, using times series analysis. Notice that the scale of the x-axis changes drastically once the crazy outlier is removed. par(mfrow=c(2,1)) boxplot(g1gc.df$UserTime,g1gc.df$SysTime,g1gc.df$RealTime, main="Box Plot of Time per GC\n(dominated by a crazy outlier)", names=c("usr","sys","elapsed"), xlab="Seconds per GC", ylab="Time (Seconds)", horizontal = TRUE, outcol="red") crazy.outlier.df=g1gc.df[g1gc.df$RealTime > 400,] g1gc.df=g1gc.df[g1gc.df$RealTime < 400,] boxplot(g1gc.df$UserTime,g1gc.df$SysTime,g1gc.df$RealTime, main="Box Plot of Time per GC\n(crazy outlier excluded)", names=c("usr","sys","elapsed"), xlab="Seconds per GC", ylab="Time (Seconds)", horizontal = TRUE, outcol="red") box(which = "outer", lty = "solid") Here is the crazy outlier for future analysis: crazy.outlier.df ## row.names SecondsSinceLaunch IncrementalCount ## 8233 2014-05-12T23:15:43.903-0700: 20741 8316 ## FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize ## 8233 112 0.55 0.42 488.1 8381440 8235008 9437184 ## Delta ## 8233 146432 R Time Series Data To analyze the garbage collection as a time series, I’ll use Z’s Ordered Observations (zoo). “zoo is the creator for an S3 class of indexed totally ordered observations which includes irregular time series.” require(zoo) ## Loading required package: zoo ## ## Attaching package: 'zoo' ## ## The following objects are masked from 'package:base': ## ## as.Date, as.Date.numeric head(g1gc.df[,1]) ## [1] "2014-05-12T14:00:32.868-0700:" "2014-05-12T14:00:33.179-0700:" ## [3] "2014-05-12T14:00:33.677-0700:" "2014-05-12T14:00:35.538-0700:" ## [5] "2014-05-12T14:00:37.811-0700:" "2014-05-12T14:00:41.428-0700:" options("digits.secs"=3) times=as.POSIXct( g1gc.df[,1], format="%Y-%m-%dT%H:%M:%OS%z:") g1gc.z = zoo(g1gc.df[,-c(1)], order.by=times) head(g1gc.z) ## SecondsSinceLaunch IncrementalCount FullCount ## 2014-05-12 17:00:32.868 1.161 0 0 ## 2014-05-12 17:00:33.178 1.472 1 0 ## 2014-05-12 17:00:33.677 1.969 2 0 ## 2014-05-12 17:00:35.538 3.830 3 0 ## 2014-05-12 17:00:37.811 6.103 4 0 ## 2014-05-12 17:00:41.427 9.720 5 0 ## UserTime SysTime RealTime BeforeSize AfterSize ## 2014-05-12 17:00:32.868 0.11 0.04 0.02 8192 1400 ## 2014-05-12 17:00:33.178 0.05 0.01 0.02 5496 1672 ## 2014-05-12 17:00:33.677 0.04 0.01 0.01 5768 2557 ## 2014-05-12 17:00:35.538 0.21 0.05 0.04 22528 4907 ## 2014-05-12 17:00:37.811 0.08 0.01 0.02 24576 7072 ## 2014-05-12 17:00:41.427 0.26 0.06 0.04 43008 14336 ## TotalSize Delta ## 2014-05-12 17:00:32.868 9437184 6792 ## 2014-05-12 17:00:33.178 9437184 3824 ## 2014-05-12 17:00:33.677 9437184 3211 ## 2014-05-12 17:00:35.538 9437184 17621 ## 2014-05-12 17:00:37.811 9437184 17504 ## 2014-05-12 17:00:41.427 9437184 28672 Example of Two Benchmark Runs in One Log File The data in the following graph is from a different log file, not the one of primary interest to this article. I’m including this image because it is an example of idle periods followed by busy periods. It would be uninteresting to average the rate of garbage collection over the entire log file period. More interesting would be the rate of garbage collect in the two busy periods. Are they the same or different? Your production data may be similar, for example, bursts when employees return from lunch and idle times on weekend evenings, etc. Once the data is in an R Time Series, you can analyze isolated time windows. Clipping the Time Series data Flashing back to our test case… Viewing the data as a time series is interesting. You can see that the work intensive time period is between 9:00 PM and 3:00 AM. Lets clip the data to the interesting period:     par(mfrow=c(2,1)) plot(g1gc.z$UserTime, type="h", main="User Time per GC\nTime: Complete Log File", xlab="Time of Day", ylab="CPU Seconds per GC", col="#1b9e77") clipped.g1gc.z=window(g1gc.z, start=as.POSIXct("2014-05-12 21:00:00"), end=as.POSIXct("2014-05-13 03:00:00")) plot(clipped.g1gc.z$UserTime, type="h", main="User Time per GC\nTime: Limited to Benchmark Execution", xlab="Time of Day", ylab="CPU Seconds per GC", col="#1b9e77") box(which = "outer", lty = "solid") Cumulative Incremental and Full GC count Here is the cumulative incremental and full GC count. When the line is very steep, it indicates that the GCs are repeating very quickly. Notice that the scale on the Y axis is different for full vs. incremental. plot(clipped.g1gc.z[,c(2:3)], main="Cumulative Incremental and Full GC count", xlab="Time of Day", col="#1b9e77") GC Analysis of Benchmark Execution using Time Series data In the following series of 3 graphs: The “After Size” show the amount of heap space in use after each garbage collection. Many Java objects are still referenced, i.e. alive, during each garbage collection. This may indicate that the application has a memory leak, or may indicate that the application has a very large memory footprint. Typically, an application's memory footprint plateau's in the early stage of execution. One would expect this graph to have a flat top. The steep decline in the heap space may indicate that the application crashed after 2:00. The second graph shows that the outliers in real execution time, discussed above, occur near 2:00. when the Java heap seems to be quite full. The third graph shows that Full GCs are infrequent during the first few hours of execution. The rate of Full GC's, (the slope of the cummulative Full GC line), changes near midnight.   plot(clipped.g1gc.z[,c("AfterSize","RealTime","FullCount")], xlab="Time of Day", col=c("#1b9e77","red","#1b9e77")) GC Analysis of heap recovered Each GC trace includes the amount of heap space in use before and after the individual GC event. During garbage coolection, unreferenced objects are identified, the space holding the unreferenced objects is freed, and thus, the difference in before and after usage indicates how much space has been freed. The following box plot and bar chart both demonstrate the same point - the amount of heap space freed per garbage colloection is surprisingly low. par(mfrow=c(2,1)) boxplot(as.vector(clipped.g1gc.z$Delta), main="Amount of Heap Recovered per GC Pass", xlab="Size in KB", horizontal = TRUE, col="red") hist(as.vector(clipped.g1gc.z$Delta), main="Amount of Heap Recovered per GC Pass", xlab="Size in KB", breaks=100, col="red") box(which = "outer", lty = "solid") This graph is the most interesting. The dark blue area shows how much heap is occupied by referenced Java objects. This represents memory that holds live data. The red fringe at the top shows how much data was recovered after each garbage collection. barplot(clipped.g1gc.z[,c("AfterSize","Delta")], col=c("#7570b3","#e7298a"), xlab="Time of Day", border=NA) legend("topleft", c("Live Objects","Heap Recovered on GC"), fill=c("#7570b3","#e7298a")) box(which = "outer", lty = "solid") When I discuss the data in the log files with the customer, I will ask for an explaination for the large amount of referenced data resident in the Java heap. There are two are posibilities: There is a memory leak and the amount of space required to hold referenced objects will continue to grow, limited only by the maximum heap size. After the maximum heap size is reached, the JVM will throw an “Out of Memory” exception every time that the application tries to allocate a new object. If this is the case, the aplication needs to be debugged to identify why old objects are referenced when they are no longer needed. The application has a legitimate requirement to keep a large amount of data in memory. The customer may want to further increase the maximum heap size. Another possible solution would be to partition the application across multiple cluster nodes, where each node has responsibility for managing a unique subset of the data. Conclusion In conclusion, R is a very powerful tool for the analysis of Java garbage collection log files. The primary difficulty is data cleansing so that information can be read into an R data frame. Once the data has been read into R, a rich set of tools may be used for thorough evaluation.

    Read the article

  • How to analyze data

    - by Subhash Dike
    We are working on an application that allows user to search/read some content in a particular domain. We wanted to add some capability in the app which can suggest user some content based on the usage pattern (analyze data based on frequency and relevance). Currently every time user search or read something we do store that information in backend database. We would like to use this data to present some additional content to user. Could someone explain what kind of tools will be required for such a job and any example? And what this concept is called, data analysis? data mining? business intelligence? or something else? Update: Sorry for being too broad, here is an example SQL Database (Just to give an idea, actual db is little different with normalization and stuff) Table: UserArticles Fields: UserName | ArticleId | ArticleTitle | DateVisited | ArticleCategory Table: CategoryArticles Fields: Category | Article Title | Author etc. One Category may have one more articles. One user may have read the same article multiple times (in this case we place additional entry in the user article table. Task: Use the information availabel in UserArticle table and rank categories in order which would be presented to user automatically in other part of application. Factors to be considered are frequency and recency. This might be possible through simple queries or may require specialized tools. Either way, the task is what mention above. I am not too sure which route to take, hence the question. Thoughts??

    Read the article

  • Country specific content vs global content

    - by Ando
    I have a global product presentation website myproduct.com For certain countries I also own the country domain: myproduct.co.uk, myproduct.com.au, myproduct.es, myproduct.de, etc. The presentation website is translated in multiple languages and I set up redirects: myproduct.es will redirect to myproduct.com/es/, myproduct.de will redirect to myproduct.com/de/, etc. . The content so far is the same, just translated in different languages. The advantages are that it's easy to keep the content aligned - everything is managed from one centralized dashboard (I'm using Wordpress with qtranslate). Now I'm running into trouble as for different countries I want localized content - for UK I want to run different promotions and use a different reseller than for .com.au so I would like that users coming from myproduct.co.uk see something different than those coming from myproduct.com.au (and not be redirected to myproduct.com as they are right now). How can I achieve this? I could duplicate the whole main website and modify only certain parts but then I would have a lot of duplicate content (e.g. info about how the product works) and I would have pages that are likely to change (FAQ page) that I would have to keep updated over all websites. I can duplicate only partially the main website: on the localized website I would have only the pages that are different and then all other links would point to the .com site. This would solve the duplication problem but would cause confusion for the user as you would navigate from .co.uk to .com without noticing and then wonder how to get back. Other, better option?

    Read the article

  • Repeat use of Schema / Rich Snippets Markup i.e LocalBusiness Data

    - by bybe
    I am unable to find official wording and I'm hoping that some Rich Snippets/Schema Guru can give me some insight into proper usage of repeated content when it comes to using markup. I'm building a site that wants to use Schema as the markup type and the owner would like as much usage as possible. The business name, telephone and address will appear on every page now is it valid or even useful to use Rich Snippets on every page where this information is displayed. For example this information appears in the header, and footer of every page of the site and too give you an example of my current markup see below: <body itemscope itemtype="http://schema.org/LocalBusiness"> <header> <a itemprop="url" href="http://www.domain.co.uk/"> <img itemprop="logo" src="image.png" alt="Company Name Logo" /> </a> <span itemprop="telephone">01202 000 000</span> </header> <div> This is where the content will go</div> <footer> <span itemprop="name">Company Name</span> <span itemprop="description"> A small little bit about this company</span> <div itemprop="address" itemscope itemtype="http://schema.org/PostalAddress"> <span itemprop="streetAddress">Address Goes here</span> <span itemprop="addressLocality">Area Here</span>, <span itemprop="addressRegion">Region Here</span> </div> </footer> </body> !-- Local Business Schema Now Closed --> So as you can see above this information will be displayed on every single page.... Is this valid or bad to repeat usage of this information in schema format...

    Read the article

  • Reference Data Management

    - by rahulkamath
    Normal 0 false false false EN-US X-NONE X-NONE MicrosoftInternetExplorer4 /* Style Definitions */ table.MsoNormalTable {mso-style-name:"Table Normal"; mso-tstyle-rowband-size:0; mso-tstyle-colband-size:0; mso-style-noshow:yes; mso-style-priority:99; mso-style-qformat:yes; mso-style-parent:""; mso-padding-alt:0in 5.4pt 0in 5.4pt; mso-para-margin:0in; mso-para-margin-bottom:.0001pt; mso-pagination:widow-orphan; font-size:11.0pt; font-family:"Calibri","sans-serif"; mso-ascii-font-family:Calibri; mso-ascii-theme-font:minor-latin; mso-fareast-font-family:"Times New Roman"; mso-fareast-theme-font:minor-fareast; mso-hansi-font-family:Calibri; mso-hansi-theme-font:minor-latin; mso-bidi-font-family:"Times New Roman"; mso-bidi-theme-font:minor-bidi;} table.MsoTableColorfulListAccent2 {mso-style-name:"Colorful List - Accent 2"; mso-tstyle-rowband-size:1; mso-tstyle-colband-size:1; mso-style-priority:72; mso-style-unhide:no; mso-padding-alt:0in 5.4pt 0in 5.4pt; mso-tstyle-shading:#F8EDED; mso-tstyle-shading-themecolor:accent2; mso-tstyle-shading-themetint:25; mso-para-margin:0in; mso-para-margin-bottom:.0001pt; mso-pagination:widow-orphan; font-size:10.0pt; font-family:"Calibri","sans-serif"; color:black; mso-themecolor:text1;} table.MsoTableColorfulListAccent2FirstRow {mso-style-name:"Colorful List - Accent 2"; mso-table-condition:first-row; mso-style-priority:72; mso-style-unhide:no; mso-tstyle-shading:#9E3A38; mso-tstyle-shading-themecolor:accent2; mso-tstyle-shading-themeshade:204; mso-tstyle-border-bottom:1.5pt solid white; mso-tstyle-border-bottom-themecolor:background1; color:white; mso-themecolor:background1; mso-ansi-font-weight:bold; mso-bidi-font-weight:bold;} table.MsoTableColorfulListAccent2LastRow {mso-style-name:"Colorful List - Accent 2"; mso-table-condition:last-row; mso-style-priority:72; mso-style-unhide:no; mso-tstyle-shading:white; mso-tstyle-shading-themecolor:background1; mso-tstyle-border-top:1.5pt solid black; mso-tstyle-border-top-themecolor:text1; color:#9E3A38; mso-themecolor:accent2; mso-themeshade:204; mso-ansi-font-weight:bold; mso-bidi-font-weight:bold;} table.MsoTableColorfulListAccent2FirstCol {mso-style-name:"Colorful List - Accent 2"; mso-table-condition:first-column; mso-style-priority:72; mso-style-unhide:no; mso-ansi-font-weight:bold; mso-bidi-font-weight:bold;} table.MsoTableColorfulListAccent2LastCol {mso-style-name:"Colorful List - Accent 2"; mso-table-condition:last-column; mso-style-priority:72; mso-style-unhide:no; mso-ansi-font-weight:bold; mso-bidi-font-weight:bold;} table.MsoTableColorfulListAccent2OddColumn {mso-style-name:"Colorful List - Accent 2"; mso-table-condition:odd-column; mso-style-priority:72; mso-style-unhide:no; mso-tstyle-shading:#EFD3D2; mso-tstyle-shading-themecolor:accent2; mso-tstyle-shading-themetint:63; mso-tstyle-border-top:cell-none; mso-tstyle-border-left:cell-none; mso-tstyle-border-bottom:cell-none; mso-tstyle-border-right:cell-none; mso-tstyle-border-insideh:cell-none; mso-tstyle-border-insidev:cell-none;} table.MsoTableColorfulListAccent2OddRow {mso-style-name:"Colorful List - Accent 2"; mso-table-condition:odd-row; mso-style-priority:72; mso-style-unhide:no; mso-tstyle-shading:#F2DBDB; mso-tstyle-shading-themecolor:accent2; mso-tstyle-shading-themetint:51;} Reference Data Management Oracle Data Relationship Management (DRM) has always been extremely powerful as an Enterprise MDM solution that can help manage changes to master data in a way that influences enterprise structure, whether it be mastering chart of accounts to enable financial transformation, or revamping organization structures to drive business transformation and operational efficiencies, or mastering sales territories in light of rapid fire acquisitions that require frequent sales territory refinement, equitable distribution of leads and accounts to salespersons, and alignment of budget/forecast with results to optimize sales coverage. Increasingly, DRM is also being utilized by Oracle customers for reference data management, an emerging solution space that deserves some explanation. What is reference data? Reference data is a close cousin of master data. While master data may be more rapidly changing, requires consensus building across stakeholders and lends structure to business transactions, reference data is simpler, more slowly changing, but has semantic content that is used to categorize or group other information assets – including master data – and give them contextual value. The following table contains an illustrative list of examples of reference data by type. Reference data types may include types and codes, business taxonomies, complex relationships & cross-domain mappings or standards. Types & Codes Taxonomies Relationships / Mappings Standards Transaction Codes Industry Classification Categories and Codes, e.g., North America Industry Classification System (NAICS) Product / Segment; Product / Geo Calendars (e.g., Gregorian, Fiscal, Manufacturing, Retail, ISO8601) Lookup Tables (e.g., Gender, Marital Status, etc.) Product Categories City à State à Postal Codes Currency Codes (e.g., ISO) Status Codes Sales Territories (e.g., Geo, Industry Verticals, Named Accounts, Federal/State/Local/Defense) Customer / Market Segment; Business Unit / Channel Country Codes (e.g., ISO 3166, UN) Role Codes Market Segments Country Codes / Currency Codes / Financial Accounts Date/Time, Time Zones (e.g., ISO 8601) Domain Values Universal Standard Products and Services Classification (UNSPSC), eCl@ss International Classification of Diseases (ICD) e.g., ICD9 à IC10 mappings Tax Rates Why manage reference data? Reference data carries contextual value and meaning and therefore its use can drive business logic that helps execute a business process, create a desired application behavior or provide meaningful segmentation to analyze transaction data. Further, mapping reference data often requires human judgment. Sample Use Cases of Reference Data Management Healthcare: Diagnostic Codes The reference data challenges in the healthcare industry offer a case in point. Part of being HIPAA compliant requires medical practitioners to transition diagnosis codes from ICD-9 to ICD-10, a medical coding scheme used to classify diseases, signs and symptoms, causes, etc. The transition to ICD-10 has a significant impact on business processes, procedures, contracts, and IT systems. Since both code sets ICD-9 and ICD-10 offer diagnosis codes of very different levels of granularity, human judgment is required to map ICD-9 codes to ICD-10. The process requires collaboration and consensus building among stakeholders much in the same way as does master data management. Moreover, to build reports to understand utilization, frequency and quality of diagnoses, medical practitioners may need to “cross-walk” mappings -- either forward to ICD-10 or backwards to ICD-9 depending upon the reporting time horizon. Spend Management: Product, Service & Supplier Codes Similarly, as an enterprise looks to rationalize suppliers and leverage their spend, conforming supplier codes, as well as product and service codes requires supporting multiple classification schemes that may include industry standards (e.g., UNSPSC, eCl@ss) or enterprise taxonomies. Aberdeen Group estimates that 90% of companies rely on spreadsheets and manual reviews to aggregate, classify and analyze spend data, and that data management activities account for 12-15% of the sourcing cycle and consume 30-50% of a commodity manager’s time. Creating a common map across the extended enterprise to rationalize codes across procurement, accounts payable, general ledger, credit card, procurement card (P-card) as well as ACH and bank systems can cut sourcing costs, improve compliance, lower inventory stock, and free up talent to focus on value added tasks. Specialty Finance: Point of Sales Transaction Codes and Product Codes In the specialty finance industry, enterprises are confronted with usury laws – governed at the state and local level – that regulate financial product innovation as it relates to consumer loans, check cashing and pawn lending. To comply, it is important to demonstrate that transactions booked at the point of sale are posted against valid product codes that were on offer at the time of booking the sale. Since new products are being released at a steady stream, it is important to ensure timely and accurate mapping of point-of-sale transaction codes with the appropriate product and GL codes to comply with the changing regulations. Multi-National Companies: Industry Classification Schemes As companies grow and expand across geographies, a typical challenge they encounter with reference data represents reconciling various versions of industry classification schemes in use across nations. While the United States, Mexico and Canada conform to the North American Industry Classification System (NAICS) standard, European Union countries choose different variants of the NACE industry classification scheme. Multi-national companies must manage the individual national NACE schemes and reconcile the differences across countries. Enterprises must invest in a reference data change management application to address the challenge of distributing reference data changes to downstream applications and assess which applications were impacted by a given change.

    Read the article

  • Multilingual website without language component in the URL

    - by user359650
    I'm working on a website for Canada which will have French and English versions. For SEO purposes, I would like to avoid using any language tag in URLs because I believe it will have more impact (e.g. example.ca/products better than en.example.ca/products or example.ca/en/products). I believe this is technically possible because the2 languages are sufficiently different that the URLs won't be conflicting with one another (e.g. if you want a "product" page, it will be /products in English, and /produits in French so you know which language the URL is about). Since Google (and most likely others) doesn't rely on the URL (nor HTML tags) to determine the content language I don't see any problems with search engines. To make this possible I've thought about using a cookie distinct from the session cookie (e.g. example.org_language) with long term expiry (e.g. N years) that will memorize the language chosen by the user. That way when people visit the website with a new browser session, they get served the proper language. I have already given up on users being able to switch one page from English to French: when people will chose English or French from the menu they will be redirected to the corresponding version of the home page. Do you foresee any problems with not using a language component in the URL (whether domain or path)? (as long as one makes sure URLS don't conflict).

    Read the article

  • What to learn to make a photo gallery site like this? [closed]

    - by Steven Chen
    I started a Wordpress site for my blog but now I want to start another domain for sharing my photos. I used to use Flickr but it requires you to sign up for Flickr Pro if you want to upload over a certain amount of photos. Since I don't want to pay for that, I'm looking to create a photo gallery site like this: http://10mmgalore.com/ or http://www.shuttermaki.com/ But how do you go about with that? Would I be able to install Wordpress and find a theme like that? And/Or are there any Wordpress plugins that I can install to make it like look like that? Or do I need different tools and platforms asides from Wordpress? So can someone list the features, tools and information that I need to make a photo gallery site? The main functions that I want for my site is to have a picture in the middle and when the user clicks on the photo, it goes to the next photo; or there are left and right arrows that users can click to the next photo. I just want to create an album website for my 365 days project (a photo a day). It doesn't have to be as fancy as the sites mentioned above. Thank you!

    Read the article

  • best way to host multiple wordpress site on single vps [migrated]

    - by Ben
    Not sure if this is webmaster or a WordPress question, it's a bit half and half, sorry if I'm posting in the wrong place. Without using Multi-Site or installing new WordPress CMS' in second-level domains, what's the best way to get multiple WordPress installs running on my VPS (running Linux powered CentOS 6 with WHM and cPanel)? It's currently working but only by setting the permalinks option to the default setting, so the URLs aren't human-friendly. I have come across something called WPSiteStack, though I'd really rather not go down this route. Long story short, I need the following: Seperate installs so one core / theme / plugin update doesn't affect all sites and increases security of all sites; 'Pretty' permalinks; Each WordPress install must be in the root of it's own domain to ensure that I can accurately measure my clients' quotas; It may also be worth noting that some functions within each install use the $_SERVER['DOCUMENT_ROOT'] and $_SERVER['HOST'] variables. I have already edited the httpd-vhosts.conf, httpd.conf and .htaccess files but this hasn't made any changes. So any ideas what I'm missing or doing wrong? Any help is much appreciated.

    Read the article

  • Client-Server MMOG & data structures sync when joining / playing

    - by plang
    After reading a few articles on MMOG architecture, there is still one point on which I cannot find much information: it has to do with how you keep in sync server data on the client, when you join, and while you play. A pretty vague question, I agree. Let me refine it: Let's say we have an MMOG virtual world subdivided into geographical cells. A player in a cell is mostly interested in what happens in the cell itself, and all the surrounding cells, not more. When joining the game for the first time, the only thing we can do is send some sort of "database dump" of the interesting cells to the client. When playing, I guess it would be very inefficient to do the same thing regularly. I imagine the best thing to do is to send "deltas" to the client, which would allow keeping the local database in sync. Now let's say the player moves, and arrives in another cell. Surrounding cells change, and for all the new cells the player subscribes, the same technique as used when joining the game has to be used: some sort of "database dump". This mechanic of joining/moving in a cell-based MMOG virtual world interests me, and I was wondering if there were tried and tested techniques in this domain. Thanks!

    Read the article

  • starting up with VPS or cloud hosting? [closed]

    - by FlyOn
    Possible Duplicate: How to find web hosting that meets my requirements? Summary: I want to start hosting my product. I'd like to register domains (at some point). I'm a linux beginner. Thinking about scalability and price, I'm thinking am I better off on a VPN to get started or would some form of cloud hosting be better (not being familiar with either). Full question: I'm creating a product where people can create their own 3D representations of whatever data / info they have, and (re)organise that data. The product is coming along beautifully on my local environment, but it's about time I start getting some form of hosting ready, and I could really use some advice where / how to get started: I'd like people to be able to move/register their own domains on my server. I could start without this just to demo the product, but it would be the very first on the todo list. I'd like to automatically copy some files / install databases etc for each domain. I probably want to see if I can let users manage their own subdomains at some points, but for now: I'd like start as simple as possible. I've always on a windows machine, so my linux experience is quite basic. I really don't mind getting into it, but I'm thinking it's better to get my product out first of all and see where to go from there. Although... I'd like things to be scalable. If I set up some reseller VPN now which only scales to 100 domains or so, which means I have to set up something else / move again when I pass that level, or which means that I'm in trouble if I suddenly get lots of new customers... hmm. Finally, I need to start cheap. I'm putting all I have into starting this company, and live on very little. So before I have any customers, 50 dollars a month is a fair bit and 100 dollars a month may be too much. If anyone has some tips to help get me started I'd be really grateful.

    Read the article

  • Obscure SPUtility.SendMail Behavior When Manually Passing in Mail Headers

    - by Damon
    There are two ways to send mail in SharePoint: you can either use the mail components from the System.Net namespace, or you can send email using SharePoint's SPUtility.SendMail method.  One of the benefits of the SPUtility.SendMail method is that it uses the mail configuration from SharePoint, so you can manage settings in Central Administration instead of having to go through and modify your web.config file.  SPUtility.SendMail can get the job done, but it's defiantly not as developer friendly as the components from the System.Net namespace.  If you want to CC someone on an email, for example, you do NOT have a nice CC parameter - you have to manually add the CC mail header and pass it into the SPUtility.SendMail method.  I had to do this the other day, and ran into a really obscure issue. If you do NOT pass the headers into the method then SharePoint sends the email using the From Address configured in the Outgoing Mail settings in Central Admin.  If you pass headers into the method, but do not include the from header, then SharePoint sends the mail using the email address of the current user. This can be an issue if your mail server is setup to reject an email from an invalid email address or an email address that is not on your domain.  The way to fix this issue is to always pass in the from header.  If you want to use the configured From address, then you can do the following: SPWebApplication webApp = SPWebApplication.Lookup(new Uri(SPContext.Current.Site.Url)); StringDictionary headers = new StringDictionary(); headers.Add("from", webApp.OutboundMailSenderAddress);

    Read the article

  • Best way to setup multiple sites' emails in my Gmail

    - by John
    I've a dozen sites and I want all of their emails come to my one gmail id and I want to reply centrally from Gmail only. I've also added all of those emails in "send email as:" list in Gmail. I could add email forwarders in my Cpanel but in that case I'll not be able to send email whose inboxes haven't been created( for example [email protected]). If I create email account then I'd receive emails in my inbox as well as forwared by the forwarder( to my gmail id). Otherwise I can setup Gmail for my domain. But for a dozen emails I'm not sure if that'd be fine. I see in http://www.google.com/enterprise/apps/business/pricing.html that for up to 10 emails it is free. But then to send email from webhosting the php code will need SMTP login details and leaving my important gmail account details in my webhosting account is very risky given my sites have been compromised twice. What is the best way to centralize all my emails so that I can read/reply/search from single place?

    Read the article

  • Choosing open source vs. proprietary CMS

    - by jkneip
    Hi- I've been tasked with redesigning a website for a small academic library. While only in charge of the site for 6 months, we've been maintaining static html pages edited in Dreamweaver for years. Last count of our total pages is around 400. Our university is going with an enterprise level solution called Sitefinity, although we maintain our own domain and are responsible to maintain our own presense. Some background-my library has a couple Microsoft IIS servers on which this static html site has been running. I'm advocating for the implementation of a CMS while doing this redesign. The problem is I'm basically the lone webmaster so I have no one to agree or disagree with my choice. There are also only 1-2 content editors right know for the site but a CMS could change that factor. I would like to use the functionality of having servers that run .NET and MS SQL but am more experience setting up and maintaining open source software like Wordpress or Drupal on web hosts. My main concern is choosing a CMS that will be easy to update / maintain / deal with upgrades (i.e., support) in case I'm not there in the future. So I'm wondering how to factor in the open source CMS vs. a relatively inexpensive commercial CMS decision and whether choosing PHP/MySQL vs. ASP.net framework for development environment will play into my decision. Thanks for any input that can be offered based on the details I've given. Thanks, Jason

    Read the article

  • cpnfigure open_basedir under Plesk

    - by cori
    This might be a question for ServerFault, and f it wasn't for the Plesk aspect I would ask it there to start with, so if it's better suited for over there let me know and I'll move it. I'm working on a dedicated server set up as a reseller account with Plesk to manage the domains and server configuration, and i need to add a directory to the local open_basedir configuration for a specific vhost. Given Plesk's normal methodology, I expected to be able to go to /var/www/vhost/{%DOMAINNAME%}/conf and modify vhost.conf and place a new value there, as I have successfully done with other configuration settings for this domain (turning safe_mode off, for instance). When I do so, however, the new setting doesn't take (per phpinfo();). If I edit httpd.conf (which the plesk configuration specifically says not to do in the notes at the top of httpd.conf) the setting takes. Is there something specific about the open_basdir setting that makes it not configurable in vhost.conf? How much trouble am I letting myself in for by editing the vhost-specific httpd.conf (I imagine is someone makes changes in the plesk web interface it might be overwritten, but what other risk is there)? Thanks!

    Read the article

  • Port numbers in Visual Studio projects and IIS

    - by aspdotnetuser
    I have a few questions about localhost and port numbers as this is an area where I do not have a lot of knowledge, and because I recently had to work with setting up Visual Studio projects and IIS and there are things I'm not clear on. I have the following questions on the things I find confusing. I thought it made more sense to include them all in one question instead of making separate questions. I have noticed a random port number is generated with projects I have worked on in the past, but I recently saw a project where the port number was fixed. What is the purpose of having a fixed/default localhost port number? i.e is it particularly useful on projects that have many programmers working on the project? If a solution contains multiple projects (for example, WCF services, Domain, MVC/Web pages), is it possible to setup a different localhost port for each of them? If so, what is the benefit of this? If a solution contains multiple projects and has different localhost urls/port numbers, must there be a corresponding website (and application pool) for each project in IIS? Or just for the project that contains the actual web pages?

    Read the article

  • Does unit testing lead to premature generalization (specifically in the context of C++)?

    - by Martin
    Preliminary notes I'll not go into the distinction of the different kinds of test there are, there are already a few questions on these sites regarding that. I'll take what's there and that says: unit testing in the sense of "testing the smallest isolatable unit of an application" from which this question actually derives The isolation problem What is the smallest isolatable unit of a program. Well, as I see it, it (highly?) depends on what language you are coding in. Micheal Feathers talks about the concept of a seam: [WEwLC, p31] A seam is a place where you can alter behavior in your program without editing in that place. And without going into the details, I understand a seam -- in the context of unit testing -- to be a place in a program where your "test" can interface with your "unit". Examples Unit test -- especially in C++ -- require from the code under test to add more seams that would be strictly called for for a given problem. Example: Adding a virtual interface where non-virtual implementation would have been sufficient Splitting -- generalizing(?) -- a (smallish) class further "just" to facilitate adding a test. Splitting a single-executable project into seemingly "independent" libs, "just" to facilitate compiling them independently for the tests. The question I'll try a few versions that hopefully ask about the same point: Is the way that Unit Tests require one to structure an application's code "only" beneficial for the unit tests or is it actually beneficial to the applications structure. Is the generalization code need to exhibit to be unit-testable useful for anything but the unit tests? Does adding unit tests force one to generalize unnecessarily? Is the shape unit tests force on code "always" also a good shape for the code in general as seen from the problem domain? I remember a rule of thumb that said don't generalize until you need to / until there's a second place that uses the code. With Unit Tests, there's always a second place that uses the code -- namely the unit test. So is this reason enough to generalize?

    Read the article

  • My new anti-patent BSD-based license: necessary and effective? [closed]

    - by paperjam
    I am writing multimedia software in a domain that is rife with software patents. I want to open source my software but only for the benefit of those who don't play the patent game, that is enthusiasts, small companies, research projects, etc. The idea is, if my code would infringe a software patent somewhere and a company pays to license that patent, they then lose the right to use and distribute my software. Now I detest license proliferation as much as anyone but I can't find an existing OSI approved license that does this. The GPL comes close, but it only restricts distribution, not use. I want to stop someone using my software should they obtain a patent license to do so. Does another license do this job? Is the wording below unambiguous? - I don't want a legal opinion, just whether it would be interpreted as I intend. Copyright (c) <year>, <copyright holder> All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: [ three standard new-BSD conditions not shown here] * No patents are licensed from any third party in respect of redistribution or use of this software or its derivatives unless the patent license is arranged to permit free use and distribution by all. THIS SOFTWARE IS... [standard BSD disclaimer not shown here]

    Read the article

  • Is it wise for a programmer to move into management?

    - by Desolate Planet
    Many times, a developer has suggested that I become a team leader because I'm motivated, but during my career in the IT industry, I've seen so many people who are great at programming, move into management and they are miserable. I've also seen many managers return to programming stating "I'm a technical person, I like technical problems". If this is such a common thing, why do developers feel compelled to leave the technical domain and move into management? Sure you'll have more money and more control, but if you don't enjoy your work and take your problems out on your tream. Secondly, I've been asked in developer interviews, "Would you consider leading a team?" and I'm always tempted to cite the Peter Principle based on what I've seen. I am interested in furthering myself, but not in the way the company may want i.e "Vice President of department blah". To be honest, I've seen this more often in the corporate world than in small development houses and it's always put me off ever going back to a corporate environment. I just feel that this is becoming more and more the norm and it's impacting team morale and degrading the quality of the work. Question: Based on what I've said, Is it a smart move for a technical person to move into management?

    Read the article

  • Mocking concrete class - Not recommended

    - by Mik378
    I've just read an excerpt of "Growing Object-Oriented Software" book which explains some reasons why mocking concrete class is not recommended. Here some sample code of a unit-test for the MusicCentre class: public class MusicCentreTest { @Test public void startsCdPlayerAtTimeRequested() { final MutableTime scheduledTime = new MutableTime(); CdPlayer player = new CdPlayer() { @Override public void scheduleToStartAt(Time startTime) { scheduledTime.set(startTime); } } MusicCentre centre = new MusicCentre(player); centre.startMediaAt(LATER); assertEquals(LATER, scheduledTime.get()); } } And his first explanation: The problem with this approach is that it leaves the relationship between the objects implicit. I hope we've made clear by now that the intention of Test-Driven Development with Mock Objects is to discover relationships between objects. If I subclass, there's nothing in the domain code to make such a relationship visible, just methods on an object. This makes it harder to see if the service that supports this relationship might be relevant elsewhere and I'll have to do the analysis again next time I work with the class. I can't figure out exactly what he means when he says: This makes it harder to see if the service that supports this relationship might be relevant elsewhere and I'll have to do the analysis again next time I work with the class. I understand that the service corresponds to MusicCentre's method called startMediaAt. What does he mean by "elsewhere"? The complete excerpt is here: http://www.mockobjects.com/2007/04/test-smell-mocking-concrete-classes.html

    Read the article

  • In MVC , DAO should be called from Controller or Model

    - by tito
    I have seen various arguments against the DAO being called from the Controller class directly and also the DAO from the Model class.Infact I personally feel that if we are following the MVC pattern , the controller should not coupled with the DAO , but the Model class should invoke the DAO from within and controller should invoke the model class.Why because , we can decouple the model class apart from a webapplication and expose the functionalities for various ways like for a REST service to use our model class. If we write the DAO invocation in the controller , it would not be possible for a REST service to reuse the functionality right ? I have summarized both the approaches below. Approach #1 public class CustomerController extends HttpServlet { proctected void doPost(....) { Customer customer = new Customer("xxxxx","23",1); new CustomerDAO().save(customer); } } Approach #2 public class CustomerController extends HttpServlet { proctected void doPost(....) { Customer customer = new Customer("xxxxx","23",1); customer.save(customer); } } public class Customer { ........... private void save(Customer customer){ new CustomerDAO().save(customer); } } Note- Here is what a definition of Model is : Model: The model manages the behavior and data of the application domain, responds to requests for information about its state (usually from the view), and responds to instructions to change state (usually from the controller). In event-driven systems, the model notifies observers (usually views) when the information changes so that they can react. I would need an expert opinion on this because I find many using #1 or #2 , So which one is it ?

    Read the article

  • Will search engines discover that our old pages have been 301 redirected if there are no more links to them in the old site?

    - by Obay
    We've moved our website to a new domain. Thousands of our pages come from one PHP file in the old site (e.g. oldsite.com/news.php?id=<id>). So we added some code in news.php file to do a 301 redirect to the specific corresponding news article in the new website (newsite.com/news/<id>). We have not yet done a 301 redirect for the root of the old site (so we could display a notice to our users that we've moved), but all links inside it are already 301 redirected. My concern is that, when Google crawls our old website, it will no longer be able to find the old news articles and discover that they have been 301 Redirected -- is this correct? If so, does that mean our PageRank won't be carried over to the new site? I've also read that we would need to create a sitemap for the new site. Is it possible to indicate in the sitemap the old and new locations of specific pages? Because if not, how will Google know? (I'm not sure change of address in Webmaster Tools would be specific enough).

    Read the article

  • 12.10 Wireless hotspot configuration and internet browsing - question

    - by Indian
    In our campus we have a leased line connection from a service provider, which has an external IP W.X.Y.Z. This connection is distributed from the server several sub-networks / subnets as follows: Faculty: 172.33....../ 255.255.0.0 Administration: 172.34......./255.255.255.0 Students: 172.35...../255.255.216.0 A student has a laptop with a fixed IP address 172.35.23.123 / 255.255.216.0 where the IP address is on the ethernet port. The gateways for internet access are 172.31.1.1 and 172.31.1.2. Further the student has a wireless port which is inaccessible in the hostel area. The OS of the student is Ubuntu 12.10. The student in the possession of an android phone on which he wishes to install specific software and therefore wishes to activate the internet therein. The student has already attempted the Wireless hotspot solution which works for 12.04 but has not been successful. Various instructions on the internet have helped the student to do the following Installation of dhcp server and hostapd: sudo apt-get install isc-dhcp-server sudo apt-get install hostapd File: /etc/network/interfaces auto lo iface lo inet loopback auto wlan0 iface wlan0 inet static address 10.10.0.1 netmask 255.255.255.0 dns-nameservers 172.31.1.1 172.31.1.2 File: /etc/dhcp/dhcpd.conf subnet 10.10.0.0 netmask 255.255.255.0 { range 10.10.0.2 10.10.0.4; option routers 10.10.0.1; option domain-name-servers 172.31.1.1 172.31.1.2; default-lease-time 6000; max-lease-time 72000; } File: /etc/hostapd/hostapd.conf interface=wlan0 driver=nl80211 ssid=my_hotspot channel=1 hw_mode=g auth_algs=1 wpa=3 wpa_passphrase=1234567890 wpa_key_mgmt=WPA-PSK wpa_pairwise=TKIP CCMP rsn_pairwise=CCMP File: /etc/default/hostapd RUN_DAEMON=”yes” DAEMON_CONF=”/etc/hostapd/hostapd.conf” DAEMON_OPTS=”-dd” File: /etc/default/isc-dhcp-server INTERFACES=”wlan0” File: /etc/rc.local iptables -t nat -A POSTROUTING -s 10.10.0.0/16 -o eth0 -j MASQUERADE exit 0 After all the configuration, the computer is restarted. The student can see that the hotspot named “my_hotspot” is available. The hotspot also awards an address to the android phone. The student will now be able to browse the internet.

    Read the article

  • configure open_basedir under Plesk

    - by cori
    This might be a question for ServerFault, and if it wasn't for the Plesk aspect I would ask it there to start with, so if it's better suited for over there let me know and I'll move it. I'm working on a dedicated server set up as a reseller account with Plesk to manage the domains and server configuration, and I need to add a directory to the local open_basedir configuration for a specific vhost. Given Plesk's normal methodology, I expected to be able to go to /var/www/vhost/{%DOMAINNAME%}/conf and modify vhost.conf and place a new value there, as I have successfully done with other configuration settings for this domain (turning safe_mode off, for instance). When I do so, however, the new setting doesn't take (per phpinfo();). If I edit httpd.conf (which the plesk configuration specifically says not to do in the notes at the top of httpd.conf) the setting takes. Is there something specific about the open_basdir setting that makes it not configurable in vhost.conf? How much trouble am I letting myself in for by editing the vhost-specific httpd.conf (I imagine is someone makes changes in the plesk web interface it might be overwritten, but what other risk is there)? Thanks!

    Read the article

  • What alternative is better to diagram this scenario?

    - by Mosty Mostacho
    I was creating and discussing a class diagram with a partner of mine. To simplify things, I've modify the real domain we're working on and made up the following diagram: Basically, a company works on constructions that are quite different one from each other but are still constructions. Note I've added one field for each class but there should be many more. Now, I thought this was the way to go but my partner told me that if in the future new construction classes appear we would have to modify the Company class, which is correct. So the new proposed class diagram would be this: Now I've been wondering: Should the fact that in no place of the application will there be mixed lists of planes and bridges affect the design in any way? When we have to list only planes for a company, how are we supposed to distinguish them from the other elements in the list without checking for their class names? Related to the previous question, is it correct to assume that this type of diagram should be high-level and this is something it shouldn't matter at this stage but rather be thought and decided at implementation time? Any comment will be appreciated.

    Read the article

  • How to manage and estimate unstructured requirements received from customers

    - by user20358
    A lot of the times I receive a software system's requirements from our customers in a very unstructured format. It is usually a bunch of "product development" guys from the customer's who come up with these "proposed solutions" to the business problems they have. While they are the experts at the business domain, a lot of the times they don't have the solutions right. This results in multiple versions of the same requirement mixing up of two requirements into one a few versions of the requirement later down the line, the requirements which were combined together get separated out again, each taking with it some of the new additions How do you work with such requirements coming in and sort them out into proper use cases and before development begins? What tools can we use to track a particular requirement's history, from the first time it was conceived till the time it gets crystallized into a proper use case? Estimating work against requirements received in such a fashion is a nightmare which ends up in making mistakes in understanding the requirement correctly and estimating the effort against it correctly. Any tips, tools, tricks to make this activity more manageable? I'm just trying to get some insights from someone more experienced than I am in requirements management and effort estimation.

    Read the article

< Previous Page | 570 571 572 573 574 575 576 577 578 579 580 581  | Next Page >