Search Results

Search found 74197 results on 2968 pages for 'part time'.

Page 209/2968 | < Previous Page | 205 206 207 208 209 210 211 212 213 214 215 216  | Next Page >

  • Using R to Analyze G1GC Log Files

    - by user12620111
    Using R to Analyze G1GC Log Files body, td { font-family: sans-serif; background-color: white; font-size: 12px; margin: 8px; } tt, code, pre { font-family: 'DejaVu Sans Mono', 'Droid Sans Mono', 'Lucida Console', Consolas, Monaco, monospace; } h1 { font-size:2.2em; } h2 { font-size:1.8em; } h3 { font-size:1.4em; } h4 { font-size:1.0em; } h5 { font-size:0.9em; } h6 { font-size:0.8em; } a:visited { color: rgb(50%, 0%, 50%); } pre { margin-top: 0; max-width: 95%; border: 1px solid #ccc; white-space: pre-wrap; } pre code { display: block; padding: 0.5em; } code.r, code.cpp { background-color: #F8F8F8; } table, td, th { border: none; } blockquote { color:#666666; margin:0; padding-left: 1em; border-left: 0.5em #EEE solid; } hr { height: 0px; border-bottom: none; border-top-width: thin; border-top-style: dotted; border-top-color: #999999; } @media print { * { background: transparent !important; color: black !important; filter:none !important; -ms-filter: none !important; } body { font-size:12pt; max-width:100%; } a, a:visited { text-decoration: underline; } hr { visibility: hidden; page-break-before: always; } pre, blockquote { padding-right: 1em; page-break-inside: avoid; } tr, img { page-break-inside: avoid; } img { max-width: 100% !important; } @page :left { margin: 15mm 20mm 15mm 10mm; } @page :right { margin: 15mm 10mm 15mm 20mm; } p, h2, h3 { orphans: 3; widows: 3; } h2, h3 { page-break-after: avoid; } } pre .operator, pre .paren { color: rgb(104, 118, 135) } pre .literal { color: rgb(88, 72, 246) } pre .number { color: rgb(0, 0, 205); } pre .comment { color: rgb(76, 136, 107); } pre .keyword { color: rgb(0, 0, 255); } pre .identifier { color: rgb(0, 0, 0); } pre .string { color: rgb(3, 106, 7); } var hljs=new function(){function m(p){return p.replace(/&/gm,"&").replace(/"}while(y.length||w.length){var v=u().splice(0,1)[0];z+=m(x.substr(q,v.offset-q));q=v.offset;if(v.event=="start"){z+=t(v.node);s.push(v.node)}else{if(v.event=="stop"){var p,r=s.length;do{r--;p=s[r];z+=("")}while(p!=v.node);s.splice(r,1);while(r'+M[0]+""}else{r+=M[0]}O=P.lR.lastIndex;M=P.lR.exec(L)}return r+L.substr(O,L.length-O)}function J(L,M){if(M.sL&&e[M.sL]){var r=d(M.sL,L);x+=r.keyword_count;return r.value}else{return F(L,M)}}function I(M,r){var L=M.cN?'':"";if(M.rB){y+=L;M.buffer=""}else{if(M.eB){y+=m(r)+L;M.buffer=""}else{y+=L;M.buffer=r}}D.push(M);A+=M.r}function G(N,M,Q){var R=D[D.length-1];if(Q){y+=J(R.buffer+N,R);return false}var P=q(M,R);if(P){y+=J(R.buffer+N,R);I(P,M);return P.rB}var L=v(D.length-1,M);if(L){var O=R.cN?"":"";if(R.rE){y+=J(R.buffer+N,R)+O}else{if(R.eE){y+=J(R.buffer+N,R)+O+m(M)}else{y+=J(R.buffer+N+M,R)+O}}while(L1){O=D[D.length-2].cN?"":"";y+=O;L--;D.length--}var r=D[D.length-1];D.length--;D[D.length-1].buffer="";if(r.starts){I(r.starts,"")}return R.rE}if(w(M,R)){throw"Illegal"}}var E=e[B];var D=[E.dM];var A=0;var x=0;var y="";try{var s,u=0;E.dM.buffer="";do{s=p(C,u);var t=G(s[0],s[1],s[2]);u+=s[0].length;if(!t){u+=s[1].length}}while(!s[2]);if(D.length1){throw"Illegal"}return{r:A,keyword_count:x,value:y}}catch(H){if(H=="Illegal"){return{r:0,keyword_count:0,value:m(C)}}else{throw H}}}function g(t){var p={keyword_count:0,r:0,value:m(t)};var r=p;for(var q in e){if(!e.hasOwnProperty(q)){continue}var s=d(q,t);s.language=q;if(s.keyword_count+s.rr.keyword_count+r.r){r=s}if(s.keyword_count+s.rp.keyword_count+p.r){r=p;p=s}}if(r.language){p.second_best=r}return p}function i(r,q,p){if(q){r=r.replace(/^((]+|\t)+)/gm,function(t,w,v,u){return w.replace(/\t/g,q)})}if(p){r=r.replace(/\n/g,"")}return r}function n(t,w,r){var x=h(t,r);var v=a(t);var y,s;if(v){y=d(v,x)}else{return}var q=c(t);if(q.length){s=document.createElement("pre");s.innerHTML=y.value;y.value=k(q,c(s),x)}y.value=i(y.value,w,r);var u=t.className;if(!u.match("(\\s|^)(language-)?"+v+"(\\s|$)")){u=u?(u+" "+v):v}if(/MSIE [678]/.test(navigator.userAgent)&&t.tagName=="CODE"&&t.parentNode.tagName=="PRE"){s=t.parentNode;var p=document.createElement("div");p.innerHTML=""+y.value+"";t=p.firstChild.firstChild;p.firstChild.cN=s.cN;s.parentNode.replaceChild(p.firstChild,s)}else{t.innerHTML=y.value}t.className=u;t.result={language:v,kw:y.keyword_count,re:y.r};if(y.second_best){t.second_best={language:y.second_best.language,kw:y.second_best.keyword_count,re:y.second_best.r}}}function o(){if(o.called){return}o.called=true;var r=document.getElementsByTagName("pre");for(var p=0;p|=||=||=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~";this.ER="(?![\\s\\S])";this.BE={b:"\\\\.",r:0};this.ASM={cN:"string",b:"'",e:"'",i:"\\n",c:[this.BE],r:0};this.QSM={cN:"string",b:'"',e:'"',i:"\\n",c:[this.BE],r:0};this.CLCM={cN:"comment",b:"//",e:"$"};this.CBLCLM={cN:"comment",b:"/\\*",e:"\\*/"};this.HCM={cN:"comment",b:"#",e:"$"};this.NM={cN:"number",b:this.NR,r:0};this.CNM={cN:"number",b:this.CNR,r:0};this.BNM={cN:"number",b:this.BNR,r:0};this.inherit=function(r,s){var p={};for(var q in r){p[q]=r[q]}if(s){for(var q in s){p[q]=s[q]}}return p}}();hljs.LANGUAGES.cpp=function(){var a={keyword:{"false":1,"int":1,"float":1,"while":1,"private":1,"char":1,"catch":1,"export":1,virtual:1,operator:2,sizeof:2,dynamic_cast:2,typedef:2,const_cast:2,"const":1,struct:1,"for":1,static_cast:2,union:1,namespace:1,unsigned:1,"long":1,"throw":1,"volatile":2,"static":1,"protected":1,bool:1,template:1,mutable:1,"if":1,"public":1,friend:2,"do":1,"return":1,"goto":1,auto:1,"void":2,"enum":1,"else":1,"break":1,"new":1,extern:1,using:1,"true":1,"class":1,asm:1,"case":1,typeid:1,"short":1,reinterpret_cast:2,"default":1,"double":1,register:1,explicit:1,signed:1,typename:1,"try":1,"this":1,"switch":1,"continue":1,wchar_t:1,inline:1,"delete":1,alignof:1,char16_t:1,char32_t:1,constexpr:1,decltype:1,noexcept:1,nullptr:1,static_assert:1,thread_local:1,restrict:1,_Bool:1,complex:1},built_in:{std:1,string:1,cin:1,cout:1,cerr:1,clog:1,stringstream:1,istringstream:1,ostringstream:1,auto_ptr:1,deque:1,list:1,queue:1,stack:1,vector:1,map:1,set:1,bitset:1,multiset:1,multimap:1,unordered_set:1,unordered_map:1,unordered_multiset:1,unordered_multimap:1,array:1,shared_ptr:1}};return{dM:{k:a,i:"",k:a,r:10,c:["self"]}]}}}();hljs.LANGUAGES.r={dM:{c:[hljs.HCM,{cN:"number",b:"\\b0[xX][0-9a-fA-F]+[Li]?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+(?:[eE][+\\-]?\\d*)?L\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+\\.(?!\\d)(?:i\\b)?",e:hljs.IMMEDIATE_RE,r:1},{cN:"number",b:"\\b\\d+(?:\\.\\d*)?(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\.\\d+(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"keyword",b:"(?:tryCatch|library|setGeneric|setGroupGeneric)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\.",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\d+(?![\\w.])",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\b(?:function)",e:hljs.IMMEDIATE_RE,r:2},{cN:"keyword",b:"(?:if|in|break|next|repeat|else|for|return|switch|while|try|stop|warning|require|attach|detach|source|setMethod|setClass)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"literal",b:"(?:NA|NA_integer_|NA_real_|NA_character_|NA_complex_)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"literal",b:"(?:NULL|TRUE|FALSE|T|F|Inf|NaN)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"identifier",b:"[a-zA-Z.][a-zA-Z0-9._]*\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"|=||   Using R to Analyze G1GC Log Files   Using R to Analyze G1GC Log Files Introduction Working in Oracle Platform Integration gives an engineer opportunities to work on a wide array of technologies. My team’s goal is to make Oracle applications run best on the Solaris/SPARC platform. When looking for bottlenecks in a modern applications, one needs to be aware of not only how the CPUs and operating system are executing, but also network, storage, and in some cases, the Java Virtual Machine. I was recently presented with about 1.5 GB of Java Garbage First Garbage Collector log file data. If you’re not familiar with the subject, you might want to review Garbage First Garbage Collector Tuning by Monica Beckwith. The customer had been running Java HotSpot 1.6.0_31 to host a web application server. I was told that the Solaris/SPARC server was running a Java process launched using a commmand line that included the following flags: -d64 -Xms9g -Xmx9g -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:InitiatingHeapOccupancyPercent=80 -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+PrintGC -XX:+PrintGCTimeStamps -XX:+PrintHeapAtGC -XX:+PrintGCDateStamps -XX:+PrintFlagsFinal -XX:+DisableExplicitGC -XX:+UnlockExperimentalVMOptions -XX:ParallelGCThreads=8 Several sources on the internet indicate that if I were to print out the 1.5 GB of log files, it would require enough paper to fill the bed of a pick up truck. Of course, it would be fruitless to try to scan the log files by hand. Tools will be required to summarize the contents of the log files. Others have encountered large Java garbage collection log files. There are existing tools to analyze the log files: IBM’s GC toolkit The chewiebug GCViewer gchisto HPjmeter Instead of using one of the other tools listed, I decide to parse the log files with standard Unix tools, and analyze the data with R. Data Cleansing The log files arrived in two different formats. I guess that the difference is that one set of log files was generated using a more verbose option, maybe -XX:+PrintHeapAtGC, and the other set of log files was generated without that option. Format 1 In some of the log files, the log files with the less verbose format, a single trace, i.e. the report of a singe garbage collection event, looks like this: {Heap before GC invocations=12280 (full 61): garbage-first heap total 9437184K, used 7499918K [0xfffffffd00000000, 0xffffffff40000000, 0xffffffff40000000) region size 4096K, 1 young (4096K), 0 survivors (0K) compacting perm gen total 262144K, used 144077K [0xffffffff40000000, 0xffffffff50000000, 0xffffffff50000000) the space 262144K, 54% used [0xffffffff40000000, 0xffffffff48cb3758, 0xffffffff48cb3800, 0xffffffff50000000) No shared spaces configured. 2014-05-14T07:24:00.988-0700: 60586.353: [GC pause (young) 7324M->7320M(9216M), 0.1567265 secs] Heap after GC invocations=12281 (full 61): garbage-first heap total 9437184K, used 7496533K [0xfffffffd00000000, 0xffffffff40000000, 0xffffffff40000000) region size 4096K, 0 young (0K), 0 survivors (0K) compacting perm gen total 262144K, used 144077K [0xffffffff40000000, 0xffffffff50000000, 0xffffffff50000000) the space 262144K, 54% used [0xffffffff40000000, 0xffffffff48cb3758, 0xffffffff48cb3800, 0xffffffff50000000) No shared spaces configured. } A simple grep can be used to extract a summary: $ grep "\[ GC pause (young" g1gc.log 2014-05-13T13:24:35.091-0700: 3.109: [GC pause (young) 20M->5029K(9216M), 0.0146328 secs] 2014-05-13T13:24:35.440-0700: 3.459: [GC pause (young) 9125K->6077K(9216M), 0.0086723 secs] 2014-05-13T13:24:37.581-0700: 5.599: [GC pause (young) 25M->8470K(9216M), 0.0203820 secs] 2014-05-13T13:24:42.686-0700: 10.704: [GC pause (young) 44M->15M(9216M), 0.0288848 secs] 2014-05-13T13:24:48.941-0700: 16.958: [GC pause (young) 51M->20M(9216M), 0.0491244 secs] 2014-05-13T13:24:56.049-0700: 24.066: [GC pause (young) 92M->26M(9216M), 0.0525368 secs] 2014-05-13T13:25:34.368-0700: 62.383: [GC pause (young) 602M->68M(9216M), 0.1721173 secs] But that format wasn't easily read into R, so I needed to be a bit more tricky. I used the following Unix command to create a summary file that was easy for R to read. $ echo "SecondsSinceLaunch BeforeSize AfterSize TotalSize RealTime" $ grep "\[GC pause (young" g1gc.log | grep -v mark | sed -e 's/[A-SU-z\(\),]/ /g' -e 's/->/ /' -e 's/: / /g' | more SecondsSinceLaunch BeforeSize AfterSize TotalSize RealTime 2014-05-13T13:24:35.091-0700 3.109 20 5029 9216 0.0146328 2014-05-13T13:24:35.440-0700 3.459 9125 6077 9216 0.0086723 2014-05-13T13:24:37.581-0700 5.599 25 8470 9216 0.0203820 2014-05-13T13:24:42.686-0700 10.704 44 15 9216 0.0288848 2014-05-13T13:24:48.941-0700 16.958 51 20 9216 0.0491244 2014-05-13T13:24:56.049-0700 24.066 92 26 9216 0.0525368 2014-05-13T13:25:34.368-0700 62.383 602 68 9216 0.1721173 Format 2 In some of the log files, the log files with the more verbose format, a single trace, i.e. the report of a singe garbage collection event, was more complicated than Format 1. Here is a text file with an example of a single G1GC trace in the second format. As you can see, it is quite complicated. It is nice that there is so much information available, but the level of detail can be overwhelming. I wrote this awk script (download) to summarize each trace on a single line. #!/usr/bin/env awk -f BEGIN { printf("SecondsSinceLaunch IncrementalCount FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize\n") } ###################### # Save count data from lines that are at the start of each G1GC trace. # Each trace starts out like this: # {Heap before GC invocations=14 (full 0): # garbage-first heap total 9437184K, used 325496K [0xfffffffd00000000, 0xffffffff40000000, 0xffffffff40000000) ###################### /{Heap.*full/{ gsub ( "\\)" , "" ); nf=split($0,a,"="); split(a[2],b," "); getline; if ( match($0, "first") ) { G1GC=1; IncrementalCount=b[1]; FullCount=substr( b[3], 1, length(b[3])-1 ); } else { G1GC=0; } } ###################### # Pull out time stamps that are in lines with this format: # 2014-05-12T14:02:06.025-0700: 94.312: [GC pause (young), 0.08870154 secs] ###################### /GC pause/ { DateTime=$1; SecondsSinceLaunch=substr($2, 1, length($2)-1); } ###################### # Heap sizes are in lines that look like this: # [ 4842M->4838M(9216M)] ###################### /\[ .*]$/ { gsub ( "\\[" , "" ); gsub ( "\ \]" , "" ); gsub ( "->" , " " ); gsub ( "\\( " , " " ); gsub ( "\ \)" , " " ); split($0,a," "); if ( split(a[1],b,"M") > 1 ) {BeforeSize=b[1]*1024;} if ( split(a[1],b,"K") > 1 ) {BeforeSize=b[1];} if ( split(a[2],b,"M") > 1 ) {AfterSize=b[1]*1024;} if ( split(a[2],b,"K") > 1 ) {AfterSize=b[1];} if ( split(a[3],b,"M") > 1 ) {TotalSize=b[1]*1024;} if ( split(a[3],b,"K") > 1 ) {TotalSize=b[1];} } ###################### # Emit an output line when you find input that looks like this: # [Times: user=1.41 sys=0.08, real=0.24 secs] ###################### /\[Times/ { if (G1GC==1) { gsub ( "," , "" ); split($2,a,"="); UserTime=a[2]; split($3,a,"="); SysTime=a[2]; split($4,a,"="); RealTime=a[2]; print DateTime,SecondsSinceLaunch,IncrementalCount,FullCount,UserTime,SysTime,RealTime,BeforeSize,AfterSize,TotalSize; G1GC=0; } } The resulting summary is about 25X smaller that the original file, but still difficult for a human to digest. SecondsSinceLaunch IncrementalCount FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize ... 2014-05-12T18:36:34.669-0700: 3985.744 561 0 0.57 0.06 0.16 1724416 1720320 9437184 2014-05-12T18:36:34.839-0700: 3985.914 562 0 0.51 0.06 0.19 1724416 1720320 9437184 2014-05-12T18:36:35.069-0700: 3986.144 563 0 0.60 0.04 0.27 1724416 1721344 9437184 2014-05-12T18:36:35.354-0700: 3986.429 564 0 0.33 0.04 0.09 1725440 1722368 9437184 2014-05-12T18:36:35.545-0700: 3986.620 565 0 0.58 0.04 0.17 1726464 1722368 9437184 2014-05-12T18:36:35.726-0700: 3986.801 566 0 0.43 0.05 0.12 1726464 1722368 9437184 2014-05-12T18:36:35.856-0700: 3986.930 567 0 0.30 0.04 0.07 1726464 1723392 9437184 2014-05-12T18:36:35.947-0700: 3987.023 568 0 0.61 0.04 0.26 1727488 1723392 9437184 2014-05-12T18:36:36.228-0700: 3987.302 569 0 0.46 0.04 0.16 1731584 1724416 9437184 Reading the Data into R Once the GC log data had been cleansed, either by processing the first format with the shell script, or by processing the second format with the awk script, it was easy to read the data into R. g1gc.df = read.csv("summary.txt", row.names = NULL, stringsAsFactors=FALSE,sep="") str(g1gc.df) ## 'data.frame': 8307 obs. of 10 variables: ## $ row.names : chr "2014-05-12T14:00:32.868-0700:" "2014-05-12T14:00:33.179-0700:" "2014-05-12T14:00:33.677-0700:" "2014-05-12T14:00:35.538-0700:" ... ## $ SecondsSinceLaunch: num 1.16 1.47 1.97 3.83 6.1 ... ## $ IncrementalCount : int 0 1 2 3 4 5 6 7 8 9 ... ## $ FullCount : int 0 0 0 0 0 0 0 0 0 0 ... ## $ UserTime : num 0.11 0.05 0.04 0.21 0.08 0.26 0.31 0.33 0.34 0.56 ... ## $ SysTime : num 0.04 0.01 0.01 0.05 0.01 0.06 0.07 0.06 0.07 0.09 ... ## $ RealTime : num 0.02 0.02 0.01 0.04 0.02 0.04 0.05 0.04 0.04 0.06 ... ## $ BeforeSize : int 8192 5496 5768 22528 24576 43008 34816 53248 55296 93184 ... ## $ AfterSize : int 1400 1672 2557 4907 7072 14336 16384 18432 19456 21504 ... ## $ TotalSize : int 9437184 9437184 9437184 9437184 9437184 9437184 9437184 9437184 9437184 9437184 ... head(g1gc.df) ## row.names SecondsSinceLaunch IncrementalCount ## 1 2014-05-12T14:00:32.868-0700: 1.161 0 ## 2 2014-05-12T14:00:33.179-0700: 1.472 1 ## 3 2014-05-12T14:00:33.677-0700: 1.969 2 ## 4 2014-05-12T14:00:35.538-0700: 3.830 3 ## 5 2014-05-12T14:00:37.811-0700: 6.103 4 ## 6 2014-05-12T14:00:41.428-0700: 9.720 5 ## FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize ## 1 0 0.11 0.04 0.02 8192 1400 9437184 ## 2 0 0.05 0.01 0.02 5496 1672 9437184 ## 3 0 0.04 0.01 0.01 5768 2557 9437184 ## 4 0 0.21 0.05 0.04 22528 4907 9437184 ## 5 0 0.08 0.01 0.02 24576 7072 9437184 ## 6 0 0.26 0.06 0.04 43008 14336 9437184 Basic Statistics Once the data has been read into R, simple statistics are very easy to generate. All of the numbers from high school statistics are available via simple commands. For example, generate a summary of every column: summary(g1gc.df) ## row.names SecondsSinceLaunch IncrementalCount FullCount ## Length:8307 Min. : 1 Min. : 0 Min. : 0.0 ## Class :character 1st Qu.: 9977 1st Qu.:2048 1st Qu.: 0.0 ## Mode :character Median :12855 Median :4136 Median : 12.0 ## Mean :12527 Mean :4156 Mean : 31.6 ## 3rd Qu.:15758 3rd Qu.:6262 3rd Qu.: 61.0 ## Max. :55484 Max. :8391 Max. :113.0 ## UserTime SysTime RealTime BeforeSize ## Min. :0.040 Min. :0.0000 Min. : 0.0 Min. : 5476 ## 1st Qu.:0.470 1st Qu.:0.0300 1st Qu.: 0.1 1st Qu.:5137920 ## Median :0.620 Median :0.0300 Median : 0.1 Median :6574080 ## Mean :0.751 Mean :0.0355 Mean : 0.3 Mean :5841855 ## 3rd Qu.:0.920 3rd Qu.:0.0400 3rd Qu.: 0.2 3rd Qu.:7084032 ## Max. :3.370 Max. :1.5600 Max. :488.1 Max. :8696832 ## AfterSize TotalSize ## Min. : 1380 Min. :9437184 ## 1st Qu.:5002752 1st Qu.:9437184 ## Median :6559744 Median :9437184 ## Mean :5785454 Mean :9437184 ## 3rd Qu.:7054336 3rd Qu.:9437184 ## Max. :8482816 Max. :9437184 Q: What is the total amount of User CPU time spent in garbage collection? sum(g1gc.df$UserTime) ## [1] 6236 As you can see, less than two hours of CPU time was spent in garbage collection. Is that too much? To find the percentage of time spent in garbage collection, divide the number above by total_elapsed_time*CPU_count. In this case, there are a lot of CPU’s and it turns out the the overall amount of CPU time spent in garbage collection isn’t a problem when viewed in isolation. When calculating rates, i.e. events per unit time, you need to ask yourself if the rate is homogenous across the time period in the log file. Does the log file include spikes of high activity that should be separately analyzed? Averaging in data from nights and weekends with data from business hours may alias problems. If you have a reason to suspect that the garbage collection rates include peaks and valleys that need independent analysis, see the “Time Series” section, below. Q: How much garbage is collected on each pass? The amount of heap space that is recovered per GC pass is surprisingly low: At least one collection didn’t recover any data. (“Min.=0”) 25% of the passes recovered 3MB or less. (“1st Qu.=3072”) Half of the GC passes recovered 4MB or less. (“Median=4096”) The average amount recovered was 56MB. (“Mean=56390”) 75% of the passes recovered 36MB or less. (“3rd Qu.=36860”) At least one pass recovered 2GB. (“Max.=2121000”) g1gc.df$Delta = g1gc.df$BeforeSize - g1gc.df$AfterSize summary(g1gc.df$Delta) ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 0 3070 4100 56400 36900 2120000 Q: What is the maximum User CPU time for a single collection? The worst garbage collection (“Max.”) is many standard deviations away from the mean. The data appears to be right skewed. summary(g1gc.df$UserTime) ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 0.040 0.470 0.620 0.751 0.920 3.370 sd(g1gc.df$UserTime) ## [1] 0.3966 Basic Graphics Once the data is in R, it is trivial to plot the data with formats including dot plots, line charts, bar charts (simple, stacked, grouped), pie charts, boxplots, scatter plots histograms, and kernel density plots. Histogram of User CPU Time per Collection I don't think that this graph requires any explanation. hist(g1gc.df$UserTime, main="User CPU Time per Collection", xlab="Seconds", ylab="Frequency") Box plot to identify outliers When the initial data is viewed with a box plot, you can see the one crazy outlier in the real time per GC. Save this data point for future analysis and drop the outlier so that it’s not throwing off our statistics. Now the box plot shows many outliers, which will be examined later, using times series analysis. Notice that the scale of the x-axis changes drastically once the crazy outlier is removed. par(mfrow=c(2,1)) boxplot(g1gc.df$UserTime,g1gc.df$SysTime,g1gc.df$RealTime, main="Box Plot of Time per GC\n(dominated by a crazy outlier)", names=c("usr","sys","elapsed"), xlab="Seconds per GC", ylab="Time (Seconds)", horizontal = TRUE, outcol="red") crazy.outlier.df=g1gc.df[g1gc.df$RealTime > 400,] g1gc.df=g1gc.df[g1gc.df$RealTime < 400,] boxplot(g1gc.df$UserTime,g1gc.df$SysTime,g1gc.df$RealTime, main="Box Plot of Time per GC\n(crazy outlier excluded)", names=c("usr","sys","elapsed"), xlab="Seconds per GC", ylab="Time (Seconds)", horizontal = TRUE, outcol="red") box(which = "outer", lty = "solid") Here is the crazy outlier for future analysis: crazy.outlier.df ## row.names SecondsSinceLaunch IncrementalCount ## 8233 2014-05-12T23:15:43.903-0700: 20741 8316 ## FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize ## 8233 112 0.55 0.42 488.1 8381440 8235008 9437184 ## Delta ## 8233 146432 R Time Series Data To analyze the garbage collection as a time series, I’ll use Z’s Ordered Observations (zoo). “zoo is the creator for an S3 class of indexed totally ordered observations which includes irregular time series.” require(zoo) ## Loading required package: zoo ## ## Attaching package: 'zoo' ## ## The following objects are masked from 'package:base': ## ## as.Date, as.Date.numeric head(g1gc.df[,1]) ## [1] "2014-05-12T14:00:32.868-0700:" "2014-05-12T14:00:33.179-0700:" ## [3] "2014-05-12T14:00:33.677-0700:" "2014-05-12T14:00:35.538-0700:" ## [5] "2014-05-12T14:00:37.811-0700:" "2014-05-12T14:00:41.428-0700:" options("digits.secs"=3) times=as.POSIXct( g1gc.df[,1], format="%Y-%m-%dT%H:%M:%OS%z:") g1gc.z = zoo(g1gc.df[,-c(1)], order.by=times) head(g1gc.z) ## SecondsSinceLaunch IncrementalCount FullCount ## 2014-05-12 17:00:32.868 1.161 0 0 ## 2014-05-12 17:00:33.178 1.472 1 0 ## 2014-05-12 17:00:33.677 1.969 2 0 ## 2014-05-12 17:00:35.538 3.830 3 0 ## 2014-05-12 17:00:37.811 6.103 4 0 ## 2014-05-12 17:00:41.427 9.720 5 0 ## UserTime SysTime RealTime BeforeSize AfterSize ## 2014-05-12 17:00:32.868 0.11 0.04 0.02 8192 1400 ## 2014-05-12 17:00:33.178 0.05 0.01 0.02 5496 1672 ## 2014-05-12 17:00:33.677 0.04 0.01 0.01 5768 2557 ## 2014-05-12 17:00:35.538 0.21 0.05 0.04 22528 4907 ## 2014-05-12 17:00:37.811 0.08 0.01 0.02 24576 7072 ## 2014-05-12 17:00:41.427 0.26 0.06 0.04 43008 14336 ## TotalSize Delta ## 2014-05-12 17:00:32.868 9437184 6792 ## 2014-05-12 17:00:33.178 9437184 3824 ## 2014-05-12 17:00:33.677 9437184 3211 ## 2014-05-12 17:00:35.538 9437184 17621 ## 2014-05-12 17:00:37.811 9437184 17504 ## 2014-05-12 17:00:41.427 9437184 28672 Example of Two Benchmark Runs in One Log File The data in the following graph is from a different log file, not the one of primary interest to this article. I’m including this image because it is an example of idle periods followed by busy periods. It would be uninteresting to average the rate of garbage collection over the entire log file period. More interesting would be the rate of garbage collect in the two busy periods. Are they the same or different? Your production data may be similar, for example, bursts when employees return from lunch and idle times on weekend evenings, etc. Once the data is in an R Time Series, you can analyze isolated time windows. Clipping the Time Series data Flashing back to our test case… Viewing the data as a time series is interesting. You can see that the work intensive time period is between 9:00 PM and 3:00 AM. Lets clip the data to the interesting period:     par(mfrow=c(2,1)) plot(g1gc.z$UserTime, type="h", main="User Time per GC\nTime: Complete Log File", xlab="Time of Day", ylab="CPU Seconds per GC", col="#1b9e77") clipped.g1gc.z=window(g1gc.z, start=as.POSIXct("2014-05-12 21:00:00"), end=as.POSIXct("2014-05-13 03:00:00")) plot(clipped.g1gc.z$UserTime, type="h", main="User Time per GC\nTime: Limited to Benchmark Execution", xlab="Time of Day", ylab="CPU Seconds per GC", col="#1b9e77") box(which = "outer", lty = "solid") Cumulative Incremental and Full GC count Here is the cumulative incremental and full GC count. When the line is very steep, it indicates that the GCs are repeating very quickly. Notice that the scale on the Y axis is different for full vs. incremental. plot(clipped.g1gc.z[,c(2:3)], main="Cumulative Incremental and Full GC count", xlab="Time of Day", col="#1b9e77") GC Analysis of Benchmark Execution using Time Series data In the following series of 3 graphs: The “After Size” show the amount of heap space in use after each garbage collection. Many Java objects are still referenced, i.e. alive, during each garbage collection. This may indicate that the application has a memory leak, or may indicate that the application has a very large memory footprint. Typically, an application's memory footprint plateau's in the early stage of execution. One would expect this graph to have a flat top. The steep decline in the heap space may indicate that the application crashed after 2:00. The second graph shows that the outliers in real execution time, discussed above, occur near 2:00. when the Java heap seems to be quite full. The third graph shows that Full GCs are infrequent during the first few hours of execution. The rate of Full GC's, (the slope of the cummulative Full GC line), changes near midnight.   plot(clipped.g1gc.z[,c("AfterSize","RealTime","FullCount")], xlab="Time of Day", col=c("#1b9e77","red","#1b9e77")) GC Analysis of heap recovered Each GC trace includes the amount of heap space in use before and after the individual GC event. During garbage coolection, unreferenced objects are identified, the space holding the unreferenced objects is freed, and thus, the difference in before and after usage indicates how much space has been freed. The following box plot and bar chart both demonstrate the same point - the amount of heap space freed per garbage colloection is surprisingly low. par(mfrow=c(2,1)) boxplot(as.vector(clipped.g1gc.z$Delta), main="Amount of Heap Recovered per GC Pass", xlab="Size in KB", horizontal = TRUE, col="red") hist(as.vector(clipped.g1gc.z$Delta), main="Amount of Heap Recovered per GC Pass", xlab="Size in KB", breaks=100, col="red") box(which = "outer", lty = "solid") This graph is the most interesting. The dark blue area shows how much heap is occupied by referenced Java objects. This represents memory that holds live data. The red fringe at the top shows how much data was recovered after each garbage collection. barplot(clipped.g1gc.z[,c("AfterSize","Delta")], col=c("#7570b3","#e7298a"), xlab="Time of Day", border=NA) legend("topleft", c("Live Objects","Heap Recovered on GC"), fill=c("#7570b3","#e7298a")) box(which = "outer", lty = "solid") When I discuss the data in the log files with the customer, I will ask for an explaination for the large amount of referenced data resident in the Java heap. There are two are posibilities: There is a memory leak and the amount of space required to hold referenced objects will continue to grow, limited only by the maximum heap size. After the maximum heap size is reached, the JVM will throw an “Out of Memory” exception every time that the application tries to allocate a new object. If this is the case, the aplication needs to be debugged to identify why old objects are referenced when they are no longer needed. The application has a legitimate requirement to keep a large amount of data in memory. The customer may want to further increase the maximum heap size. Another possible solution would be to partition the application across multiple cluster nodes, where each node has responsibility for managing a unique subset of the data. Conclusion In conclusion, R is a very powerful tool for the analysis of Java garbage collection log files. The primary difficulty is data cleansing so that information can be read into an R data frame. Once the data has been read into R, a rich set of tools may be used for thorough evaluation.

    Read the article

  • How do tight timelines and scheduling pressure affect TCO and delivery time?

    - by JonathanHayward
    A friend's father, who is a software engineering manager, said, emphatically, "The number one cause of scheduling overruns is scheduling pressure." Where does the research stand? Is a moderate amount of scheduling pressure invigorating, or is the manager I mentioned right or wrong, or is it a matter of "the more scheduling pressure you have, the longer the delivery time and the more TCO?" Is it one of those things where ideally software engineering would work without scheduling pressure but practically we have to work with constraints of real-world situations? Any links to software engineering literature would be appreciated.

    Read the article

  • How to concentrate on one project at a time. Divide and Conquer doesn't work for me [closed]

    - by refhat
    Possible Duplicate: Tips for staying focused and motivated on a project I have serious issues on concentrating on one project at a time. I cant even follow the Divide and Conquer Approach. Once I start a project, I try to get the things done as neatly as possible but very soon I end up messing so many components of it. I try to do divide and conquer, but my approach doesn't work smoothly, and then I then wonder here and there in other projects. Sometimes I try spending so many hours for some trivial issues, which in-fact are not even issues. How do I avoid this jargon and be a smooth developer and have a nice workflow around my projects. I tend to loose my concentration on the current project and wonder in another project.

    Read the article

  • Do More, Spend Less, Speed Time to Market – All with Oracle Database Appliance.

    - by jgelhaus
    Do More, Spend Less, Speed Time to Market – All with Oracle Database Appliance. Join Oracle for a first hand experience that will highlight how your business can lower TCO for hardware and software, do more with your existing personnel and resources, and get your products to market faster with Oracle Database Appliance. Learn how you can take advantage of the world's most popular database – Oracle Database 11g – in a single solution that's affordable, provides automated installation, is easy to manage, and is supported end-to-end by Oracle. Oracle Database Appliance is the complete package: software, server, storage, and networking, all designed by Oracle to simplify your technology and let you get down to business. Webcast Schedule Wednesday, April 4 1:00pm Eastern Webcast Link Teleconference: 1-866-753-5684 Conference Code: 61908866 Passcode: oda Add meeting to your calendar Wednesday, April 11 1:00pm Eastern Webcast Link Teleconference: 1-866-753-5684 Conference Code: 61909590 Passcode: oda Add meeting to your calendar Wednesday, April 18 1:00pm Eastern Webcast Link Teleconference: 1-866-753-5684 Conference Code: 61910385 Passcode: oda Add meeting to your calendar

    Read the article

  • What should be a fair amount of time in an interview before rejecting a candidate?

    - by Danish
    As a panelist for technical interviews, you often come across candidates who have all the requried educational qualifications, skill sets and experience level on resumes, but struggle to answer even the most basic questions. Ideally, technical interviews should try to check different aspects of a candidate and test them on various skills. So, if the candidate falters on one aspect, one should test the other ones before coming to a conclusion. But often, if a candidates falters on the first few questions, the red flag rises up pretty quickly. What in your opinion should be the bare minimum time spent with a candidate before making a fair accessment of his/her skills and suitability for the job?

    Read the article

  • Two internships at the same time -- good or bad?

    - by Karl
    I had no internship a few months ago, so I basically went on a 'resume mailing' spree and emailed a lot of companies that I was interested in working for and that had my line of work. This didn't prove futile until a company accepted me into their internship program but said that I would be working remotely. I had no problem with that, the project was good and I was interested. Now I have another internship at a company that is close to my home and I don't want to miss it at all! I can manage both internships side-by-side. In the day, I will do the internship that is closer to my home and at night (and other times), I can manage the remote internship. My question is -- should I both? I am particularly interested in how two internships at the same time are viewed. Would it look good or bad? PS: Neither is paying me anything, so money is not a factor.

    Read the article

  • Auto-mount CD/DVD drive to single, specific mount point every time?

    - by Christopher Parker
    Currently, whenever I insert a CD or DVD into my DVD drive, it mounts to a location such as /media/<LABEL>, where <LABEL> is the arbitrary label assigned to the optical disc. I remember, once upon a time, CD and DVD media being reliably located at /media/cdrom0 or something similar. Why was this changed? And how do I get this old behavior back for this drive? I can understand this behavior for USB sticks. It makes sense for those. But not for CD/DVD media, in my opinion. For example, because of this, I have no way to configure Wine to point to my DVD drive, as the mount point changes with every single CD I insert. TL;DR: How do I make CD/DVD media always mount to /media/cdrom0?

    Read the article

  • Where can I get feedback and support from other programmers in real time?

    - by cypherblue
    I used to work in an office surrounded by a large team of programmers where we all used the same languages and had different expertises. Now that I am on my own forming a startup at home, my productivity is suffering because I miss having people I can talk to for specific help, inspiration and reality checks when working on a coding problem. I don't have access to business incubators or shared (co-working) office spaces for startups so I need to chat with people virtually. Where can I go for real-time chat with other programmers and developers (currently I'm looking for people developing for the web, javascript and python) for live debugging and problem-solving of the tasks I am working on? And what other resources can I use to get fellow programmer support?

    Read the article

  • Why UFW has to be (re)started at boot time if it's only iptables rule manager?

    - by Tomasz Zielinski
    README from source package says: When installing ufw from source, you will also need to integrate it into your boot process for the firewall to start when you restart your system. Depending on your needs, this can be as simple as adding the following to a startup script (eg rc.local for systems that use it): # /lib/ufw/ufw-init start For systems that use SysV initscripts, an example script is provided in doc/initscript.example. See doc/upstart.example for an Upstart example. Consult your distribution's documentation for the proper way to modify your boot process. On my system I have this: # /etc/ufw/ufw.conf # # Set to yes to start on boot. If setting this remotely, be sure to add a rule # to allow your remote connection before starting ufw. Eg: 'ufw allow 22/tcp' ENABLED=yes So, why does simple iptables rule manager need to be started at boot time? Is there any secret to that, or it merely checks if all rules are in place ?

    Read the article

  • How do I know if my game's average game session time is too small?

    - by you786
    My game has only one life, and the aim is to stay alive as long as possible to get as many points as possible (it's an endless runner). Using Google Analytics I found that players are staying alive for an average of 17 seconds. I could easily increase or decrease this by manipulating acceleration or starting speed. The question is, should I change it at all? Is there any research or general ideas on the best playing time for a game like this? I would also like to know about any research about how long an ideal mobile game session should last.

    Read the article

  • New blog post shows immediately in google search results where as other HTML content takes time, why?

    - by Jayapal Chandran
    I have a blog which has been active for 3 years. Recently I posted an article and it immediately appeared in google search. Maybe 5 to 10 minutes. A point to note is I was logged into my google account. Maybe google checked my post's when I searched since I am logged in? Yet I logged out and used another browser and searched again with that specific text and it appeared in google search result. How did this happen? However, if I make an article in static HTML and publish, it takes time. (I assume this is the case but I haven't tested much). Yet tested a few cases after updating it in my sitemap xml. How does google search work for a blog and other content?

    Read the article

  • Is it a waste of time to free resources before I exit a process?

    - by Martin
    Let's consider a fictional program that builds a linked list in the heap, and at the end of the program there is a loop that frees all the nodes, and then exits. For this case let's say the linked list is just 500K of memory, and no special space managing is required. Is that a wast of time, because the OS will do that anyway? Will there be a different behavior later? Is that different according to the OS version? I'm mainly interested in UNIX based systems, but any information will be appreciated. I had today my first lesson in OS course and I'm wondering about that now.

    Read the article

  • Best strategy (tried and tested) for using Box2D in a real-time multiplayer game?

    - by Simon Grey
    I am currently tackling real-time multiplayer physics updates for a game engine I am writing. My question is how best to use Box2D for networked physics. If I run the simulation on the server, should I send position, velocity etc to every client on every tick? Should I send it every few ticks? Maybe there is another way that I am missing? How has this problem been solved using Box2D before? Anyone with some ideas would be greatly appreciated!

    Read the article

  • Why would a web site keep my signup information for a limited time only?

    - by Alois Mahdal
    I have just created account at (some web service, well, actually it was Transifex, a localization service). Registration form requested typical things: accont name, e-mail adress, password (twice), and, optional company name and phone number. What confused me was this sentence on confirmation page (the one right after submitting the form): We will store your signup information for 7 days on our server. Can anybody explain what does this mean? What exactly they are referring to by "signup information", if it's something that should be kept for only 7 days? Or is my account going to be destroyed after that time? (Well, that could make sense for some special services, but not for this one.)

    Read the article

  • Is it time to add IPv6 access to my websites?

    - by Rob Hoare
    I have several dedicated servers and VPS servers, and some of those are at companies that have provided me with native IPv6 blocks (in addition to the IPv4 IP addresses). Does it currently make sense to point an AAAA record to an IPv6 address on my server, in addition to the A record pointing to the IPv4 address? This would be for (for example) the www subdomain. (the networking and web server software would be set up on the server to respond appropriately). A while ago I read that a small percentage of users (1 in a thousand?) would have slow or no access if a subdomain had both A and AAAA records because their networking software asked for one and got the other. Is that still the case, will adding an AAAA record inconvenience some users, or is the percentage already smaller and falling? In other words, is now the time to get around to adding native IPv6 support for a busy website aimed at the general public, or is it still too early?

    Read the article

  • How can you predict the time it will take for two processes in two different machines in a cluster to communicate?

    - by Dokkat
    I am trying to develop a computing application which needs a lot of memory (500gb). Buying a single machine for that is overly expensive. I can, though, buy ~100 small instances on Digital Ocean or similar, divide the memory in blocks and use TCP to emulate shared memory between the instances. Now, my question is: how can I measure/predict the time it will take for two processes in two different machines like that to share information, in comparison to IPC and shared memory? Are there rules of thumb? I don't want exact values, but knowing more or less how much faster one is would be very helpful in visualising the feasibility of this approach.

    Read the article

  • Why appending to a list in Scala should have O(n) time complexity?

    - by Jubbat
    I am learning Scala at the moment and I just read that the execution time of the append operation for a list (:+) grows linearly with the size of the list. Appending to a list seems like a pretty common operation. Why should the idiomatic way to do this be prepending the components and then reversing the list? It can't also be a design failure as implementation could be changed at any point. From my point of view, both prepending and appending should be O(1). Is there any legitimate reason for this?

    Read the article

  • Releasing Mobile Application under multiple platforms, same time or?

    - by SAFAD
    I've always had this idea circling around until I am facing this issue. We made an android app, it is ready, but we are planning to release the same app on iOS and possibly Windows Phone; Now, should we just release the Android app and promise the clients that the iOS version is coming soon (create anticipation before release) or delay the release until the iOS version is ready ? Same applies if we have a premium and free version, should we release the free version and promise that the better premium is coming soon, or release them both the same time ? Best Regards

    Read the article

  • How much time to wait to upgrade to a non LTS release?

    - by Jhonnytunes
    For LTS upgrades the recomendation is wait 3 months or first point release to is where the major bugs are fixed and the release is "stable" for production. What is the recommended amount of time to wait before upgrade to a non LTS release? Im just talking about the desktop version of course. Im asking because found this where say all release from 14.04 will be based on debian unstable: Cutting Edge: Starting with the 14.04 LTS development cycle, automatic full package import is performed from Debian unstable1 This is due to deploying ProposedMigration in the Ubuntu archive. From here.

    Read the article

  • /etc/apt/apt.conf gets cleared every time I change proxy settings under settings->network->Network proxy

    - by Muriuki David
    I use a proxy server settings at work but when i get home, my network connection uses no proxy settings. every time i get home and use the proxy settings under settings-networks-Network Proxy to set to "none", the file /etc/apt/apt.conf gets cleared and the following day in the morning i have to edit the file and type in the command again, or at least copy paste from a backup file. How can i avoid this situation, its tiring, how can i make the proxy settings gui write to this file for apt-get and software center to work when i set proxy under network settings?

    Read the article

  • Is this a bad time to be majoring in computer science?

    - by ATMathew
    There has been a lot of media attention paid in recent months and years to the increase in CS majors and the possibility of a second tech bubble. Some news reports have suggested that as more people enter CS, the market could be flooded with CS professionals and jobs could be increasingly difficult to find. Is this a bad time to be majoring in computer science? Edit: I'm a non-trad student who allready has a Bachelor's degree in economics and will be pursuing a CS degree starting this upcoming summer semester at the Univ of Kansas. I've been programming for about two/three years and just need a more formal education to fill the holes in my head. I have an interest in CS, it's just that I am worries about the prospects for the future.

    Read the article

  • Civilization 4: time in own scenario runs up too quickly.

    - by oKtosiTe
    In Civilization 4 (Beyond the Sword) I started making a scenario that focuses on the colonization of North America. For added realism, I set the StartYear=1780. Everytime I run the scenario, the time runs up far too quickly–probably because I set Era=ERA_ANCIENT and GameTurn=0–so that after just twenty turns the clock nears present day. I'm not really sure how to go fix this, other than by playing an actual marathon game from the start up to the year 1780 and copy over the values. Is there any way to calculate/know the right values for my scenario?

    Read the article

  • What is the impact of a CMS on page load time versus a static site?

    - by PleaseStand
    I am creating a 20-page site that will go on shared hosting. Each page will be about 20 KB (including HTML, CSS, and images common to all pages). To avoid manually adding navigation elements to each page, I am considering using a CMS. However, I am concerned that on a busy server, using a CMS would make the site load more slowly. In a shared hosting environment where PHP is run as a CGI binary, how much does a CMS (WordPress, Drupal, etc.) generally affect page load time, compared to both "plain HTML" static sites and those using PHP as merely a templating language?

    Read the article

  • Time it would take me to learn c++ given my speed? [closed]

    - by ashwin
    I am a student in second year of engineering and my life is hard, nowadays. To make my future secure and at least get good jobs, I have started learning C++; I have learned J2SE, ASP.NET (little, basic C#), PHP (little), HTML, CSS, AJAX, Javascript, SQL, a little android development (I have built a benchmark app) in 4 months and have received 1 gold medal in CSS and 1 each in HTML, CSS, Java. I am able to make things in C#, Java and all other, so I can apply all this knowledge. I was able to do all this, because I loved learning and I hate to ask this question. How much time would it take me to learn C++, good-enough to get good jobs at Google, Microsoft? I am currently learning data structures, so that's excluded.

    Read the article

  • Is it time to deprecate synchronized, wait and notify?

    - by OldCurmudgeon
    Is there a single scenario (other than compatibility with ancient JVMs) where using synchronized is preferable to using a Lock? Can anyone justify using wait or notify over the newer systems? Is there any algorithm that must use one of them in its implementation? I see a previous questions that touched on this matter but I would like to take this a little further and actually deprecate them. There are far too many traps and pitfalls and caveats with them that have been ironed out with the new facilities. I just feel it may soon be time to mark them obsolete.

    Read the article

< Previous Page | 205 206 207 208 209 210 211 212 213 214 215 216  | Next Page >