Search Results

Search found 437 results on 18 pages for 'footprint'.

Page 14/18 | < Previous Page | 10 11 12 13 14 15 16 17 18  | Next Page >

  • Using R to Analyze G1GC Log Files

    - by user12620111
    Using R to Analyze G1GC Log Files body, td { font-family: sans-serif; background-color: white; font-size: 12px; margin: 8px; } tt, code, pre { font-family: 'DejaVu Sans Mono', 'Droid Sans Mono', 'Lucida Console', Consolas, Monaco, monospace; } h1 { font-size:2.2em; } h2 { font-size:1.8em; } h3 { font-size:1.4em; } h4 { font-size:1.0em; } h5 { font-size:0.9em; } h6 { font-size:0.8em; } a:visited { color: rgb(50%, 0%, 50%); } pre { margin-top: 0; max-width: 95%; border: 1px solid #ccc; white-space: pre-wrap; } pre code { display: block; padding: 0.5em; } code.r, code.cpp { background-color: #F8F8F8; } table, td, th { border: none; } blockquote { color:#666666; margin:0; padding-left: 1em; border-left: 0.5em #EEE solid; } hr { height: 0px; border-bottom: none; border-top-width: thin; border-top-style: dotted; border-top-color: #999999; } @media print { * { background: transparent !important; color: black !important; filter:none !important; -ms-filter: none !important; } body { font-size:12pt; max-width:100%; } a, a:visited { text-decoration: underline; } hr { visibility: hidden; page-break-before: always; } pre, blockquote { padding-right: 1em; page-break-inside: avoid; } tr, img { page-break-inside: avoid; } img { max-width: 100% !important; } @page :left { margin: 15mm 20mm 15mm 10mm; } @page :right { margin: 15mm 10mm 15mm 20mm; } p, h2, h3 { orphans: 3; widows: 3; } h2, h3 { page-break-after: avoid; } } pre .operator, pre .paren { color: rgb(104, 118, 135) } pre .literal { color: rgb(88, 72, 246) } pre .number { color: rgb(0, 0, 205); } pre .comment { color: rgb(76, 136, 107); } pre .keyword { color: rgb(0, 0, 255); } pre .identifier { color: rgb(0, 0, 0); } pre .string { color: rgb(3, 106, 7); } var hljs=new function(){function m(p){return p.replace(/&/gm,"&").replace(/"}while(y.length||w.length){var v=u().splice(0,1)[0];z+=m(x.substr(q,v.offset-q));q=v.offset;if(v.event=="start"){z+=t(v.node);s.push(v.node)}else{if(v.event=="stop"){var p,r=s.length;do{r--;p=s[r];z+=("")}while(p!=v.node);s.splice(r,1);while(r'+M[0]+""}else{r+=M[0]}O=P.lR.lastIndex;M=P.lR.exec(L)}return r+L.substr(O,L.length-O)}function J(L,M){if(M.sL&&e[M.sL]){var r=d(M.sL,L);x+=r.keyword_count;return r.value}else{return F(L,M)}}function I(M,r){var L=M.cN?'':"";if(M.rB){y+=L;M.buffer=""}else{if(M.eB){y+=m(r)+L;M.buffer=""}else{y+=L;M.buffer=r}}D.push(M);A+=M.r}function G(N,M,Q){var R=D[D.length-1];if(Q){y+=J(R.buffer+N,R);return false}var P=q(M,R);if(P){y+=J(R.buffer+N,R);I(P,M);return P.rB}var L=v(D.length-1,M);if(L){var O=R.cN?"":"";if(R.rE){y+=J(R.buffer+N,R)+O}else{if(R.eE){y+=J(R.buffer+N,R)+O+m(M)}else{y+=J(R.buffer+N+M,R)+O}}while(L1){O=D[D.length-2].cN?"":"";y+=O;L--;D.length--}var r=D[D.length-1];D.length--;D[D.length-1].buffer="";if(r.starts){I(r.starts,"")}return R.rE}if(w(M,R)){throw"Illegal"}}var E=e[B];var D=[E.dM];var A=0;var x=0;var y="";try{var s,u=0;E.dM.buffer="";do{s=p(C,u);var t=G(s[0],s[1],s[2]);u+=s[0].length;if(!t){u+=s[1].length}}while(!s[2]);if(D.length1){throw"Illegal"}return{r:A,keyword_count:x,value:y}}catch(H){if(H=="Illegal"){return{r:0,keyword_count:0,value:m(C)}}else{throw H}}}function g(t){var p={keyword_count:0,r:0,value:m(t)};var r=p;for(var q in e){if(!e.hasOwnProperty(q)){continue}var s=d(q,t);s.language=q;if(s.keyword_count+s.rr.keyword_count+r.r){r=s}if(s.keyword_count+s.rp.keyword_count+p.r){r=p;p=s}}if(r.language){p.second_best=r}return p}function i(r,q,p){if(q){r=r.replace(/^((]+|\t)+)/gm,function(t,w,v,u){return w.replace(/\t/g,q)})}if(p){r=r.replace(/\n/g,"")}return r}function n(t,w,r){var x=h(t,r);var v=a(t);var y,s;if(v){y=d(v,x)}else{return}var q=c(t);if(q.length){s=document.createElement("pre");s.innerHTML=y.value;y.value=k(q,c(s),x)}y.value=i(y.value,w,r);var u=t.className;if(!u.match("(\\s|^)(language-)?"+v+"(\\s|$)")){u=u?(u+" "+v):v}if(/MSIE [678]/.test(navigator.userAgent)&&t.tagName=="CODE"&&t.parentNode.tagName=="PRE"){s=t.parentNode;var p=document.createElement("div");p.innerHTML=""+y.value+"";t=p.firstChild.firstChild;p.firstChild.cN=s.cN;s.parentNode.replaceChild(p.firstChild,s)}else{t.innerHTML=y.value}t.className=u;t.result={language:v,kw:y.keyword_count,re:y.r};if(y.second_best){t.second_best={language:y.second_best.language,kw:y.second_best.keyword_count,re:y.second_best.r}}}function o(){if(o.called){return}o.called=true;var r=document.getElementsByTagName("pre");for(var p=0;p|=||=||=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~";this.ER="(?![\\s\\S])";this.BE={b:"\\\\.",r:0};this.ASM={cN:"string",b:"'",e:"'",i:"\\n",c:[this.BE],r:0};this.QSM={cN:"string",b:'"',e:'"',i:"\\n",c:[this.BE],r:0};this.CLCM={cN:"comment",b:"//",e:"$"};this.CBLCLM={cN:"comment",b:"/\\*",e:"\\*/"};this.HCM={cN:"comment",b:"#",e:"$"};this.NM={cN:"number",b:this.NR,r:0};this.CNM={cN:"number",b:this.CNR,r:0};this.BNM={cN:"number",b:this.BNR,r:0};this.inherit=function(r,s){var p={};for(var q in r){p[q]=r[q]}if(s){for(var q in s){p[q]=s[q]}}return p}}();hljs.LANGUAGES.cpp=function(){var a={keyword:{"false":1,"int":1,"float":1,"while":1,"private":1,"char":1,"catch":1,"export":1,virtual:1,operator:2,sizeof:2,dynamic_cast:2,typedef:2,const_cast:2,"const":1,struct:1,"for":1,static_cast:2,union:1,namespace:1,unsigned:1,"long":1,"throw":1,"volatile":2,"static":1,"protected":1,bool:1,template:1,mutable:1,"if":1,"public":1,friend:2,"do":1,"return":1,"goto":1,auto:1,"void":2,"enum":1,"else":1,"break":1,"new":1,extern:1,using:1,"true":1,"class":1,asm:1,"case":1,typeid:1,"short":1,reinterpret_cast:2,"default":1,"double":1,register:1,explicit:1,signed:1,typename:1,"try":1,"this":1,"switch":1,"continue":1,wchar_t:1,inline:1,"delete":1,alignof:1,char16_t:1,char32_t:1,constexpr:1,decltype:1,noexcept:1,nullptr:1,static_assert:1,thread_local:1,restrict:1,_Bool:1,complex:1},built_in:{std:1,string:1,cin:1,cout:1,cerr:1,clog:1,stringstream:1,istringstream:1,ostringstream:1,auto_ptr:1,deque:1,list:1,queue:1,stack:1,vector:1,map:1,set:1,bitset:1,multiset:1,multimap:1,unordered_set:1,unordered_map:1,unordered_multiset:1,unordered_multimap:1,array:1,shared_ptr:1}};return{dM:{k:a,i:"",k:a,r:10,c:["self"]}]}}}();hljs.LANGUAGES.r={dM:{c:[hljs.HCM,{cN:"number",b:"\\b0[xX][0-9a-fA-F]+[Li]?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+(?:[eE][+\\-]?\\d*)?L\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+\\.(?!\\d)(?:i\\b)?",e:hljs.IMMEDIATE_RE,r:1},{cN:"number",b:"\\b\\d+(?:\\.\\d*)?(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\.\\d+(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"keyword",b:"(?:tryCatch|library|setGeneric|setGroupGeneric)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\.",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\d+(?![\\w.])",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\b(?:function)",e:hljs.IMMEDIATE_RE,r:2},{cN:"keyword",b:"(?:if|in|break|next|repeat|else|for|return|switch|while|try|stop|warning|require|attach|detach|source|setMethod|setClass)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"literal",b:"(?:NA|NA_integer_|NA_real_|NA_character_|NA_complex_)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"literal",b:"(?:NULL|TRUE|FALSE|T|F|Inf|NaN)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"identifier",b:"[a-zA-Z.][a-zA-Z0-9._]*\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"|=||   Using R to Analyze G1GC Log Files   Using R to Analyze G1GC Log Files Introduction Working in Oracle Platform Integration gives an engineer opportunities to work on a wide array of technologies. My team’s goal is to make Oracle applications run best on the Solaris/SPARC platform. When looking for bottlenecks in a modern applications, one needs to be aware of not only how the CPUs and operating system are executing, but also network, storage, and in some cases, the Java Virtual Machine. I was recently presented with about 1.5 GB of Java Garbage First Garbage Collector log file data. If you’re not familiar with the subject, you might want to review Garbage First Garbage Collector Tuning by Monica Beckwith. The customer had been running Java HotSpot 1.6.0_31 to host a web application server. I was told that the Solaris/SPARC server was running a Java process launched using a commmand line that included the following flags: -d64 -Xms9g -Xmx9g -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:InitiatingHeapOccupancyPercent=80 -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+PrintGC -XX:+PrintGCTimeStamps -XX:+PrintHeapAtGC -XX:+PrintGCDateStamps -XX:+PrintFlagsFinal -XX:+DisableExplicitGC -XX:+UnlockExperimentalVMOptions -XX:ParallelGCThreads=8 Several sources on the internet indicate that if I were to print out the 1.5 GB of log files, it would require enough paper to fill the bed of a pick up truck. Of course, it would be fruitless to try to scan the log files by hand. Tools will be required to summarize the contents of the log files. Others have encountered large Java garbage collection log files. There are existing tools to analyze the log files: IBM’s GC toolkit The chewiebug GCViewer gchisto HPjmeter Instead of using one of the other tools listed, I decide to parse the log files with standard Unix tools, and analyze the data with R. Data Cleansing The log files arrived in two different formats. I guess that the difference is that one set of log files was generated using a more verbose option, maybe -XX:+PrintHeapAtGC, and the other set of log files was generated without that option. Format 1 In some of the log files, the log files with the less verbose format, a single trace, i.e. the report of a singe garbage collection event, looks like this: {Heap before GC invocations=12280 (full 61): garbage-first heap total 9437184K, used 7499918K [0xfffffffd00000000, 0xffffffff40000000, 0xffffffff40000000) region size 4096K, 1 young (4096K), 0 survivors (0K) compacting perm gen total 262144K, used 144077K [0xffffffff40000000, 0xffffffff50000000, 0xffffffff50000000) the space 262144K, 54% used [0xffffffff40000000, 0xffffffff48cb3758, 0xffffffff48cb3800, 0xffffffff50000000) No shared spaces configured. 2014-05-14T07:24:00.988-0700: 60586.353: [GC pause (young) 7324M->7320M(9216M), 0.1567265 secs] Heap after GC invocations=12281 (full 61): garbage-first heap total 9437184K, used 7496533K [0xfffffffd00000000, 0xffffffff40000000, 0xffffffff40000000) region size 4096K, 0 young (0K), 0 survivors (0K) compacting perm gen total 262144K, used 144077K [0xffffffff40000000, 0xffffffff50000000, 0xffffffff50000000) the space 262144K, 54% used [0xffffffff40000000, 0xffffffff48cb3758, 0xffffffff48cb3800, 0xffffffff50000000) No shared spaces configured. } A simple grep can be used to extract a summary: $ grep "\[ GC pause (young" g1gc.log 2014-05-13T13:24:35.091-0700: 3.109: [GC pause (young) 20M->5029K(9216M), 0.0146328 secs] 2014-05-13T13:24:35.440-0700: 3.459: [GC pause (young) 9125K->6077K(9216M), 0.0086723 secs] 2014-05-13T13:24:37.581-0700: 5.599: [GC pause (young) 25M->8470K(9216M), 0.0203820 secs] 2014-05-13T13:24:42.686-0700: 10.704: [GC pause (young) 44M->15M(9216M), 0.0288848 secs] 2014-05-13T13:24:48.941-0700: 16.958: [GC pause (young) 51M->20M(9216M), 0.0491244 secs] 2014-05-13T13:24:56.049-0700: 24.066: [GC pause (young) 92M->26M(9216M), 0.0525368 secs] 2014-05-13T13:25:34.368-0700: 62.383: [GC pause (young) 602M->68M(9216M), 0.1721173 secs] But that format wasn't easily read into R, so I needed to be a bit more tricky. I used the following Unix command to create a summary file that was easy for R to read. $ echo "SecondsSinceLaunch BeforeSize AfterSize TotalSize RealTime" $ grep "\[GC pause (young" g1gc.log | grep -v mark | sed -e 's/[A-SU-z\(\),]/ /g' -e 's/->/ /' -e 's/: / /g' | more SecondsSinceLaunch BeforeSize AfterSize TotalSize RealTime 2014-05-13T13:24:35.091-0700 3.109 20 5029 9216 0.0146328 2014-05-13T13:24:35.440-0700 3.459 9125 6077 9216 0.0086723 2014-05-13T13:24:37.581-0700 5.599 25 8470 9216 0.0203820 2014-05-13T13:24:42.686-0700 10.704 44 15 9216 0.0288848 2014-05-13T13:24:48.941-0700 16.958 51 20 9216 0.0491244 2014-05-13T13:24:56.049-0700 24.066 92 26 9216 0.0525368 2014-05-13T13:25:34.368-0700 62.383 602 68 9216 0.1721173 Format 2 In some of the log files, the log files with the more verbose format, a single trace, i.e. the report of a singe garbage collection event, was more complicated than Format 1. Here is a text file with an example of a single G1GC trace in the second format. As you can see, it is quite complicated. It is nice that there is so much information available, but the level of detail can be overwhelming. I wrote this awk script (download) to summarize each trace on a single line. #!/usr/bin/env awk -f BEGIN { printf("SecondsSinceLaunch IncrementalCount FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize\n") } ###################### # Save count data from lines that are at the start of each G1GC trace. # Each trace starts out like this: # {Heap before GC invocations=14 (full 0): # garbage-first heap total 9437184K, used 325496K [0xfffffffd00000000, 0xffffffff40000000, 0xffffffff40000000) ###################### /{Heap.*full/{ gsub ( "\\)" , "" ); nf=split($0,a,"="); split(a[2],b," "); getline; if ( match($0, "first") ) { G1GC=1; IncrementalCount=b[1]; FullCount=substr( b[3], 1, length(b[3])-1 ); } else { G1GC=0; } } ###################### # Pull out time stamps that are in lines with this format: # 2014-05-12T14:02:06.025-0700: 94.312: [GC pause (young), 0.08870154 secs] ###################### /GC pause/ { DateTime=$1; SecondsSinceLaunch=substr($2, 1, length($2)-1); } ###################### # Heap sizes are in lines that look like this: # [ 4842M->4838M(9216M)] ###################### /\[ .*]$/ { gsub ( "\\[" , "" ); gsub ( "\ \]" , "" ); gsub ( "->" , " " ); gsub ( "\\( " , " " ); gsub ( "\ \)" , " " ); split($0,a," "); if ( split(a[1],b,"M") > 1 ) {BeforeSize=b[1]*1024;} if ( split(a[1],b,"K") > 1 ) {BeforeSize=b[1];} if ( split(a[2],b,"M") > 1 ) {AfterSize=b[1]*1024;} if ( split(a[2],b,"K") > 1 ) {AfterSize=b[1];} if ( split(a[3],b,"M") > 1 ) {TotalSize=b[1]*1024;} if ( split(a[3],b,"K") > 1 ) {TotalSize=b[1];} } ###################### # Emit an output line when you find input that looks like this: # [Times: user=1.41 sys=0.08, real=0.24 secs] ###################### /\[Times/ { if (G1GC==1) { gsub ( "," , "" ); split($2,a,"="); UserTime=a[2]; split($3,a,"="); SysTime=a[2]; split($4,a,"="); RealTime=a[2]; print DateTime,SecondsSinceLaunch,IncrementalCount,FullCount,UserTime,SysTime,RealTime,BeforeSize,AfterSize,TotalSize; G1GC=0; } } The resulting summary is about 25X smaller that the original file, but still difficult for a human to digest. SecondsSinceLaunch IncrementalCount FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize ... 2014-05-12T18:36:34.669-0700: 3985.744 561 0 0.57 0.06 0.16 1724416 1720320 9437184 2014-05-12T18:36:34.839-0700: 3985.914 562 0 0.51 0.06 0.19 1724416 1720320 9437184 2014-05-12T18:36:35.069-0700: 3986.144 563 0 0.60 0.04 0.27 1724416 1721344 9437184 2014-05-12T18:36:35.354-0700: 3986.429 564 0 0.33 0.04 0.09 1725440 1722368 9437184 2014-05-12T18:36:35.545-0700: 3986.620 565 0 0.58 0.04 0.17 1726464 1722368 9437184 2014-05-12T18:36:35.726-0700: 3986.801 566 0 0.43 0.05 0.12 1726464 1722368 9437184 2014-05-12T18:36:35.856-0700: 3986.930 567 0 0.30 0.04 0.07 1726464 1723392 9437184 2014-05-12T18:36:35.947-0700: 3987.023 568 0 0.61 0.04 0.26 1727488 1723392 9437184 2014-05-12T18:36:36.228-0700: 3987.302 569 0 0.46 0.04 0.16 1731584 1724416 9437184 Reading the Data into R Once the GC log data had been cleansed, either by processing the first format with the shell script, or by processing the second format with the awk script, it was easy to read the data into R. g1gc.df = read.csv("summary.txt", row.names = NULL, stringsAsFactors=FALSE,sep="") str(g1gc.df) ## 'data.frame': 8307 obs. of 10 variables: ## $ row.names : chr "2014-05-12T14:00:32.868-0700:" "2014-05-12T14:00:33.179-0700:" "2014-05-12T14:00:33.677-0700:" "2014-05-12T14:00:35.538-0700:" ... ## $ SecondsSinceLaunch: num 1.16 1.47 1.97 3.83 6.1 ... ## $ IncrementalCount : int 0 1 2 3 4 5 6 7 8 9 ... ## $ FullCount : int 0 0 0 0 0 0 0 0 0 0 ... ## $ UserTime : num 0.11 0.05 0.04 0.21 0.08 0.26 0.31 0.33 0.34 0.56 ... ## $ SysTime : num 0.04 0.01 0.01 0.05 0.01 0.06 0.07 0.06 0.07 0.09 ... ## $ RealTime : num 0.02 0.02 0.01 0.04 0.02 0.04 0.05 0.04 0.04 0.06 ... ## $ BeforeSize : int 8192 5496 5768 22528 24576 43008 34816 53248 55296 93184 ... ## $ AfterSize : int 1400 1672 2557 4907 7072 14336 16384 18432 19456 21504 ... ## $ TotalSize : int 9437184 9437184 9437184 9437184 9437184 9437184 9437184 9437184 9437184 9437184 ... head(g1gc.df) ## row.names SecondsSinceLaunch IncrementalCount ## 1 2014-05-12T14:00:32.868-0700: 1.161 0 ## 2 2014-05-12T14:00:33.179-0700: 1.472 1 ## 3 2014-05-12T14:00:33.677-0700: 1.969 2 ## 4 2014-05-12T14:00:35.538-0700: 3.830 3 ## 5 2014-05-12T14:00:37.811-0700: 6.103 4 ## 6 2014-05-12T14:00:41.428-0700: 9.720 5 ## FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize ## 1 0 0.11 0.04 0.02 8192 1400 9437184 ## 2 0 0.05 0.01 0.02 5496 1672 9437184 ## 3 0 0.04 0.01 0.01 5768 2557 9437184 ## 4 0 0.21 0.05 0.04 22528 4907 9437184 ## 5 0 0.08 0.01 0.02 24576 7072 9437184 ## 6 0 0.26 0.06 0.04 43008 14336 9437184 Basic Statistics Once the data has been read into R, simple statistics are very easy to generate. All of the numbers from high school statistics are available via simple commands. For example, generate a summary of every column: summary(g1gc.df) ## row.names SecondsSinceLaunch IncrementalCount FullCount ## Length:8307 Min. : 1 Min. : 0 Min. : 0.0 ## Class :character 1st Qu.: 9977 1st Qu.:2048 1st Qu.: 0.0 ## Mode :character Median :12855 Median :4136 Median : 12.0 ## Mean :12527 Mean :4156 Mean : 31.6 ## 3rd Qu.:15758 3rd Qu.:6262 3rd Qu.: 61.0 ## Max. :55484 Max. :8391 Max. :113.0 ## UserTime SysTime RealTime BeforeSize ## Min. :0.040 Min. :0.0000 Min. : 0.0 Min. : 5476 ## 1st Qu.:0.470 1st Qu.:0.0300 1st Qu.: 0.1 1st Qu.:5137920 ## Median :0.620 Median :0.0300 Median : 0.1 Median :6574080 ## Mean :0.751 Mean :0.0355 Mean : 0.3 Mean :5841855 ## 3rd Qu.:0.920 3rd Qu.:0.0400 3rd Qu.: 0.2 3rd Qu.:7084032 ## Max. :3.370 Max. :1.5600 Max. :488.1 Max. :8696832 ## AfterSize TotalSize ## Min. : 1380 Min. :9437184 ## 1st Qu.:5002752 1st Qu.:9437184 ## Median :6559744 Median :9437184 ## Mean :5785454 Mean :9437184 ## 3rd Qu.:7054336 3rd Qu.:9437184 ## Max. :8482816 Max. :9437184 Q: What is the total amount of User CPU time spent in garbage collection? sum(g1gc.df$UserTime) ## [1] 6236 As you can see, less than two hours of CPU time was spent in garbage collection. Is that too much? To find the percentage of time spent in garbage collection, divide the number above by total_elapsed_time*CPU_count. In this case, there are a lot of CPU’s and it turns out the the overall amount of CPU time spent in garbage collection isn’t a problem when viewed in isolation. When calculating rates, i.e. events per unit time, you need to ask yourself if the rate is homogenous across the time period in the log file. Does the log file include spikes of high activity that should be separately analyzed? Averaging in data from nights and weekends with data from business hours may alias problems. If you have a reason to suspect that the garbage collection rates include peaks and valleys that need independent analysis, see the “Time Series” section, below. Q: How much garbage is collected on each pass? The amount of heap space that is recovered per GC pass is surprisingly low: At least one collection didn’t recover any data. (“Min.=0”) 25% of the passes recovered 3MB or less. (“1st Qu.=3072”) Half of the GC passes recovered 4MB or less. (“Median=4096”) The average amount recovered was 56MB. (“Mean=56390”) 75% of the passes recovered 36MB or less. (“3rd Qu.=36860”) At least one pass recovered 2GB. (“Max.=2121000”) g1gc.df$Delta = g1gc.df$BeforeSize - g1gc.df$AfterSize summary(g1gc.df$Delta) ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 0 3070 4100 56400 36900 2120000 Q: What is the maximum User CPU time for a single collection? The worst garbage collection (“Max.”) is many standard deviations away from the mean. The data appears to be right skewed. summary(g1gc.df$UserTime) ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 0.040 0.470 0.620 0.751 0.920 3.370 sd(g1gc.df$UserTime) ## [1] 0.3966 Basic Graphics Once the data is in R, it is trivial to plot the data with formats including dot plots, line charts, bar charts (simple, stacked, grouped), pie charts, boxplots, scatter plots histograms, and kernel density plots. Histogram of User CPU Time per Collection I don't think that this graph requires any explanation. hist(g1gc.df$UserTime, main="User CPU Time per Collection", xlab="Seconds", ylab="Frequency") Box plot to identify outliers When the initial data is viewed with a box plot, you can see the one crazy outlier in the real time per GC. Save this data point for future analysis and drop the outlier so that it’s not throwing off our statistics. Now the box plot shows many outliers, which will be examined later, using times series analysis. Notice that the scale of the x-axis changes drastically once the crazy outlier is removed. par(mfrow=c(2,1)) boxplot(g1gc.df$UserTime,g1gc.df$SysTime,g1gc.df$RealTime, main="Box Plot of Time per GC\n(dominated by a crazy outlier)", names=c("usr","sys","elapsed"), xlab="Seconds per GC", ylab="Time (Seconds)", horizontal = TRUE, outcol="red") crazy.outlier.df=g1gc.df[g1gc.df$RealTime > 400,] g1gc.df=g1gc.df[g1gc.df$RealTime < 400,] boxplot(g1gc.df$UserTime,g1gc.df$SysTime,g1gc.df$RealTime, main="Box Plot of Time per GC\n(crazy outlier excluded)", names=c("usr","sys","elapsed"), xlab="Seconds per GC", ylab="Time (Seconds)", horizontal = TRUE, outcol="red") box(which = "outer", lty = "solid") Here is the crazy outlier for future analysis: crazy.outlier.df ## row.names SecondsSinceLaunch IncrementalCount ## 8233 2014-05-12T23:15:43.903-0700: 20741 8316 ## FullCount UserTime SysTime RealTime BeforeSize AfterSize TotalSize ## 8233 112 0.55 0.42 488.1 8381440 8235008 9437184 ## Delta ## 8233 146432 R Time Series Data To analyze the garbage collection as a time series, I’ll use Z’s Ordered Observations (zoo). “zoo is the creator for an S3 class of indexed totally ordered observations which includes irregular time series.” require(zoo) ## Loading required package: zoo ## ## Attaching package: 'zoo' ## ## The following objects are masked from 'package:base': ## ## as.Date, as.Date.numeric head(g1gc.df[,1]) ## [1] "2014-05-12T14:00:32.868-0700:" "2014-05-12T14:00:33.179-0700:" ## [3] "2014-05-12T14:00:33.677-0700:" "2014-05-12T14:00:35.538-0700:" ## [5] "2014-05-12T14:00:37.811-0700:" "2014-05-12T14:00:41.428-0700:" options("digits.secs"=3) times=as.POSIXct( g1gc.df[,1], format="%Y-%m-%dT%H:%M:%OS%z:") g1gc.z = zoo(g1gc.df[,-c(1)], order.by=times) head(g1gc.z) ## SecondsSinceLaunch IncrementalCount FullCount ## 2014-05-12 17:00:32.868 1.161 0 0 ## 2014-05-12 17:00:33.178 1.472 1 0 ## 2014-05-12 17:00:33.677 1.969 2 0 ## 2014-05-12 17:00:35.538 3.830 3 0 ## 2014-05-12 17:00:37.811 6.103 4 0 ## 2014-05-12 17:00:41.427 9.720 5 0 ## UserTime SysTime RealTime BeforeSize AfterSize ## 2014-05-12 17:00:32.868 0.11 0.04 0.02 8192 1400 ## 2014-05-12 17:00:33.178 0.05 0.01 0.02 5496 1672 ## 2014-05-12 17:00:33.677 0.04 0.01 0.01 5768 2557 ## 2014-05-12 17:00:35.538 0.21 0.05 0.04 22528 4907 ## 2014-05-12 17:00:37.811 0.08 0.01 0.02 24576 7072 ## 2014-05-12 17:00:41.427 0.26 0.06 0.04 43008 14336 ## TotalSize Delta ## 2014-05-12 17:00:32.868 9437184 6792 ## 2014-05-12 17:00:33.178 9437184 3824 ## 2014-05-12 17:00:33.677 9437184 3211 ## 2014-05-12 17:00:35.538 9437184 17621 ## 2014-05-12 17:00:37.811 9437184 17504 ## 2014-05-12 17:00:41.427 9437184 28672 Example of Two Benchmark Runs in One Log File The data in the following graph is from a different log file, not the one of primary interest to this article. I’m including this image because it is an example of idle periods followed by busy periods. It would be uninteresting to average the rate of garbage collection over the entire log file period. More interesting would be the rate of garbage collect in the two busy periods. Are they the same or different? Your production data may be similar, for example, bursts when employees return from lunch and idle times on weekend evenings, etc. Once the data is in an R Time Series, you can analyze isolated time windows. Clipping the Time Series data Flashing back to our test case… Viewing the data as a time series is interesting. You can see that the work intensive time period is between 9:00 PM and 3:00 AM. Lets clip the data to the interesting period:     par(mfrow=c(2,1)) plot(g1gc.z$UserTime, type="h", main="User Time per GC\nTime: Complete Log File", xlab="Time of Day", ylab="CPU Seconds per GC", col="#1b9e77") clipped.g1gc.z=window(g1gc.z, start=as.POSIXct("2014-05-12 21:00:00"), end=as.POSIXct("2014-05-13 03:00:00")) plot(clipped.g1gc.z$UserTime, type="h", main="User Time per GC\nTime: Limited to Benchmark Execution", xlab="Time of Day", ylab="CPU Seconds per GC", col="#1b9e77") box(which = "outer", lty = "solid") Cumulative Incremental and Full GC count Here is the cumulative incremental and full GC count. When the line is very steep, it indicates that the GCs are repeating very quickly. Notice that the scale on the Y axis is different for full vs. incremental. plot(clipped.g1gc.z[,c(2:3)], main="Cumulative Incremental and Full GC count", xlab="Time of Day", col="#1b9e77") GC Analysis of Benchmark Execution using Time Series data In the following series of 3 graphs: The “After Size” show the amount of heap space in use after each garbage collection. Many Java objects are still referenced, i.e. alive, during each garbage collection. This may indicate that the application has a memory leak, or may indicate that the application has a very large memory footprint. Typically, an application's memory footprint plateau's in the early stage of execution. One would expect this graph to have a flat top. The steep decline in the heap space may indicate that the application crashed after 2:00. The second graph shows that the outliers in real execution time, discussed above, occur near 2:00. when the Java heap seems to be quite full. The third graph shows that Full GCs are infrequent during the first few hours of execution. The rate of Full GC's, (the slope of the cummulative Full GC line), changes near midnight.   plot(clipped.g1gc.z[,c("AfterSize","RealTime","FullCount")], xlab="Time of Day", col=c("#1b9e77","red","#1b9e77")) GC Analysis of heap recovered Each GC trace includes the amount of heap space in use before and after the individual GC event. During garbage coolection, unreferenced objects are identified, the space holding the unreferenced objects is freed, and thus, the difference in before and after usage indicates how much space has been freed. The following box plot and bar chart both demonstrate the same point - the amount of heap space freed per garbage colloection is surprisingly low. par(mfrow=c(2,1)) boxplot(as.vector(clipped.g1gc.z$Delta), main="Amount of Heap Recovered per GC Pass", xlab="Size in KB", horizontal = TRUE, col="red") hist(as.vector(clipped.g1gc.z$Delta), main="Amount of Heap Recovered per GC Pass", xlab="Size in KB", breaks=100, col="red") box(which = "outer", lty = "solid") This graph is the most interesting. The dark blue area shows how much heap is occupied by referenced Java objects. This represents memory that holds live data. The red fringe at the top shows how much data was recovered after each garbage collection. barplot(clipped.g1gc.z[,c("AfterSize","Delta")], col=c("#7570b3","#e7298a"), xlab="Time of Day", border=NA) legend("topleft", c("Live Objects","Heap Recovered on GC"), fill=c("#7570b3","#e7298a")) box(which = "outer", lty = "solid") When I discuss the data in the log files with the customer, I will ask for an explaination for the large amount of referenced data resident in the Java heap. There are two are posibilities: There is a memory leak and the amount of space required to hold referenced objects will continue to grow, limited only by the maximum heap size. After the maximum heap size is reached, the JVM will throw an “Out of Memory” exception every time that the application tries to allocate a new object. If this is the case, the aplication needs to be debugged to identify why old objects are referenced when they are no longer needed. The application has a legitimate requirement to keep a large amount of data in memory. The customer may want to further increase the maximum heap size. Another possible solution would be to partition the application across multiple cluster nodes, where each node has responsibility for managing a unique subset of the data. Conclusion In conclusion, R is a very powerful tool for the analysis of Java garbage collection log files. The primary difficulty is data cleansing so that information can be read into an R data frame. Once the data has been read into R, a rich set of tools may be used for thorough evaluation.

    Read the article

  • iPhone: CoreGraphics and memory management

    - by carloe
    Can someone tell me what I am doing wrong here? I use this method to flip through pages in a PDF. But something in the code seems to not be released properly because every-time I pull a PDF page that contains an image my memory footprint increases. I am fairly new to CoreGraphics, and can't for the life of me figure out where this method would leak memory. -(UIImage *)pageAtIndex:(NSInteger)pageNumber withWidth:(CGFloat)width andHeight:(CGFloat)height { if((pageNumber>0) && (pageNumber<=pageCount)) { CGFloat scaleRatio; // multiplier by which the PDF Page will be scaled UIGraphicsBeginImageContext(CGSizeMake(width, height)); CGContextRef context = UIGraphicsGetCurrentContext(); CGPDFPageRef page = CGPDFDocumentGetPage(pdf, pageNumber); CGRect pageRect = CGPDFPageGetBoxRect(page, kCGPDFBleedBox); //Figure out the orientation of the PDF page and set the scaleRatio accordingly if(pageRect.size.width/pageRect.size.height < 1.0) { scaleRatio = height/pageRect.size.height; } else { scaleRatio = width/pageRect.size.width; } //Calculate the offset to center the image CGFloat xOffset = 0.0; CGFloat yOffset = height; if(pageRect.size.width*scaleRatio<width) { xOffset = (width/2)-(pageRect.size.width*scaleRatio/2); } else { yOffset = height-((height/2)-(pageRect.size.height*scaleRatio/2)); } CGContextTranslateCTM(context, xOffset, yOffset); CGContextScaleCTM(context, 1.0, -1.0); CGContextSaveGState(context); CGAffineTransform pdfTransform = CGPDFPageGetDrawingTransform(page, kCGPDFBleedBox, CGRectMake(0, 0, pageRect.size.width, pageRect.size.height), 0, true); pdfTransform = CGAffineTransformScale(pdfTransform, scaleRatio, scaleRatio); CGContextConcatCTM(context, pdfTransform); CGContextDrawPDFPage(context, page); UIImage *tempImage = [UIGraphicsGetImageFromCurrentImageContext() retain]; CGContextRestoreGState(context); UIGraphicsEndPDFContext(); UIGraphicsEndImageContext(); return tempImage; } return nil; }

    Read the article

  • Perl XML SAX parser emulating XML::Simple record for record

    - by DVK
    Short Q summary: I am looking a fast XML parser (most likely a wrapper around some standard SAX parser) which will produce per-record data structure 100% identical to those produced by XML::Simple. Details: We have a large code infrastructure which depends on processing records one-by-one and expects the record to be a data structure in a format produced by XML::Simple since it always used XML::Simple since early Jurassic era. An example simple XML is: <root> <rec><f1>v1</f1><f2>v2</f2></rec> <rec><f1>v1b</f1><f2>v2b</f2></rec> <rec><f1>v1c</f1><f2>v2c</f2></rec> </root> And example rough code is: sub process_record { my ($obj, $record_hash) = @_; # do_stuff } my $records = XML::Simple->XMLin(@args)->{root}; foreach my $record (@$records) { $obj->process_record($record) }; As everyone knows XML::Simple is, well, simple. And more importantly, it is very slow and a memory hog - due to being a DOM parser and needing to build/store 100% of data in memory. So, it's not the best tool for parsing an XML file consisting of large amount of small records record-by-record. However, re-writing the entire code (which consist of large amount of "process_record"-like methods) to work with standard SAX parser seems like an big task not worth the resources, even at the cost of living with XML::Simple. What I'm looking for is an existing module which will probably be based on a SAX parser (or anything fast with small memory footprint) which can be used to produce $record hashrefs one by one based on the XML pictured above that can be passed to $obj->process_record($record) and be 100% identical to what XML::Simple's hashrefs would have been. I don't care much what the interface of the new module is - e.g whether I need to call next_record() or give it a callback coderef accepting a record.

    Read the article

  • PHP Frameworks: Codeigniter vs. Yii vs. Custom?

    - by Industrial
    Hi everybody, I have used codeigniter for a some years now. Why I chosed to work with codeigniter back then? Pretty much for the extensive documentation that were available and the big user community. It made me as a totally newbie to the MVC pattern able to get a site up and running really fast. I think what is priorited from my side is that the framework doesn't affect performance too much, which Codeigniter seems to be pretty good at (when compared to other frameworks out there) and Yii, an even better option. Since the time has gone from when I started out with codeigniter, the project sizes have also increased and thereby the demand of the framework and it's footprint on the code. I have thought a few times about writing a whole new MVC framework to do only the thing's I want it to do, but it feels like reinventing the wheel and I cannot yet justify it. I am not sure whether or not it's a good solution to build a site that have the potential to become really big on either Yii or Codeigniter. I have tried to find as much as possible documentation about this comparision/issue online before posting here, but have found very few real-life arguments and stories from people that have shifted between the two PHP frameworks or have been in the same situation as me. So - what's your thoughts about Codeigniter vs. Yii vs. going custom? References: http://daniel.carrera.bz/2009/01/comparison-of-php-frameworks-part-i/ http://www.beyondcoding.com/2009/03/02/choosing-a-php-framework-round-2-yii-vs-kohana-vs-codeigniter/

    Read the article

  • iPad + OpenGL ES2. Why the Puzzling Virtual Memory Spike During Device Reorientation?

    - by dugla
    I've been spending the afternoon starring at Xcode Instruments memory monitor trying to decipher the following memory issue. I have a fullscreen OpenGL ES2 app running on iPad. I am fanatical about memory issues so my retains/releases are all nicely balanced. I closely monitor memory leaks. My app is basically squeeky clean. Except occassionally when I reorient the device. Portrait to Landscape. Back and forth I rock the device stress testing my discarding and rebuilding of the OpenGL framebuffer. The ambient memory footprint of my app is about 70MB Real Mems and 180MB Virtual Mems. Real memory hardly varies at all during device rotations. However the virtual mems reading sometimes briefly spikes up to 250MB and then recedes back to 180MB. No real pattern. But clearly related discarding/rebuilding the framebuffer. I see random memory warnings in my NSlogs but the app just hums along, no worries. 1) Since iPhone OS devices don't have VM could someone explain to me what the VM reading actually means? 2) My app totally leak free and generally bulletproof dispite the VM spikes. Never crashes. Rock solid. Should I be concerned about this? 3) There is clearly something happening in OpenGL framebuffer land that is causing this but I am using the API in the proper way: paraphrasing: Discarding the framebuffer: glDeleteRenderbuffers(1, &m_colorbuffer); glDeleteFramebuffers(1, &m_framebuffer); Rebuilding the framebuffer: glGenFramebuffers(1, &m_framebuffer); glGenRenderbuffers(1, &m_colorbuffer); Is there some other memory flushing trick I have missed? Thanks for any insight. Cheers, Doug

    Read the article

  • Feasibility of using Silverlight for web and windows client with common code base for data intensive

    - by Kabeer
    Hello. Recently in a conversation, someone suggested me to make use of Silverlight if I am targeting a web client and a windows client for the same application. This will cut down my effort for supporting the contrast in both presentation layers. Mine is a product, that will be deployed in enterprises. Both web and windows clients are desirable. With the above context, I have few queries: Is it advisable to adopt the recommended approach and whether this approach is becoming a trend? Besides, some configuration & deployment tweaking, will this significantly reduce effort on the presentation layer? Is there a possibility that my future prospects (for this product) will resist Silverlight footprint? Will I be able to make use of the ASP.Net MVC pattern? Will there be any performance implication for the web client? Will Silverlight support incremental load of controls? If my back-end includes SSRS, will I be able to harness all its front end features with Silverlight? Will I be able to support additional devices with same code base in future? Mine is a very data intensive application from both, data entry and reporting perspective. Is it advisable to use 3rd party controls (like Telerik) for improved user experience and developer productivity? Are their any professional quality open source Silverlight controls (library) available? Further, I seek information of best practices in the context I shared above.

    Read the article

  • Force full garbage collection when memory occupation goes beyond a certain threshold

    - by Silvio Donnini
    I have a server application that, in rare occasions, can allocate large chunks of memory. It's not a memory leak, as these chunks can be claimed back by the garbage collector by executing a full garbage collection. Normal garbage collection frees amounts of memory that are too small: it is not adequate in this context. The garbage collector executes these full GCs when it deems appropriate, namely when the memory footprint of the application nears the allotted maximum specified with -Xmx. That would be ok, if it wasn't for the fact that these problematic memory allocations come in bursts, and can cause OutOfMemoryErrors due to the fact that the jvm is not able to perform a GC quickly enough to free the required memory. If I manually call System.gc() beforehand, I can prevent this situation. Anyway, I'd prefer not having to monitor my jvm's memory allocation myself (or insert memory management into my application's logic); it would be nice if there was a way to run the virtual machine with a memory threshold, over which full GCs would be executed automatically, in order to release very early the memory I'm going to need. Long story short: I need a way (a command line option?) to configure the jvm in order to release early a good amount of memory (i.e. perform a full GC) when memory occupation reaches a certain threshold, I don't care if this slows my application down every once in a while. All I've found till now are ways to modify the size of the generations, but that's not what I need (at least not directly). I'd appreciate your suggestions, Silvio P.S. I'm working on a way to avoid large allocations, but it could require a long time and meanwhile my app needs a little stability

    Read the article

  • Perl XML SAX parser emulating XML::Simple record for record

    - by DVK
    Short Q summary: I am looking a fast XML parser (most likely a wrapper around some standard SAX parser) which will produce per-record data structure 100% identical to those produced by XML::Simple. Details: We have a large code infrastructure which depends on processing records one-by-one and expects the record to be a data structure in a format produced by XML::Simple since it always used XML::Simple since early Jurassic era. An example simple XML is: <root> <rec><f1>v1</f1><f2>v2</f2></rec> <rec><f1>v1b</f1><f2>v2b</f2></rec> <rec><f1>v1c</f1><f2>v2c</f2></rec> </root> And example rough code is: sub process_record { my ($obj, $record_hash) = @_; # do_stuff } my $records = XML::Simple->XMLin(@args)->{root}; foreach my $record (@$records) { $obj->process_record($record) }; As everyone knows XML::Simple is, well, simple. And more importantly, it is very slow and a memory hog - due to being a DOM parser and needing to build/store 100% of data in memory. So, it's not the best tool for parsing an XML file consisting of large amount of small records record-by-record. However, re-writing the entire code (which consist of large amount of "process_record"-like methods) to work with standard SAX parser seems like an big task not worth the resources, even at the cost of living with XML::Simple. What I'm looking for is an existing module which will probably be based on a SAX parser (or anything fast with small memory footprint) which can be used to produce $record hashrefs one by one based on the XML pictured above that can be passed to $obj->process_record($record) and be 100% identical to what XML::Simple's hashrefs would have been.

    Read the article

  • Are SharePoint site templates really less performant than site definitions?

    - by Jim
    So, it seems in the SharePoint blogosphere that everybody just copies and pastes the same bullet points from other blogs. One bullet point I've seen is that SharePoint site templates are less performant than site definitions because site definitions are stored on the file system. Is that true? It seems odd that site templates would be less performant. It's my understanding that all site content lives in a database, whether you use a site template or a site definition. A site template is applied once to the database, and from then on the site should not care if the content was created using a site template or not. So, does anybody have an architectural reason why a site template would be less performant than a site definition? Edit: Links to the blogs that say there is a performance difference: From MSDN: Because it is slow to store templates in and retrieve them from the database, site templates can result in slower performance. From DevX: However, user templates in SharePoint can lead to performance problems and may not be the best approach if you're trying to create a set of reusable templates for an entire organization. From IT Footprint: Because it is slow to store templates in and retrieve them from the database, site templates can result in slower performance. Templates in the database are compiled and executed every time a page is rendered. From Branding SharePoint:Custom site definitions hold the following advantages over custom templates: Data is stored directly on the Web servers, so performance is typically better. At a minimum, I think the above articles are incomplete, and I think several are misleading based on what I know of SharePoints architecture. I read another blog post that argued against the performance differences, but I can't find the link.

    Read the article

  • Maintaining a pool of DAO Class instances vs doing new operator

    - by Fazal
    we have been trying to benchmark our application performance in multiple way for sometime now. I always believed that object creation in java using Class.newInstance() was not slow (at least after 1.4 version of java). But we anyways did a test to use newInstance method vs mainitain an object pool of 1000 objects. We did about 200K iterations of loading data from DB using JDBC and populating these objects. I was amazed (even shocked) to see that newInstance code compared to object pool code was almost 10 times slower. These objects represent tables with about 50 fields and all string type. Can someone share there thoughts on this issue as now I am more confused if object pooling of atleast some DAO instances is a better option. The pool size as I see right now should be large enough to meet size of average requests. There is a flip side as my memory footprint will go up but I am beginning to wonder if this kind of idea makes sense atleast for some of the DAO entities representing tables of about 50 or more columns Please share your ideas and let me know if this has been tried by someone or am I missing some point here

    Read the article

  • Fast Lightweight Image Comparisson Metric Algorithm

    - by gav
    Hi All, I am developing an application for the Android platform which contains 1000+ image filters that have been 'evolved'. When a user selects a photo I want to present the most relevant filters first. This 'relevance' should be dependent on previous use cases. I have already developed tools that register when a filtered image is saved; this combination of filter and image can be seen as the training data for my system. The issue is that the comparison must occur between selecting an image and the next screen coming up. From a UI point of view I need the whole process to take less that 4 seconds; select an image- obtain a metric to use for similarity - check against use cases - return 6 closest matches. I figure with 4 seconds I can use animations and progress dialogs to keep the user happy. Due to platform contraints I am fairly limited in the computational expense of the algorithm. I have implemented a technique adapted from various online tutorials for running C code on the G1 and hence this language is available Specific Constraints; Qualcomm® MSM7201A™, 528 MHz Processor 320 x 480 Pixel bitmap in 32 bit ARGB ~ 2 seconds computational time for the native method to get the metric ~ 2 seconds to compare the metric of the current image with training data This is an academic project so all ideas are welcome, anything you can think of or have heard about would be of interest to me. My ideas; I want to keep the complexity down (O(n*m)?) by using pixel data only rather than a neighbourhood function I was looking at using the Colour historgram/Greyscale histogram/Texture/Entropy of the image, combining them to make the measure. There will be an obvious loss of information but I need the resultant metric to be substantially smaller than the memory footprint of the image (~0.512 MB) As I said, any ideas to direct my research would be fantastic. Kind regards, Gavin

    Read the article

  • Unit testing opaque structure based C API

    - by Nicolas Goy
    I have a library I wrote with API based on opaque structures. Using opaque structures has a lot of benefits and I am very happy with it. Now that my API are stable in term of specifications, I'd like to write a complete battery of unit test to ensure a solid base before releasing it. My concern is simple, how do you unit test API based on opaque structures where the main goal is to hide the internal logic? For example, let's take a very simple object, an array with a very simple test: WSArray a = WSArrayCreate(); int foo = 5; WSArrayAppendValue(a, &foo); int *bar = WSArrayGetValueAtIndex(a, 0); if(&foo != bar) printf("Eroneous value returned\n"); else printf("Good value returned\n"); WSRelease(a); Of course, this tests some facts, like the array actually acts as wanted with 1 value, but when I write unit tests, at least in C, I usualy compare the memory footprint of my datastructures with a known state. In my example, I don't know if some internal state of the array is broken. How would you handle that? I'd really like to avoid adding codes in the implementation files only for unit testings, I really emphasis loose coupling of modules, and injecting unit tests into the implementation would seem rather invasive to me. My first thought was to include the implementation file into my unit test, linking my unit test statically to my library. For example: #include <WS/WS.h> #include <WS/Collection/Array.c> static void TestArray(void) { WSArray a = WSArrayCreate(); /* Structure members are available because we included Array.c */ printf("%d\n", a->count); } Is that a good idea? Of course, the unit tests won't benefit from encapsulation, but they are here to ensure it's actually working.

    Read the article

  • How can I reuse .NET resources in multiple executables?

    - by Brandon
    I have an existing application that I'm supposed to take and create a "mini" version of. Both are localized apps and we would like to reuse the resources in the main application as is. So, here's the basic structure of my apps: MainApplication.csproj /Properties/Resources.resx /MainUserControl.xaml (uses strings in Properties/Resources.resx) /MainUserControl.xaml.cs MiniApplication.csproj link to MainApplication/Properties/Resources.resx link to MainApplication/MainUserControl.xaml link to MainApplication/MainUserControl.xaml.cs MiniApplication.xaml (tries to use MainUserControl from MainApplication link) So, in summary, I've added the needed resources and user control from the main application as links in the mini application. However, when I run the mini application, I get the following exception. I'm guessing it's having trouble with the different namespaces, but how do I fix? Could not find any resources appropriate for the specified culture or the neutral culture. Make sure \"MainApplication.Properties.Resources.resources\" was correctly embedded or linked into assembly \"MiniApplication\" at compile time, or that all the satellite assemblies required are loadable and fully signed. FYI, I know I could put the user control in a user control library but the problem is that the mini application needs to have a small footprint. So, we only want to include what we need.

    Read the article

  • Why are references compacted inside Perl lists?

    - by parkan
    Putting a precompiled regex inside two different hashes referenced in a list: my @list = (); my $regex = qr/ABC/; push @list, { 'one' => $regex }; push @list, { 'two' => $regex }; use Data::Dumper; print Dumper(\@list); I'd expect: $VAR1 = [ { 'one' => qr/(?-xism:ABC)/ }, { 'two' => qr/(?-xism:ABC)/ } ]; But instead we get a circular reference: $VAR1 = [ { 'one' => qr/(?-xism:ABC)/ }, { 'two' => $VAR1->[0]{'one'} } ]; This will happen with indefinitely nested hash references and shallowly copied $regex. I'm assuming the basic reason is that precompiled regexes are actually references, and references inside the same list structure are compacted as an optimization (\$scalar behaves the same way). I don't entirely see the utility of doing this (presumably a reference to a reference has the same memory footprint), but maybe there's a reason based on the internal representation Is this the correct behavior? Can I stop it from happening? Aside from probably making GC more difficult, these circular structures create pretty serious headaches. For example, iterating over a list of queries that may sometimes contain the same regular expression will crash the MongoDB driver with a nasty segfault (see https://rt.cpan.org/Public/Bug/Display.html?id=58500)

    Read the article

  • Python, Ruby, and C#: Use cases?

    - by thaorius
    Hi everyone. For as long as I can remember, I've always had a "favorite" language, which I use for most projects, until, for some particular reason, there is no way/point on using it for project XYZ. At that point, I find myself rusty (and sometimes outdated) on other languages+libraries+toolchains. So I decided, I would just use some languages/libs/tools for some things, and some for other, effectively keeping them fresh (there would obviously be exceptions, I'm not looking for an arbitrary rule set, but some guidelines). I wanted an opinion on what would be your standard use cases (new projects) for Python, Ruby, and C# (Mono). At the moment, I have time like this:Languages: C#: Mid-Large Sized Projects (mainly server-side daemons) High Performance (I hardly ever need C's performance, but Python just doesn't cut it) Relatively Low Footprint (vs the JVM, for example) Ruby: Web Applications Python: General Use Scripts (automation, system config, etc) Small-Mid Sized Projects Prototyping Web Applications About Ruby, I have no idea what to use it for that I can't use Python for (specially considering Python is more easily found installed by default). And I like both languages (though I'm really new to Ruby), which makes things even worse. As for C#, I have not used a Windows powered computer in a few years, I don't make things for Windows computers, and I don't mind waiting for Mono to implement some new features. That being said, I haven't found many people on the internet using it for server-sided *nix programming (not web related). I would appreciate some insight on this too. Thanks for your time.

    Read the article

  • Unable to set up ODBC after installing ODAC (Xcopy)

    - by rwilson513
    We are trying to use ODAC Xcopy to minimize the footprint of installing Oracle 11g Client. Currently, we use the Oracle 11g Admin install (~700mb). I've tried using the ODAC Xcopy, and that works. However, the only issue I now have is that I cannot set up an ODBC on the target system by just installing the ODAC Xcopy. After installing ODAC (Windows XP fyi), I go to Control Panel--Admin Tools--Data Sources (ODBC)--System DSN--Add--Microsoft ODBC for Oracle. I get the following error: "The Oracle(tm) client and networking components were not found. These components are supplied by Oracle and are part of the Oracle Version 7.3 (or greater) client software installation. You will be unable to use this driver until these components have been installed." I've tried editing the registry and creating the same keys that the Oracle Admin install creates, but still no luck. I'm not sure how to get past this. Any suggestions? Thanks in advance.

    Read the article

  • Real time embeddable http server library required

    - by Howard May
    Having looked at several available http server libraries I have not yet found what I am looking for and am sure I can't be the first to have this set of requirements. I need a library which presents an API which is 'pipelined'. Pipelining is used to describe an HTTP feature where multiple HTTP requests can be sent across a TCP link at a time without waiting for a response. I want a similar feature on the library API where my application can receive all of those request without having to send a response (I will respond but want the ability to process multiple requests at a time to reduce the impact of internal latency). So the web server library will need to support the following flow 1) HTTP Client transmits http request 1 2) HTTP Client transmits http request 2 ... 3) Web Server Library receives request 1 and passes it to My Web Server App 4) My Web Server App receives request 1 and dispatches it to My System 5) Web Server receives request 2 and passes it to My Web Server App 6) My Web Server App receives request 2 and dispatches it to My System 7) My Web Server App receives response to request 1 from My System and passes it to Web Server 8) Web Server transmits HTTP response 1 to HTTP Client 9) My Web Server App receives response to request 2 from My System and passes it to Web Server 10) Web Server transmits HTTP response 2 to HTTP Client Hopefully this illustrates my requirement. There are two key points to recognise. Responses to the Web Server Library are asynchronous and there may be several HTTP requests passed to My Web Server App with responses outstanding. Additional requirements are Embeddable into an existing 'C' application Small footprint; I don't need all the functionality available in Apache etc. Efficient; will need to support thousands of requests a second Allows asynchronous responses to requests; their is a small latency to responses and given the required request throughput a synchronous architecture is not going to work for me. Support persistent TCP connections Support use with Server-Push Comet connections Open Source / GPL support for HTTPS Portable across linux, windows; preferably more. I will be very grateful for any recommendation Best Regards

    Read the article

  • What is the best way to find a processed memory allocations in terms of C# objects

    - by Shantaram
    I have written various C# console based applications, some of them long running some not, which can over time have a large memory foot print. When looking at the windows perofrmance monitor via the task manager, the same question keeps cropping up in my mind; how do I get a break down of the number objects by type that are contributing to this footprint; and which of those are f-reachable and those which aren't and hence can be collected. On numerous occasions I've performed a code inspection to ensure that I am not unnecessarily holding on objects longer than required and disposing of objects with the using construct. I have also recently looked at employing the CG.Collect method when I have released a large number of objects (for example held in a collection which has just been cleared). However, I am not so sure that this made that much difference, so I threw that code away. I am guessing that there are tools in sysinternals suite than can help to resolve these memory type quiestions but I am not sure which and how to use them. The alternative would be to pay for a third party profiling tool such as JetBrains dotTrace; but I need to make sure that I've explored the free options first before going cap in hand to my manager.

    Read the article

  • Datastructure choices for highspeed and memory efficient detection of duplicate of strings

    - by Jonathan Holland
    I have a interesting problem that could be solved in a number of ways: I have a function that takes in a string. If this function has never seen this string before, it needs to perform some processing. If the function has seen the string before, it needs to skip processing. After a specified amount of time, the function should accept duplicate strings. This function may be called thousands of time per second, and the string data may be very large. This is a highly abstracted explanation of the real application, just trying to get down to the core concept for the purpose of the question. The function will need to store state in order to detect duplicates. It also will need to store an associated timestamp in order to expire duplicates. It does NOT need to store the strings, a unique hash of the string would be fine, providing there is no false positives due to collisions (Use a perfect hash?), and the hash function was performant enough. The naive implementation would be simply (in C#): Dictionary<String,DateTime> though in the interest of lowering memory footprint and potentially increasing performance I'm evaluating a custom data structures to handle this instead of a basic hashtable. So, given these constraints, what would you use? EDIT, some additional information that might change proposed implementations: 99% of the strings will not be duplicates. Almost all of the duplicates will arrive back to back, or nearly sequentially. In the real world, the function will be called from multiple worker threads, so state management will need to be synchronized.

    Read the article

  • This process does not work in JavaScript: createElement() -> setAttribute('id') -> getElementById()

    - by kristovaher
    I was so sure that this question has been answered a thousand times before, but I've been unable to find an answer in StackOverflow. If there is already an answer and I was unable to find it then I apologize. I create hidden form elements dynamically like this: submitForm=document.getElementById('my-form'); var element=document.createElement('input'); element.id='hidden-form-data'; // or setAttribute('id','hidden-form-data'); element.name='my-hidden-form-data'; element.type='hidden'; element.value='my-data'; submitForm.appendChild(element); This works and the input field is created and it is taken into account when submitting the form. But I want to remove it after I have dynamically created it. I was sure that creating a new node this way would be 'correct' for browser and DOM, but apparently it is not. This returns null: element=document.getElementById('hidden-form-data'); if(element!=null){ element.parentNode.removeChild(element); } But it never gets removed and is always null. Is there any way I can remove a dynamically created node with an ID? Thank you! Please do not suggest jQuery, it's not possible to use jQuery for this, footprint is too heavy for such a small task I could not get a working answer from here, which was the closest thread I could find.

    Read the article

  • VS2010 + IE8 Debugging woes - Element not found

    - by Chin
    I am having great difficulty trying to debug with vs2010 and IE8, though I think the problem is more IE8 specific. When starting a debug session 9 times out of 10 I will have the following problem. IE tab says connecting.. - then after a 5 second wait I will get an error in VS saying element not found. Even when I click ok to dismiss the error, the IE window still shows connecting... I will then have to kill the IE process to be able to close IE to try again. Sometimes however I am lucky and it starts. But the whole thing is so random I have no clue where to start. One thing I have noticed is that I always have 2 IE processes started even though there is only one window open. One has a small footprint of 100k, I presume it is some kind of helper. I am using a static port with the built in WebDev server. If anyone has had similar problems please let me know how you resolved it. Its driving me nuts! thanks

    Read the article

  • Sheet and thread memory problem

    - by Xident
    Hi Guys, recently I started a project which can export some precalculated Grafix/Audio to files, for after processing. All I was doing is to put a new Window (with progressindicator and an Abort Button) in my main xib and opened it using the following code: [NSApp beginSheet: REC_Sheet modalForWindow: MOTHER_WINDOW modalDelegate: self didEndSelector: nil contextInfo: nil]; NSModalSession session=[NSApp beginModalSessionForWindow:REC_Sheet]; RECISNOTDONE=YES; while (RECISNOTDONE) { if ([NSApp runModalSession:session]!=NSRunContinuesResponse) break; usleep(100); } [NSApp endModalSession:session]; A Background Thread (pthread) was started earlier, to actually perform the work and save all the targas/wave file. Which worked great, but after an amount of time, it turned out that the main thread was not responding anymore and my memory footprint raised unstoppable. I tried to debug it with Instruments, and saw a lot of CFHash etc stuff growing to infinity. By accident i clicked below the sheet, and temporary it helped, the main thread (AppKit ?) was releasing it's stuff, but just for a little time. I can't explain it to me, first of all I thought it was the access from my thread to the Progressbar to update the Progress (intervalled at 0,5sec), so I cut it out. But even if I'm not updating anything and did nothing with the Progressbar, my Application eat up all the Memory, because of not releasing it's "Main-Event" or whatsoever Stuff. Is there any possibility to "drain" this Main thread Memory stuff (Runloop / NSApp call?). And why the heck doesn't the Main thread respond anymore (after this simple task) ??? I don't have a clou anymore, please help ! Thanks in advance ! P.S. How do you guys implement "threaded long task" Stuff and updating your gui ???

    Read the article

  • Suitable web framework for the following scenario

    - by Paralife
    I have the following scenario: I have a view in an Oracle server and all Iwant is to show that view in a web browser, along with an input field or two for basic filtering. No users, no authentication, just this view maybe with a column or two linking to a second page for master detail viewing. The children are just string descriptions of the columns of the master that contain IDs. No inserts or updates. The question is which is the JAVA based web framework of choice that can accomplish the above in the minimum amount of code lines code time(subjective but also kind of objective if someone has expirience with more than one or two frameworks) configuration effort deployment effort and requirements. dependencies and mem footprint Also: 6. Oracle APEX is not an option. 3,4 and 5 are maybe the same in the sense that they are everything except the functionality coding. I want something that I can compile, deploy by just FTPing to the database host, run and forget. (e.g. For the deployment aspect, Hudson way comes in mind (java -jar hudson.war and that's all)). Also: 3,4 have priority over 1 and 2. (Explanation with a rant: I dont mind coding a lot as long as it is application code and not "why the fuck do we still use javascript over http for everything" code) Thanks.

    Read the article

  • Java: multi-threaded maps: how do the implementations compare?

    - by user346629
    I'm looking for a good hash map implementation. Specifically, one that's good for creating a large number of maps, most of them small. So memory is an issue. It should be thread-safe (though losing the odd put might be an OK compromise in return for better performance), and fast for both get and put. And I'd also like the moon on a stick, please, with a side-order of justice. The options I know are: HashMap. Disastrously un-thread safe. ConcurrentHashMap. My first choice, but this has a hefty memory footprint - about 2k per instance. Collections.sychronizedMap(HashMap). That's working OK for me, but I'm sure there must be faster alternatives. Trove or Colt - I think neither of these are thread-safe, but perhaps the code could be adapted to be thread safe. Any others? Any advice on what beats what when? Any really good new hash map algorithms that Java could use an implementation of? Thanks in advance for your input!

    Read the article

  • Advanced Data Source Engine coming to Telerik Reporting Q1 2010

    This is the final blog post from the pre-release series. In it we are going to share with you some of the updates coming to our reporting solution in Q1 2010. A new Declarative Data Source Engine will be added to Telerik Reporting, that will allow full control over data management, and deliver significant gains in rendering performance and memory consumption. Some of the engines new features will be: Data source parameters - those parameters will be used to limit data retrieved from the data source to just the data needed for the report. Data source parameters are processed on the data source side, however only queried data is fetched to the reporting engine, rather than the full data source. This leads to lower memory consumption, because data operations are performed on queried data only, rather than on all data. As a result, only the queried data needs to be stored in the memory vs. the whole dataset, which was the case with the old approach Support for stored procedures - they will assist in achieving a consistent implementation of logic across applications, and are especially practical for performing repetitive tasks. A stored procedure stores the SQL statements and logic, which can then be executed in different reports and/or applications. Stored Procedures will not only save development time, but they will also improve performance, because each stored procedure is compiled on the data base server once, and then is reutilized. In Telerik Reporting, the stored procedure will also be parameterized, where elements of the SQL statement will be bound to parameters. These parameterized SQL queries will be handled through the data source parameters, and are evaluated at run time. Using parameterized SQL queries will improve the performance and decrease the memory footprint of your application, because they will be applied directly on the database server and only the necessary data will be downloaded on the middle tier or client machine; Calculated fields through expressions - with the help of the new reporting engine you will be able to use field values in formulas to come up with a calculated field. A calculated field is a user defined field that is computed "on the fly" and does not exist in the data source, but can perform calculations using the data of the data source object it belongs to. Calculated fields are very handy for adding frequently used formulas to your reports; Improved performance and optimized in-memory OLAP engine - the new data source will come with several improvements in how aggregates are calculated, and memory is managed. As a result, you may experience between 30% (for simpler reports) and 400% (for calculation-intensive reports) in rendering performance, and about 50% decrease in memory consumption. Full design time support through wizards - Declarative data sources are a great advance and will save developers countless hours of coding. In Q1 2010, and true to Telerik Reportings essence, using the new data source engine and its features requires little to no coding, because we have extended most of the wizards to support the new functionality. The newly extended wizards are available in VS2005/VS2008/VS2010 design-time. More features will be revealed on the product's what's new page when the new version is officially released in a few days. Also make sure you attend the free webinar on Thursday, March 11th that will be dedicated to the updates in Telerik Reporting Q1 2010. Did you know that DotNetSlackers also publishes .net articles written by top known .net Authors? We already have over 80 articles in several categories including Silverlight. Take a look: here.

    Read the article

< Previous Page | 10 11 12 13 14 15 16 17 18  | Next Page >