Skip to content Skip to sidebar Skip to footer

How To Get At This Data

I am looking to scrape the three items that are highlighted and bordered from the html sample below. I've also highlighted a few markers that look useful. How would you do this? A

Solution 1:

I would probably use an XML parser to get the text content first (or this: xmlString.replace(/<[^>]+>/g, "") to replace all tags with empty strings), then use the following regexes to extract the information you need:

/-OPR\s+(\d+\.\d+)/
/Bid:\s+(\d+\.\d+)/
/Ask:\s+(\d+\.\d+)/
/Open Interest:\s+(\d+,\d+)/

This process can easily be done in Node.js (more info) or with any other language that supports regular expressions.


live demo:

  • Waits 1 second, then removes tags.
  • Waits another second, then finds all patterns and creates a table.

// Demo: strips the markup inside #parsingStuff, then pulls the numeric value
// that follows each keyword out of the remaining text and shows the results
// in a small table.

var wait = true; // Set to false to execute instantly.

// Labels whose following numeric value should be extracted.
var keywords = ["-OPR", "Bid:", "Ask:", "Open Interest:"];

/**
 * Escapes regular-expression metacharacters so that `text` can be embedded
 * in a RegExp source and matched literally.
 *
 * @param {string} text - Literal text to escape.
 * @returns {string} The escaped text.
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Finds, for every label, the first number that follows it in `text`.
 *
 * A number is a run of digits optionally followed by "."/"," separated digit
 * groups (e.g. "0.83", "11,579", "579"). Whitespace between the label and
 * the number is optional, because stripping tags from compact markup can
 * leave none at all.
 *
 * @param {string} text - Plain text to search (tags already removed).
 * @param {string[]} labels - Literal labels to look for.
 * @returns {Object<string, string>} Map of label to matched number text.
 *   Labels that are not found are omitted instead of throwing.
 */
function extractValues(text, labels) {
  var found = {};
  for (var i = 0; i < labels.length; i++) {
    var pattern = new RegExp(
      escapeRegExp(labels[i]) + "\\s*(\\d+(?:[.,]\\d+)*)"
    );
    var match = text.match(pattern);
    if (match) {
      found[labels[i]] = match[1];
    }
  }
  return found;
}

/**
 * Builds a two-column table (label, value) from the extracted data.
 *
 * @param {Object<string, string>} values - Map of label to value.
 * @returns {HTMLTableElement} The populated table element.
 */
function buildTable(values) {
  var table = document.createElement("table");
  for (var label in values) {
    if (!Object.prototype.hasOwnProperty.call(values, label)) {
      continue;
    }
    var tr = document.createElement("tr");
    var th = document.createElement("th");
    var td = document.createElement("td");

    th.style.border = "1px solid gray";
    td.style.border = "1px solid gray";

    // textContent keeps the scraped text from being interpreted as markup.
    th.textContent = label;
    td.textContent = values[label];

    tr.appendChild(th);
    tr.appendChild(td);

    table.appendChild(tr);
  }
  return table;
}

// The DOM part only runs in a browser page that contains #parsingStuff.
var elem =
  typeof document !== "undefined"
    ? document.getElementById("parsingStuff")
    : null;

if (elem) {
  // Capture the text up front; both steps below work from this snapshot.
  var str = elem.textContent;

  setTimeout(function () { // Removing tags.
    // Assign via textContent (not innerHTML) so the text is not re-parsed.
    elem.textContent = str;
  }, wait ? 1000 : 0);

  setTimeout(function () { // Looking for patterns.
    var output = extractValues(str, keywords);

    // Creating basic table of found data.
    elem.innerHTML = "";
    elem.appendChild(buildTable(output));
  }, wait ? 2000 : 0);
}
<div id="parsingStuff"><div class="yfi_rt_quote_summary" id="yfi_rt_quote_summary"><div class="hd"><div class="title"><h2>GM Feb 2015 36.500 call (GM150220C00036500)</h2><span class="rtq_exch"><span class="rtq_dash">-</span>OPR
        </span><span class="wl_sign"></span></div></div><div class="yfi_rt_quote_summary_rt_top sigfig_promo_1"><div><span class="time_rtq_ticker"><span id="yfs_110_gm150220c00036500">0.83</span></span></div></div>undefined</div>undefined
  <div class="yui-u first yfi-start-content"><div class="yfi_quote_summary"><div id="yfi_quote_summary_data" class="rtq_table"><table id="table1"><tr><th scope="row" width="48%">Bid:</th><td class="yfnc_tabledata1"><span id="yfs_b00_gm150220c00036500">0.76</span></td></tr><tr><th scope="row" width="48%">Ask:</th><td class="yfnc_tabledata1"><span id="yfs_a00_gm150220c00036500">0.90</span></td></tr></table><table id="table2"><tr><th scope="row" width="48%">Open Interest:</th><td class="yfnc_tabledata1">11,579</td></tr></table></div></div></div></div>

Post a Comment for "How To Get At This Data"