[#4907] Add DSLContext.fetchFromHTML() to convert an HTML table into a jOOQ Result

This commit is contained in:
lukaseder 2016-01-07 15:41:20 +01:00
parent e03f17c48e
commit 40e7abbd38
3 changed files with 85 additions and 0 deletions

View File

@ -2560,6 +2560,47 @@ public interface DSLContext extends Scope , AutoCloseable {
@Support
Result<Record> fetchFromTXT(String string, String nullLiteral) throws DataAccessException;
/**
 * Convert an HTML table into a jOOQ {@link Result}.
 * <p>
 * This is the inverse operation of {@link Result#formatHTML()}. It works
 * according to the following parsing rules:
 * <ul>
 * <li>The input is expected to be well-formed XML. XHTML conformance is not
 * required - i.e. unknown elements / attributes, or elements / attributes
 * not specified here, such as <code>&lt;caption></code>,
 * <code>&lt;thead></code>, <code>&lt;tbody></code> are simply ignored.</li>
 * <li>The surrounding <code>&lt;table></code> element is optional, but it
 * may appear only once.</li>
 * <li>A single row containing table headings <code>&lt;th></code> is
 * allowed. Further rows containing table headings are ignored. Table
 * headings define field names. In the absence of table headings, field
 * names are generated.</li>
 * <li>The first row <code>&lt;tr></code> specifies the number of columns in
 * the table (regardless of whether it contains table headings or not).
 * Subsequent rows containing fewer columns will be padded. Subsequent rows
 * containing more columns will be truncated.</li>
 * <li>Comments are ignored.</li>
 * <li>Nested tables are not supported.</li>
 * </ul>
 * <p>
 * Ideal input looks like this:
 * <pre><code>
 * &lt;table>
 *   &lt;tr>&lt;th>COL1&lt;/th>&lt;th>COL2&lt;/th>&lt;/tr>
 *   &lt;tr>&lt;td>1&lt;/td>&lt;td>a&lt;/td>&lt;/tr>
 *   &lt;tr>&lt;td>2&lt;/td>&lt;td>b&lt;/td>&lt;/tr>
 * &lt;/table>
 * </code></pre>
 *
 * @param string The HTML-formatted string.
 * @return The transformed result
 * @throws DataAccessException If the supplied string does not adhere to the
 *             above format rules.
 */
@Support
Result<Record> fetchFromHTML(String string) throws DataAccessException;
/**
* Fetch all data from a CSV string.
* <p>

View File

@ -990,6 +990,11 @@ public class DefaultDSLContext extends AbstractScope implements DSLContext, Seri
return fetchFromStringData(Utils.parseTXT(string, nullLiteral));
}
/**
 * Turns an HTML table into a {@link Result} by letting
 * <code>Utils.parseHTML</code> split the markup into string rows and
 * handing those rows to <code>fetchFromStringData</code>.
 */
@Override
public Result<Record> fetchFromHTML(final String string) {
    return this.fetchFromStringData(
        Utils.parseHTML(string)
    );
}
@Override
public Result<Record> fetchFromCSV(String string) {
return fetchFromCSV(string, true, ',');

View File

@ -2734,6 +2734,45 @@ final class Utils {
}
}
// The HTML patterns below match tag names case-insensitively, tolerate
// attributes on the opening tag (e.g. <td class="x">) and use DOTALL so that
// a row or a cell may span several lines. The mandatory whitespace before the
// attribute part keeps <th ...> from matching <thead>.
private static final Pattern P_PARSE_HTML_ROW = Pattern.compile(
    "<tr(?:\\s[^>]*)?>(.*?)</tr\\s*>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
private static final Pattern P_PARSE_HTML_COL_HEAD = Pattern.compile(
    "<th(?:\\s[^>]*)?>(.*?)</th\\s*>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
private static final Pattern P_PARSE_HTML_COL_BODY = Pattern.compile(
    "<td(?:\\s[^>]*)?>(.*?)</td\\s*>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Split an HTML table into rows of cell contents.
 * <p>
 * The first element of the returned list always holds the field names: the
 * <code>&lt;th></code> contents of the first row if it has any, otherwise the
 * names produced by <code>fieldNames(int)</code> for the number of
 * <code>&lt;td></code> cells in the first row. Every following element holds
 * the <code>&lt;td></code> contents of one data row, in document order.
 * Rows appearing after the header that contain only <code>&lt;th></code>
 * cells are skipped. Cell contents are returned verbatim, i.e. nested markup
 * and character entities are not interpreted.
 *
 * @param string The HTML-formatted string.
 * @return The field names followed by the data rows; an empty list if the
 *         input contains no <code>&lt;tr></code> element.
 */
static List<String[]> parseHTML(String string) {
    List<String[]> result = new ArrayList<String[]>();
    Matcher mRow = P_PARSE_HTML_ROW.matcher(string);

    while (mRow.find()) {
        String row = mRow.group(1);
        List<String> col = new ArrayList<String>();

        // Header was not yet emitted
        boolean headerPending = result.isEmpty();

        if (headerPending) {
            Matcher mColHead = P_PARSE_HTML_COL_HEAD.matcher(row);

            while (mColHead.find()) {
                col.add(mColHead.group(1));
            }
        }

        if (col.isEmpty()) {
            Matcher mColBody = P_PARSE_HTML_COL_BODY.matcher(row);

            while (mColBody.find()) {
                col.add(mColBody.group(1));
            }

            // Only the first heading row defines field names; later rows
            // consisting solely of headings must not surface as empty
            // data rows.
            if (!headerPending && col.isEmpty() && P_PARSE_HTML_COL_HEAD.matcher(row).find()) {
                continue;
            }

            // No heading row present: generate field names from the width
            // of the first data row.
            if (headerPending) {
                result.add(fieldNames(col.size()));
            }
        }

        result.add(col.toArray(new String[col.size()]));
    }

    return result;
}
/**
* Wrap a <code>DROP .. IF EXISTS</code> statement with
* <code>BEGIN EXECUTE IMMEDIATE '...' EXCEPTION WHEN ... END;</code>, if