[#4907] Add DSLContext.fetchFromHTML() to convert an HTML table into a jOOQ Result
This commit is contained in:
parent
e03f17c48e
commit
40e7abbd38
@ -2560,6 +2560,47 @@ public interface DSLContext extends Scope , AutoCloseable {
|
||||
@Support
|
||||
Result<Record> fetchFromTXT(String string, String nullLiteral) throws DataAccessException;
|
||||
|
||||
/**
 * Convert an HTML table into a jOOQ {@link Result}.
 * <p>
 * This is the inverse operation of {@link Result#formatHTML()}. It works
 * according to the following parsing rules:
 * <ul>
 * <li>The input is expected to be well-formed XML. XHTML conformance is not
 * required - i.e. unknown elements / attributes, or elements / attributes
 * not specified here, such as <code>&lt;caption&gt;</code>,
 * <code>&lt;thead&gt;</code>, <code>&lt;tbody&gt;</code> are simply ignored.</li>
 * <li>The surrounding <code>&lt;table&gt;</code> element is optional, but it
 * may appear only once.</li>
 * <li>A single row containing table headings <code>&lt;th&gt;</code> is
 * allowed. Further rows containing table headings are ignored. Table
 * headings define field names. In the absence of table headings, field
 * names are generated.</li>
 * <li>The first row <code>&lt;tr&gt;</code> specifies the number of columns in
 * the table (regardless of whether it contains table headings or not).
 * Subsequent rows containing fewer columns will be padded. Subsequent rows
 * containing more columns will be truncated.</li>
 * <li>Comments are ignored.</li>
 * <li>Nested tables are not supported.</li>
 * </ul>
 * <p>
 * Ideal input looks like this: <pre><code>
 * &lt;table&gt;
 * &lt;tr&gt;&lt;th&gt;COL1&lt;/th&gt;&lt;th&gt;COL2&lt;/th&gt;&lt;/tr&gt;
 * &lt;tr&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;a&lt;/td&gt;&lt;/tr&gt;
 * &lt;tr&gt;&lt;td&gt;2&lt;/td&gt;&lt;td&gt;b&lt;/td&gt;&lt;/tr&gt;
 * &lt;/table&gt;
 * </code></pre>
 *
 * @param string The HTML-formatted string.
 * @return The transformed result
 * @throws DataAccessException If the supplied string does not adhere to the
 *             above format rules.
 */
|
||||
@Support
|
||||
Result<Record> fetchFromHTML(String string) throws DataAccessException;
|
||||
|
||||
/**
|
||||
* Fetch all data from a CSV string.
|
||||
* <p>
|
||||
|
||||
@ -990,6 +990,11 @@ public class DefaultDSLContext extends AbstractScope implements DSLContext, Seri
|
||||
return fetchFromStringData(Utils.parseTXT(string, nullLiteral));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Result<Record> fetchFromHTML(String string) {
|
||||
return fetchFromStringData(Utils.parseHTML(string));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Result<Record> fetchFromCSV(String string) {
|
||||
return fetchFromCSV(string, true, ',');
|
||||
|
||||
@ -2734,6 +2734,45 @@ final class Utils {
|
||||
}
|
||||
}
|
||||
|
||||
private static final Pattern P_PARSE_HTML_ROW = Pattern.compile("<tr>(.*?)</tr>");
|
||||
private static final Pattern P_PARSE_HTML_COL_HEAD = Pattern.compile("<th>(.*?)</th>");
|
||||
private static final Pattern P_PARSE_HTML_COL_BODY = Pattern.compile("<td>(.*?)</td>");
|
||||
|
||||
static List<String[]> parseHTML(String string) {
|
||||
|
||||
List<String[]> result = new ArrayList<String[]>();
|
||||
|
||||
Matcher mRow = P_PARSE_HTML_ROW.matcher(string);
|
||||
while (mRow.find()) {
|
||||
String row = mRow.group(1);
|
||||
List<String> col = new ArrayList<String>();
|
||||
|
||||
// Header was not yet emitted
|
||||
if (result.isEmpty()) {
|
||||
Matcher mColHead = P_PARSE_HTML_COL_HEAD.matcher(row);
|
||||
|
||||
while (mColHead.find()) {
|
||||
col.add(mColHead.group(1));
|
||||
}
|
||||
}
|
||||
|
||||
if (col.isEmpty()) {
|
||||
Matcher mColBody = P_PARSE_HTML_COL_BODY.matcher(row);
|
||||
|
||||
while (mColBody.find()) {
|
||||
col.add(mColBody.group(1));
|
||||
}
|
||||
|
||||
if (result.isEmpty())
|
||||
result.add(fieldNames(col.size()));
|
||||
}
|
||||
|
||||
result.add(col.toArray(new String[col.size()]));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap a <code>DROP .. IF EXISTS</code> statement with
|
||||
* <code>BEGIN EXECUTE IMMEDIATE '...' EXCEPTION WHEN ... END;</code>, if
|
||||
|
||||
Loading…
Reference in New Issue
Block a user